rstanarm/0000755000176200001440000000000014552326563012114 5ustar liggesusersrstanarm/NAMESPACE0000644000176200001440000001615014551552004013324 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(VarCorr,stanreg) S3method(as.array,stanreg) S3method(as.data.frame,stanreg) S3method(as.data.frame,summary.stanreg) S3method(as.matrix,stanreg) S3method(as_draws,stanreg) S3method(as_draws_array,stanreg) S3method(as_draws_df,stanreg) S3method(as_draws_list,stanreg) S3method(as_draws_matrix,stanreg) S3method(as_draws_rvars,stanreg) S3method(bayes_R2,stanreg) S3method(coef,stanmvreg) S3method(coef,stanreg) S3method(confint,stanreg) S3method(family,stanmvreg) S3method(family,stanreg) S3method(fitted,stanmvreg) S3method(fitted,stanreg) S3method(fixef,stanmvreg) S3method(fixef,stanreg) S3method(formula,stanmvreg) S3method(formula,stanreg) S3method(get_x,default) S3method(get_x,gamm4) S3method(get_x,lmerMod) S3method(get_x,stanmvreg) S3method(get_y,default) S3method(get_y,stanmvreg) S3method(get_z,lmerMod) S3method(get_z,stanmvreg) S3method(kfold,stanreg) S3method(launch_shinystan,stanreg) S3method(log_lik,stanjm) S3method(log_lik,stanmvreg) S3method(log_lik,stanreg) S3method(loo,stanreg) S3method(loo_R2,stanreg) S3method(loo_compare,stanreg) S3method(loo_compare,stanreg_list) S3method(loo_linpred,stanreg) S3method(loo_model_weights,stanreg_list) S3method(loo_predict,stanreg) S3method(loo_predictive_interval,stanreg) S3method(model.frame,stanmvreg) S3method(model.frame,stanreg) S3method(model.matrix,stanreg) S3method(names,stanreg_list) S3method(ngrps,stanmvreg) S3method(ngrps,stanreg) S3method(nobs,stanmvreg) S3method(nobs,stanreg) S3method(nsamples,stanreg) S3method(pairs,stanreg) S3method(plot,predict.stanjm) S3method(plot,stanreg) S3method(plot,survfit.stanjm) S3method(posterior_epred,stanreg) S3method(posterior_interval,stanreg) S3method(posterior_linpred,stanreg) S3method(posterior_predict,stanmvreg) S3method(posterior_predict,stanreg) 
S3method(posterior_vs_prior,stanreg) S3method(pp_check,stanreg) S3method(predict,stanreg) S3method(predictive_error,matrix) S3method(predictive_error,ppd) S3method(predictive_error,stanreg) S3method(predictive_interval,matrix) S3method(predictive_interval,ppd) S3method(predictive_interval,stanreg) S3method(print,compare_rstanarm_loos) S3method(print,prior_summary.stanreg) S3method(print,stanmvreg) S3method(print,stanreg) S3method(print,stanreg_list) S3method(print,summary.stanmvreg) S3method(print,summary.stanreg) S3method(print,survfit.stanjm) S3method(prior_summary,stanreg) S3method(ranef,stanmvreg) S3method(ranef,stanreg) S3method(residuals,stanmvreg) S3method(residuals,stanreg) S3method(se,stanmvreg) S3method(se,stanreg) S3method(sigma,stanmvreg) S3method(sigma,stanreg) S3method(summary,stanmvreg) S3method(summary,stanreg) S3method(terms,stanmvreg) S3method(terms,stanreg) S3method(update,stanjm) S3method(update,stanmvreg) S3method(update,stanreg) S3method(vcov,stanreg) S3method(waic,stanreg) export(R2) export(Surv) export(VarCorr) export(as_draws) export(as_draws_array) export(as_draws_df) export(as_draws_list) export(as_draws_matrix) export(as_draws_rvars) export(bayes_R2) export(cauchy) export(compare_models) export(decov) export(default_prior_coef) export(default_prior_intercept) export(dirichlet) export(exponential) export(fixef) export(get_x) export(get_y) export(get_z) export(hs) export(hs_plus) export(invlogit) export(kfold) export(laplace) export(lasso) export(launch_shinystan) export(lkj) export(log_lik) export(logit) export(loo) export(loo_R2) export(loo_compare) export(loo_linpred) export(loo_model_weights) export(loo_predict) export(loo_predictive_interval) export(neg_binomial_2) export(ngrps) export(normal) export(nsamples) export(pairs_condition) export(pairs_style_np) export(plot_nonlinear) export(plot_stack_jm) export(posterior_epred) export(posterior_interval) export(posterior_linpred) export(posterior_predict) export(posterior_survfit) 
export(posterior_traj) export(posterior_vs_prior) export(pp_check) export(pp_validate) export(predictive_error) export(predictive_interval) export(prior_options) export(prior_summary) export(product_normal) export(ps_check) export(ranef) export(se) export(sigma) export(stan_aov) export(stan_betareg) export(stan_betareg.fit) export(stan_biglm) export(stan_biglm.fit) export(stan_clogit) export(stan_gamm4) export(stan_glm) export(stan_glm.fit) export(stan_glm.nb) export(stan_glmer) export(stan_glmer.nb) export(stan_jm) export(stan_lm) export(stan_lm.fit) export(stan_lm.wfit) export(stan_lmer) export(stan_mvmer) export(stan_nlmer) export(stan_polr) export(stan_polr.fit) export(stanjm_list) export(stanmvreg_list) export(stanreg_list) export(student_t) export(waic) if(getRversion()>='3.3.0') importFrom(stats, sigma) else importFrom(lme4,sigma) import(Rcpp) import(bayesplot) import(methods) import(rstantools) import(shinystan) import(stats) importFrom(Matrix,Matrix) importFrom(Matrix,t) importFrom(RcppParallel,RcppParallelLibs) importFrom(bayesplot,pairs_condition) importFrom(bayesplot,pairs_style_np) importFrom(ggplot2,"%+replace%") importFrom(ggplot2,aes) importFrom(ggplot2,aes_) importFrom(ggplot2,aes_string) importFrom(ggplot2,coord_cartesian) importFrom(ggplot2,element_line) importFrom(ggplot2,element_text) importFrom(ggplot2,expand_limits) importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_contour) importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_pointrange) importFrom(ggplot2,geom_ribbon) importFrom(ggplot2,geom_segment) importFrom(ggplot2,geom_smooth) importFrom(ggplot2,geom_step) importFrom(ggplot2,geom_vline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggplot_build) importFrom(ggplot2,labs) importFrom(ggplot2,rel) importFrom(ggplot2,scale_color_gradient2) importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_x_discrete) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) importFrom(ggplot2,xlab) 
importFrom(lme4,findbars) importFrom(lme4,fixef) importFrom(lme4,glFormula) importFrom(lme4,glmer) importFrom(lme4,glmerControl) importFrom(lme4,lmer) importFrom(lme4,lmerControl) importFrom(lme4,mkVarCorr) importFrom(lme4,ngrps) importFrom(lme4,nlformula) importFrom(lme4,nlmer) importFrom(lme4,nlmerControl) importFrom(lme4,ranef) importFrom(loo,is.kfold) importFrom(loo,is.loo) importFrom(loo,is.waic) importFrom(loo,kfold) importFrom(loo,loo) importFrom(loo,loo.function) importFrom(loo,loo.matrix) importFrom(loo,loo_compare) importFrom(loo,loo_model_weights) importFrom(loo,psis) importFrom(loo,waic) importFrom(loo,waic.function) importFrom(loo,waic.matrix) importFrom(nlme,VarCorr) importFrom(posterior,as_draws) importFrom(posterior,as_draws_array) importFrom(posterior,as_draws_df) importFrom(posterior,as_draws_list) importFrom(posterior,as_draws_matrix) importFrom(posterior,as_draws_rvars) importFrom(rstan,constrain_pars) importFrom(rstan,extract) importFrom(rstan,extract_sparse_parts) importFrom(rstan,get_posterior_mean) importFrom(rstan,optimizing) importFrom(rstan,sampling) importFrom(rstan,stanc) importFrom(rstan,vb) importFrom(rstantools,loo_R2) importFrom(rstantools,nsamples) importFrom(stats,cov2cor) importFrom(stats,getInitial) importFrom(survival,Surv) importFrom(utils,capture.output) importFrom(utils,head) importFrom(utils,packageVersion) importFrom(utils,tail) importMethodsFrom(rstan,summary) useDynLib(rstanarm, .registration = TRUE) rstanarm/demo/0000755000176200001440000000000013340675562013040 5ustar liggesusersrstanarm/demo/ARM_Ch12_13.R0000644000176200001440000000441213340675562014663 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) ### Radon data source(paste0(ROOT, "ARM/Ch.12/radon.data.R"), local = DATA_ENV, verbose = FALSE) radon <- with(DATA_ENV, data.frame(y, x, u, radon, county)) # complete pooling (pool <- stan_glm(y ~ x, data = radon, seed = 
SEED, refresh = REFRESH)) # no pooling (no_pool <- update(pool, formula = y ~ x + factor(county) - 1)) # varying intercept with no predictors M0 <- stan_lmer(y ~ 1 + (1 | county), data = radon, seed = SEED, refresh = REFRESH) # varying intercept with individual-level predictor M1 <- update(M0, formula = y ~ x + (1 | county)) # include group-level predictor M2 <- update(M0, formula = y ~ x + u + (1 | county)) # varying intercepts and slopes M3 <- update(M0, formula = y ~ x + (1 + x | county)) # varying intercepts and slopes with group-level predictor M4 <- update(M0, formula = y ~ x + u + x:u + (1 + x | county)) # Can use VarCorr, coef, fixef, ranef just like after using lmer, e.g. VarCorr(M2) coef(M2) fixef(M2) ranef(M2) ### Pilots data source(paste0(ROOT, "ARM/Ch.13/pilots.data.R"), local = DATA_ENV, verbose = FALSE) pilots <- with(DATA_ENV, data.frame(y, scenario_id, group_id)) M5 <- stan_lmer(y ~ 1 + (1 | group_id) + (1 | scenario_id), data = pilots, seed = SEED, refresh = REFRESH) VarCorr(M5) ### Earnings data # regressions of earnings on ethnicity categories, age categories, and height source(paste0(ROOT, "ARM/Ch.13/earnings.data.R"), local = DATA_ENV, verbose = FALSE) earnings <- with(DATA_ENV, data.frame(earn = earn / 1e4, height = scale(height), eth, age)) f1 <- log(earn) ~ 1 + (1 + height | eth) f2 <- log(earn) ~ 1 + (1 + height | eth) + (1 + height | age) + (1 + height | eth:age) (fit1 <- stan_lmer(f1, data = earnings, seed = SEED, refresh = REFRESH)) (fit2 <- update(fit1, formula = f2)) ANSWER <- tolower(readline("Do you want to remove the objects this demo created? 
(y/n) ")) if (ANSWER != "n") { rm(radon, pilots, earnings, f1, f2, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/ARM_Ch07.R0000644000176200001440000000255413340675562014371 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) source(paste0(ROOT, "ARM/Ch.7/congress.data.R"), local = DATA_ENV, verbose = FALSE) cong_dat <- with(DATA_ENV, data.frame(incumbency_88, vote_88, vote_86)) # The stuff in sections 7.0 -- 7.2 is not very relevant post1 <- stan_lm(vote_88 ~ vote_86 + incumbency_88, data = cong_dat, prior = R2(0.9, what = "mean"), seed = SEED, refresh = REFRESH) post1 # badly underfitting y_tilde <- posterior_predict(post1) # incumbency_90 is not available summary(rowSums(y_tilde > 0.5)) data(wells, package = "rstanarm") wells$dist100 <- with(wells, dist / 100) post2 <- stan_glm(switch ~ dist100, data = wells, family = "binomial", iter = 100, chains = 1, seed = SEED, refresh = REFRESH) prop.table(table(c(posterior_predict(post2)))) # the compound model is not good because it assumes the two errors are # independent. rstanarm will eventually support Heckman models, which # would be a better choice here. ANSWER <- tolower(readline("Do you want to remove the objects this demo created? 
(y/n) ")) if (ANSWER != "n") { rm(y_tilde, wells, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/ARM_Ch04.R0000644000176200001440000001163613340675562014367 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) source(paste0(ROOT, "ARM/Ch.4/earnings.data.R"), local = DATA_ENV, verbose = FALSE) earnings_dat <- with(DATA_ENV, data.frame(earn, height, male)) # The stuff in sections 4.0 -- 4.3 is not very relevant # Moreover, centering predictors is NOT recommended in the rstanarm package # Just look at the posterior predictive distribution # over a range of values to interpret the effect of a predictor # These two models are essentially equivalent in the likelihood # But the "same" priors affect the posterior differently post1 <- stan_glm(log(earn) ~ height, data = earnings_dat, family = gaussian(link = "identity"), seed = SEED, refresh = REFRESH) # post2 <- stan_glm(earn ~ height, data = earnings_dat, # family = gaussian(link = "log"), # seed = SEED, refresh = REFRESH) # and this does not even converge # These models add terms to the right-hand side post3 <- stan_lm(log(earn) ~ height + male, data = earnings_dat, prior = R2(location = 0.3, what = "mean"), seed = SEED, refresh = REFRESH) post4 <- update(post3, formula = log(earn) ~ height * male) # Compare them with loo loo1 <- loo(post1) # post2 is not comparable to the others anyway loo3 <- loo(post3) loo4 <- loo(post4) compare(loo1, loo3, loo4) # loo1 is dominated # Generate predictions to interpret WOMEN_SEQ <- seq(from = 58, to = 75, by = 1) MEN_SEQ <- seq(from = 60, to = 77, by = 1) YLIM <- c(500, 100000) y_women <- posterior_predict(post4, fun = exp, newdata = data.frame(male = 0, height = WOMEN_SEQ)) y_men <- posterior_predict(post4, fun = exp, newdata = data.frame(male = 1, height = 
MEN_SEQ)) par(mfrow = c(1:2), mar = c(5,4,2,1) + .1) boxplot(y_women, axes = FALSE, outline = FALSE, log = "y", ylim = YLIM, xlab = "Height in Inches", ylab = "", main = "Predicted Earnings of Women") axis(1, at = 1:ncol(y_women), labels = WOMEN_SEQ, las = 3) axis(2, las = 1) boxplot(y_men, outline = FALSE, col = "red", axes = FALSE, log = "y", ylim = YLIM, xlab = "Height in Inches", ylab = "", main = "Predicted Earnings of Men") axis(1, at = 1:ncol(y_men), labels = MEN_SEQ, las = 3) # Prediction of the weight of mesquite trees DATA_ENV <- new.env() source(paste0(ROOT, "ARM/Ch.4/mesquite.data.R"), local = DATA_ENV, verbose = FALSE) tree_dat <- as.data.frame(do.call(cbind, as.list(DATA_ENV))) CONTINUE1 <- tolower(readline( paste("A heads up: the next part of the demo (Predicting weight of mesquite trees )", "prints many lines \nto the console as it runs many models and compares the results", "Proceed? (y/n)") )) if (CONTINUE1 != "n") { post5 <- stan_lm(weight ~ diam1 + diam2 + canopy_height + total_height + density + group, data = tree_dat, prior = R2(0.9), seed = SEED, refresh = REFRESH) post6 <- update(post5, formula = log(weight) ~ log(diam1) + log(diam2) + log(canopy_height) + log(total_height) + log(density) + group) post7 <- update(post5, formula = log(weight) ~ log(diam1 * diam2 * canopy_height), prior = R2(0.75, what = "mean")) post8 <- update(post5, formula = log(weight) ~ log(diam1 * diam2 * canopy_height) + log(diam1 * diam2) + group, prior = R2(0.8)) post9 <- update(post5, formula = log(weight) ~ log(diam1 * diam2 * canopy_height) + log(diam1 * diam2) + log(diam1 / diam2) + group, prior = R2(0.85)) # Compare them with loo compare(loo(post5), loo(post6), loo(post7), loo(post8), loo(post9)) } # Predicting "continuous" party ID over time without multilevel stuff CONTINUE2 <- tolower(readline( paste("A heads up: the next part of the demo (Predicting party ID over time)", "prints many lines \nto the console as it runs many models and compares the results", 
"Proceed? (y/n)") )) if (CONTINUE2 != "n") { YEARS <- as.character(seq(from = 1972, to = 1980, by = 4)) round(digits = 2, x = sapply(YEARS, FUN = function(YEAR) { DATA_ENV <- new.env() source(paste0(ROOT, "ARM/Ch.4/nes", YEAR, ".data.R"), local = DATA_ENV, verbose = FALSE) pid_dat <- as.data.frame(do.call(cbind, as.list(DATA_ENV))) coef(stan_lm(partyid7 ~ real_ideo + I(race_adj == 1) + as.factor(age_discrete) + educ1 + gender + income, data = pid_dat, prior = R2(0.5), seed = SEED, refresh = 0)) })) } ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) ")) if (ANSWER != "n") { if (CONTINUE2 != "n") rm(YEARS) rm(WOMEN_SEQ, MEN_SEQ, y_women, y_men, YLIM, ANSWER, CONTINUE1, CONTINUE2) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/ARM_Ch03.R0000644000176200001440000000570713340675562014370 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) source(paste0(ROOT, "ARM/Ch.3/kidiq.data.R"), local = DATA_ENV, verbose = FALSE) dat <- with(DATA_ENV, data.frame(kid_score, mom_hs, mom_iq)) # Estimate four contending models post1 <- stan_glm(kid_score ~ mom_hs, data = dat, family = gaussian(link = "identity"), seed = SEED, refresh = REFRESH) post2 <- update(post1, formula = kid_score ~ mom_iq) post3 <- stan_lm(kid_score ~ mom_hs + mom_iq, data = dat, prior = R2(location = 0.25, what = "mean"), seed = SEED, refresh = REFRESH) post4 <- update(post3, formula = kid_score ~ mom_hs * mom_iq, prior = R2(location = 0.30, what = "mean")) # Compare them with loo loo1 <- loo(post1) loo2 <- loo(post2) loo3 <- loo(post3) loo4 <- loo(post4) par(mfrow = c(2,2), mar = c(4,4,2,1) + .1) plot(loo1, label_points = TRUE); title(main = "Model 1") plot(loo2, label_points = TRUE); title(main = "Model 2") plot(loo3, label_points = TRUE); title(main = "Model 
3") plot(loo4, label_points = TRUE); title(main = "Model 4") compare(loo1, loo2, loo3, loo4) # fourth model dominates # Generate predictions IQ_SEQ <- seq(from = 75, to = 135, by = 5) y_nohs <- posterior_predict(post4, newdata = data.frame(mom_hs = 0, mom_iq = IQ_SEQ)) y_hs <- posterior_predict(post4, newdata = data.frame(mom_hs = 1, mom_iq = IQ_SEQ)) par(mfrow = c(1:2), mar = c(5,4,2,1)) boxplot(y_hs, axes = FALSE, outline = FALSE, ylim = c(30,160), xlab = "Mom IQ", ylab = "Predicted Kid IQ", main = "Mom HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) axis(2, las = 1) boxplot(y_nohs, outline = FALSE, col = "red", axes = FALSE, ylim = c(30,160), xlab = "Mom IQ", ylab = "Predicted Kid IQ", main = "Mom No HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) axis(2, las = 1) # External Validation source(paste0(ROOT, "ARM/Ch.3/kids_before1987.data.R"), local = DATA_ENV, verbose = FALSE) source(paste0(ROOT, "ARM/Ch.3/kids_after1987.data.R"), local = DATA_ENV, verbose = FALSE) fit_data <- with(DATA_ENV, data.frame(ppvt, hs, afqt)) pred_data <- with(DATA_ENV, data.frame(ppvt_ev, hs_ev, afqt_ev)) post5 <- stan_lm(ppvt ~ hs + afqt, data = fit_data, prior = R2(location = 0.25, what = "mean"), seed = SEED, refresh = REFRESH) y_ev <- posterior_predict( post5, newdata = with(pred_data, data.frame(hs = hs_ev, afqt = afqt_ev)) ) par(mfrow = c(1,1)) hist(-sweep(y_ev, 2, STATS = pred_data$ppvt_ev, FUN = "-"), prob = TRUE, xlab = "Predictive Errors in ppvt", main = "", las = 2) ANSWER <- tolower(readline("Do you want to remove the objects this demo created? 
(y/n) ")) if (ANSWER != "n") { rm(IQ_SEQ, y_nohs, y_hs, y_ev, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/SETUP.R0000644000176200001440000000063613340675562014070 0ustar liggesusersstopifnot(require(rstanarm)) stopifnot(require(loo)) stopifnot(require(ggplot2)) stopifnot(require(parallel)) options(mc.cores = parallel::detectCores()) SEED <- 12345L REFRESH <- 1000L if (R.version$major < 3 || R.version$minor < 2.0) warning("This demo may not work on older versions of R due to HTTPS URLs") ROOT <- "https://raw.githubusercontent.com/stan-dev/example-models/master/" DATA_ENV <- new.env() rstanarm/demo/ARM_Ch09.R0000644000176200001440000000317013340675562014366 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) # read data into DATA_ENV environment source(paste0(ROOT, "ARM/Ch.9/electric_grade4.data.R"), local = DATA_ENV, verbose = FALSE) dat <- with(DATA_ENV, data.frame(post_test, grade, pre_test, treatment)) post1 <- stan_lm(post_test ~ treatment * pre_test, data = dat, prior = R2(0.75), seed = SEED, refresh = REFRESH) post1 # underfitting but ok because it is an experiment plot(post1) y_0 <- posterior_predict(post1, data.frame(treatment = 0, pre_test = dat$pre_test)) y_1 <- posterior_predict(post1, data.frame(treatment = 1, pre_test = dat$pre_test)) diff <- y_1 - y_0 mean(diff) sd(diff) # much larger than in ARM hist(diff, prob = TRUE, main = "", xlab = "Estimated Average Treatment Effect", las = 1) stopifnot(require(bayesplot)) plots <- sapply(1:4, simplify = FALSE, FUN = function(k) { dat$supp <- source(paste0(ROOT, "ARM/Ch.9/electric_grade", k, "_supp.data.R"), verbose = FALSE)$value out <- plot(stan_lm(post_test ~ supp + pre_test, data = dat, seed = SEED, refresh = REFRESH, prior = R2(0.75, what = "mean"))) out + ggtitle(paste("Grade =", k)) 
}) bayesplot_grid(plots = plots, grid_args = list(nrow = 2, ncol = 2)) ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) ")) if (ANSWER != "n") { rm(y_0, y_1, diff, plots, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/ARM_Ch05.R0000644000176200001440000000744113340675562014367 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) source(paste0(ROOT, "ARM/Ch.5/nes1992_vote.data.R"), local = DATA_ENV, verbose = FALSE) nes1992 <- with(DATA_ENV, data.frame(vote, income)) invlogit <- plogis # We'll use a Student t distribution with 7 degrees of freedom as our default # weakly informative prior for logistic regression coefficients. This prior # reflects the belief that the coefficients are probably close to zero, are as # likely to be positive as they are to be negative, but do have a small chance # of being quite far from zero. Using a normal distribution instead of the t # distribution would be a more informative prior, as the tails of the normal # distribution as less heavy. The t is therefore a bit more robust. 
t_prior <- student_t(df = 7, location = 0, scale = 2.5) # Logistic regression with one predictor vote_fit <- stan_glm(vote ~ income, data = nes1992, family=binomial(link="logit"), prior = t_prior, prior_intercept = t_prior, seed = SEED, refresh = REFRESH) print(vote_fit, digits = 2) b <- coef(vote_fit) plot(vote_fit, "hist", pars = names(b)) # Probability of Bush vote at various values of income pr_bush <- function(x, coefs) invlogit(coefs[[1]] + coefs[[2]] * x) income_vals <- with(nes1992, c(min(income), median(income), max(income))) pr_bush(income_vals, b) # How the probability differs with a unit difference in x near the central value pr_bush(3, b) - pr_bush(2, b) # Wells in Bangladesh source(paste0(ROOT, "ARM/Ch.5/wells.data.R"), local = DATA_ENV, verbose = FALSE) wells <- with(DATA_ENV, data.frame(switch = switched, dist100 = dist/100, arsenic)) # Only use distance (in 100m) as predictor post1 <- stan_glm(switch ~ dist100, data = wells, family = "binomial", prior = t_prior, prior_intercept = t_prior, seed = SEED, refresh = REFRESH) # Add arsenic as predictor post2 <- update(post1, formula = switch ~ dist100 + arsenic) # Add interaction of dist100 and arsenic post3 <- update(post2, formula = .~. 
+ dist100:arsenic) plot(post3, "areas", prob = 0.9, prob_outer = 1) # Compare them with loo loo1 <- loo(post1) loo2 <- loo(post2) loo3 <- loo(post3) compare(loo1, loo2, loo3) # loo1 is dominated # Graphing the fitted models op <- par('mfrow') par(mfrow = c(1,2)) jitter.binary <- function(a, jitt=.05){ ifelse(a==0, runif(length(a), 0, jitt), runif (length(a), 1-jitt, 1)) } b2 <- coef(post2) b3 <- coef(post3) # As function of dist100 with(wells, plot(dist100, jitter.binary(switch), xlim=c(0, max(dist100)), ylab = "Prob")) # Model with two predictors in red curve(invlogit(cbind(1, x, .5) %*% b2), add = TRUE, col = "red", lty = 2) curve(invlogit(cbind(1, x, 1) %*% b2), add = TRUE, col = "red", lty = 2) # Model with interaction in blue curve(invlogit(cbind(1, x, .5, .5 * x) %*% b3), add = TRUE, col = "blue") curve(invlogit(cbind(1, x, 1, 1 * x) %*% b3), add = TRUE, col = "blue") # As function of arsenic with(wells, plot(arsenic, jitter.binary(switch), xlim=c(0, max(arsenic)), ylab = "Prob")) curve(invlogit(cbind (1, 0, x) %*% b2), add = TRUE, col = "red", lty = 2) curve(invlogit(cbind (1,.5, x) %*% b2), add = TRUE, col = "red", lty = 2) curve(invlogit(cbind(1, 0, x, 0 * x) %*% b3), add = TRUE, col = "blue") curve(invlogit(cbind(1, .5, x, .5 * x) %*% b3), add = TRUE, col = "blue") par(mfrow = op) ANSWER <- tolower(readline("Do you want to remove the objects this demo created? 
(y/n) ")) if (ANSWER != "n") { rm(nes1992, invlogit, t_prior, b, pr_bush, income_vals, wells, jitter.binary, b2, b3, op, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/CLEANUP.R0000644000176200001440000000025413340675562014253 0ustar liggesusersrm(DATA_ENV, ROOT, SEED, REFRESH) ours <- unlist(eapply(.GlobalEnv, FUN = function(x) is(x, "stanreg") | is(x, "loo"))) ours <- names(ours[ours]) rm(list = ours) rm(ours) rstanarm/demo/ARM_Ch08.R0000644000176200001440000000444213340675562014370 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) source(paste0(ROOT, "ARM/Ch.8/lightspeed.data.R"), local = DATA_ENV, verbose = FALSE) light_dat <- with(DATA_ENV, data.frame(y)) # The stuff in sections 8.0 -- 8.2 is not very relevant (post1 <- stan_glm(y ~ 1, data = light_dat, seed = SEED, refresh = REFRESH)) y_rep <- posterior_predict(post1) pp_check(post1, plotfun = "stat", stat = "min") + ggtitle("Minimum Predicted Measurement Error") # make similar plot manually hist(apply(y_rep, 1, min), prob = TRUE, main = "", las = 1, xlab = "Minimum Predicted Measurement Error", xlim = c(-45,20)) abline(v = min(DATA_ENV$y), col = "red") # Compare observed y to several replicated y (y_rep) from posterior predictive # distribution ttl <- paste("Measurement Error for the Speed of Light", "\nvs Predicted Measurement Error") pp_check(post1, plotfun = "hist") + ggtitle(ttl) # Make similar plot manually but combine all y_rep op <- par('mfrow') par(mfrow = 1:2, mar = c(5,4,1,1) + .1) hist(light_dat$y, prob = TRUE, main = "", las = 1, xlab = "Measurement Error for the Speed of Light") hist(y_rep, prob = TRUE, main = "", las = 1, xlab = "Predicted Measurement Error") par(mfrow = op) # Roaches example data(roaches, package = "rstanarm") post2 <- stan_glm(y ~ roach1 + treatment + 
senior, data = roaches, family = poisson(link = "log"), seed = SEED, refresh = REFRESH) y_rep <- posterior_predict(post2) # Compare observed proportion of zeros to predicted proportion of zeros mean(y_rep == 0) mean(roaches$y == 0) summary(apply(y_rep == 0, 1, mean)) prop0 <- function(x) mean(x == 0) pp_check(post2, plotfun = "stat", stat = "prop0") # model doesn't predict enough zeros # Negative binomial model does a much better job handling the zeros post3 <- update(post2, family = neg_binomial_2()) pp_check(post3, plotfun = "stat", stat = "prop0") # rstanarm does not yet support time-series models ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) ")) if (ANSWER != "n") { rm(y_rep, prop0, ttl, op, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/demo/00Index0000644000176200001440000000077613340675562014204 0ustar liggesusersARM_Ch03 Some stan_lm demos with data on kids IQ ARM_Ch04 Some stan_lm demos with transformed data ARM_Ch05 Some stan_glm demos with logit models ARM_Ch07 More examples of posterior predictive distributions ARM_Ch08 More examples of posterior predictive distributions ARM_Ch09 Regression for causal inference ARM_Ch12_13 Models with group-specific parameters ARM_Ch14 More models with group-specific parameters CLEANUP Removes objects created by our demos SETUP Loads packages, creates objects, etc. 
rstanarm/demo/ARM_Ch14.R0000644000176200001440000000245313340675562014365 0ustar liggesusers# loads packages, creates ROOT, SEED, and DATA_ENV demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) source(paste0(ROOT, "ARM/Ch.14/election88.data.R"), local = DATA_ENV, verbose = FALSE) election88 <- with(DATA_ENV, data.frame(y, black, v.prev.full = v_prev_full, region.full = region_full, age.edu = age_edu, age, edu, female, state)) t_prior <- student_t(df = 7) fmla1 <- y ~ black + female + (1 | state) M1 <- stan_glmer(fmla1, data = election88, family = binomial(link="logit"), prior = t_prior, prior_intercept = t_prior, seed = SEED, iter = 250, refresh = 125) # this model is a bit slow to run print(M1, digits = 2) # can also do fixef(M1), ranef(M1), VarCorr(M1), etc. fmla2 <- y ~ black + female + black:female + v.prev.full + (1 | age) + (1 | edu) + (1 | age.edu) + (1 | state) + (1 | region.full) M2 <- update(M1, formula = fmla2) print(M2) ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) ")) if (ANSWER != "n") { rm(election88, t_prior, fmla1, fmla2, ANSWER) # removes stanreg and loo objects, plus what was created by STARTUP demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE) } rstanarm/tools/0000755000176200001440000000000014370470372013250 5ustar liggesusersrstanarm/tools/make_cc.R0000644000176200001440000000475414370470372014767 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. options(warn = 3L) options("useFancyQuotes" = FALSE) make_cc <- function(file) { file <- sub("\\.cc$", ".stan", file) cppcode <- rstan::stanc(file, allow_undefined = TRUE, obfuscate_model_name = FALSE)$cppcode cppcode <- sub("(class[[:space:]][A-Za-z_][A-Za-z0-9_]*[[:space:]])", paste("#include \n", "\\1"), cppcode) writeLines(c(readLines(dir("stan_files", pattern = "license.stan", recursive = TRUE, full.names = TRUE)), "#ifndef MODELS_HPP", "#define MODELS_HPP", "#define STAN__SERVICES__COMMAND_HPP", "#include ", cppcode, "#endif"), con = sub("\\.stan$", ".hpp", file)) f <- sub("\\.stan$", "", basename(file)) Rcpp::exposeClass(class = paste0("model_", f), constructors = list(c("SEXP", "SEXP", "SEXP")), fields = character(), methods = c("call_sampler", "param_names", "param_names_oi", "param_fnames_oi", "param_dims", "param_dims_oi", "update_param_oi", "param_oi_tidx", "grad_log_prob", "log_prob", "unconstrain_pars", "constrain_pars", "num_pars_unconstrained", "unconstrained_param_names", "constrained_param_names"), file = file.path("stan_files", paste0(f, ".cc")), header = paste0('#include "', f, '.hpp"'), module = paste0("stan_fit4", f, "_mod"), CppClass = "rstan::stan_fit ", Rfile = FALSE) return(invisible(NULL)) } rstanarm/cleanup.win0000755000176200001440000000043714370470372014265 0ustar liggesusers#!/bin/sh -e # Note to Windows users: This is not actually platform specific. 
"${R_HOME}/bin/R" --vanilla --slave -e 'roxygen2::roxygenize(load_code = roxygen2::load_source, clean = TRUE)' cp -r src/stan_files tests/testthat/stan_files cp -r inst/include tests/testthat/include exit $? rstanarm/data/0000755000176200001440000000000014406606742013023 5ustar liggesusersrstanarm/data/bball2006.rda0000644000176200001440000000213313340675562015100 0ustar liggesusersBZh91AY&SYN9~{n7s *&d4dѩOFOS&1M O)4<&i="m44hOMFzjm5 O$dfM zɣ͔M4ѡ$2ij= 16F!4e410hd4Ѡ&4b@#A b4Ѡ 4 S*yOP !dFMh  24414 dd@ =Z`TZLBd|&kHx4F<T!lm -4e gT;cM/e eaQHe3,<@٪w^p4J29řdI2(hC+@U~gΪ&;+ p<?NK8p`2vQ^z) ;Y J9ИjL&l_$3"ƉU+XP}DA=wšVq1V!r4+U(+Itޠv"=|3Vel%8!.Db Q3o[el@=I~[|dFrY>Cq^PGNV{-I=:5:$hqRje!I!ߑ_(6Gk\nС",Q^4`L{v(J<  3!4x,_|c!N z ?iTPK^IJwꨟ)pga~3 :`!=$ ;(ZH,BHHP$) $fQɰW?k/70%!eiJU)QI,)Qbk_0{zb` Pْ(P6$(Ae0Xe-SLroa_It*bK5ej:,;m݃l+Y9(\HRII c Ad Y QLS R,X?EvH  !rstanarm/data/wells.rda0000644000176200001440000004417113340675562014652 0ustar liggesusersBZh91AY&SY ǡt]LpqREd€0 hD$@TmIP +p;^fz#UUmwjyu+_{ֻu\[Aw0H$;0{`H=G]vx`ɦade6` iMF4h MM MM6ښ'SjzSښB OȚOQ#C@3Q P 4L@SDLz zb@44m@=CM2ɩ2z ѧPhSƨ@jFB %ڀ4 =C@d@h44d 4hC@z@4I&$?҈z644@@h4@@(@MɠLCIa3F)=41ISeLSQFy12d)馉 7I ڞy5Q,c~43 ~ UzY_;[{v?Ѐ2q&`̠ץVV "z$ :|iɆWW53L<+ZַTROFA-ل0]`<> *^Hr 9!Ŏ! COI[X"(er7XaP[Bf5"iE܇y%!93X͖Mh[kXRA K7L&'!;vP2Ghk=-hr:Y5'OkW^  0b(vOꥼƌ҆ BY+XfaíW, #,58M4!Jcxun-+q bU(jGJldȨ ,R(Y5n2" ,hQ*eZ+"0R 8 !Rs)YR-eP4' 1jVy}́1RZSk*sRIa d@UIgyKClQ;,t [X{:gOd@GԾˬXȗn!Sqn'ŷV3L䤈%Vyʄ0IP4:J%s Gu2|bA@wLCUYb*J-+&0*H,FEOHaXȅb JdþyY^>YO9j(;LY&[+J/(CVk Im -Ww̜6z!iYҦ8T򶊂#UVva{:=sT 19IRJ-jV$c2+1 V(pPC@gfqĻ!S`AKDNYos^\y!kѵS3v t(_5lj,\K$_ Ɗ1,9 k/*=6*l!cTAI` H)@Q܁2y]+Yn٥!zT>:)/bdvQ)f)h22 (vX <5XiםXsqݡ85ڞCUJ#m#y/rᛴ\6M0eZNJ M$F*aԭhpnɷnw Xz>ﹸy. e  &iHcMT&J| ~}+ <C4bHaki? 
MA(TPP@]SXa B%zܷ"X"o><[ rSs/fEM W~,fKd-2OO쮝Ń'g},@Ġe.YV{1;4Tz ]cfX7VtI=ۅ 4Qr EXa{j9nK=[nq}=ٕh 1<,~,Wy-2g6}SBNwi Cʬ#ij>/KS7F "_v' rtP@Ƿ"+6]uQn%ާΆ鍯mk BI^wJ6.[1ampQ$qb/ih>N V͚ 0eS e5SU(0jg:hPPjQ#_e9ye賵r]] BQ$riY6p@;ޔ+˭ҷw %g]\1B3A_:[M@V]7Y02`cdH C/Xh̵ 3JĖtAYUM#Jw_|ח]M-0DcyY@5S(z;K0+JAHߐ{h&f'p\x8LjnCs'/H)}cD{U${y/3ն|*/mZϿMBf|@8b n\D6=*>+ƬVb;3j=kÛֵӺt4u7@'Fc|IIqwwQI/e0-ϭ|A8>]:pdUpVl>/LX죙;;LS3r4RGEYr4"TIV'9+>䭃>R =9fZ!bfC3?mJer7pCY>~/8'G/D6o6ʵ0l7#j2ZE}k(zM , %P;6Ih$CL`3FƏY qnkm62G0*,9{sʬ'Y~Q*JO)ɛtbROQB t'SUʊrU)j ~gvO4d"?uE?k0p̿P%ʜbL8tJ)ׅib4/bQ)U#ηIKa9Wo7CMÀK14=?Wطsn?8G4qg`.& n:5iѼ=m%;Kb-@(nW9~?_%4ʒl˭ )}Tk/~V1!H/:Tjsc mBY,x JǓ+&J:)9QeIݓEWؙAxkDA1 [Q% J[WF˜QJnцO%c5Eb=63YK,BVRtXbZk-e[NI\r3T4/07ƋMޣ*$PV e] A#dUDeҁlBt7tWkL{KdSM%Ytg1&qg8We(e*ȒQbHE'1(uQr,aҗ5FIXx!< HP6 2vr6ZFT8AJy(IHBFRNUnDKFF,M|1+$sަ;\w8A#hPEIJ] 9-c=WH-'0q!؍1dr,Ewuu$ɽFY(FS5jΗc"-FY` I9h1UPY' 25ݏ2qծKV\ AQQbT $֪l=anI =}ȦMM EvNʦ^;6SPtbv+MLk` aiZdPgҌ'Y yd_T(+2M @cK:\N6'uNۥO c!s%%qd#ݗ]wHCT&rIz!VEDD-Bʹ &s+//eo͓%!p:D/CD\YC"fcZ 0HfylOi@TЭ8誎-`mVDE)JUd(]LԐᲜ挵CLzS7>hQGBﵔ꠱(@% 0$ o#bM-hwFfsALQ5]y5XC3=MuNLGBSM7cXX)_Wbd(3S[oxم|qz1X r3dk9P= e(Rǯ3keNv֞bX1sN@#he\tHe+W6A0FYLI}E)M)***ikrhYB(aFZ:ۣuֵpŷ4jkNv]ɉTRI,|`LMH P@Ijj~M+4aNѺ%QCC(F2~p[3y&PƋʌZ(VdXNīemdZii`\]/Gq懶9!$$,m z߈Gk髨b i-ZpC(*.%$A8 FB,KT/xݗCy1QbYMfɒdq_> H#))LI 1m8bZ-L1St'lڊE%7^x B\,rb`shn:]Y 4||R8-<'7,]W#LH7bcoΞ"e2>o1Y% ܪ[‘ڇJkYMB[,D%0U(p =47vl%N):+@6c4eI6Yn;/c[Fq*Cgd9lrD-!v gUc0ESyK̦P )e(6ESb4zMFFom1̡U$I XdCJttL.mTdQeAVP["Aa66TΊU:k Ti9SlĜpQuZN,pz=D{ٳԄ -w%n< ϢH,lµfJ #?`t6rŝXBqcWV C~@]c &si{+kaZsC{t qc=p_ }H *awxpRoiGlbD~lXNm+]a]iv9W4v_[sd}1+-9o>D_?MR7u#x~=NvD.?;ߠi^<_vW?躿t߇]=E~=2qFNݿ;9tOéJ޷{yon}?m m{kLa _תy%0 rGG-IѮTe٥sX¦L J+ 8i`MW+AÿwYucc|zxX-]mv,WHvUVvߦa.k F̶}(Nga= 9m9s_fw?KS׬VvRЌJgYhBh/ËND}E>S">''˷)W5nA$'OZ.9_քcvoͳ⟎\:MTgγYcrs|(L|(Wn"vbjfHӧnn:L/?Gk=G.QM>eǥ r+z(Vu5vW.-xƩaMC}ҹnDb nbop?C$ g/֒o 1 o\b GGdO0Y ҝ}n}J\O4#L6F&zZ5 =_b#u(Ru(3Zx1=6U&BZR3xCΚ{\ڙ -OMK^2;+x(Q{g>FfZgC.vQSl=L/*>A<01a=JgW.v.=R) e’J]f}ҟ^o8Zd͹ؕ[h5A|޲NQw4, q?r.:n)4y uゖqc_t8oӕPep;c$MJӝtΪCGƜ;Cz76wrzY._$Xz >5]^M: EJ 
~۬4[Q9&Gg/F/hځ L"ڹQ, `0|)`OOkauɶa@&,I29[GE9JV$f Mm ܍{Dʮ<(e$r΋9)anm}j1:62Aͣ`f Q$/lXblApwb}2cR1.D!iH&hL3iee+7H],F1g:F>>hcEWG,J&p*\% %2 o tpV@^CI~HlD'6efT 7ws.' AN@b]M d,Gįc1XɞiM>x`9"iSД#|M~z< o;҈[S+ jH9;3&oIq&~>}]u={B 7 ^cRBbuXcC۞PCv;nYGqp[NZi!kKz,X)f5yš7QAJiFژ^`/P_?ŢqN PϡP{r@@*I1RѦoߵR#I 5@zPj~ń|;9 Fy;U\y ?j)rK< (TPK_W/bD G7&k:Xf /2C*k8OV.ܒ C|"@eh9Fvwt,sv'Ϣ<)x@RE9D2z g;PkRPi&p>^_5Vr1y92 -㵣30qu3XQ" (br?g 7 9~.$g@9lT \>{m폕^;sndB"p։H |#X:L@6!CԿ5*RjhMzjCyA1hR ASS9O*}L?խk[VWr>>e<UXvH *j0 ;_Lʀ*w|ݧn$]~9/fS qLtQ|B6QCkQ d3qo2de cSEGWa>KymRU~~Z~SPҹ#ylm_g"xJIьeSE';@N3QMS`\Kb- ׼.a%,@"{)nj q Uyxwy ]bS.2!)QLuۺTmsnأ}ceSn,s\K< i0UHQ4J/ nAQ{etھXy.3nBU:8<8zQ[:!ktMttcQW0gA's%`8Oط!"Mض\{gfˆNmO;A [B&d()zkc4wf}o<eg# [:5kZDXMGE} ?;MtC_Uk%7=ZfkBd=kJNubOA`!U3|3=琄B[-$$K^'fQYM QʘarZ_@+Ӕ2!0m}.| d`b! bBJyim}it4 Evm-g~Ϡ[t=S4HTVq^]4ʅjf55|-lRX>JDZ. Ra_^>];N%|,F{Wx'"UmNEj.q\ߊOitɖd[TIh 2!akSubrեq4Q _WV1Qmm%EN>;T<Z]ȟwОrNnt"ӷ)jruSnsE\vOᛁd@:߈ÑVJ3ɋ˷.s?,8ja`xq.NwVˆ0tӽ-o6,LR$"/Wc =x.-=[3w _űþ^𸟤jԥ Rmm kˆROf &_12XV{e iTsf=Y|R8WH8zHcߢۛ.G}i|uLWg)clmUckae*/ kOp]sgֈJi` on=wQMc.W?WGZTn{_ _u\*DL3gˊbτx@!Td` ~nЂ!ɣKJpܼb1:|6ftp;kDwQ­u*Y7p^uT2MxxaUuvir9vdͪTy9 ؜ $${ O##ĊB-z˟вd׭./KA%^>Km! pm0g1r#2VG0(ADS+\YMM#dcb/R;55FƓRAt 7)!Sh.4a9l$8^l@?U'cփ!+xf/Gs(}Tᒅ넚֍1F"d~ooF+55+B=ݴ/8cYao~xt{lTTӞlL`,(#o&o1BD.edžV9M)1 V}VPz8 ըJLd]r ls$J @0jƢrsDff`#pp2Y=5雼eIYꩃab4"GM? 
+7^niDY*qgڳi LXuO 0!㸻d;kmЭ=T2K\jióO`p|+vtJ@$;NklNR gm#4gaױRW9IvhBaڅ7H8{D-3c -hAmh"_)?W|GWmMDs|cZ`#l,jRƟ̱v,v{^iSJ|&b6ɀYTwM_k ?-k[Gy/\)8^ _ @-XYO1!?=w}?u,,+v']&%Y귍F݆pn}iuwMWY{lraهѹpⷹSxx=!>/@icplqy5#mJ> 3E}R 0OmWV E73K%`ői@ 2h %%L~Vќ?Y1>kv-o]JRRmkkZ}bu>GWK 6yZ/7v[n#1kҎUI{Kzs媡>(?3 Մ͜ܬPXf#IVYBs46%'4u k/NJ\}!RNigjh0@|ƸJ>oC)N(Hmg58ʣ,L&P1k5b}g+j\P,]\eZo+H_\\ֈ^2-vgTv\I#F-(P AT-WN{qt5a\cȴܮ&?~ 3HZ23 }znFtz,2KsHP| /~!zݯ@28!$|E*,!jQ~,6;-Jm,5|/Հ|BEb$J/ZH^dj}q#}vqcȅqV;'xmǙ骨b(qxM~Fysz shoa`E}RQjY<|,r Ͻ<h@fᴌgkŵ{{:C5⛥<kلZyY?U ~RxwXgxϐi[48Heoj>CEUPoX Hq-e#cfK:'P;$r4ҕ7;X#(TKйt!-Cf(68 ٝ>ss[u^EIۮv fNPIz|R5Gq}+\) 5"Ueָ6bfuLNR,Xχm]Z4sN;9ިY۠k^HC(1Tcq24VULV!n.4o]G!ɳp>\{0\MbC}yq}{W뀶5{$#O$W8 WyjoՑ[-wƮ> #lsl sykzL--:1: 7;4ZX{?"kHtңe͈A&,j5~A`f'5 wb("x6:)Q=$D%1Q"9h]A.]3vѡCa.LΛ_T^ 각CT=9SѧGex=?4ɭG_'_,JoEY^ra!*c^hg9DU?fkuaa2 S@aةu|9a,D uGLDYӖ6ˀ"*!k`(gs}Q{4#ڲicU+>g2Ohv׊-]wi. [=U3xT캋68Լ"G@;Tߢmnm b <-S6o*_cz5yd iT\0"81hy(ګ읢i܏UmfViCoSuuPXw\ X3}PwۚL5aUb,jP'v,6Ӣn8vW9c8nM9(&3w߂M@80}rgr7w]00Ur"kEQv:N +ġNG4ZmEkuPGwkMi>Pmw11< H! %C$@ڴ);A@u+Oyr8|HT򥐷-ogAIso <\M`:Ub/d5vA m!LCzw\ۇ[hxxIdjhD}*)mP;X sW'W܂ 94o,-mK,>Ҋ&CaQByLY#_SC.oYAtlߡBpV];E@07BAO0@ O>^1oKm.F;D;nq7W1l  4>?-Ap)U MTSeEy eTx~m^{gcRz s v5&zJs<:KTs\'+TyxA?-e+~)|_ߡ{蔯Ss^qwvM6Ɗ;텈7].뵘*Xik26)^'oz~2w-׷K7ya>.LTW5< ef҈o.-_0S-Yд65 7I痛is͐M(*"E!* уJ[mDmjRNcKj^ZEQQ]ɸ1ymZ,mZX;bTPEV XUTAB4)QZ:1`]SEڣhmm-bPm-(ЫZՊDmiKV,[m@G˦2,fZQ jֲ-* ((ʅF\NMHGU /*-h 21(6!|)WVPcYQmuJA`AQX"1EG‡N﯆׾  ^TX}7%J/=5lw qUZJm Thhhh4ɠ4idd@24 i@ɑ=FM=OQTUBz&Й h &&4&M4h)OHd14hlS4i124M h4h4DL'iii=OIh􌞠hh@Ci4=&h h@(jG=H=m=Oқe2zj1ih=6&= 4i=@4d4izz SҦzM=2?S)i=#К=OQ2~ 4ئS2 =CCЃ'4z=O&Fji#bbzHA =CU& 4PiSSڍG66ѴjbFHO&I42bzOSɦmFjcFMOAd'2hhMi670]f7>N4{N" fl3r2hI3M},GFm2S,Ah@ $ZZM"LvTgauf`mdө&UNIF0f&eP) 5d$-Jq K'q$ )ܤ`I2w@;&dNŁ.)I-+;̹h2v;'I IȬ'{I;'&BM$ m;x 6 ɳKʬ2KT5 6mY9bd9& B#נAFV-Ey|wg~FA2 i Goju]uvSV'{ﶎpk-xs{oӵluw}olu}{m{mw^7 ~c~lgiX\/R•RILLtI$I$I%I$I$I%I$I$I$I$ggiiZWUUUUUUUUUUUUV ŀUUUUUUUUUUUUUUUUUUUUUFªUUUU*j=uUp*hiJUDVr f2m!/uI!qq &+Aj:"ĉ"vFpZ T#!!;ZAG,VXE HNDPN!: P $"l]t,|`H! 
"B թ@T3Bcǎ`mJS>*T fZn!ɽ> TI$IR 6VXQqLo$%J )S`5Re(xʨatũYt}R9ݩA hɭCtPceG)h)& +f:?Zavh}N יִ":O7@:%5CP_s eP7:Γ>uܴ8ӱ2 рiMbcy $rtxBA)K3@B(p!C(]A!GdO#R8(#r! zJibÉ܍EFt'K,Sw=g_t | iIqYx`Ig1BJ7:eAG"-UUP!$c@52|; <RM cPy4+JSLPvH[x9EL&$Z3RM7T5 JRN,'JTUTy^C,?^j.<,JkVY 2z;z "˜$ ɧK`0-fwA˙QNpGU|%w! C]n@\Dl6`!\XQ>iҖ R IBՉPt!9!|d3Qhp US@}W!`:bWBNbEvkyMV;j vߢa3DӛX.jBt62M-/?kNǝb4yMFPo9=> edr<}\;銣|--xGYRjmYbTʑK1dA'(ΎeTc2S4/|M74JPT\_}E,?*.b2\y B"Q[X2QcäJ*Fͼ7 ޷VABHxAPtz đťj}Z+bAC")i B1TXJMIoD қVWM2⹕ǸR|*-Zk\TS/T!`]=5ϵgdXf,$iJ]O}~X-n].$xRdPF>~L8W3q1ӪccTݝisQ95aJ}Hq)])4447˞$\Y*V4MqUR 212[~Vgud"f7ѲxεL BUSy9lr;'6]fߜyvhb8kqV `6&-l@Rz DD"*H4ifblOh Heq,`fŤwhZV4Sa(=rȈx!@ 804k\X6m ^4`d!9g,[4k'IROi+HGSϷ'KJFr '8xx`urI  VJVJY)d6ي;NN_[y v BjT40fɡ(@lv͒!iv'-)Tg}xh2qi1th#pM)N\ ,pq2D4$R@W`ZdUi RdpX ͝&`"XliqE3K@Se(NLՆp:t50b)_JҀjC܀*\=0J 8۝a q0@Ί9e]Kt 2eb0(a}XJ.*3n j a ,ox2ʺ+Åk&6,tN|vmnXppu) `auݳ>zoR9j!Ba<'y'Yf8\I?ţ1l8~2f1 Mӝ ֦mF]~<0SFԕ_qE{&,tS k@[:hj}[.[,;r""y,չ[n\_{#3feXh B` t$G"@,Zk (%h_Zd<| 8{NZ ?R^R m~`\tٳfIC]? =A=>I5x^ʪdCn٥ɣ]uׁ2lӟ;Õ]w = Zi`4 XՆXi@HxYE$aC45 Xp#bŋPٍ9nI6Muj(7bI֥QGN%b/!Hvyע1||]K$KԦ H}B믊9.eU]C9;$ ! @t;@BL֎H61&qcpq#HBًCE_#O1f)y4ߪ\cz=/!ϩ?ȅ6l?+˳'V1dl'HB1A.:gUr4 >}˾喎`Iѫ}<$$R22s`M4N.5 ;}Pva <!Y2k]Ex "hW9UhM,8=:KJ B`ɦ PcnvyqZ8٧ƒyBYep[BGx$8ߕnao&˪B0oPϥIF-PÅhBthX7i ﷣ ն,~vXWuBV .r@5.-~d9[(ɇ4,&2x&9z'R_5&pRS i׈9#>J&9i#(w.1*^Jwco':1# ?\SM=w=R]!MjթNM0<584;{=<\!zQV׾>/jmv^@G˝>ƃڈ CD;x"!Ä pz@f>cO~.>@2rB 4sj~r^-ߨP@9QE=++U>rvJP=. I>*AgцLzMdY;E C BӠ6WUdØ ^:[AȏQYiR[+1rVRw.(aVSvio5u{[!:zӞxz ,B{"/d/{$xˎũ.4 MTaHFH#E-{& yCq[w ,.$!] 
jp䋌s &&_`v8<tI:\LF$X8k*9 ߇O~Ss韜P1xpfr.krʄ@:~dkykǢ11H̯wMzz72B،?q>Uml@vh~ݟ۳:wH~oqsG堌-Yˁ[k9ꬢQWeM'+Lť:`5UXgZk*9]wZF ]\kl`w 韤W(G"$IDr$:H_ߩJd҈4#-1B>Ed c 1!iMLjrstanarm/data/mortality.rda0000644000176200001440000000027713340675562015547 0ustar liggesusersBZh91AY&SY |wXB@/g@ #SjmL =A5%FM0FL/0 ZNPꚤ:tV&s ^.ܕ$yIń2@J@@B#62dmHK!/"cX&dfX`rE8P rstanarm/data/tumors.rda0000644000176200001440000000040013340675562015040 0ustar liggesusersBZh91AY&SYڡS@/@ hy'ښ4l4m@Q6z#@4)FhF"5 ,`*T%"t&%\Q΢LA)kfCdH,!*(3Ha2iJʶ9$_`ȅ"dB{ ,c`HH %IC.hi.++\,}̟JǰP 4,{bs@0ܑN$>urstanarm/data/roaches.rda0000644000176200001440000000272013340675562015142 0ustar liggesusersBZh91AY&SYc;jѕ4 n~I㨠nځA'Qm#'zjdjh4mM4Pm G4z@&%3IM1Mz4h=C@'4hL40&##0LL#CCA 4h!i &dd4dFѓ@H4MM4m@4bhi hdz|d|XU4(~B(T¼<5/iD@G:EeFyg ̊jR;8$3Q:)&3߅c3'0ᨙknź|%#M]U\jƔKM)jL"$|mo0 ȩ9fktOccVkv w`C(@$l{oGvsb`h(W"+{{lVtq ,s|b &Ё"`kz] GPAuzK+(w`D{9o(> wg.U3rs %[ ĂIHZt,@WIX_L]D*9B@Q6R@" M rX< R؄֒Ym ҢH D)AZXXM$ȠhXv4-6p&S4!W;b* ^K#:! @# L | @ +c"/5FEed#0~䆆0FA1 T$=>bG<24ٗIKk9ϕ&a bt# t@*\(lt6ȡWtɗa3BhJ!QAZ&(@L}:ñt`ia΅ M.tliD?&vݧ2J|#8k9G,0nJ4bN5+am 80k#ŠNP* Hؓ&O\#2q9J(* (AZ DR "de^]BALrstanarm/data/bball1970.rda0000644000176200001440000000071713340675562015117 0ustar liggesusersBZh91AY&SYabp=z?߱1@H@zDl5Mɩ Fz14@L*ȇi4h$Q@dh 4Vs՝!5T)Ԃ”`X/mzDϳդ-4ŀ ˛.TDf\pX@X>gjsQT!|9ȀYMa_|dU"\F=KPDАd Z`u1Kb #SKf2j%}V{`A^WgK1V3/ɰf2h|N!KfDBy" `' 09]+ ,)F(Ţɤ-& XphX" & DpTyP"sFUUMn"(HnNrstanarm/data/kidiq.rda0000644000176200001440000001030313340675562014613 0ustar liggesusersBZh91AY&SYo ^xC4iDq :e@ H& 2dS~=0axDѦИj~=L100'i 蘦MjfMa=#H5)M)5O==F5'Oe0MS̊{IMMbe y M8 BR&VDTx*n Ćf{"b"Q &(EyE"D& Z gp=a!058@YH TDƆ`"9{,d 1"7_D ,۲4fΆX%k4sk2ٿTcM/@[atIYIE|8 P=4PƦˤ`V%yb( !ݱfmhҩf3bJrgZ ә%D))˓2*Tѩ*3$YIU:@F #J u\C_$x5>xEb8z*[z2W#i֪t*pl4 xǭ$s̜凪rnM=oDBJLnSבodX ;@-r61jb mŃ U/4'A 8tۃ*t[r`(W!>vg""/ONW#"+7R`g|X ;ƃ9 vUX "PǢtyU"R1yMgr!5hM1u G8!HVN3ذ*w<I.H0|j$̿0$ Pnå؍Yl8͊Jl\/|Tn!B-waٯ;ɜxEMzY YcM~`ۖH=1d$<母1#4H$[.p$J87(6AڕP<{F2dHG[xAlusu0[D! 
R>hS; )F3Mߋ9̤NC/),;2)+~p:I=|vZȒ%oao@tclό _D ٝ t> da9 |;{K"omZxUԔ~N&>*  #OE.3|A~Fqt#p줏ϻj_Z(ׇ♾5-/<ܲʩ/EA37$2#U`}N{}k C Uu U9gN{QLdEC.ukZm.{f7~\'{E%D:Uz"cF "U}Ho9RjYd~O6#Iʿ> 2S8@KBcQJU2f ͘:q._Z5}Qxyisť|֍n|lv>ڶQư{Tˆ7y}hrOA'4sknVO!yK*/;VA<(.&Sý@A)oE&a:I9(y,QExtѰl.֏Q;8,zq_ޘoSLP6WBAPEVږJ|1w@?@.o7䨇 /[9dFHIU++/m}]EC1cp_) (u 01iBd }zД8l=-< ˥ }ICޮ%kaԄ9W#dй/׍O0DBFsC200beP?VK +t/{a% iXi"RKDm~﵌yl6*brC1E&E z&U ^i gIe!Iǿ&p \%0L"CZaq9*XpwuLy2, ]ԛDm,,SW}y'oKmWI%xb: ۖr"g $ H'XסDuZҷ{Ϊ5$6ǃ]E;bTT8"N2$4،gF. I55*B 81|:XOx//R0yј4P>2*Ť)&&!H&m*CMbt5tbQ)nl>Hd;%FQr& 3 3#0nQsP2S(_="NG3[ץ@7)}dd.GGbO%;UZ+ nZg=P H(ap2ˆ-zH@~ VO)4eΈNՠy "Dcm9bۛc:f\?(:)$:ntc'7k#+8@ryehbk1ߕ?,©ci#6V"T Yi)Jq|K^חva$&!*f0>/xɱ~ߩ9[:&Dɿ8wzp`?{C8]vWK4u)%s#$0%dG7-b2'n5Ub)j^gwJWDgP|} @xwP>ڱf-jWH?aטb +(|6U]b>AoG5;EV`[vqamح Wj &]M_:q܉mx/%Z7RɵJdRky%bJ(\^Y!H2evIx=T\zgp*2t ք<%^0d <}(!'+.<6P"!HY+1 i6v_2cq95o~:9cL.|,Z7cE*=#2"9:}ocDjuiftGqnyB`utSsfhop1he|QNSYM`WANNg<[8w-ba9U<ָVCfFu)Tb\%8Z+ V&3:~D!z`6DScY३`܉)ujv@U< b~ݳfpE~P3 m{GGMAA4blSXYf.ltiT 9-5ê["2l UcG̲ç7,;74-yדj껒ԢZ[lpr:n4m.oBrS`w-XECgu:C=L:)Sh'AuܓW:TpfGB q%BN|7 =QԦ%h#|E:f_]BCurstanarm/data/pbcLong.rda0000644000176200001440000001141413340675562015102 0ustar liggesusers[XT׶>S` WQѨ1XY"؂T!(jDM%Fc1&F͵ vKlc BQQ}Z3#{{}?딽^_eϜDtfdA+tZAcz- !':vjdgwJCdh_:ρU .y8-¾pxKં[);x <J ; R"/"'QFR,  /1h8TEнΔ,[O\-[v /?xe [WjlR/jFLӲQͽK-[/xEkc GPBU^XӶݡ$Ku;weYѨZ介IШP WE}-v:6aEW5 F5ުOl0(3<> ^6^?Ŏ^'ww']9;b\Zp8`^bډ5v5֭̂36|>?f>|wW )4H0mW!hYBE3'(f@h쾿_&N5bW,ʬu`;=SY{dCFxM˯+#*t k'fvv߮%rWa2>?Uۿ wA*s۶vo2>' bW?zom0.&_7 <%bЏlSvOfac܍w ] 9eX8pkqzźg>]xDOE˯-|p?i;0s3^w|;u(qzf Ʈ~Osm:ݣOϛs48 BHs[iܲbS hg+1\(fZkޠY}tNdž`14_pr@pux՞K3ճM6CoG|8Vw18 cb>*7uX$OlV3S<@3.r0ןh0|m2wgz `ً|^쯜Gbּj~p ,Μ&f+$~]\/UA^#=x-&!]eU;>aoBe٘*-[9(`)b6S~CS[0W_hxr_V߷@YqܠLyק̨̃2;~; *lˁ(?_5xoͼ`Tq]^^VK̒> 2WU(&8۹K=WX7xW3 ̲PųLUb+W7w>+fzkOϏm@a[n89C\#/+?-O=>|]V:x2tS!a#xڞ]g؇x FVkVk_ɇ?I+CƇjdN e2m7/`u/9qܱ%]|]0V.f9X3T ^v[I _i7KnNTqzUz#3֣|!x[U>=@cϪN<áޘ\V1w6\m _>7y@yeCU.Y ?ݝe͚SBy+]J5<:P658Ӂ[椄As*riMyA`BNhK/GEߕ3U'L3J]Ţ?UrgY6kW"8?X{'v4<_9qVH5qm?}`9`%c;X=p-mhE̿+K,G*zS~'>Q n7#a 8 
7$'KVZWlo<*dba,:79jg;GD6ߤ 8#uX&O^>yEMor;f/0no+z? r?p. gS]j$>1A)s[W-8W]x/wLL+t3?rѦbn‚:b{RU̚=0tbϽ6(>9yJK9Kb0mkJ" 25r䞐t12%Ӫqm\wHksFX~6`7E|߮gv7?};&NQ[?Ak&wy(;Soo_y {/9q>sRCljUtI-[ܿ.w4QgO@; t]Frd`V%惺qP0XV|kq77)y1zVYi&ֻ{^\i4E?RGP~`ɟLa$on.f8Y:^WLVLyNzRܲF+çg1UuT#zYAR]ċ}S=~b=u^U;j?hECUzQ[cS*O,\,ꭄTx=55R|}eyyUᇵb@1JtMՁhXt}{80;hƧ0a:O7 >ڝvx.FztϭΚ†,< 8 uS꿩 <AfR^&iIYUӛ4< L$q4xv8|tvh5f~] fC$7ğ=kVYC}t^'cՄad0.cfHqQ|mG)/Mc1x߁iaK??a<};hq==ɪ=u'|f3cOu#꣼p]{_ыT<8vTG#\PQ}o_'s'OjOu.kAQ܌3@GyF<ڠ~<" ŕ<żR] ?:ԯשp3ő$iRE~lGy}15S_q>`uJ\%MZ'%$>&ҏ6b:#񊼓4!OmUR]( ;QB=ȃ#=K) ާ~o<:|GqQIQTyJvP>cWCuHGh?d;BCڗ)~ DyMyEy œ%꡾ē֡cuaJ^j*0"5g߉rJQ <Ldw!k=e(`͙# ec2yȰ?3mВ߄7}Đːʐpae ;c ?1g0'&d5f>a7r3 vƎe8s0\`ɐpNFc2aHA[q~ ֢Ok H5ӣ)y_hޓ9l 9f1˿# GnX4z ,G4r.Xp?$1$~ a~ R^J؉ܟbX>s zA5:Ͱd0x|kj<t4WƬfXhRevfc= mean(pbcLong$logBili)) mv3 <- stan_jm( formulaLong = list( ybern ~ year + (1 | id), albumin ~ sex + year + (year | id)), dataLong = pbcLong, formulaEvent = Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, family = list(binomial, gaussian), time_var = "year", chains = 1, cores = 1, seed = 12345, iter = 1000) } } } \seealso{ \code{\link{stanreg-objects}}, \code{\link{stanmvreg-methods}}, \code{\link{print.stanmvreg}}, \code{\link{summary.stanmvreg}}, \code{\link{posterior_traj}}, \code{\link{posterior_survfit}}, \code{\link{posterior_predict}}, \code{\link{posterior_interval}}, \code{\link{pp_check}}, \code{\link{ps_check}}, \code{\link{stan_mvmer}}. } rstanarm/man/se.Rd0000644000176200001440000000070114551552005013552 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanreg-methods.R \name{se} \alias{se} \title{Extract standard errors} \usage{ se(object, ...) } \arguments{ \item{object}{A fitted model object.} \item{...}{Arguments to methods.} } \value{ Standard errors of model parameters. } \description{ Generic function for extracting standard errors from fitted models. 
} \seealso{ \code{\link{se.stanreg}} } \keyword{internal} rstanarm/man/plot.survfit.stanjm.Rd0000644000176200001440000001345114551552004017122 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_survfit.R \name{plot.survfit.stanjm} \alias{plot.survfit.stanjm} \alias{plot_stack_jm} \title{Plot the estimated subject-specific or marginal survival function} \usage{ \method{plot}{survfit.stanjm}( x, ids = NULL, limits = c("ci", "none"), xlab = NULL, ylab = NULL, facet_scales = "free", ci_geom_args = NULL, ... ) plot_stack_jm(yplot, survplot) } \arguments{ \item{x}{A data frame and object of class \code{survfit.stanjm} returned by a call to the function \code{\link{posterior_survfit}}. The object contains point estimates and uncertainty interval limits for estimated values of the survival function.} \item{ids}{An optional vector providing a subset of subject IDs for whom the predicted curves should be plotted.} \item{limits}{A quoted character string specifying the type of limits to include in the plot. Can be one of: \code{"ci"} for the Bayesian posterior uncertainty interval for the estimated survival probability (often known as a credible interval); or \code{"none"} for no interval limits.} \item{xlab, ylab}{An optional axis label passed to \code{\link[ggplot2]{labs}}.} \item{facet_scales}{A character string passed to the \code{scales} argument of \code{\link[ggplot2]{facet_wrap}} when plotting the longitudinal trajectory for more than one individual.} \item{ci_geom_args}{Optional arguments passed to \code{\link[ggplot2]{geom_ribbon}} and used to control features of the plotted interval limits. 
They should be supplied as a named list.} \item{...}{Optional arguments passed to \code{\link[ggplot2:geom_path]{geom_line}} and used to control features of the plotted survival function.} \item{yplot}{An object of class \code{plot.predict.stanjm}, returned by a call to the generic \code{\link[=plot.predict.stanjm]{plot}} method for objects of class \code{predict.stanjm}. If there is more than one longitudinal outcome, then a list of such objects can be provided.} \item{survplot}{An object of class \code{plot.survfit.stanjm}, returned by a call to the generic \code{\link[=plot.survfit.stanjm]{plot}} method for objects of class \code{survfit.stanjm}.} } \value{ The plot method returns a \code{ggplot} object, also of class \code{plot.survfit.stanjm}. This object can be further customised using the \pkg{ggplot2} package. It can also be passed to the function \code{plot_stack_jm}. \code{plot_stack_jm} returns an object of class \code{\link[bayesplot]{bayesplot_grid}} that includes plots of the estimated subject-specific longitudinal trajectories stacked on top of the associated subject-specific survival curve. } \description{ This generic \code{plot} method for \code{survfit.stanjm} objects will plot the estimated subject-specific or marginal survival function using the data frame returned by a call to \code{\link{posterior_survfit}}. The call to \code{posterior_survfit} should ideally have included an "extrapolation" of the survival function, obtained by setting the \code{extrapolate} argument to \code{TRUE}. The \code{plot_stack_jm} function takes arguments containing the plots of the estimated subject-specific longitudinal trajectory (or trajectories if a multivariate joint model was estimated) and the plot of the estimated subject-specific survival function and combines them into a single figure. This is most easily understood by running the \strong{Examples} below. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ # Run example model if not already loaded if (!exists("example_jm")) example(example_jm) # Obtain subject-specific conditional survival probabilities # for all individuals in the estimation dataset. ps1 <- posterior_survfit(example_jm, extrapolate = TRUE) # We then plot the conditional survival probabilities for # a subset of individuals plot(ps1, ids = c(7,13,15)) # We can change or add attributes to the plot plot(ps1, ids = c(7,13,15), limits = "none") plot(ps1, ids = c(7,13,15), xlab = "Follow up time") plot(ps1, ids = c(7,13,15), ci_geom_args = list(fill = "red"), color = "blue", linetype = 2) plot(ps1, ids = c(7,13,15), facet_scales = "fixed") # Since the returned plot is also a ggplot object, we can # modify some of its attributes after it has been returned plot1 <- plot(ps1, ids = c(7,13,15)) plot1 + ggplot2::theme(strip.background = ggplot2::element_blank()) + ggplot2::coord_cartesian(xlim = c(0, 15)) + ggplot2::labs(title = "Some plotted survival functions") # We can also combine the plot(s) of the estimated # subject-specific survival functions, with plot(s) # of the estimated longitudinal trajectories for the # same individuals ps1 <- posterior_survfit(example_jm, ids = c(7,13,15)) pt1 <- posterior_traj(example_jm, , ids = c(7,13,15)) plot_surv <- plot(ps1) plot_traj <- plot(pt1, vline = TRUE, plot_observed = TRUE) plot_stack_jm(plot_traj, plot_surv) # Lastly, let us plot the standardised survival function # based on all individuals in our estimation dataset ps2 <- posterior_survfit(example_jm, standardise = TRUE, times = 0, control = list(epoints = 20)) plot(ps2) } } if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ if (!exists("example_jm")) example(example_jm) ps1 <- posterior_survfit(example_jm, ids = c(7,13,15)) pt1 <- posterior_traj(example_jm, ids = c(7,13,15), extrapolate = TRUE) plot_surv <- plot(ps1) plot_traj <- plot(pt1, 
vline = TRUE, plot_observed = TRUE) plot_stack_jm(plot_traj, plot_surv) } } } \seealso{ \code{\link{posterior_survfit}}, \code{\link{plot_stack_jm}}, \code{\link{posterior_traj}}, \code{\link{plot.predict.stanjm}} \code{\link{plot.predict.stanjm}}, \code{\link{plot.survfit.stanjm}}, \code{\link{posterior_predict}}, \code{\link{posterior_survfit}} } rstanarm/man/example_jm.Rd0000644000176200001440000000253314551552004015270 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-example_jm.R \name{example_jm} \alias{example_jm} \title{Example joint longitudinal and time-to-event model} \format{ Calling \code{example("example_jm")} will run the model in the Examples section, below, and the resulting stanmvreg object will then be available in the global environment. The \code{chains} and \code{iter} arguments are specified to make this example be small in size. In practice, we recommend that they be left unspecified in order to use the default values or increased if there are convergence problems. The \code{cores} argument is optional and on a multicore system, the user may well want to set that equal to the number of chains being executed. } \description{ A model for use in the \pkg{rstanarm} examples related to \code{\link{stan_jm}}. } \examples{ # set.seed(123) if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") example_jm <- stan_jm(formulaLong = logBili ~ year + (1 | id), dataLong = pbcLong[1:101,], formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv[1:15,], time_var = "year", # this next line is only to keep the example small in size! 
chains = 1, seed = 12345, iter = 100, refresh = 0) } rstanarm/man/rstanarm-package.Rd0000644000176200001440000003011414551552004016363 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-rstanarm-package.R \docType{package} \name{rstanarm-package} \alias{rstanarm-package} \alias{rstanarm} \title{Applied Regression Modeling via RStan} \description{ \if{html}{ \figure{stanlogo.png}{options: width="50" alt="https://mc-stan.org/about/logo/"} \emph{Stan Development Team} } The \pkg{rstanarm} package is an appendage to the \pkg{rstan} package that enables many of the most common applied regression models to be estimated using Markov Chain Monte Carlo, variational approximations to the posterior distribution, or optimization. The \pkg{rstanarm} package allows these models to be specified using the customary R modeling syntax (e.g., like that of \code{\link[stats]{glm}} with a \code{formula} and a \code{data.frame}). The sections below provide an overview of the modeling functions and estimation algorithms used by \pkg{rstanarm}. } \details{ The set of models supported by \pkg{rstanarm} is large (and will continue to grow), but also limited enough so that it is possible to integrate them tightly with the \code{\link{pp_check}} function for graphical posterior predictive checks with \pkg{\link[bayesplot:bayesplot-package]{bayesplot}} and the \code{\link{posterior_predict}} function to easily estimate the effect of specific manipulations of predictor variables or to predict the outcome in a training set. The objects returned by the \pkg{rstanarm} modeling functions are called \code{\link[=stanreg-objects]{stanreg}} objects. 
In addition to all of the typical \code{\link[=stanreg-methods]{methods}} defined for fitted model objects, stanreg objects can be passed to the \code{\link[loo]{loo}} function in the \pkg{loo} package for model comparison or to the \code{\link[shinystan]{launch_shinystan}} function in the \pkg{shinystan} package in order to visualize the posterior distribution using the ShinyStan graphical user interface. See the \pkg{rstanarm} vignettes for more details about the entire process. } \section{Prior distributions}{ See \link[=priors]{priors help page} and the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}} for an overview of the various choices the user can make for prior distributions. The package vignettes for the modeling functions also provide examples of using many of the available priors as well as more detailed descriptions of some of the novel priors used by \pkg{rstanarm}. } \section{Modeling functions}{ The model estimating functions are described in greater detail in their individual help pages and vignettes. Here we provide a very brief overview: \describe{ \item{\code{\link{stan_lm}}, \code{stan_aov}, \code{stan_biglm}}{ Similar to \code{\link[stats]{lm}} or \code{\link[stats]{aov}} but with novel regularizing priors on the model parameters that are driven by prior beliefs about \eqn{R^2}, the proportion of variance in the outcome attributable to the predictors in a linear model. } \item{\code{\link{stan_glm}}, \code{stan_glm.nb}}{ Similar to \code{\link[stats]{glm}} but with various possible prior distributions for the coefficients and, if applicable, a prior distribution for any auxiliary parameter in a Generalized Linear Model (GLM) that is characterized by a \code{\link[stats]{family}} object (e.g. the shape parameter in Gamma models). It is also possible to estimate a negative binomial model in a similar way to the \code{\link[MASS]{glm.nb}} function in the \pkg{MASS} package. 
} \item{\code{\link{stan_glmer}}, \code{stan_glmer.nb}, \code{stan_lmer}}{ Similar to the \code{\link[lme4]{glmer}}, \code{\link[lme4]{glmer.nb}} and \code{\link[lme4]{lmer}} functions in the \pkg{lme4} package in that GLMs are augmented to have group-specific terms that deviate from the common coefficients according to a mean-zero multivariate normal distribution with a highly-structured but unknown covariance matrix (for which \pkg{rstanarm} introduces an innovative prior distribution). MCMC provides more appropriate estimates of uncertainty for models that consist of a mix of common and group-specific parameters. } \item{\code{\link{stan_nlmer}}}{ Similar to \code{\link[lme4]{nlmer}} in the \pkg{lme4} package for nonlinear "mixed-effects" models, but the group-specific coefficients have flexible priors on their unknown covariance matrices. } \item{\code{\link{stan_gamm4}}}{ Similar to \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package, which augments a GLM (possibly with group-specific terms) with nonlinear smooth functions of the predictors to form a Generalized Additive Mixed Model (GAMM). Rather than calling \code{\link[lme4]{glmer}} like \code{\link[gamm4]{gamm4}} does, \code{\link{stan_gamm4}} essentially calls \code{\link{stan_glmer}}, which avoids the optimization issues that often crop up with GAMMs and provides better estimates for the uncertainty of the parameter estimates. } \item{\code{\link{stan_polr}}}{ Similar to \code{\link[MASS]{polr}} in the \pkg{MASS} package in that it models an ordinal response, but the Bayesian model also implies a prior distribution on the unknown cutpoints. Can also be used to model binary outcomes, possibly while estimating an unknown exponent governing the probability of success. 
} \item{\code{\link{stan_betareg}}}{ Similar to \code{\link[betareg]{betareg}} in that it models an outcome that is a rate (proportion) but, rather than performing maximum likelihood estimation, full Bayesian estimation is performed by default, with customizable prior distributions for all parameters. } \item{\code{\link{stan_clogit}}}{ Similar to \code{\link[survival]{clogit}} in that it models an binary outcome where the number of successes and failures is fixed within each stratum by the research design. There are some minor syntactical differences relative to \code{\link[survival]{clogit}} that allow \code{stan_clogit} to accept group-specific terms as in \code{\link{stan_glmer}}. } \item{\code{\link{stan_mvmer}}}{ A multivariate form of \code{\link{stan_glmer}}, whereby the user can specify one or more submodels each consisting of a GLM with group-specific terms. If more than one submodel is specified (i.e. there is more than one outcome variable) then a dependence is induced by assuming that the group-specific terms for each grouping factor are correlated across submodels. } \item{\code{\link{stan_jm}}}{ Estimates shared parameter joint models for longitudinal and time-to-event (i.e. survival) data. The joint model can be univariate (i.e. one longitudinal outcome) or multivariate (i.e. more than one longitudinal outcome). A variety of parameterisations are available for linking the longitudinal and event processes (i.e. a variety of association structures). } } } \section{Estimation algorithms}{ The modeling functions in the \pkg{rstanarm} package take an \code{algorithm} argument that can be one of the following: \describe{ \item{\strong{Sampling} (\code{algorithm="sampling"})}{ Uses Markov Chain Monte Carlo (MCMC) --- in particular, Hamiltonian Monte Carlo (HMC) with a tuned but diagonal mass matrix --- to draw from the posterior distribution of the parameters. See \code{\link[rstan:stanmodel-method-sampling]{sampling}} (\pkg{rstan}) for more details. 
This is the slowest but most reliable of the available estimation algorithms and it is \strong{the default and recommended algorithm for statistical inference.} } \item{\strong{Mean-field} (\code{algorithm="meanfield"})}{ Uses mean-field variational inference to draw from an approximation to the posterior distribution. In particular, this algorithm finds the set of independent normal distributions in the unconstrained space that --- when transformed into the constrained space --- most closely approximate the posterior distribution. Then it draws repeatedly from these independent normal distributions and transforms them into the constrained space. The entire process is much faster than HMC and yields independent draws but \strong{is not recommended for final statistical inference}. It can be useful to narrow the set of candidate models in large problems, particularly when specifying \code{QR=TRUE} in \code{\link{stan_glm}}, \code{\link{stan_glmer}}, and \code{\link{stan_gamm4}}, but is \strong{only an approximation to the posterior distribution}. } \item{\strong{Full-rank} (\code{algorithm="fullrank"})}{ Uses full-rank variational inference to draw from an approximation to the posterior distribution by finding the multivariate normal distribution in the unconstrained space that --- when transformed into the constrained space --- most closely approximates the posterior distribution. Then it draws repeatedly from this multivariate normal distribution and transforms the draws into the constrained space. This process is slower than meanfield variational inference but is faster than HMC. Although still an approximation to the posterior distribution and thus \strong{not recommended for final statistical inference}, the approximation is more realistic than that of mean-field variational inference because the parameters are not assumed to be independent in the unconstrained space. 
Nevertheless, fullrank variational inference is a more difficult optimization problem and the algorithm is more prone to non-convergence or convergence to a local optimum. } \item{\strong{Optimizing} (\code{algorithm="optimizing"})}{ Finds the posterior mode using a C++ implementation of the LBGFS algorithm. See \code{\link[rstan:stanmodel-method-optimizing]{optimizing}} for more details. If there is no prior information, then this is equivalent to maximum likelihood, in which case there is no great reason to use the functions in the \pkg{rstanarm} package over the emulated functions in other packages. However, if priors are specified, then the estimates are penalized maximum likelihood estimates, which may have some redeeming value. Currently, optimization is only supported for \code{\link{stan_glm}}. } } } \references{ Bates, D., Maechler, M., Bolker, B., and Walker, S. (2015). Fitting linear mixed-Effects models using lme4. \emph{Journal of Statistical Software}. 67(1), 1--48. Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC Press, London, third edition. \url{https://stat.columbia.edu/~gelman/book/} Gelman, A. and Hill, J. (2007). \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models.} Cambridge University Press, Cambridge, UK. \url{https://stat.columbia.edu/~gelman/arm/} Stan Development Team. \emph{Stan Modeling Language Users Guide and Reference Manual.} \url{https://mc-stan.org/users/documentation/}. Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. \emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4. arXiv preprint: \url{https://arxiv.org/abs/1507.04544} Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018) Using stacking to average Bayesian predictive distributions. 
\emph{Bayesian Analysis}, advance publication, \doi{10.1214/17-BA1091}. Gabry, J. , Simpson, D. , Vehtari, A. , Betancourt, M. and Gelman, A. (2019), Visualization in Bayesian workflow. \emph{J. R. Stat. Soc. A}, 182: 389-402. doi:10.1111/rssa.12378, \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) Muth, C., Oravecz, Z., and Gabry, J. (2018) User-friendly Bayesian regression modeling: A tutorial with rstanarm and shinystan. \emph{The Quantitative Methods for Psychology}. 14(2), 99--119. \url{https://www.tqmp.org/RegularArticles/vol14-2/p099/p099.pdf} } \seealso{ \itemize{ \item \url{https://mc-stan.org/} for more information on the Stan C++ package used by \pkg{rstanarm} for model fitting. \item \url{https://github.com/stan-dev/rstanarm/issues/} to submit a bug report or feature request. \item \url{https://discourse.mc-stan.org} to ask a question about \pkg{rstanarm} on the Stan-users forum. } } rstanarm/man/print.stanreg.Rd0000644000176200001440000000533214551552004015745 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print-and-summary.R \name{print.stanreg} \alias{print.stanreg} \alias{print.stanmvreg} \title{Print method for stanreg objects} \usage{ \method{print}{stanreg}(x, digits = 1, detail = TRUE, ...) \method{print}{stanmvreg}(x, digits = 3, ...) } \arguments{ \item{x}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{digits}{Number of digits to use for formatting numbers.} \item{detail}{Logical, defaulting to \code{TRUE}. If \code{FALSE} a more minimal summary is printed consisting only of the parameter estimates.} \item{...}{Ignored.} } \value{ Returns \code{x}, invisibly. } \description{ The \code{print} method for stanreg objects displays a compact summary of the fitted model. 
See the \strong{Details} section below for descriptions of the different components of the printed output. For additional summary statistics and diagnostics use the \code{\link[=summary.stanreg]{summary}} method. } \details{ \subsection{Point estimates}{ Regardless of the estimation algorithm, point estimates are medians computed from simulations. For models fit using MCMC (\code{"sampling"}) the posterior sample is used. For optimization (\code{"optimizing"}), the simulations are generated from the asymptotic Gaussian sampling distribution of the parameters. For the \code{"meanfield"} and \code{"fullrank"} variational approximations, draws from the variational approximation to the posterior are used. In all cases, the point estimates reported are the same as the values returned by \code{\link[=coef.stanreg]{coef}}. } \subsection{Uncertainty estimates (MAD_SD)}{ The standard deviations reported (labeled \code{MAD_SD} in the print output) are computed from the same set of draws described above and are proportional to the median absolute deviation (\code{\link[stats]{mad}}) from the median. Compared to the raw posterior standard deviation, the MAD_SD will be more robust for long-tailed distributions. These are the same as the values returned by \code{\link[=se.stanreg]{se}}. } \subsection{Additional output}{ \itemize{ \item For GLMs with group-specific terms (see \code{\link{stan_glmer}}) the printed output also shows point estimates of the standard deviations of the group effects (and correlations if there are both intercept and slopes that vary by group). \item For analysis of variance models (see \code{\link{stan_aov}}), an ANOVA-like table is also displayed. \item For joint longitudinal and time-to-event (see \code{\link{stan_jm}}) models the estimates are presented separately for each of the distinct submodels. 
} } } \seealso{ \code{\link{summary.stanreg}}, \code{\link{stanreg-methods}} } rstanarm/man/loo.stanreg.Rd0000644000176200001440000002704114551552004015403 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/loo.R \name{loo.stanreg} \alias{loo.stanreg} \alias{loo} \alias{waic.stanreg} \alias{waic} \alias{loo_compare.stanreg} \alias{loo_compare} \alias{loo_compare.stanreg_list} \alias{loo_model_weights.stanreg_list} \alias{loo_model_weights} \alias{compare_models} \title{Information criteria and cross-validation} \usage{ \method{loo}{stanreg}( x, ..., cores = getOption("mc.cores", 1), save_psis = FALSE, k_threshold = NULL ) \method{waic}{stanreg}(x, ...) \method{loo_compare}{stanreg}(x, ..., criterion = c("loo", "kfold", "waic"), detail = FALSE) \method{loo_compare}{stanreg_list}(x, ..., criterion = c("loo", "kfold", "waic"), detail = FALSE) \method{loo_model_weights}{stanreg_list}(x, ..., cores = getOption("mc.cores", 1), k_threshold = NULL) compare_models(..., loos = list(), detail = FALSE) } \arguments{ \item{x}{For \code{loo} and \code{waic}, a fitted model object returned by one of the rstanarm modeling functions. See \link{stanreg-objects}. For the \code{loo_model_weights} method, \code{x} should be a "stanreg_list" object, which is a list of fitted model objects created by \code{\link{stanreg_list}}. \code{loo_compare} also allows \code{x} to be a single stanreg object, with the remaining objects passed via \code{...}, or a single \code{stanreg_list} object.} \item{...}{For \code{loo_compare.stanreg}, \code{...} can contain objects returned by the \code{loo}, \code{\link[=kfold.stanreg]{kfold}}, or \code{waic} method (see the \strong{Examples} section, below). For \code{loo_model_weights}, \code{...} should contain arguments (e.g. 
\code{method}) to pass to the default \code{\link[loo]{loo_model_weights}} method from the \pkg{loo} package.} \item{cores, save_psis}{Passed to \code{\link[loo]{loo}}.} \item{k_threshold}{Threshold for flagging estimates of the Pareto shape parameters \eqn{k} estimated by \code{loo}. See the \emph{How to proceed when \code{loo} gives warnings} section, below, for details.} \item{criterion}{For \code{loo_compare.stanreg} and \code{loo_compare.stanreg_list}, should the comparison be based on LOO-CV (\code{criterion="loo"}), K-fold-CV (\code{criterion="kfold"}), or WAIC (\code{criterion="waic"}). The default is LOO-CV. See the \strong{Comparing models} and \strong{Examples} sections below.} \item{detail}{For \code{loo_compare.stanreg} and \code{loo_compare.stanreg_list}, if \code{TRUE} then extra information about each model (currently just the model formulas) will be printed with the output.} \item{loos}{a list of objects produced by the \code{\link{loo}} function} } \value{ The structure of the objects returned by \code{loo} and \code{waic} methods are documented in detail in the \strong{Value} section in \code{\link[loo]{loo}} and \code{\link[loo]{waic}} (from the \pkg{loo} package). \code{loo_compare} returns a matrix with class 'compare.loo'. See the \strong{Comparing models} section below for more details. } \description{ For models fit using MCMC, compute approximate leave-one-out cross-validation (LOO, LOOIC) or, less preferably, the Widely Applicable Information Criterion (WAIC) using the \pkg{\link[=loo-package]{loo}} package. (For \eqn{K}-fold cross-validation see \code{\link{kfold.stanreg}}.) Functions for model comparison, and model weighting/averaging are also provided. \strong{Note}: these functions are not guaranteed to work properly unless the \code{data} argument was specified when the model was fit. 
Also, as of \pkg{loo} version \code{2.0.0} the default number of cores is now only 1, but we recommend using as many (or close to as many) cores as possible by setting the \code{cores} argument or using \code{options(mc.cores = VALUE)} to set it for an entire session. } \section{Approximate LOO CV}{ The \code{loo} method for stanreg objects provides an interface to the \pkg{\link[=loo-package]{loo}} package for approximate leave-one-out cross-validation (LOO). The LOO Information Criterion (LOOIC) has the same purpose as the Akaike Information Criterion (AIC) that is used by frequentists. Both are intended to estimate the expected log predictive density (ELPD) for a new dataset. However, the AIC ignores priors and assumes that the posterior distribution is multivariate normal, whereas the functions from the \pkg{loo} package do not make this distributional assumption and integrate over uncertainty in the parameters. This only assumes that any one observation can be omitted without having a major effect on the posterior distribution, which can be judged using the diagnostic plot provided by the \code{\link[loo:pareto-k-diagnostic]{plot.loo}} method and the warnings provided by the \code{\link[loo]{print.loo}} method (see the \emph{How to Use the rstanarm Package} vignette for an example of this process). \subsection{How to proceed when \code{loo} gives warnings (k_threshold)}{ The \code{k_threshold} argument to the \code{loo} method for \pkg{rstanarm} models is provided as a possible remedy when the diagnostics reveal problems stemming from the posterior's sensitivity to particular observations. Warnings about Pareto \eqn{k} estimates indicate observations for which the approximation to LOO is problematic (this is described in detail in Vehtari, Gelman, and Gabry (2017) and the \pkg{\link[=loo-package]{loo}} package documentation). The \code{k_threshold} argument can be used to set the \eqn{k} value above which an observation is flagged. 
If \code{k_threshold} is not \code{NULL} and there are \eqn{J} observations with \eqn{k} estimates above \code{k_threshold} then when \code{loo} is called it will refit the original model \eqn{J} times, each time leaving out one of the \eqn{J} problematic observations. The pointwise contributions of these observations to the total ELPD are then computed directly and substituted for the previous estimates from these \eqn{J} observations that are stored in the object created by \code{loo}. Another option to consider is K-fold cross-validation, which is documented on a separate page (see \code{\link[=kfold.stanreg]{kfold}}). \strong{Note}: in the warning messages issued by \code{loo} about large Pareto \eqn{k} estimates we recommend setting \code{k_threshold} to at least \eqn{0.7}. There is a theoretical reason, explained in Vehtari, Gelman, and Gabry (2017), for setting the threshold to the stricter value of \eqn{0.5}, but in practice they find that errors in the LOO approximation start to increase non-negligibly when \eqn{k > 0.7}. } } \section{Comparing models}{ "loo" (or "waic" or "kfold") objects can be passed to the \code{\link[loo]{loo_compare}} function in the \pkg{loo} package to perform model comparison. \pkg{rstanarm} also provides a \code{loo_compare.stanreg} method that can be used if the "loo" (or "waic" or "kfold") object has been added to the fitted model object (see the \strong{Examples} section below for how to do this). This second method allows \pkg{rstanarm} to perform some extra checks that can't be done by the \pkg{loo} package itself (e.g., verifying that all models to be compared were fit using the same outcome variable). \code{loo_compare} will return a matrix with one row per model and columns containing the ELPD difference and the standard error of the difference. In the first row of the matrix will be the model with the largest ELPD (smallest LOOIC) and will contain zeros (there is no difference between this model and itself). 
For each of the remaining models the ELPD difference and SE are reported relative to the model with the best ELPD (the first row). See the \strong{Details} section at the \code{\link[loo]{loo_compare}} page in the \pkg{loo} package for more information. } \section{Model weights}{ The \code{loo_model_weights} method can be used to compute model weights for a \code{"stanreg_list"} object, which is a list of fitted model objects made with \code{\link{stanreg_list}}. The end of the \strong{Examples} section has a demonstration. For details see the \code{\link[loo]{loo_model_weights}} documentation in the \pkg{loo} package. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ fit1 <- stan_glm(mpg ~ wt, data = mtcars, refresh = 0) fit2 <- stan_glm(mpg ~ wt + cyl, data = mtcars, refresh = 0) # (for bigger models use as many cores as possible) loo1 <- loo(fit1, cores = 1) print(loo1) loo2 <- loo(fit2, cores = 1) print(loo2) # when comparing models the loo objects can be passed to loo_compare # as individual arguments or as a list of loo objects loo_compare(loo1, loo2) loo_compare(list(loo1, loo2)) # if the fitted model objects contain a loo object in the component "loo" # then the model objects can be passed directly or as a stanreg_list fit1$loo <- loo1 fit2$loo <- loo2 loo_compare(fit1, fit2) # if the fitted model objects contain a loo object _and_ a waic or kfold # object, then the criterion argument determines which of them the comparison # is based on fit1$waic <- waic(fit1) fit2$waic <- waic(fit2) loo_compare(fit1, fit2, criterion = "waic") # the models can also be combined into a stanreg_list object, and more # informative model names can be provided to use when printing model_list <- stanreg_list(fit1, fit2, model_names = c("Fewer predictors", "More predictors")) loo_compare(model_list) fit3 <- stan_glm(mpg ~ disp * as.factor(cyl), data = mtcars, refresh = 0) loo3 <- loo(fit3, cores = 2, k_threshold = 0.7) loo_compare(loo1, 
loo2, loo3) # setting detail=TRUE will also print model formulas if used with # loo_compare.stanreg or loo_compare.stanreg_list fit3$loo <- loo3 model_list <- stanreg_list(fit1, fit2, fit3) loo_compare(model_list, detail=TRUE) # Computing model weights # # if the objects in model_list already have 'loo' components then those # will be used. otherwise loo will be computed for each model internally # (in which case the 'cores' argument may also be used and is passed to loo()) loo_model_weights(model_list) # defaults to method="stacking" loo_model_weights(model_list, method = "pseudobma") loo_model_weights(model_list, method = "pseudobma", BB = FALSE) # you can also pass precomputed loo objects directly to loo_model_weights loo_list <- list(A = loo1, B = loo2, C = loo3) # names optional (affects printing) loo_model_weights(loo_list) } } } \references{ Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. \emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4. arXiv preprint: \url{https://arxiv.org/abs/1507.04544} Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018) Using stacking to average Bayesian predictive distributions. \emph{Bayesian Analysis}, advance publication, \doi{10.1214/17-BA1091}. Gabry, J. , Simpson, D. , Vehtari, A. , Betancourt, M. and Gelman, A. (2019), Visualization in Bayesian workflow. \emph{J. R. Stat. Soc. A}, 182: 389-402. doi:10.1111/rssa.12378, \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) } \seealso{ \itemize{ \item The new \href{https://mc-stan.org/loo/articles/}{\pkg{loo} package vignettes} and various \href{https://mc-stan.org/rstanarm/articles/}{\pkg{rstanarm} vignettes} for more examples using \code{loo} and related functions with \pkg{rstanarm} models. 
\item \code{\link[loo]{pareto-k-diagnostic}} in the \pkg{loo} package for more on Pareto \eqn{k} diagnostics. \item \code{\link{log_lik.stanreg}} to directly access the pointwise log-likelihood matrix. } } rstanarm/man/stanreg_list.Rd0000644000176200001440000000275114551552005015650 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanreg_list.R \name{stanreg_list} \alias{stanreg_list} \alias{stanmvreg_list} \alias{stanjm_list} \alias{print.stanreg_list} \title{Create lists of fitted model objects, combine them, or append new models to existing lists of models.} \usage{ stanreg_list(..., model_names = NULL) stanmvreg_list(..., model_names = NULL) stanjm_list(..., model_names = NULL) \method{print}{stanreg_list}(x, ...) } \arguments{ \item{...}{Objects to combine into a \code{"stanreg_list"}, \code{"stanmvreg_list"}, or \code{"stanjm_list"}. Can be fitted model objects, existing \code{"stan*_list"} objects to combine, or one existing \code{"stan*_list"} object followed by fitted model objects to append to the list.} \item{model_names}{Optionally, a character vector of model names. If not specified then the names are inferred from the name of the objects passed in via \code{...}. These model names are used, for example, when printing the results of the \code{loo_compare.stanreg_list} and \code{loo_model_weights.stanreg_list} methods.} \item{x}{The object to print.} } \value{ A list of class \code{"stanreg_list"}, \code{"stanmvreg_list"}, or \code{"stanjm_list"}, containing the fitted model objects and some metadata stored as attributes. } \description{ Create lists of fitted model objects, combine them, or append new models to existing lists of models. } \seealso{ \code{\link{loo_model_weights}} for usage of \code{stanreg_list}. 
} rstanarm/man/stan_glm.Rd0000644000176200001440000004204014551552005014751 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_glm.R, R/stan_glm.fit.R \name{stan_glm} \alias{stan_glm} \alias{stan_glm.nb} \alias{stan_glm.fit} \title{Bayesian generalized linear models via Stan} \usage{ stan_glm( formula, family = gaussian(), data, weights, subset, na.action = NULL, offset = NULL, model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), mean_PPD = algorithm != "optimizing" && !prior_PD, adapt_delta = NULL, QR = FALSE, sparse = FALSE ) stan_glm.nb( formula, data, weights, subset, na.action = NULL, offset = NULL, model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, link = "log", ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), mean_PPD = algorithm != "optimizing", adapt_delta = NULL, QR = FALSE ) stan_glm.fit( x, y, weights = rep(1, NROW(y)), offset = rep(0, NROW(y)), family = gaussian(), ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_smooth = exponential(autoscale = FALSE), prior_ops = NULL, group = list(), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), mean_PPD = algorithm != "optimizing" && !prior_PD, adapt_delta = NULL, QR = FALSE, sparse = FALSE, importance_resampling = algorithm != "sampling", keep_every = algorithm != "sampling" ) } \arguments{ \item{formula, data, subset}{Same as \code{\link[stats]{glm}}, but \emph{we strongly advise against omitting the \code{data} argument}. 
Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{family}{Same as \code{\link[stats]{glm}}, except negative binomial GLMs are also possible using the \code{\link{neg_binomial_2}} family object.} \item{na.action, contrasts}{Same as \code{\link[stats]{glm}}, but rarely specified.} \item{model, offset, weights}{Same as \code{\link[stats]{glm}}.} \item{x}{In \code{stan_glm}, logical scalar indicating whether to return the design matrix. In \code{stan_glm.fit}, usually a design matrix but can also be a list of design matrices with the same number of rows, in which case the first element of the list is interpreted as the primary design matrix and the remaining list elements collectively constitute a basis for a smooth nonlinear function of the predictors indicated by the \code{formula} argument to \code{\link{stan_gamm4}}.} \item{y}{In \code{stan_glm}, logical scalar indicating whether to return the response vector. In \code{stan_glm.fit}, a response vector.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). \code{refresh=0} turns off the iteration updates.} \item{prior}{The prior distribution for the (non-hierarchical) regression coefficients. 
The default priors are described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior} should be a call to one of the various functions provided by \pkg{rstanarm} for specifying priors. The subset of these functions that can be used for the prior on the coefficients can be grouped into several "families": \tabular{ll}{ \strong{Family} \tab \strong{Functions} \cr \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr \emph{Product normal family} \tab \code{product_normal} \cr } See the \link[=priors]{priors help page} for details on the families and how to specify the arguments for all of the functions in the table above. To omit a prior ---i.e., to use a flat (improper) uniform prior--- \code{prior} can be set to \code{NULL}, although this is rarely a good idea. \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t family or Laplace family, and if the \code{autoscale} argument to the function used to specify the prior (e.g. \code{\link{normal}}) is left at its default and recommended value of \code{TRUE}, then the default or user-specified prior scale(s) may be adjusted internally based on the scales of the predictors. See the \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette for details on the rescaling and the \code{\link{prior_summary}} function for a summary of the priors used for a particular model.} \item{prior_intercept}{The prior distribution for the intercept (after centering all predictors, see note below). The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. 
If not using the default, \code{prior_intercept} can be a call to \code{normal}, \code{student_t} or \code{cauchy}. See the \link[=priors]{priors help page} for details on these functions. To omit a prior on the intercept ---i.e., to use a flat (improper) uniform prior--- \code{prior_intercept} can be set to \code{NULL}. \strong{Note:} If using a dense representation of the design matrix ---i.e., if the \code{sparse} argument is left at its default value of \code{FALSE}--- then the prior distribution for the intercept is set so it applies to the value \emph{when all predictors are centered} (you don't need to manually center them). This is explained further in [Prior Distributions for rstanarm Models](https://mc-stan.org/rstanarm/articles/priors.html) If you prefer to specify a prior on the intercept without the predictors being auto-centered, then you have to omit the intercept from the \code{\link[stats]{formula}} and include a column of ones as a predictor, in which case some element of \code{prior} specifies the prior on it, rather than \code{prior_intercept}. Regardless of how \code{prior_intercept} is specified, the reported \emph{estimates} of the intercept always correspond to a parameterization without centered predictors (i.e., same as in \code{glm}).} \item{prior_aux}{The prior distribution for the "auxiliary" parameter (if applicable). The "auxiliary" parameter refers to a different parameter depending on the \code{family}. For Gaussian models \code{prior_aux} controls \code{"sigma"}, the error standard deviation. For negative binomial models \code{prior_aux} controls \code{"reciprocal_dispersion"}, which is similar to the \code{"size"} parameter of \code{\link[stats:NegBinomial]{rnbinom}}: smaller values of \code{"reciprocal_dispersion"} correspond to greater dispersion. 
For gamma models \code{prior_aux} sets the prior on the \code{"shape"} parameter (see e.g., \code{\link[stats:GammaDist]{rgamma}}), and for inverse-Gaussian models it is the so-called \code{"lambda"} parameter (which is essentially the reciprocal of a scale parameter). Binomial and Poisson models do not have auxiliary parameters. The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior_aux} can be a call to \code{exponential} to use an exponential distribution, or \code{normal}, \code{student_t} or \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these functions. To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_aux} to \code{NULL}.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{mean_PPD}{A logical value indicating whether the sample mean of the posterior predictive distribution of the outcome should be calculated in the \code{generated quantities} block. If \code{TRUE} then \code{mean_PPD} is computed and displayed as a diagnostic in the \link[=print.stanreg]{printed output}. The default is \code{TRUE} except if \code{algorithm=="optimizing"}. 
A useful heuristic is to check if \code{mean_PPD} is plausible when compared to \code{mean(y)}. If it is plausible then this does \emph{not} mean that the model is good in general (only that it can reproduce the sample mean), but if \code{mean_PPD} is implausible then there may be something wrong, e.g., severe model misspecification, problems with the data and/or priors, computational issues, etc.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating whether to use a sparse representation of the design (X) matrix. If \code{TRUE}, the design matrix is not centered (since that would destroy the sparsity) and likewise it is not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}. Depending on how many zeros there are in the design matrix, setting \code{sparse = TRUE} may make the code run faster and can consume much less RAM.} \item{link}{For \code{stan_glm.nb} only, the link function to use. See \code{\link{neg_binomial_2}}.} \item{prior_smooth}{The prior distribution for the hyperparameters in GAMs, with lower values yielding less flexible smooth functions. \code{prior_smooth} can be a call to \code{exponential} to use an exponential distribution, or \code{normal}, \code{student_t} or \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy prior. 
See \code{\link{priors}} for details on these functions. To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_smooth} to \code{NULL}. The number of hyperparameters depends on the model specification but a scalar prior will be recycled as necessary to the appropriate length.} \item{prior_ops}{Deprecated. See \link{rstanarm-deprecated} for details.} \item{group}{A list, possibly of length zero (the default), but otherwise having the structure of that produced by \code{\link[lme4]{mkReTrms}} to indicate the group-specific part of the model. In addition, this list must have elements for the \code{regularization}, \code{concentration}, \code{shape}, and \code{scale} components of a \code{\link{decov}} prior for the covariance matrices among the group-specific coefficients.} \item{importance_resampling}{Logical scalar indicating whether to use importance resampling when approximating the posterior distribution with a multivariate normal around the posterior mode, which only applies when \code{algorithm} is \code{"optimizing"} but defaults to \code{TRUE} in that case.} \item{keep_every}{Positive integer, which defaults to 1, but can be higher in order to "thin" the importance sampling realizations. Applies only when \code{importance_resampling=TRUE}.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_glm, stan_glm.nb}. A \link[=stanfit-class]{stanfit} object (or a slightly modified stanfit object) is returned if \code{stan_glm.fit} is called directly. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Generalized linear modeling with optional prior distributions for the coefficients, intercept, and auxiliary parameters. 
} \details{ The \code{stan_glm} function is similar in syntax to \code{\link[stats]{glm}} but rather than performing maximum likelihood estimation of generalized linear models, full Bayesian estimation is performed (if \code{algorithm} is \code{"sampling"}) via MCMC. The Bayesian model adds priors (independent by default) on the coefficients of the GLM. The \code{stan_glm} function calls the workhorse \code{stan_glm.fit} function, but it is also possible to call the latter directly. The \code{stan_glm.nb} function, which takes the extra argument \code{link}, is a wrapper for \code{stan_glm} with \code{family = \link{neg_binomial_2}(link)}. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { ### Linear regression mtcars$mpg10 <- mtcars$mpg / 10 fit <- stan_glm( mpg10 ~ wt + cyl + am, data = mtcars, QR = TRUE, # for speed of example only (default is "sampling") algorithm = "fullrank", refresh = 0 ) plot(fit, prob = 0.5) plot(fit, prob = 0.5, pars = "beta") plot(fit, "hist", pars = "sigma") \donttest{ ### Logistic regression head(wells) wells$dist100 <- wells$dist / 100 fit2 <- stan_glm( switch ~ dist100 + arsenic, data = wells, family = binomial(link = "logit"), prior_intercept = normal(0, 10), QR = TRUE, refresh = 0, # for speed of example only chains = 2, iter = 200 ) print(fit2) prior_summary(fit2) # ?bayesplot::mcmc_areas plot(fit2, plotfun = "areas", prob = 0.9, pars = c("(Intercept)", "arsenic")) # ?bayesplot::ppc_error_binned pp_check(fit2, plotfun = "error_binned") ### Poisson regression (example from help("glm")) count_data <- data.frame( counts = c(18,17,15,20,10,20,25,13,12), outcome = gl(3,1,9), treatment = gl(3,3) ) fit3 <- stan_glm( counts ~ outcome + treatment, data = count_data, family = poisson(link="log"), prior = normal(0, 2), refresh = 0, # for speed of example only chains = 2, iter = 250 ) print(fit3) bayesplot::color_scheme_set("viridis") plot(fit3) plot(fit3, regex_pars = c("outcome", "treatment")) plot(fit3, 
plotfun = "combo", regex_pars = "treatment") # ?bayesplot::mcmc_combo posterior_vs_prior(fit3, regex_pars = c("outcome", "treatment")) ### Gamma regression (example from help("glm")) clotting <- data.frame(log_u = log(c(5,10,15,20,30,40,60,80,100)), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12)) fit4 <- stan_glm( lot1 ~ log_u, data = clotting, family = Gamma(link="log"), iter = 500, # for speed of example only refresh = 0 ) print(fit4, digits = 2) fit5 <- update(fit4, formula = lot2 ~ log_u) # ?bayesplot::ppc_dens_overlay bayesplot::bayesplot_grid( pp_check(fit4, seed = 123), pp_check(fit5, seed = 123), titles = c("lot1", "lot2") ) ### Negative binomial regression fit6 <- stan_glm.nb( Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine, link = "log", prior_aux = exponential(1.5, autoscale=TRUE), chains = 2, iter = 200, # for speed of example only refresh = 0 ) prior_summary(fit6) bayesplot::color_scheme_set("brightblue") plot(fit6) pp_check(fit6, plotfun = "hist", nreps = 5) # ?bayesplot::ppc_hist # 80\% interval of estimated reciprocal_dispersion parameter posterior_interval(fit6, pars = "reciprocal_dispersion", prob = 0.8) plot(fit6, "areas", pars = "reciprocal_dispersion", prob = 0.8) } } } \references{ Gelman, A. and Hill, J. (2007). \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models.} Cambridge University Press, Cambridge, UK. (Ch. 3-6) Muth, C., Oravecz, Z., and Gabry, J. (2018) User-friendly Bayesian regression modeling: A tutorial with rstanarm and shinystan. \emph{The Quantitative Methods for Psychology}. 14(2), 99--119. \url{https://www.tqmp.org/RegularArticles/vol14-2/p099/p099.pdf} } \seealso{ \code{\link{stanreg-methods}} and \code{\link[stats]{glm}}. The various vignettes for \code{stan_glm} at \url{https://mc-stan.org/rstanarm/articles/}. 
} rstanarm/man/stan_biglm.Rd0000644000176200001440000001671414551552005015275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_biglm.R, R/stan_biglm.fit.R \name{stan_biglm} \alias{stan_biglm} \alias{stan_biglm.fit} \title{Bayesian regularized linear but big models via Stan} \usage{ stan_biglm( biglm, xbar, ybar, s_y, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL ) stan_biglm.fit( b, R, SSR, N, xbar, ybar, s_y, has_intercept = TRUE, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank", "optimizing"), adapt_delta = NULL, importance_resampling = TRUE, keep_every = 1 ) } \arguments{ \item{biglm}{The list output by \code{\link[biglm]{biglm}} in the \pkg{biglm} package.} \item{xbar}{A numeric vector of column means in the implicit design matrix excluding the intercept for the observations included in the model.} \item{ybar}{A numeric scalar indicating the mean of the outcome for the observations included in the model.} \item{s_y}{A numeric scalar indicating the unbiased sample standard deviation of the outcome for the observations included in the model.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. 
Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). \code{refresh=0} turns off the iteration updates.} \item{prior}{Must be a call to \code{\link{R2}} with its \code{location} argument specified or \code{NULL}, which would indicate a standard uniform prior for the \eqn{R^2}.} \item{prior_intercept}{Either \code{NULL} (the default) or a call to \code{\link{normal}}. If a \code{\link{normal}} prior is specified without a \code{scale}, then the standard deviation is taken to be the marginal standard deviation of the outcome divided by the square root of the sample size, which is legitimate because the marginal standard deviation of the outcome is a primitive parameter being estimated. \strong{Note:} If using a dense representation of the design matrix ---i.e., if the \code{sparse} argument is left at its default value of \code{FALSE}--- then the prior distribution for the intercept is set so it applies to the value \emph{when all predictors are centered}. If you prefer to specify a prior on the intercept without the predictors being auto-centered, then you have to omit the intercept from the \code{\link[stats]{formula}} and include a column of ones as a predictor, in which case some element of \code{prior} specifies the prior on it, rather than \code{prior_intercept}. Regardless of how \code{prior_intercept} is specified, the reported \emph{estimates} of the intercept always correspond to a parameterization without centered predictors (i.e., same as in \code{glm}).} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. 
Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{b}{A numeric vector of OLS coefficients, excluding the intercept} \item{R}{A square upper-triangular matrix from the QR decomposition of the design matrix, excluding the intercept} \item{SSR}{A numeric scalar indicating the sum-of-squared residuals for OLS} \item{N}{An integer scalar indicating the number of included observations} \item{has_intercept}{A logical scalar indicating whether to add an intercept to the model when estimating it.} \item{importance_resampling}{Logical scalar indicating whether to use importance resampling when approximating the posterior distribution with a multivariate normal around the posterior mode, which only applies when \code{algorithm} is \code{"optimizing"} but defaults to \code{TRUE} in that case} \item{keep_every}{Positive integer, which defaults to 1, but can be higher in order to thin the importance sampling realizations and also only applies when \code{algorithm} is \code{"optimizing"}} } \value{ The output of both \code{stan_biglm} and \code{stan_biglm.fit} is an object of \code{\link[rstan]{stanfit-class}} rather than \code{\link{stanreg-objects}}, which is more limited and less convenient but necessitated by the fact that \code{stan_biglm} does not bring the full design matrix into memory. Without the full design matrix, some of the elements of a \code{\link{stanreg-objects}} object cannot be calculated, such as residuals. 
Thus, the functions in the \pkg{rstanarm} package that input \code{\link{stanreg-objects}}, such as \code{\link{posterior_predict}} cannot be used. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} This is the same model as with \code{\link{stan_lm}} but it utilizes the output from \code{\link[biglm]{biglm}} in the \pkg{biglm} package in order to proceed when the data is too large to fit in memory. } \details{ The \code{stan_biglm} function is intended to be used in the same circumstances as the \code{\link[biglm]{biglm}} function in the \pkg{biglm} package but with an informative prior on the \eqn{R^2} of the regression. Like \code{\link[biglm]{biglm}}, the memory required to estimate the model depends largely on the number of predictors rather than the number of observations. However, \code{stan_biglm} and \code{stan_biglm.fit} have additional required arguments that are not necessary in \code{\link[biglm]{biglm}}, namely \code{xbar}, \code{ybar}, and \code{s_y}. If any observations have any missing values on any of the predictors or the outcome, such observations do not contribute to these statistics. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { # create inputs ols <- lm(mpg ~ wt + qsec + am, data = mtcars, # all row are complete so ... 
na.action = na.exclude) # not necessary in this case b <- coef(ols)[-1] R <- qr.R(ols$qr)[-1,-1] SSR <- crossprod(ols$residuals)[1] not_NA <- !is.na(fitted(ols)) N <- sum(not_NA) xbar <- colMeans(mtcars[not_NA,c("wt", "qsec", "am")]) y <- mtcars$mpg[not_NA] ybar <- mean(y) s_y <- sd(y) post <- stan_biglm.fit(b, R, SSR, N, xbar, ybar, s_y, prior = R2(.75), # the next line is only to make the example go fast chains = 1, iter = 500, seed = 12345) cbind(lm = b, stan_lm = rstan::get_posterior_mean(post)[13:15,]) # shrunk } } rstanarm/man/stan_polr.Rd0000644000176200001440000002107614551552005015154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_polr.R, R/stan_polr.fit.R \name{stan_polr} \alias{stan_polr} \alias{stan_polr.fit} \title{Bayesian ordinal regression models via Stan} \usage{ stan_polr( formula, data, weights, ..., subset, na.action = getOption("na.action", "na.omit"), contrasts = NULL, model = TRUE, method = c("logistic", "probit", "loglog", "cloglog", "cauchit"), prior = R2(stop("'location' must be specified")), prior_counts = dirichlet(1), shape = NULL, rate = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, do_residuals = NULL ) stan_polr.fit( x, y, wt = NULL, offset = NULL, method = c("logistic", "probit", "loglog", "cloglog", "cauchit"), ..., prior = R2(stop("'location' must be specified")), prior_counts = dirichlet(1), shape = NULL, rate = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, do_residuals = algorithm == "sampling" ) } \arguments{ \item{formula, data, subset}{Same as \code{\link[MASS]{polr}}, but \emph{we strongly advise against omitting the \code{data} argument}. 
Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{weights, na.action, contrasts, model}{Same as \code{\link[MASS]{polr}}, but rarely specified.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). \code{refresh=0} turns off the iteration updates.} \item{method}{One of 'logistic', 'probit', 'loglog', 'cloglog' or 'cauchit', but can be abbreviated. See \code{\link[MASS]{polr}} for more details.} \item{prior}{Prior for coefficients. Should be a call to \code{\link{R2}} to specify the prior location of the \eqn{R^2} but can be \code{NULL} to indicate a standard uniform prior. See \code{\link{priors}}.} \item{prior_counts}{A call to \code{\link{dirichlet}} to specify the prior counts of the outcome when the predictors are at their sample means.} \item{shape}{Either \code{NULL} or a positive scalar that is interpreted as the shape parameter for a \code{\link[stats]{GammaDist}}ribution on the exponent applied to the probability of success when there are only two outcome categories. 
If \code{NULL}, which is the default, then the exponent is taken to be fixed at \eqn{1}.} \item{rate}{Either \code{NULL} or a positive scalar that is interpreted as the rate parameter for a \code{\link[stats]{GammaDist}}ribution on the exponent applied to the probability of success when there are only two outcome categories. If \code{NULL}, which is the default, then the exponent is taken to be fixed at \eqn{1}.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{do_residuals}{A logical scalar indicating whether or not to automatically calculate fit residuals after sampling completes. Defaults to \code{TRUE} if and only if \code{algorithm="sampling"}. Setting \code{do_residuals=FALSE} is only useful in the somewhat rare case that \code{stan_polr} appears to finish sampling but hangs instead of returning the fitted model object.} \item{x}{A design matrix.} \item{y}{A response variable, which must be a (preferably ordered) factor.} \item{wt}{A numeric vector (possibly \code{NULL}) of observation weights.} \item{offset}{A numeric vector (possibly \code{NULL}) of offsets.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_polr}. 
A \link[=stanfit-class]{stanfit} object (or a slightly modified stanfit object) is returned if \code{stan_polr.fit} is called directly. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Bayesian inference for ordinal (or binary) regression models under a proportional odds assumption. } \details{ The \code{stan_polr} function is similar in syntax to \code{\link[MASS]{polr}} but rather than performing maximum likelihood estimation of a proportional odds model, Bayesian estimation is performed (if \code{algorithm = "sampling"}) via MCMC. The \code{stan_polr} function calls the workhorse \code{stan_polr.fit} function, but it is possible to call the latter directly. As for \code{\link{stan_lm}}, it is necessary to specify the prior location of \eqn{R^2}. In this case, the \eqn{R^2} pertains to the proportion of variance in the latent variable (which is discretized by the cutpoints) attributable to the predictors in the model. Prior beliefs about the cutpoints are governed by prior beliefs about the outcome when the predictors are at their sample means. Both of these are explained in the help page on \code{\link{priors}} and in the \pkg{rstanarm} vignettes. Unlike \code{\link[MASS]{polr}}, \code{stan_polr} also allows the "ordinal" outcome to contain only two levels, in which case the likelihood is the same by default as for \code{\link{stan_glm}} with \code{family = binomial} but the prior on the coefficients is different. However, \code{stan_polr} allows the user to specify the \code{shape} and \code{rate} hyperparameters, in which case the probability of success is defined as the logistic CDF of the linear predictor, raised to the power of \code{alpha} where \code{alpha} has a gamma prior with the specified \code{shape} and \code{rate}. 
This likelihood is called \dQuote{scobit} by Nagler (1994) because if \code{alpha} is not equal to \eqn{1}, then the relationship between the linear predictor and the probability of success is skewed. If \code{shape} or \code{rate} is \code{NULL}, then \code{alpha} is assumed to be fixed to \eqn{1}. Otherwise, it is usually advisable to set \code{shape} and \code{rate} to the same number so that the expected value of \code{alpha} is \eqn{1} while leaving open the possibility that \code{alpha} may depart from \eqn{1} a little bit. It is often necessary to have a lot of data in order to estimate \code{alpha} with much precision and always necessary to inspect the Pareto shape parameters calculated by \code{\link{loo}} to see if the results are particularly sensitive to individual observations. Users should think carefully about how the outcome is coded when using a scobit-type model. When \code{alpha} is not \eqn{1}, the asymmetry implies that the probability of success is most sensitive to the predictors when the probability of success is less than \eqn{0.63}. Reversing the coding of the successes and failures allows the predictors to have the greatest impact when the probability of failure is less than \eqn{0.63}. Also, the gamma prior on \code{alpha} is positively skewed, but you can reverse the coding of the successes and failures to circumvent this property. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { fit <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit", prior = R2(0.2, "mean"), init_r = 0.1, seed = 12345, algorithm = "fullrank") # for speed only print(fit) plot(fit) } } \references{ Nagler, J., (1994). Scobit: An Alternative Estimator to Logit and Probit. \emph{American Journal of Political Science}. 230 -- 255. } \seealso{ \code{\link{stanreg-methods}} and \code{\link[MASS]{polr}}. The vignette for \code{stan_polr}. 
\url{https://mc-stan.org/rstanarm/articles/} } rstanarm/man/model.matrix.stanreg.Rd0000644000176200001440000000062314551552005017213 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanreg-methods.R \name{model.matrix.stanreg} \alias{model.matrix.stanreg} \title{model.matrix method for stanreg objects} \usage{ \method{model.matrix}{stanreg}(object, ...) } \arguments{ \item{object, ...}{See \code{\link[stats]{model.matrix}}.} } \description{ model.matrix method for stanreg objects } \keyword{internal} rstanarm/man/priors.Rd0000644000176200001440000006046314551552004014473 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/priors.R \name{priors} \alias{priors} \alias{normal} \alias{student_t} \alias{cauchy} \alias{hs} \alias{hs_plus} \alias{laplace} \alias{lasso} \alias{product_normal} \alias{exponential} \alias{decov} \alias{lkj} \alias{dirichlet} \alias{R2} \alias{default_prior_intercept} \alias{default_prior_coef} \title{Prior distributions and options} \usage{ normal(location = 0, scale = NULL, autoscale = FALSE) student_t(df = 1, location = 0, scale = NULL, autoscale = FALSE) cauchy(location = 0, scale = NULL, autoscale = FALSE) hs(df = 1, global_df = 1, global_scale = 0.01, slab_df = 4, slab_scale = 2.5) hs_plus( df1 = 1, df2 = 1, global_df = 1, global_scale = 0.01, slab_df = 4, slab_scale = 2.5 ) laplace(location = 0, scale = NULL, autoscale = FALSE) lasso(df = 1, location = 0, scale = NULL, autoscale = FALSE) product_normal(df = 2, location = 0, scale = 1) exponential(rate = 1, autoscale = FALSE) decov(regularization = 1, concentration = 1, shape = 1, scale = 1) lkj(regularization = 1, scale = 10, df = 1, autoscale = TRUE) dirichlet(concentration = 1) R2(location = NULL, what = c("mode", "mean", "median", "log")) default_prior_intercept(family) default_prior_coef(family) } \arguments{ \item{location}{Prior location. 
In most cases, this is the prior mean, but for \code{cauchy} (which is equivalent to \code{student_t} with \code{df=1}), the mean does not exist and \code{location} is the prior median. The default value is \eqn{0}, except for \code{R2} which has no default value for \code{location}. For \code{R2}, \code{location} pertains to the prior location of the \eqn{R^2} under a Beta distribution, but the interpretation of the \code{location} parameter depends on the specified value of the \code{what} argument (see the \emph{R2 family} section in \strong{Details}).} \item{scale}{Prior scale. The default depends on the family (see \strong{Details}).} \item{autoscale}{If \code{TRUE} then the scales of the priors on the intercept and regression coefficients may be additionally modified internally by \pkg{rstanarm} in the following cases. First, for Gaussian models only, the prior scales for the intercept, coefficients, and the auxiliary parameter \code{sigma} (error standard deviation) are multiplied by \code{sd(y)}. Additionally --- not only for Gaussian models --- if the \code{QR} argument to the model fitting function (e.g. \code{stan_glm}) is \code{FALSE} then we also divide the prior scale(s) by \code{sd(x)}. Prior autoscaling is also discussed in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}} \item{df, df1, df2}{Prior degrees of freedom. The default is \eqn{1} for \code{student_t}, in which case it is equivalent to \code{cauchy}. For the hierarchical shrinkage priors (\code{hs} and \code{hs_plus}) the degrees of freedom parameter(s) default to \eqn{1}. For the \code{product_normal} prior, the degrees of freedom parameter must be an integer (vector) that is at least \eqn{2} (the default).} \item{global_df, global_scale, slab_df, slab_scale}{Optional arguments for the hierarchical shrinkage priors. 
See the \emph{Hierarchical shrinkage family} section below.} \item{rate}{Prior rate for the exponential distribution. Defaults to \code{1}. For the exponential distribution, the rate parameter is the \emph{reciprocal} of the mean.} \item{regularization}{Exponent for an LKJ prior on the correlation matrix in the \code{decov} or \code{lkj} prior. The default is \eqn{1}, implying a joint uniform prior.} \item{concentration}{Concentration parameter for a symmetric Dirichlet distribution. The default is \eqn{1}, implying a joint uniform prior.} \item{shape}{Shape parameter for a gamma prior on the scale parameter in the \code{decov} prior. If \code{shape} and \code{scale} are both \eqn{1} (the default) then the gamma prior simplifies to the unit-exponential distribution.} \item{what}{A character string among \code{'mode'} (the default), \code{'mean'}, \code{'median'}, or \code{'log'} indicating how the \code{location} parameter is interpreted in the \code{R2} case. If \code{'log'}, then \code{location} is interpreted as the expected logarithm of the \eqn{R^2} under a Beta distribution. Otherwise, \code{location} is interpreted as the \code{what} of the \eqn{R^2} under a Beta distribution. If the number of predictors is less than or equal to two, the mode of this Beta distribution does not exist and an error will prompt the user to specify another choice for \code{what}.} \item{family}{Not currently used.} } \value{ A named list to be used internally by the \pkg{rstanarm} model fitting functions. } \description{ The functions described on this page are used to specify the prior-related arguments of the various modeling functions in the \pkg{rstanarm} package (to view the priors used for an existing model see \code{\link{prior_summary}}). The default priors used in the various \pkg{rstanarm} modeling functions are intended to be \emph{weakly informative} in that they provide moderate regularization and help stabilize computation. 
For many applications the defaults will perform well, but prudent use of more informative priors is encouraged. Uniform prior distributions are possible (e.g. by setting \code{\link{stan_glm}}'s \code{prior} argument to \code{NULL}) but, unless the data is very strong, they are not recommended and are \emph{not} non-informative, giving the same probability mass to implausible values as plausible ones. More information on priors is available in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}} as well as the vignettes for the various modeling functions. For details on the priors used for multilevel models in particular see the vignette \href{https://mc-stan.org/rstanarm/articles/glmer.html}{\emph{Estimating Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm}} and also the \strong{Covariance matrices} section lower down on this page. } \details{ The details depend on the family of the prior being used: \subsection{Student t family}{ Family members: \itemize{ \item \code{normal(location, scale)} \item \code{student_t(df, location, scale)} \item \code{cauchy(location, scale)} } Each of these functions also takes an argument \code{autoscale}. For the prior distribution for the intercept, \code{location}, \code{scale}, and \code{df} should be scalars. For the prior for the other coefficients they can either be vectors of length equal to the number of coefficients (not including the intercept), or they can be scalars, in which case they will be recycled to the appropriate length. As the degrees of freedom approaches infinity, the Student t distribution approaches the normal distribution and if the degrees of freedom are one, then the Student t distribution is the Cauchy distribution. 
If \code{scale} is not specified it will default to \eqn{2.5}, unless the probit link function is used, in which case these defaults are scaled by a factor of \code{dnorm(0)/dlogis(0)}, which is roughly \eqn{1.6}. If the \code{autoscale} argument is \code{TRUE}, then the scales will be further adjusted as described above in the documentation of the \code{autoscale} argument in the \strong{Arguments} section. } \subsection{Hierarchical shrinkage family}{ Family members: \itemize{ \item \code{hs(df, global_df, global_scale, slab_df, slab_scale)} \item \code{hs_plus(df1, df2, global_df, global_scale, slab_df, slab_scale)} } The hierarchical shrinkage priors are normal with a mean of zero and a standard deviation that is also a random variable. The traditional hierarchical shrinkage prior utilizes a standard deviation that is distributed half Cauchy with a median of zero and a scale parameter that is also half Cauchy. This is called the "horseshoe prior". The hierarchical shrinkage (\code{hs}) prior in the \pkg{rstanarm} package instead utilizes a regularized horseshoe prior, as described by Piironen and Vehtari (2017), which recommends setting the \code{global_scale} argument equal to the ratio of the expected number of non-zero coefficients to the expected number of zero coefficients, divided by the square root of the number of observations. The hierarchical shrinkage plus (\code{hs_plus}) prior is similar except that the standard deviation is distributed as the product of two independent half Cauchy parameters that are each scaled in a similar way to the \code{hs} prior. The hierarchical shrinkage priors have very tall modes and very fat tails. Consequently, they tend to produce posterior distributions that are very concentrated near zero, unless the predictor has a strong influence on the outcome, in which case the prior has little influence. 
Hierarchical shrinkage priors often require you to increase the \code{\link{adapt_delta}} tuning parameter in order to diminish the number of divergent transitions. For more details on tuning parameters and divergent transitions see the Troubleshooting section of the \emph{How to Use the rstanarm Package} vignette. } \subsection{Laplace family}{ Family members: \itemize{ \item \code{laplace(location, scale)} \item \code{lasso(df, location, scale)} } Each of these functions also takes an argument \code{autoscale}. The Laplace distribution is also known as the double-exponential distribution. It is a symmetric distribution with a sharp peak at its mean / median / mode and fairly long tails. This distribution can be motivated as a scale mixture of normal distributions and the remarks above about the normal distribution apply here as well. The lasso approach to supervised learning can be expressed as finding the posterior mode when the likelihood is Gaussian and the priors on the coefficients have independent Laplace distributions. It is commonplace in supervised learning to choose the tuning parameter by cross-validation, whereas a more Bayesian approach would be to place a prior on \dQuote{it}, or rather its reciprocal in our case (i.e. \emph{smaller} values correspond to more shrinkage toward the prior location vector). We use a chi-square prior with degrees of freedom equal to that specified in the call to \code{lasso} or, by default, 1. The expectation of a chi-square random variable is equal to this degrees of freedom and the mode is equal to the degrees of freedom minus 2, if this difference is positive. It is also common in supervised learning to standardize the predictors before training the model. We do not recommend doing so. Instead, it is better to specify \code{autoscale = TRUE}, which will adjust the scales of the priors according to the dispersion in the variables. 
See the documentation of the \code{autoscale} argument above and also the \code{\link{prior_summary}} page for more information. } \subsection{Product-normal family}{ Family members: \itemize{ \item \code{product_normal(df, location, scale)} } The product-normal distribution is the product of at least two independent normal variates each with mean zero, shifted by the \code{location} parameter. It can be shown that the density of a product-normal variate is symmetric and infinite at \code{location}, so this prior resembles a \dQuote{spike-and-slab} prior for sufficiently large values of the \code{scale} parameter. For better or for worse, this prior may be appropriate when it is strongly believed (by someone) that a regression coefficient \dQuote{is} equal to the \code{location} parameter, even though no true Bayesian would specify such a prior. Each element of \code{df} must be an integer of at least \eqn{2} because these \dQuote{degrees of freedom} are interpreted as the number of normal variates being multiplied and then shifted by \code{location} to yield the regression coefficient. Higher degrees of freedom produce a sharper spike at \code{location}. Each element of \code{scale} must be a non-negative real number that is interpreted as the standard deviation of the normal variates being multiplied and then shifted by \code{location} to yield the regression coefficient. In other words, the elements of \code{scale} may differ, but the k-th standard deviation is presumed to hold for all the normal deviates that are multiplied together and shifted by the k-th element of \code{location} to yield the k-th regression coefficient. The elements of \code{scale} are not the prior standard deviations of the regression coefficients. The prior variance of the regression coefficients is equal to the scale raised to the power of \eqn{2} times the corresponding element of \code{df}. 
Thus, larger values of \code{scale} put more prior volume on values of the regression coefficient that are far from zero. } \subsection{Dirichlet family}{ Family members: \itemize{ \item \code{dirichlet(concentration)} } The Dirichlet distribution is a multivariate generalization of the beta distribution. It is perhaps the easiest prior distribution to specify because the concentration parameters can be interpreted as prior counts (although they need not be integers) of a multinomial random variable. The Dirichlet distribution is used in \code{\link{stan_polr}} for an implicit prior on the cutpoints in an ordinal regression model. More specifically, the Dirichlet prior pertains to the prior probability of observing each category of the ordinal outcome when the predictors are at their sample means. Given these prior probabilities, it is straightforward to add them to form cumulative probabilities and then use an inverse CDF transformation of the cumulative probabilities to define the cutpoints. If a scalar is passed to the \code{concentration} argument of the \code{dirichlet} function, then it is replicated to the appropriate length and the Dirichlet distribution is symmetric. If \code{concentration} is a vector and all elements are \eqn{1}, then the Dirichlet distribution is jointly uniform. If all concentration parameters are equal but greater than \eqn{1} then the prior mode is that the categories are equiprobable, and the larger the value of the identical concentration parameters, the more sharply peaked the distribution is at the mode. The elements in \code{concentration} can also be given different values to represent that not all outcome categories are a priori equiprobable. 
} \subsection{Covariance matrices}{ Family members: \itemize{ \item \code{decov(regularization, concentration, shape, scale)} \item \code{lkj(regularization, scale, df)} } (Also see vignette for \code{stan_glmer}, \href{https://mc-stan.org/rstanarm/articles/glmer.html}{\emph{Estimating Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm}}) Covariance matrices are decomposed into correlation matrices and variances. The variances are in turn decomposed into the product of a simplex vector and the trace of the matrix. Finally, the trace is the product of the order of the matrix and the square of a scale parameter. This prior on a covariance matrix is represented by the \code{decov} function. The prior for a correlation matrix is called LKJ whose density is proportional to the determinant of the correlation matrix raised to the power of a positive regularization parameter minus one. If \code{regularization = 1} (the default), then this prior is jointly uniform over all correlation matrices of that size. If \code{regularization > 1}, then the identity matrix is the mode and in the unlikely case that \code{regularization < 1}, the identity matrix is the trough. The trace of a covariance matrix is equal to the sum of the variances. We set the trace equal to the product of the order of the covariance matrix and the \emph{square} of a positive scale parameter. The particular variances are set equal to the product of a simplex vector --- which is non-negative and sums to \eqn{1} --- and the scalar trace. In other words, each element of the simplex vector represents the proportion of the trace attributable to the corresponding variable. A symmetric Dirichlet prior is used for the simplex vector, which has a single (positive) \code{concentration} parameter, which defaults to \eqn{1} and implies that the prior is jointly uniform over the space of simplex vectors of that size. 
If \code{concentration > 1}, then the prior mode corresponds to all variables having the same (proportion of total) variance, which can be used to ensure the posterior variances are not zero. As the \code{concentration} parameter approaches infinity, this mode becomes more pronounced. In the unlikely case that \code{concentration < 1}, the variances are more polarized. If all the variables were multiplied by a number, the trace of their covariance matrix would increase by that number squared. Thus, it is reasonable to use a scale-invariant prior distribution for the positive scale parameter, and in this case we utilize a Gamma distribution, whose \code{shape} and \code{scale} are both \eqn{1} by default, implying a unit-exponential distribution. Set the \code{shape} hyperparameter to some value greater than \eqn{1} to ensure that the posterior trace is not zero. If \code{regularization}, \code{concentration}, \code{shape} and / or \code{scale} are positive scalars, then they are recycled to the appropriate length. Otherwise, each can be a positive vector of the appropriate length, but the appropriate length depends on the number of covariance matrices in the model and their sizes. A one-by-one covariance matrix is just a variance and thus does not have \code{regularization} or \code{concentration} parameters, but does have \code{shape} and \code{scale} parameters for the prior standard deviation of that variable. Note that for \code{\link{stan_mvmer}} and \code{\link{stan_jm}} models an additional prior distribution is provided through the \code{lkj} function. This prior is in fact currently used as the default for those modelling functions (although \code{decov} is still available as an option if the user wishes to specify it through the \code{prior_covariance} argument). 
The \code{lkj} prior uses the same decomposition of the covariance matrices into correlation matrices and variances, however, the variances are not further decomposed into a simplex vector and the trace; instead the standard deviations (square root of the variances) for each of the group specific parameters are given a half Student t distribution with the scale and df parameters specified through the \code{scale} and \code{df} arguments to the \code{lkj} function. The scale parameter default is 10 which is then autoscaled, whilst the df parameter default is 1 (therefore equivalent to a half Cauchy prior distribution for the standard deviation of each group specific parameter). This prior generally leads to similar results as the \code{decov} prior, but it is also likely to be \emph{less} diffuse compared with the \code{decov} prior; therefore it sometimes seems to lead to faster estimation times, hence why it has been chosen as the default prior for \code{\link{stan_mvmer}} and \code{\link{stan_jm}} where estimation times can be long. } \subsection{R2 family}{ Family members: \itemize{ \item \code{R2(location, what)} } The \code{\link{stan_lm}}, \code{\link{stan_aov}}, and \code{\link{stan_polr}} functions allow the user to utilize a function called \code{R2} to convey prior information about all the parameters. This prior hinges on prior beliefs about the location of \eqn{R^2}, the proportion of variance in the outcome attributable to the predictors, which has a \code{\link[stats]{Beta}} prior with first shape hyperparameter equal to half the number of predictors and second shape hyperparameter free. By specifying \code{what} to be the prior mode (the default), mean, median, or expected log of \eqn{R^2}, the second shape parameter for this Beta distribution is determined internally. If \code{what = 'log'}, location should be a negative scalar; otherwise it should be a scalar on the \eqn{(0,1)} interval. 
For example, if \eqn{R^2 = 0.5}, then the mode, mean, and median of the \code{\link[stats]{Beta}} distribution are all the same and thus the second shape parameter is also equal to half the number of predictors. The second shape parameter of the \code{\link[stats]{Beta}} distribution is actually the same as the shape parameter in the LKJ prior for a correlation matrix described in the previous subsection. Thus, the smaller is \eqn{R^2}, the larger is the shape parameter, the smaller are the prior correlations among the outcome and predictor variables, and the more concentrated near zero is the prior density for the regression coefficients. Hence, the prior on the coefficients is regularizing and should yield a posterior distribution with good out-of-sample predictions \emph{if} the prior location of \eqn{R^2} is specified in a reasonable fashion. } } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { fmla <- mpg ~ wt + qsec + drat + am # Draw from prior predictive distribution (by setting prior_PD = TRUE) prior_pred_fit <- stan_glm(fmla, data = mtcars, prior_PD = TRUE, chains = 1, seed = 12345, iter = 250, # for speed only prior = student_t(df = 4, 0, 2.5), prior_intercept = cauchy(0,10), prior_aux = exponential(1/2)) plot(prior_pred_fit, "hist") \donttest{ # Can assign priors to names N05 <- normal(0, 5) fit <- stan_glm(fmla, data = mtcars, prior = N05, prior_intercept = N05) } # Visually compare normal, student_t, cauchy, laplace, and product_normal compare_priors <- function(scale = 1, df_t = 2, xlim = c(-10, 10)) { dt_loc_scale <- function(x, df, location, scale) { 1/scale * dt((x - location)/scale, df) } dlaplace <- function(x, location, scale) { 0.5 / scale * exp(-abs(x - location) / scale) } dproduct_normal <- function(x, scale) { besselK(abs(x) / scale ^ 2, nu = 0) / (scale ^ 2 * pi) } stat_dist <- function(dist, ...) { ggplot2::stat_function(ggplot2::aes_(color = dist), ...) 
} ggplot2::ggplot(data.frame(x = xlim), ggplot2::aes(x)) + stat_dist("normal", size = .75, fun = dnorm, args = list(mean = 0, sd = scale)) + stat_dist("student_t", size = .75, fun = dt_loc_scale, args = list(df = df_t, location = 0, scale = scale)) + stat_dist("cauchy", size = .75, linetype = 2, fun = dcauchy, args = list(location = 0, scale = scale)) + stat_dist("laplace", size = .75, linetype = 2, fun = dlaplace, args = list(location = 0, scale = scale)) + stat_dist("product_normal", size = .75, linetype = 2, fun = dproduct_normal, args = list(scale = 1)) } # Cauchy has fattest tails, followed by student_t, laplace, and normal compare_priors() # The student_t with df = 1 is the same as the cauchy compare_priors(df_t = 1) # Even a scale of 5 is somewhat large. It gives plausibility to rather # extreme values compare_priors(scale = 5, xlim = c(-20,20)) # If you use a prior like normal(0, 1000) to be "non-informative" you are # actually saying that a coefficient value of e.g. -500 is quite plausible compare_priors(scale = 1000, xlim = c(-1000,1000)) } } \references{ Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC Press, London, third edition. \url{https://stat.columbia.edu/~gelman/book/} Gelman, A., Jakulin, A., Pittau, M. G., and Su, Y. (2008). A weakly informative default prior distribution for logistic and other regression models. \emph{Annals of Applied Statistics}. 2(4), 1360--1383. Piironen, J., and Vehtari, A. (2017). Sparsity information and regularization in the horseshoe and other shrinkage priors. \url{https://arxiv.org/abs/1707.01694} Stan Development Team. \emph{Stan Modeling Language Users Guide and Reference Manual.} \url{https://mc-stan.org/users/documentation/}. } \seealso{ The various vignettes for the \pkg{rstanarm} package also discuss and demonstrate the use of some of the supported prior distributions. 
} rstanarm/man/plot.predict.stanjm.Rd0000644000176200001440000001212614551552004017050 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_traj.R \name{plot.predict.stanjm} \alias{plot.predict.stanjm} \title{Plot the estimated subject-specific or marginal longitudinal trajectory} \usage{ \method{plot}{predict.stanjm}( x, ids = NULL, limits = c("ci", "pi", "none"), xlab = NULL, ylab = NULL, vline = FALSE, plot_observed = FALSE, facet_scales = "free_x", ci_geom_args = NULL, grp_overlay = FALSE, ... ) } \arguments{ \item{x}{A data frame and object of class \code{predict.stanjm} returned by a call to the function \code{\link{posterior_traj}}. The object contains point estimates and uncertainty interval limits for the fitted values of the longitudinal response.} \item{ids}{An optional vector providing a subset of subject IDs for whom the predicted curves should be plotted.} \item{limits}{A quoted character string specifying the type of limits to include in the plot. Can be one of: \code{"ci"} for the Bayesian posterior uncertainty interval for the estimated mean longitudinal response (often known as a credible interval); \code{"pi"} for the prediction interval for the estimated (raw) longitudinal response; or \code{"none"} for no interval limits.} \item{xlab, ylab}{An optional axis label passed to \code{\link[ggplot2]{labs}}.} \item{vline}{A logical. If \code{TRUE} then a vertical dashed line is added to the plot indicating the event or censoring time for the individual. Can only be used if each plot within the figure is for a single individual.} \item{plot_observed}{A logical. 
If \code{TRUE} then the observed longitudinal measurements are overlaid on the plot.} \item{facet_scales}{A character string passed to the \code{scales} argument of \code{\link[ggplot2]{facet_wrap}} when plotting the longitudinal trajectory for more than one individual.} \item{ci_geom_args}{Optional arguments passed to \code{\link[ggplot2]{geom_ribbon}} and used to control features of the plotted interval limits. They should be supplied as a named list.} \item{grp_overlay}{Only relevant if the model had lower level units clustered within an individual. If \code{TRUE}, then the fitted trajectories for the lower level units will be overlaid in the same plot region (that is, all lower level units for a single individual will be shown within a single facet). If \code{FALSE}, then the fitted trajectories for each lower level unit will be shown in a separate facet.} \item{...}{Optional arguments passed to \code{\link[ggplot2]{geom_smooth}} and used to control features of the plotted longitudinal trajectory.} } \value{ A \code{ggplot} object, also of class \code{plot.predict.stanjm}. This object can be further customised using the \pkg{ggplot2} package. It can also be passed to the function \code{\link{plot_stack_jm}}. } \description{ This generic \code{plot} method for \code{predict.stanjm} objects will plot the estimated subject-specific or marginal longitudinal trajectory using the data frame returned by a call to \code{\link{posterior_traj}}. To ensure that enough data points are available to plot the longitudinal trajectory, it is assumed that the call to \code{\link{posterior_traj}} would have used the default \code{interpolate = TRUE}, and perhaps also \code{extrapolate = TRUE} (the latter being optional, depending on whether or not the user wants to see extrapolation of the longitudinal trajectory beyond the last observation time). 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ # Run example model if not already loaded if (!exists("example_jm")) example(example_jm) # For a subset of individuals in the estimation dataset we will # obtain subject-specific predictions for the longitudinal submodel # at evenly spaced times between 0 and their event or censoring time. pt1 <- posterior_traj(example_jm, ids = c(7,13,15), interpolate = TRUE) plot(pt1) # credible interval for mean response plot(pt1, limits = "pi") # prediction interval for raw response plot(pt1, limits = "none") # no uncertainty interval # We can also extrapolate the longitudinal trajectories. pt2 <- posterior_traj(example_jm, ids = c(7,13,15), interpolate = TRUE, extrapolate = TRUE) plot(pt2) plot(pt2, vline = TRUE) # add line indicating event or censoring time plot(pt2, vline = TRUE, plot_observed = TRUE) # overlay observed longitudinal data # We can change or add attributes to the plot plot1 <- plot(pt2, ids = c(7,13,15), xlab = "Follow up time", vline = TRUE, plot_observed = TRUE, facet_scales = "fixed", color = "blue", linetype = 2, ci_geom_args = list(fill = "red")) plot1 # Since the returned plot is also a ggplot object, we can # modify some of its attributes after it has been returned plot1 + ggplot2::theme(strip.background = ggplot2::element_blank()) + ggplot2::labs(title = "Some plotted longitudinal trajectories") } } } \seealso{ \code{\link{posterior_traj}}, \code{\link{plot_stack_jm}}, \code{\link{posterior_survfit}}, \code{\link{plot.survfit.stanjm}} } rstanarm/man/posterior_traj.Rd0000644000176200001440000003772614551552004016231 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_traj.R \name{posterior_traj} \alias{posterior_traj} \title{Estimate the subject-specific or marginal longitudinal trajectory} \usage{ posterior_traj( object, m = 1, newdata = NULL, newdataLong = NULL, newdataEvent = NULL, interpolate = TRUE, 
extrapolate = FALSE, control = list(), last_time = NULL, prob = 0.95, ids, dynamic = TRUE, scale = 1.5, draws = NULL, seed = NULL, return_matrix = FALSE, ... ) } \arguments{ \item{object}{A fitted model object returned by the \code{\link{stan_jm}} modelling function. See \code{\link{stanreg-objects}}.} \item{m}{Integer specifying the number or name of the submodel} \item{newdata}{\strong{Deprecated}: please use \code{newdataLong} instead. Optionally, a data frame in which to look for variables with which to predict. If omitted, the model matrix is used. If \code{newdata} is provided and any variables were transformed (e.g. rescaled) in the data used to fit the model, then these variables must also be transformed in \code{newdata}. This only applies if variables were transformed before passing the data to one of the modeling functions and \emph{not} if transformations were specified inside the model formula.} \item{newdataLong, newdataEvent}{Optionally, a data frame (or in the case of \code{newdataLong} this can be a list of data frames) in which to look for variables with which to predict. If omitted, the model matrices are used. If new data is provided, then two options are available. Either one can provide observed covariate and outcome data, collected up to some time \emph{t}, and use this data to draw new individual-specific coefficients (i.e. individual-level random effects). This is the default behaviour when new data is provided, determined by the argument \code{dynamic = TRUE}, and requiring both \code{newdataLong} and \code{newdataEvent} to be specified. 
Alternatively, one can specify \code{dynamic = FALSE}, and then predict using just covariate data, by marginalising over the distribution of the group-specific coefficients; in this case, only \code{newdataLong} needs to be specified and it only needs to be a single data frame with the covariate data for the predictions for the one longitudinal submodel.} \item{interpolate}{A logical specifying whether to interpolate the estimated longitudinal trajectory in between the observation times. This can be used to achieve a smooth estimate of the longitudinal trajectory across the entire follow up time. If \code{TRUE} then the interpolation can be further controlled using the \code{control} argument.} \item{extrapolate}{A logical specifying whether to extrapolate the estimated longitudinal trajectory beyond the time of the last known observation time. If \code{TRUE} then the extrapolation can be further controlled using the \code{control} argument.} \item{control}{A named list with parameters controlling the interpolation or extrapolation of the estimated longitudinal trajectory when either \code{interpolate = TRUE} or \code{extrapolate = TRUE}. The list can contain one or more of the following named elements: \cr \describe{ \item{\code{ipoints}}{a positive integer specifying the number of discrete time points at which to calculate the estimated longitudinal response for \code{interpolate = TRUE}. These time points are evenly spaced starting at 0 and ending at the last known observation time for each individual. The last observation time for each individual is taken to be either: the event or censoring time if no new data is provided; the time specified in the "last_time" column if provided in the new data (see \strong{Details} section below); or the time of the last longitudinal measurement if new data is provided but no "last_time" column is included. 
The default is 15.} \item{\code{epoints}}{a positive integer specifying the number of discrete time points at which to calculate the estimated longitudinal response for \code{extrapolate = TRUE}. These time points are evenly spaced between the last known observation time for each individual and the extrapolation distance specified using either \code{edist} or \code{eprop}. The default is 15.} \item{\code{eprop}}{a positive scalar between 0 and 1 specifying the amount of time across which to extrapolate the longitudinal trajectory, represented as a proportion of the total observed follow up time for each individual. For example specifying \code{eprop = 0.2} means that for an individual for whom the latest of their measurement, event or censoring times was 10 years, their estimated longitudinal trajectory will be extrapolated out to 12 years (i.e. 10 + (0.2 * 10)). The default value is 0.2.} \item{\code{edist}}{a positive scalar specifying the amount of time across which to extrapolate the longitudinal trajectory for each individual, represented in units of the time variable \code{time_var} (from fitting the model). This cannot be specified if \code{eprop} is specified.} }} \item{last_time}{A scalar, character string, or \code{NULL}. This argument specifies the last known survival time for each individual when conditional predictions are being obtained. If \code{newdataEvent} is provided and conditional survival predictions are being obtained, then the \code{last_time} argument can be one of the following: (i) a scalar, this will use the same last time for each individual in \code{newdataEvent}; (ii) a character string, naming a column in \code{newdataEvent} in which to look for the last time for each individual; (iii) \code{NULL}, in which case the default is to use the time of the latest longitudinal observation in \code{newdataLong}. 
If \code{newdataEvent} is \code{NULL} then the \code{last_time} argument cannot be specified directly; instead it will be set equal to the event or censoring time for each individual in the dataset that was used to estimate the model. If standardised survival probabilities are requested (i.e. \code{standardise = TRUE}) then conditional survival probabilities are not allowed and therefore the \code{last_time} argument is ignored.} \item{prob}{A scalar between 0 and 1 specifying the width to use for the uncertainty interval (sometimes called credible interval) for the predicted mean response and the prediction interval for the predicted (raw) response. For example \code{prob = 0.95} (the default) means that the 2.5th and 97.5th percentiles will be provided. Only relevant when \code{return_matrix} is \code{FALSE}.} \item{ids}{An optional vector specifying a subset of subject IDs for whom the predictions should be obtained. The default is to predict for all individuals who were used in estimating the model or, if \code{newdata} is specified, then all individuals contained in \code{newdata}.} \item{dynamic}{A logical that is only relevant if new data is provided via the \code{newdata} argument. If \code{dynamic = TRUE}, then new group-specific parameters are drawn for the individuals in the new data, conditional on their longitudinal biomarker data contained in \code{newdata}. These group-specific parameters are then used to generate individual-specific survival probabilities for these individuals. These are often referred to as "dynamic predictions" in the joint modelling context, because the predictions can be updated each time additional longitudinal biomarker data is collected on the individual. 
On the other hand, if \code{dynamic = FALSE} then the survival probabilities will just be marginalised over the distribution of the group-specific coefficients; this will mean that the predictions will incorporate all uncertainty due to between-individual variation so there will likely be very wide credible intervals on the predicted survival probabilities.} \item{scale}{A scalar, specifying how much to multiply the asymptotic variance-covariance matrix for the random effects by, which is then used as the "width" (ie. variance-covariance matrix) of the multivariate Student-t proposal distribution in the Metropolis-Hastings algorithm. This is only relevant when \code{newdataEvent} is supplied and \code{dynamic = TRUE}, in which case new random effects are simulated for the individuals in the new data using the Metropolis-Hastings algorithm.} \item{draws}{An integer indicating the number of MCMC draws to return. The default is to set the number of draws equal to 200, or equal to the size of the posterior sample if that is less than 200.} \item{seed}{An optional \code{\link[=set.seed]{seed}} to use.} \item{return_matrix}{A logical. If \code{TRUE} then a \code{draws} by \code{nrow(newdata)} matrix is returned which contains all the actual simulations or draws from the posterior predictive distribution. Otherwise if \code{return_matrix} is set to \code{FALSE} (the default) then a data frame is returned, as described in the \strong{Value} section below.} \item{...}{Other arguments passed to \code{\link{posterior_predict}}, for example \code{draws}, \code{re.form}, \code{seed}, etc.} } \value{ When \code{return_matrix = FALSE}, a data frame of class \code{predict.stanjm}. 
The data frame includes a column for the median of the posterior predictions of the mean longitudinal response (\code{yfit}), a column for each of the lower and upper limits of the uncertainty interval corresponding to the posterior predictions of the mean longitudinal response (\code{ci_lb} and \code{ci_ub}), and a column for each of the lower and upper limits of the prediction interval corresponding to the posterior predictions of the (raw) longitudinal response. The data frame also includes columns for the subject ID variable, and each of the predictor variables. The returned object also includes a number of attributes. When \code{return_matrix = TRUE}, the returned object is the same as that described for \code{\link{posterior_predict}}. } \description{ This function allows us to generate an estimated longitudinal trajectory (either subject-specific, or by marginalising over the distribution of the group-specific parameters) based on draws from the posterior predictive distribution. } \details{ The \code{posterior_traj} function acts as a wrapper to the \code{\link{posterior_predict}} function, but allows predictions to be easily generated at time points that are interpolated and/or extrapolated between time zero (baseline) and the last known survival time for the individual, thereby providing predictions that correspond to a smooth estimate of the longitudinal trajectory (useful for the plotting via the associated \code{\link{plot.predict.stanjm}} method). In addition it returns a data frame by default, whereas the \code{\link{posterior_predict}} function returns a matrix; see the \strong{Value} section below for details. Also, \code{posterior_traj} allows predictions to only be generated for a subset of individuals, via the \code{ids} argument. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ # Run example model if not already loaded if (!exists("example_jm")) example(example_jm) # Obtain subject-specific predictions for all individuals # in the estimation dataset pt1 <- posterior_traj(example_jm, interpolate = FALSE, extrapolate = FALSE) head(pt1) # Obtain subject-specific predictions only for a few selected individuals pt2 <- posterior_traj(example_jm, ids = c(1,3,8)) # If we wanted to obtain subject-specific predictions in order to plot the # longitudinal trajectories, then we might want to ensure a full trajectory # is obtained by interpolating and extrapolating time. We can then use the # generic plot function to plot the subject-specific predicted trajectories # for the first three individuals. Interpolation and extrapolation is # carried out by default. pt3 <- posterior_traj(example_jm) head(pt3) # predictions at additional time points compared with pt1 plot(pt3, ids = 1:3) # If we wanted to extrapolate further in time, but decrease the number of # discrete time points at which we obtain predictions for each individual, # then we could specify a named list in the 'control' argument pt4 <- posterior_traj(example_jm, control = list(ipoints = 10, epoints = 10, eprop = 0.5)) # If we have prediction data for a new individual, and we want to # estimate the longitudinal trajectory for that individual conditional # on this new data (perhaps extrapolating forward from our last # longitudinal measurement) then we can do that. It requires drawing # new individual-specific parameters, based on the full likelihood, # so we must supply new data for both the longitudinal and event # submodels. These are sometimes known as dynamic predictions. 
ndL <- pbcLong[pbcLong$id == 8, , drop = FALSE] ndE <- pbcSurv[pbcSurv$id == 8, , drop = FALSE] ndL$id <- "new_subject" # new id can't match one used in training data ndE$id <- "new_subject" pt5 <- posterior_traj(example_jm, newdataLong = ndL, newdataEvent = ndE) # By default it is assumed that the last known survival time for # the individual is the time of their last biomarker measurement, # but if we know they survived to some later time then we can # condition on that information using the last_time argument pt6 <- posterior_traj(example_jm, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") # Alternatively we may want to estimate the marginal longitudinal # trajectory for a given set of covariates. To do this, we can pass # the desired covariate values in a new data frame (however the only # covariate in our fitted model was the time variable, year). To make sure # that we marginalise over the random effects, we need to specify an ID value # which does not correspond to any of the individuals who were used in the # model estimation and specify the argument dynamic=FALSE. # The marginal prediction is obtained by generating subject-specific # predictions using a series of random draws from the random # effects distribution, and then integrating (ie, averaging) over these. # Our marginal prediction will therefore capture the between-individual # variation associated with the random effects. nd <- data.frame(id = rep("new1", 11), year = (0:10 / 2)) pt7 <- posterior_traj(example_jm, newdataLong = nd, dynamic = FALSE) head(pt7) # note the greater width of the uncertainty interval compared # with the subject-specific predictions in pt1, pt2, etc # Alternatively, we could have estimated the "marginal" trajectory by # ignoring the random effects (ie, assuming the random effects were set # to zero). This will generate a predicted longitudinal trajectory only # based on the fixed effect component of the model. 
In essence, for a # linear mixed effects model (ie, a model that uses an identity link # function), we should obtain a similar point estimate ("yfit") to the # estimates obtained in pt5 (since the mean of the estimated random effects # distribution will be approximately 0). However, it is important to note that # the uncertainty interval will be much more narrow, since it completely # ignores the between-individual variability captured by the random effects. # Further, if the model uses a non-identity link function, then the point # estimate ("yfit") obtained only using the fixed effect component of the # model will actually provide a biased estimate of the marginal prediction. # Nonetheless, to demonstrate how we can obtain the predictions only using # the fixed effect component of the model, we simply specify 're.form = NA'. # (We will use the same covariate values as used in the prediction for # example for pt5). pt8 <- posterior_traj(example_jm, newdataLong = nd, dynamic = FALSE, re.form = NA) head(pt8) # note the much narrower ci, compared with pt5 } } } \seealso{ \code{\link{plot.predict.stanjm}}, \code{\link{posterior_predict}}, \code{\link{posterior_survfit}} } rstanarm/man/rstanarm-datasets.Rd0000644000176200001440000001463714551552004016614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-datasets.R \name{rstanarm-datasets} \alias{rstanarm-datasets} \alias{kidiq} \alias{roaches} \alias{wells} \alias{bball1970} \alias{bball2006} \alias{mortality} \alias{tumors} \alias{radon} \alias{pbcLong} \alias{pbcSurv} \title{Datasets for rstanarm examples} \format{ \describe{ \item{\code{bball1970}}{ Data on hits and at-bats from the 1970 Major League Baseball season for 18 players. Source: Efron and Morris (1975). 18 obs. 
of 5 variables \itemize{ \item \code{Player} Player's last name \item \code{Hits} Number of hits in the first 45 at-bats of the season \item \code{AB} Number of at-bats (45 for all players) \item \code{RemainingAB} Number of remaining at-bats (different for most players) \item \code{RemainingHits} Number of remaining hits } } \item{\code{bball2006}}{ Hits and at-bats for the entire 2006 American League season of Major League Baseball. Source: Carpenter (2009) 302 obs. of 2 variables \itemize{ \item \code{y} Number of hits \item \code{K} Number of at-bats } } \item{\code{kidiq}}{ Data from a survey of adult American women and their children (a subsample from the National Longitudinal Survey of Youth). Source: Gelman and Hill (2007) 434 obs. of 4 variables \itemize{ \item \code{kid_score} Child's IQ score \item \code{mom_hs} Indicator for whether the mother has a high school degree \item \code{mom_iq} Mother's IQ score \item \code{mom_age} Mother's age } } \item{\code{mortality}}{ Surgical mortality rates in 12 hospitals performing cardiac surgery in babies. Source: Spiegelhalter et al. (1996). 12 obs. of 2 variables \itemize{ \item \code{y} Number of deaths \item \code{K} Number of surgeries } } \item{\code{pbcLong,pbcSurv}}{ Longitudinal biomarker and time-to-event survival data for 40 patients with primary biliary cirrhosis who participated in a randomised placebo controlled trial of D-penicillamine conducted at the Mayo Clinic between 1974 and 1984. Source: Therneau and Grambsch (2000) 304 obs. of 8 variables (\code{pbcLong}) and 40 obs. 
of 7 variables (\code{pbcSurv}) \itemize{ \item \code{age} in years \item \code{albumin} serum albumin (g/dl) \item \code{logBili} logarithm of serum bilirubin \item \code{death} indicator of death at endpoint \item \code{futimeYears} time (in years) between baseline and the earliest of death, transplantation or censoring \item \code{id} numeric ID unique to each individual \item \code{platelet} platelet count \item \code{sex} gender (m = male, f = female) \item \code{status} status at endpoint (0 = censored, 1 = transplant, 2 = dead) \item \code{trt} binary treatment code (0 = placebo, 1 = D-penicillamine) \item \code{year} time (in years) of the longitudinal measurements, taken as time since baseline } } \item{\code{radon}}{ Data on radon levels in houses in the state of Minnesota. Source: Gelman and Hill (2007) 919 obs. of 4 variables \itemize{ \item \code{log_radon} Radon measurement from the house (log scale) \item \code{log_uranium} Uranium level in the county (log scale) \item \code{floor} Indicator for radon measurement made on the first floor of the house (0 = basement, 1 = first floor) \item \code{county} County name (\code{\link{factor}}) } } \item{\code{roaches}}{ Data on the efficacy of a pest management system at reducing the number of roaches in urban apartments. Source: Gelman and Hill (2007) 262 obs. of 6 variables \itemize{ \item \code{y} Number of roaches caught \item \code{roach1} Pretreatment number of roaches \item \code{treatment} Treatment indicator \item \code{senior} Indicator for only elderly residents in building \item \code{exposure2} Number of days for which the roach traps were used } } \item{\code{tumors}}{ Tarone (1982) provides a data set of tumor incidence in historical control groups of rats; specifically endometrial stromal polyps in female lab rats of type F344. Source: Gelman and Hill (2007) 71 obs. 
of 2 variables \itemize{ \item \code{y} Number of rats with tumors \item \code{K} Number of rats } } \item{\code{wells}}{ A survey of 3200 residents in a small area of Bangladesh suffering from arsenic contamination of groundwater. Respondents with elevated arsenic levels in their wells had been encouraged to switch their water source to a safe public or private well in the nearby area and the survey was conducted several years later to learn which of the affected residents had switched wells. Source: Gelman and Hill (2007) 3020 obs. of 5 variables \itemize{ \item \code{switch} Indicator for well-switching \item \code{arsenic} Arsenic level in respondent's well \item \code{dist} Distance (meters) from the respondent's house to the nearest well with safe drinking water. \item \code{assoc} Indicator for member(s) of household participating in community organizations \item \code{educ} Years of education (head of household) } } } } \description{ Small datasets for use in \pkg{rstanarm} examples and vignettes. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { # Using 'kidiq' dataset fit <- stan_lm(kid_score ~ mom_hs * mom_iq, data = kidiq, prior = R2(location = 0.30, what = "mean"), # the next line is only to make the example go fast enough chains = 1, iter = 500, seed = 12345) pp_check(fit, nreps = 20) \donttest{ bayesplot::color_scheme_set("brightblue") pp_check(fit, plotfun = "stat_grouped", stat = "median", group = factor(kidiq$mom_hs, labels = c("No HS", "HS"))) } } } \references{ Carpenter, B. (2009) Bayesian estimators for the beta-binomial model of batting ability. \url{https://web.archive.org/web/20220618114439/https://lingpipe-blog.com/2009/09/23/} Efron, B. and Morris, C. (1975) Data analysis using Stein's estimator and its generalizations. \emph{Journal of the American Statistical Association} \strong{70}(350), 311--319. Gelman, A. and Hill, J. (2007). 
\emph{Data Analysis Using Regression and Multilevel/Hierarchical Models.} Cambridge University Press, Cambridge, UK. \url{https://stat.columbia.edu/~gelman/arm/} Spiegelhalter, D., Thomas, A., Best, N., & Gilks, W. (1996) BUGS 0.5 Examples. MRC Biostatistics Unit, Institute of Public health, Cambridge, UK. Tarone, R. E. (1982) The use of historical control information in testing for a trend in proportions. \emph{Biometrics} \strong{38}(1):215--220. Therneau, T. and Grambsch, P. (2000) \emph{Modeling Survival Data: Extending the Cox Model}. Springer-Verlag, New York, US. } rstanarm/man/prior_summary.stanreg.Rd0000644000176200001440000001337114551552004017523 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/prior_summary.R \name{prior_summary.stanreg} \alias{prior_summary.stanreg} \alias{prior_summary} \title{Summarize the priors used for an rstanarm model} \usage{ \method{prior_summary}{stanreg}(object, digits = 2, ...) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{digits}{Number of digits to use for rounding.} \item{...}{Currently ignored by the method for stanreg objects.} } \value{ A list of class "prior_summary.stanreg", which has its own print method. } \description{ The \code{prior_summary} method provides a summary of the prior distributions used for the parameters in a given model. In some cases the user-specified prior does not correspond exactly to the prior used internally by \pkg{rstanarm} (see the sections below). Especially in these cases, but also in general, it can be much more useful to visualize the priors. 
Visualizing the priors can be done using the \code{\link{posterior_vs_prior}} function, or alternatively by fitting the model with the \code{prior_PD} argument set to \code{TRUE} (to draw from the prior predictive distribution instead of conditioning on the outcome) and then plotting the parameters. } \section{Intercept (after predictors centered)}{ For \pkg{rstanarm} modeling functions that accept a \code{prior_intercept} argument, the specified prior for the intercept term applies to the intercept after \pkg{rstanarm} internally centers the predictors so they each have mean zero. The estimate of the intercept returned to the user corresponds to the intercept with the predictors as specified by the user (unmodified by \pkg{rstanarm}), but when \emph{specifying} the prior the intercept can be thought of as the expected outcome when the predictors are set to their means. The only exception to this is for models fit with the \code{sparse} argument set to \code{TRUE} (which is only possible with a subset of the modeling functions and never the default). } \section{Adjusted scales}{ For some models you may see "\code{adjusted scale}" in the printed output and adjusted scales included in the object returned by \code{prior_summary}. These adjusted scale values are the prior scales actually used by \pkg{rstanarm} and are computed by adjusting the prior scales specified by the user to account for the scales of the predictors (as described in the documentation for the \code{\link[=priors]{autoscale}} argument). To disable internal prior scale adjustments set the \code{autoscale} argument to \code{FALSE} when setting a prior using one of the distributions that accepts an \code{autoscale} argument. For example, \code{normal(0, 5, autoscale=FALSE)} instead of just \code{normal(0, 5)}. 
} \section{Coefficients in Q-space}{ For the models fit with an \pkg{rstanarm} modeling function that supports the \code{QR} argument (see e.g., \code{\link{stan_glm}}), if \code{QR} is set to \code{TRUE} then the prior distributions for the regression coefficients specified using the \code{prior} argument are not relative to the original predictor variables \eqn{X} but rather to the variables in the matrix \eqn{Q} obtained from the \eqn{QR} decomposition of \eqn{X}. In particular, if \code{prior = normal(location,scale)}, then this prior on the coefficients in \eqn{Q}-space can be easily translated into a joint multivariate normal (MVN) prior on the coefficients on the original predictors in \eqn{X}. Letting \eqn{\theta} denote the coefficients on \eqn{Q} and \eqn{\beta} the coefficients on \eqn{X} then if \eqn{\theta \sim N(\mu, \sigma)}{\theta ~ N(\mu, \sigma)} the corresponding prior on \eqn{\beta} is \eqn{\beta \sim MVN(R\mu, R'R\sigma^2)}{\beta ~ MVN(R\mu, R'R\sigma^2)}, where \eqn{\mu} and \eqn{\sigma} are vectors of the appropriate length. Technically, \pkg{rstanarm} uses a scaled \eqn{QR} decomposition to ensure that the columns of the predictor matrix used to fit the model all have unit scale, when the \code{autoscale} argument to the function passed to the \code{prior} argument is \code{TRUE} (the default), in which case the matrices actually used are \eqn{Q^\ast = Q \sqrt{n-1}}{Q* = Q (n-1)^0.5} and \eqn{R^\ast = \frac{1}{\sqrt{n-1}} R}{R* = (n-1)^(-0.5) R}. If \code{autoscale = FALSE} we instead scale such that the lower-right element of \eqn{R^\ast}{R*} is \eqn{1}, which is useful if you want to specify a prior on the coefficient of the last predictor in its original units (see the documentation for the \code{\link[=stan_glm]{QR}} argument). 
If you are interested in the prior on \eqn{\beta} implied by the prior on \eqn{\theta}, we strongly recommend visualizing it as described above in the \strong{Description} section, which is simpler than working it out analytically. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { if (!exists("example_model")) example(example_model) prior_summary(example_model) priors <- prior_summary(example_model) names(priors) priors$prior$scale priors$prior$adjusted_scale # for a glm with adjusted scales (see Details, above), compare # the default (rstanarm adjusting the scales) to setting # autoscale=FALSE for prior on coefficients fit <- stan_glm(mpg ~ wt + am, data = mtcars, prior = normal(0, c(2.5, 4)), prior_intercept = normal(0, 5), iter = 10, chains = 1) # only for demonstration prior_summary(fit) fit2 <- update(fit, prior = normal(0, c(2.5, 4), autoscale=FALSE), prior_intercept = normal(0, 5, autoscale=FALSE)) prior_summary(fit2) } } \seealso{ The \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette. } rstanarm/man/adapt_delta.Rd0000644000176200001440000000365714551552004015421 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-adapt_delta.R \name{adapt_delta} \alias{adapt_delta} \title{\code{adapt_delta}: Target average acceptance probability} \description{ Details about the \code{adapt_delta} argument to \pkg{rstanarm}'s modeling functions. } \details{ For the No-U-Turn Sampler (NUTS), the variant of Hamiltonian Monte Carlo used by \pkg{rstanarm}, \code{adapt_delta} is the target average proposal acceptance probability during Stan's adaptation period. \code{adapt_delta} is ignored by \pkg{rstanarm} if the \code{algorithm} argument is not set to \code{"sampling"}. 
The default value of \code{adapt_delta} is 0.95, except when the prior for the regression coefficients is \code{\link{R2}}, \code{\link{hs}}, or \code{\link{hs_plus}}, in which case the default is 0.99. These defaults are higher (more conservative) than the default of \code{adapt_delta=0.8} used in the \pkg{rstan} package, which may result in slower sampling speeds but will be more robust to posterior distributions with high curvature. In general you should not need to change \code{adapt_delta} unless you see a warning message about divergent transitions, in which case you can increase \code{adapt_delta} from the default to a value \emph{closer} to 1 (e.g. from 0.95 to 0.99, or from 0.99 to 0.999, etc). The step size used by the numerical integrator is a function of \code{adapt_delta} in that increasing \code{adapt_delta} will result in a smaller step size and fewer divergences. Increasing \code{adapt_delta} will typically result in a slower sampler, but it will always lead to a more robust sampler. } \references{ Stan Development Team. \emph{Stan Modeling Language Users Guide and Reference Manual.} \url{https://mc-stan.org/users/documentation/}. Brief Guide to Stan's Warnings: \url{https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup} } rstanarm/man/stan_nlmer.Rd0000644000176200001440000002114114551552005015306 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_nlmer.R \name{stan_nlmer} \alias{stan_nlmer} \title{Bayesian nonlinear models with group-specific terms via Stan} \usage{ stan_nlmer( formula, data = NULL, subset, weights, na.action, offset, contrasts = NULL, ..., prior = normal(autoscale = TRUE), prior_aux = exponential(autoscale = TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE ) } \arguments{ \item{formula, data}{Same as for \code{\link[lme4]{nlmer}}. 
\emph{We strongly advise against omitting the \code{data} argument}. Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{subset, weights, offset}{Same as \code{\link[stats]{glm}}.} \item{na.action, contrasts}{Same as \code{\link[stats]{glm}}, but rarely specified.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). \code{refresh=0} turns off the iteration updates.} \item{prior}{The prior distribution for the (non-hierarchical) regression coefficients. The default priors are described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior} should be a call to one of the various functions provided by \pkg{rstanarm} for specifying priors. 
The subset of these functions that can be used for the prior on the coefficients can be grouped into several "families": \tabular{ll}{ \strong{Family} \tab \strong{Functions} \cr \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr \emph{Product normal family} \tab \code{product_normal} \cr } See the \link[=priors]{priors help page} for details on the families and how to specify the arguments for all of the functions in the table above. To omit a prior ---i.e., to use a flat (improper) uniform prior--- \code{prior} can be set to \code{NULL}, although this is rarely a good idea. \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t family or Laplace family, and if the \code{autoscale} argument to the function used to specify the prior (e.g. \code{\link{normal}}) is left at its default and recommended value of \code{TRUE}, then the default or user-specified prior scale(s) may be adjusted internally based on the scales of the predictors. See the \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette for details on the rescaling and the \code{\link{prior_summary}} function for a summary of the priors used for a particular model.} \item{prior_aux}{The prior distribution for the "auxiliary" parameter (if applicable). The "auxiliary" parameter refers to a different parameter depending on the \code{family}. For Gaussian models \code{prior_aux} controls \code{"sigma"}, the error standard deviation. For negative binomial models \code{prior_aux} controls \code{"reciprocal_dispersion"}, which is similar to the \code{"size"} parameter of \code{\link[stats:NegBinomial]{rnbinom}}: smaller values of \code{"reciprocal_dispersion"} correspond to greater dispersion. 
For gamma models \code{prior_aux} sets the prior on the \code{"shape"} parameter (see e.g., \code{\link[stats:GammaDist]{rgamma}}), and for inverse-Gaussian models it is the so-called \code{"lambda"} parameter (which is essentially the reciprocal of a scale parameter). Binomial and Poisson models do not have auxiliary parameters. The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior_aux} can be a call to \code{exponential} to use an exponential distribution, or \code{normal}, \code{student_t} or \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these functions. To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_aux} to \code{NULL}.} \item{prior_covariance}{Cannot be \code{NULL}; see \code{\link{decov}} for more information about the default arguments.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. 
The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating whether to use a sparse representation of the design (X) matrix. If \code{TRUE}, the design matrix is not centered (since that would destroy the sparsity) and likewise it is not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}. Depending on how many zeros there are in the design matrix, setting \code{sparse = TRUE} may make the code run faster and can consume much less RAM.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_nlmer}. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Bayesian inference for NLMMs with group-specific coefficients that have unknown covariance matrices with flexible priors. } \details{ The \code{stan_nlmer} function is similar in syntax to \code{\link[lme4]{nlmer}} but rather than performing (approximate) maximum marginal likelihood estimation, Bayesian estimation is by default performed via MCMC. The Bayesian model adds independent priors on the "coefficients" --- which are really intercepts --- in the same way as \code{\link{stan_glmer}} and priors on the terms of a decomposition of the covariance matrices of the group-specific parameters. See \code{\link{priors}} for more information about the priors. 
The supported transformation functions are limited to the named "self-starting" functions in the \pkg{stats} library: \code{\link[stats]{SSasymp}}, \code{\link[stats]{SSasympOff}}, \code{\link[stats]{SSasympOrig}}, \code{\link[stats]{SSbiexp}}, \code{\link[stats]{SSfol}}, \code{\link[stats]{SSfpl}}, \code{\link[stats]{SSgompertz}}, \code{\link[stats]{SSlogis}}, \code{\link[stats]{SSmicmen}}, and \code{\link[stats]{SSweibull}}. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { \donttest{ data("Orange", package = "datasets") Orange$circumference <- Orange$circumference / 100 Orange$age <- Orange$age / 100 fit <- stan_nlmer( circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, # for speed only chains = 1, iter = 1000 ) print(fit) posterior_interval(fit) plot(fit, regex_pars = "b\\\\[") } } } \seealso{ \code{\link{stanreg-methods}} and \code{\link[lme4]{nlmer}}. The vignette for \code{stan_glmer}, which also discusses \code{stan_nlmer} models. \url{https://mc-stan.org/rstanarm/articles/} } rstanarm/man/terms.stanmvreg.Rd0000644000176200001440000000076014551552005016307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanmvreg-methods.R \name{terms.stanmvreg} \alias{terms.stanmvreg} \title{terms method for stanmvreg objects} \usage{ \method{terms}{stanmvreg}(x, fixed.only = TRUE, random.only = FALSE, m = NULL, ...) 
} \arguments{ \item{x, fixed.only, random.only, ...}{See lme4:::terms.merMod.} \item{m}{Integer specifying the number or name of the submodel} } \description{ terms method for stanmvreg objects } \keyword{internal} rstanarm/man/rstanarm-deprecated.Rd0000644000176200001440000000304214551552004017070 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-rstanarm-deprecated.R \name{rstanarm-deprecated} \alias{rstanarm-deprecated} \alias{prior_options} \title{Deprecated functions} \usage{ prior_options( prior_scale_for_dispersion = 5, min_prior_scale = 1e-12, scaled = TRUE ) } \arguments{ \item{prior_scale_for_dispersion, min_prior_scale, scaled}{Arguments to deprecated \code{prior_options} function. The functionality provided by the now deprecated \code{prior_options} function has been replaced as follows: \describe{ \item{\code{prior_scale_for_dispersion}}{ Instead of using the \code{prior_scale_for_dispersion} argument to \code{prior_options}, priors for these parameters can now be specified directly when calling \code{\link{stan_glm}} (or \code{\link{stan_glmer}}, etc.) using the new \code{prior_aux} argument. } \item{\code{scaled}}{ Instead of setting \code{prior_options(scaled=FALSE)}, internal rescaling is now toggled using the new \code{autoscale} arguments to \code{\link{normal}}, \code{\link{student_t}}, and \code{\link{cauchy}} (the other prior distributions do not support 'autoscale'). } \item{\code{min_prior_scale}}{ No replacement. \code{min_prior_scale} (the minimum possible scale parameter value that can be used for priors) is now fixed to \code{1e-12}. } }} } \description{ These functions are deprecated and will be removed in a future release. The \strong{Arguments} section below provides details on how the functionality obtained via each of the arguments has been replaced. 
} rstanarm/man/stanmvreg-methods.Rd0000644000176200001440000001431514551552005016620 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanmvreg-methods.R \name{stanmvreg-methods} \alias{stanmvreg-methods} \alias{coef.stanmvreg} \alias{fitted.stanmvreg} \alias{residuals.stanmvreg} \alias{se.stanmvreg} \alias{formula.stanmvreg} \alias{update.stanmvreg} \alias{update.stanjm} \alias{fixef.stanmvreg} \alias{ngrps.stanmvreg} \alias{ranef.stanmvreg} \alias{sigma.stanmvreg} \title{Methods for stanmvreg objects} \usage{ \method{coef}{stanmvreg}(object, m = NULL, ...) \method{fitted}{stanmvreg}(object, m = NULL, ...) \method{residuals}{stanmvreg}(object, m = NULL, ...) \method{se}{stanmvreg}(object, m = NULL, ...) \method{formula}{stanmvreg}(x, fixed.only = FALSE, random.only = FALSE, m = NULL, ...) \method{update}{stanmvreg}(object, formula., ..., evaluate = TRUE) \method{update}{stanjm}(object, formulaLong., formulaEvent., ..., evaluate = TRUE) \method{fixef}{stanmvreg}(object, m = NULL, remove_stub = TRUE, ...) \method{ngrps}{stanmvreg}(object, ...) \method{ranef}{stanmvreg}(object, m = NULL, ...) \method{sigma}{stanmvreg}(object, m = NULL, ...) } \arguments{ \item{object, x}{A fitted model object returned by one of the multivariate \pkg{rstanarm} modelling functions. See \code{\link{stanreg-objects}}.} \item{m}{Integer specifying the number or name of the submodel} \item{...}{Ignored, except by the \code{update} method. See \code{\link{update}}.} \item{fixed.only}{A logical specifying whether to only retain the fixed effect part of the longitudinal submodel formulas} \item{random.only}{A logical specifying whether to only retain the random effect part of the longitudinal submodel formulas} \item{formula.}{An updated formula for the model. 
For a multivariate model \code{formula.} should be a list of formulas, as described for the \code{formula} argument in \code{\link{stan_mvmer}}.} \item{evaluate}{See \code{\link[stats]{update}}.} \item{formulaLong., formulaEvent.}{An updated formula for the longitudinal or event submodel, when \code{object} was estimated using \code{\link{stan_jm}}. For a multivariate joint model \code{formulaLong.} should be a list of formulas, as described for the \code{formulaLong} argument in \code{\link{stan_jm}}.} \item{remove_stub}{Logical specifying whether to remove the string identifying the submodel (e.g. \code{y1|}, \code{y2|}, \code{Long1|}, \code{Long2|}, \code{Event|}) from each of the parameter names.} } \description{ S3 methods for \link[=stanreg-objects]{stanmvreg} objects. There are also several methods (listed in \strong{See Also}, below) with their own individual help pages. The main difference between these methods and the \link[=stanreg-methods]{stanreg} methods is that the methods described here generally include an additional argument \code{m} which allows the user to specify which submodel they wish to return the result for. If the argument \code{m} is set to \code{NULL} then the result will generally be a named list with each element of the list containing the result for one of the submodels. } \details{ Most of these methods are similar to the methods defined for objects of class 'lm', 'glm', 'glmer', etc. However there are a few exceptions: \describe{ \item{\code{coef}}{ Medians are used for point estimates. See the \emph{Point estimates} section in \code{\link{print.stanmvreg}} for more details. \code{coef} returns a list equal to the length of the number of submodels. 
The first elements of the list are the coefficients from each of the fitted longitudinal submodels and are the same layout as those returned by \code{coef} method of the \pkg{lme4} package, that is, the sum of the random and fixed effects coefficients for each explanatory variable for each level of each grouping factor. The final element of the returned list is a vector of fixed effect coefficients from the event submodel. } \item{\code{se}}{ The \code{se} function returns standard errors based on \code{\link{mad}}. See the \emph{Uncertainty estimates} section in \code{\link{print.stanmvreg}} for more details. } \item{\code{confint}}{ Not supplied, since the \code{\link{posterior_interval}} function should be used instead to compute Bayesian uncertainty intervals. } \item{\code{residuals}}{ Residuals are \emph{always} of type \code{"response"} (not \code{"deviance"} residuals or any other type). } } } \seealso{ \itemize{ \item The \code{\link[=print.stanmvreg]{print}}, \code{\link[=summary.stanmvreg]{summary}}, and \code{\link{prior_summary}} methods for \code{stanmvreg} objects for information on the fitted model. \item The \code{\link[=plot.stanreg]{plot}} method to plot estimates and diagnostics. \item The \code{\link{pp_check}} method for graphical posterior predictive checking of the longitudinal or glmer submodels. \item The \code{\link{ps_check}} method for graphical posterior predictive checking of the event submodel. \item The \code{\link{posterior_traj}} for predictions for the longitudinal submodel (for models estimated using \code{\link{stan_jm}}), as well as its associated \code{\link[=plot.predict.stanjm]{plot}} method. \item The \code{\link{posterior_survfit}} for predictions for the event submodel, including so-called "dynamic" predictions (for models estimated using \code{\link{stan_jm}}), as well as its associated \code{\link[=plot.survfit.stanjm]{plot}} method. 
\item The \code{\link{posterior_predict}} for predictions for the glmer submodel (for models estimated using \code{\link{stan_mvmer}}). \item The \code{\link{posterior_interval}} for uncertainty intervals for model parameters. \item The \code{\link[=loo.stanreg]{loo}}, and \code{\link[=log_lik.stanmvreg]{log_lik}} methods for leave-one-out model comparison, and computing the log-likelihood of (possibly new) data. \item The \code{\link[=as.matrix.stanreg]{as.matrix}}, \code{as.data.frame}, and \code{as.array} methods to access posterior draws. } Other S3 methods for stanmvreg objects, which have separate documentation, including \code{\link{print.stanmvreg}}, and \code{\link{summary.stanmvreg}}. Also \code{\link{posterior_interval}} for an alternative to \code{confint}, and \code{posterior_predict}, \code{posterior_traj} and \code{posterior_survfit} for predictions based on the fitted joint model. } rstanarm/man/family.stanmvreg.Rd0000644000176200001440000000070614551552005016436 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanmvreg-methods.R \name{family.stanmvreg} \alias{family.stanmvreg} \title{family method for stanmvreg objects} \usage{ \method{family}{stanmvreg}(object, m = NULL, ...) } \arguments{ \item{object, ...}{See \code{\link[stats]{family}}.} \item{m}{Integer specifying the number or name of the submodel} } \description{ family method for stanmvreg objects } \keyword{internal} rstanarm/man/log_lik.stanreg.Rd0000644000176200001440000000601514551552004016230 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/log_lik.R \name{log_lik.stanreg} \alias{log_lik.stanreg} \alias{log_lik} \alias{log_lik.stanmvreg} \alias{log_lik.stanjm} \title{Pointwise log-likelihood matrix} \usage{ \method{log_lik}{stanreg}(object, newdata = NULL, offset = NULL, ...) \method{log_lik}{stanmvreg}(object, m = 1, newdata = NULL, ...) 
\method{log_lik}{stanjm}(object, newdataLong = NULL, newdataEvent = NULL, ...) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{newdata}{An optional data frame of new data (e.g. holdout data) to use when evaluating the log-likelihood. See the description of \code{newdata} for \code{\link{posterior_predict}}.} \item{offset}{A vector of offsets. Only required if \code{newdata} is specified and an \code{offset} was specified when fitting the model.} \item{...}{Currently ignored.} \item{m}{Integer specifying the number or name of the submodel} \item{newdataLong, newdataEvent}{Optional data frames containing new data (e.g. holdout data) to use when evaluating the log-likelihood for a model estimated using \code{\link{stan_jm}}. If the fitted model was a multivariate joint model (i.e. more than one longitudinal outcome), then \code{newdataLong} is allowed to be a list of data frames. If supplying new data, then \code{newdataEvent} should also include variables corresponding to the event time and event indicator as these are required for evaluating the log likelihood for the event submodel. For more details, see the description of \code{newdataLong} and \code{newdataEvent} for \code{\link{posterior_survfit}}.} } \value{ For the \code{stanreg} and \code{stanmvreg} methods an \eqn{S} by \eqn{N} matrix, where \eqn{S} is the size of the posterior sample and \eqn{N} is the number of data points. For the \code{stanjm} method an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number of individuals. 
} \description{ For models fit using MCMC only, the \code{log_lik} method returns the \eqn{S} by \eqn{N} pointwise log-likelihood matrix, where \eqn{S} is the size of the posterior sample and \eqn{N} is the number of data points, or in the case of the \code{stanmvreg} method (when called on \code{\link{stan_jm}} model objects) an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number of individuals. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ roaches$roach100 <- roaches$roach1 / 100 fit <- stan_glm( y ~ roach100 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson(link = "log"), prior = normal(0, 2.5), prior_intercept = normal(0, 10), iter = 500, # just to speed up example, refresh = 0 ) ll <- log_lik(fit) dim(ll) all.equal(ncol(ll), nobs(fit)) # using newdata argument nd <- roaches[1:2, ] nd$treatment[1:2] <- c(0, 1) ll2 <- log_lik(fit, newdata = nd, offset = c(0, 0)) head(ll2) dim(ll2) all.equal(ncol(ll2), nrow(nd)) } } } rstanarm/man/summary.stanreg.Rd0000644000176200001440000001222214551552004016302 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print-and-summary.R \name{summary.stanreg} \alias{summary.stanreg} \alias{print.summary.stanreg} \alias{as.data.frame.summary.stanreg} \alias{summary.stanmvreg} \alias{print.summary.stanmvreg} \title{Summary method for stanreg objects} \usage{ \method{summary}{stanreg}( object, pars = NULL, regex_pars = NULL, probs = c(0.1, 0.5, 0.9), ..., digits = 1 ) \method{print}{summary.stanreg}(x, digits = max(1, attr(x, "print.digits")), ...) \method{as.data.frame}{summary.stanreg}(x, ...) \method{summary}{stanmvreg}(object, pars = NULL, regex_pars = NULL, probs = NULL, ..., digits = 3) \method{print}{summary.stanmvreg}(x, digits = max(1, attr(x, "print.digits")), ...) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. 
See \code{\link{stanreg-objects}}.} \item{pars}{An optional character vector specifying a subset of parameters to display. Parameters can be specified by name or several shortcuts can be used. Using \code{pars="beta"} will restrict the displayed parameters to only the regression coefficients (without the intercept). \code{"alpha"} can also be used as a shortcut for \code{"(Intercept)"}. If the model has varying intercepts and/or slopes they can be selected using \code{pars = "varying"}. In addition, for \code{stanmvreg} objects there are some additional shortcuts available. Using \code{pars = "long"} will display the parameter estimates for the longitudinal submodels only (excluding group-specific parameters, but including auxiliary parameters). Using \code{pars = "event"} will display the parameter estimates for the event submodel only, including any association parameters. Using \code{pars = "assoc"} will display only the association parameters. Using \code{pars = "fixef"} will display all fixed effects, but not the random effects or the auxiliary parameters. If \code{pars} and \code{regex_pars} are set to \code{NULL} then all fixed effect regression coefficients are selected, as well as any auxiliary parameters and the log posterior. If \code{pars} is \code{NULL} all parameters are selected for a \code{stanreg} object, while for a \code{stanmvreg} object all fixed effect regression coefficients are selected as well as any auxiliary parameters and the log posterior. See \strong{Examples}.} \item{regex_pars}{An optional character vector of \link[=grep]{regular expressions} to use for parameter selection. \code{regex_pars} can be used in place of \code{pars} or in addition to \code{pars}. 
Currently, all functions that accept a \code{regex_pars} argument ignore it for models fit using optimization.} \item{probs}{For models fit using MCMC or one of the variational algorithms, an optional numeric vector of probabilities passed to \code{\link[stats]{quantile}}.} \item{...}{Currently ignored.} \item{digits}{Number of digits to use for formatting numbers when printing. When calling \code{summary}, the value of digits is stored as the \code{"print.digits"} attribute of the returned object.} \item{x}{An object of class \code{"summary.stanreg"}.} } \value{ The \code{summary} method returns an object of class \code{"summary.stanreg"} (or \code{"summary.stanmvreg"}, inheriting \code{"summary.stanreg"}), which is a matrix of summary statistics and diagnostics, with attributes storing information for use by the \code{print} method. The \code{print} method for \code{summary.stanreg} or \code{summary.stanmvreg} objects is called for its side effect and just returns its input. The \code{as.data.frame} method for \code{summary.stanreg} objects converts the matrix to a data.frame, preserving row and column names but dropping the \code{print}-related attributes. } \description{ Summaries of parameter estimates and MCMC convergence diagnostics (Monte Carlo error, effective sample size, Rhat). } \details{ \subsection{mean_PPD diagnostic}{ Summary statistics are also reported for \code{mean_PPD}, the sample average posterior predictive distribution of the outcome. This is useful as a quick diagnostic. A useful heuristic is to check if \code{mean_PPD} is plausible when compared to \code{mean(y)}. If it is plausible then this does \emph{not} mean that the model is good in general (only that it can reproduce the sample mean), however if \code{mean_PPD} is implausible then it is a sign that something is wrong (severe model misspecification, problems with the data, computational issues, etc.). 
} } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { if (!exists("example_model")) example(example_model) summary(example_model, probs = c(0.1, 0.9)) # These produce the same output for this example, # but the second method can be used for any model summary(example_model, pars = c("(Intercept)", "size", paste0("period", 2:4))) summary(example_model, pars = c("alpha", "beta")) # Only show parameters varying by group summary(example_model, pars = "varying") as.data.frame(summary(example_model, pars = "varying")) } } \seealso{ \code{\link{prior_summary}} to extract or print a summary of the priors used for a particular model. } rstanarm/man/pairs.stanreg.Rd0000644000176200001440000001053414551552004015727 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plots.R \name{pairs.stanreg} \alias{pairs.stanreg} \alias{pairs_style_np} \alias{pairs_condition} \title{Pairs method for stanreg objects} \usage{ \method{pairs}{stanreg}( x, pars = NULL, regex_pars = NULL, condition = pairs_condition(nuts = "accept_stat__"), ... ) } \arguments{ \item{x}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{pars}{An optional character vector of parameter names. All parameters are included by default, but for models with more than just a few parameters it may be far too many to visualize on a small computer screen and also may require substantial computing time.} \item{regex_pars}{An optional character vector of \link[=grep]{regular expressions} to use for parameter selection. \code{regex_pars} can be used in place of \code{pars} or in addition to \code{pars}. Currently, all functions that accept a \code{regex_pars} argument ignore it for models fit using optimization.} \item{condition}{Same as the \code{condition} argument to \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}} except the \emph{default is different} for \pkg{rstanarm} models. 
By default, the \code{mcmc_pairs} function in the \pkg{bayesplot} package plots some of the Markov chains (half, in the case of an even number of chains) in the panels above the diagonal and the other half in the panels below the diagonal. However since we know that \pkg{rstanarm} models were fit using Stan (which \pkg{bayesplot} doesn't assume) we can make the default more useful by splitting the draws according to the \code{accept_stat__} diagnostic. The plots below the diagonal will contain realizations that are below the median \code{accept_stat__} and the plots above the diagonal will contain realizations that are above the median \code{accept_stat__}. To change this behavior see the documentation of the \code{condition} argument at \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}}.} \item{...}{Optional arguments passed to \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}}. The \code{np}, \code{lp}, and \code{max_treedepth} arguments to \code{mcmc_pairs} are handled automatically by \pkg{rstanarm} and do not need to be specified by the user in \code{...}. The arguments that can be specified in \code{...} include \code{transformations}, \code{diag_fun}, \code{off_diag_fun}, \code{diag_args}, \code{off_diag_args}, and \code{np_style}. These arguments are documented thoroughly on the help page for \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}}.} } \description{ Interface to \pkg{bayesplot}'s \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}} function for use with \pkg{rstanarm} models. Be careful not to specify too many parameters to include or the plot will be both hard to read and slow to render. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ if (!exists("example_model")) example(example_model) bayesplot::color_scheme_set("purple") # see 'condition' argument above for details on the plots below and # above the diagonal. default is to split by accept_stat__. 
pairs(example_model, pars = c("(Intercept)", "log-posterior")) # for demonstration purposes, intentionally fit a model that # will (almost certainly) have some divergences fit <- stan_glm( mpg ~ ., data = mtcars, iter = 1000, # this combo of prior and adapt_delta should lead to some divergences prior = hs(), adapt_delta = 0.9, refresh = 0 ) pairs(fit, pars = c("wt", "sigma", "log-posterior")) # requires hexbin package # pairs( # fit, # pars = c("wt", "sigma", "log-posterior"), # transformations = list(sigma = "log"), # show log(sigma) instead of sigma # off_diag_fun = "hex" # use hexagonal heatmaps instead of scatterplots # ) bayesplot::color_scheme_set("brightblue") pairs( fit, pars = c("(Intercept)", "wt", "sigma", "log-posterior"), transformations = list(sigma = "log"), off_diag_args = list(size = 3/4, alpha = 1/3), # size and transparency of scatterplot points np_style = pairs_style_np(div_color = "black", div_shape = 2) # color and shape of the divergences ) # Using the condition argument to show divergences above the diagonal pairs( fit, pars = c("(Intercept)", "wt", "log-posterior"), condition = pairs_condition(nuts = "divergent__") ) } } } rstanarm/man/QR-argument.Rd0000644000176200001440000000441314551552004015310 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-QR.R \name{QR-argument} \alias{QR-argument} \title{The \code{QR} argument} \description{ Details about the \code{QR} argument to \pkg{rstanarm}'s modeling functions. } \details{ The \code{QR} argument is a logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix, \eqn{X = Q^\ast R^\ast}{X = Q* R*}. If \code{autoscale = TRUE} (the default) in the call to the function passed to the \code{prior} argument, then \eqn{Q^\ast = Q \sqrt{n-1}}{Q* = Q (n-1)^0.5} and \eqn{R^\ast = \frac{1}{\sqrt{n-1}} R}{R* = (n-1)^(-0.5) R}. 
When \code{autoscale = FALSE}, \eqn{R} is scaled such that the lower-right element of \eqn{R^\ast}{R*} is \eqn{1}. The coefficients relative to \eqn{Q^\ast}{Q*} are obtained and then premultiplied by the inverse of \eqn{R^{\ast}}{R*} to obtain coefficients relative to the original predictors, \eqn{X}. Thus, when \code{autoscale = FALSE}, the coefficient on the last column of \eqn{X} is the same as the coefficient on the last column of \eqn{Q^\ast}{Q*}. These transformations do not change the likelihood of the data but are recommended for computational reasons when there are multiple predictors. Importantly, while the columns of \eqn{X} are almost generally correlated, the columns of \eqn{Q^\ast}{Q*} are uncorrelated by design, which often makes sampling from the posterior easier. However, because when \code{QR} is \code{TRUE} the \code{prior} argument applies to the coefficients relative to \eqn{Q^\ast}{Q*} (and those are not very interpretable), setting \code{QR=TRUE} is only recommended if you do not have an informative prior for the regression coefficients or if the only informative prior is on the last regression coefficient (in which case you should set \code{autoscale = FALSE} when specifying such priors). For more details see the Stan case study \emph{The QR Decomposition For Regression Models} at \url{https://mc-stan.org/users/documentation/case-studies/qr_regression.html}. } \references{ Stan Development Team. \emph{Stan Modeling Language Users Guide and Reference Manual.} \url{https://mc-stan.org/users/documentation/}. 
} rstanarm/man/neg_binomial_2.Rd0000644000176200001440000000274514551552004016020 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/neg_binomial_2.R \name{neg_binomial_2} \alias{neg_binomial_2} \title{Family function for negative binomial GLMs} \usage{ neg_binomial_2(link = "log") } \arguments{ \item{link}{The same as for \code{\link[stats:family]{poisson}}, typically a character vector of length one among \code{"log"}, \code{"identity"}, and \code{"sqrt"}.} } \value{ An object of class \code{\link[stats]{family}} very similar to that of \code{\link[stats:family]{poisson}} but with a different family name. } \description{ Specifies the information required to fit a Negative Binomial GLM in a similar way to \code{\link[MASS]{negative.binomial}}. However, here the overdispersion parameter \code{theta} is not specified by the user and always estimated (really the \emph{reciprocal} of the dispersion parameter is estimated). A call to this function can be passed to the \code{family} argument of \code{\link{stan_glm}} or \code{\link{stan_glmer}} to estimate a Negative Binomial model. Alternatively, the \code{\link{stan_glm.nb}} and \code{\link{stan_glmer.nb}} wrapper functions may be used, which call \code{neg_binomial_2} internally. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") stan_glm(Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine, seed = 123, family = neg_binomial_2, QR = TRUE, algorithm = "optimizing") # or, equivalently, call stan_glm.nb() without specifying the family } rstanarm/man/figures/0000755000176200001440000000000014370470372014327 5ustar liggesusersrstanarm/man/figures/stanlogo.png0000644000176200001440000003745413365374540016703 0ustar liggesusersPNG  IHDRwx+sBIT|d pHYs&:4tEXtSoftwarewww.inkscape.org< IDATxw|ՙsf$w+$` ؘbQCB ے'7B6R6uq6e7M ل$ ؒm Л$W43+WY{nyޯ/lIw >̙31 B2J #7¡# #a@H P U] ]tuW~V-nUt+ˊ@l sy#/f TILxC&f~I&`= PX]&b.{gʘɋE 邞0t hrh=OuO\Κ;gnnՓ zt2L¾xYIqAsb?l_3bw끳1s+WAŮmZ􇕻OA_LӀsxH`9pO_Can5 j.͠gڪ' UX;i\}2Ə A|g2xEnE٬Z;),V%sNLbALpCfX3j8w5O+~WϪg}1~X%L]PSyL1|/cʽ atC=؟{9ROLl;-!/aKH> `<` 4u-7%ʽNiܻ ;)x+֑|1c^"Qs.ȇ} hLOSq#cʽ-p+5o P;)7Ŵ0o܋|F dS |1J7(`-Nczʽ,a؈#~ܔgw3VE`ܗyBwS o0{V,sQ?|}1K"{/Y.+q5Jy9NZx "j9UX\oӶwa^2[xmoG!F@ǘ,٤׆2O2X{Lã)A¿6ҲwrdgK?%F#c]JF>;H9rϓJ?#ti;/evyʁ{4Qs%AFb_ .YB*2wc K ^;Kri*oC}1@J;-ߙ 0=Q=S8NRJܳZRGӠ_.[|s~5wS JBja킾 ˘ʎ7՞6rfjߣOASdb1E 8y)PF҄we߁ʑ{-aї1hnY@ʽG1a8wc Jл,Exq@f_VsaLy%p",CþYTFnwc r =U[(H_,N?+LpleK鲑;ɕt\/;}g1&V.NpTi/}2W徘 z+YɒdFɒzd˽ ^ r,C<{hyt$CʶR}&{)R{)-`ʲQ} VĘUnw*{9+EԾW¦mDe_鑲*&j&` J5Kgw Sʦܛ -=;r\Ȕ(&j?s^KY;)M%_¿aʚMޕ )|wSvv 9A;[Y;)?%9r^#ࣾ@,]NߙLy+ro?@;)i1"ДܴLfnrdP%Xs(%5roS5sJE2^n1Ţdʽ+\O 7oV.܂9/E)*%Q~ <5}" 9~wc˽F)&̞*"ܔ.%AQd mĉ_1( $W69I1ٗ۩{AP!Ug[S2r_ȸG,003.;1&rA5 Z[hL}15O89}4 5U|'1GNK}؍)/9-SypZ.a0B"Fq xN\t뮐z˘ÑY{sO;hnLƏ Vӕ|bYモc= [܊"&p 4a/71 b؍)5xSLSXHwyܛȋ@U6kЖ&u-y4,er$S''L&o+Hl;#wV7؍g9;U0i S#'!> |*[6rof\<?dϫevA)TiB2k$~*?+/z1ٷqnT 2 =϶vO][V-e쨈j`@6gzF3 9zk|g1X)d"]8&B8ɄY=&(Vy ؍f L z2ȉ'v]qx pe9Xhg7f?ׁ͋la\v(o#y/Vy'r{)w9$`*N2S+'r4/2zYҟ+H35O`F5&sSZjXOG+i2_#b3e­L<"E<]$EO9`-x]Eh]L^o ˜@j'(LKΡSA|BKĻ1sKЇzg;}12ۀ'+:ݡ')LVuG`j=ˋɻ$-T pCNQS]T[?^}'(7&(-3K"_ie&?1G"'55'_{DVA+)\Z]U]yBJ ~ړq rES>lV|*=NHftɚ*7.zX=ZD8Vlk> Cꩂ:8;) NՇ U4p{a۽~\n68Ȕ%Nu]#1\;y͵ԥb*SaG~ȅq{&kuϋLo88d3ɂW}M\Vy߳bYc# z:l8͘Cp!ɥa]!kLOll}(UK86">8]f88"3zOMqpn˽R&IoRHe5h!JR&W=[3߹ɂeF6m;t;rBA$sh 9pTzZÖωHW 
6hJKcC}ae%V55ss 3dގw)u̦.%YOd?P-vxΑmph66*7H߬ Z|qȟ&jM4y"i;wBڣ8`&pv1HN͚nF\2Jpe|TQ{<{X87A'eLq.Kn޴ V!e9(3Ag>ksw$g*6Le2|#m#W.5g8$ru9c~;D?WթzUvB$:̍?}ș 5Cs@,!zKqtu;ZE.>v#azblbɳCh=<#?QB + \1>}`;U]oorF&VˠԞ -L˸JTo9vO >ҮɻSW\8G!` \'.慃س}ޠQc;Bڈ.$S#!=g&`TOjuW>fX|a_uLz7:.ΊǎQ#ԖBm)}LxE< zZ>s~c撷"8o8* 83d1Y9z6}0SP"~c_N.ʘqQ`+yc"!5sorדRږ_]~q+UAѱcZ2Jޘ~C'*3 88jB{*zx`.ΑmR2HLX'u{~ V<+EGhn%(/c␩Q@13ݞLF>zn7=η3zsnv\]ld+$!p|r|ƌ) `o7E\½ 4(dBߚ81.wE&o)cB-scae~*m[Dy0ˆcc 6ecʀ1j2Vd]wRuOJݛ\2WrK'Ȉq(({p)aw 2&ϡe{j{ڵo5 q k*:88ҾMQdr,%Ikw?t^n+a vQ ##jqH͛1]a(5M+ { >w s8`R؍v;Miџ*;9m]<FK<-ܘ;YȸG'dҗ]bJcj0(և<~d^ҒyKAĈD8&maSƅ [=/Vw]Edt8P+wpVg :*Xś%_R, eN r/E\Qh?5%V0Q|RÉMQゎ .`x,XVfˎ"i)PF694/\|YivT8W]sA !S P Δ=ӵ<qVgtӣЖ^[nHžw sU*483p^N{8HIKvb'^q / !τ123  A6;W|1sd831]Ɍ(dXsx;*:Xk-EB`&;*T8. &R67ފNBlRxw]Bf)qzgA Agĺx$"R P`j”b^ bۍ-f?/u}_#e{9%dsed 'D!6Ɓ-4X'>;͡WrJ=mSGM.aetM" wWtp~Gm%MQ, Ч|1:y]5U8!VE'ɲ jS3f(mABKK솗25@:*f_TbD`<@Eiq Ͷ /;hnQLAT8Mԧ+9% 9v,§"kX ]7)V\QACΐV%o(:mGҢ E 6r76H)qȔ^vѝ0IDAT .Rgv|G1r+Y0~RLҢuYԊb&{jTbRccbeKX^%0fdaqJne2WgD&Hhu1ĊH)ʃN.IWlrU>ØBLNx9Hhski+1Q cN陂tM,Ua^S(뜲&iu VE!+` zZ=K!Qg2JJu,]ɬt%ӣБV޼:{K$gO7Q 0<ɒK,c=|$OБb\bw5zHUgy]3!P;jbv&$]2g9.x޿٧yr7%Axqe%%촢Ϫ];dL 0:qNu+Niu-Ab ^kSiyv4N0"IqJ]f+M _ ҜzD^3S v42nEct>Bb}P˘0L9J.(d-{wn*\PצnBNBwc|쒁*^nh6Ӣ.aUb6gJQc)/Joc{ bd#&2,,FnØči¿Jճvd2x^GFgeLQ;岢 O.TOeŖ?xL61#j‚8Si Zu..7$2T;v8 A61+E湱oHѐ(Ű[q3Dݦ;*~ R=htܝ"ywci 'D'D] fK(7!$M+_QJ.B7D9cLu.fe&(i)xaz-)TN8g[zWS`|03EcΐE2ms4$Ċ!]lݘz%J.8,إ'U~^y>;C.9Id9]*U\@J=Az[N"`D9C}>ہrsHU 'DJ蘒&?͵ w}KNyc{cU>{97d71$1cgBbuM Wo|mlcYy `rE c鋁*{.cyCƷw}c7L~h7ЃuM> z\"cay*;wHJwmof@%h M_h]7ݘ4> :*CdItŞ+P[ \ϒhNA_AM85Q&jZKk]a+g olߴP_LjGi0onɴ 4e-1&%uTpNg\x Ck+vE?O;LCoN[~2q800"sT"EaCǺ!ȦdAOG從Q< `S)*!prrtH*]RqM?=@9џ]\Ý@0!*Q1:ݛb,2 Y =|u/'&E>Nk]#-}Cw;w!Yzp=MAy8O1S4'Oy8)L߭urG]flnLaFZ>| =-v` 1&T`A@2bM[0Y 7/CAz:cLN$@Sۥ=6S}"bh7/c1~%W+ <+I "z~cX k1/z^ neq+3^(6Tŋ޼3_cCPޓbpDƊ`}|zy0ч17t~-ٖC E_^rn}}1[OhW='3ۚyBo19w_ 4rPGZ=?>Rws֭Դ>z_ OY+D3V8t`}}u:kzc \=`bm_YQ{AgJ_D2vTDq)#i&%[RWǸ )Y ;#YM X<Ê|2"_OB*v +B"ءs{"G3ۂڲHc*l<;̡dls=AۋYK{cJD$$pˌ@Յ\ U؞C[/#0_cSYp pͪuMԾ'KlHS6*̇y_¦T 
Ɓ2rv4oBЛҶ<|3ɍ}g1;V5S}"1)iл/f߁jW`,WO뙽yRw6rHo ,1$ p.Re߁Vj @;9l;dyǰ.ެ7r@5QȧJy!@yEy\cr(^'pk#k}ʆLJrC?pnVG%dqLYwlzԷZqs@coLYwl[L }حԋm Łs2r̕_\Ø hSi!Sx;Dq_s:mL!ɃaZ)yw ڜ.߁r)˄K}?h=P;ɪ6\ >nstqk^Ϩ?^ÝO!$ﳟ$R@Yyrk?>?Քd K}?S4_ne[@;-FZs~g.4QspIg*;,ͮq@0p* odA@8SΖB>NpwֳY߁e)+#җ]^A६DDNfjyN㝂> !s=XHC\;CFZ)_kgvzmGeB5^S.s@13tu;9M959.p @ܗd` \͡\\Oy>gپ3%)NEt$ֳv@LxE%3L(H{}޼]P[@>od2S-LqoF \Z:@-M):?.7Qn|Vpp#>3=AV*:T%(:[tY@l_g2-GĖy!w[PM30Uq 0))&d[@Q |+w2g"] ^;!H) \( b7י,R}gDR;sdLѻqnK4!""A.dJZ;i":R80w".Kp*S9 Id&91QXƔ0J|,AR@O(,sݻH[.oo#zp>0^2@}o; y{H];Or)d΂|AO@މr@[<~20SLY v(ٴ\;Au[9@c:oȧhSD/{.p"h=ɩn8Aꀩsiw(=яU) /)Kۮ|?0iBBtVW}yNeܱBw{6v5f AuJR'TcKam+QlȻOf98vM|g+~ZNƟ )rt{j1EqWLӺcKymS^Νͪud{7v/?k9rcJIK@xlV$ܻ۬dZq)r}VN{e]pϡe$PN}m+Ӏscf% Xw\)r_DI7KɌk}g2xd@Rc_Qs N#B|lZ7kw|hdwŸ|g1xs_;Y[e2-R&WFo.W^_9ޭvoP63ƔE!o+]av&Y-3crGJG[VʲwkKW)ƔLė/E^!诀c]};Oe_ >! )vil~A|rLxyQY;H!(u=QOۚv4b-Czk2imT+czJ{1\B+9&jjLڮiY;H!rBj:t0wc>^ppZPٴL7.ieY|Y1{4gXwF=DA Ř2+GiyL7{g+fwcʉB$idrYMB=kS.Jd+ޱ{d6 `<Ɣ]|Ra1G'}ifbLy( yl<;Hi~Cˊ!wcJXܳd5 ;1E5z FYHCh?i'[gs 3Z4Y)pwXrR1ǘ +|g.ľÔ"+~Y) H>lLs=O~ A?_Oo})VyC;IY=jLإ*:b\X{̸Z} ]؟)iꕾ+R}f(:Y.}TOF=S&)7&M['>L/^YTB>%9aK$?gNa{Y!~8wc' I;1[|1r/PVm}s-cd^S;9gv+m89{[1[BywO<۫^~y4vq>>̮reoh3q{6s +2邼O@<&o^+zs#m 4z >]]r們w@TUsKo D7~1ʝ{jv8wu nZִˡjWtSw.$qoX

Vf>?+}piܱ++G0YS 4^~әnD r{gC8F05M`U~k'yZ&>00<)Ky#bڴkKup\!S9 Hq;<,VfV14wC8y +Vp{.fzUU ޠ*X|X5j9W/ׯyAEy qK鱽oA )9 3"2p_}x}qOq$sCuO+o}LJNDÖ+w'F CdFg98ɶ)fAy6K|Yk++G 7*=`GPy7/z\lerj5*0MHLf|gˣx^yOͦe 9pQEeˍ#N ԝae4`F,\|Y[jd (0O1.`+dUׯW%z<{5wzΗs6lS]os$7^%LI4!@&&1F(:BHō t3f 3c}f]yJX9Q4Gn@~NERCI;D8'WݴsByK)j0~D%P! d޹EQpm,Fi*RڂX U`$j2G1l?Πɡ"^@p_zNƙa$FF:u5ȏ2IENDB`rstanarm/man/figures/logo.svg0000644000176200001440000002316214370470372016014 0ustar liggesusers rstanarm/man/predict.stanreg.Rd0000644000176200001440000000233714551552004016245 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict.R \name{predict.stanreg} \alias{predict.stanreg} \title{Predict method for stanreg objects} \usage{ \method{predict}{stanreg}( object, ..., newdata = NULL, type = c("link", "response"), se.fit = FALSE ) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{...}{Ignored.} \item{newdata}{Optionally, a data frame in which to look for variables with which to predict. If omitted, the model matrix is used.} \item{type}{The type of prediction. The default \code{'link'} is on the scale of the linear predictors; the alternative \code{'response'} is on the scale of the response variable.} \item{se.fit}{A logical scalar indicating if standard errors should be returned. The default is \code{FALSE}.} } \value{ A vector if \code{se.fit} is \code{FALSE} and a list if \code{se.fit} is \code{TRUE}. } \description{ This method is primarily intended to be used only for models fit using optimization. For models fit using MCMC or one of the variational approximations, see \code{\link{posterior_predict}}. 
} \seealso{ \code{\link{posterior_predict}} } rstanarm/man/terms.stanreg.Rd0000644000176200001440000000063114551552005015741 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanreg-methods.R \name{terms.stanreg} \alias{terms.stanreg} \title{terms method for stanreg objects} \usage{ \method{terms}{stanreg}(x, ..., fixed.only = TRUE, random.only = FALSE) } \arguments{ \item{x, fixed.only, random.only, ...}{See lme4:::terms.merMod.} } \description{ terms method for stanreg objects } \keyword{internal} rstanarm/man/reexports.Rd0000644000176200001440000000062014551552004015175 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/jm_data_block.R \docType{import} \name{reexports} \alias{reexports} \alias{Surv} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{survival}{\code{\link[survival]{Surv}}} }} rstanarm/man/stan_mvmer.Rd0000644000176200001440000002106714551552005015326 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_mvmer.R \name{stan_mvmer} \alias{stan_mvmer} \title{Bayesian multivariate generalized linear models with correlated group-specific terms via Stan} \usage{ stan_mvmer( formula, data, family = gaussian, weights, prior = normal(autoscale = TRUE), prior_intercept = normal(autoscale = TRUE), prior_aux = cauchy(0, 5, autoscale = TRUE), prior_covariance = lkj(autoscale = TRUE), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, max_treedepth = 10L, init = "random", QR = FALSE, sparse = FALSE, ... 
) } \arguments{ \item{formula}{A two-sided linear formula object describing both the fixed-effects and random-effects parts of the longitudinal submodel similar in vein to formula specification in the \strong{lme4} package (see \code{\link[lme4]{glmer}} or the \strong{lme4} vignette for details). Note however that the double bar (\code{||}) notation is not allowed when specifying the random-effects parts of the formula, and neither are nested grouping factors (e.g. \code{(1 | g1/g2)} or \code{(1 | g1:g2)}, where \code{g1}, \code{g2} are grouping factors). For a multivariate GLM this should be a list of such formula objects, with each element of the list providing the formula for one of the GLM submodels.} \item{data}{A data frame containing the variables specified in \code{formula}. For a multivariate GLM, this can be either a single data frame which contains the data for all GLM submodels, or it can be a list of data frames where each element of the list provides the data for one of the GLM submodels.} \item{family}{The family (and possibly also the link function) for the GLM submodel(s). See \code{\link[lme4]{glmer}} for details. If fitting a multivariate GLM, then this can optionally be a list of families, in which case each element of the list specifies the family for one of the GLM submodels. In other words, a different family can be specified for each GLM submodel.} \item{weights}{Same as in \code{\link[stats]{glm}}, except that when fitting a multivariate GLM and a list of data frames is provided in \code{data} then a corresponding list of weights must be provided. If weights are provided for one of the GLM submodels, then they must be provided for all GLM submodels.} \item{prior, prior_intercept, prior_aux}{Same as in \code{\link{stan_glmer}} except that for a multivariate GLM a list of priors can be provided for any of \code{prior}, \code{prior_intercept} or \code{prior_aux} arguments. 
That is, different priors can optionally be specified for each of the GLM submodels. If a list is not provided, then the same prior distributions are used for each GLM submodel. Note that the \code{"product_normal"} prior is not allowed for \code{stan_mvmer}.} \item{prior_covariance}{Cannot be \code{NULL}; see \code{\link{priors}} for more information about the prior distributions on covariance matrices. Note however that the default prior for covariance matrices in \code{stan_mvmer} is slightly different to that in \code{\link{stan_glmer}} (the details of which are described on the \code{\link{priors}} page).} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{max_treedepth}{A positive integer specifying the maximum treedepth for the non-U-turn sampler. See the \code{control} argument in \code{\link[rstan]{stan}}.} \item{init}{The method for generating initial values. See \code{\link[rstan]{stan}}.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. 
See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating whether to use a sparse representation of the design (X) matrix. If \code{TRUE}, the the design matrix is not centered (since that would destroy the sparsity) and likewise it is not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}. Depending on how many zeros there are in the design matrix, setting \code{sparse = TRUE} may make the code run faster and can consume much less RAM.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). \code{refresh=0} turns off the iteration updates.} } \value{ A \link[=stanreg-objects]{stanmvreg} object is returned. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Bayesian inference for multivariate GLMs with group-specific coefficients that are assumed to be correlated across the GLM submodels. } \details{ The \code{stan_mvmer} function can be used to fit a multivariate generalized linear model (GLM) with group-specific terms. 
The model consists of distinct GLM submodels, each of which contains group-specific terms; within a grouping factor (for example, patient ID) the grouping-specific terms are assumed to be correlated across the different GLM submodels. It is possible to specify a different outcome type (for example a different family and/or link function) for each of the GLM submodels. \cr \cr Bayesian estimation of the model is performed via MCMC, in the same way as for \code{\link{stan_glmer}}. Also, similar to \code{\link{stan_glmer}}, an unstructured covariance matrix is used for the group-specific terms within a given grouping factor, with priors on the terms of a decomposition of the covariance matrix. See \code{\link{priors}} for more information about the prior distributions that are available for the covariance matrices, the regression coefficients and the intercept and auxiliary parameters. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { \donttest{ ##### # A multivariate GLM with two submodels. For the grouping factor 'id', the # group-specific intercept from the first submodel (logBili) is assumed to # be correlated with the group-specific intercept and linear slope in the # second submodel (albumin) f1 <- stan_mvmer( formula = list( logBili ~ year + (1 | id), albumin ~ sex + year + (year | id)), data = pbcLong, # this next line is only to keep the example small in size! chains = 1, cores = 1, seed = 12345, iter = 1000) summary(f1) ##### # A multivariate GLM with one bernoulli outcome and one # gaussian outcome. 
We will artificially create the bernoulli # outcome by dichotomising log serum bilirubin pbcLong$ybern <- as.integer(pbcLong$logBili >= mean(pbcLong$logBili)) f2 <- stan_mvmer( formula = list( ybern ~ year + (1 | id), albumin ~ sex + year + (year | id)), data = pbcLong, family = list(binomial, gaussian), chains = 1, cores = 1, seed = 12345, iter = 1000) } } } \seealso{ \code{\link{stan_glmer}}, \code{\link{stan_jm}}, \code{\link{stanreg-objects}}, \code{\link{stanmvreg-methods}}, \code{\link{print.stanmvreg}}, \code{\link{summary.stanmvreg}}, \code{\link{posterior_predict}}, \code{\link{posterior_interval}}. } rstanarm/man/ps_check.Rd0000644000176200001440000000535314551552004014731 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ps_check.R \name{ps_check} \alias{ps_check} \title{Graphical checks of the estimated survival function} \usage{ ps_check( object, check = "survival", limits = c("ci", "none"), draws = NULL, seed = NULL, xlab = NULL, ylab = NULL, ci_geom_args = NULL, ... ) } \arguments{ \item{object}{A fitted model object returned by the \code{\link{stan_jm}} modelling function. See \code{\link{stanreg-objects}}.} \item{check}{The type of plot to show. Currently only "survival" is allowed, which compares the estimated marginal survival function under the joint model to the estimated Kaplan-Meier curve based on the observed data.} \item{limits}{A quoted character string specifying the type of limits to include in the plot. Can be one of: \code{"ci"} for the Bayesian posterior uncertainty interval (often known as a credible interval); or \code{"none"} for no interval limits.} \item{draws}{An integer indicating the number of MCMC draws to use to estimate the survival function. 
The default and maximum number of draws is the size of the posterior sample.} \item{seed}{An optional \code{\link[=set.seed]{seed}} to use.} \item{xlab, ylab}{An optional axis label passed to \code{\link[ggplot2]{labs}}.} \item{ci_geom_args}{Optional arguments passed to \code{\link[ggplot2]{geom_ribbon}} and used to control features of the plotted interval limits. They should be supplied as a named list.} \item{...}{Optional arguments passed to \code{\link[ggplot2:geom_path]{geom_line}} and used to control features of the plotted trajectory.} } \value{ A ggplot object that can be further customized using the \pkg{ggplot2} package. } \description{ This function plots the estimated marginal survival function based on draws from the posterior predictive distribution of the fitted joint model, and then overlays the Kaplan-Meier curve based on the observed data. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ if (!exists("example_jm")) example(example_jm) # Compare estimated survival function to Kaplan-Meier curve ps <- ps_check(example_jm) ps + ggplot2::scale_color_manual(values = c("red", "black")) + # change colors ggplot2::scale_size_manual(values = c(0.5, 3)) + # change line sizes ggplot2::scale_fill_manual(values = c(NA, NA)) # remove fill } } } \seealso{ \code{\link{posterior_survfit}} for the estimated marginal or subject-specific survival function based on draws of the model parameters from the posterior distribution, \code{\link{posterior_predict}} for drawing from the posterior predictive distribution for the longitudinal submodel, and \code{\link{pp_check}} for graphical checks of the longitudinal submodel. 
} rstanarm/man/model.frame.stanmvreg.Rd0000644000176200001440000000110714551552005017342 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanmvreg-methods.R \name{model.frame.stanmvreg} \alias{model.frame.stanmvreg} \title{model.frame method for stanmvreg objects} \usage{ \method{model.frame}{stanmvreg}(formula, fixed.only = FALSE, m = NULL, ...) } \arguments{ \item{formula, ...}{See \code{\link[stats]{model.frame}}.} \item{fixed.only}{See \code{\link[lme4:merMod-class]{model.frame.merMod}}.} \item{m}{Integer specifying the number or name of the submodel} } \description{ model.frame method for stanmvreg objects } \keyword{internal} rstanarm/man/posterior_predict.stanreg.Rd0000644000176200001440000001562314551552004020355 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_predict.R \name{posterior_predict.stanreg} \alias{posterior_predict.stanreg} \alias{posterior_predict} \alias{posterior_predict.stanmvreg} \title{Draw from posterior predictive distribution} \usage{ \method{posterior_predict}{stanreg}( object, newdata = NULL, draws = NULL, re.form = NULL, fun = NULL, seed = NULL, offset = NULL, ... ) \method{posterior_predict}{stanmvreg}( object, m = 1, newdata = NULL, draws = NULL, re.form = NULL, fun = NULL, seed = NULL, offset = NULL, ... ) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{newdata}{Optionally, a data frame in which to look for variables with which to predict. If omitted, the model matrix is used. If \code{newdata} is provided and any variables were transformed (e.g. rescaled) in the data used to fit the model, then these variables must also be transformed in \code{newdata}. 
This only applies if variables were transformed before passing the data to one of the modeling functions and \emph{not} if transformations were specified inside the model formula. Also see the Note section below for a note about using the \code{newdata} argument with with binomial models.} \item{draws}{An integer indicating the number of draws to return. The default and maximum number of draws is the size of the posterior sample.} \item{re.form}{If \code{object} contains \code{\link[=stan_glmer]{group-level}} parameters, a formula indicating which group-level parameters to condition on when making predictions. \code{re.form} is specified in the same form as for \code{\link[lme4]{predict.merMod}}. The default, \code{NULL}, indicates that all estimated group-level parameters are conditioned on. To refrain from conditioning on any group-level parameters, specify \code{NA} or \code{~0}. The \code{newdata} argument may include new \emph{levels} of the grouping factors that were specified when the model was estimated, in which case the resulting posterior predictions marginalize over the relevant variables.} \item{fun}{An optional function to apply to the results. \code{fun} is found by a call to \code{\link{match.fun}} and so can be specified as a function object, a string naming a function, etc.} \item{seed}{An optional \code{\link[=set.seed]{seed}} to use.} \item{offset}{A vector of offsets. Only required if \code{newdata} is specified and an \code{offset} argument was specified when fitting the model.} \item{...}{For \code{stanmvreg} objects, argument \code{m} can be specified indicating the submodel for which you wish to obtain predictions.} \item{m}{Integer specifying the number or name of the submodel} } \value{ A \code{draws} by \code{nrow(newdata)} matrix of simulations from the posterior predictive distribution. Each row of the matrix is a vector of predictions generated using a single draw of the model parameters from the posterior distribution. 
} \description{ The posterior predictive distribution is the distribution of the outcome implied by the model after using the observed data to update our beliefs about the unknown parameters in the model. Simulating data from the posterior predictive distribution using the observed predictors is useful for checking the fit of the model. Drawing from the posterior predictive distribution at interesting values of the predictors also lets us visualize how a manipulation of a predictor affects (a function of) the outcome(s). With new observations of predictor variables we can use the posterior predictive distribution to generate predicted outcomes. } \note{ For binomial models with a number of trials greater than one (i.e., not Bernoulli models), if \code{newdata} is specified then it must include all variables needed for computing the number of binomial trials to use for the predictions. For example if the left-hand side of the model formula is \code{cbind(successes, failures)} then both \code{successes} and \code{failures} must be in \code{newdata}. The particular values of \code{successes} and \code{failures} in \code{newdata} do not matter so long as their sum is the desired number of trials. If the left-hand side of the model formula were \code{cbind(successes, trials - successes)} then both \code{trials} and \code{successes} would need to be in \code{newdata}, probably with \code{successes} set to \code{0} and \code{trials} specifying the number of trials. See the Examples section below and the \emph{How to Use the rstanarm Package} for examples. For models estimated with \code{\link{stan_clogit}}, the number of successes per stratum is ostensibly fixed by the research design. Thus, when doing posterior prediction with new data, the \code{data.frame} passed to the \code{newdata} argument must contain an outcome variable and a stratifying factor, both with the same name as in the original \code{data.frame}. 
Then, the posterior predictions will condition on this outcome in the new data. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { if (!exists("example_model")) example(example_model) yrep <- posterior_predict(example_model) table(yrep) \donttest{ # Using newdata counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3) dat <- data.frame(counts, treatment, outcome) fit3 <- stan_glm( counts ~ outcome + treatment, data = dat, family = poisson(link="log"), prior = normal(0, 1, autoscale = FALSE), prior_intercept = normal(0, 5, autoscale = FALSE), refresh = 0 ) nd <- data.frame(treatment = factor(rep(1,3)), outcome = factor(1:3)) ytilde <- posterior_predict(fit3, nd, draws = 500) print(dim(ytilde)) # 500 by 3 matrix (draws by nrow(nd)) ytilde <- data.frame( count = c(ytilde), outcome = rep(nd$outcome, each = 500) ) ggplot2::ggplot(ytilde, ggplot2::aes(x=outcome, y=count)) + ggplot2::geom_boxplot() + ggplot2::ylab("predicted count") # Using newdata with a binomial model. # example_model is binomial so we need to set # the number of trials to use for prediction. # This could be a different number for each # row of newdata or the same for all rows. # Here we'll use the same value for all. nd <- lme4::cbpp print(formula(example_model)) # cbind(incidence, size - incidence) ~ ... nd$size <- max(nd$size) + 1L # number of trials nd$incidence <- 0 # set to 0 so size - incidence = number of trials ytilde <- posterior_predict(example_model, newdata = nd) # Using fun argument to transform predictions mtcars2 <- mtcars mtcars2$log_mpg <- log(mtcars2$mpg) fit <- stan_glm(log_mpg ~ wt, data = mtcars2, refresh = 0) ytilde <- posterior_predict(fit, fun = exp) } } } \seealso{ \code{\link{pp_check}} for graphical posterior predictive checks. Examples of posterior predictive checking can also be found in the \pkg{rstanarm} vignettes and demos. \code{\link{predictive_error}} and \code{\link{predictive_interval}}. 
} rstanarm/man/pp_validate.Rd0000644000176200001440000000721714551552004015443 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pp_validate.R \name{pp_validate} \alias{pp_validate} \title{Model validation via simulation} \usage{ pp_validate(object, nreps = 20, seed = 12345, ...) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{nreps}{The number of replications to be performed. \code{nreps} must be sufficiently large so that the statistics described below in Details are meaningful. Depending on the model and the size of the data, running \code{pp_validate} may be slow. See also the Note section below for advice on avoiding numerical issues.} \item{seed}{A seed passed to Stan to use when refitting the model.} \item{...}{Currently ignored.} } \value{ A ggplot object that can be further customized using the \pkg{ggplot2} package. } \description{ The \code{pp_validate} function is based on the methods described in Cook, Gelman, and Rubin (2006) for validating software developed to fit particular Bayesian models. Here we take the perspective that models themselves are software and thus it is useful to apply this validation approach to individual models. } \details{ We repeat \code{nreps} times the process of simulating parameters and data from the model and refitting the model to this simulated data. For each of the \code{nreps} replications we do the following: \enumerate{ \item Refit the model but \emph{without} conditioning on the data (setting \code{prior_PD=TRUE}), obtaining draws \eqn{\theta^{true}}{\theta_true} from the \emph{prior} distribution of the model parameters. \item Given \eqn{\theta^{true}}{\theta_true}, simulate data \eqn{y^\ast}{y*} from the \emph{prior} predictive distribution (calling \code{\link{posterior_predict}} on the fitted model object obtained in step 1). 
\item Fit the model to the simulated outcome \eqn{y^\ast}{y*}, obtaining parameters \eqn{\theta^{post}}{\theta_post}. } For any individual parameter, the quantile of the "true" parameter value with respect to its posterior distribution \emph{should} be uniformly distributed. The validation procedure entails looking for deviations from uniformity by computing statistics for a test that the quantiles are uniformly distributed. The absolute values of the computed test statistics are plotted for batches of parameters (e.g., non-varying coefficients are grouped into a batch called "beta", parameters that vary by group level are in batches named for the grouping variable, etc.). See Cook, Gelman, and Rubin (2006) for more details on the validation procedure. } \note{ In order to make it through \code{nreps} replications without running into numerical difficulties you may have to restrict the range for randomly generating initial values for parameters when you fit the \emph{original} model. With any of \pkg{rstanarm}'s modeling functions this can be done by specifying the optional argument \code{init_r} as some number less than the default of \eqn{2}. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \dontrun{ if (!exists("example_model")) example(example_model) try(pp_validate(example_model)) # fails with default seed / priors } } } \references{ Cook, S., Gelman, A., and Rubin, D. (2006). Validation of software for Bayesian models using posterior quantiles. \emph{Journal of Computational and Graphical Statistics}. 15(3), 675--692. } \seealso{ \code{\link{pp_check}} for graphical posterior predictive checks and \code{\link{posterior_predict}} to draw from the posterior predictive distribution. \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}} to change the color scheme of the plot. 
} rstanarm/man/bayes_R2.stanreg.Rd0000644000176200001440000000336114551552004016257 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bayes_R2.R \name{bayes_R2.stanreg} \alias{bayes_R2.stanreg} \alias{bayes_R2} \alias{loo_R2.stanreg} \alias{loo_R2} \title{Compute a Bayesian version of R-squared or LOO-adjusted R-squared for regression models.} \usage{ \method{bayes_R2}{stanreg}(object, ..., re.form = NULL) \method{loo_R2}{stanreg}(object, ...) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{...}{Currently ignored.} \item{re.form}{For models with group-level terms, \code{re.form} is passed to \code{\link{posterior_epred}} if specified.} } \value{ A vector of R-squared values with length equal to the posterior sample size (the posterior distribution of R-squared). } \description{ Compute a Bayesian version of R-squared or LOO-adjusted R-squared for regression models. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { fit <- stan_glm( mpg ~ wt + cyl, data = mtcars, QR = TRUE, chains = 2, refresh = 0 ) rsq <- bayes_R2(fit) print(median(rsq)) hist(rsq) loo_rsq <- loo_R2(fit) print(median(loo_rsq)) # multilevel binomial model if (!exists("example_model")) example(example_model) print(example_model) median(bayes_R2(example_model)) median(bayes_R2(example_model, re.form = NA)) # exclude group-level } } \references{ Andrew Gelman, Ben Goodrich, Jonah Gabry, and Aki Vehtari (2018). R-squared for Bayesian regression models. \emph{The American Statistician}, to appear. 
\doi{10.1080/00031305.2018.1549100} (\href{http://www.stat.columbia.edu/~gelman/research/published/bayes_R2_v3.pdf}{Preprint}, \href{https://avehtari.github.io/bayes_R2/bayes_R2.html}{Notebook}) } rstanarm/man/example_model.Rd0000644000176200001440000000242714551552004015764 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-example_model.R \name{example_model} \alias{example_model} \title{Example model} \format{ Calling \code{example("example_model")} will run the model in the Examples section, below, and the resulting stanreg object will then be available in the global environment. The \code{chains} and \code{iter} arguments are specified to make this example be small in size. In practice, we recommend that they be left unspecified in order to use the default values (4 and 2000 respectively) or increased if there are convergence problems. The \code{cores} argument is optional and on a multicore system, the user may well want to set that equal to the number of chains being executed. } \description{ A model for use in \pkg{rstanarm} examples. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { example_model <- stan_glmer(cbind(incidence, size - incidence) ~ size + period + (1|herd), data = lme4::cbpp, family = binomial, QR = TRUE, # this next line is only to keep the example small in size! chains = 2, cores = 1, seed = 12345, iter = 1000, refresh = 0) example_model } } \seealso{ \code{\link[lme4]{cbpp}} for a description of the data. 
} rstanarm/man/posterior_linpred.stanreg.Rd0000644000176200001440000000722314551552004020355 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_linpred.R \name{posterior_linpred.stanreg} \alias{posterior_linpred.stanreg} \alias{posterior_linpred} \alias{posterior_epred} \alias{posterior_epred.stanreg} \title{Posterior distribution of the (possibly transformed) linear predictor} \usage{ \method{posterior_linpred}{stanreg}( object, transform = FALSE, newdata = NULL, draws = NULL, re.form = NULL, offset = NULL, XZ = FALSE, ... ) \method{posterior_epred}{stanreg}( object, newdata = NULL, draws = NULL, re.form = NULL, offset = NULL, XZ = FALSE, ... ) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{transform}{Should the linear predictor be transformed using the inverse-link function? The default is \code{FALSE}. This argument is still allowed but not recommended because the \code{posterior_epred} function now provides the equivalent of \code{posterior_linpred(..., transform=TRUE)}. See \strong{Examples}.} \item{newdata, draws, re.form, offset}{Same as for \code{\link{posterior_predict}}.} \item{XZ}{If \code{TRUE} then instead of computing the linear predictor the design matrix \code{X} (or \code{cbind(X,Z)} for models with group-specific terms) constructed from \code{newdata} is returned. The default is \code{FALSE}.} \item{...}{Currently ignored.} } \value{ The default is to return a \code{draws} by \code{nrow(newdata)} matrix of simulations from the posterior distribution of the (possibly transformed) linear predictor. The exception is if the argument \code{XZ} is set to \code{TRUE} (see the \code{XZ} argument description above). } \description{ Extract the posterior draws of the linear predictor, possibly transformed by the inverse-link function. 
This function is occasionally useful, but it should be used sparingly: inference and model checking should generally be carried out using the posterior predictive distribution (i.e., using \code{\link{posterior_predict}}). } \details{ The \code{posterior_linpred} function returns the posterior distribution of the linear predictor, while the \code{posterior_epred} function returns the posterior distribution of the conditional expectation. In the special case of a Gaussian likelihood with an identity link function, these two concepts are the same. The \code{posterior_epred} function is a less noisy way to obtain expectations over the output of \code{\link{posterior_predict}}. } \note{ For models estimated with \code{\link{stan_clogit}}, the number of successes per stratum is ostensibly fixed by the research design. Thus, when calling \code{posterior_linpred} with new data and \code{transform = TRUE}, the \code{data.frame} passed to the \code{newdata} argument must contain an outcome variable and a stratifying factor, both with the same name as in the original \code{data.frame}. Then, the probabilities will condition on this outcome in the new data. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { if (!exists("example_model")) example(example_model) print(family(example_model)) # linear predictor on log-odds scale linpred <- posterior_linpred(example_model) colMeans(linpred) # probabilities # same as posterior_linpred(example_model, transform = TRUE) probs <- posterior_epred(example_model) colMeans(probs) # not conditioning on any group-level parameters probs2 <- posterior_epred(example_model, re.form = NA) apply(probs2, 2, median) } } \seealso{ \code{\link{posterior_predict}} to draw from the posterior predictive distribution of the outcome, which is typically preferable. 
} rstanarm/man/launch_shinystan.stanreg.Rd0000644000176200001440000001271314551552004020164 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/launch_shinystan.R \name{launch_shinystan.stanreg} \alias{launch_shinystan.stanreg} \alias{launch_shinystan} \title{Using the ShinyStan GUI with rstanarm models} \usage{ \method{launch_shinystan}{stanreg}( object, ppd = TRUE, seed = 1234, model_name = NULL, note = NULL, rstudio = getOption("shinystan.rstudio"), ... ) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{ppd}{Should \pkg{rstanarm} draw from the posterior predictive distribution before launching ShinyStan? The default is \code{TRUE}, although for very large objects it can be convenient to set it to \code{FALSE} as drawing from the posterior predictive distribution can be time consuming. If \code{ppd} is \code{TRUE} then graphical posterior predictive checks are available when ShinyStan is launched.} \item{seed}{Passed to \link[=pp_check]{pp_check} if \code{ppd} is \code{TRUE}.} \item{model_name, note}{Optional arguments passed to \code{\link[shinystan]{as.shinystan}}.} \item{rstudio}{Only relevant for 'RStudio' users. The default (\code{FALSE}) is to launch the app in the user's default web browser rather than the pop-up Viewer provided by 'RStudio'. Users can change the default to \code{TRUE} by setting the global option \code{options(shinystan.rstudio = TRUE)}.} \item{...}{Optional arguments passed to \code{\link[shiny]{runApp}}.} } \description{ The ShinyStan interface provides visual and numerical summaries of model parameters and convergence diagnostics. } \details{ The \code{\link[shinystan]{launch_shinystan}} function will accept a \code{\link[=stanreg-objects]{stanreg}} object as input. Currently, almost any model fit using one of \pkg{rstanarm}'s model-fitting functions can be used with ShinyStan. 
The only exception is that ShinyStan does not currently support \pkg{rstanarm} models fit using \code{algorithm='optimizing'}. See the \pkg{\link[=shinystan-package]{shinystan}} package documentation for more information. } \section{Faster launch times}{ For some \pkg{rstanarm} models ShinyStan may take a very long time to launch. If this is the case with one of your models you may be able to speed up \code{launch_shinystan} in one of several ways: \describe{ \item{Prevent ShinyStan from preparing graphical posterior predictive checks:}{ When used with a \code{\link[=stanreg-objects]{stanreg}} object (\pkg{rstanarm} model object) ShinyStan will draw from the posterior predictive distribution and prepare graphical posterior predictive checks before launching. That way when you go to the PPcheck page the plots are immediately available. This can be time consuming for models fit to very large datasets and you can prevent this behavior by creating a shinystan object before calling \code{launch_shinystan}. To do this use \code{\link[shinystan]{as.shinystan}} with optional argument \code{ppd} set to \code{FALSE} (see the Examples section below). When you then launch ShinyStan and go to the PPcheck page the plots will no longer be automatically generated and you will be presented with the standard interface requiring you to first specify the appropriate \eqn{y} and \eqn{yrep}, which can be done for many but not all \pkg{rstanarm} models. } \item{Use a shinystan object:}{ Even if you don't want to prevent ShinyStan from preparing graphical posterior predictive checks, first creating a shinystan object using \code{\link[shinystan]{as.shinystan}} can reduce \emph{future} launch times. That is, \code{launch_shinystan(sso)} will be faster than \code{launch_shinystan(fit)}, where \code{sso} is a shinystan object and \code{fit} is a stanreg object. 
It still may take some time for \code{as.shinystan} to create \code{sso} initially, but each time you subsequently call \code{launch_shinystan(sso)} it will reuse \code{sso} instead of internally creating a shinystan object every time. See the Examples section below.} } } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \dontrun{ if (!exists("example_model")) example(example_model) # Launch the ShinyStan app without saving the resulting shinystan object if (interactive()) launch_shinystan(example_model) # Launch the ShinyStan app (saving resulting shinystan object as sso) if (interactive()) sso <- launch_shinystan(example_model) # First create shinystan object then call launch_shinystan sso <- shinystan::as.shinystan(example_model) if (interactive()) launch_shinystan(sso) # Prevent ShinyStan from preparing graphical posterior predictive checks that # can be time consuming. example_model is small enough that it won't matter # much here but in general this can help speed up launch_shinystan sso <- shinystan::as.shinystan(example_model, ppd = FALSE) if (interactive()) launch_shinystan(sso) } } } \references{ Gabry, J. , Simpson, D. , Vehtari, A. , Betancourt, M. and Gelman, A. (2019), Visualization in Bayesian workflow. \emph{J. R. Stat. Soc. A}, 182: 389-402. doi:10.1111/rssa.12378, \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) Muth, C., Oravecz, Z., and Gabry, J. (2018) User-friendly Bayesian regression modeling: A tutorial with rstanarm and shinystan. \emph{The Quantitative Methods for Psychology}. 14(2), 99--119. 
\url{https://www.tqmp.org/RegularArticles/vol14-2/p099/p099.pdf} } rstanarm/man/print.survfit.stanjm.Rd0000644000176200001440000000115114551552004017272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_survfit.R \name{print.survfit.stanjm} \alias{print.survfit.stanjm} \title{Generic print method for \code{survfit.stanjm} objects} \usage{ \method{print}{survfit.stanjm}(x, digits = 4, ...) } \arguments{ \item{x}{An object of class \code{survfit.stanjm}, returned by a call to \code{\link{posterior_survfit}}.} \item{digits}{Number of digits to use for formatting the time variable and the survival probabilities.} \item{...}{Ignored.} } \description{ Generic print method for \code{survfit.stanjm} objects } \keyword{internal} rstanarm/man/available-algorithms.Rd0000644000176200001440000000703714551552004017242 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-algorithms.R \name{available-algorithms} \alias{available-algorithms} \title{Estimation algorithms available for \pkg{rstanarm} models} \description{ Estimation algorithms available for \pkg{rstanarm} models } \section{Estimation algorithms}{ The modeling functions in the \pkg{rstanarm} package take an \code{algorithm} argument that can be one of the following: \describe{ \item{\strong{Sampling} (\code{algorithm="sampling"})}{ Uses Markov Chain Monte Carlo (MCMC) --- in particular, Hamiltonian Monte Carlo (HMC) with a tuned but diagonal mass matrix --- to draw from the posterior distribution of the parameters. See \code{\link[rstan:stanmodel-method-sampling]{sampling}} (\pkg{rstan}) for more details. 
This is the slowest but most reliable of the available estimation algorithms and it is \strong{the default and recommended algorithm for statistical inference.} } \item{\strong{Mean-field} (\code{algorithm="meanfield"})}{ Uses mean-field variational inference to draw from an approximation to the posterior distribution. In particular, this algorithm finds the set of independent normal distributions in the unconstrained space that --- when transformed into the constrained space --- most closely approximate the posterior distribution. Then it draws repeatedly from these independent normal distributions and transforms them into the constrained space. The entire process is much faster than HMC and yields independent draws but \strong{is not recommended for final statistical inference}. It can be useful to narrow the set of candidate models in large problems, particularly when specifying \code{QR=TRUE} in \code{\link{stan_glm}}, \code{\link{stan_glmer}}, and \code{\link{stan_gamm4}}, but is \strong{only an approximation to the posterior distribution}. } \item{\strong{Full-rank} (\code{algorithm="fullrank"})}{ Uses full-rank variational inference to draw from an approximation to the posterior distribution by finding the multivariate normal distribution in the unconstrained space that --- when transformed into the constrained space --- most closely approximates the posterior distribution. Then it draws repeatedly from this multivariate normal distribution and transforms the draws into the constrained space. This process is slower than meanfield variational inference but is faster than HMC. Although still an approximation to the posterior distribution and thus \strong{not recommended for final statistical inference}, the approximation is more realistic than that of mean-field variational inference because the parameters are not assumed to be independent in the unconstrained space. 
Nevertheless, fullrank variational inference is a more difficult optimization problem and the algorithm is more prone to non-convergence or convergence to a local optimum. } \item{\strong{Optimizing} (\code{algorithm="optimizing"})}{ Finds the posterior mode using a C++ implementation of the LBGFS algorithm. See \code{\link[rstan:stanmodel-method-optimizing]{optimizing}} for more details. If there is no prior information, then this is equivalent to maximum likelihood, in which case there is no great reason to use the functions in the \pkg{rstanarm} package over the emulated functions in other packages. However, if priors are specified, then the estimates are penalized maximum likelihood estimates, which may have some redeeming value. Currently, optimization is only supported for \code{\link{stan_glm}}. } } } \seealso{ \url{https://mc-stan.org/rstanarm/} } rstanarm/man/stan_betareg.Rd0000644000176200001440000002541614551552005015613 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_betareg.R, R/stan_betareg.fit.R \name{stan_betareg} \alias{stan_betareg} \alias{stan_betareg.fit} \title{Bayesian beta regression models via Stan} \usage{ stan_betareg( formula, data, subset, na.action, weights, offset, link = c("logit", "probit", "cloglog", "cauchit", "log", "loglog"), link.phi = NULL, model = TRUE, y = TRUE, x = FALSE, ..., prior = normal(autoscale = TRUE), prior_intercept = normal(autoscale = TRUE), prior_z = normal(autoscale = TRUE), prior_intercept_z = normal(autoscale = TRUE), prior_phi = exponential(autoscale = TRUE), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE ) stan_betareg.fit( x, y, z = NULL, weights = rep(1, NROW(x)), offset = rep(0, NROW(x)), link = c("logit", "probit", "cloglog", "cauchit", "log", "loglog"), link.phi = NULL, ..., prior = normal(autoscale = TRUE), prior_intercept = normal(autoscale = TRUE), prior_z = normal(autoscale = 
TRUE), prior_intercept_z = normal(autoscale = TRUE), prior_phi = exponential(autoscale = TRUE), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE ) } \arguments{ \item{formula, data, subset}{Same as \code{\link[betareg]{betareg}}, but \emph{we strongly advise against omitting the \code{data} argument}. Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{na.action}{Same as \code{\link[betareg]{betareg}}, but rarely specified.} \item{link}{Character specification of the link function used in the model for mu (specified through \code{x}). Currently, "logit", "probit", "cloglog", "cauchit", "log", and "loglog" are supported.} \item{link.phi}{If applicable, character specification of the link function used in the model for \code{phi} (specified through \code{z}). Currently, "identity", "log" (default), and "sqrt" are supported. Since the "sqrt" link function is known to be unstable, it is advisable to specify a different link function (or to model \code{phi} as a scalar parameter instead of via a linear predictor by excluding \code{z} from the \code{formula} and excluding \code{link.phi}).} \item{model, offset, weights}{Same as \code{\link[betareg]{betareg}}.} \item{x, y}{In \code{stan_betareg}, logical scalars indicating whether to return the design matrix and response vector. In \code{stan_betareg.fit}, a design matrix and response vector.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. 
For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). \code{refresh=0} turns off the iteration updates.} \item{prior}{The prior distribution for the (non-hierarchical) regression coefficients. The default priors are described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior} should be a call to one of the various functions provided by \pkg{rstanarm} for specifying priors. The subset of these functions that can be used for the prior on the coefficients can be grouped into several "families": \tabular{ll}{ \strong{Family} \tab \strong{Functions} \cr \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr \emph{Product normal family} \tab \code{product_normal} \cr } See the \link[=priors]{priors help page} for details on the families and how to specify the arguments for all of the functions in the table above. To omit a prior ---i.e., to use a flat (improper) uniform prior--- \code{prior} can be set to \code{NULL}, although this is rarely a good idea. \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t family or Laplace family, and if the \code{autoscale} argument to the function used to specify the prior (e.g. \code{\link{normal}}) is left at its default and recommended value of \code{TRUE}, then the default or user-specified prior scale(s) may be adjusted internally based on the scales of the predictors. 
See the \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette for details on the rescaling and the \code{\link{prior_summary}} function for a summary of the priors used for a particular model.} \item{prior_intercept}{The prior distribution for the intercept (after centering all predictors, see note below). The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior_intercept} can be a call to \code{normal}, \code{student_t} or \code{cauchy}. See the \link[=priors]{priors help page} for details on these functions. To omit a prior on the intercept ---i.e., to use a flat (improper) uniform prior--- \code{prior_intercept} can be set to \code{NULL}. \strong{Note:} If using a dense representation of the design matrix ---i.e., if the \code{sparse} argument is left at its default value of \code{FALSE}--- then the prior distribution for the intercept is set so it applies to the value \emph{when all predictors are centered} (you don't need to manually center them). This is explained further in [Prior Distributions for rstanarm Models](https://mc-stan.org/rstanarm/articles/priors.html) If you prefer to specify a prior on the intercept without the predictors being auto-centered, then you have to omit the intercept from the \code{\link[stats]{formula}} and include a column of ones as a predictor, in which case some element of \code{prior} specifies the prior on it, rather than \code{prior_intercept}. Regardless of how \code{prior_intercept} is specified, the reported \emph{estimates} of the intercept always correspond to a parameterization without centered predictors (i.e., same as in \code{glm}).} \item{prior_z}{Prior distribution for the coefficients in the model for \code{phi} (if applicable). 
Same options as for \code{prior}.} \item{prior_intercept_z}{Prior distribution for the intercept in the model for \code{phi} (if applicable). Same options as for \code{prior_intercept}.} \item{prior_phi}{The prior distribution for \code{phi} if it is \emph{not} modeled as a function of predictors. If \code{z} variables are specified then \code{prior_phi} is ignored and \code{prior_intercept_z} and \code{prior_z} are used to specify the priors on the intercept and coefficients in the model for \code{phi}. When applicable, \code{prior_phi} can be a call to \code{exponential} to use an exponential distribution, or one of \code{normal}, \code{student_t} or \code{cauchy} to use half-normal, half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these functions. To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_phi} to \code{NULL}.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. 
See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{z}{For \code{stan_betareg.fit}, a regressor matrix for \code{phi}. Defaults to an intercept only.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_betareg}. A \link[=stanfit-class]{stanfit} object (or a slightly modified stanfit object) is returned if \code{stan_betareg.fit} is called directly. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Beta regression modeling with optional prior distributions for the coefficients, intercept, and auxiliary parameter \code{phi} (if applicable). } \details{ The \code{stan_betareg} function is similar in syntax to \code{\link[betareg]{betareg}} but rather than performing maximum likelihood estimation, full Bayesian estimation is performed (if \code{algorithm} is \code{"sampling"}) via MCMC. The Bayesian model adds priors (independent by default) on the coefficients of the beta regression model. The \code{stan_betareg} function calls the workhorse \code{stan_betareg.fit} function, but it is also possible to call the latter directly. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { ### Simulated data N <- 200 x <- rnorm(N, 2, 1) z <- rnorm(N, 2, 1) mu <- binomial(link = "logit")$linkinv(1 + 0.2*x) phi <- exp(1.5 + 0.4*z) y <- rbeta(N, mu * phi, (1 - mu) * phi) hist(y, col = "dark grey", border = FALSE, xlim = c(0,1)) fake_dat <- data.frame(y, x, z) fit <- stan_betareg( y ~ x | z, data = fake_dat, link = "logit", link.phi = "log", algorithm = "optimizing" # just for speed of example ) print(fit, digits = 2) } } \references{ Ferrari, SLP and Cribari-Neto, F (2004). Beta regression for modeling rates and proportions. \emph{Journal of Applied Statistics}. 31(7), 799--815. 
} \seealso{ \code{\link{stanreg-methods}} and \code{\link[betareg]{betareg}}. The vignette for \code{stan_betareg}. \url{https://mc-stan.org/rstanarm/articles/} } rstanarm/man/stan_gamm4.Rd0000644000176200001440000003257414551552005015212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_gamm4.R \name{stan_gamm4} \alias{stan_gamm4} \alias{plot_nonlinear} \title{Bayesian generalized linear additive models with optional group-specific terms via Stan} \usage{ stan_gamm4( formula, random = NULL, family = gaussian(), data, weights = NULL, subset = NULL, na.action, knots = NULL, drop.unused.levels = TRUE, ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_smooth = exponential(autoscale = FALSE), prior_aux = exponential(autoscale = TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE ) plot_nonlinear( x, smooths, ..., prob = 0.9, facet_args = list(), alpha = 1, size = 0.75 ) } \arguments{ \item{formula, random, family, data, knots, drop.unused.levels}{Same as for \code{\link[gamm4]{gamm4}}. \emph{We strongly advise against omitting the \code{data} argument}. Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{subset, weights, na.action}{Same as \code{\link[stats]{glm}}, but rarely specified.} \item{...}{Further arguments passed to \code{\link[rstan:stanmodel-method-sampling]{sampling}} (e.g. \code{iter}, \code{chains}, \code{cores}, etc.) or to \code{\link[rstan:stanmodel-method-vb]{vb}} (if \code{algorithm} is \code{"meanfield"} or \code{"fullrank"}).} \item{prior}{The prior distribution for the (non-hierarchical) regression coefficients. 
The default priors are described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior} should be a call to one of the various functions provided by \pkg{rstanarm} for specifying priors. The subset of these functions that can be used for the prior on the coefficients can be grouped into several "families": \tabular{ll}{ \strong{Family} \tab \strong{Functions} \cr \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr \emph{Product normal family} \tab \code{product_normal} \cr } See the \link[=priors]{priors help page} for details on the families and how to specify the arguments for all of the functions in the table above. To omit a prior ---i.e., to use a flat (improper) uniform prior--- \code{prior} can be set to \code{NULL}, although this is rarely a good idea. \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t family or Laplace family, and if the \code{autoscale} argument to the function used to specify the prior (e.g. \code{\link{normal}}) is left at its default and recommended value of \code{TRUE}, then the default or user-specified prior scale(s) may be adjusted internally based on the scales of the predictors. See the \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette for details on the rescaling and the \code{\link{prior_summary}} function for a summary of the priors used for a particular model.} \item{prior_intercept}{The prior distribution for the intercept (after centering all predictors, see note below). The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. 
If not using the default, \code{prior_intercept} can be a call to \code{normal}, \code{student_t} or \code{cauchy}. See the \link[=priors]{priors help page} for details on these functions. To omit a prior on the intercept ---i.e., to use a flat (improper) uniform prior--- \code{prior_intercept} can be set to \code{NULL}. \strong{Note:} If using a dense representation of the design matrix ---i.e., if the \code{sparse} argument is left at its default value of \code{FALSE}--- then the prior distribution for the intercept is set so it applies to the value \emph{when all predictors are centered} (you don't need to manually center them). This is explained further in [Prior Distributions for rstanarm Models](https://mc-stan.org/rstanarm/articles/priors.html) If you prefer to specify a prior on the intercept without the predictors being auto-centered, then you have to omit the intercept from the \code{\link[stats]{formula}} and include a column of ones as a predictor, in which case some element of \code{prior} specifies the prior on it, rather than \code{prior_intercept}. Regardless of how \code{prior_intercept} is specified, the reported \emph{estimates} of the intercept always correspond to a parameterization without centered predictors (i.e., same as in \code{glm}).} \item{prior_smooth}{The prior distribution for the hyperparameters in GAMs, with lower values yielding less flexible smooth functions. \code{prior_smooth} can be a call to \code{exponential} to use an exponential distribution, or \code{normal}, \code{student_t} or \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these functions. To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_smooth} to \code{NULL}. 
The number of hyperparameters depends on the model specification but a scalar prior will be recycled as necessary to the appropriate length.} \item{prior_aux}{The prior distribution for the "auxiliary" parameter (if applicable). The "auxiliary" parameter refers to a different parameter depending on the \code{family}. For Gaussian models \code{prior_aux} controls \code{"sigma"}, the error standard deviation. For negative binomial models \code{prior_aux} controls \code{"reciprocal_dispersion"}, which is similar to the \code{"size"} parameter of \code{\link[stats:NegBinomial]{rnbinom}}: smaller values of \code{"reciprocal_dispersion"} correspond to greater dispersion. For gamma models \code{prior_aux} sets the prior on the \code{"shape"} parameter (see e.g., \code{\link[stats:GammaDist]{rgamma}}), and for inverse-Gaussian models it is the so-called \code{"lambda"} parameter (which is essentially the reciprocal of a scale parameter). Binomial and Poisson models do not have auxiliary parameters. The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior_aux} can be a call to \code{exponential} to use an exponential distribution, or \code{normal}, \code{student_t} or \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these functions. To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_aux} to \code{NULL}.} \item{prior_covariance}{Cannot be \code{NULL}; see \code{\link{decov}} for more information about the default arguments.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. 
Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating whether to use a sparse representation of the design (X) matrix. If \code{TRUE}, the design matrix is not centered (since that would destroy the sparsity) and likewise it is not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}. Depending on how many zeros there are in the design matrix, setting \code{sparse = TRUE} may make the code run faster and can consume much less RAM.} \item{x}{An object produced by \code{stan_gamm4}.} \item{smooths}{An optional character vector specifying a subset of the smooth functions specified in the call to \code{stan_gamm4}. 
The default is to include all smooth terms.} \item{prob}{For univariate smooths, a scalar between 0 and 1 governing the width of the uncertainty interval.} \item{facet_args}{An optional named list of arguments passed to \code{\link[ggplot2]{facet_wrap}} (other than the \code{facets} argument).} \item{alpha, size}{For univariate smooths, passed to \code{\link[ggplot2]{geom_ribbon}}. For bivariate smooths, \code{size/2} is passed to \code{\link[ggplot2]{geom_contour}}.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_gamm4}. \code{plot_nonlinear} returns a ggplot object. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Bayesian inference for GAMMs with flexible priors. } \details{ The \code{stan_gamm4} function is similar in syntax to \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package. But rather than performing (restricted) maximum likelihood estimation with the \pkg{lme4} package, the \code{stan_gamm4} function utilizes MCMC to perform Bayesian estimation. The Bayesian model adds priors on the common regression coefficients (in the same way as \code{\link{stan_glm}}), priors on the standard deviations of the smooth terms, and a prior on the decomposition of the covariance matrices of any group-specific parameters (as in \code{\link{stan_glmer}}). Estimating these models via MCMC avoids the optimization issues that often crop up with GAMMs and provides better estimates for the uncertainty in the parameter estimates. See \code{\link[gamm4]{gamm4}} for more information about the model specification and \code{\link{priors}} for more information about the priors on the main coefficients. The \code{formula} should include at least one smooth term, which can be specified in any way that is supported by the \code{\link[mgcv]{jagam}} function in the \pkg{mgcv} package. 
The \code{prior_smooth} argument should be used to specify a prior on the unknown standard deviations that govern how smooth the smooth function is. The \code{prior_covariance} argument can be used to specify the prior on the components of the covariance matrix for any (optional) group-specific terms. The \code{\link[gamm4]{gamm4}} function in the \pkg{gamm4} package uses group-specific terms to implement the departure from linearity in the smooth terms, but that is not the case for \code{stan_gamm4} where the group-specific terms are exactly the same as in \code{\link{stan_glmer}}. The \code{plot_nonlinear} function creates a ggplot object with one facet for each smooth function specified in the call to \code{stan_gamm4} in the case where all smooths are univariate. A subset of the smooth functions can be specified using the \code{smooths} argument, which is necessary to plot a bivariate smooth or to exclude the bivariate smooth and plot the univariate ones. In the bivariate case, a plot is produced using \code{\link[ggplot2]{geom_contour}}. In the univariate case, the resulting plot is conceptually similar to \code{\link[mgcv]{plot.gam}} except the outer lines here demark the edges of posterior uncertainty intervals (credible intervals) rather than confidence intervals and the inner line is the posterior median of the function rather than the function implied by a point estimate. To change the colors used in the plot see \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}}. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { # from example(gamm4, package = "gamm4"), prefixing gamm4() call with stan_ \donttest{ dat <- mgcv::gamSim(1, n = 400, scale = 2) ## simulate 4 term additive truth ## Now add 20 level random effect `fac'... 
dat$fac <- fac <- as.factor(sample(1:20, 400, replace = TRUE)) dat$y <- dat$y + model.matrix(~ fac - 1) \%*\% rnorm(20) * .5 br <- stan_gamm4(y ~ s(x0) + x1 + s(x2), data = dat, random = ~ (1 | fac), chains = 1, iter = 500) # for example speed print(br) plot_nonlinear(br) plot_nonlinear(br, smooths = "s(x0)", alpha = 2/3) } } } \references{ Crainiceanu, C., Ruppert D., and Wand, M. (2005). Bayesian analysis for penalized spline regression using WinBUGS. \emph{Journal of Statistical Software}. \strong{14}(14), 1--22. \url{https://www.jstatsoft.org/article/view/v014i14} } \seealso{ \code{\link{stanreg-methods}} and \code{\link[gamm4]{gamm4}}. The vignette for \code{stan_glmer}, which also discusses \code{stan_gamm4}. \url{https://mc-stan.org/rstanarm/articles/} } rstanarm/man/kfold.stanreg.Rd0000644000176200001440000001245214551552004015711 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/loo-kfold.R \name{kfold.stanreg} \alias{kfold.stanreg} \alias{kfold} \title{K-fold cross-validation} \usage{ \method{kfold}{stanreg}( x, K = 10, ..., folds = NULL, save_fits = FALSE, cores = getOption("mc.cores", 1) ) } \arguments{ \item{x}{A fitted model object returned by one of the rstanarm modeling functions. See \link{stanreg-objects}.} \item{K}{For \code{kfold}, the number of subsets (folds) into which the data will be partitioned for performing \eqn{K}-fold cross-validation. The model is refit \code{K} times, each time leaving out one of the \code{K} folds. 
If the \code{folds} argument is specified then \code{K} will automatically be set to \code{length(unique(folds))}, otherwise the specified value of \code{K} is passed to \code{loo::\link[loo:kfold-helpers]{kfold_split_random}} to randomly partition the data into \code{K} subsets of equal (or as close to equal as possible) size.} \item{...}{Currently ignored.} \item{folds}{For \code{kfold}, an optional integer vector with one element per observation in the data used to fit the model. Each element of the vector is an integer in \code{1:K} indicating to which of the \code{K} folds the corresponding observation belongs. There are some convenience functions available in the \pkg{loo} package that create integer vectors to use for this purpose (see the \strong{Examples} section below and also the \link[loo]{kfold-helpers} page).} \item{save_fits}{For \code{kfold}, if \code{TRUE}, a component \code{'fits'} is added to the returned object to store the cross-validated \link[=stanreg-objects]{stanreg} objects and the indices of the omitted observations for each fold. Defaults to \code{FALSE}.} \item{cores}{The number of cores to use for parallelization. Instead of fitting separate Markov chains for the same model on different cores, by default \code{kfold} will distribute the \code{K} models to be fit across the cores (using \code{\link[parallel:clusterApply]{parLapply}} on Windows and \code{\link[parallel]{mclapply}} otherwise). The Markov chains for each model will be run sequentially. This will often be the most efficient option, especially if many cores are available, but in some cases it may be preferable to fit the \code{K} models sequentially and instead use the cores for the Markov chains. This can be accomplished by setting \code{options(mc.cores)} to be the desired number of cores to use for the Markov chains \emph{and} also manually specifying \code{cores=1} when calling the \code{kfold} function. 
See the end of the \strong{Examples} section for a demonstration.} } \value{ An object with classes 'kfold' and 'loo' that has a similar structure as the objects returned by the \code{\link{loo}} and \code{\link{waic}} methods and is compatible with the \code{\link{loo_compare}} function for comparing models. } \description{ The \code{kfold} method performs exact \eqn{K}-fold cross-validation. First the data are randomly partitioned into \eqn{K} subsets of equal size (or as close to equal as possible), or the user can specify the \code{folds} argument to determine the partitioning. Then the model is refit \eqn{K} times, each time leaving out one of the \eqn{K} subsets. If \eqn{K} is equal to the total number of observations in the data then \eqn{K}-fold cross-validation is equivalent to exact leave-one-out cross-validation (to which \code{\link[=loo.stanreg]{loo}} is an efficient approximation). } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \donttest{ fit1 <- stan_glm(mpg ~ wt, data = mtcars, refresh = 0) fit2 <- stan_glm(mpg ~ wt + cyl, data = mtcars, refresh = 0) fit3 <- stan_glm(mpg ~ disp * as.factor(cyl), data = mtcars, refresh = 0) # 10-fold cross-validation # (if possible also specify the 'cores' argument to use multiple cores) (kfold1 <- kfold(fit1, K = 10)) kfold2 <- kfold(fit2, K = 10) kfold3 <- kfold(fit3, K = 10) loo_compare(kfold1, kfold2, kfold3) # stratifying by a grouping variable # (note: might get some divergences warnings with this model but # this is just intended as a quick example of how to code this) fit4 <- stan_lmer(mpg ~ disp + (1|cyl), data = mtcars, refresh = 0) table(mtcars$cyl) folds_cyl <- loo::kfold_split_stratified(K = 3, x = mtcars$cyl) table(cyl = mtcars$cyl, fold = folds_cyl) kfold4 <- kfold(fit4, folds = folds_cyl, cores = 2) print(kfold4) } } # Example code demonstrating the different ways to specify the number # of cores and how the cores are used # # options(mc.cores = NULL) # # # spread the 
K models over N_CORES cores (method 1) # kfold(fit, K, cores = N_CORES) # # # spread the K models over N_CORES cores (method 2) # options(mc.cores = N_CORES) # kfold(fit, K) # # # fit K models sequentially using N_CORES cores for the Markov chains each time # options(mc.cores = N_CORES) # kfold(fit, K, cores = 1) } \references{ Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. \emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4. arXiv preprint: \url{https://arxiv.org/abs/1507.04544} Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018) Using stacking to average Bayesian predictive distributions. \emph{Bayesian Analysis}, advance publication, \doi{10.1214/17-BA1091}. } rstanarm/man/loo_predict.stanreg.Rd0000644000176200001440000000732514551552004017120 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/loo-prediction.R \name{loo_predict.stanreg} \alias{loo_predict.stanreg} \alias{loo_predict} \alias{loo_linpred} \alias{loo_predictive_interval} \alias{loo_linpred.stanreg} \alias{loo_predictive_interval.stanreg} \title{Compute weighted expectations using LOO} \usage{ \method{loo_predict}{stanreg}( object, type = c("mean", "var", "quantile"), probs = 0.5, ..., psis_object = NULL ) \method{loo_linpred}{stanreg}( object, type = c("mean", "var", "quantile"), probs = 0.5, transform = FALSE, ..., psis_object = NULL ) \method{loo_predictive_interval}{stanreg}(object, prob = 0.9, ..., psis_object = NULL) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{type}{The type of expectation to compute. 
The options are \code{"mean"}, \code{"var"}, and \code{"quantile"}.} \item{probs}{For computing quantiles, a vector of probabilities.} \item{...}{Currently unused.} \item{psis_object}{An object returned by \code{\link[loo]{psis}}. If missing then \code{psis} will be run internally, which may be time consuming for models fit to very large datasets.} \item{transform}{Passed to \code{\link{posterior_linpred}}.} \item{prob}{For \code{loo_predictive_interval}, a scalar in \eqn{(0,1)} indicating the desired probability mass to include in the intervals. The default is \code{prob=0.9} (\eqn{90}\% intervals).} } \value{ A list with elements \code{value} and \code{pareto_k}. For \code{loo_predict} and \code{loo_linpred} the value component is a vector with one element per observation. For \code{loo_predictive_interval} the \code{value} component is a matrix with one row per observation and two columns (like \code{\link{predictive_interval}}). \code{loo_predictive_interval(..., prob = p)} is equivalent to \code{loo_predict(..., type = "quantile", probs = c(a, 1-a))} with \code{a = (1 - p)/2}, except it transposes the result and adds informative column names. See \code{\link[loo]{E_loo}} and \code{\link[loo]{pareto-k-diagnostic}} for details on the \code{pareto_k} diagnostic. } \description{ These functions are wrappers around the \code{\link[loo]{E_loo}} function (\pkg{loo} package) that provide compatibility for \pkg{rstanarm} models. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \dontrun{ if (!exists("example_model")) example(example_model) # optionally, log-weights can be pre-computed and reused psis_result <- loo::psis(log_ratios = -log_lik(example_model)) loo_probs <- loo_linpred(example_model, type = "mean", transform = TRUE, psis_object = psis_result) str(loo_probs) loo_pred_var <- loo_predict(example_model, type = "var", psis_object = psis_result) str(loo_pred_var) loo_pred_ints <- loo_predictive_interval(example_model, prob = 0.8, psis_object = psis_result) str(loo_pred_ints) } } } \references{ Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. \emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4. arXiv preprint: \url{https://arxiv.org/abs/1507.04544} Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018) Using stacking to average Bayesian predictive distributions. \emph{Bayesian Analysis}, advance publication, \doi{10.1214/17-BA1091}. Gabry, J. , Simpson, D. , Vehtari, A. , Betancourt, M. and Gelman, A. (2019), Visualization in Bayesian workflow. \emph{J. R. Stat. Soc. A}, 182: 389-402. doi:10.1111/rssa.12378, \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) } rstanarm/man/stanreg-objects.Rd0000644000176200001440000001334114551552005016241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanreg-objects.R \name{stanreg-objects} \alias{stanreg-objects} \title{Fitted model objects} \description{ The \pkg{rstanarm} model-fitting functions return an object of class \code{'stanreg'}, which is a list containing at a minimum the components listed below. Each \code{stanreg} object will also have additional classes (e.g. 'aov', 'betareg', 'glm', 'polr', etc.) 
and several additional components depending on the model and estimation algorithm. \cr \cr Some additional details apply to models estimated using the \code{\link{stan_mvmer}} or \code{\link{stan_jm}} modelling functions. The \code{\link{stan_mvmer}} modelling function returns an object of class \code{'stanmvreg'}, which inherits the \code{'stanreg'} class, but has a number of additional elements described in the subsection below. The \code{\link{stan_jm}} modelling function returns an object of class \code{'stanjm'}, which inherits both the \code{'stanmvreg'} and \code{'stanreg'} classes, but has a number of additional elements described in the subsection below. Both the \code{'stanjm'} and \code{'stanmvreg'} classes have several of their own methods for situations in which the default \code{'stanreg'} methods are not suitable; see the \strong{See Also} section below. } \note{ The \code{\link{stan_biglm}} function is an exception. It returns a \link[rstan:stanfit-class]{stanfit} object rather than a stanreg object. } \section{Elements for \code{stanreg} objects}{ \describe{ \item{\code{coefficients}}{ Point estimates, as described in \code{\link{print.stanreg}}. } \item{\code{ses}}{ Standard errors based on \code{\link[stats]{mad}}, as described in \code{\link{print.stanreg}}. } \item{\code{residuals}}{ Residuals of type \code{'response'}. } \item{\code{fitted.values}}{ Fitted mean values. For GLMs the linear predictors are transformed by the inverse link function. } \item{\code{linear.predictors}}{ Linear fit on the link scale. For linear models this is the same as \code{fitted.values}. } \item{\code{covmat}}{ Variance-covariance matrix for the coefficients based on draws from the posterior distribution, the variational approximation, or the asymptotic sampling distribution, depending on the estimation algorithm. } \item{\code{model,x,y}}{ If requested, the model frame, model matrix and response variable used, respectively. 
} \item{\code{family}}{ The \code{\link[stats]{family}} object used. } \item{\code{call}}{ The matched call. } \item{\code{formula}}{ The model \code{\link[stats]{formula}}. } \item{\code{data,offset,weights}}{ The \code{data}, \code{offset}, and \code{weights} arguments. } \item{\code{algorithm}}{ The estimation method used. } \item{\code{prior.info}}{ A list with information about the prior distributions used. } \item{\code{stanfit,stan_summary}}{ The object of \code{\link[rstan]{stanfit-class}} returned by RStan and a matrix of various summary statistics from the stanfit object. } \item{\code{rstan_version}}{ The version of the \pkg{rstan} package that was used to fit the model. } } } \section{Elements for \code{stanmvreg} objects}{ \describe{ The \code{stanmvreg} objects contain the majority of the elements described above for \code{stanreg} objects, but in most cases these will be a list with each element of the list corresponding to one of the submodels (for example, the \code{family} element of a \code{stanmvreg} object will be a list with each element of the list containing the \code{\link[stats]{family}} object for one submodel). In addition, \code{stanmvreg} objects contain the following additional elements: \item{\code{cnms}}{ The names of the grouping factors and group specific parameters, collapsed across the longitudinal or glmer submodels. } \item{\code{flevels}}{ The unique factor levels for each grouping factor, collapsed across the longitudinal or glmer submodels. } \item{\code{n_markers}}{ The number of longitudinal or glmer submodels. } \item{\code{n_yobs}}{ The number of observations for each longitudinal or glmer submodel. } \item{\code{n_grps}}{ The number of levels for each grouping factor (for models estimated using \code{\link{stan_jm}}, this will be equal to \code{n_subjects} if the individual is the only grouping factor). } \item{\code{runtime}}{ The time taken to fit the model (in minutes). 
} } } \section{Additional elements for \code{stanjm} objects}{ \describe{ The \code{stanjm} objects contain the elements described above for \code{stanmvreg} objects, but also contain the following additional elements: \item{\code{id_var,time_var}}{ The names of the variables distinguishing between individuals, and representing time in the longitudinal submodel. } \item{\code{n_subjects}}{ The number of individuals. } \item{\code{n_events}}{ The number of non-censored events. } \item{\code{eventtime,status}}{ The event (or censoring) time and status indicator for each individual. } \item{\code{basehaz}}{ A list containing information about the baseline hazard. } \item{\code{assoc}}{ An array containing information about the association structure. } \item{\code{epsilon}}{ The width of the one-sided difference used to numerically evaluate the slope of the longitudinal trajectory; only relevant if a slope-based association structure was specified (e.g. etaslope, muslope, etc). } \item{\code{qnodes}}{ The number of Gauss-Kronrod quadrature nodes used to evaluate the cumulative hazard in the joint likelihood function. } } } \seealso{ \code{\link{stanreg-methods}}, \code{\link{stanmvreg-methods}} } rstanarm/man/stanreg-draws-formats.Rd0000644000176200001440000000377414551552004017411 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/draws.R \name{stanreg-draws-formats} \alias{stanreg-draws-formats} \alias{as_draws} \alias{as_draws_matrix} \alias{as_draws_array} \alias{as_draws_df} \alias{as_draws_rvars} \alias{as_draws_list} \alias{as_draws.stanreg} \alias{as_draws_matrix.stanreg} \alias{as_draws_array.stanreg} \alias{as_draws_df.stanreg} \alias{as_draws_list.stanreg} \alias{as_draws_rvars.stanreg} \title{Create a \code{draws} object from a \code{stanreg} object} \usage{ \method{as_draws}{stanreg}(x, ...) \method{as_draws_matrix}{stanreg}(x, ...) \method{as_draws_array}{stanreg}(x, ...) \method{as_draws_df}{stanreg}(x, ...) 
\method{as_draws_list}{stanreg}(x, ...) \method{as_draws_rvars}{stanreg}(x, ...) } \arguments{ \item{x}{A \code{stanreg} object returned by one of the \pkg{rstanarm} modeling functions.} \item{...}{Arguments (e.g., \code{pars}, \code{regex_pars}) passed internally to \code{\link{as.matrix.stanreg}} or \code{as.array.stanreg}.} } \value{ A \code{draws} object from the \pkg{\link[posterior:posterior-package]{posterior}} package. See the \pkg{posterior} package documentation and vignettes for details on working with these objects. } \description{ Convert a \code{stanreg} object to a format supported by the \pkg{\link[posterior:posterior-package]{posterior}} package. } \details{ To subset iterations, chains, or draws, use \code{\link[posterior:subset_draws]{subset_draws}} after making the \code{draws} object. To subset variables use \code{...} to pass the \code{pars} and/or \code{regex_pars} arguments to \code{as.matrix.stanreg} or \code{as.array.stanreg} (these are called internally by \code{as_draws.stanreg}), or use \code{\link[posterior:subset_draws]{subset_draws}} after making the \code{draws} object. } \examples{ fit <- stan_glm(mpg ~ wt + as.factor(cyl), data = mtcars) as_draws_matrix(fit) # matrix format combines all chains as_draws_df(fit, regex_pars = "cyl") posterior::summarize_draws(as_draws_array(fit)) } rstanarm/man/predictive_interval.stanreg.Rd0000644000176200001440000000547414551552004020662 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predictive_interval.R \name{predictive_interval.stanreg} \alias{predictive_interval.stanreg} \alias{predictive_interval} \alias{predictive_interval.matrix} \alias{predictive_interval.ppd} \title{Predictive intervals} \usage{ \method{predictive_interval}{stanreg}( object, prob = 0.9, newdata = NULL, draws = NULL, re.form = NULL, fun = NULL, seed = NULL, offset = NULL, ... ) \method{predictive_interval}{matrix}(object, prob = 0.9, ...) 
\method{predictive_interval}{ppd}(object, prob = 0.9, ...) } \arguments{ \item{object}{Either a fitted model object returned by one of the \pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg object}) or, for the matrix method, a matrix of draws from the posterior predictive distribution returned by \code{\link{posterior_predict}}.} \item{prob}{A number \eqn{p \in (0,1)}{p (0 < p < 1)} indicating the desired probability mass to include in the intervals. The default is to report \eqn{90}\% intervals (\code{prob=0.9}) rather than the traditionally used \eqn{95}\% (see Details).} \item{newdata, draws, fun, offset, re.form, seed}{Passed to \code{\link[=posterior_predict]{posterior_predict}}.} \item{...}{Currently ignored.} } \value{ A matrix with two columns and as many rows as are in \code{newdata}. If \code{newdata} is not provided then the matrix will have as many rows as the data used to fit the model. For a given value of \code{prob}, \eqn{p}, the columns correspond to the lower and upper \eqn{100p}\% central interval limits and have the names \eqn{100\alpha/2}\% and \eqn{100(1 - \alpha/2)}\%, where \eqn{\alpha = 1-p}. For example, if \code{prob=0.9} is specified (a \eqn{90}\% interval), then the column names will be \code{"5\%"} and \code{"95\%"}, respectively. } \description{ For models fit using MCMC (\code{algorithm="sampling"}) or one of the variational approximations (\code{"meanfield"} or \code{"fullrank"}), the \code{predictive_interval} function computes Bayesian predictive intervals. The method for stanreg objects calls \code{\link{posterior_predict}} internally, whereas the method for matrices accepts the matrix returned by \code{posterior_predict} as input and can be used to avoid multiple calls to \code{posterior_predict}. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 300) predictive_interval(fit) predictive_interval(fit, newdata = data.frame(wt = range(mtcars$wt)), prob = 0.5) # stanreg vs matrix methods preds <- posterior_predict(fit, seed = 123) all.equal( predictive_interval(fit, seed = 123), predictive_interval(preds) ) } } \seealso{ \code{\link{predictive_error}}, \code{\link{posterior_predict}}, \code{\link{posterior_interval}} } rstanarm/man/stanreg-methods.Rd0000644000176200001440000001130614551552005016252 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanmvreg-methods.R, R/stanreg-methods.R \name{nobs.stanmvreg} \alias{nobs.stanmvreg} \alias{stanreg-methods} \alias{VarCorr} \alias{fixef} \alias{ranef} \alias{ngrps} \alias{sigma} \alias{nsamples} \alias{coef.stanreg} \alias{confint.stanreg} \alias{fitted.stanreg} \alias{nobs.stanreg} \alias{residuals.stanreg} \alias{se.stanreg} \alias{update.stanreg} \alias{vcov.stanreg} \alias{fixef.stanreg} \alias{ngrps.stanreg} \alias{nsamples.stanreg} \alias{ranef.stanreg} \alias{sigma.stanreg} \alias{VarCorr.stanreg} \title{Methods for stanreg objects} \usage{ \method{nobs}{stanmvreg}(object, ...) \method{coef}{stanreg}(object, ...) \method{confint}{stanreg}(object, parm, level = 0.95, ...) \method{fitted}{stanreg}(object, ...) \method{nobs}{stanreg}(object, ...) \method{residuals}{stanreg}(object, ...) \method{se}{stanreg}(object, ...) \method{update}{stanreg}(object, formula., ..., evaluate = TRUE) \method{vcov}{stanreg}(object, correlation = FALSE, ...) \method{fixef}{stanreg}(object, ...) \method{ngrps}{stanreg}(object, ...) \method{nsamples}{stanreg}(object, ...) \method{ranef}{stanreg}(object, ...) \method{sigma}{stanreg}(object, ...) \method{VarCorr}{stanreg}(x, sigma = 1, ...) } \arguments{ \item{object, x}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. 
See \code{\link{stanreg-objects}}.} \item{...}{Ignored, except by the \code{update} method. See \code{\link{update}}.} \item{parm}{For \code{confint}, an optional character vector of parameter names.} \item{level}{For \code{confint}, a scalar between \eqn{0} and \eqn{1} indicating the confidence level to use.} \item{formula., evaluate}{See \code{\link[stats]{update}}.} \item{correlation}{For \code{vcov}, if \code{FALSE} (the default) the covariance matrix is returned. If \code{TRUE}, the correlation matrix is returned instead.} \item{sigma}{Ignored (included for compatibility with \code{\link[nlme]{VarCorr}}).} } \description{ The methods documented on this page are actually some of the least important methods defined for \link[=stanreg-objects]{stanreg} objects. The most important methods are documented separately, each with its own page. Links to those pages are provided in the \strong{See Also} section, below. } \details{ The methods documented on this page are similar to the methods defined for objects of class 'lm', 'glm', 'glmer', etc. However there are a few key differences: \describe{ \item{\code{residuals}}{ Residuals are \emph{always} of type \code{"response"} (not \code{"deviance"} residuals or any other type). However, in the case of \code{\link{stan_polr}} with more than two response categories, the residuals are the difference between the latent utility and its linear predictor. } \item{\code{coef}}{ Medians are used for point estimates. See the \emph{Point estimates} section in \code{\link{print.stanreg}} for more details. } \item{\code{se}}{ The \code{se} function returns standard errors based on \code{\link{mad}}. See the \emph{Uncertainty estimates} section in \code{\link{print.stanreg}} for more details. } \item{\code{confint}}{ For models fit using optimization, confidence intervals are returned via a call to \code{\link[stats:confint]{confint.default}}. 
If \code{algorithm} is \code{"sampling"}, \code{"meanfield"}, or \code{"fullrank"}, the \code{confint} will throw an error because the \code{\link{posterior_interval}} function should be used to compute Bayesian uncertainty intervals. } \item{\code{nsamples}}{ The number of draws from the posterior distribution obtained } } } \seealso{ \itemize{ \item The \code{\link[=print.stanreg]{print}}, \code{\link[=summary.stanreg]{summary}}, and \code{\link{prior_summary}} methods for stanreg objects for information on the fitted model. \item \code{\link{launch_shinystan}} to use the ShinyStan GUI to explore a fitted \pkg{rstanarm} model. \item The \code{\link[=plot.stanreg]{plot}} method to plot estimates and diagnostics. \item The \code{\link{pp_check}} method for graphical posterior predictive checking. \item The \code{\link{posterior_predict}} and \code{\link{predictive_error}} methods for predictions and predictive errors. \item The \code{\link{posterior_interval}} and \code{\link{predictive_interval}} methods for uncertainty intervals for model parameters and predictions. \item The \code{\link[=loo.stanreg]{loo}}, \code{\link{kfold}}, and \code{\link{log_lik}} methods for leave-one-out or K-fold cross-validation, model comparison, and computing the log-likelihood of (possibly new) data. \item The \code{\link[=as.matrix.stanreg]{as.matrix}}, \code{as.data.frame}, and \code{as.array} methods to access posterior draws. } } rstanarm/man/pp_check.stanreg.Rd0000644000176200001440000001477214551552004016375 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pp_check.R \name{pp_check.stanreg} \alias{pp_check.stanreg} \alias{pp_check} \title{Graphical posterior predictive checks} \usage{ \method{pp_check}{stanreg}(object, plotfun = "dens_overlay", nreps = NULL, seed = NULL, ...) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. 
See \code{\link{stanreg-objects}}.} \item{plotfun}{A character string naming the \pkg{bayesplot} \link[bayesplot:PPC-overview]{PPC} function to use. The default is to call \code{\link[bayesplot:PPC-distributions]{ppc_dens_overlay}}. \code{plotfun} can be specified either as the full name of a \pkg{bayesplot} plotting function (e.g. \code{"ppc_hist"}) or can be abbreviated to the part of the name following the \code{"ppc_"} prefix (e.g. \code{"hist"}). To get the names of all available PPC functions see \code{\link[bayesplot]{available_ppc}}.} \item{nreps}{The number of \eqn{y^{rep}}{yrep} datasets to generate from the \link[=posterior_predict]{posterior predictive distribution} and show in the plots. The default depends on \code{plotfun}. For functions that plot each \code{yrep} dataset separately (e.g. \code{ppc_hist}), \code{nreps} defaults to a small value to make the plots readable. For functions that overlay many \code{yrep} datasets (e.g., \code{ppc_dens_overlay}) a larger number is used by default, and for other functions (e.g. \code{ppc_stat}) the default is to set \code{nreps} equal to the posterior sample size.} \item{seed}{An optional \code{\link[=set.seed]{seed}} to pass to \code{\link{posterior_predict}}.} \item{...}{Additional arguments passed to the \pkg{\link{bayesplot}} function called. For many plotting functions \code{...} is optional, however for functions that require a \code{group} or \code{x} argument, these arguments should be specified in \code{...}. If specifying \code{group} and/or \code{x}, they can be provided as either strings naming variables (in which case they are searched for in the model frame) or as vectors containing the actual values of the variables. See the \strong{Examples} section, below.} } \value{ \code{pp_check} returns a ggplot object that can be further customized using the \pkg{ggplot2} package. 
} \description{ Interface to the \link[bayesplot:PPC-overview]{PPC} (posterior predictive checking) module in the \pkg{\link{bayesplot}} package, providing various plots comparing the observed outcome variable \eqn{y} to simulated datasets \eqn{y^{rep}}{yrep} from the posterior predictive distribution. The \code{pp_check} method for \link{stanreg-objects} prepares the arguments required for the specified \pkg{bayesplot} PPC plotting function and then calls that function. It is also straightforward to use the functions from the \pkg{bayesplot} package directly rather than via the \code{pp_check} method. Examples of both are given below. } \note{ For binomial data, plots of \eqn{y} and \eqn{y^{rep}}{yrep} show the proportion of 'successes' rather than the raw count. Also for binomial models see \code{\link[bayesplot:PPC-errors]{ppc_error_binned}} for binned residual plots. } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { fit <- stan_glmer( mpg ~ wt + am + (1|cyl), data = mtcars, iter = 400, # iter and chains small just to keep example quick chains = 2, refresh = 0 ) # Compare distribution of y to distributions of multiple yrep datasets pp_check(fit) pp_check(fit, plotfun = "boxplot", nreps = 10, notch = FALSE) pp_check(fit, plotfun = "hist", nreps = 3) \donttest{ # Same plot (up to RNG noise) using bayesplot package directly bayesplot::ppc_hist(y = mtcars$mpg, yrep = posterior_predict(fit, draws = 3)) # Check histograms of test statistics by level of grouping variable 'cyl' pp_check(fit, plotfun = "stat_grouped", stat = "median", group = "cyl") # Defining a custom test statistic q25 <- function(y) quantile(y, probs = 0.25) pp_check(fit, plotfun = "stat_grouped", stat = "q25", group = "cyl") # Scatterplot of two test statistics pp_check(fit, plotfun = "stat_2d", stat = c("mean", "sd")) # Scatterplot of y vs. average yrep pp_check(fit, plotfun = "scatter_avg") # y vs. 
average yrep # Same plot (up to RNG noise) using bayesplot package directly bayesplot::ppc_scatter_avg(y = mtcars$mpg, yrep = posterior_predict(fit)) # Scatterplots of y vs. several individual yrep datasets pp_check(fit, plotfun = "scatter", nreps = 3) # Same plot (up to RNG noise) using bayesplot package directly bayesplot::ppc_scatter(y = mtcars$mpg, yrep = posterior_predict(fit, draws = 3)) # yrep intervals with y points overlaid # by default 1:length(y) used on x-axis but can also specify an x variable pp_check(fit, plotfun = "intervals") pp_check(fit, plotfun = "intervals", x = "wt") + ggplot2::xlab("wt") # Same plot (up to RNG noise) using bayesplot package directly bayesplot::ppc_intervals(y = mtcars$mpg, yrep = posterior_predict(fit), x = mtcars$wt) + ggplot2::xlab("wt") # predictive errors pp_check(fit, plotfun = "error_hist", nreps = 6) pp_check(fit, plotfun = "error_scatter_avg_vs_x", x = "wt") + ggplot2::xlab("wt") # Example of a PPC for ordinal models (stan_polr) fit2 <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit", prior = R2(0.2, "mean"), init_r = 0.1, refresh = 0) pp_check(fit2, plotfun = "bars", nreps = 500, prob = 0.5) pp_check(fit2, plotfun = "bars_grouped", group = esoph$agegp, nreps = 500, prob = 0.5) } } } \references{ Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC Press, London, third edition. (Ch. 6) Gabry, J. , Simpson, D. , Vehtari, A. , Betancourt, M. and Gelman, A. (2019), Visualization in Bayesian workflow. \emph{J. R. Stat. Soc. A}, 182: 389-402. doi:10.1111/rssa.12378, \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) } \seealso{ \itemize{ \item The vignettes in the \pkg{bayesplot} package for many examples. Examples of posterior predictive checks can also be found in the \pkg{rstanarm} vignettes and demos. 
\item \code{\link[bayesplot]{PPC-overview}} (\pkg{bayesplot}) for links to the documentation for all the available plotting functions. \item \code{\link{posterior_predict}} for drawing from the posterior predictive distribution. \item \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}} to change the color scheme of the plots. } } rstanarm/man/posterior_vs_prior.Rd0000644000176200001440000001561514551552004017125 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_vs_prior.R \name{posterior_vs_prior} \alias{posterior_vs_prior} \alias{posterior_vs_prior.stanreg} \title{Juxtapose prior and posterior} \usage{ posterior_vs_prior(object, ...) \method{posterior_vs_prior}{stanreg}( object, pars = NULL, regex_pars = NULL, prob = 0.9, color_by = c("parameter", "vs", "none"), group_by_parameter = FALSE, facet_args = list(), ... ) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{...}{The S3 generic uses \code{...} to pass arguments to any defined methods. For the method for stanreg objects, \code{...} is for arguments (other than \code{color}) passed to \code{geom_pointrange} in the \pkg{ggplot2} package to control the appearance of the plotted intervals.} \item{pars}{An optional character vector specifying a subset of parameters to display. Parameters can be specified by name or several shortcuts can be used. Using \code{pars="beta"} will restrict the displayed parameters to only the regression coefficients (without the intercept). \code{"alpha"} can also be used as a shortcut for \code{"(Intercept)"}. If the model has varying intercepts and/or slopes they can be selected using \code{pars = "varying"}. In addition, for \code{stanmvreg} objects there are some additional shortcuts available. 
Using \code{pars = "long"} will display the parameter estimates for the longitudinal submodels only (excluding group-specific parameters, but including auxiliary parameters). Using \code{pars = "event"} will display the parameter estimates for the event submodel only, including any association parameters. Using \code{pars = "assoc"} will display only the association parameters. Using \code{pars = "fixef"} will display all fixed effects, but not the random effects or the auxiliary parameters. If \code{pars} and \code{regex_pars} are set to \code{NULL} then all fixed effect regression coefficients are selected, as well as any auxiliary parameters and the log posterior. If \code{pars} is \code{NULL} all parameters are selected for a \code{stanreg} object, while for a \code{stanmvreg} object all fixed effect regression coefficients are selected as well as any auxiliary parameters and the log posterior. See \strong{Examples}.} \item{regex_pars}{An optional character vector of \link[=grep]{regular expressions} to use for parameter selection. \code{regex_pars} can be used in place of \code{pars} or in addition to \code{pars}. Currently, all functions that accept a \code{regex_pars} argument ignore it for models fit using optimization.} \item{prob}{A number \eqn{p \in (0,1)}{p (0 < p < 1)} indicating the desired posterior probability mass to include in the (central posterior) interval estimates displayed in the plot. The default is \eqn{0.9}.} \item{color_by}{How should the estimates be colored? Use \code{"parameter"} to color by parameter name, \code{"vs"} to color the prior one color and the posterior another, and \code{"none"} to use no color. 
Except when \code{color_by="none"}, a variable is mapped to the color \code{\link[ggplot2]{aes}}thetic and it is therefore also possible to change the default colors by adding one of the various discrete color scales available in \code{ggplot2} (\code{\link[ggplot2:scale_manual]{scale_color_manual}}, \code{scale_colour_brewer}, etc.). See Examples.} \item{group_by_parameter}{Should estimates be grouped together by parameter (\code{TRUE}) or by posterior and prior (\code{FALSE}, the default)?} \item{facet_args}{A named list of arguments passed to \code{\link[ggplot2]{facet_wrap}} (other than the \code{facets} argument), e.g., \code{nrow} or \code{ncol} to change the layout, \code{scales} to allow axis scales to vary across facets, etc. See Examples.} } \value{ A ggplot object that can be further customized using the \pkg{ggplot2} package. } \description{ Plot medians and central intervals comparing parameter draws from the prior and posterior distributions. If the plotted priors look different than the priors you think you specified it is likely either because of internal rescaling or the use of the \code{QR} argument (see the documentation for the \code{\link[=prior_summary.stanreg]{prior_summary}} method for details on these special cases). } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { \dontrun{ if (!exists("example_model")) example(example_model) # display non-varying (i.e. 
not group-level) coefficients posterior_vs_prior(example_model, pars = "beta") # show group-level (varying) parameters and group by parameter posterior_vs_prior(example_model, pars = "varying", group_by_parameter = TRUE, color_by = "vs") # group by parameter and allow axis scales to vary across facets posterior_vs_prior(example_model, regex_pars = "period", group_by_parameter = TRUE, color_by = "none", facet_args = list(scales = "free")) # assign to object and customize with functions from ggplot2 (gg <- posterior_vs_prior(example_model, pars = c("beta", "varying"), prob = 0.8)) gg + ggplot2::geom_hline(yintercept = 0, size = 0.3, linetype = 3) + ggplot2::coord_flip() + ggplot2::ggtitle("Comparing the prior and posterior") # compare very wide and very narrow priors using roaches example # (see help(roaches, "rstanarm") for info on the dataset) roaches$roach100 <- roaches$roach1 / 100 wide_prior <- normal(0, 10) narrow_prior <- normal(0, 0.1) fit_pois_wide_prior <- stan_glm(y ~ treatment + roach100 + senior, offset = log(exposure2), family = "poisson", data = roaches, prior = wide_prior) posterior_vs_prior(fit_pois_wide_prior, pars = "beta", prob = 0.5, group_by_parameter = TRUE, color_by = "vs", facet_args = list(scales = "free")) fit_pois_narrow_prior <- update(fit_pois_wide_prior, prior = narrow_prior) posterior_vs_prior(fit_pois_narrow_prior, pars = "beta", prob = 0.5, group_by_parameter = TRUE, color_by = "vs", facet_args = list(scales = "free")) # look at cutpoints for ordinal model fit_polr <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit", prior = R2(0.2, "mean"), init_r = 0.1) (gg_polr <- posterior_vs_prior(fit_polr, regex_pars = "\\\\|", color_by = "vs", group_by_parameter = TRUE)) # flip the x and y axes gg_polr + ggplot2::coord_flip() } } } \references{ Gabry, J. , Simpson, D. , Vehtari, A. , Betancourt, M. and Gelman, A. (2019), Visualization in Bayesian workflow. \emph{J. R. Stat. Soc. A}, 182: 389-402. 
doi:10.1111/rssa.12378, \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) } rstanarm/man/stan_clogit.Rd0000644000176200001440000001735114551552005015462 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_clogit.R \name{stan_clogit} \alias{stan_clogit} \title{Conditional logistic (clogit) regression models via Stan} \usage{ stan_clogit( formula, data, subset, na.action = NULL, contrasts = NULL, ..., strata, prior = normal(autoscale = TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE ) } \arguments{ \item{formula, data, subset, na.action, contrasts}{Same as for \code{\link[lme4]{glmer}}, except that any global intercept included in the formula will be dropped. \emph{We strongly advise against omitting the \code{data} argument}. Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{...}{Further arguments passed to the function in the \pkg{rstan} package (\code{\link[rstan:stanmodel-method-sampling]{sampling}}, \code{\link[rstan:stanmodel-method-vb]{vb}}, or \code{\link[rstan:stanmodel-method-optimizing]{optimizing}}), corresponding to the estimation method named by \code{algorithm}. For example, if \code{algorithm} is \code{"sampling"} it is possible to specify \code{iter}, \code{chains}, \code{cores}, and other MCMC controls. Another useful argument that can be passed to \pkg{rstan} via \code{...} is \code{refresh}, which specifies how often to print updates when sampling (i.e., show the progress every \code{refresh} iterations). 
\code{refresh=0} turns off the iteration updates.} \item{strata}{A factor indicating the groups in the data where the number of successes (possibly one) is fixed by the research design. It may be useful to use \code{\link{interaction}} or \code{\link[survival]{strata}} to create this factor. However, the \code{strata} argument must not rely on any object besides the \code{data} \code{\link{data.frame}}.} \item{prior}{The prior distribution for the (non-hierarchical) regression coefficients. The default priors are described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior} should be a call to one of the various functions provided by \pkg{rstanarm} for specifying priors. The subset of these functions that can be used for the prior on the coefficients can be grouped into several "families": \tabular{ll}{ \strong{Family} \tab \strong{Functions} \cr \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr \emph{Product normal family} \tab \code{product_normal} \cr } See the \link[=priors]{priors help page} for details on the families and how to specify the arguments for all of the functions in the table above. To omit a prior ---i.e., to use a flat (improper) uniform prior--- \code{prior} can be set to \code{NULL}, although this is rarely a good idea. \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t family or Laplace family, and if the \code{autoscale} argument to the function used to specify the prior (e.g. \code{\link{normal}}) is left at its default and recommended value of \code{TRUE}, then the default or user-specified prior scale(s) may be adjusted internally based on the scales of the predictors. 
See the \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette for details on the rescaling and the \code{\link{prior_summary}} function for a summary of the priors used for a particular model.} \item{prior_covariance}{Cannot be \code{NULL} when lme4-style group-specific terms are included in the \code{formula}. See \code{\link{decov}} for more information about the default arguments. Ignored when there are no group-specific terms.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating whether to use a sparse representation of the design (X) matrix. 
If \code{TRUE}, the design matrix is not centered (since that would destroy the sparsity) and likewise it is not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}. Depending on how many zeros there are in the design matrix, setting \code{sparse = TRUE} may make the code run faster and can consume much less RAM.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_clogit}. } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} A model for case-control studies with optional prior distributions for the coefficients, intercept, and auxiliary parameters. } \details{ The \code{stan_clogit} function is mostly similar in syntax to \code{\link[survival]{clogit}} but rather than performing maximum likelihood estimation of generalized linear models, full Bayesian estimation is performed (if \code{algorithm} is \code{"sampling"}) via MCMC. The Bayesian model adds priors (independent by default) on the coefficients of the GLM. The \code{data.frame} passed to the \code{data} argument must be sorted by the variable passed to the \code{strata} argument. The \code{formula} may have group-specific terms like in \code{\link{stan_glmer}} but should not allow the intercept to vary by the stratifying variable, since there is no information in the data with which to estimate such deviations in the intercept. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { dat <- infert[order(infert$stratum), ] # order by strata post <- stan_clogit(case ~ spontaneous + induced + (1 | education), strata = stratum, data = dat, subset = parity <= 2, QR = TRUE, chains = 2, iter = 500) # for speed only nd <- dat[dat$parity > 2, c("case", "spontaneous", "induced", "education", "stratum")] # next line would fail without case and stratum variables pr <- posterior_epred(post, newdata = nd) # get predicted probabilities # not a random variable b/c probabilities add to 1 within strata all.equal(rep(sum(nd$case), nrow(pr)), rowSums(pr)) } } \seealso{ \code{\link{stanreg-methods}} and \code{\link[survival]{clogit}}. The vignette for Bernoulli and binomial models, which has more details on using \code{stan_clogit}. \url{https://mc-stan.org/rstanarm/articles/} } rstanarm/man/available-models.Rd0000644000176200001440000001122214551552004016343 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-modeling-functions.R \name{available-models} \alias{available-models} \title{Modeling functions available in \pkg{rstanarm}} \description{ Modeling functions available in \pkg{rstanarm} } \section{Modeling functions}{ The model estimating functions are described in greater detail in their individual help pages and vignettes. Here we provide a very brief overview: \describe{ \item{\code{\link{stan_lm}}, \code{stan_aov}, \code{stan_biglm}}{ Similar to \code{\link[stats]{lm}} or \code{\link[stats]{aov}} but with novel regularizing priors on the model parameters that are driven by prior beliefs about \eqn{R^2}, the proportion of variance in the outcome attributable to the predictors in a linear model. 
} \item{\code{\link{stan_glm}}, \code{stan_glm.nb}}{ Similar to \code{\link[stats]{glm}} but with various possible prior distributions for the coefficients and, if applicable, a prior distribution for any auxiliary parameter in a Generalized Linear Model (GLM) that is characterized by a \code{\link[stats]{family}} object (e.g. the shape parameter in Gamma models). It is also possible to estimate a negative binomial model in a similar way to the \code{\link[MASS]{glm.nb}} function in the \pkg{MASS} package. } \item{\code{\link{stan_glmer}}, \code{stan_glmer.nb}, \code{stan_lmer}}{ Similar to the \code{\link[lme4]{glmer}}, \code{\link[lme4]{glmer.nb}} and \code{\link[lme4]{lmer}} functions in the \pkg{lme4} package in that GLMs are augmented to have group-specific terms that deviate from the common coefficients according to a mean-zero multivariate normal distribution with a highly-structured but unknown covariance matrix (for which \pkg{rstanarm} introduces an innovative prior distribution). MCMC provides more appropriate estimates of uncertainty for models that consist of a mix of common and group-specific parameters. } \item{\code{\link{stan_nlmer}}}{ Similar to \code{\link[lme4]{nlmer}} in the \pkg{lme4} package for nonlinear "mixed-effects" models, but the group-specific coefficients have flexible priors on their unknown covariance matrices. } \item{\code{\link{stan_gamm4}}}{ Similar to \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package, which augments a GLM (possibly with group-specific terms) with nonlinear smooth functions of the predictors to form a Generalized Additive Mixed Model (GAMM). Rather than calling \code{\link[lme4]{glmer}} like \code{\link[gamm4]{gamm4}} does, \code{\link{stan_gamm4}} essentially calls \code{\link{stan_glmer}}, which avoids the optimization issues that often crop up with GAMMs and provides better estimates for the uncertainty of the parameter estimates. 
} \item{\code{\link{stan_polr}}}{ Similar to \code{\link[MASS]{polr}} in the \pkg{MASS} package in that it models an ordinal response, but the Bayesian model also implies a prior distribution on the unknown cutpoints. Can also be used to model binary outcomes, possibly while estimating an unknown exponent governing the probability of success. } \item{\code{\link{stan_betareg}}}{ Similar to \code{\link[betareg]{betareg}} in that it models an outcome that is a rate (proportion) but, rather than performing maximum likelihood estimation, full Bayesian estimation is performed by default, with customizable prior distributions for all parameters. } \item{\code{\link{stan_clogit}}}{ Similar to \code{\link[survival]{clogit}} in that it models an binary outcome where the number of successes and failures is fixed within each stratum by the research design. There are some minor syntactical differences relative to \code{\link[survival]{clogit}} that allow \code{stan_clogit} to accept group-specific terms as in \code{\link{stan_glmer}}. } \item{\code{\link{stan_mvmer}}}{ A multivariate form of \code{\link{stan_glmer}}, whereby the user can specify one or more submodels each consisting of a GLM with group-specific terms. If more than one submodel is specified (i.e. there is more than one outcome variable) then a dependence is induced by assuming that the group-specific terms for each grouping factor are correlated across submodels. } \item{\code{\link{stan_jm}}}{ Estimates shared parameter joint models for longitudinal and time-to-event (i.e. survival) data. The joint model can be univariate (i.e. one longitudinal outcome) or multivariate (i.e. more than one longitudinal outcome). A variety of parameterisations are available for linking the longitudinal and event processes (i.e. a variety of association structures). 
} } } \seealso{ \url{https://mc-stan.org/rstanarm/} } rstanarm/man/posterior_interval.stanreg.Rd0000644000176200001440000001140514551552004020541 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/posterior_interval.R \name{posterior_interval.stanreg} \alias{posterior_interval.stanreg} \alias{posterior_interval} \title{Posterior uncertainty intervals} \usage{ \method{posterior_interval}{stanreg}( object, prob = 0.9, type = "central", pars = NULL, regex_pars = NULL, ... ) } \arguments{ \item{object}{A fitted model object returned by one of the \pkg{rstanarm} modeling functions. See \code{\link{stanreg-objects}}.} \item{prob}{A number \eqn{p \in (0,1)}{p (0 < p < 1)} indicating the desired probability mass to include in the intervals. The default is to report \eqn{90}\% intervals (\code{prob=0.9}) rather than the traditionally used \eqn{95}\% (see Details).} \item{type}{The type of interval to compute. Currently the only option is \code{"central"} (see Details). A central \eqn{100p}\% interval is defined by the \eqn{\alpha/2} and \eqn{1 - \alpha/2} quantiles, where \eqn{\alpha = 1 - p}.} \item{pars}{An optional character vector of parameter names.} \item{regex_pars}{An optional character vector of \link[=grep]{regular expressions} to use for parameter selection. \code{regex_pars} can be used in place of \code{pars} or in addition to \code{pars}. Currently, all functions that accept a \code{regex_pars} argument ignore it for models fit using optimization.} \item{...}{Currently ignored.} } \value{ A matrix with two columns and as many rows as model parameters (or the subset of parameters specified by \code{pars} and/or \code{regex_pars}). For a given value of \code{prob}, \eqn{p}, the columns correspond to the lower and upper \eqn{100p}\% interval limits and have the names \eqn{100\alpha/2}\% and \eqn{100(1 - \alpha/2)}\%, where \eqn{\alpha = 1-p}. 
For example, if \code{prob=0.9} is specified (a \eqn{90}\% interval), then the column names will be \code{"5\%"} and \code{"95\%"}, respectively. } \description{ For models fit using MCMC (\code{algorithm="sampling"}) or one of the variational approximations (\code{"meanfield"} or \code{"fullrank"}), the \code{posterior_interval} function computes Bayesian posterior uncertainty intervals. These intervals are often referred to as \emph{credible} intervals, but we use the term \emph{uncertainty} intervals to highlight the fact that wider intervals correspond to greater uncertainty. } \details{ \subsection{Interpretation}{ Unlike for a frenquentist confidence interval, it is valid to say that, conditional on the data and model, we believe that with probability \eqn{p} the value of a parameter is in its \eqn{100p}\% posterior interval. This intuitive interpretation of Bayesian intervals is often erroneously applied to frequentist confidence intervals. See Morey et al. (2015) for more details on this issue and the advantages of using Bayesian posterior uncertainty intervals (also known as credible intervals). } \subsection{Default 90\% intervals}{ We default to reporting \eqn{90}\% intervals rather than \eqn{95}\% intervals for several reasons: \itemize{ \item Computational stability: \eqn{90}\% intervals are more stable than \eqn{95}\% intervals (for which each end relies on only \eqn{2.5}\% of the posterior draws). \item Relation to Type-S errors (Gelman and Carlin, 2014): \eqn{95}\% of the mass in a \eqn{90}\% central interval is above the lower value (and \eqn{95}\% is below the upper value). For a parameter \eqn{\theta}, it is therefore easy to see if the posterior probability that \eqn{\theta > 0} (or \eqn{\theta < 0}) is larger or smaller than \eqn{95}\%. } Of course, if \eqn{95}\% intervals are desired they can be computed by specifying \code{prob=0.95}. 
} \subsection{Types of intervals}{ Currently \code{posterior_interval} only computes central intervals because other types of intervals are rarely useful for the models that \pkg{rstanarm} can estimate. Additional possibilities may be provided in future releases as more models become available. } } \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { if (!exists("example_model")) example(example_model) posterior_interval(example_model) posterior_interval(example_model, regex_pars = "herd") posterior_interval(example_model, pars = "period2", prob = 0.5) } } \references{ Gelman, A. and Carlin, J. (2014). Beyond power calculations: assessing Type S (sign) and Type M (magnitude) errors. \emph{Perspectives on Psychological Science}. 9(6), 641--51. Morey, R. D., Hoekstra, R., Rouder, J., Lee, M. D., and Wagenmakers, E. (2016). The fallacy of placing confidence in confidence intervals. \emph{Psychonomic Bulletin & Review}. 23(1), 103--123. } \seealso{ \code{\link{confint.stanreg}}, which, for models fit using optimization, can be used to compute traditional confidence intervals. \code{\link{predictive_interval}} for predictive intervals. } rstanarm/man/model.frame.stanreg.Rd0000644000176200001440000000076014551552005017003 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stanreg-methods.R \name{model.frame.stanreg} \alias{model.frame.stanreg} \title{model.frame method for stanreg objects} \usage{ \method{model.frame}{stanreg}(formula, fixed.only = FALSE, ...) 
} \arguments{ \item{formula, ...}{See \code{\link[stats]{model.frame}}.} \item{fixed.only}{See \code{\link[lme4:merMod-class]{model.frame.merMod}}.} } \description{ model.frame method for stanreg objects } \keyword{internal} rstanarm/man/stan_glmer.Rd0000644000176200001440000002734114551552005015307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stan_glmer.R \name{stan_glmer} \alias{stan_glmer} \alias{stan_lmer} \alias{stan_glmer.nb} \title{Bayesian generalized linear models with group-specific terms via Stan} \usage{ stan_glmer( formula, data = NULL, family = gaussian, subset, weights, na.action = getOption("na.action", "na.omit"), offset, contrasts = NULL, ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE ) stan_lmer( formula, data = NULL, subset, weights, na.action = getOption("na.action", "na.omit"), offset, contrasts = NULL, ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE ) stan_glmer.nb( formula, data = NULL, subset, weights, na.action = getOption("na.action", "na.omit"), offset, contrasts = NULL, link = "log", ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE ) } \arguments{ \item{formula, data}{Same as for \code{\link[lme4]{glmer}}. \emph{We strongly advise against omitting the \code{data} argument}. 
Unless \code{data} is specified (and is a data frame) many post-estimation functions (including \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work properly.} \item{family}{Same as for \code{\link[lme4]{glmer}} except it is also possible to use \code{family=mgcv::betar} to estimate a Beta regression with \code{stan_glmer}.} \item{subset, weights, offset}{Same as \code{\link[stats]{glm}}.} \item{na.action, contrasts}{Same as \code{\link[stats]{glm}}, but rarely specified.} \item{...}{For \code{stan_glmer}, further arguments passed to \code{\link[rstan:stanmodel-method-sampling]{sampling}} (e.g. \code{iter}, \code{chains}, \code{cores}, etc.) or to \code{\link[rstan:stanmodel-method-vb]{vb}} (if \code{algorithm} is \code{"meanfield"} or \code{"fullrank"}). For \code{stan_lmer} and \code{stan_glmer.nb}, \code{...} should also contain all relevant arguments to pass to \code{stan_glmer} (except \code{family}).} \item{prior}{The prior distribution for the (non-hierarchical) regression coefficients. The default priors are described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior} should be a call to one of the various functions provided by \pkg{rstanarm} for specifying priors. The subset of these functions that can be used for the prior on the coefficients can be grouped into several "families": \tabular{ll}{ \strong{Family} \tab \strong{Functions} \cr \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr \emph{Product normal family} \tab \code{product_normal} \cr } See the \link[=priors]{priors help page} for details on the families and how to specify the arguments for all of the functions in the table above. 
To omit a prior ---i.e., to use a flat (improper) uniform prior--- \code{prior} can be set to \code{NULL}, although this is rarely a good idea. \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t family or Laplace family, and if the \code{autoscale} argument to the function used to specify the prior (e.g. \code{\link{normal}}) is left at its default and recommended value of \code{TRUE}, then the default or user-specified prior scale(s) may be adjusted internally based on the scales of the predictors. See the \link[=priors]{priors help page} and the \emph{Prior Distributions} vignette for details on the rescaling and the \code{\link{prior_summary}} function for a summary of the priors used for a particular model.} \item{prior_intercept}{The prior distribution for the intercept (after centering all predictors, see note below). The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior_intercept} can be a call to \code{normal}, \code{student_t} or \code{cauchy}. See the \link[=priors]{priors help page} for details on these functions. To omit a prior on the intercept ---i.e., to use a flat (improper) uniform prior--- \code{prior_intercept} can be set to \code{NULL}. \strong{Note:} If using a dense representation of the design matrix ---i.e., if the \code{sparse} argument is left at its default value of \code{FALSE}--- then the prior distribution for the intercept is set so it applies to the value \emph{when all predictors are centered} (you don't need to manually center them). 
This is explained further in [Prior Distributions for rstanarm Models](https://mc-stan.org/rstanarm/articles/priors.html) If you prefer to specify a prior on the intercept without the predictors being auto-centered, then you have to omit the intercept from the \code{\link[stats]{formula}} and include a column of ones as a predictor, in which case some element of \code{prior} specifies the prior on it, rather than \code{prior_intercept}. Regardless of how \code{prior_intercept} is specified, the reported \emph{estimates} of the intercept always correspond to a parameterization without centered predictors (i.e., same as in \code{glm}).} \item{prior_aux}{The prior distribution for the "auxiliary" parameter (if applicable). The "auxiliary" parameter refers to a different parameter depending on the \code{family}. For Gaussian models \code{prior_aux} controls \code{"sigma"}, the error standard deviation. For negative binomial models \code{prior_aux} controls \code{"reciprocal_dispersion"}, which is similar to the \code{"size"} parameter of \code{\link[stats:NegBinomial]{rnbinom}}: smaller values of \code{"reciprocal_dispersion"} correspond to greater dispersion. For gamma models \code{prior_aux} sets the prior on to the \code{"shape"} parameter (see e.g., \code{\link[stats:GammaDist]{rgamma}}), and for inverse-Gaussian models it is the so-called \code{"lambda"} parameter (which is essentially the reciprocal of a scale parameter). Binomial and Poisson models do not have auxiliary parameters. The default prior is described in the vignette \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}}. If not using the default, \code{prior_aux} can be a call to \code{exponential} to use an exponential distribution, or \code{normal}, \code{student_t} or \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these functions. 
To omit a prior ---i.e., to use a flat (improper) uniform prior--- set \code{prior_aux} to \code{NULL}.} \item{prior_covariance}{Cannot be \code{NULL}; see \code{\link{decov}} for more information about the default arguments.} \item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating whether to draw from the prior predictive distribution instead of conditioning on the outcome.} \item{algorithm}{A string (possibly abbreviated) indicating the estimation approach to use. Can be \code{"sampling"} for MCMC (the default), \code{"optimizing"} for optimization, \code{"meanfield"} for variational inference with independent normal distributions, or \code{"fullrank"} for variational inference with a multivariate normal distribution. See \code{\link{rstanarm-package}} for more details on the estimation algorithms. NOTE: not all fitting functions support all four algorithms.} \item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See the \link{adapt_delta} help page for details.} \item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} decomposition to the design matrix. The transformation does not change the likelihood of the data but is recommended for computational reasons when there are multiple predictors. See the \link{QR-argument} documentation page for details on how \pkg{rstanarm} does the transformation and important information about how to interpret the prior distributions of the model parameters when using \code{QR=TRUE}.} \item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating whether to use a sparse representation of the design (X) matrix. If \code{TRUE}, the the design matrix is not centered (since that would destroy the sparsity) and likewise it is not possible to specify both \code{QR = TRUE} and \code{sparse = TRUE}. 
Depending on how many zeros there are in the design matrix, setting \code{sparse = TRUE} may make the code run faster and can consume much less RAM.} \item{link}{For \code{stan_glmer.nb} only, the link function to use. See \code{\link{neg_binomial_2}}.} } \value{ A \link[=stanreg-objects]{stanreg} object is returned for \code{stan_glmer, stan_lmer, stan_glmer.nb}. A list with classes \code{stanreg}, \code{glm}, \code{lm}, and \code{lmerMod}. The conventions for the parameter names are the same as in the lme4 package with the addition that the standard deviation of the errors is called \code{sigma} and the variance-covariance matrix of the group-specific deviations from the common parameters is called \code{Sigma}, even if this variance-covariance matrix only has one row and one column (in which case it is just the group-level variance). } \description{ \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} Bayesian inference for GLMs with group-specific coefficients that have unknown covariance matrices with flexible priors. } \details{ The \code{stan_glmer} function is similar in syntax to \code{\link[lme4]{glmer}} but rather than performing (restricted) maximum likelihood estimation of generalized linear models, Bayesian estimation is performed via MCMC. The Bayesian model adds priors on the regression coefficients (in the same way as \code{\link{stan_glm}}) and priors on the terms of a decomposition of the covariance matrices of the group-specific parameters. See \code{\link{priors}} for more information about the priors. The \code{stan_lmer} function is equivalent to \code{stan_glmer} with \code{family = gaussian(link = "identity")}. The \code{stan_glmer.nb} function, which takes the extra argument \code{link}, is a wrapper for \code{stan_glmer} with \code{family = \link{neg_binomial_2}(link)}. 
} \examples{ if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { # see help(example_model) for details on the model below if (!exists("example_model")) example(example_model) print(example_model, digits = 1) } } \references{ Gelman, A. and Hill, J. (2007). \emph{Data Analysis Using Regression and Multilevel/Hierarchical Models.} Cambridge University Press, Cambridge, UK. (Ch. 11-15) Muth, C., Oravecz, Z., and Gabry, J. (2018) User-friendly Bayesian regression modeling: A tutorial with rstanarm and shinystan. \emph{The Quantitative Methods for Psychology}. 14(2), 99--119. \url{https://www.tqmp.org/RegularArticles/vol14-2/p099/p099.pdf} } \seealso{ \code{\link{stanreg-methods}} and \code{\link[lme4]{glmer}}. The vignette for \code{stan_glmer} and the \emph{Hierarchical Partial Pooling} vignette. \url{https://mc-stan.org/rstanarm/articles/} } rstanarm/DESCRIPTION0000644000176200001440000000705614552326563013632 0ustar liggesusersPackage: rstanarm Type: Package Title: Bayesian Applied Regression Modeling via Stan Version: 2.32.1 Date: 2024-01-15 Encoding: UTF-8 Authors@R: c(person("Jonah", "Gabry", email = "jsg2201@columbia.edu", role = "aut"), person("Imad", "Ali", role = "ctb"), person("Sam", "Brilleman", role = "ctb"), person(given = "Jacqueline Buros", family = "Novik", role = "ctb", comment = "R/stan_jm.R"), person("AstraZeneca", role = "ctb", comment = "R/stan_jm.R"), person("Trustees of", "Columbia University", role = "cph"), person("Simon", "Wood", role = "cph", comment = "R/stan_gamm4.R"), person("R Core", "Deveopment Team", role = "cph", comment = "R/stan_aov.R"), person("Douglas", "Bates", role = "cph", comment = "R/pp_data.R"), person("Martin", "Maechler", role = "cph", comment = "R/pp_data.R"), person("Ben", "Bolker", role = "cph", comment = "R/pp_data.R"), person("Steve", "Walker", role = "cph", comment = "R/pp_data.R"), person("Brian", "Ripley", role = "cph", comment = "R/stan_aov.R, R/stan_polr.R"), person("William", "Venables", role = 
"cph", comment = "R/stan_polr.R"), person("Paul-Christian", "Burkner", email = "paul.buerkner@gmail.com", role = "cph", comment = "R/misc.R"), person("Ben", "Goodrich", email = "benjamin.goodrich@columbia.edu", role = c("cre", "aut"))) Description: Estimates previously compiled regression models using the 'rstan' package, which provides the R interface to the Stan C++ library for Bayesian estimation. Users specify models via the customary R syntax with a formula and data.frame plus some additional arguments for priors. License: GPL (>= 3) Depends: R (>= 3.4.0), Rcpp (>= 0.12.0), methods Imports: bayesplot (>= 1.7.0), ggplot2 (>= 2.2.1), lme4 (>= 1.1-8), loo (>= 2.1.0), Matrix (>= 1.2-13), nlme (>= 3.1-124), posterior, rstan (>= 2.32.0), rstantools (>= 2.1.0), shinystan (>= 2.3.0), stats, survival (>= 2.40.1), RcppParallel (>= 5.0.1), utils Suggests: biglm, betareg, data.table (>= 1.10.0), digest, gridExtra, HSAUR3, knitr (>= 1.15.1), MASS, mgcv (>= 1.8-13), rmarkdown, roxygen2, StanHeaders (>= 2.21.0), testthat (>= 1.0.2), gamm4, shiny, V8 LinkingTo: StanHeaders (>= 2.32.0), rstan (>= 2.32.0), BH (>= 1.72.0-2), Rcpp (>= 0.12.0), RcppEigen (>= 0.3.3.3.0), RcppParallel (>= 5.0.1) SystemRequirements: GNU make, pandoc (>= 1.12.3), pandoc-citeproc VignetteBuilder: knitr LazyData: true UseLTO: true NeedsCompilation: yes URL: https://mc-stan.org/rstanarm/, https://discourse.mc-stan.org BugReports: https://github.com/stan-dev/rstanarm/issues RoxygenNote: 7.2.3 Packaged: 2024-01-16 19:07:49 UTC; ben Author: Jonah Gabry [aut], Imad Ali [ctb], Sam Brilleman [ctb], Jacqueline Buros Novik [ctb] (R/stan_jm.R), AstraZeneca [ctb] (R/stan_jm.R), Trustees of Columbia University [cph], Simon Wood [cph] (R/stan_gamm4.R), R Core Deveopment Team [cph] (R/stan_aov.R), Douglas Bates [cph] (R/pp_data.R), Martin Maechler [cph] (R/pp_data.R), Ben Bolker [cph] (R/pp_data.R), Steve Walker [cph] (R/pp_data.R), Brian Ripley [cph] (R/stan_aov.R, R/stan_polr.R), William Venables [cph] 
(R/stan_polr.R), Paul-Christian Burkner [cph] (R/misc.R), Ben Goodrich [cre, aut] Maintainer: Ben Goodrich Repository: CRAN Date/Publication: 2024-01-18 23:00:03 UTC rstanarm/build/0000755000176200001440000000000014551551771013213 5ustar liggesusersrstanarm/build/vignette.rds0000644000176200001440000000122614551551771015553 0ustar liggesusersTo0ڮ@@X W⡣l*m=8rU㉿?rN2~}y^k4`ö~ KG{ chjQhwHLSŰF籌8;d e2K dk DZJNDJQ(.RMc{)S 5,r kL#~ ʐԞS[=#KȜjv41SMћj2$Wi~=qhy=&c{5=kĔUSR2K%fT}[­$PB@ ,xZ߱" ڜ2AB21Zls<ͯ0=1$7Y|v͙*XfoEXәi2P8rC$_RF]ti#LnvTO@![LFBH f "T!zvpd`}L-;̉y-iť#w%'Z:bÐۋ0'1첷$R׎@L9z6j*4b=l2_r‚RƗv{P>ט%,݄?T Wuܜ9gG~n?(4u9͜-؃/kȉ rstanarm/build/partial.rdb0000644000176200001440000000007414551546373015343 0ustar liggesusersb```b`a 00 FN ͚Z d@$$7rstanarm/tests/0000755000176200001440000000000014551535021013244 5ustar liggesusersrstanarm/tests/testthat/0000755000176200001440000000000014552326563015116 5ustar liggesusersrstanarm/tests/testthat/test_plots.R0000644000176200001440000001457614370470372017452 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm)) SEED <- 123 ITER <- 10 CHAINS <- 2 CORES <- 1 if (!exists("example_model")) { example_model <- run_example_model() } fit <- example_model SW(fito <- stan_glm(mpg ~ ., data = mtcars, algorithm = "optimizing", seed = SEED, refresh = 0)) SW(fitvb <- update(fito, algorithm = "meanfield")) # plot.stanreg ------------------------------------------------------------ context("plot.stanreg") test_that("plot.stanreg errors if chains = 1 but needs multiple", { multiple_chain_plots <- c("trace_highlight", "hist_by_chain", "dens_overlay", "violin") SW(fit_1chain <- stan_glm(mpg ~ wt, data = mtcars, chains = 1, iter = 100, refresh = 0)) for (f in multiple_chain_plots) { expect_error(plot(fit_1chain, plotfun = f), info = f, regexp = "requires multiple chains") } }) test_that("other plot.stanreg errors thrown correctly", { expect_error(plot(fit, plotfun = "9999"), "not a valid MCMC function name") expect_error(plot(fit, plotfun = "ppc_hist"), "use the 'pp_check' method") expect_error(plot(fit, plotfun = "stan_diag"), "help('NUTS', 'bayesplot')", fixed = TRUE) }) test_that("plot.stanreg returns correct object", { # ggplot objects ggplot_object_plots <- c( "intervals", "areas", "dens", "dens_overlay", "hist", "hist_by_chain", "trace", "trace_highlight", "violin", "rhat", "rhat_hist", "neff", "neff_hist", "ess", "acf", "acf_bar", "ac" ) for (f in ggplot_object_plots) expect_gg(plot(fit, f)) # requires exactly 2 parameters expect_gg(plot(fit, "scat", pars = c("period2", "period3"))) }) test_that("plot method returns correct object for nuts diagnostic plots", { # energy plot returns ggplot object expect_gg(plot(fit, "nuts_energy")) # others return gtable objects gtable_object_plots <- paste0("nuts_", c("stepsize", "acceptance", "divergence", "treedepth")) for (f in gtable_object_plots) expect_s3_class(plot(fit, plotfun = f), "gtable") }) test_that("plot.stanreg ok for optimization", { expect_gg(plot(fito)) expect_gg(plot(fito, "areas")) 
expect_gg(plot(fito, "dens")) expect_gg(plot(fito, "scatter", pars = c("wt", "cyl"))) expect_gg(plot(fito, pars = c("alpha", "beta"))) expect_warning(plot(fito, regex_pars = "wt"), regexp = "'regex_pars' ignored") expect_error(plot(fito, "trace"), regexp = "only available for models fit using MCMC") expect_error(plot(fito, "nuts_acceptance"), regexp = "only available for models fit using MCMC") expect_error(plot(fito, "rhat_hist"), regexp = "only available for models fit using MCMC") }) test_that("plot.stanreg ok for vb", { expect_gg(plot(fitvb)) expect_gg(plot(fitvb, "areas")) expect_gg(plot(fitvb, "dens")) expect_gg(plot(fitvb, "scatter", pars = c("wt", "cyl"))) expect_gg(plot(fitvb, pars = c("alpha", "beta"))) expect_error(plot(fitvb, "trace"), regexp = "only available for models fit using MCMC") expect_error(plot(fitvb, "nuts_acceptance"), regexp = "only available for models fit using MCMC") expect_error(plot(fitvb, "rhat_hist"), regexp = "only available for models fit using MCMC") expect_error(plot(fitvb, "mcmc_neff"), regexp = "only available for models fit using MCMC") }) # pairs.stanreg ----------------------------------------------------------- context("pairs.stanreg") test_that("pairs method ok", { expect_silent(pairs(fit, pars = c("period2", "log-posterior"))) expect_silent(pairs(fit, pars = "b[(Intercept) herd:15]", regex_pars = "Sigma")) expect_silent(pairs(fit, pars = "b[(Intercept) herd:15]", regex_pars = "Sigma", condition = pairs_condition(nuts = "lp__"))) expect_error(pairs(fitvb), regexp = "only available for models fit using MCMC") expect_error(pairs(fito), regexp = "only available for models fit using MCMC") }) # posterior_vs_prior ------------------------------------------------------ context("posterior_vs_prior") test_that("posterior_vs_prior ok", { SW(p1 <- posterior_vs_prior(fit, pars = "beta")) expect_gg(p1) SW(p2 <- posterior_vs_prior(fit, pars = "varying", group_by_parameter = TRUE, color_by = "vs")) expect_gg(p2) SW(p3 <- 
posterior_vs_prior(fit, regex_pars = "period", group_by_parameter = FALSE, color_by = "none", facet_args = list(scales = "free", nrow = 2))) expect_gg(p3) SW(fit_polr <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit", prior = R2(0.2, "mean"), init_r = 0.1, seed = SEED, chains = CHAINS, cores = CORES, iter = 100, refresh = 0)) SW(p4 <- posterior_vs_prior(fit_polr)) SW(p5 <- posterior_vs_prior(fit_polr, regex_pars = "\\|", group_by_parameter = TRUE, color_by = "vs")) expect_gg(p4) expect_gg(p5) }) test_that("posterior_vs_prior throws errors", { lmfit <- lm(mpg ~ wt, data = mtcars) expect_error(posterior_vs_prior(lmfit), "no applicable method") expect_error(posterior_vs_prior(fit, prob = 1), "prob < 1") expect_error(posterior_vs_prior(fito), "only available for models fit using MCMC") expect_error(posterior_vs_prior(fitvb), "only available for models fit using MCMC") }) rstanarm/tests/testthat/test_stan_functions.R0000644000176200001440000005536214500256225021336 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017, 2018, 2019 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
Sys.setenv(USE_CXX17 = 1) set.seed(12345) MODELS_HOME <- "stan_files" INCLUDE_DIR <- "include" context("setup") test_that("Stan programs are available", { expect_true(file.exists(MODELS_HOME)) }) library(rstan) Sys.unsetenv("R_TESTS") # TBB <- system.file("lib", .Platform$r_arch, package = "RcppParallel", mustWork = TRUE) # SH <- system.file(ifelse(.Platform$OS.type == "windows", "libs", "lib"), # .Platform$r_arch, package = "StanHeaders", mustWork = TRUE) # Sys.setenv(LOCAL_LIBS = paste0("-L", shQuote(TBB), " -tbb -tbbmalloc ", # "-L", shQuote(SH) , " -lStanHeaders")) # Sys.setenv(PKG_LIBS = Sys.getenv("LOCAL_LIBS")) # Eigen <- dir(system.file("include", "stan", "math", "prim", # package = "StanHeaders", mustWork = TRUE), # pattern = "Eigen.hpp$", full.names = TRUE, recursive = TRUE)[1] # Sys.setenv(PKG_CXXFLAGS = paste("-include", shQuote(Eigen))) functions <- sapply(dir(MODELS_HOME, pattern = "stan$", full.names = TRUE), function(f) { mc <- readLines(f) mc <- grep("#include", mc, invert = TRUE, value = TRUE) start <- grep("^functions[[:blank:]]*\\{[[:blank:]]*$", mc) if (length(start) == 1) { end <- grep("^}[[:blank:]]*$", mc)[1] if (end == (start + 1L)) return(as.character(NULL)) return(mc[(start + 1L):(end - 1L)]) } else return(as.character(NULL)) }) names(functions) <- basename(names(functions)) functions <- c(unlist(lapply(file.path(MODELS_HOME, "functions", c("common_functions.stan", "bernoulli_likelihoods.stan", "binomial_likelihoods.stan", "continuous_likelihoods.stan", "count_likelihoods.stan", "SSfunctions.stan")), FUN = readLines)), unlist(functions)) model_code <- paste(c("functions {", functions[grep("CODOLS", functions, invert = TRUE)], "}"), collapse = "\n") stanc_ret <- stanc(model_code = model_code, model_name = "Stan Functions", allow_undefined = TRUE) expose_stan_functions(stanc_ret, rebuild = TRUE, verbose = TRUE) # Rcpp::registerPlugin("rstan", rstan:::rstanplugin) # Rcpp::sourceCpp(file.path(INCLUDE_DIR, "tests.cpp"), rebuild = TRUE, verbose 
= TRUE) N <- 99L # bernoulli links <- c("logit", "probit", "cauchit", "log", "cloglog") context("Bernoulli") test_that("linkinv_bern returns expected results", { for (i in 1:length(links)) { eta <- -abs(rnorm(N)) linkinv <- binomial(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_bern(eta, i)), info = links[i]) } }) context("Bernoulli") test_that("pw_bern and ll_bern_lp return expected results", { for (i in 1:length(links)) { eta0 <- -abs(rnorm(N)) eta1 <- -abs(rnorm(N)) linkinv <- binomial(link = links[i])$linkinv ll0 <- dbinom(0, size = 1, prob = linkinv(eta0), log = TRUE) expect_true(all.equal(ll0, pw_bern(0, eta0, i)), info = links[i]) ll1 <- dbinom(1, size = 1, prob = linkinv(eta1), log = TRUE) expect_true(all.equal(ll1, pw_bern(1, eta1, i)), info = links[i]) expect_true(all.equal(sum(ll0, ll1), bern_lpdf(eta0, eta1, i, c(N,N))), info = links[i]) } }) # Binomial trials <- 10L context("Binomial") test_that("linkinv_binom returns expected results", { for (i in 1:length(links)) { eta <- -abs(rnorm(N)) linkinv <- binomial(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_binom(eta, i)), info = links[i]) } }) context("Bernoulli") test_that("pw_binom and ll_binom_lp return expected results", { for (i in 1:length(links)) { eta <- -abs(rnorm(N)) y <- sample.int(trials, size = N, replace = TRUE) linkinv <- binomial(link = links[i])$linkinv ll <- dbinom(y, size = trials, prob = linkinv(eta), log = TRUE) expect_true(all.equal(ll, pw_binom(y, rep(trials, N), eta, i)), info = links[i]) expect_true(all.equal(sum(ll), binom_lpmf(y, rep(trials, N), eta, i), info = links[i])) } }) # Count GLM links <- c("log", "identity", "sqrt") context("Poisson") test_that("linkinv_count returns expected results", { for (i in 1:length(links)) { eta <- abs(rnorm(N)) linkinv <- poisson(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_count(eta, i)), info = links[i]) } }) context("Poisson") test_that("pw_pois return expected results", 
{ for (i in 1:length(links)) { y <- sample.int(10, size = N, replace = TRUE) eta <- abs(rnorm(N)) linkinv <- poisson(link = links[i])$linkinv ll <- dpois(y, linkinv(eta), log = TRUE) expect_true(all.equal(ll, pw_pois(y, eta, i)), info = links[i]) } }) # Negative Binomial context("Negative Binomial") test_that("pw_nb return expected results", { for (i in 1:length(links)) { y <- sample.int(10, size = N, replace = TRUE) eta <- abs(rnorm(N)) linkinv <- poisson(link = links[i])$linkinv theta <- rexp(1) ll <- dnbinom(y, mu = linkinv(eta), size = theta, log = TRUE) expect_true(all.equal(ll, pw_nb(y, eta, theta, i)), info = links[i]) } }) # Gaussian GLM links <- c("identity", "log", "inverse") context("Gaussian") test_that("linkinv_gauss returns expected results", { for (i in 1:length(links)) { eta <- rnorm(N) linkinv <- gaussian(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_gauss(eta, i)), info = links[i]) } }) context("Gaussian") test_that("pw_gauss returns expected results", { for (i in 1:length(links)) { eta <- rnorm(N) linkinv <- gaussian(link = links[i])$linkinv expect_true(all.equal(dnorm(0, mean = linkinv(eta), log = TRUE), pw_gauss(rep(0,N), eta, 1, i)), info = links[i]) } }) # Gamma GLM test_that("linkinv_gamma returns expected results", { for (i in 1:length(links)) { eta <- rexp(N) linkinv <- Gamma(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_gamma(eta, i)), info = links[i]) } }) test_that("pw_gamma returns expected results", { for (i in 1:length(links)) { eta <- rexp(N) shape <- rexp(1) linkinv <- Gamma(link = links[i])$linkinv y <- rgamma(N, shape, rate = 1 / linkinv(eta)) expect_true(all.equal(dgamma(y, shape = shape, rate = shape / linkinv(eta), log = TRUE), pw_gamma(y, eta, shape, i)), info = links[i]) } }) test_that("pw_gamma implies an actual density", { for (i in 1:length(links)) { eta <- rexp(1) shape <- rexp(1) foo <- function(y) { exp(pw_gamma(y, rep(eta, length(y)), shape, i)) } expect_true(all.equal(1, 
integrate(foo, lower = 0, upper = Inf)$value, tol = 1e-5)) } }) test_that("GammaReg_log returns the expected results", { for (i in 1:length(links)) { eta <- rexp(N) shape <- rexp(1) linkinv <- Gamma(link = links[i])$linkinv y <- rgamma(N, shape, rate = 1 / linkinv(eta)) expect_true(all.equal(sum(dgamma(y, shape = shape, rate = shape / linkinv(eta), log = TRUE)), GammaReg(y, eta, shape, i, sum(log(y)))), info = links[i]) } }) # Inverse Gaussian GLM links <- c(links, "1/mu^2") test_that("linkinv_inv_gaussian returns expected results", { for (i in 1:length(links)) { eta <- rgamma(N, 2, 1) linkinv <- inverse.gaussian(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_inv_gaussian(eta, i)), info = links[i]) } }) rinvGauss <- function(n, mu, lambda) { # from https://en.wikipedia.org/wiki/Inverse_Gaussian_distribution y <- rchisq(n, 1) mu2 <- mu^2 x <- mu + 0.5 * mu2 * y / lambda - 0.5 * mu / lambda * sqrt(4 * mu * lambda * y + mu2 * y^2) z <- runif(n) out <- ifelse(z <= mu / (mu + x), x, mu2 / x) return(out) } dinvGauss <- function(x, mu, lambda, log = FALSE) { out <- 0.5 * log(0.5 * lambda / pi) - 1.5 * log(x) - 0.5 * lambda / mu^2 * (x - mu)^2 / x if (!log) out <- exp(out) return(out) } test_that("pw_inv_gaussian returns expected results", { for (i in 1:length(links)) { eta <- rgamma(N, 2, 1) lambda <- rexp(1) linkinv <- inverse.gaussian(link = links[i])$linkinv y <- rinvGauss(N, linkinv(eta), lambda) expect_true(all.equal(dinvGauss(y, linkinv(eta), lambda, log = TRUE), pw_inv_gaussian(y, eta, lambda, i, log(y), sqrt(y))), info = links[i]) } }) test_that("pw_inv_gaussian implies an actual density", { for (i in 1:length(links)) { eta <- rgamma(1, 2, 1) lambda <- rexp(1) foo <- function(y) { exp(pw_inv_gaussian(y, rep(eta, length(y)), lambda, i, log(y), sqrt(y))) } expect_true(all.equal(1, integrate(foo, lower = 0, upper = Inf)$value, tol = 1e-4)) } }) test_that("inv_gaussian returns expected results", { for (i in 1:length(links)) { eta <- rgamma(N, 2, 
1) lambda <- rexp(1) linkinv <- inverse.gaussian(link = links[i])$linkinv y <- rinvGauss(N, linkinv(eta), lambda) expect_true(all.equal(sum(dinvGauss(y, linkinv(eta), lambda, log = TRUE)), inv_gaussian(y, linkinv_inv_gaussian(eta,i), lambda, sum(log(y)), sqrt(y))), info = links[i]) } }) # lm N <- 99L context("lm") test_that("ll_mvn_ols... returns expected results", { X <- matrix(rnorm(2 * N), N, 2) X <- sweep(X, MARGIN = 2, STATS = colMeans(X), FUN = "-") y <- 1 + X %*% c(2:3) + rnorm(N) ols <- lm.fit(cbind(1,X), y) b <- coef(ols) intercept <- 0.5 beta <- rnorm(2) sigma <- rexp(1) SSR <- crossprod(residuals(ols))[1] ll <- sum(dnorm(y, intercept + X %*% beta, sigma, log = TRUE)) expect_true(all.equal(ll, mvn_ols_lpdf(c(intercept, beta), b, crossprod(cbind(1, X)), SSR, sigma, N))) decomposition <- qr(X) Q <- qr.Q(decomposition) R <- qr.R(decomposition) R_inv <- qr.solve(decomposition, Q) b <- R %*% b[-1] beta <- R %*% beta expect_true(all.equal(ll, mvn_ols_qr_lpdf(beta, b, intercept, mean(y), SSR, sigma, N))) }) # polr links <- c("logistic", "probit", "loglog", "cloglog", "cauchit") context("polr") test_that("CDF_polr returns expected results", { for (i in 1:length(links)) { x <- rnorm(1) if (i == 1) linkinv <- make.link("logit")$linkinv else if (i == 3) linkinv <- rstanarm:::pgumbel else linkinv <- make.link(links[i])$linkinv expect_true(all.equal(linkinv(x), CDF_polr(x, i))) } }) context("polr") test_that("pw_polr returns expected results", { J <- 3 for (i in 1:length(links)) { x <- matrix(rnorm(N * 2), nrow = N, ncol = 2) beta <- rnorm(2) zeta <- sort(rnorm(J-1)) eta <- c(x %*% beta) y <- apply(rmultinom(N, 1, prob = rep(1/J, J)) == 1, 2, which) model <- MASS::polr(as.factor(y) ~ x, method = links[i], start = c(beta, zeta), control = list(maxit = 0)) Pr <- fitted(model) Pr <- sapply(1:N, FUN = function(i) Pr[i,y[i]]) log_pr <- pw_polr(y, eta, zeta, i, 1) log_Pr <- log(Pr) good <- is.finite(log_pr) & is.finite(log_Pr) & log_Pr > -30 expect_equal(log_Pr[good], 
log_pr[good], info = links[i], tolerance = 1e-6) } }) rdirichlet <- function(n, alpha) { # from MCMCpack::rdirichlet and licensed under the GPL l <- length(alpha) x <- matrix(rgamma(l * n, alpha), ncol = l, byrow = TRUE) sm <- x %*% rep(1, l) return(x/as.vector(sm)) } context("polr") test_that("make_cutpoints returns expected results", { J <- 5L for (i in 1:length(links)) { p <- rdirichlet(1, rep(1,J))[1,] cutpoints <- make_cutpoints(p, 1, i) for (j in 1:length(cutpoints)) { expect_true(all.equal(sum(p[1:j]), CDF_polr(cutpoints[j], i))) } } }) context("polr") test_that("draw_ystar_rng returns expected results", { l <- -0.1 u <- 0.1 eta <- 0 for (i in 1:length(links)) { draw <- draw_ystar_rng(l, u, eta, i) expect_true(draw > l) expect_true(draw < u) } }) # glmer context("glmer") if (require(lme4) && require(HSAUR3)) test_that("the Stan equivalent of lme4's Z %*% b works", { stopifnot(require(Matrix)) test_lme4 <- function(group) { Lambdati <- group$Lambdat Lind <- group$Lind theta <- group$theta group <- rstanarm:::pad_reTrms(Ztlist = group$Ztlist, cnms = group$cnms, flist = group$flist) Z <- group$Z p <- sapply(group$cnms, FUN = length) l <- sapply(attr(group$flist, "assign"), function(i) nlevels(group$flist[[i]])) len_theta_L <- sum(choose(p,2), p) expect_true(len_theta_L == length(theta)) dispersion <- runif(1) tau <- as.array(rgamma(length(p), shape = 1, scale = 1)) scale <- as.array(abs(rcauchy(length(p)))) zeta <- as.array(rgamma(sum(p[p > 1]), shape = 1, scale = 1)) rho <- as.array(rbeta(sum(p - 1), 1, 1)) z_T <- as.array(rnorm(sum(pmax(0, choose(p,2) - 1)))) theta_L <- make_theta_L(len_theta_L, p, dispersion, tau, scale, zeta, rho, z_T) expect_true(all(theta_L[theta == 1] > 0)) Lambdati@x <- theta_L[Lind] z_b <- rnorm(ncol(Z)) b <- make_b(z_b, theta_L, p, l) mark <- colnames(Z) == "" expect_equal(b[!mark], as.vector(Matrix::t(Lambdati) %*% z_b[!mark]), tol = 1e-14) parts <- extract_sparse_parts(Z) Zb <- Z %*% b if (all(sapply(group$cnms, FUN = function(x) { 
length(x) == 1 && x == "(Intercept)" })) ) { V <- matrix(parts$v, nrow = sum(p), ncol = nrow(Z)) expect_true(all(V == t(as.matrix(as.data.frame(make_V(nrow(Z), nrow(V), parts$v)))))) expect_equal(Zb@x, apply(V, 2, FUN = function(v) sum(b[v]))) } } test_lme4(glFormula(Reaction ~ Days + (Days | Subject), data = sleepstudy)$reTrms) test_lme4(glFormula(Reaction ~ Days + (Days || Subject), data = sleepstudy)$reTrms) test_lme4(glFormula(Reaction ~ Days + (1 | Subject), data = sleepstudy)$reTrms) test_lme4(glFormula(cbind(incidence, size - incidence) ~ period + (1 | herd), data = cbpp, family = binomial)$reTrms) cbpp$obs <- 1:nrow(cbpp) test_lme4(glFormula(cbind(incidence, size - incidence) ~ period + (1 | herd) + (1|obs), family = binomial, data = cbpp)$reTrms) data(toenail, package = "HSAUR3") test_lme4(glFormula(outcome ~ visit + treatment + (visit|treatment) + (1|patientID), data=toenail, family = binomial)$reTrms) data(clouds, package = "HSAUR3") test_lme4(glFormula(rainfall ~ sne + cloudcover + prewetness + echomotion + (1 + sne + cloudcover + prewetness|seeding) + (1 + sne + cloudcover + prewetness||echomotion), data=clouds, family = gaussian)$reTrms) test_lme4(glFormula(angle ~ recipe + temp + (1|recipe:replicate), data = cake)$reTrms) test_lme4(glFormula(diameter ~ (1|plate) + (1|sample), data = Penicillin)$reTrms) }) context("glmer") test_that("the Cornish-Fisher expansion from standard normal to Student t works", { df <- exp(1) / pi approx_t <- sapply(rnorm(1000), FUN = CFt, df = df) expect_true(ks.test(approx_t, "pt", df = df, exact = TRUE)$p.value > 0.05) }) context("nlmer") test_that("SSasymp works", { Lob.329 <- Loblolly[ Loblolly$Seed == "329", ] Asym <- 100 resp0 <- -8.5 lrc <- -3.2 Phi <- cbind(Asym, resp0, lrc) expect_true(all.equal(SSasymp( Lob.329$age, Asym, resp0, lrc ), SS_asymp( Lob.329$age, Phi ), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Lob.329), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSasymp( Lob.329$age, Asym, 
resp0, lrc ), SS_asymp( Lob.329$age, Phi ), check.attributes = FALSE)) }) context("nlmer") test_that("SSasympOff works", { CO2.Qn1 <- CO2[CO2$Plant == "Qn1", ] Asym <- 32; lrc <- -4; c0 <- 43 Phi <- cbind(Asym, lrc, c0) expect_true(all.equal(SSasympOff(CO2.Qn1$conc, Asym, lrc, c0), SS_asympOff(CO2.Qn1$conc, Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(CO2.Qn1), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSasympOff(CO2.Qn1$conc, Asym, lrc, c0), SS_asympOff(CO2.Qn1$conc, Phi), check.attributes = FALSE)) }) context("nlmer") test_that("SSasympOrig works", { Lob.329 <- Loblolly[ Loblolly$Seed == "329", ] Asym <- 100; lrc <- -3.2 Phi <- cbind(Asym, lrc) expect_true(all.equal(SSasympOrig(Lob.329$age, Asym, lrc), SS_asympOrig(Lob.329$age, Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Lob.329), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSasympOrig(Lob.329$age, Asym, lrc), SS_asympOrig(Lob.329$age, Phi), check.attributes = FALSE)) }) context("nlmer") test_that("SSbiexp works", { Indo.1 <- Indometh[Indometh$Subject == 1, ] A1 <- 3; lrc1 <- 1; A2 <- 0.6; lrc2 <- -1.3 Phi <- cbind(A1, lrc1, A2, lrc2) expect_true(all.equal(SSbiexp( Indo.1$time, A1, lrc1, A2, lrc2 ), SS_biexp( Indo.1$time, Phi ), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Indo.1), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSbiexp( Indo.1$time, A1, lrc1, A2, lrc2 ), SS_biexp( Indo.1$time, Phi ), check.attributes = FALSE)) }) context("nlmer") test_that("SSfol works", { Theoph.1 <- Theoph[ Theoph$Subject == 1, ] lKe <- -2.5; lKa <- 0.5; lCl <- -3 Phi <- cbind(lKe, lKa, lCl) expect_true(all.equal(SSfol(Theoph.1$Dose, Theoph.1$Time, lKe, lKa, lCl), SS_fol(Theoph.1$Dose, Theoph.1$Time, Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Theoph.1), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSfol(Theoph.1$Dose, Theoph.1$Time, lKe, lKa, lCl), SS_fol(Theoph.1$Dose, Theoph.1$Time, Phi), check.attributes = FALSE)) 
}) context("nlmer") test_that("SSfpl works", { Chick.1 <- ChickWeight[ChickWeight$Chick == 1, ] A <- 13; B <- 368; xmid <- 14; scal <- 6 Phi <- cbind(A, B, xmid, log(scal)) expect_true(all.equal(SSfpl(Chick.1$Time, A, B, xmid, scal), SS_fpl(Chick.1$Time, Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Chick.1), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSfpl(Chick.1$Time, A, B, xmid, scal), SS_fpl(Chick.1$Time, Phi), check.attributes = FALSE)) }) context("nlmer") test_that("SSgompertz works", { DNase.1 <- subset(DNase, Run == 1) Asym <- 4.5; b2 <- 2.3; b3 <- 0.7 Phi <- cbind(Asym, b2, b3) expect_true(all.equal(SSgompertz(log(DNase.1$conc), Asym, b2, b3), SS_gompertz(log(DNase.1$conc), Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(DNase.1), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSgompertz(log(DNase.1$conc), Asym, b2, b3), SS_gompertz(log(DNase.1$conc), Phi), check.attributes = FALSE)) }) context("nlmer") test_that("SSlogis works", { Chick.1 <- ChickWeight[ChickWeight$Chick == 1, ] Asym <- 368; xmid <- 14; scal <- 6 Phi <- cbind(Asym, xmid, log(scal)) expect_true(all.equal(SSlogis(Chick.1$Time, Asym, xmid, scal), SS_logis(Chick.1$Time, Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Chick.1), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSlogis(Chick.1$Time, Asym, xmid, scal), SS_logis(Chick.1$Time, Phi), check.attributes = FALSE)) }) context("nlmer") test_that("SSmicmen works", { PurTrt <- Puromycin[ Puromycin$state == "treated", ] Vm <- 200; K <- 0.05 Phi <- cbind(Vm, K) expect_true(all.equal(SSmicmen(PurTrt$conc, Vm, K), SS_micmen(PurTrt$conc, Phi), check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(PurTrt), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSmicmen(PurTrt$conc, Vm, K), SS_micmen(PurTrt$conc, Phi), check.attributes = FALSE)) }) context("nlmer") test_that("SSweibull works", { Chick.6 <- subset(ChickWeight, (Chick == 6) & (Time > 0)) Asym <- 160; Drop 
<- 115; lrc <- -5.5; pwr <- 2.5 Phi <- cbind(Asym, Drop, lrc, pwr) expect_true(all.equal(SSweibull(Chick.6$Time, Asym, Drop, lrc, pwr) , SS_weibull(Chick.6$Time, Phi) , check.attributes = FALSE)) Phi <- matrix(Phi, nrow = nrow(Chick.6), ncol = ncol(Phi), byrow = TRUE) expect_true(all.equal(SSweibull(Chick.6$Time, Asym, Drop, lrc, pwr) , SS_weibull(Chick.6$Time, Phi) , check.attributes = FALSE)) }) context("nlmer") test_that("reshape works", { x <- as.double(1:10) expect_true(all(matrix(x, 5, 2) == reshape_vec(x, 5L, 2L))) }) # betareg links <- c("logit", "probit", "cloglog", "cauchit", "log") context("betareg") test_that("linkinv_beta returns expected results", { for (i in 1:length(links)) { eta <- -abs(rnorm(N)) linkinv <- binomial(link = links[i])$linkinv expect_true(all.equal(linkinv(eta), linkinv_beta(eta, i)), info = links[i]) } }) context("betareg") test_that("pw_beta and ll_beta_lp return expected results", { for (i in 1:length(links)) { eta <- -abs(rnorm(N)) mu <- linkinv_beta(eta, i) dispersion <- 4/3 linkinv <- binomial(link = links[i])$linkinv ll <- dbeta(1/3, mu*dispersion, (1-mu)*dispersion, log = TRUE) expect_true(all.equal(ll, pw_beta(rep(1/3,N) , eta, dispersion, i)), info = links[i]) } }) context("clogit") test_that("ll_clogit_lp (which calls log_clogit_denom) returns the expected results", { data(infert) infert <- infert[order(infert$stratum, !infert$case),] betas <- c(spontaneous = 1.98587551667772, induced = 1.40901163187514) X <- model.matrix(case ~ spontaneous + induced - 1, data = infert) eta <- c(X %*% betas) y <- infert$case == 1 s <- aggregate(y, by = list(infert$stratum), FUN = sum)$x obs <- aggregate(y, by = list(infert$stratum), FUN = length)$x ll <- clogit_lpdf(eta0 = eta[!y], eta1 = eta[y], successes = s, failures = obs - s, observations = obs) expect_equal(-64.202236924431, ll) }) rstanarm/tests/testthat/include/0000755000176200001440000000000014414044166016532 5ustar 
liggesusersrstanarm/tests/testthat/include/CODOLS.hpp0000644000176200001440000000203514414044166020226 0ustar liggesusers#ifndef RSTANARM__CODOLS_HPP #define RSTANARM__CODOLS_HPP /* * Compute ordinary least squares coefficients, * even in the situation where X is rank deficient * See https://eigen.tuxfamily.org/dox/classEigen_1_1CompleteOrthogonalDecomposition.html */ template inline Eigen::Matrix::type, Eigen::Dynamic, 1> CODOLS(const Eigen::Matrix& X, const Eigen::Matrix& y, std::ostream* pstream__) { typename boost::math::tools::promote_args::type T1__; using namespace Eigen; CompleteOrthogonalDecomposition cod(X); return cod.solve(y); } inline auto CODOLS(const Eigen::Matrix& X, const Eigen::Map, 0, Eigen::Stride<0, 0>>& y, std::ostream* pstream__) { using namespace Eigen; CompleteOrthogonalDecomposition cod(X); return cod.solve(y).eval(); } #endif rstanarm/tests/testthat/include/meta_header.hpp0000644000176200001440000000014214370470372021501 0ustar liggesusers#ifndef RSTANARM__META_HEADER_HPP #define RSTANARM__META_HEADER_HPP #include "CODOLS.hpp" #endif rstanarm/tests/testthat/include/tests.cpp0000644000176200001440000000032214370470372020400 0ustar liggesusers// [[Rcpp::depends(BH)]] // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::depends(StanHeaders)]] #include #include #include #include "meta_header.hpp" // Need test for CODOLS rstanarm/tests/testthat/test_stan_nlmer.R0000644000176200001440000000420614370470372020440 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. suppressPackageStartupMessages(library(rstanarm)) library(lme4) SEED <- 12345 ITER <- 100 CHAINS <- 2 CORES <- 2 REFRESH <- 0 threshold <- 0.05 context("stan_nlmer") data("Orange", package = "datasets") Orange$circumference <- Orange$circumference / 100 Orange$age <- Orange$age / 100 SW(fit <- stan_nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, prior = NULL, cores = CORES, init_r = 1, chains = CHAINS, seed = SEED, refresh = 0, QR = TRUE)) startvec <- c(Asym = 200, xmid = 725, scal = 350) / 100 ml <- nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, start = startvec) test_that("stan_nlmer runs for Orange example", { expect_stanreg(fit) }) test_that("stan_nlmer is similar to nlmer on Orange example", { expect_equal(fixef(ml), fixef(fit), tol = threshold) }) test_that("stan_nlmer throws error if formula includes an unknown function", { expect_error(stan_nlmer(circumference ~ SSfoo(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange), regexp = "self-starting nonlinear function") }) test_that("loo/waic for stan_nlmer works", { expect_equivalent_loo(fit) }) context("posterior_predict (stan_nlmer)") test_that("compatible with stan_nlmer", { check_for_pp_errors(fit) }) rstanarm/tests/testthat/test_stan_polr.R0000644000176200001440000001015514551535205020275 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it 
and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. suppressPackageStartupMessages(library(rstanarm)) library(MASS) SEED <- 123 ITER <- 100 CHAINS <- 2 CORES <- 1 REFRESH <- 0 threshold <- 0.03 context("stan_polr") f <- tobgp ~ agegp + alcgp SW({ fit1 <- stan_polr(f, data = esoph, method = "logistic", prior_PD = TRUE, prior = R2(location = 0.4, what = "median"), chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) fit1vb <- stan_polr(f, data = esoph, method = "loglog", prior = R2(location = 0.4, what = "median"), seed = SEED, algorithm = "fullrank") fit2 <- stan_polr(factor(tobgp == "30+") ~ agegp + alcgp, data = esoph, prior = R2(location = 0.4), method = "logistic", shape = 2, rate = 2, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) fit2vb <- stan_polr(factor(tobgp == "30+") ~ agegp + alcgp, data = esoph, method = "probit", seed = SEED, algorithm = "fullrank", prior = NULL, prior_counts = NULL) # test with NULL priors fit3 <- stan_polr(factor(tobgp == "30+") ~ agegp + alcgp, data = esoph, prior = R2(location = 0.4), shape = 2, rate = 2, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) }) test_that("stan_polr runs for esoph example", { expect_stanreg(fit1) expect_stanreg(fit2) expect_stanreg(fit1vb) expect_stanreg(fit2vb) }) test_that("stan_polr runs with 1 predictor", { esoph$x1 <- rnorm(nrow(esoph)) expect_stanreg(stan_polr(tobgp ~ x1, data = esoph, prior = 
R2(0.5, "mean"))) }) test_that("stan_polr throws error if formula excludes intercept", { expect_error(stan_polr(tobgp ~ 0 + agegp + alcgp, data = esoph, method = "loglog", prior = R2(0.4, "median")), regexp = "an intercept is needed and assumed") }) test_that("stan_polr throws error if shape,rate specified with >2 outcome levels", { expect_error( stan_polr(f, data = esoph, method = "loglog", prior = R2(0.4, "median"), shape = 2), "'shape' must be NULL when there are more than 2 outcome categories" ) expect_error( stan_polr(f, data = esoph, method = "loglog", prior = R2(0.4, "median"), rate = 2), "'rate' must be NULL when there are more than 2 outcome categories" ) }) test_that("gumbel functions ok", { # formulas are correct # just test a few cases so they're flagged if anything changes by accident # maybe should compare to corresponding functions in ordinal package? expect_equal(rstanarm:::dgumbel(0), 0.3678794, tol = 0.00001) expect_equal(rstanarm:::qgumbel(0), -Inf) expect_equal(rstanarm:::qgumbel(0.5), 0.3665129, tol = 0.00001) expect_equal(rstanarm:::pgumbel(0.3665129), 0.5, tol = 0.00001) expect_equal(rstanarm:::qgumbel(1), Inf) }) test_that("loo/waic for stan_polr works", { ll_fun <- rstanarm:::ll_fun expect_equivalent_loo(fit1) expect_identical(ll_fun(fit1), rstanarm:::.ll_polr_i) expect_equivalent_loo(fit2) expect_identical(ll_fun(fit2), rstanarm:::.ll_polr_i) expect_equivalent_loo(fit3) expect_identical(ll_fun(fit3), rstanarm:::.ll_polr_i) }) context("posterior_predict (stan_polr)") test_that("compatible with stan_polr", { check_for_pp_errors(fit1) check_for_pp_errors(fit2) check_for_pp_errors(fit3) }) rstanarm/tests/testthat/test_stan_glm.R0000644000176200001440000004550614370470372020112 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public 
License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. context("stan_glm") suppressPackageStartupMessages(library(rstanarm)) SEED <- 12345 set.seed(SEED) CHAINS <- 2 ITER <- 40 # small iter for speed but large enough for psis REFRESH <- 0 SW( fit_gaus <- stan_glm(mpg ~ wt, data = mtcars, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) ) dat <- data.frame(ldose = rep(0:5, 2), sex = factor(rep(c("M", "F"), c(6, 6)))) numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) SF <- cbind(numdead, numalive = 20-numdead) SW( fit_binom <- stan_glm(SF ~ sex*ldose, data = dat, family = binomial, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) ) dead <- rbinom(length(numdead), 1, prob = 0.5) SW(fit_binom2 <- update(fit_binom, formula = factor(dead) ~ .)) d.AD <- data.frame(treatment = gl(3,3), outcome = gl(3,1,9), counts = c(18,17,15,20,10,20,25,13,12)) SW(fit_pois <- stan_glm(counts ~ outcome + treatment, data = d.AD, family = poisson, chains = CHAINS, iter = 10 * ITER, seed = SEED, refresh = 0)) SW(fit_negbin <- update(fit_pois, family = neg_binomial_2)) clotting <- data.frame(log_u = log(c(5,10,15,20,30,40,60,80,100)), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12)) SW(fit_gamma <- stan_glm(lot1 ~ log_u, data = clotting, family = Gamma, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) SW(fit_igaus <- update(fit_gamma, family = inverse.gaussian)) test_that("loo/waic for stan_glm works", { ll_fun <- 
rstanarm:::ll_fun # gaussian expect_equivalent_loo(fit_gaus) expect_identical(ll_fun(fit_gaus), rstanarm:::.ll_gaussian_i) # binomial expect_equivalent_loo(fit_binom) expect_equivalent_loo(fit_binom2) expect_identical(ll_fun(fit_binom), rstanarm:::.ll_binomial_i) expect_identical(ll_fun(fit_binom2), rstanarm:::.ll_binomial_i) # poisson expect_equivalent_loo(fit_pois) expect_identical(ll_fun(fit_pois), rstanarm:::.ll_poisson_i) # negative binomial expect_equivalent_loo(fit_negbin) expect_identical(ll_fun(fit_negbin), rstanarm:::.ll_neg_binomial_2_i) # gamma expect_equivalent_loo(fit_gamma) expect_identical(ll_fun(fit_gamma), rstanarm:::.ll_Gamma_i) # inverse gaussian expect_equivalent_loo(fit_igaus) expect_identical(ll_fun(fit_igaus), rstanarm:::.ll_inverse.gaussian_i) }) test_that("stan_glm throws appropriate errors, warnings, and messages", { counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3) dat <- data.frame(counts, outcome, treatment) f <- as.formula(counts ~ outcome + treatment) # error: glmer syntax expect_error(stan_glm(counts ~ treatment + (1|outcome), data = dat), regexp = "model formula not allowed") # error: empty model expect_error(stan_glm(counts ~ 0, data = dat), regexp = "No intercept or predictors specified") # error: stan_glm.nb with family argument expect_error(stan_glm.nb(f, data = dat, family = "neg_binomial_2"), regexp = "'family' should not be specified.") # error: prior and prior_intercept not lists expect_error(stan_glm(f, data = dat, family = "poisson", prior = normal), regexp = "should be a named list") expect_error(stan_glm(f, data = dat, family = "poisson", prior_intercept = normal), regexp = "should be a named list") # error: QR only with more than 1 predictor expect_error(stan_glm(counts ~ 1, data = dat, family = "poisson", QR = TRUE), regexp = "'QR' can only be specified when there are multiple predictors") # error: QR and sparse expect_error(stan_glm(f, data = dat, family = "poisson", QR = TRUE, sparse 
= TRUE), regexp = "'QR' and 'sparse' cannot both be TRUE") # require intercept for certain family and link combinations expect_error(stan_glm(counts ~ -1 + outcome + treatment, data = dat, family = poisson(link="identity"), seed = SEED), regexp = "model must have an intercept") expect_error(stan_glm(I(counts > 20) ~ -1 + outcome + treatment, data = dat, family = binomial(link="log"), seed = SEED), regexp = "model must have an intercept") # support of outcome variable expect_error(stan_glm(cbind(1:10, runif(10)) ~ 1, data = dat, family = "binomial"), "outcome values must be counts") expect_error(stan_glm(c(1,2,1,2) ~ 1, data = dat, family = "binomial"), "outcome values must be 0 or 1") expect_error(stan_glm((-1):3 ~ 1, data = dat, family = "poisson"), "outcome values must be counts") expect_error(stan_glm.nb(runif(3) ~ 1, data = dat), "outcome values must be counts") expect_error(stan_glm(0:3 ~ 1, data = dat, family = "Gamma"), "outcome values must be positive") expect_error(stan_glm(runif(3, -2, -1) ~ 1, data = dat, family = "inverse.gaussian"), "outcome values must be positive") expect_error(stan_glm(cbind(1:10, 1:10) ~ 1, data = dat, family = "gaussian"), "should not have multiple columns") # prior_aux can't be NULL if prior_PD is TRUE expect_error(stan_glm(mpg ~ wt, data = mtcars, prior_aux = NULL, prior_PD = TRUE), "'prior_aux' cannot be NULL if 'prior_PD' is TRUE") }) test_that("gaussian returns expected result for trees example", { # example using trees dataset links <- c("identity", "log", "inverse") for (i in 1:length(links)) { if (links[i] == "inverse") next # unreliable fit <- stan_glm(Volume ~ log(Girth) + log(Height), data = trees, family = gaussian(link = links[i]), algorithm = "optimizing", prior = NULL, prior_intercept = NULL, refresh = 0, QR = TRUE, tol_rel_grad = 1e-16, seed = SEED) expect_stanreg(fit) ans <- glm(Volume ~ log(Girth) + log(Height),data = trees, family = gaussian(link = links[i])) expect_equal(coef(fit), coef(ans), tol = 0.021) } 
expect_error(update(fit, prior = dnorm), regexp = "should be a named list") expect_error(update(fit, prior_intercept = dnorm), regexp = "should be a named list") expect_error(update(fit, prior = R2(0.5)), regexp = "should be one of") expect_error(update(fit, prior_intercept = R2(0.5)), regexp = "should be one of") }) links <- c("log", "identity", "sqrt") test_that("stan_glm returns expected result for glm poisson example", { # example from help("glm") for (i in 1:length(links)) { SW(fit <- stan_glm(counts ~ outcome + treatment, data = d.AD, family = poisson(links[i]), refresh = 0, prior = NULL, prior_intercept = NULL, QR = TRUE, algorithm = "optimizing", tol_rel_grad = 1e-16, seed = SEED)) expect_stanreg(fit) ans <- glm(counts ~ outcome + treatment, data = d.AD, family = poisson(links[i]), start = coef(fit)) if (links[i] == "log") expect_equal(coef(fit), coef(ans), tol = 0.03) # if (links[i] == "identity") expect_equal(coef(fit)[-1], coef(ans)[-1], tol = 0.03) if (links[i] == "sqrt") { # this is weird if (coef(ans)[1] > 0) expect_equal(coef(fit)[-1], coef(ans)[-1], tol = 0.1) else expect_equal(-coef(fit)[-1], coef(ans)[-1], tol = 0.04) } } }) test_that("stan_glm returns something for glm negative binomial example", { skip_if_not_installed("MASS") for (i in 1:length(links)) { SW(fit1 <- stan_glm(Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine, family = neg_binomial_2(links[i]), seed = SEED, chains = 1, iter = 100, QR = TRUE, refresh = 0)) SW(fit2 <- stan_glm.nb(Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine, link = links[i], seed = SEED, chains = 1, iter = 100, QR = TRUE, refresh = 0)) expect_stanreg(fit1) expect_stanreg(fit2) expect_equal(as.matrix(fit1), as.matrix(fit2)) } # testing results against MASS::glm.nb is unreliable }) test_that("stan_glm returns expected result for cars example", { fit <- stan_glm(log(dist) ~ log(speed), data = cars, sparse = TRUE, family = gaussian(link = "identity"), seed = SEED, prior = NULL, prior_intercept = NULL, refresh = 0, 
tol_rel_obj = .Machine$double.eps, algorithm = "optimizing") expect_stanreg(fit) ans <- glm(log(dist) ~ log(speed), data = cars, family = gaussian(link = "identity")) expect_equal(coef(fit), coef(ans), tol = 0.1) }) test_that("stan_glm returns expected result with no intercept for mtcars example", { f <- as.formula(mpg ~ -1 + wt + cyl + disp + am + carb) fit <- stan_glm(f, data = mtcars, refresh = 0, prior = NULL, prior_intercept = NULL, tol_rel_obj = .Machine$double.eps, algorithm = "optimizing", seed = SEED, sparse = TRUE) expect_stanreg(fit) ans <- glm(f, data = mtcars, family = gaussian(link = "identity")) expect_equal(coef(fit), coef(ans), tol = 0.04) }) links <- c("logit", "probit", "cauchit", "log", "cloglog") test_that("stan_glm returns expected result for bernoulli", { # bernoulli example sd1 <- 1; sd2 <- 0.5; corr_12 <- -0.4 Sigma <- matrix(c(sd1^2, rep(prod(corr_12, sd1, sd2), 2), sd2^2), 2, 2) x <- t(t(chol(Sigma)) %*% matrix(rnorm(50), 2, 250)) b <- c(2, 1) / 10 for (i in 1:length(links)) { fam <- binomial(links[i]) theta <- fam$linkinv(-1 + x %*% b) y <- rbinom(length(theta), size = 1, prob = theta) dat <- data.frame(y, x) SW( fit <- stan_glm(y ~ x, data = dat, family = fam, seed = SEED, QR = TRUE, prior = NULL, prior_intercept = NULL, refresh = 0, tol_rel_obj = .Machine$double.eps, algorithm = "optimizing") ) expect_stanreg(fit) val <- coef(fit) if (links[i] != "log") { ans <- coef(glm(y ~ x, family = fam, etastart = theta)) expect_equal(val, ans, 0.09, info = links[i]) } # else expect_equal(val[-1], ans[-1], 0.06, info = links[i]) } }) test_that("stan_glm returns expected result for binomial example", { # example using simulated data N <- 200 trials <- rpois(N, lambda = 30) trials <<- trials X <- cbind(1, matrix(rnorm(N * 3, sd = 0.5), N, 3)) for (i in 1:length(links)) { fam <- binomial(links[i]) if (i == 4) { b <- c(0, 0.5, 0.1, -1.0) eta <- X %*% b b[1] <- -max(eta) - 0.05 } else b <- c(0, 0.5, 0.1, -1.0) yes <- rbinom(N, size = trials, prob = 
fam$linkinv(X %*% b)) y <- cbind(yes, trials - yes) dat <- data.frame(yes, trials, x1 = X[,2], x2 = X[,3], x3 = X[,4]) SW( fit <- stan_glm(cbind(yes, trials - yes) ~ x1 + x2 + x3, data = dat, family = fam, seed = SEED, QR = TRUE, prior = NULL, prior_intercept = NULL, refresh = 0, tol_rel_obj = .Machine$double.eps, algorithm = "optimizing") ) expect_stanreg(fit) val <- coef(fit) ans <- coef(glm(y ~ x1 + x2 + x3, data = dat, family = fam, start = b)) if (links[i] != "log") expect_equal(val, ans, 0.02, info = links[i]) # else expect_equal(val[-1], ans[-1], 0.02, info = links[i]) # unstable prop <- yes / trials dat$prop <- prop SW( fit2 <- stan_glm(prop ~ x1 + x2 + x3, data = dat, weights = trials, family = fam, seed = SEED, refresh = 0, prior = NULL, prior_intercept = NULL, tol_rel_obj = .Machine$double.eps, algorithm = "optimizing") ) expect_stanreg(fit2) val2 <- coef(fit2) if (links[i] != "log") expect_equal(val2, ans, 0.02, info = links[i]) else expect_equal(val2[-1], ans[-1], 0.02, info = links[i]) } }) test_that("model with hs prior doesn't error", { SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = hs(4, 2, .5), seed = SEED, algorithm = "meanfield", QR = TRUE, refresh = 0)) expect_output(print(prior_summary(fit)), "~ hs(df = ", fixed = TRUE) }) test_that("model with hs_plus prior doesn't error", { # this works except on 32bit Windows skip_on_os("windows") SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = hs_plus(4, 1, 2, .5), seed = SEED, algorithm = "meanfield", QR = TRUE)) expect_output(print(prior_summary(fit)), "~ hs_plus(df1 = ", fixed = TRUE) }) test_that("model with laplace prior doesn't error", { SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = laplace(), seed = SEED, algorithm = "meanfield", refresh = 0)) expect_output(print(prior_summary(fit)), "~ laplace(", fixed = TRUE) }) test_that("model with lasso prior doesn't error", { SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = lasso(), seed = SEED, algorithm = "meanfield", refresh = 0)) 
expect_output(print(prior_summary(fit)), "~ lasso(", fixed = TRUE) }) test_that("model with product_normal prior doesn't error", { SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = product_normal(df = 3, scale = 0.5), seed = SEED, algorithm = "meanfield", refresh = 0)) expect_output(print(prior_summary(fit)), "~ product_normal(df = ", fixed = TRUE) }) test_that("prior_aux argument is detected properly", { SW(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 10, chains = 1, seed = SEED, refresh = 0, prior_aux = exponential(5), prior = normal(autoscale=FALSE), prior_intercept = normal(autoscale=FALSE))) expect_identical( fit$prior.info$prior_aux, list(dist = "exponential", location = NULL, scale = NULL, adjusted_scale = NULL, #1/5 * sd(mtcars$mpg), df = NULL, rate = 5, aux_name = "sigma") ) expect_output(print(prior_summary(fit)), "~ exponential(rate = ", fixed = TRUE) }) test_that("prior_aux can be NULL", { SW(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 10, chains = 1, seed = SEED, refresh = 0, prior_aux = NULL)) expect_output(print(prior_summary(fit)), "~ flat", fixed = TRUE) }) test_that("autoscale works (insofar as it's reported by prior_summary)", { SW(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 5, prior = normal(autoscale=FALSE), prior_intercept = normal(autoscale=FALSE), prior_aux = cauchy(autoscale=FALSE))) out <- capture.output(print(prior_summary(fit))) expect_false(any(grepl("adjusted", out))) SW(fit2 <- update(fit, prior = normal(autoscale=TRUE))) out <- capture.output(print(prior_summary(fit2))) expect_true(any(grepl("Adjusted", out))) }) test_that("prior_options is deprecated", { expect_warning( ops <- prior_options(scaled = FALSE, prior_scale_for_dispersion = 3), "deprecated and will be removed" ) expect_warning( capture.output(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 5, prior_ops = ops)), "Setting prior scale for aux to value specified in 'prior_options'" ) expect_output( print(prior_summary(fit)), "~ exponential(rate = 0.33)", fixed = 
TRUE ) }) test_that("empty interaction levels dropped", { x1 <- gl(3, 5, 100) x2 <- gl(4, 6, 100) x1[x2 == 1] <- 1 x1[x2 == 2] <- 1 y <- rnorm(100) expect_warning(stan_glm(y ~ x1*x2, chains = 1, iter = 20, refresh = 0), regexp = "Dropped empty interaction levels") }) test_that("posterior_predict compatible with glms", { check_for_pp_errors(fit_gaus) expect_linpred_equal(fit_gaus) mtcars2 <- mtcars mtcars2$offs <- runif(nrow(mtcars)) SW(fit2 <- stan_glm(mpg ~ wt + offset(offs), data = mtcars2, prior_intercept = NULL, prior = NULL, prior_aux = NULL, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0)) expect_warning(posterior_predict(fit2, newdata = mtcars2[1:5, ]), "offset") check_for_pp_errors(fit_gaus, data = mtcars2, offset = mtcars2$offs) check_for_pp_errors(fit2, data = mtcars2, offset = mtcars2$offs) expect_linpred_equal(fit_gaus) # expect_linpred_equal(fit2) check_for_pp_errors(fit_pois) check_for_pp_errors(fit_negbin) expect_linpred_equal(fit_pois) expect_linpred_equal(fit_negbin) check_for_pp_errors(fit_gamma) check_for_pp_errors(fit_igaus) expect_linpred_equal(fit_gamma) expect_linpred_equal(fit_igaus) }) test_that("contrasts attribute isn't dropped", { contrasts <- list(wool = "contr.sum", tension = "contr.sum") SW(fit <- stan_glm(breaks ~ wool * tension, data = warpbreaks, contrasts = contrasts, chains = 1, refresh = 0)) expect_equal(fit$contrasts, contrasts) }) test_that("QR recommended if VB and at least 2 predictors", { expect_message( SW(stan_glm(mpg ~ wt + cyl, data = mtcars, algorithm = "meanfield", refresh = 0)), "Setting 'QR' to TRUE can often be helpful when using one of the variational inference algorithms" ) # no message if QR already specified expect_message( SW(stan_glm(mpg ~ wt + cyl, data = mtcars, algorithm = "meanfield", refresh = 0, QR = TRUE)), NA ) # no message if only 1 predictor expect_message( SW(stan_glm(mpg ~ wt, data = mtcars, algorithm = "meanfield", refresh = 0)), NA ) }) test_that("QR errors if only 1 predictor", { 
expect_error( stan_glm(mpg ~ wt, data = mtcars, QR = TRUE), "can only be specified when there are multiple predictors" ) }) test_that("returns something with collinear predictors", { N <- 100 y <- rnorm(N) z <- sample(c(0,1), N, replace=TRUE) x1 <- rnorm(N) x2 <- 2*x1 fit_1 <- stan_glm( y ~ z * (x1 + x2), data = data.frame(y, z, x1, x2), prior = normal(location = 0, scale = 0.1), prior_intercept = normal(location = 0, scale = 0.1), chains = CHAINS, iter = ITER, refresh = REFRESH ) expect_stanreg(fit_1) }) rstanarm/tests/testthat/test_stan_jm.R0000644000176200001440000006256514414044166017742 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016 Trustees of Columbia University # Copyright (C) 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm)) library(lme4) library(survival) ITER <- 1000 CHAINS <- 1 SEED <- 12345 REFRESH <- 0L set.seed(SEED) TOLSCALES <- list( lmer_fixef = 0.25, # how many SEs can stan_jm fixefs be from lmer fixefs lmer_ranef = 0.05, # how many SDs can stan_jm ranefs be from lmer ranefs glmer_fixef = 0.5, # how many SEs can stan_jm fixefs be from glmer fixefs glmer_ranef = 0.1, # how many SDs can stan_jm ranefs be from glmer ranefs event = 0.3 # how many SEs can stan_jm fixefs be from coxph fixefs ) context("stan_jm") #---- Data (for non-Gaussian families) pbcLong$ybern <- as.integer(pbcLong$logBili >= mean(pbcLong$logBili)) pbcLong$ybino <- as.integer(rpois(nrow(pbcLong), 5)) pbcLong$ypois <- as.integer(pbcLong$albumin) pbcLong$ynbin <- as.integer(rnbinom(nrow(pbcLong), 3, .3)) pbcLong$ygamm <- as.numeric(pbcLong$platelet / 10) pbcLong$xbern <- as.numeric(pbcLong$platelet / 100) pbcLong$xpois <- as.numeric(pbcLong$platelet / 100) pbcLong$xgamm <- as.numeric(pbcLong$logBili) #---- Models # univariate joint model fmLong1 <- logBili ~ year + (year | id) fmSurv1 <- Surv(futimeYears, death) ~ sex + trt o<-SW(jm1 <- stan_jm( fmLong1, pbcLong, fmSurv1, pbcSurv, time_var = "year", iter = 1, refresh = 0, chains = 1, seed = SEED)) # multivariate joint model fmLong2 <- list( logBili ~ year + (year | id), albumin ~ year + (year | id)) fmSurv2 <- Surv(futimeYears, death) ~ sex + trt o<-SW(jm2 <- stan_jm( fmLong2, pbcLong, fmSurv2, pbcSurv, time_var = "year", iter = 1, refresh = 0, chains = 1, seed = SEED)) #---- Tests for stan_jm arguments test_that("formula argument works", { SW(fit <- update(jm1, formulaLong. = list(fmLong1))) expect_identical(as.matrix(jm1), as.matrix(fit)) # fm as list # Longitudiinal model without offset expect_null(jm1$glmod$Long1$offset) # Longitudinal model with offset fmLong1_offset <- logBili ~ year + (year | id) + offset(log(ypois)) SW(jm_offset <- update(jm1, formulaLong. 
= fmLong1_offset)) expect_equal(jm_offset$glmod$Long1$offset, log(pbcLong$ypois)) expect_equal(jm_offset$glmod$Long1$has_offset, 1) }) test_that("error if outcome is character", { expect_error( update(jm1, formulaLong. = as.character(logBili) ~ year + (year | id)), "Outcome variable can't be type 'character'" ) }) test_that("data argument works", { SW(fit <- update(jm1, dataLong = list(pbcLong))) expect_identical(as.matrix(jm1), as.matrix(fit)) # data as list SW(fit <- update(jm2, dataLong = list(pbcLong, pbcLong))) expect_identical(as.matrix(jm2), as.matrix(fit)) }) test_that("id_var argument works", { # Models with a single grouping factor expect_output(suppressWarnings(update(jm1, id_var = "id"))) expect_output(expect_warning(update(jm1, id_var = "year"), "are not the same; 'id_var' will be ignored")) # Models with more than one grouping factor tmpdat <- pbcLong tmpdat$practice <- cut(pbcLong$id, c(0,10,20,30,40)) tmpfm <- logBili ~ year + (year | id) + (1 | practice) SW(ok_mod <- update(jm1, formulaLong. = tmpfm, dataLong = tmpdat, id_var = "id", init = 0)) expect_stanmvreg(ok_mod) expect_error(update(ok_mod, id_var = NULL), "'id_var' must be specified") expect_error(update(ok_mod, id_var = "year"), "'id_var' must be included as a grouping factor") }) test_that("family argument works", { expect_output(suppressWarnings(update(jm1, family = "gaussian"))) expect_output(suppressWarnings(update(jm1, family = gaussian))) expect_output(suppressWarnings(update(jm1, family = gaussian(link = identity)))) #expect_output(suppressWarnings(update(jm1, formulaLong. = ypois ~ ., family = poisson))) expect_output(suppressWarnings(update(jm1, formulaLong. = ynbin ~ ., family = neg_binomial_2))) #expect_output(suppressWarnings(update(jm1, formulaLong. = ygamm ~ ., family = Gamma))) #expect_output(suppressWarnings(update(jm1, formulaLong. = ygamm ~ ., family = inverse.gaussian))) expect_error(suppressWarnings(update(jm1, formulaLong. 
= ybino ~ ., family = binomial))) }) test_that("assoc argument works", { # NB: muslope, shared_b, and shared_coef have been temporarily # disallowed, and will be reinstated in a future release expect_error(ret <- update(jm1, assoc = "muslope"), "temporarily disallowed") expect_error(ret <- update(jm1, assoc = "shared_b"), "temporarily disallowed") expect_error(ret <- update(jm1, assoc = "shared_coef"), "temporarily disallowed") # Univariate joint models expect_output(suppressWarnings(update(jm1, assoc = NULL))) expect_output(suppressWarnings(update(jm1, assoc = "null"))) expect_output(suppressWarnings(update(jm1, assoc = "etavalue"))) expect_output(suppressWarnings(update(jm1, assoc = "muvalue"))) expect_output(suppressWarnings(update(jm1, assoc = "etaslope"))) #expect_output(suppressWarnings(update(jm1, assoc = "muslope"))) expect_output(suppressWarnings(update(jm1, assoc = "etaauc"))) expect_output(suppressWarnings(update(jm1, assoc = "muauc"))) expect_output(suppressWarnings(update(jm1, assoc = c("etavalue", "etaslope")))) #expect_output(suppressWarnings(update(jm1, assoc = c("etavalue", "muslope")))) expect_output(suppressWarnings(update(jm1, assoc = c("etavalue", "etaauc")))) expect_output(suppressWarnings(update(jm1, assoc = c("etavalue", "muauc")))) expect_output(suppressWarnings(update(jm1, assoc = c("muvalue", "etaslope")))) #expect_output(suppressWarnings(update(jm1, assoc = c("muvalue", "muslope")))) expect_output(suppressWarnings(update(jm1, assoc = c("muvalue", "etaauc")))) expect_output(suppressWarnings(update(jm1, assoc = c("muvalue", "muauc")))) expect_error(update(jm1, assoc = c("etavalue", "muvalue")), "cannot be specified together") #expect_error(update(jm1, assoc = c("etaslope", "muslope")), "cannot be specified together") expect_error(update(jm1, assoc = c("etaauc", "muauc")), "cannot be specified together") #expect_output(suppressWarnings(update(jm1, assoc = "shared_b"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_b(1)"))) 
#expect_output(suppressWarnings(update(jm1, assoc = "shared_b(2)"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_b(1:2)"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_b(1,2)"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_coef"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_coef(1)"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_coef(2)"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_coef(1:2)"))) #expect_output(suppressWarnings(update(jm1, assoc = "shared_coef(1,2)"))) #expect_error(ret <- update(jm1, assoc = "shared_b(10)"), "greater than the number of") #expect_error(ret <- update(jm1, assoc = "shared_coef(10)"), "greater than the number of") #expect_error(ret <- update(jm1, assoc = c("shared_b(1)", "shared_coef(1)")), "should not be specified in both") #expect_error(ret <- update(jm1, assoc = c("shared_b", "shared_coef")), "should not be specified in both") expect_output(suppressWarnings(update(jm1, assoc = list(NULL)))) expect_output(suppressWarnings(update(jm1, assoc = list("null")))) expect_output(suppressWarnings(update(jm1, assoc = list("etavalue")))) expect_output(suppressWarnings(update(jm1, assoc = list("muvalue")))) expect_output(suppressWarnings(update(jm1, assoc = list("etaslope")))) #expect_output(suppressWarnings(update(jm1, assoc = list("muslope")))) expect_output(suppressWarnings(update(jm1, assoc = list("etaauc")))) expect_output(suppressWarnings(update(jm1, assoc = list("muauc")))) expect_output(suppressWarnings(update(jm1, assoc = list(c("etavalue", "etaslope"))))) #expect_output(suppressWarnings(update(jm1, assoc = list(c("etavalue", "muslope"))))) expect_output(suppressWarnings(update(jm1, assoc = list(c("muvalue", "etaslope"))))) #expect_output(suppressWarnings(update(jm1, assoc = list(c("muvalue", "muslope"))))) expect_error(ret <- update(jm1, assoc = NA), "'assoc' should be") expect_error(ret <- update(jm1, assoc = 123), "'assoc' should be") 
expect_error(ret <- update(jm1, assoc = c(1,2,3)), "'assoc' should be") expect_error(ret <- update(jm1, assoc = c("wrong")), "unsupported association type") expect_error(ret <- update(jm1, assoc = list("wrong")), "unsupported association type") expect_error(ret <- update(jm1, assoc = list(NULL, NULL)), "incorrect length") expect_error(ret <- update(jm1, assoc = list("etavalue", "etavalue")), "incorrect length") expect_error(ret <- update(jm1, assoc = list(c("etavalue", "etaslope"), "etavalue")), "incorrect length") # Multivariate joint models expect_output(suppressWarnings(update(jm2, assoc = "etavalue"))) expect_output(suppressWarnings(update(jm2, assoc = "muvalue"))) expect_output(suppressWarnings(update(jm2, assoc = "etaslope"))) #expect_output(suppressWarnings(update(jm2, assoc = "muslope"))) expect_output(suppressWarnings(update(jm2, assoc = "etaauc"))) expect_output(suppressWarnings(update(jm2, assoc = "muauc"))) expect_output(suppressWarnings(update(jm2, assoc = c("etavalue", "etaslope")))) expect_output(suppressWarnings(update(jm2, assoc = c("etavalue", "etaauc")))) expect_output(suppressWarnings(update(jm2, assoc = c("etaslope", "etaauc")))) expect_output(suppressWarnings(update(jm2, assoc = list("etavalue")))) expect_output(suppressWarnings(update(jm2, assoc = list("etavalue", "etavalue")))) expect_output(suppressWarnings(update(jm2, assoc = list(c("etavalue", "etaslope"), "etavalue")))) expect_output(suppressWarnings(update(jm2, assoc = list("etavalue", c("etavalue", "etaslope"))))) expect_output(suppressWarnings(update(jm2, assoc = list(c("etavalue", "etaslope"), c("muvalue", "muauc"))))) expect_error(ret <- update(jm2, assoc = list("wrong", "etavalue")), "unsupported association type") expect_error(ret <- update(jm2, assoc = list("null", "etavalue", "etaslope")), "incorrect length") expect_error(ret <- update(jm2, assoc = data.frame("etavalue", "etaslope")), "'assoc' should be") }) test_that("basehaz argument works", { 
expect_output(suppressWarnings(update(jm1, basehaz = "weibull"))) expect_output(suppressWarnings(update(jm1, basehaz = "bs"))) expect_output(suppressWarnings(update(jm1, basehaz = "piecewise"))) expect_output(suppressWarnings(update(jm1, basehaz = "bs", basehaz_ops = list(df = 5)))) expect_output(suppressWarnings(update(jm1, basehaz = "bs", basehaz_ops = list(knots = c(1,3,5))))) expect_output(suppressWarnings(update(jm1, basehaz = "piecewise", basehaz_ops = list(df = 5)))) expect_output(suppressWarnings(update(jm1, basehaz = "piecewise", basehaz_ops = list(knots = c(1,3,5))))) expect_output(expect_warning(update(jm1, basehaz = "weibull", basehaz_ops = list(df = 1)), "'df' will be ignored")) expect_output(expect_warning(update(jm1, basehaz = "weibull", basehaz_ops = list(knots = 1)), "'knots' will be ignored")) expect_output(suppressWarnings(update(jm1, basehaz = "piecewise", basehaz_ops = list(knots = c(1,3,5))))) expect_error(update(jm1, basehaz = "bs", basehaz_ops = list(df = 1)), "must be at least 3") expect_error(update(jm1, basehaz = "bs", basehaz_ops = list(knots = -1)), "'knots' must be non-negative") expect_error(update(jm1, basehaz = "piecewise", basehaz_ops = list(knots = -1)), "'knots' must be non-negative") expect_error(update(jm1, basehaz = "piecewise", basehaz_ops = list(knots = c(1,2,50))), "cannot be greater than the largest event time") }) test_that("qnodes argument works", { expect_output(suppressWarnings(update(jm1, qnodes = 7))) expect_output(suppressWarnings(update(jm1, qnodes = 11))) expect_output(suppressWarnings(update(jm1, qnodes = 15))) expect_error(update(jm1, qnodes = 1), "'qnodes' must be either 7, 11 or 15") expect_error(update(jm1, qnodes = c(1,2)), "should be a numeric vector of length 1") expect_error(update(jm1, qnodes = "wrong"), "should be a numeric vector of length 1") }) test_that("weights argument works", { idvec0 <- pbcSurv[["id"]] idvec1 <- head(idvec0) # missing IDs idvec2 <- rep(idvec0, each = 2) # repeated IDs idvec3 <- 
c(idvec0, 9998, 9999) # extra IDs not in model wts0 <- data.frame(id = idvec0, weights = rep_len(c(1,2), length(idvec0))) wts1 <- data.frame(id = idvec1, weights = rep_len(c(1,2), length(idvec1))) wts2 <- data.frame(id = idvec2, weights = rep_len(c(1,2), length(idvec2))) wts3 <- data.frame(id = idvec0, weights = rep_len(c(1,2), length(idvec0)), junkcol = idvec0) wts4 <- data.frame(id = idvec0, weights = rep_len(c("word"), length(idvec0))) wts5 <- data.frame(id = idvec0, weights = rep_len(c(NA), length(idvec0))) wts6 <- data.frame(id = idvec0, weights = rep_len(c(-1, 1), length(idvec0))) wts7 <- data.frame(id = idvec3, weights = rep_len(c(1,2), length(idvec3))) expect_error(update(jm1, weights = wts0, iter = 5), "not yet implemented") #expect_output(update(jm1, weights = wts0, iter = 5)) #expect_output(update(jm1, weights = wts7, iter = 5)) # ok to supply extra IDs in weights #expect_error(update(jm1, weights = as.matrix(wts0)), "should be a data frame") #expect_error(update(jm1, weights = wts1), "do not have weights supplied") #expect_error(update(jm1, weights = wts2), "should only have one row") #expect_error(update(jm1, weights = wts3), "should be a data frame with two columns") #expect_error(update(jm1, weights = wts4), "weights supplied must be numeric") #expect_error(update(jm1, weights = wts5), "weights supplied must be numeric") #expect_error(update(jm1, weights = wts6), "Negative weights are not allowed") }) test_that("scale_assoc argument works", { # Univariate joint model expect_output(suppressWarnings(update(jm1, scale_assoc = NULL))) expect_output(suppressWarnings(update(jm1, scale_assoc = 10))) expect_error(suppressWarnings(update(jm1, scale_assoc = 0), "'scale_assoc' must be non-zero.")) expect_error(suppressWarnings(update(jm1, scale_assoc = c(10,10)), "'scale_assoc' can only be specified once for each longitudinal submodel.")) expect_error(suppressWarnings(update(jm1, scale_assoc = "10"), "'scale_assoc' must be numeric.")) # Multivariate joint model 
expect_error(update(jm2, scale_assoc = 10), "'scale_assoc' must be specified for each longitudinal submodel") expect_output(suppressWarnings(update(jm2, scale_assoc = c(0.5, 10)))) # Test scaling functionality scale_assoc <- 0.5 SW(jm1_scaled <- update(jm1, scale_assoc = scale_assoc)) expect_equal(coef(jm1)$Event, c(rep(1,3),scale_assoc) * coef(jm1_scaled)$Event) }) test_that("init argument works", { expect_output(suppressWarnings(update(jm1, init = "prefit"))) expect_output(suppressWarnings(update(jm1, init = "0"))) expect_output(suppressWarnings(update(jm1, init = 0))) expect_output(suppressWarnings(update(jm1, init = "random"))) }) test_that("prior_PD argument works", { expect_output(suppressWarnings(update(jm1, prior_PD = TRUE))) }) test_that("adapt_delta argument works", { expect_output(suppressWarnings(update(jm1, adapt_delta = NULL))) expect_output(suppressWarnings(update(jm1, adapt_delta = 0.8))) expect_output(suppressWarnings(update(jm1, control = list(adapt_delta = NULL)))) expect_output(suppressWarnings(update(jm1, control = list(adapt_delta = 0.8)))) }) test_that("max_treedepth argument works", { expect_output(suppressWarnings(update(jm1, max_treedepth = NULL))) expect_output(suppressWarnings(update(jm1, max_treedepth = 5))) expect_output(suppressWarnings(update(jm1, control = list(max_treedepth = NULL)))) expect_output(suppressWarnings(update(jm1, control = list(max_treedepth = 5)))) }) test_that("error message occurs for arguments not implemented", { expect_error(update(jm1, QR = TRUE), "not yet implemented") expect_error(update(jm1, sparse = TRUE), "not yet implemented") }) #---- Compare parameter estimates: stan_jm(assoc = NULL) vs stan_glmer/coxph compare_glmer <- function(fmLong, fam = gaussian, ...) 
{ require(survival) fmSurv <- Surv(futimeYears, death) ~ sex + trt y1 <- stan_glmer(fmLong, pbcLong, fam, iter = 1000, chains = CHAINS, seed = SEED) s1 <- coxph(fmSurv, data = pbcSurv) j1 <- stan_jm(fmLong, pbcLong, fmSurv, pbcSurv, time_var = "year", family = fam, assoc = NULL, iter = 1000, chains = CHAINS, seed = SEED, ...) tols <- get_tols(y1, s1, tolscales = TOLSCALES) pars <- recover_pars(y1, s1) parsjm <- recover_pars(j1) for (i in names(tols$fixef)) expect_equal(pars$fixef[[i]], parsjm$fixef[[i]], tol = tols$fixef[[i]], info = fam) for (i in names(tols$ranef)) expect_equal(pars$ranef[[i]], parsjm$ranef[[i]], tol = tols$ranef[[i]], info = fam) for (i in names(tols$event)) expect_equal(pars$event[[i]], parsjm$event[[i]], tol = tols$event[[i]], info = fam) } # test_that("coefs same for stan_jm and stan_lmer/coxph", { # compare_glmer(logBili ~ year + (1 | id), gaussian)}) # test_that("coefs same for stan_jm and stan_glmer, bernoulli", { # compare_glmer(ybern ~ year + xbern + (1 | id), binomial)}) # test_that("coefs same for stan_jm and stan_glmer, poisson", { # compare_glmer(ypois ~ year + xpois + (1 | id), poisson, init = 0)}) # test_that("coefs same for stan_jm and stan_glmer, negative binomial", { # compare_glmer(ynbin ~ year + xpois + (1 | id), neg_binomial_2)}) # test_that("coefs same for stan_jm and stan_glmer, Gamma", { # compare_glmer(ygamm ~ year + xgamm + (1 | id), Gamma(log))}) #test_that("coefs same for stan_jm and stan_glmer, inverse gaussian", { # compare_glmer(ygamm ~ year + xgamm + (1 | id), inverse.gaussian)}) #-------- Check (post-)estimation functions work with various model specifications # No functions in formula o<-SW(f1 <- stan_jm(formulaLong = logBili ~ year + (year | id), dataLong = pbcLong, formulaEvent = Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, time_var = "year", refresh = 0, # this next line is only to keep the example small in size! 
chains = 1, cores = 1, seed = 12345, iter = 5)) # Functions on LHS of formula o<-SW(f2 <- update(f1, formulaLong. = exp(logBili) ~ year + (year | id))) # Functions on RHS of formula # o<-SW(f3 <- update(f1, formulaLong. = logBili ~ poly(year, degree = 2) + (poly(year, degree = 2) | id))) # Functions on LHS and RHS of formula # o<-SW(f4 <- update(f1, formulaLong. = exp(logBili) ~ poly(year, degree = 2) + (poly(year, degree = 2) | id))) # Intercept only event submodel o<-SW(f5 <- update(f1, formulaEvent. = Surv(futimeYears, death) ~ 1)) # Different baseline hazards o<-SW(f7 <- update(f1, basehaz = "weibull")) o<-SW(f8 <- update(f1, basehaz = "bs")) #o<-SW(f9 <- update(f1, basehaz = "piecewise")) # posterior_survfit not yet implemented for piecewise # Different association structures o<-SW(f10 <- update(f1, assoc = NULL)) o<-SW(f11 <- update(f1, assoc = "etavalue")) o<-SW(f12 <- update(f1, assoc = "etaslope")) o<-SW(f13 <- update(f1, assoc = "etaauc")) o<-SW(f14 <- update(f1, assoc = "muvalue")) #o<-SW(f15 <- update(f1, assoc = "muslope")) o<-SW(f16 <- update(f1, assoc = "muauc")) o<-SW(f17 <- update(f1, assoc = c("etavalue", "etaslope"))) o<-SW(f18 <- update(f1, assoc = c("etavalue", "etaauc"))) # Different association structures with intercept only submodel o<-SW(f19 <- update(f5, assoc = NULL)) o<-SW(f20 <- update(f5, assoc = "etavalue")) o<-SW(f21 <- update(f5, assoc = "etaslope")) o<-SW(f22 <- update(f5, assoc = "etaauc")) o<-SW(f23 <- update(f5, assoc = "muvalue")) #o<-SW(f24 <- update(f5, assoc = "muslope")) o<-SW(f25 <- update(f5, assoc = "muauc")) o<-SW(f26 <- update(f5, assoc = c("etavalue", "etaslope"))) o<-SW(f27 <- update(f5, assoc = c("etavalue", "etaauc"))) # Shared random effect association structures #o<-SW(f28 <- update(f1, assoc = c("shared_b"))) #o<-SW(f29 <- update(f1, assoc = c("shared_coef"))) # Multivariate models o<-SW(f31 <- stan_jm(formulaLong = list(logBili ~ year + (year | id), albumin ~ sex + trt + year + (1 | id)), dataLong = pbcLong, 
formulaEvent = Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, time_var = "year", refresh = 0, # this next line is only to keep the example small in size! chains = 1, cores = 1, seed = 12345, iter = 5)) o<-SW(f32 <- update(f31, assoc = list("etaslope", c("etavalue", "etaauc")))) # New data for predictions ndL1 <- pbcLong[pbcLong$id == 2,] ndE1 <- pbcSurv[pbcSurv$id == 2,] ndL2 <- pbcLong[pbcLong$id %in% c(1,2),] ndE2 <- pbcSurv[pbcSurv$id %in% c(1,2),] # Test the models for (j in c(1:30)) { mod <- try(get(paste0("f", j)), silent = TRUE) if (class(mod)[1L] == "try-error") { cat("Model not found:", paste0("f", j), "\n") } else { cat("Checking model:", paste0("f", j), "\n") test_that("log_lik works with estimation data", { ll <- log_lik(mod) expect_matrix(ll) expect_error(log_lik(mod, m = 1), "should not be specified") }) test_that("log_lik works with new data (one individual)", { ll <- log_lik(mod, newdataLong = ndL1, newdataEvent = ndE1) expect_matrix(ll) }) test_that("log_lik works with new data (multiple individuals)", { ll <- log_lik(mod, newdataLong = ndL2, newdataEvent = ndE2) expect_matrix(ll) }) test_that("loo and waic work", { expect_equivalent_loo(mod) }) test_that("posterior_predict works with estimation data", { pp <- posterior_predict(mod, seed = SEED) expect_ppd(pp) expect_identical(pp, posterior_predict(mod, m = 1, seed = SEED)) if (mod$n_markers > 1L) { pp <- posterior_predict(mod, m = 2) expect_ppd(pp) } }) test_that("posterior_predict works with new data (one individual)", { pp <- posterior_predict(mod, newdata = ndL1, seed = SEED) expect_ppd(pp) expect_identical(pp, posterior_predict(mod, m = 1, newdata = ndL1, seed = SEED)) if (mod$n_markers > 1L) { pp <- posterior_predict(mod, m = 2, newdata = ndL1) expect_ppd(pp) } expect_error(posterior_predict(mod, newdataLong = ndL1), "should not be specified") expect_error(posterior_predict(mod, newdataEvent = ndE1), "should not be specified") }) test_that("posterior_predict works with new data 
(multiple individuals)", { pp <- posterior_predict(mod, newdata = ndL2, seed = SEED) expect_ppd(pp) expect_identical(pp, posterior_predict(mod, m = 1, newdata = ndL2, seed = SEED)) if (mod$n_markers > 1L) { pp <- posterior_predict(mod, m = 2, newdata = ndL2) expect_ppd(pp) } }) test_that("posterior_traj works with estimation data", { pp <- posterior_traj(mod) expect_s3_class(pp, "predict.stanjm") if (mod$n_markers > 1L) { pp <- posterior_traj(mod, m = 2) expect_s3_class(pp, "predict.stanjm") } }) test_that("posterior_traj works with new data with and without offset", { pbcLong2 <- pbcLong nd <- pbcLong2[1:2, c("id", "year")] expect_s3_class(posterior_traj(mod, newdataLong = nd, dynamic = FALSE), "predict.stanjm") pbcLong2$off <- 1 o <- SW(mod_off <- update(mod, dataLong = pbcLong2, formulaLong. = logBili ~ offset(off) + year + (year | id))) nd <- pbcLong2[1:2, c("id", "year", "off")] expect_s3_class(posterior_traj(mod_off, newdataLong = nd, dynamic = FALSE), "predict.stanjm") }) test_that("posterior_traj works with new data (one individual)", { pp <- posterior_traj(mod, newdataLong = ndL1, dynamic = FALSE) expect_s3_class(pp, "predict.stanjm") if (mod$n_markers > 1L) { pp <- posterior_traj(mod, m = 2, newdataLong = ndL1, dynamic = FALSE) expect_s3_class(pp, "predict.stanjm") } }) test_that("posterior_traj works with new data (multiple individuals)", { pp <- posterior_traj(mod, newdataLong = ndL2, dynamic = FALSE) expect_s3_class(pp, "predict.stanjm") if (mod$n_markers > 1L) { pp <- posterior_traj(mod, m = 2, newdataLong = ndL2, dynamic = FALSE) expect_s3_class(pp, "predict.stanjm") } }) test_that("posterior_survfit works with estimation data", { SW(ps <- posterior_survfit(mod)) expect_survfit(ps) }) test_that("posterior_survfit works with new data (one individual)", { SW(ps <- posterior_survfit(mod, newdataLong = ndL1, newdataEvent = ndE1)) expect_survfit(ps) }) test_that("posterior_survfit works with new data (multiple individuals)", { SW(ps <- 
posterior_survfit(mod, newdataLong = ndL2, newdataEvent = ndE2)) expect_survfit(ps) }) } } rstanarm/tests/testthat/test_pp_check.R0000644000176200001440000001402314414044166020045 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
context("pp_check") suppressPackageStartupMessages(library(rstanarm)) SEED <- 123 set.seed(SEED) ITER <- 10 CHAINS <- 2 REFRESH <- 0 if (!exists("example_model")) { example_model <- run_example_model() } fit <- example_model SW(fit2 <- stan_glm(mpg ~ wt + am, data = mtcars, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0)) patt <- "rootogram|_bars|vs_x|grouped$|_data$" ppc_funs_not_grouped <- bayesplot::available_ppc(patt, invert = TRUE) ppc_funs_grouped <- bayesplot::available_ppc("vs_x|grouped") ppc_funs_discrete <- bayesplot::available_ppc("rootogram|_bars") test_that("pp_check.stanreg creates ggplot object", { exclude <- c("ppc_bars", "ppc_loo_pit", "ppc_loo_pit_overlay", "ppc_loo_pit_qq", "ppc_loo_intervals", "ppc_loo_ribbon", "ppc_rootogram", "ppc_error_binned", "ppc_km_overlay", "ppc_pit_ecdf") for (f in ppc_funs_not_grouped) for (j in 1:2) { if (!f %in% exclude) { expect_gg(suppressWarnings(pp_check(fit, plotfun = f, nreps = j)), info = f) } } }) test_that("pp_check.stanreg creates ggplot object for grouped functions", { exclude <- c("ppc_km_overlay_grouped", "ppc_pit_ecdf_grouped") for (f in setdiff(ppc_funs_grouped, ppc_funs_discrete)) for (j in 1:2) { if (!(f %in% exclude)) { expect_gg(suppressWarnings(pp_check(fit2, plotfun = f, nreps = j, group = "am", x = "wt")), info = f) } } }) test_that("pp_check.stanreg creates ggplot object for count & ordinal outcomes", { d <- data.frame( counts = c(18,17,15,20,10,20,25,13,12), outcome = gl(3,1,9), treatment = gl(3,3) ) SW(fit3 <- stan_glm(counts ~ outcome + treatment, data = d, family = poisson(link="log"), iter = ITER, chains = CHAINS, seed = SEED, refresh = 0)) expect_gg(pp_check(fit3, plotfun = "rootogram")) SW(fit4 <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit", prior = R2(0.2, "mean"), init_r = 0.1, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0)) expect_gg(pp_check(fit4, plotfun = "bars")) expect_gg(pp_check(fit4, plotfun = "bars_grouped", group = "agegp")) }) 
test_that("pp_check ok for vb", { SW(fit3 <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "meanfield", seed = SEED, iter = 10000, refresh = 0)) expect_gg(pp_check(fit3)) expect_gg(pp_check(fit3, plotfun = "error_hist")) }) # test_that("pp_check binned residual plot works for factors", { # ir2 <- iris[-c(1:50), ] # ir2$Species <- factor(ir2$Species) # SW(fit3 <- stan_glm(Species ~ Petal.Length + Petal.Width + Sepal.Length + Sepal.Width, # data=ir2, family = "binomial", iter = ITER, chains = CHAINS, # seed = SEED, refresh = 0)) # expect_gg(pp_check(fit3, plotfun = "error_binned")) # }) # test errors -------------------------------------------------------------- test_that("pp_check throws error if 'stat' arg is bad", { expect_error(pp_check(fit, plotfun = "stat", stat = "10982pqmeaw"), regexp = "not found") }) test_that("pp_check throws error if plotfun not found", { expect_error(pp_check(fit, plotfun = "9999"), "not a valid PPC function name") expect_error(pp_check(fit, plotfun = "mcmc_hist"), "use the 'plot' method") }) test_that("pp_check throws error if 'group' variable not found", { expect_error(pp_check(fit, plotfun = "stat_grouped", group = "herd2"), "not found in model frame") }) test_that("pp_check throws error for optimizing", { SW(fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED, refresh = 0)) expect_error(pp_check(fito), regexp = "algorithm") }) # test warnings ---------------------------------------------------------- test_that("pp_check throws warning if 'nreps' ignored ", { expect_warning(pp_check(fit, plotfun = "stat", nreps = 1), regexp = "'nreps' is ignored") }) test_that("pp_check throws warning if 'group' or 'x' ignored", { expect_warning(pp_check(fit, plotfun = "stat_2d", stat = c("mean", "sd"), group = "herd"), regexp = "ignored: group") expect_warning(pp_check(fit, plotfun = "scatter", nreps = 3, group = "herd"), regexp = "ignored: group") expect_warning(pp_check(fit, plotfun = "error_hist", x = "herd"), regexp = 
"ignored: x") }) # helpers ----------------------------------------------------------------- test_that(".ignore_nreps and .set_nreps work", { ignore_nreps <- rstanarm:::.ignore_nreps set_nreps <- rstanarm:::.set_nreps expect_warning(ignore_nreps(10), "'nreps' is ignored") expect_silent(ignore_nreps(NULL)) expect_warning(r <- set_nreps(10, "ppc_stat"), "'nreps' is ignored") expect_null(r) expect_equal(set_nreps(10, "ppc_hist"), 10) }) test_that("y coerced to numeric (attributes dropped)", { d <- mtcars attr(d$mpg, "test") <- "something" SW(fit3 <- update(fit2, data = d)) expect_equal(attr(get_y(fit3), "test"), "something") expect_gg(pp_check(fit3, nreps = 3)) }) rstanarm/tests/testthat/test_stan_lm.R0000644000176200001440000001347314551535205017737 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
context("stan_lm|aov|biglm") suppressPackageStartupMessages(library(rstanarm)) SEED <- 12345 CHAINS <- 2 ITER <- 400 threshold <- 0.21 REFRESH <- 0 SW(fit <- stan_lm(mpg ~ ., data = mtcars, prior = R2(location = 0.75), chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) test_that("stan_aov returns expected result for npk example", { contrasts_list <- list( block = "contr.poly", N = "contr.poly", P = "contr.poly", K = "contr.poly" ) SW(fit <- stan_aov(yield ~ block + N*P*K, data = npk, contrasts = contrasts_list, prior = R2(0.5), chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) expect_stanreg(fit) fit_sigma <- fit$stan_summary["sigma", "mean"] lm_sigma <- summary(lm(yield ~ block + N*P*K, data = npk, contrasts = contrasts_list))$sigma expect_equal(fit_sigma, lm_sigma, tol = threshold) expect_output(print(fit), regexp = "stan_aov") expect_output(print(fit), regexp = "ANOVA-like table") }) test_that("stan_biglm.fit returns stanfit (not stanreg) object ", { ols <- lm(mpg ~ wt + qsec + am - 1, data = as.data.frame(scale(mtcars, scale = FALSE))) b <- coef(ols) R <- qr.R(ols$qr) SSR <- crossprod(ols$residuals)[1] N <- length(ols$fitted.values) xbar <- colMeans(mtcars[,c("wt", "qsec", "am")]) y <- mtcars$mpg ybar <- mean(y) s_y <- sd(y) SW(post <- stan_biglm.fit(b, R, SSR, N, xbar, ybar, s_y, prior = R2(.75), chains = 1, iter = 10, seed = SEED, refresh = 0)) expect_s4_class(post, "stanfit") }) test_that("stan_biglm returns expected result", { biglm <- biglm::biglm(mpg ~ wt + qsec + am, data = mtcars) xbar <- colMeans(mtcars[,c("wt", "qsec", "am")]) y <- mtcars$mpg ybar <- mean(y) s_y <- sd(y) SW(post <- stan_biglm(biglm, xbar, ybar, s_y, prior = R2(0.5), chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) expect_equal(coef(lm(mpg ~ wt + qsec + am, data = mtcars)), rstan::summary(post)$summary[1:4, "mean"], tol = threshold) }) test_that("stan_lm returns expected result for mtcars example", { # example using mtcars dataset expect_stanreg(fit) fit_sigma <- 
fit$stan_summary["sigma", "mean"] lm_sigma <- summary(lm(mpg ~ ., data = mtcars))$sigma expect_equal(fit_sigma, lm_sigma, tol = threshold) }) test_that("stan_lm returns expected result for trees example", { # example using trees dataset SW(fit <- stan_lm(log(Volume) ~ log(Girth) + log(Height), data = trees, prior = R2(location = 0.9, what = "mean"), refresh = 0, chains = CHAINS, iter = ITER, seed = SEED, adapt_delta = 0.999)) expect_stanreg(fit) fit_sigma <- fit$stan_summary["sigma", "mean"] lm_sigma <- summary(lm(log(Volume) ~ log(Girth) + log(Height),data = trees))$sigma expect_equal(fit_sigma, lm_sigma, tol = threshold) }) test_that("stan_lm doesn't break with less common priors", { # prior = NULL SW(fit <- stan_lm(mpg ~ -1 + ., data = mtcars, prior = NULL, iter = 10, chains = 1, seed = SEED, refresh = 0)) expect_stanreg(fit) # prior_intercept = normal() SW(fit <- stan_lm(mpg ~ ., data = mtcars, refresh = 0, prior = R2(0.75), prior_intercept = normal(), iter = 10, chains = 1, seed = SEED)) expect_stanreg(fit) }) test_that("stan_lm doesn't break with vb algorithms", { SW(fit <- stan_lm(mpg ~ ., data = mtcars, prior = R2(location = 0.75), refresh = 0, algorithm = "meanfield", seed = SEED)) expect_stanreg(fit) SW(fit2 <- update(fit, algorithm = "fullrank")) expect_stanreg(fit2) }) test_that("stan_lm works with 1 predictor", { SW(fit <- stan_lm(mpg ~ wt, data = mtcars, prior = R2(0.5, "mean"), refresh = 0, seed = SEED)) expect_stanreg(fit) }) test_that("stan_lm throws error if only intercept", { expect_error(stan_lm(mpg ~ 1, data = mtcars, prior = R2(location = 0.75)), regexp = "not suitable for estimating a mean") }) test_that("stan_lm throws error if 'location' is a vector", { expect_error(stan_lm(mpg ~ ., data = mtcars, prior = R2(location = c(0.25, 0.5))), regexp = "only accepts a single value for 'location'") }) test_that("stan_lm throws error if N < K", { # NOTE: remove this test once N < K is enabled expect_error(stan_lm(mpg ~ ., data = mtcars[1:5, ], prior = 
R2(0.75)), regexp = "more predictors than data points is not yet enabled") }) test_that("stan_lm throws error if glmer syntax used", { expect_error(stan_lm(mpg ~ wt + (1|cyl), data = mtcars, prior = R2(0.5, "mean")), regexp = "model formula not allowed") }) test_that("loo/waic for stan_lm works", { ll_fun <- rstanarm:::ll_fun expect_equivalent_loo(fit) expect_identical(ll_fun(fit), rstanarm:::.ll_gaussian_i) }) test_that("posterior_predict compatible with stan_lm", { skip_on_os("mac") check_for_pp_errors(fit) expect_linpred_equal(fit) }) rstanarm/tests/testthat/test_misc.R0000644000176200001440000004626214370470372017241 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm)) SEED <- 12345 set.seed(SEED) ITER <- 10L CHAINS <- 2L REFRESH <- 0 if (!exists("example_model")) { example_model <- run_example_model() } context("helper functions") test_that("nlist works", { nlist <- rstanarm:::nlist a <- 1 b <- 2 c <- 3 val <- list(nlist(a, b, c), nlist(a, b, c = "tornado"), nlist(a = -1, b = -2, c)) ans <- list(list(a = a, b = b, c = c), list(a = a, b = b, c = "tornado"), list(a = -1, b = -2, c = c)) expect_identical(val, ans) }) test_that("family checking works", { fams <- rstanarm:::nlist("binomial", "gaussian", "poisson", gamma = "Gamma", ig = "inverse.gaussian", nb = "neg_binomial_2") for (j in seq_along(fams)) { is.f <- getFromNamespace(paste0("is.", names(fams)[j]), "rstanarm") f <- get(fams[[j]])()$family expect_true(is.f(f)) expect_false(is.f("not a family")) } }) test_that("%ORifNULL% works", { `%ORifNULL%` <- rstanarm:::`%ORifNULL%` a <- list(NULL, NA, NaN, 1, "a", FALSE) b <- 1 ans <- c(b, a[-1]) for (j in seq_along(a)) { expect_identical(a[[j]] %ORifNULL% b, ans[[j]]) } }) test_that("%ORifINF% works", { `%ORifINF%` <- rstanarm:::`%ORifINF%` a <- list(Inf, -Inf, 1, "a", FALSE) b <- 0 ans <- c(b, a[-1]) for (j in seq_along(a)) { expect_identical(a[[j]] %ORifINF% b, ans[[j]]) } }) test_that("maybe_broadcast works", { maybe_broadcast <- rstanarm:::maybe_broadcast n <- 5 x <- list(numeric(0), NULL, 1, c(1,1)) ans <- list(rep(0,n), rep(0,n), rep(1,n), c(1,1)) for (j in seq_along(ans)) { expect_equal(maybe_broadcast(x[[j]], n), ans[[j]]) } }) test_that("set_prior_scale works", { set_prior_scale <- rstanarm:::set_prior_scale expect_error(set_prior_scale("a", "b", "c")) expect_error(set_prior_scale(1, 1, 1)) expect_equal(set_prior_scale(NULL, 1, "a"), 1) expect_equal(set_prior_scale(NULL, 1, "probit"), dnorm(0) / dlogis(0)) expect_equal(set_prior_scale(2, 1, "a"), 2) expect_equal(set_prior_scale(2, 1, "probit"), 2 * dnorm(0) / dlogis(0)) }) test_that("validate_parameter_value works", { 
validate_parameter_value <- rstanarm:::validate_parameter_value expect_error(validate_parameter_value(-1), "should be positive") expect_error(validate_parameter_value(0), "should be positive") expect_error(validate_parameter_value("a"), "should be NULL or numeric") expect_error(validate_parameter_value(NA), "should be NULL or numeric") expect_true(validate_parameter_value(NULL)) expect_true(validate_parameter_value(.01)) expect_true(validate_parameter_value(.Machine$double.xmax)) }) test_that("validate_R2_location works", { validate_R2_location <- rstanarm:::validate_R2_location expect_error( validate_R2_location(-1, what = "mode"), "location must be in (0,1]", fixed = TRUE ) expect_error( validate_R2_location(.5, what = "log"), "location must be negative", fixed = TRUE ) expect_error( validate_R2_location(0, what = "mean"), "location must be in (0,1)", fixed = TRUE ) expect_error( validate_R2_location(c(0.5, 0.25), what = "mode"), "only accepts a single value for 'location'", fixed = TRUE ) }) test_that("validate_weights works", { validate_weights <- rstanarm:::validate_weights ff <- function(weights) validate_weights(weights) expect_equal(ff(), double(0)) expect_equal(ff(x <- rexp(10)), x) expect_equal(validate_weights(NULL), double(0)) expect_equal(validate_weights(1:10), 1:10) expect_error(validate_weights(LETTERS), regexp = "numeric") expect_error(validate_weights(c(-1,2,3)), regexp = "negative", ignore.case = TRUE) expect_error(stan_glm(mpg ~ wt, data = mtcars, weights = rep(-1, nrow(mtcars))), regexp = "negative", ignore.case = TRUE) capture.output(fit <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED, weights = rexp(nrow(mtcars)), refresh = 0)) expect_stanreg(fit) }) test_that("validate_offset works", { validate_offset <- rstanarm:::validate_offset expect_equal(validate_offset(NULL), double(0)) expect_equal(validate_offset(rep(1, 10), rnorm(10)), rep(1, 10)) expect_error(validate_offset(rep(1, 10), rnorm(5))) 
expect_error(validate_offset(rep(1, 5), rnorm(10)), regexp = "number of offsets", ignore.case = TRUE) SW(fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED)) SW(fito2 <- update(fito, offset = rep(5, nrow(mtcars)))) expect_equal(coef(fito)[1], 5 + coef(fito2)[1], tol = 0.2) }) test_that("validate_family works", { validate_family <- rstanarm:::validate_family expect_equal(validate_family("gaussian"), gaussian()) expect_equal(validate_family(gaussian), gaussian()) expect_equal(validate_family(gaussian()), gaussian()) expect_equal(validate_family(gaussian(link = "log")), gaussian(link = "log")) expect_equal(validate_family(binomial(link = "probit")), binomial(link = "probit")) expect_equal(validate_family(neg_binomial_2()), neg_binomial_2()) expect_error(validate_family("not a family")) expect_error(validate_family(rnorm(10)), "must be a family") expect_error(stan_glm(mpg ~ wt, data = mtcars, family = "not a family")) }) test_that("validate_glm_formula works", { validate_glm_formula <- rstanarm:::validate_glm_formula expect_silent(validate_glm_formula(mpg ~ wt + cyl)) expect_error(validate_glm_formula(mpg ~ wt + (1|cyl)), "not allowed") expect_error(validate_glm_formula(mpg ~ (1|cyl/gear)), "not allowed") }) test_that("validate_data works", { validate_data <- rstanarm:::validate_data expect_error(validate_data(list(1)), "'data' must be a data frame") expect_warning(d <- validate_data(if_missing = 3), "Omitting the 'data' argument is not recommended") expect_equal(d, 3) }) test_that("array1D_check works", { array1D_check <- rstanarm:::array1D_check y1 <- rnorm(10) expect_equal(array1D_check(y1), y1) names(y1) <- rep_len(letters, length(y1)) expect_equal(array1D_check(y1), y1) expect_identical(array1D_check(as.array(y1)), y1) y2 <- cbind(1:10, 11:20) expect_equal(array1D_check(y2), y2) }) test_that("fac2bin works", { fac2bin <- rstanarm:::fac2bin y <- gl(2, 2, 20, labels = c("lo", "hi")) expect_identical(fac2bin(y), rep_len(c(0L, 0L, 1L, 1L), 
20)) y <- gl(2, 8, labels = c("Control", "Treat")) expect_identical(fac2bin(y), rep(c(0L, 1L), each = 8)) expect_identical(fac2bin(factor(c(1,2))), c(0L, 1L)) expect_error(fac2bin(rnorm(10))) expect_error(fac2bin(factor(c(1,2,3)))) expect_error(fac2bin(factor(mtcars$cyl, labels = c("lo", "mid", "hi")))) }) test_that("check_constant_vars works", { check_constant_vars <- rstanarm:::check_constant_vars mf <- model.frame(glm(mpg ~ ., data = mtcars)) mf2 <- mf mf2$wt <- 2 expect_equal(check_constant_vars(mf), mf) expect_error(check_constant_vars(mf2), "wt") mf2$gear <- 3 expect_error(check_constant_vars(mf2), "wt, gear") expect_error(stan_glm(mpg ~ ., data = mf2), "wt, gear") SW(fit1 <- stan_glm(mpg ~ ., data = mf, algorithm = "optimizing", seed = SEED, refresh = 0)) SW(fit2 <- stan_glm(mpg ~ ., data = mf, weights = rep(2, nrow(mf)), seed = SEED, offset = rep(1, nrow(mf)), algorithm = "optimizing", refresh = 0)) expect_stanreg(fit1) expect_stanreg(fit2) esoph2 <- esoph esoph2$agegp[1:nrow(esoph2)] <- "75+" expect_error(stan_polr(tobgp ~ agegp, data = esoph2, iter = 10, prior = R2(0.2, "mean"), init_r = 0.1, seed = SEED, refresh = 0), regexp = "agegp") }) test_that("linear_predictor methods work", { linpred_vec <- rstanarm:::linear_predictor.default linpred_mat <- rstanarm:::linear_predictor.matrix x <- cbind(1, 1:4) bmat <- matrix(c(-0.5, 0, 0.5, 1), nrow = 2, ncol = 2) bvec <- bmat[1, ] vec_ans <- seq(0, 1.5, 0.5) mat_ans <- rbind(vec_ans, 1:4) offset <- rep(2, nrow(x)) expect_equivalent(linpred_vec(bvec, x), vec_ans) expect_equivalent(linpred_vec(bvec, x, offset = NULL), vec_ans) expect_equivalent(linpred_vec(bvec, x, offset), vec_ans + offset) expect_equivalent(linpred_mat(bmat, x), mat_ans) expect_equivalent(linpred_mat(bmat, x, offset = NULL), mat_ans) expect_equivalent(linpred_mat(bmat, x, offset), mat_ans + offset) }) # fits to use in multiple calls to test_that below SW({ fit <- stan_glm(mpg ~ wt, data = mtcars, iter = ITER, chains = CHAINS, seed = SEED, refresh 
= 0) fit2 <- stan_glmer(mpg ~ wt + (1|cyl), data = mtcars, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0) fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED) fitvb <- update(fito, algorithm = "meanfield", seed = SEED) fitvb2 <- update(fitvb, algorithm = "fullrank", seed = SEED) }) test_that("validate_stanreg_object works", { validate_stanreg_object <- rstanarm:::validate_stanreg_object expect_silent(validate_stanreg_object(fit)) expect_silent(validate_stanreg_object(fit2)) expect_silent(validate_stanreg_object(fito)) expect_silent(validate_stanreg_object(fitvb)) expect_error(validate_stanreg_object(fit$stanfit), "not a stanreg object") }) test_that("used.sampling, used.optimizing, and used.variational work", { used.sampling <- rstanarm:::used.sampling used.optimizing <- rstanarm:::used.optimizing used.variational <- rstanarm:::used.variational expect_true(used.sampling(fit)) expect_true(used.sampling(fit2)) expect_false(used.optimizing(fit)) expect_false(used.optimizing(fit2)) expect_false(used.variational(fit)) expect_false(used.variational(fit2)) expect_true(used.optimizing(fito)) expect_false(used.sampling(fito)) expect_false(used.variational(fito)) expect_true(used.variational(fitvb)) expect_true(used.variational(fitvb2)) expect_false(used.sampling(fitvb)) expect_false(used.sampling(fitvb2)) expect_false(used.optimizing(fitvb)) expect_false(used.optimizing(fitvb2)) # should return error if passed anything but a stanreg object expect_error(used.sampling(fit$stanfit)) expect_error(used.variational(fitvb$stanfit)) expect_error(used.optimizing(fito$stanfit)) }) test_that("is.mer works", { is.mer <- rstanarm:::is.mer bad1 <- bad2 <- example_model bad1$glmod <- NULL class(bad2) <- "stanreg" expect_true(is.mer(example_model)) expect_true(is.mer(fit2)) expect_false(is.mer(fit)) expect_false(is.mer(fito)) expect_false(is.mer(fitvb)) expect_false(is.mer(fitvb2)) expect_error(is.mer(bad1), regexp = "Bug found") 
expect_error(is.mer(bad2), regexp = "Bug found") }) test_that("get_x, get_y, get_z work", { x_ans <- cbind("(Intercept)" = 1, wt = mtcars$wt) y_ans <- mtcars$mpg expect_equivalent(get_x(fit), x_ans) expect_equivalent(get_y(fit), y_ans) expect_error(get_z(fit), "no applicable method") z_ans2 <- model.matrix(mpg ~ -1 + factor(cyl), data = mtcars) expect_equivalent(get_x(fit2), x_ans) expect_equivalent(get_y(fit2), y_ans) expect_equivalent(as.matrix(get_z(fit2)), z_ans2) SW( fit3 <- stan_glmer(mpg ~ wt + (1 + wt|cyl), data = mtcars, refresh = 0, iter = 10, chains = 1, refresh = 5, seed = SEED) ) z_ans3 <- mat.or.vec(nr = nrow(mtcars), nc = 6) z_ans3[, c(1, 3, 5)] <- model.matrix(mpg ~ 0 + factor(cyl), data = mtcars) z_ans3[, c(2, 4, 6)] <- model.matrix(mpg ~ 0 + wt:factor(cyl), data = mtcars) expect_equivalent(get_x(fit3), x_ans) expect_equivalent(get_y(fit3), y_ans) expect_equivalent(as.matrix(get_z(fit3)), z_ans3) }) test_that("set_sampling_args works", { set_sampling_args <- rstanarm:::set_sampling_args # user specifies stepsize and also overrides default max_treedepth control1 <- list(max_treedepth = 10, stepsize = 0.01) # user specifies control but doesn't override max_treedepth control2 <- list(stepsize = 0.01) # no user 'control' argument no_control <- list() # normal prior --> adapt_delta = 0.95 val1 <- set_sampling_args(fit, prior = normal(), user_dots = list(control = control1, iter = 100), user_adapt_delta = NULL) # use fit2 instead of fit to check that it doesn't matter which fit object is used val1b <- set_sampling_args(fit2, prior = normal(), user_dots = list(control = control1, iter = 100), user_adapt_delta = NULL) # normal prior --> adapt_delta = 0.95, but user override to 0.9 val2 <- set_sampling_args(fit, prior = normal(), user_dots = list(control = control1), user_adapt_delta = 0.9) # cauchy/t_1 prior --> adapt_delta = 0.95 val3 <- set_sampling_args(fit, prior = student_t(1), user_dots = list(control = control1), user_adapt_delta = NULL) # 
cauchy/t_1 prior --> adapt_delta = 0.95, but user override to 0.8 val4 <- set_sampling_args(fit, prior = cauchy(), user_dots = list(control = control2), user_adapt_delta = 0.8) # hs prior --> adapt_delta = 0.99 val5 <- set_sampling_args(fit, prior = hs(), user_dots = no_control, user_adapt_delta = NULL) val6 <- set_sampling_args(fit, prior = hs_plus(), user_dots = no_control, user_adapt_delta = NULL) expect_equal(val1$control, c(control1, adapt_delta = 0.95)) expect_equal(val1$iter, 100) expect_equal(val1$control, val1b$control) expect_equal(val2$control, c(control1, adapt_delta = 0.9)) expect_equal(val3$control, c(control1, adapt_delta = 0.95)) expect_equal(val4$control, c(control2, adapt_delta = 0.8, max_treedepth = 15)) expect_equal(val5$control, list(adapt_delta = 0.99, max_treedepth = 15)) expect_equal(val6$control, list(adapt_delta = 0.99, max_treedepth = 15)) }) test_that("linkinv methods work", { linkinv.stanreg <- rstanarm:::linkinv.stanreg linkinv.character <- rstanarm:::linkinv.character linkinv.family <- rstanarm:::linkinv.family expect_identical(linkinv.family(gaussian()), gaussian()$linkinv) expect_identical(linkinv.family(neg_binomial_2()), neg_binomial_2()$linkinv) expect_identical(linkinv.family(binomial(link = "probit")), binomial(link = "probit")$linkinv) SW( fit_polr <- stan_polr(tobgp ~ agegp, data = esoph, method = "loglog", prior = R2(0.2, "mean"), init_r = 0.1, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) ) expect_identical(linkinv.stanreg(fit_polr), rstanarm:::pgumbel) expect_identical(linkinv.character(fit_polr$family), rstanarm:::pgumbel) expect_identical(linkinv.stanreg(example_model), binomial()$linkinv) expect_identical(linkinv.stanreg(fit), gaussian()$linkinv) expect_error(rstanarm:::polr_linkinv(example_model), regexp = "should be a stanreg object created by stan_polr") }) test_that("collect_pars and grep_for_pars work", { fit <- example_model collect_pars <- rstanarm:::collect_pars grep_for_pars <- 
rstanarm:::grep_for_pars all_period <- paste0("period", 2:4) all_varying <- rstanarm:::b_names(rownames(fit$stan_summary), value = TRUE) expect_identical(grep_for_pars(fit, "period"), all_period) expect_identical(grep_for_pars(fit, c("period", "size")), c(all_period, "size")) expect_identical(grep_for_pars(fit, "period|size"), c("size", all_period)) expect_identical(grep_for_pars(fit, "(2|3)$"), all_period[1:2]) expect_identical(grep_for_pars(fit, "b\\["), all_varying) expect_identical(grep_for_pars(fit, "herd"), c(all_varying, "Sigma[herd:(Intercept),(Intercept)]")) expect_identical(grep_for_pars(fit, "Intercept"), c("(Intercept)", all_varying, "Sigma[herd:(Intercept),(Intercept)]")) expect_identical(grep_for_pars(fit, "herd:[3,5]"), all_varying[c(3,5)]) expect_identical(grep_for_pars(fit, "herd:[3-5]"), all_varying[3:5]) expect_error(grep_for_pars(fit, "NOT A PARAMETER"), regexp = "No matches") expect_error(grep_for_pars(fit, "b[")) expect_identical(collect_pars(fit, regex_pars = "period"), all_period) expect_identical(collect_pars(fit, pars = "size", regex_pars = "period"), c("size", all_period)) expect_identical(collect_pars(fit, pars = c("(Intercept)", "size")), c("(Intercept)", "size")) expect_identical(collect_pars(fit, pars = "period2", regex_pars = "herd:[[1]]"), c("period2", all_varying[1])) expect_identical(collect_pars(fit, pars = "size", regex_pars = "size"), "size") expect_identical(collect_pars(fit, regex_pars = c("period", "herd")), c(all_period, all_varying, "Sigma[herd:(Intercept),(Intercept)]")) }) test_that("posterior_sample_size works", { pss <- rstanarm:::posterior_sample_size expect_equal(pss(example_model), 1000) expect_equal(pss(fit), nrow(as.matrix(fit))) expect_equal(pss(fit2), ITER * CHAINS / 2) expect_equal(pss(fitvb), 1000) expect_equal(pss(fitvb2), 1000) expect_equal(pss(fito), nrow(as.matrix(fito))) SW(fit3 <- stan_glm(mpg ~ wt, data = mtcars, iter = 20, chains = 1, thin = 2, refresh = 0)) expect_equal(pss(fit3), 
nrow(as.matrix(fit3))) }) test_that("last_dimnames works", { a <- array(rnorm(300), dim = c(10, 3, 10), dimnames = list(A = NULL, B = NULL, C = letters[1:10])) last_dimnames <- rstanarm:::last_dimnames expect_identical(last_dimnames(a), letters[1:10]) m <- a[1,,, drop=TRUE] expect_identical(last_dimnames(m), letters[1:10]) expect_identical(last_dimnames(m), colnames(m)) d <- as.data.frame(m) expect_identical(last_dimnames(d), last_dimnames(m)) expect_null(last_dimnames(m[1,])) }) test_that("validate_newdata works", { fit <- example_model newd <- fit$data validate_newdata <- rstanarm:::validate_newdata expect_error(validate_newdata(fit, newdata = 1:10), "must be a data frame") expect_null(validate_newdata(fit, newdata = NULL)) expect_equal(newd, validate_newdata(fit, newdata = newd)) # doesn't complain about NAs in unused variables newd2 <- newd newd2$banana <- NA expect_silent(validate_newdata(fit, newdata = newd2)) expect_equal(validate_newdata(fit, newdata = newd2), newd2) newd$period[3] <- NA expect_error(validate_newdata(fit, newdata = newd), "NAs are not allowed") }) rstanarm/tests/testthat/helper.R0000644000176200001440000002043614370470372016521 0ustar liggesusersSW <- function(expr) utils::capture.output(suppressWarnings(expr)) run_example_model <- function() { o <- SW( fit <- stan_glmer(cbind(incidence, size - incidence) ~ size + period + (1|herd), data = lme4::cbpp, family = binomial, QR = TRUE, # this next line is only to keep the example small in size! 
chains = 2, cores = 1, seed = 12345, iter = 1000, refresh = 0) ) fit } # These tests just make sure that posterior_predict doesn't throw errors and # that result has correct dimensions check_for_pp_errors <- function(fit, data = NULL, offset = NULL) { nsims <- nrow(as.data.frame(fit)) mf <- if (!is.null(data)) data else model.frame(fit) if (identical(deparse(substitute(fit)), "example_model")) mf <- lme4::cbpp expect_silent(yrep1 <- posterior_predict(fit)) expect_silent(lin1 <- posterior_linpred(fit)) expect_silent(suppressMessages(posterior_linpred(fit, transform = TRUE))) expect_equal(dim(yrep1), c(nsims, nobs(fit))) expect_equal(dim(lin1), c(nsims, nobs(fit))) expect_silent(yrep2 <- posterior_predict(fit, draws = 1)) expect_equal(dim(yrep2), c(1, nobs(fit))) offs <- if (!is.null(offset)) offset[1] else offset expect_silent(yrep3 <- posterior_predict(fit, newdata = mf[1,], offset = offs)) expect_silent(lin3 <- posterior_linpred(fit, newdata = mf[1,], offset = offs)) expect_equal(dim(yrep3), c(nsims, 1)) expect_equal(dim(lin3), c(nsims, 1)) expect_silent(yrep4 <- posterior_predict(fit, draws = 2, newdata = mf[1,], offset = offs)) expect_equal(dim(yrep4), c(2, 1)) offs <- if (!is.null(offset)) offset[1:5] else offset expect_silent(yrep5 <- posterior_predict(fit, newdata = mf[1:5,], offset = offs)) expect_silent(lin5 <- posterior_linpred(fit, newdata = mf[1:5,], offset = offs)) expect_equal(dim(yrep5), c(nsims, 5)) expect_equal(dim(lin5), c(nsims, 5)) expect_silent(yrep6 <- posterior_predict(fit, draws = 3, newdata = mf[1:5,], offset = offs)) expect_equal(dim(yrep6), c(3, 5)) expect_error(posterior_predict(fit, draws = nsims + 1), regexep = "posterior sample size is only") } expect_equivalent_loo <- function(fit) { LOO.CORES <- ifelse(.Platform$OS.type == "windows", 1, 2) l <- suppressWarnings(loo(fit, cores = LOO.CORES)) w <- suppressWarnings(waic(fit)) expect_s3_class(l, "psis_loo") expect_s3_class(l, "loo") expect_s3_class(w, "loo") expect_s3_class(w, "waic") 
att_names <- c("names", "dims", "class", "model_name", "discrete", "yhash", "formula") expect_named(attributes(l), att_names) expect_named(attributes(w), att_names) discrete <- attr(l, "discrete") expect_true(!is.na(discrete) && is.logical(discrete)) if (fit$stan_function != "stan_clogit") { ll <- log_lik(fit) r_eff <- loo::relative_eff(exp(ll), chain_id = rstanarm:::chain_id_for_loo(fit)) l2 <- suppressWarnings(loo(ll, r_eff = r_eff, cores = LOO.CORES)) expect_equal(l$estimates, l2$estimates) expect_equivalent(w, suppressWarnings(waic(ll))) } } expect_gg <- function(x, info = NULL, label = NULL) { testthat::expect_is(x, "ggplot", info = info, label = label) invisible(ggplot2::ggplot_build(x)) } # Make sure that the fitted Stan models x and y have identical MCMC samples # after sorting the stanmat columns (ie. parameters) by name expect_identical_sorted_stanmats <- function(x, y) { x_mat <- as.matrix(x) y_mat <- as.matrix(y) x_nms <- colnames(x_mat) y_nms <- colnames(y_mat) x_mat_sorted <- x_mat[, order(x_nms), drop = FALSE] y_mat_sorted <- y_mat[, order(y_nms), drop = FALSE] expect_identical(x_mat_sorted, y_mat_sorted) } expect_linpred_equal <- function(object, tol = 0.1) { linpred <- posterior_linpred(object) expect_equal(apply(linpred, 2, median), object$linear.predictors, tolerance = tol, check.attributes = FALSE) } expect_matrix <- function(x) expect_true(is.matrix(x)) expect_ppd <- function(x) { expect_true(inherits(x, "ppd") || is.matrix(x)) } expect_stanreg <- function(x) expect_s3_class(x, "stanreg") expect_stanmvreg <- function(x) expect_s3_class(x, "stanmvreg") expect_survfit <- function(x) expect_s3_class(x, "survfit.stanjm") # Use the standard errors from a fitted 'comparison model' to obtain # the tolerance for each parameter in the joint model # Obtain parameter specific tolerances that can be used to assess the # accuracy of parameter estimates in stan_jm models. 
The tolerances # are calculated by taking the SE/SD for the parameter estimate in a # "gold standard" model and multiplying this by the relevant element # in the 'tolscales' argument. # # @param modLong The "gold standard" longitudinal model. Likely to be # a model estimated using either {g}lmer or stan_{g}lmer. # @param modEvent The "gold standard" event model. Likely to be a model # estimated using coxph. # @param toscales A named list with elements $lmer_fixef, $lmer_ranef, # $glmer_fixef, $glmer_ranef, $event. # @param idvar The name of the ID variable. Used to extract the SDs for # group-specific terms that correspond to the individual/patient. # get_tols <- function(modLong, modEvent = NULL, tolscales, idvar = "id") { if (is.null(modEvent)) modEvent <- modLong # if modLong is already a joint model if (class(modLong)[1] == "stanreg") { fixef_nms <- names(fixef(modLong)) fixef_ses <- modLong$ses[fixef_nms] ranef_sds <- attr(VarCorr(modLong)[[idvar]], "stddev") if (modLong$stan_function == "stan_lmer") { fixef_tols <- tolscales$lmer_fixef * fixef_ses ranef_tols <- tolscales$lmer_ranef * ranef_sds } else if (modLong$stan_function == "stan_glmer") { if (modLong$family$family == "gaussian") { fixef_tols <- tolscales$lmer_fixef * fixef_ses ranef_tols <- tolscales$lmer_ranef * ranef_sds } else { fixef_tols <- tolscales$glmer_fixef * fixef_ses ranef_tols <- tolscales$glmer_ranef * ranef_sds } } } else if (class(modLong)[1] %in% c("lmerMod", "glmerMod")) { fixef_ses <- sqrt(diag(vcov(modLong))) ranef_sds <- attr(VarCorr(modLong)[[idvar]], "stddev") if (class(modLong)[1] == "lmerMod") { fixef_tols <- tolscales$lmer_fixef * fixef_ses ranef_tols <- tolscales$lmer_ranef * ranef_sds } else if (class(modLong)[1] == "glmerMod") { fixef_tols <- tolscales$glmer_fixef * fixef_ses ranef_tols <- tolscales$glmer_ranef * ranef_sds } } if ("(Intercept)" %in% names(fixef_tols)) fixef_tols[["(Intercept)"]] <- 2 * fixef_tols[["(Intercept)"]] if ("(Intercept)" %in% names(ranef_tols)) 
ranef_tols[["(Intercept)"]] <- 2 * ranef_tols[["(Intercept)"]] if (class(modEvent)[1] == "coxph") { event_ses <- summary(modEvent)$coefficients[, "se(coef)"] } else event_ses <- NULL event_tols <- if (!is.null(event_ses)) tolscales$event * event_ses else NULL if ("(Intercept)" %in% names(event_tols)) event_tols[["(Intercept)"]] <- 2 * event_tols[["(Intercept)"]] ret <- Filter( function(x) !is.null(x), list(fixef = fixef_tols, ranef = ranef_tols, event = event_tols)) return(ret) } # Recover parameter estimates and return a list with consistent # parameter names for comparing stan_jm, stan_mvmer, stan_{g}lmer, # {g}lmer, and coxph estimates # # @param modLong The fitted longitudinal model. Likely to be # a model estimated using either {g}lmer or stan_{g}lmer. # @param modEvent The fitted event model. Likely to be a model # estimated using coxph. # @param idvar The name of the ID variable. Used to extract the estimates # for group-specific parameters that correspond to the individual/patient. 
# recover_pars <- function(modLong, modEvent = NULL, idvar = "id") { if (is.null(modEvent)) modEvent <- modLong if (class(modLong)[1] %in% c("stanreg", "lmerMod", "glmerMod")) { fixef_pars <- fixef(modLong) ranef_pars <- ranef(modLong)[[idvar]] } else if (class(modLong)[1] %in% c("stanjm", "stanmvreg")) { fixef_pars <- fixef(modLong)[[1L]] ranef_pars <- ranef(modLong)[[1L]][[idvar]] } if (class(modEvent)[1] == "coxph") { event_pars <- modEvent$coefficients } else if (class(modEvent)[1] %in% c("stanjm", "stanmvreg")) { event_pars <- fixef(modEvent)$Event } else event_pars <- NULL ret <- Filter( function(x) !is.null(x), list(fixef = fixef_pars, ranef = ranef_pars, event = event_pars)) return(ret) } rstanarm/tests/testthat/test_loo.R0000644000176200001440000002756314370470372017102 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm)) LOO.CORES <- ifelse(.Platform$OS.type == "windows", 1, 2) SEED <- 1234L set.seed(SEED) CHAINS <- 2 ITER <- 40 # small iter for speed but large enough for psis REFRESH <- 0 if (!exists("example_model")) { example_model <- run_example_model() } # loo and waic ------------------------------------------------------------ context("loo and waic") # These tests just check that the loo.stanreg method (which calls loo.function # method) results are identical to the loo.matrix results. Since for these tests # the log-likelihood matrix is computed using the log-likelihood function, the # only thing these tests really do is make sure that loo.stanreg and all the # log-likelihood functions don't return any errors and whatnot (it does not # check that the results returned by loo are actually correct). expect_equivalent_loo <- function(fit) { l <- suppressWarnings(loo(fit, cores = LOO.CORES)) w <- suppressWarnings(waic(fit)) expect_s3_class(l, "loo") expect_s3_class(w, "loo") expect_s3_class(w, "waic") att_names <- c("names", "dims", "class", "model_name", "discrete", "yhash", "formula") expect_named(attributes(l), att_names) expect_named(attributes(w), att_names) discrete <- attr(l, "discrete") expect_true(!is.na(discrete) && is.logical(discrete)) llik <- log_lik(fit) r <- loo::relative_eff(exp(llik), chain_id = rstanarm:::chain_id_for_loo(fit)) l2 <- suppressWarnings(loo(llik, r_eff = r, cores = LOO.CORES)) expect_equal(l$estimates, l2$estimates) expect_equivalent(w, suppressWarnings(waic(log_lik(fit)))) } test_that("loo & waic do something for non mcmc models", { SW(fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = 1234L, prior_intercept = NULL, refresh = 0, prior = NULL, prior_aux = NULL)) SW(fitvb1 <- update(fito, algorithm = "meanfield", iter = ITER)) SW(fitvb2 <- update(fito, algorithm = "fullrank", iter = ITER)) SW(loo1 <- loo(fito)) SW(loo2 <- loo(fitvb1)) SW(loo3 <- loo(fitvb2)) 
expect_true("importance_sampling_loo" %in% class(loo1)) expect_true("importance_sampling_loo" %in% class(loo2)) expect_true("importance_sampling_loo" %in% class(loo3)) }) test_that("loo errors if model has weights", { SW( fit <- stan_glm(mpg ~ wt, data = mtcars, weights = rep_len(c(1,2), nrow(mtcars)), seed = SEED, refresh = 0, iter = 50) ) expect_error(loo(fit), "not supported") expect_error(loo(fit), "'kfold'") }) test_that("loo can handle empty interaction levels", { d <- expand.grid(group1 = c("A", "B"), group2 = c("a", "b", "c"))[1:5,] d$y <- c(0, 1, 0, 1, 0) SW(fit <- rstanarm::stan_glm(y ~ group1:group2, data = d, family = "binomial", refresh = 0, iter = 20, chains = 1)) SW(loo1 <- loo(fit)) expect_output(print(loo1), "Computed from 10 by 5 log-likelihood matrix") }) # loo with refitting ------------------------------------------------------ context("loo then refitting") test_that("loo issues errors/warnings", { expect_warning(loo(example_model, cores = LOO.CORES, k_threshold = 2), "Setting 'k_threshold' > 1 is not recommended") expect_error(loo(example_model, k_threshold = -1), "'k_threshold' < 0 not allowed.") expect_error(loo(example_model, k_threshold = 1:2), "'k_threshold' must be a single numeric value") expect_warning(rstanarm:::recommend_kfold(5), "Found 5") expect_warning(rstanarm:::recommend_kfold(5), "10-fold") expect_warning(rstanarm:::recommend_reloo(7), "Found 7") }) test_that("loo with k_threshold works", { SW(fit <- stan_glm(mpg ~ wt, prior = normal(0, 500), data = mtcars[25:32,], seed = 12345, iter = 5, chains = 1, cores = 1, refresh = 0)) expect_message(loo(fit, k_threshold = 0.5), "Model will be refit") # test that no errors from binomial model because it's trickier to get the # data right internally in reloo (matrix outcome) SW(loo_x <- loo(example_model)) expect_message(rstanarm:::reloo(example_model, loo_x, obs = 1), "Model will be refit 1 times") }) test_that("loo with k_threshold works for edge case(s)", { # without 'data' argument y 
<- mtcars$mpg[1:10] x <- rexp(length(y)) SW(fit <- stan_glm(y ~ 1, refresh = 0, iter = 50)) expect_message( SW(res <- loo(fit, k_threshold = 0.1, cores = LOO.CORES)), # low k_threshold to make sure reloo is triggered "problematic observation\\(s\\) found" ) expect_s3_class(res, "loo") }) # kfold ------------------------------------------------------------------- context("kfold") test_that("kfold does not throw an error for non mcmc models", { SW(fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = 1234L, refresh = 0)) SW(k <- kfold(fito, K = 2)) expect_true("kfold" %in% class(k)) }) test_that("kfold throws error if K <= 1 or K > N", { expect_error(kfold(example_model, K = 1), "K > 1", fixed = TRUE) expect_error(kfold(example_model, K = 1e5), "K <= nobs(x)", fixed = TRUE) }) test_that("kfold throws error if folds arg is bad", { expect_error(kfold(example_model, K = 2, folds = 1:100), "length(folds) == N is not TRUE", fixed = TRUE) expect_error(kfold(example_model, K = 2, folds = 1:2), "length(folds) == N is not TRUE", fixed = TRUE) expect_error(kfold(example_model, K = 2, folds = seq(1,100, length.out = 56)), "all(folds == as.integer(folds)) is not TRUE", fixed = TRUE) }) test_that("kfold throws error if model has weights", { SW( fit <- stan_glm(mpg ~ wt, data = mtcars, iter = ITER, chains = CHAINS, refresh = 0, weights = runif(nrow(mtcars), 0.5, 1.5)) ) expect_error(kfold(fit), "not currently available for models fit using weights") }) test_that("kfold works on some examples", { mtcars2 <- mtcars mtcars2$wt[1] <- NA # make sure kfold works if NAs are dropped from original data SW( fit_gaus <- stan_glm(mpg ~ wt, data = mtcars2, refresh = 0, chains = 1, iter = 10) ) SW(kf <- kfold(fit_gaus, 2)) SW(kf2 <- kfold(example_model, 2)) expect_named(kf, c("estimates", "pointwise", "elpd_kfold", "se_elpd_kfold", "p_kfold", "se_p_kfold")) expect_named(kf2, c("estimates", "pointwise", "elpd_kfold", "se_elpd_kfold", "p_kfold", "se_p_kfold")) 
expect_named(attributes(kf), c("names", "class", "K", "dims", "model_name", "discrete", "yhash", "formula")) expect_named(attributes(kf2), c("names", "class", "K", "dims", "model_name", "discrete", "yhash", "formula")) expect_s3_class(kf, c("kfold", "loo")) expect_s3_class(kf2, c("kfold", "loo")) expect_false(is.na(kf$p_kfold)) expect_false(is.na(kf2$p_kfold)) SW(kf <- kfold(fit_gaus, K = 2, save_fits = TRUE)) expect_true("fits" %in% names(kf)) expect_s3_class(kf$fits[[1, "fit"]], "stanreg") expect_type(kf$fits[[2, "omitted"]], "integer") expect_length(kf$fits[[2, "omitted"]], 16) }) # loo_compare ---------------------------------------------------------- test_that("loo_compare throws correct errors", { SW(capture.output({ mtcars$mpg <- as.integer(mtcars$mpg) fit1 <- stan_glm(mpg ~ wt, data = mtcars, iter = 5, chains = 2, refresh = 0) fit2 <- update(fit1, data = mtcars[-1, ]) fit3 <- update(fit1, formula. = log(mpg) ~ .) fit4 <- update(fit1, family = poisson("log")) l1 <- loo(fit1, cores = LOO.CORES) l2 <- loo(fit2, cores = LOO.CORES) l3 <- loo(fit3, cores = LOO.CORES) l4 <- loo(fit4, cores = LOO.CORES) w1 <- waic(fit1) k1 <- kfold(fit1, K = 3) })) # this uses loo::loo_compare expect_error(loo_compare(l1, l2), "Not all models have the same number of data points") expect_error(loo_compare(list(l4, l2, l3)), "Not all models have the same number of data points") # using loo_compare.stanreg (can do extra checks) fit1$loo <- l1 fit2$loo <- l2 fit3$loo <- l3 fit4$loo <- l4 expect_error(loo_compare(fit1, fit2), "Not all models have the same number of data points") expect_warning(loo_compare(fit1, fit3), "Not all models have the same y variable") expect_error(loo_compare(fit1, fit4), "Discrete and continuous observation models can't be compared") expect_error(loo_compare(l1, fit1), "All inputs should have class 'loo'") expect_error(loo_compare(l1), "requires at least two models") }) test_that("loo_compare works", { suppressWarnings(capture.output({ mtcars$mpg <- 
as.integer(mtcars$mpg) fit1 <- stan_glm(mpg ~ wt, data = mtcars, iter = 40, chains = 2, refresh = 0) fit2 <- update(fit1, formula. = . ~ . + cyl) fit3 <- update(fit2, formula. = . ~ . + gear) fit4 <- update(fit1, family = "poisson") fit5 <- update(fit1, family = "neg_binomial_2") fit1$loo <- loo(fit1, cores = LOO.CORES) fit2$loo <- loo(fit2, cores = LOO.CORES) fit3$loo <- loo(fit3, cores = LOO.CORES) fit4$loo <- loo(fit4, cores = LOO.CORES) fit5$loo <- loo(fit5, cores = LOO.CORES) k1 <- kfold(fit1, K = 2) k2 <- kfold(fit2, K = 2) k3 <- kfold(fit3, K = 3) k4 <- kfold(fit4, K = 2) k5 <- kfold(fit5, K = 2) })) expect_false(attr(fit1$loo, "discrete")) expect_false(attr(fit2$loo, "discrete")) expect_false(attr(fit3$loo, "discrete")) expect_true(attr(fit4$loo, "discrete")) expect_true(attr(fit5$loo, "discrete")) comp1 <- loo_compare(fit1, fit2) comp2 <- loo_compare(fit1, fit2, fit3) expect_s3_class(comp1, "compare.loo") expect_s3_class(comp2, "compare.loo") expect_equal(comp1[, "elpd_diff"], loo_compare(list(fit1$loo, fit2$loo))[, "elpd_diff"]) expect_equal(comp2[, "elpd_diff"], loo_compare(list(fit1$loo, fit2$loo, fit3$loo))[, "elpd_diff"]) comp1_detail <- loo_compare(fit1, fit2, detail=TRUE) expect_output(print(comp1_detail), "Model formulas") # equivalent to stanreg_list method expect_equivalent(comp2, loo_compare(stanreg_list(fit1, fit2, fit3))) # for kfold expect_warning(comp3 <- loo_compare(k1, k2, k3), "Not all kfold objects have the same K value") expect_true(attr(k4, "discrete")) expect_true(attr(k5, "discrete")) expect_s3_class(loo_compare(k4, k5), "compare.loo") }) # helpers ----------------------------------------------------------------- context("loo and waic helpers") test_that("kfold_and_reloo_data works", { f <- rstanarm:::kfold_and_reloo_data d <- f(example_model) expect_identical(d, lme4::cbpp[, colnames(d)]) # if 'data' arg not originally specified when fitting the model y <- rnorm(40) SW(fit <- stan_glm(y ~ 1, iter = ITER, chains = CHAINS, refresh = 
0)) expect_equivalent(f(fit), model.frame(fit)) # if 'subset' arg specified when fitting the model SW(fit2 <- stan_glm(mpg ~ wt, data = mtcars, subset = gear != 5, iter = ITER, chains = CHAINS, refresh = 0)) expect_equivalent(f(fit2), subset(mtcars[mtcars$gear != 5, c("mpg", "wt")])) }) test_that(".weighted works", { f <- rstanarm:::.weighted expect_equal(f(2, NULL), 2) expect_equal(f(2, 3), 6) expect_equal(f(8, 0.25), 2) expect_error(f(2), "missing, with no default") }) rstanarm/tests/testthat/test_pp_validate.R0000644000176200001440000000315514370470372020570 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm)) SEED <- 12345 set.seed(SEED) if (!exists("example_model")) { example_model <- run_example_model() } context("pp_validate") test_that("pp_validate throws correct errors", { expect_error(pp_validate(example_model$stanfit), "not a stanreg object") expect_error(pp_validate(example_model, nreps = 1), "at least 2") }) # For some reason this is resulting in a segfault # https://github.com/stan-dev/rstanarm/pull/496/checks?check_run_id=1580472558#step:9:681 # test_that("pp_validate runs for very quick example", { # capture.output( # fit <- stan_glm(mpg ~ wt, data = mtcars, seed = SEED, refresh = 0, # init_r = 0.1, iter = 500) # ) # gg <- pp_validate(fit, nreps = 2, seed = SEED) # expect_s3_class(gg, "ggplot") # }) rstanarm/tests/testthat/stan_files/0000755000176200001440000000000014551552005017234 5ustar liggesusersrstanarm/tests/testthat/stan_files/polr.stan0000644000176200001440000002140714551535205021106 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for an ordinal outcome with coherent priors functions { /** * Evaluate a given CDF * * @param x The point to evaluate the CDF_polr at * @param link An integer indicating the link function * @return A scalar on (0,1) */ real CDF_polr(real x, int link) { // links in MASS::polr() are in a different order than binomial() // logistic, probit, loglog, cloglog, cauchit if (link == 1) return exp(log_inv_logit(x)); else if (link == 2) return exp(std_normal_lcdf(x|)); else if (link == 3) return exp(gumbel_lcdf(x | 0, 1)); else if (link == 4) return inv_cloglog(x); else if (link == 5) return exp(cauchy_lcdf(x | 0, 1)); else reject("Invalid link"); return x; // never reached } /** * Pointwise (pw) log-likelihood vector * * @param y The integer outcome variable. 
* @param eta A vector of linear predictors * @param cutpoints An ordered vector of cutpoints * @param link An integer indicating the link function * @return A vector of log-likelihods */ vector pw_polr(array[] int y, vector eta, vector cutpoints, int link, real alpha) { int N = rows(eta); int J = rows(cutpoints) + 1; vector[N] ll; if (link < 1 || link > 5) reject("Invalid link"); if (alpha == 1) for (n in 1 : N) { if (y[n] == 1) ll[n] = CDF_polr(cutpoints[1] - eta[n], link); else if (y[n] == J) ll[n] = 1 - CDF_polr(cutpoints[J - 1] - eta[n], link); else ll[n] = CDF_polr(cutpoints[y[n]] - eta[n], link) - CDF_polr(cutpoints[y[n] - 1] - eta[n], link); } else for (n in 1 : N) { if (y[n] == 1) ll[n] = CDF_polr(cutpoints[1] - eta[n], link) ^ alpha; else if (y[n] == J) ll[n] = 1 - CDF_polr(cutpoints[J - 1] - eta[n], link) ^ alpha; else reject("alpha not allowed with more than 2 outcome categories"); } return log(ll); } /** * Map from conditional probabilities to cutpoints * * @param probabilities A J-simplex * @param scale A positive scalar * @param link An integer indicating the link function * @return A vector of length J - 1 whose elements are in increasing order */ vector make_cutpoints(vector probabilities, real scale, int link) { int C = rows(probabilities) - 1; vector[C] cutpoints; real running_sum = 0; // links in MASS::polr() are in a different order than binomial() // logistic, probit, loglog, cloglog, cauchit if (link == 1) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = logit(running_sum); } else if (link == 2) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = inv_Phi(running_sum); } else if (link == 3) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = -log(-log(running_sum)); } else if (link == 4) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = log(-log1m(running_sum)); } else if (link == 5) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = tan(pi() * (running_sum - 
0.5)); } else reject("invalid link"); return scale * cutpoints; } /** * Randomly draw a value for utility * * @param low A scalar lower bound * @param high A scalar upper bound * @param eta A scalar linear predictor * @param link An integer indicating the link function * @return A scalar from the appropriate conditional distribution */ real draw_ystar_rng(real low, real high, real eta, int link) { int iter = 0; real ystar = not_a_number(); if (low >= high) reject("low must be less than high"); // links in MASS::polr() are in a different order than binomial() // logistic, probit, loglog, cloglog, cauchit if (link == 1) while (!(ystar > low && ystar < high)) ystar = logistic_rng(eta, 1); else if (link == 2) while (!(ystar > low && ystar < high)) ystar = normal_rng(eta, 1); else if (link == 3) while (!(ystar > low && ystar < high)) ystar = gumbel_rng(eta, 1); else if (link == 4) while (!(ystar > low && ystar < high)) ystar = log(-log1m(uniform_rng( 0, 1))); else if (link == 5) while (!(ystar > low && ystar < high)) ystar = cauchy_rng(eta, 1); else reject("invalid link"); return ystar; } } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan int J; // number of outcome categories, which typically is > 2 array[N] int y; // ordinal outcome // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset #include /data/weights_offset.stan // hyperparameter values real regularization; vector[J] prior_counts; int is_skewed; real shape; real rate; int do_residuals; } transformed data { real half_K = 0.5 * K; real sqrt_Nm1 = sqrt(N - 1.0); int is_constant = 1; vector[0] beta_smooth; // not used for (j in 1 : J) if (prior_counts[j] != 1) is_constant = 0; } parameters { simplex[J] pi; // avoid error by making unit_vector have 2 elements when K <= 1 // https://github.com/stan-dev/rstanarm/issues/603#issuecomment-1785928224 array[K > 1] unit_vector[K 
> 1 ? K : 2] u; real 1 ? 0 : -1), upper=1> R2; array[is_skewed] real alpha; } transformed parameters { vector[K] beta; vector[J - 1] cutpoints; { real Delta_y; if (K > 1) { Delta_y = inv_sqrt(1 - R2); beta = u[1] * sqrt(R2) * Delta_y * sqrt_Nm1; } else { Delta_y = inv_sqrt(1 - square(R2)); beta[1] = R2 * Delta_y * sqrt_Nm1; } cutpoints = make_cutpoints(pi, Delta_y, link); } } model { if (prior_PD == 0) { #include /model/make_eta.stan if (has_weights == 0) { // unweighted log-likelihoods if (is_skewed == 0) target += pw_polr(y, eta, cutpoints, link, 1.0); else target += pw_polr(y, eta, cutpoints, link, alpha[1]); } else { // weighted log-likelihoods if (is_skewed == 0) target += dot_product(weights, pw_polr(y, eta, cutpoints, link, 1.0)); else target += dot_product(weights, pw_polr(y, eta, cutpoints, link, alpha[1])); } } if (is_constant == 0) target += dirichlet_lpdf(pi | prior_counts); // implicit: u is uniform on the surface of a hypersphere if (prior_dist == 1) { if (K > 1) target += beta_lpdf(R2 | half_K, regularization); else target += beta_lpdf(square(R2) | half_K, regularization) + log(abs(R2)); } if (is_skewed == 1) target += gamma_lpdf(alpha | shape, rate); } generated quantities { vector[J > 2 ? J : 1] mean_PPD = rep_vector(0, J > 2 ? J : 1); vector[do_residuals ? 
N : 0] residuals; vector[J - 1] zeta; // xbar is actually post multiplied by R^-1 if (dense_X) zeta = cutpoints + dot_product(xbar, beta); else zeta = cutpoints; if (J == 2) zeta *= -1.0; { #include /model/make_eta.stan for (n in 1 : N) { int y_tilde; vector[J] theta; real previous; real first = CDF_polr(cutpoints[1] - eta[n], link); previous = first; if (is_skewed) theta[1] = first ^ alpha[1]; else theta[1] = first; for (j in 2 : (J - 1)) { real current = CDF_polr(cutpoints[j] - eta[n], link); theta[j] = current - previous; previous = current; } if (is_skewed == 0) theta[J] = 1 - previous; else theta[J] = 1 - previous ^ alpha[1]; if (previous <= 0 || previous >= 1) { // do nothing } else if (J == 2) { mean_PPD[1] += bernoulli_rng(theta[J]); } else { y_tilde = categorical_rng(theta); mean_PPD[y_tilde] += 1; } if (do_residuals) { real ystar; if (y[n] == 1) ystar = draw_ystar_rng(negative_infinity(), cutpoints[1], eta[n], link); else if (y[n] == J) ystar = draw_ystar_rng(cutpoints[J - 1], positive_infinity( ), eta[n], link); else ystar = draw_ystar_rng(cutpoints[y[n] - 1], cutpoints[y[n]], eta[n], link); residuals[n] = ystar - eta[n]; } } mean_PPD /= (N + 0.0); } } rstanarm/tests/testthat/stan_files/count.stan0000644000176200001440000001654714500256225021267 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a count outcome functions { #include /functions/common_functions.stan #include /functions/count_likelihoods.stan } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan array[N] int y; // count outcome // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset_ #include /data/weights_offset.stan int family; // 6 poisson, 7 neg-binom, (8 poisson with gamma noise at some point?) 
// declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // declares num_not_zero, w, v, u #include /data/glmer_stuff2.stan } transformed data { real poisson_max = pow(2.0, 30.0); array[special_case ? t : 0, N] int V = make_V(N, special_case ? t : 0, v); int can_do_countlogglm = K != 0 && // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398 link == 1 && prior_PD == 0 && dense_X == 1 && has_weights == 0 && t == 0; matrix[can_do_countlogglm ? N : 0, can_do_countlogglm ? K + K_smooth : 0] XS; // defines hs, len_z_T, len_var_group, delta, pos #include /tdata/tdata_glm.stan if (can_do_countlogglm) { XS = K_smooth > 0 ? append_col(X[1], S) : X[1]; } } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan array[family > 6] real aux_unscaled; array[family == 8] vector[N] noise; // do not store this } transformed parameters { real aux = negative_infinity(); // be careful with this in the family = 6 case // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan if (family > 6 && (prior_dist_for_aux == 0 || prior_scale_for_aux <= 0)) aux = aux_unscaled[1]; else if (family > 6) { aux = prior_scale_for_aux * aux_unscaled[1]; if (prior_dist_for_aux <= 2) // normal or student_t aux += prior_mean_for_aux; } if (t > 0) { if (special_case == 1) { int start = 1; theta_L = scale .* (family == 6 ? 
tau : tau * aux); if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { if (family == 6) theta_L = make_theta_L(len_theta_L, p, 1.0, tau, scale, zeta, rho, z_T); else theta_L = make_theta_L(len_theta_L, p, aux, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (can_do_countlogglm) { vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta; if (family != 7) { if (has_offset) { target += poisson_log_glm_lpmf(y | XS, has_intercept ? offset_ + gamma[1] : offset_, coeff); } else { target += poisson_log_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff); } } else { if (has_offset) { target += neg_binomial_2_log_glm_lpmf(y | XS, has_intercept ? offset_ + gamma[1] : offset_, coeff, aux); } else { target += neg_binomial_2_log_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff, aux); } } } else if (prior_PD == 0) { #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link == 1) eta += gamma[1]; else eta += gamma[1] - min(eta); } else { #include /model/eta_no_intercept.stan } if (family == 8) { if (link == 1) eta += log(aux) + log(noise[1]); else if (link == 2) { eta *= aux; eta .*= noise[1]; } else eta += sqrt(aux) + sqrt(noise[1]); } // Log-likelihood if (has_weights == 0) { // unweighted log-likelihoods if (family != 7) { if (link == 1) target += poisson_log_lpmf(y | eta); else target += poisson_lpmf(y | linkinv_count(eta, link)); } else { if (link == 1) target += neg_binomial_2_log_lpmf(y | eta, aux); else target += neg_binomial_2_lpmf(y | linkinv_count(eta, link), aux); } } else if (family != 7) target += dot_product(weights, pw_pois(y, eta, link)); else target += dot_product(weights, pw_nb(y, eta, aux, link)); } // Log-prior for aux if (family > 6 && prior_dist_for_aux > 0 && prior_scale_for_aux > 0) { real log_half = -0.693147180559945286; if 
(prior_dist_for_aux == 1) target += normal_lpdf(aux_unscaled | 0, 1) - log_half; else if (prior_dist_for_aux == 2) target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half; else target += exponential_lpdf(aux_unscaled | 1); } #include /model/priors_glm.stan // Log-prior for noise if (family == 8) target += gamma_lpdf(noise[1] | aux, 1); if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 0 : negative_infinity(); array[has_intercept] real alpha; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (compute_mean_PPD) { vector[N] nu; #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link == 1) eta += gamma[1]; else { real shift = min(eta); eta += gamma[1] - shift; alpha[1] -= shift; } } else { #include /model/eta_no_intercept.stan } if (family == 8) { if (link == 1) eta += log(aux) + log(noise[1]); else if (link == 2) { eta *= aux; eta .*= noise[1]; } else eta += sqrt(aux) + sqrt(noise[1]); } nu = linkinv_count(eta, link); if (family != 7) for (n in 1 : N) { if (nu[n] < poisson_max) mean_PPD += poisson_rng(nu[n]); else mean_PPD += normal_rng(nu[n], sqrt(nu[n])); } else for (n in 1 : N) { real gamma_temp; if (is_inf(aux)) gamma_temp = nu[n]; else gamma_temp = gamma_rng(aux, aux / nu[n]); if (gamma_temp < poisson_max) mean_PPD += poisson_rng(gamma_temp); else mean_PPD += normal_rng(gamma_temp, sqrt(gamma_temp)); } mean_PPD /= N; } } rstanarm/tests/testthat/stan_files/gqs/0000755000176200001440000000000014500256225020025 5ustar liggesusersrstanarm/tests/testthat/stan_files/gqs/gen_quantities_mvmer.stan0000644000176200001440000000564414500256225025152 0ustar liggesusers array[M] real mean_PPD; array[intercept_type[1] > 0] real yAlpha1; array[intercept_type[2] > 0] real yAlpha2; array[intercept_type[3] > 0] real yAlpha3; 
vector[prior_dist_for_cov == 2 && bK1 > 0 ? size(bCov1_idx) : 0] bCov1; vector[prior_dist_for_cov == 2 && bK2 > 0 ? size(bCov2_idx) : 0] bCov2; vector[bN1 * bK1] b1 = to_vector(bMat1'); // ensures same order as stan_glmer (make_b) vector[bN2 * bK2] b2 = to_vector(bMat2'); // Evaluate mean_PPD { int bMat1_colshift = 0; // column shift in bMat1 int bMat2_colshift = 0; // column shift in bMat2 // Linear predictor for submodel 1 if (M > 0) { vector[yNeta[1]] yEta1 = evaluate_mu( // linear predictor evaluate_eta(yX1, y1_Z1, y1_Z2, y1_Z1_id, y1_Z2_id, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset), family[1], link[1]); mean_PPD[1] = mean_PPD_rng(yEta1, yAux1, family[1]); } // Linear predictor for submodel 2 if (M > 1) { vector[yNeta[2]] yEta2; bMat1_colshift += bK1_len[1]; bMat2_colshift += bK2_len[1]; yEta2 = evaluate_mu(evaluate_eta(yX2, y2_Z1, y2_Z2, y2_Z1_id, y2_Z2_id, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset), family[2], link[2]); mean_PPD[2] = mean_PPD_rng(yEta2, yAux2, family[2]); } // Linear predictor for submodel 3 if (M > 2) { vector[yNeta[3]] yEta3; bMat1_colshift += bK1_len[2]; bMat2_colshift += bK2_len[2]; yEta3 = evaluate_mu(evaluate_eta(yX3, y3_Z1, y3_Z2, y3_Z1_id, y3_Z2_id, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset), family[3], link[3]); mean_PPD[3] = mean_PPD_rng(yEta3, yAux3, family[3]); } } // Transform intercept parameters if (intercept_type[1] > 0) yAlpha1[1] = yGamma1[1] - dot_product(yXbar1, yBeta1); if (M > 1 && intercept_type[2] > 0) yAlpha2[1] = yGamma2[1] - dot_product(yXbar2, yBeta2); if (M > 2 && intercept_type[3] > 0) yAlpha3[1] = yGamma3[1] - dot_product(yXbar3, yBeta3); // Transform variance-covariance matrices // Grouping factor 1 if (prior_dist_for_cov == 2 && bK1 == 1) { bCov1[1] = bSd1[1] * bSd1[1]; } else if (prior_dist_for_cov == 2 && bK1 > 1) { bCov1 = to_vector(quad_form_diag( 
multiply_lower_tri_self_transpose(bCholesky1), bSd1))[bCov1_idx]; } // Grouping factor 2 if (prior_dist_for_cov == 2 && bK2 == 1) { bCov2[1] = bSd2[1] * bSd2[1]; } else if (prior_dist_for_cov == 2 && bK2 > 1) { bCov2 = to_vector(quad_form_diag( multiply_lower_tri_self_transpose(bCholesky2), bSd2))[bCov2_idx]; } rstanarm/tests/testthat/stan_files/parameters/0000755000176200001440000000000014500256225021376 5ustar liggesusersrstanarm/tests/testthat/stan_files/parameters/parameters_event.stan0000644000176200001440000000125314500256225025632 0ustar liggesusers array[e_has_intercept] real e_gamma; // intercept for event submodel vector[e_K] e_z_beta; // primitive log hazard ratios // unscaled basehaz params, either: // - weibull shape parameter // - b-spline coefs on log basehaz // - coefs for piecewise constant basehaz vector[basehaz_df] e_aux_unscaled; // parameters for priors on log haz ratios array[e_hs] real e_global; array[e_hs] vector[(e_hs>0)*e_K] e_local; array[e_hs > 0] real e_caux; array[e_prior_dist == 5 || e_prior_dist == 6] vector[e_K] e_mix; array[e_prior_dist == 6] real e_ool; rstanarm/tests/testthat/stan_files/parameters/parameters_glm.stan0000644000176200001440000000104214500256225025264 0ustar liggesusers vector[prior_dist == 7 ? sum(num_normals) : K] z_beta; vector[K_smooth] z_beta_smooth; vector[K_smooth > 0 ? 
smooth_map[K_smooth] : 0] smooth_sd_raw; array[hs] real global; array[hs] vector[K] local; array[hs > 0] real caux; array[prior_dist == 5 || prior_dist == 6] vector[K] mix; array[prior_dist == 6] real one_over_lambda; vector[q] z_b; vector[len_z_T] z_T; vector[len_rho] rho; vector[len_concentration] zeta; vector[t] tau; rstanarm/tests/testthat/stan_files/parameters/parameters_mvmer.stan0000644000176200001440000000507614500256225025646 0ustar liggesusers // intercepts array[intercept_type[1] > 0] real yGamma1; array[intercept_type[2] > 0] real yGamma2; array[intercept_type[3] > 0] real yGamma3; // population level primitive params vector[yK[1]] z_yBeta1; vector[yK[2]] z_yBeta2; vector[yK[3]] z_yBeta3; // group level params, decov prior vector[prior_dist_for_cov == 1 ? q : 0] z_b; vector[prior_dist_for_cov == 1 ? len_z_T : 0] z_T; vector[prior_dist_for_cov == 1 ? len_rho : 0] rho; vector[prior_dist_for_cov == 1 ? len_concentration : 0] zeta; vector[prior_dist_for_cov == 1 ? t : 0] tau; // group level params for first grouping factor // group-level sds vector[prior_dist_for_cov == 2 ? bK1 : 0] bSd1; // unscaled group-level params matrix[prior_dist_for_cov == 2 && bK1 > 0 ? bK1 : 0, bK1 > 0 ? bN1 : 0] z_bMat1; // cholesky factor of corr matrix (if > 1 random effect) cholesky_factor_corr[prior_dist_for_cov == 2 && bK1 > 1 ? bK1 : 0] bCholesky1; // group level params for second grouping factor // group-level sds vector[prior_dist_for_cov == 2 ? bK2 : 0] bSd2; // unscaled group-level params matrix[prior_dist_for_cov == 2 && bK2 > 0 ? bK2 : 0, bK2 > 0 ? bN2 : 0] z_bMat2; // cholesky factor of corr matrix (if > 1 random effect) cholesky_factor_corr[prior_dist_for_cov == 2 && bK2 > 1 ? 
bK2 : 0] bCholesky2; // auxiliary params, interpretation depends on family array[has_aux[1]] real yAux1_unscaled; array[has_aux[2]] real yAux2_unscaled; array[has_aux[3]] real yAux3_unscaled; // params for priors array[yHs1] real yGlobal1; array[yHs2] real yGlobal2; array[yHs3] real yGlobal3; array[yHs1] vector[yK[1]] yLocal1; array[yHs2] vector[yK[2]] yLocal2; array[yHs3] vector[yK[3]] yLocal3; array[yHs1 > 0] real y_caux1; array[yHs2 > 0] real y_caux2; array[yHs3 > 0] real y_caux3; array[y_prior_dist[1] == 6] real yOol1; // one_over_lambda array[y_prior_dist[2] == 6] real yOol2; array[y_prior_dist[3] == 6] real yOol3; array[y_prior_dist[1] == 5 || y_prior_dist[1] == 6] vector[yK[1]] yMix1; array[y_prior_dist[2] == 5 || y_prior_dist[2] == 6] vector[yK[2]] yMix2; array[y_prior_dist[3] == 5 || y_prior_dist[3] == 6] vector[yK[3]] yMix3; rstanarm/tests/testthat/stan_files/parameters/parameters_assoc.stan0000644000176200001440000000053514500256225025623 0ustar liggesusers vector[a_K] a_z_beta; // primitive assoc params // parameters for priors on assoc params array[a_hs] real a_global; array[a_hs] vector[(a_hs>0)*a_K] a_local; array[a_hs > 0] real a_caux; array[a_prior_dist == 5 || a_prior_dist == 6] vector[a_K] a_mix; array[a_prior_dist == 6] real a_ool; rstanarm/tests/testthat/stan_files/parameters/parameters_betareg.stan0000644000176200001440000000072714500256225026127 0ustar liggesusers vector[prior_dist_z == 7 ? 
sum(num_normals_z) : z_dim] z_omega; // betareg z variable coefficients array[has_intercept_z] real gamma_z; // betareg intercept array[hs_z] real global_z; array[hs_z] vector[z_dim] local_z; array[hs_z > 0] real caux_z; array[prior_dist_z == 5 || prior_dist_z == 6] vector[z_dim] S_z; array[prior_dist_z == 6] real one_over_lambda_z; rstanarm/tests/testthat/stan_files/tparameters/0000755000176200001440000000000014500256225021562 5ustar liggesusersrstanarm/tests/testthat/stan_files/tparameters/tparameters_glm.stan0000644000176200001440000000327414370470367025657 0ustar liggesusers vector[K] beta; vector[K_smooth] beta_smooth; vector[K_smooth > 0 ? smooth_map[K_smooth] : 0] smooth_sd; vector[q] b; vector[len_theta_L] theta_L; if (prior_dist == 0) beta = z_beta; else if (prior_dist == 1) beta = z_beta .* prior_scale + prior_mean; else if (prior_dist == 2) for (k in 1:K) { beta[k] = CFt(z_beta[k], prior_df[k]) * prior_scale[k] + prior_mean[k]; } else if (prior_dist == 3) { real c2 = square(slab_scale) * caux[1]; if (is_continuous == 1 && family == 1) beta = hs_prior(z_beta, global, local, global_prior_scale, aux, c2); else beta = hs_prior(z_beta, global, local, global_prior_scale, 1, c2); } else if (prior_dist == 4) { real c2 = square(slab_scale) * caux[1]; if (is_continuous == 1 && family == 1) beta = hsplus_prior(z_beta, global, local, global_prior_scale, aux, c2); else beta = hsplus_prior(z_beta, global, local, global_prior_scale, 1, c2); } else if (prior_dist == 5) // laplace beta = prior_mean + prior_scale .* sqrt(2 * mix[1]) .* z_beta; else if (prior_dist == 6) // lasso beta = prior_mean + one_over_lambda[1] * prior_scale .* sqrt(2 * mix[1]) .* z_beta; else if (prior_dist == 7) { // product_normal int z_pos = 1; for (k in 1:K) { beta[k] = z_beta[z_pos]; z_pos += 1; for (n in 2:num_normals[k]) { beta[k] *= z_beta[z_pos]; z_pos += 1; } beta[k] *= prior_scale[k] ^ num_normals[k]; beta[k] += prior_mean[k]; } } if (K_smooth) { smooth_sd = prior_mean_for_smooth + 
prior_scale_for_smooth .* smooth_sd_raw; if (is_continuous && family == 1) smooth_sd *= aux; beta_smooth = z_beta_smooth .* smooth_sd[smooth_map]; } rstanarm/tests/testthat/stan_files/tparameters/tparameters_betareg.stan0000644000176200001440000000222313365374540026501 0ustar liggesusers if (prior_dist_z == 0) omega = z_omega; else if (prior_dist_z == 1) omega = z_omega .* prior_scale_z + prior_mean_z; else if (prior_dist_z == 2) for (k in 1:z_dim) { real left = CFt(omega[k], prior_df_z[k]); omega[k] = left * prior_scale_z[k] + prior_mean_z[k]; } else if (prior_dist_z == 3) omega = hs_prior(z_omega, global_z, local_z, global_prior_scale, 1, square(slab_scale_z) * caux_z[1]); else if (prior_dist_z == 4) omega = hsplus_prior(z_omega, global_z, local_z, global_prior_scale, 1, square(slab_scale_z) * caux_z[1]); else if (prior_dist_z == 5) omega = prior_mean_z + prior_scale_z .* sqrt(2 * S_z[1]) .* z_omega; else if (prior_dist_z == 6) omega = prior_mean_z + one_over_lambda_z[1] * prior_scale_z .* sqrt(2 * S_z[1]) .* z_omega; else if (prior_dist_z == 7) { int z_pos = 1; for (k in 1:z_dim) { omega[k] = z_omega[z_pos]; z_pos += 1; for (n in 2:num_normals_z[k]) { omega[k] *= z_omega[z_pos]; z_pos += 1; } omega[k] *= prior_scale_z[k] ^ num_normals_z[k]; omega[k] += prior_mean_z[k]; } } rstanarm/tests/testthat/stan_files/tparameters/tparameters_mvmer.stan0000644000176200001440000000606414500256225026214 0ustar liggesusers vector[yK[1]] yBeta1; // population level params vector[yK[2]] yBeta2; vector[yK[3]] yBeta3; array[has_aux[1]] real yAux1; // auxiliary params array[has_aux[2]] real yAux2; array[has_aux[3]] real yAux3; vector[len_theta_L] theta_L; // cov matrix for decov prior real yAuxMaximum = 1.0; // used for scaling in theta_L // group level params matrix[bK1 > 0 ? bN1 : 0, bK1] bMat1; // for grouping factor 1 matrix[bK2 > 0 ? 
bN2 : 0, bK2] bMat2; // for grouping factor 2 // population level params, auxiliary params if (has_aux[1] == 1) { yAux1[1] = make_aux(yAux1_unscaled[1], y_prior_dist_for_aux[1], y_prior_mean_for_aux[1], y_prior_scale_for_aux[1]); if (yAux1[1] > yAuxMaximum) yAuxMaximum = yAux1[1]; } if (yK[1] > 0) yBeta1 = make_beta(z_yBeta1, y_prior_dist[1], y_prior_mean1, y_prior_scale1, y_prior_df1, y_global_prior_scale[1], yGlobal1, yLocal1, yOol1, yMix1, yAux1, family[1], y_slab_scale[1], y_caux1); if (M > 1) { if (has_aux[2] == 1) { yAux2[1] = make_aux(yAux2_unscaled[1], y_prior_dist_for_aux[2], y_prior_mean_for_aux[2], y_prior_scale_for_aux[2]); if (yAux2[1] > yAuxMaximum) yAuxMaximum = yAux2[1]; } if (yK[2] > 0) yBeta2 = make_beta(z_yBeta2, y_prior_dist[2], y_prior_mean2, y_prior_scale2, y_prior_df2, y_global_prior_scale[2], yGlobal2, yLocal2, yOol2, yMix2, yAux2, family[2], y_slab_scale[2], y_caux2); } if (M > 2) { if (has_aux[3] == 1) { yAux3[1] = make_aux(yAux3_unscaled[1], y_prior_dist_for_aux[3], y_prior_mean_for_aux[3], y_prior_scale_for_aux[3]); if (yAux3[1] > yAuxMaximum) yAuxMaximum = yAux3[1]; } if (yK[3] > 0) yBeta3 = make_beta(z_yBeta3, y_prior_dist[3], y_prior_mean3, y_prior_scale3, y_prior_df3, y_global_prior_scale[3], yGlobal3, yLocal3, yOol3, yMix3, yAux3, family[3], y_slab_scale[3], y_caux3); } // group level params, under decov prior if (prior_dist_for_cov == 1) { int mark = 1; // cov matrix theta_L = make_theta_L(len_theta_L, p, yAuxMaximum, tau, b_prior_scale, zeta, rho, z_T); // group-level params for first grouping factor if (bK1 > 0) bMat1 = make_b_matrix(z_b, theta_L, p, l, 1); // group level params for second grouping factor if (bK2 > 0) bMat2 = make_b_matrix(z_b, theta_L, p, l, 2); } // group-level params, under lkj prior else if (prior_dist_for_cov == 2) { // group-level params for first grouping factor if (bK1 == 1) bMat1 = (bSd1[1] * z_bMat1)'; else if (bK1 > 1) bMat1 = (diag_pre_multiply(bSd1, bCholesky1) * z_bMat1)'; // group level params for 
second grouping factor if (bK2 == 1) bMat2 = (bSd2[1] * z_bMat2)'; else if (bK2 > 1) bMat2 = (diag_pre_multiply(bSd2, bCholesky2) * z_bMat2)'; } rstanarm/tests/testthat/stan_files/continuous.stan0000644000176200001440000003604314500256225022336 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a Gaussian, Gamma, inverse Gaussian, or Beta outcome functions { #include /functions/common_functions.stan #include /functions/continuous_likelihoods.stan #include /functions/SSfunctions.stan /** * Increments the log-posterior with the logarithm of a multivariate normal * likelihood with a scalar standard deviation for all errors * Equivalent to normal_lpdf(y | intercept + X * beta + Z * b, sigma) but faster * @param coeff vector of coefficients (including intercept) * @param OLS precomputed vector of OLS coefficients (including intercept) * @param XtX precomputed matrix equal to crossprod(X) (including intercept) * @param SSR positive precomputed value of the sum of squared OLS residuals * @param sigma positive scalar for the standard deviation of the errors * @param N integer equal to the number of observations */ real mvn_ols_lpdf(vector coeff, vector OLS, matrix XtX, real SSR, real sigma, int N) { return -0.5 * (quad_form(XtX, coeff - OLS) + SSR) / square(sigma) - N * (log(sigma) + log(sqrt(2 * pi()))); } vector CODOLS(matrix X, vector y); // implemented in C++ } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan int len_y; // length of y real lb_y; // lower bound on y real ub_y; // upper bound on y vector[len_y] y; // continuous outcome int family; // 1 gaussian, 2 gamma, 3 inv-gaussian, 4 beta // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset #include /data/weights_offset.stan // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, 
df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // declares num_not_zero, w, v, u #include /data/glmer_stuff2.stan #include /data/data_betareg.stan int SSfun; // nonlinear function indicator, 0 for identity vector[SSfun > 0 ? len_y : 0] input; vector[SSfun == 5 ? len_y : 0] Dose; } transformed data { vector[family == 3 ? len_y : 0] sqrt_y; vector[family == 3 ? len_y : 0] log_y; real sum_log_y = family == 1 ? not_a_number() : sum(log(y)); array[special_case ? t : 0, len_y] int V = make_V(len_y, special_case ? t : 0, v); int hs_z; // for tdata_betareg.stan int can_do_OLS = family == 1 && link == 1 && SSfun == 0 && has_offset == 0 && t == 0 && prior_PD == 0 && dense_X && N > 2 && len_y >= (has_intercept + K + K_smooth); vector[can_do_OLS ? has_intercept + K + K_smooth : 0] OLS; matrix[can_do_OLS ? has_intercept + K + K_smooth : 0, can_do_OLS ? has_intercept + K + K_smooth : 0] XtX; int can_do_normalidglm = K != 0 && // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398 can_do_OLS == 0 && family == 1 && link == 1 && SSfun == 0 && has_offset == 0 && dense_X && prior_PD == 0 && t == 0 && len_y < (has_intercept + K + K_smooth); matrix[can_do_normalidglm ? N : 0, can_do_normalidglm ? K + K_smooth : 0] XS; real SSR = not_a_number(); // defines hs, len_z_T, len_var_group, delta, is_continuous, pos #include /tdata/tdata_glm.stan // defines hs_z #include /tdata/tdata_betareg.stan is_continuous = 1; if (family == 3) { sqrt_y = sqrt(y); log_y = log(y); } if (can_do_OLS) { matrix[N, has_intercept + K + K_smooth] X_ = has_intercept ? append_col(rep_vector( 1.0, N), (K_smooth > 0 ? append_col( X[1], S) : X[1])) : (K_smooth > 0 ? append_col(X[1], S) : X[1]); XtX = crossprod(X_); OLS = CODOLS(X_, y); SSR = dot_self(y - X_ * OLS); } if (can_do_normalidglm) { XS = K_smooth > 0 ? 
append_col(X[1], S) : X[1]; } } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan real aux_unscaled; // interpretation depends on family! #include /parameters/parameters_betareg.stan } transformed parameters { // aux has to be defined first in the hs case real aux = prior_dist_for_aux == 0 ? aux_unscaled : (prior_dist_for_aux <= 2 ? prior_scale_for_aux * aux_unscaled + prior_mean_for_aux : prior_scale_for_aux * aux_unscaled); vector[z_dim] omega; // used in tparameters_betareg.stan // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan #include /tparameters/tparameters_betareg.stan if (prior_dist_for_aux == 0) // none aux = aux_unscaled; else { aux = prior_scale_for_aux * aux_unscaled; if (prior_dist_for_aux <= 2) // normal or student_t aux += prior_mean_for_aux; } if (t > 0) { if (special_case == 1) { int start = 1; theta_L = scale .* tau * aux; if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { theta_L = make_theta_L(len_theta_L, p, aux, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (can_do_OLS) { vector[cols(XtX)] coeff = has_intercept ? append_row(to_vector(gamma), (K_smooth > 0 ? append_row(beta, beta_smooth) : beta)) : (K_smooth > 0 ? append_row(beta, beta_smooth) : beta); target += mvn_ols_lpdf(coeff | OLS, XtX, SSR, aux, N); } else if (can_do_normalidglm) { vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta; target += normal_id_glm_lpdf(y | XS, has_intercept ? gamma[1] : 0.0, coeff, aux); } else if (prior_PD == 0) { vector[link_phi > 0 ? 
N : 0] eta_z; // beta regression linear predictor for phi #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if ((family == 1 || link == 2) || (family == 4 && link != 5)) eta += gamma[1]; else if (family == 4 && link == 5) eta += gamma[1] - max(eta); else eta += gamma[1] - min(eta); } else { #include /model/eta_no_intercept.stan } if (SSfun > 0) { // nlmer matrix[len_y, K] P = reshape_vec(eta, len_y, K); if (SSfun < 5) { if (SSfun <= 2) { if (SSfun == 1) target += normal_lpdf(y | SS_asymp(input, P), aux); else target += normal_lpdf(y | SS_asympOff(input, P), aux); } else if (SSfun == 3) target += normal_lpdf(y | SS_asympOrig(input, P), aux); else { for (i in 1 : len_y) P[i, 1] += exp(P[i, 3]); // ordering constraint target += normal_lpdf(y | SS_biexp(input, P), aux); } } else { if (SSfun <= 7) { if (SSfun == 5) target += normal_lpdf(y | SS_fol(Dose, input, P), aux); else if (SSfun == 6) target += normal_lpdf(y | SS_fpl(input, P), aux); else target += normal_lpdf(y | SS_gompertz(input, P), aux); } else { if (SSfun == 8) target += normal_lpdf(y | SS_logis(input, P), aux); else if (SSfun == 9) target += normal_lpdf(y | SS_micmen(input, P), aux); else target += normal_lpdf(y | SS_weibull(input, P), aux); } } } else if (has_weights == 0) { // unweighted log-likelihoods #include /model/make_eta_z.stan // adjust eta_z according to links if (has_intercept_z == 1) { if (link_phi > 1) { eta_z += gamma_z[1] - min(eta_z); } else { eta_z += gamma_z[1]; } } else { // has_intercept_z == 0 #include /model/eta_z_no_intercept.stan } if (family == 1) { if (link == 1) target += normal_lpdf(y | eta, aux); else if (link == 2) target += normal_lpdf(y | exp(eta), aux); else target += normal_lpdf(y | inv(eta), aux); } else if (family == 2) { target += GammaReg(y, eta, aux, link, sum_log_y); } else if (family == 3) { target += inv_gaussian(y, linkinv_inv_gaussian(eta, link), aux, sum_log_y, sqrt_y); } else if (family == 4 && link_phi == 0) 
{ vector[N] mu; mu = linkinv_beta(eta, link); target += beta_lpdf(y | mu * aux, (1 - mu) * aux); } else if (family == 4 && link_phi > 0) { vector[N] mu; vector[N] mu_z; mu = linkinv_beta(eta, link); mu_z = linkinv_beta_z(eta_z, link_phi); target += beta_lpdf(y | rows_dot_product(mu, mu_z), rows_dot_product(( 1 - mu), mu_z)); } } else { // weighted log-likelihoods vector[N] summands; if (family == 1) summands = pw_gauss(y, eta, aux, link); else if (family == 2) summands = pw_gamma(y, eta, aux, link); else if (family == 3) summands = pw_inv_gaussian(y, eta, aux, link, log_y, sqrt_y); else if (family == 4 && link_phi == 0) summands = pw_beta(y, eta, aux, link); else if (family == 4 && link_phi > 0) summands = pw_beta_z(y, eta, eta_z, link, link_phi); target += dot_product(weights, summands); } } // Log-priors if (prior_dist_for_aux > 0 && prior_scale_for_aux > 0) { real log_half = -0.693147180559945286; if (prior_dist_for_aux == 1) target += normal_lpdf(aux_unscaled | 0, 1) - log_half; else if (prior_dist_for_aux == 2) target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half; else target += exponential_lpdf(aux_unscaled | 1); } #include /model/priors_glm.stan #include /model/priors_betareg.stan if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 
0 : negative_infinity(); array[has_intercept] real alpha; array[has_intercept_z] real omega_int; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (has_intercept_z == 1) { omega_int[1] = gamma_z[1] - dot_product(zbar, omega); // adjust betareg intercept } if (compute_mean_PPD) { vector[N] eta_z; #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (make_lower(family, link) == negative_infinity() && make_upper(family, link) == positive_infinity()) eta += gamma[1]; else if (family == 4 && link == 5) { real max_eta = max(eta); alpha[1] -= max_eta; eta += gamma[1] - max_eta; } else { real min_eta = min(eta); alpha[1] -= min_eta; eta += gamma[1] - min_eta; } } else { #include /model/eta_no_intercept.stan } #include /model/make_eta_z.stan // adjust eta_z according to links if (has_intercept_z == 1) { if (link_phi > 1) { omega_int[1] -= min(eta_z); eta_z += gamma_z[1] - min(eta_z); } else { eta_z += gamma_z[1]; } } else { // has_intercept_z == 0 #include /model/eta_z_no_intercept.stan } if (SSfun > 0) { // nlmer vector[len_y] eta_nlmer; matrix[len_y, K] P; P = reshape_vec(eta, len_y, K); if (SSfun < 5) { if (SSfun <= 2) { if (SSfun == 1) eta_nlmer = SS_asymp(input, P); else eta_nlmer = SS_asympOff(input, P); } else if (SSfun == 3) eta_nlmer = SS_asympOrig(input, P); else eta_nlmer = SS_biexp(input, P); } else { if (SSfun <= 7) { if (SSfun == 5) eta_nlmer = SS_fol(Dose, input, P); else if (SSfun == 6) eta_nlmer = SS_fpl(input, P); else eta_nlmer = SS_gompertz(input, P); } else { if (SSfun == 8) eta_nlmer = SS_logis(input, P); else if (SSfun == 9) eta_nlmer = SS_micmen(input, P); else eta_nlmer = SS_weibull(input, P); } } for (n in 1 : len_y) mean_PPD += normal_rng(eta_nlmer[n], aux); } else if (family == 1) { vector[N] mu = link > 1 ? 
linkinv_gauss(eta, link) : eta; for (n in 1 : len_y) mean_PPD += normal_rng(mu[n], aux); } else if (family == 2) { vector[N] mu = link > 1 ? linkinv_gamma(eta, link) : eta; for (n in 1 : len_y) mean_PPD += gamma_rng(aux, aux / mu[n]); } else if (family == 3) { vector[N] mu = link > 1 ? linkinv_inv_gaussian(eta, link) : eta; for (n in 1 : len_y) mean_PPD += inv_gaussian_rng(mu[n], aux); } else if (family == 4 && link_phi == 0) { vector[N] mu = linkinv_beta(eta, link); for (n in 1 : N) { real mu_n = mu[n]; if (aux <= 0) mean_PPD += bernoulli_rng(0.5); else if (mu_n >= 1) mean_PPD += 1; else if (mu_n > 0) mean_PPD += beta_rng(mu_n * aux, (1 - mu_n) * aux); } } else if (family == 4 && link_phi > 0) { vector[N] mu = linkinv_beta(eta, link); vector[N] phi = linkinv_beta_z(eta_z, link_phi); for (n in 1 : N) { real mu_n = mu[n]; real aux_n = phi[n]; if (aux_n <= 0) mean_PPD += bernoulli_rng(0.5); else if (mu_n >= 1) mean_PPD += 1; else if (mu_n > 0) mean_PPD += beta_rng(mu_n * aux_n, (1 - mu_n) * aux_n); } } mean_PPD /= len_y; } } rstanarm/tests/testthat/stan_files/tdata/0000755000176200001440000000000014500256225020330 5ustar liggesusersrstanarm/tests/testthat/stan_files/tdata/tdata_glm.stan0000644000176200001440000000110614500256225023151 0ustar liggesusers int len_z_T = 0; int len_var_group = sum(p) * (t > 0); int len_rho = sum(p) - t; int is_continuous = 0; // changed in continuous.stan int pos = 1; array[len_concentration] real delta; int hs; if (prior_dist <= 2) hs = 0; else if (prior_dist == 3) hs = 2; else if (prior_dist == 4) hs = 4; else hs = 0; for (i in 1:t) { if (p[i] > 1) { for (j in 1:p[i]) { delta[pos] = concentration[j]; pos += 1; } } for (j in 3:p[i]) len_z_T += p[i] - 1; } rstanarm/tests/testthat/stan_files/tdata/tdata_mvmer.stan0000644000176200001440000000377514500256225023536 0ustar liggesusers // dimensions for hs priors int yHs1 = get_nvars_for_hs(M > 0 ? y_prior_dist[1] : 0); int yHs2 = get_nvars_for_hs(M > 1 ? 
y_prior_dist[2] : 0); int yHs3 = get_nvars_for_hs(M > 2 ? y_prior_dist[3] : 0); // data for decov prior int len_z_T = 0; int len_var_group = sum(p) * (t > 0); int len_rho = sum(p) - t; int pos = 1; array[len_concentration] real delta; // data for lkj prior array[prior_dist_for_cov == 2 ? (bK1 + choose(bK1, 2)) : 0] int bCov1_idx; array[prior_dist_for_cov == 2 ? (bK2 + choose(bK2, 2)) : 0] int bCov2_idx; // transformations of data real sum_log_y1 = M > 0 && (family[1] == 2 || family[1] == 3) ? sum(log(yReal1)) : not_a_number(); real sum_log_y2 = M > 1 && (family[2] == 2 || family[2] == 3) ? sum(log(yReal2)) : not_a_number(); real sum_log_y3 = M > 2 && (family[3] == 2 || family[3] == 3) ? sum(log(yReal3)) : not_a_number(); vector[M > 0 && family[1] == 3 ? yNobs[1] : 0] sqrt_y1; vector[M > 1 && family[2] == 3 ? yNobs[2] : 0] sqrt_y2; vector[M > 2 && family[3] == 3 ? yNobs[3] : 0] sqrt_y3; vector[M > 0 && family[1] == 3 ? yNobs[1] : 0] log_y1; vector[M > 1 && family[2] == 3 ? yNobs[2] : 0] log_y2; vector[M > 2 && family[3] == 3 ? 
yNobs[3] : 0] log_y3; if (M > 0 && family[1] == 3) { sqrt_y1 = sqrt(yReal1); log_y1 = log(yReal1); } if (M > 1 && family[2] == 3) { sqrt_y2 = sqrt(yReal2); log_y2 = log(yReal2); } if (M > 2 && family[3] == 3) { sqrt_y3 = sqrt(yReal3); log_y3 = log(yReal3); } // data for decov prior if (prior_dist_for_cov == 1) { for (i in 1:t) { if (p[i] > 1) { for (j in 1:p[i]) { delta[pos] = b_prior_concentration[j]; pos += 1; } } for (j in 3:p[i]) len_z_T += p[i] - 1; } } // data for lkj prior if (prior_dist_for_cov == 2) { if (bK1 > 0) bCov1_idx = lower_tri_indices(bK1); if (bK2 > 0) bCov2_idx = lower_tri_indices(bK2); } rstanarm/tests/testthat/stan_files/tdata/tdata_betareg.stan0000644000176200001440000000020413340675562024013 0ustar liggesusers if (prior_dist_z <= 2) hs_z = 0; else if (prior_dist_z == 3) hs_z = 2; else if (prior_dist_z == 4) hs_z = 4; else hs_z = 0; rstanarm/tests/testthat/stan_files/data/0000755000176200001440000000000014500256225020144 5ustar liggesusersrstanarm/tests/testthat/stan_files/data/glmer_stuff2.stan0000644000176200001440000000066514500256225023441 0ustar liggesusers int num_non_zero; // number of non-zero elements in the Z matrix vector[num_non_zero] w; // non-zero elements in the implicit Z matrix array[num_non_zero] int v; // column indices for w array[t > 0 ? N + 1 : 0] int u; // where the non-zeros start in each row int special_case; // is the only term (1|group) rstanarm/tests/testthat/stan_files/data/dimensions_mvmer.stan0000644000176200001440000000265414500256225024420 0ustar liggesusers // population level dimensions int M; // num submodels with data (limit of 3) array[3] int has_aux; // has auxiliary param int has_weights; // has observation weights array[3] int resp_type; // 1=real,2=integer,0=none array[3] int intercept_type; // 1=unbounded,2=lob,3=upb,0=none array[3] int yNobs; // num observations array[3] int yNeta; // required length of eta array[3] int yK; // num predictors // group level dimensions, for decov prior int t; // num. 
terms (maybe 0) with a | in the glmer formula array[t] int p; // num. variables on the LHS of each | array[t] int l; // num. levels for the factor(s) on the RHS of each | int q; // conceptually equals \sum_{i=1}^t p_i \times l_i int len_theta_L; // length of the theta_L vector // group level dimensions, for lkj prior // group factor 1 int bN1; // num groups int bK1; // total num params array[3] int bK1_len; // num params in each submodel array[3,2] int bK1_idx; // beg/end index for group params // group factor 2 int bN2; // num groups int bK2; // total num params array[3] int bK2_len; // num params in each submodel array[3,2] int bK2_idx; // beg/end index for group params rstanarm/tests/testthat/stan_files/data/data_betareg.stan0000644000176200001440000000162614500256225023442 0ustar liggesusers // betareg data int has_intercept_z; // presence of z intercept int link_phi; // link transformation for eta_z (0 => no z in model) int z_dim; // dimensions of z vars matrix[N, z_dim] betareg_z; // matrix of z vars row_vector[z_dim] zbar; // mean of predictors // betareg hyperparameters int prior_dist_z; int prior_dist_for_intercept_z; vector[z_dim] prior_scale_z; real prior_scale_for_intercept_z; vector[z_dim] prior_mean_z; real prior_mean_for_intercept_z; vector[z_dim] prior_df_z; real prior_df_for_intercept_z; real global_prior_scale_z; real global_prior_df_z; real slab_df_z; real slab_scale_z; array[prior_dist_z == 7 ? z_dim : 0] int num_normals_z; rstanarm/tests/testthat/stan_files/data/glmer_stuff.stan0000644000176200001440000000144114500256225023350 0ustar liggesusers // glmer stuff, see table 3 of // https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf int t; // num. terms (maybe 0) with a | in the glmer formula array[t] int p; // num. variables on the LHS of each | array[t] int l; // num. 
levels for the factor(s) on the RHS of each | int q; // conceptually equals \sum_{i=1}^t p_i \times l_i int len_theta_L; // length of the theta_L vector // hyperparameters for glmer stuff; if t > 0 priors are mandatory vector[t] shape; vector[t] scale; int len_concentration; array[len_concentration] real concentration; int len_regularization; array[len_regularization] real regularization; rstanarm/tests/testthat/stan_files/data/hyperparameters_event.stan0000644000176200001440000000133013365374540025455 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior vector[e_K] e_prior_mean; real e_prior_mean_for_intercept; vector[basehaz_df] e_prior_mean_for_aux; vector[e_K] e_prior_scale; real e_prior_scale_for_intercept; vector[basehaz_df] e_prior_scale_for_aux; vector[e_K] e_prior_df; real e_prior_df_for_intercept; vector[basehaz_df] e_prior_df_for_aux; real e_global_prior_scale; // for hs priors only real e_global_prior_df; real e_slab_df; real e_slab_scale; rstanarm/tests/testthat/stan_files/data/data_assoc.stan0000644000176200001440000002062114500256225023135 0ustar liggesusers // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso int a_prior_dist; //--- dimensions for association structure // num. of association parameters int a_K; // used for centering assoc terms vector[a_K] a_xbar; // used for scaling assoc terms vector[a_K] a_scale; // 0 = no assoc structure, 1 = any assoc structure int assoc; // which components are required to build association terms array[6,3] int assoc_uses; // which association terms does each submodel use array[16,M] int has_assoc; // num. of shared random effects int sum_size_which_b; // num. of shared random effects for each long submodel array[M] int size_which_b; // which random effects are shared for each long submodel array[sum_size_which_b] int which_b_zindex; // num. of shared random effects incl fixed component int sum_size_which_coef; // num. 
of shared random effects incl fixed component for each long submodel array[M] int size_which_coef; // which random effects are shared incl fixed component array[sum_size_which_coef] int which_coef_zindex; // which fixed effects are shared array[sum_size_which_coef] int which_coef_xindex; // total num pars used in assoc*assoc interactions int sum_size_which_interactions; // num pars used in assoc*assoc interactions, by submodel // and by evev/evmv/mvev/mvmv interactions array[M*4] int size_which_interactions; // which terms to interact with array[sum_size_which_interactions] int which_interactions; //---- data for calculating eta in GK quadrature array[3] int nrow_y_Xq; // num. rows in long. predictor matrix at quadpoints // fe design matrix at quadpoints matrix[assoc_uses[1,1] == 1 ? nrow_y_Xq[1] : 0, yK[1]] y1_xq_eta; matrix[assoc_uses[1,2] == 1 ? nrow_y_Xq[2] : 0, yK[2]] y2_xq_eta; matrix[assoc_uses[1,3] == 1 ? nrow_y_Xq[3] : 0, yK[3]] y3_xq_eta; // offset values at quadpoints vector[has_offset[1] && assoc_uses[1,1] == 1 ? nrow_y_Xq[1] : 0] y1_offset_eta; vector[has_offset[2] && assoc_uses[1,2] == 1 ? nrow_y_Xq[2] : 0] y2_offset_eta; vector[has_offset[3] && assoc_uses[1,3] == 1 ? nrow_y_Xq[3] : 0] y3_offset_eta; // re design matrix at quadpoints, group factor 1 array[bK1_len[1]] vector[assoc_uses[1,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] y1_z1q_eta; array[bK1_len[2]] vector[assoc_uses[1,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z1q_eta; array[bK1_len[3]] vector[assoc_uses[1,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] y3_z1q_eta; array[assoc_uses[1,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z1q_id_eta; array[assoc_uses[1,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z1q_id_eta; array[assoc_uses[1,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z1q_id_eta; // re design matrix at quadpoints, group factor 2 array[bK2_len[1]] vector[assoc_uses[1,1] == 1 && bK2_len[1] > 0 ? 
nrow_y_Xq[1] : 0] y1_z2q_eta; array[bK2_len[2]] vector[assoc_uses[1,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z2q_eta; array[bK2_len[3]] vector[assoc_uses[1,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq[3] : 0] y3_z2q_eta; array[assoc_uses[1,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z2q_id_eta; array[assoc_uses[1,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z2q_id_eta; array[assoc_uses[1,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z2q_id_eta; //---- data for calculating derivative of eta in GK quadrature // fe design matrix at quadpoints matrix[assoc_uses[2,1] == 1 ? nrow_y_Xq[1] : 0, yK[1]] y1_xq_eps; matrix[assoc_uses[2,2] == 1 ? nrow_y_Xq[2] : 0, yK[2]] y2_xq_eps; matrix[assoc_uses[2,3] == 1 ? nrow_y_Xq[3] : 0, yK[3]] y3_xq_eps; // offset values at quadpoints vector[has_offset[1] && assoc_uses[2,1] == 1 ? nrow_y_Xq[1] : 0] y1_offset_eps; vector[has_offset[2] && assoc_uses[2,2] == 1 ? nrow_y_Xq[2] : 0] y2_offset_eps; vector[has_offset[3] && assoc_uses[2,3] == 1 ? nrow_y_Xq[3] : 0] y3_offset_eps; // re design matrix at quadpoints, group factor 1 array[bK1_len[1]] vector[assoc_uses[2,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] y1_z1q_eps; array[bK1_len[2]] vector[assoc_uses[2,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z1q_eps; array[bK1_len[3]] vector[assoc_uses[2,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] y3_z1q_eps; array[assoc_uses[2,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z1q_id_eps; array[assoc_uses[2,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z1q_id_eps; array[assoc_uses[2,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z1q_id_eps; // re design matrix at quadpoints, group factor 2 array[bK2_len[1]] vector[assoc_uses[2,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq[1] : 0] y1_z2q_eps; array[bK2_len[2]] vector[assoc_uses[2,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z2q_eps; array[bK2_len[3]] vector[assoc_uses[2,3] == 1 && bK2_len[3] > 0 ? 
nrow_y_Xq[3] : 0] y3_z2q_eps; array[assoc_uses[2,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z2q_id_eps; array[assoc_uses[2,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z2q_id_eps; array[assoc_uses[2,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z2q_id_eps; //---- data for calculating integral of eta in GK quadrature // num. of nodes for GK quadrature for area under marker trajectory int auc_qnodes; int nrow_y_Xq_auc; // num. rows in long. predictor matrix at auc quadpoints vector[sum(assoc_uses[3,]) > 0 ? nrow_y_Xq_auc : 0] auc_qwts; // fe design matrix at quadpoints matrix[assoc_uses[3,1] == 1 ? nrow_y_Xq_auc : 0, yK[1]] y1_xq_auc; matrix[assoc_uses[3,2] == 1 ? nrow_y_Xq_auc : 0, yK[2]] y2_xq_auc; matrix[assoc_uses[3,3] == 1 ? nrow_y_Xq_auc : 0, yK[3]] y3_xq_auc; // offset values at quadpoints vector[has_offset[1] && assoc_uses[3,1] == 1 ? nrow_y_Xq_auc : 0] y1_offset_auc; vector[has_offset[2] && assoc_uses[3,2] == 1 ? nrow_y_Xq_auc : 0] y2_offset_auc; vector[has_offset[3] && assoc_uses[3,3] == 1 ? nrow_y_Xq_auc : 0] y3_offset_auc; // re design matrix at quadpoints, group factor 1 array[bK1_len[1]] vector[assoc_uses[3,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq_auc : 0] y1_z1q_auc; array[bK1_len[2]] vector[assoc_uses[3,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq_auc : 0] y2_z1q_auc; array[bK1_len[3]] vector[assoc_uses[3,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq_auc : 0] y3_z1q_auc; array[assoc_uses[3,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq_auc : 0] int y1_z1q_id_auc; array[assoc_uses[3,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq_auc : 0] int y2_z1q_id_auc; array[assoc_uses[3,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq_auc : 0] int y3_z1q_id_auc; // re design matrix at quadpoints, group factor 2 array[bK2_len[1]] vector[assoc_uses[3,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq_auc : 0] y1_z2q_auc; array[bK2_len[2]] vector[assoc_uses[3,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq_auc : 0] y2_z2q_auc; array[bK2_len[3]] vector[assoc_uses[3,3] == 1 && bK2_len[3] > 0 ? 
nrow_y_Xq_auc : 0] y3_z2q_auc; array[assoc_uses[3,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq_auc : 0] int y1_z2q_id_auc; array[assoc_uses[3,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq_auc : 0] int y2_z2q_id_auc; array[assoc_uses[3,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq_auc : 0] int y3_z2q_id_auc; //---- data for calculating assoc*data interactions in GK quadrature // num assoc pars used in {ev/es/mv/ms}*data interactions array[M*4] int a_K_data; // design matrix for interacting with ev/es/mv/ms at quadpoints matrix[sum(nrow_y_Xq[1:M]), sum(a_K_data)] y_Xq_data; // indexing specifying the rows of y_Xq_data that correspond to // each submodel array[3,2] int idx_q; //---- data for combining lower level units clustered within patients array[M] int has_grp; // 1 = has clustering below patient level int grp_assoc; // 1=sum, 2=mean, 3=min, 4=max array[nrow_e_Xq,2] int grp_idx; rstanarm/tests/testthat/stan_files/data/hyperparameters_mvmer.stan0000644000176200001440000000276514500256225025466 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior // coefficients vector[yK[1]] y_prior_mean1; vector[yK[2]] y_prior_mean2; vector[yK[3]] y_prior_mean3; vector[yK[1]] y_prior_scale1; vector[yK[2]] y_prior_scale2; vector[yK[3]] y_prior_scale3; vector[yK[1]] y_prior_df1; vector[yK[2]] y_prior_df2; vector[yK[3]] y_prior_df3; vector[M] y_global_prior_df; // for hs priors only vector[M] y_global_prior_scale; // for hs priors only vector[M] y_slab_df; // for hs priors only vector[M] y_slab_scale; // for hs priors only // intercepts vector[M] y_prior_mean_for_intercept; vector[M] y_prior_scale_for_intercept; vector[M] y_prior_df_for_intercept; // auxiliary params vector[M] y_prior_mean_for_aux; vector[M] y_prior_scale_for_aux; vector[M] y_prior_df_for_aux; // decov prior stuff int len_concentration; int len_regularization; vector[t] b_prior_shape; vector[t] b_prior_scale; array[len_concentration] real b_prior_concentration; array[len_regularization] real 
b_prior_regularization; // lkj prior stuff vector[bK1] b1_prior_scale; vector[bK2] b2_prior_scale; vector[bK1] b1_prior_df; vector[bK2] b2_prior_df; real b1_prior_regularization; real b2_prior_regularization; rstanarm/tests/testthat/stan_files/data/hyperparameters.stan0000644000176200001440000000156014500256225024250 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior vector[K] prior_scale; real prior_scale_for_intercept; real prior_scale_for_aux; vector[K_smooth > 0 ? max(smooth_map) : 0] prior_scale_for_smooth; vector[K] prior_mean; real prior_mean_for_intercept; real prior_mean_for_aux; vector[K_smooth > 0 ? max(smooth_map) : 0] prior_mean_for_smooth; vector[K] prior_df; real prior_df_for_intercept; real prior_df_for_aux; vector[K_smooth > 0 ? max(smooth_map) : 0] prior_df_for_smooth; real global_prior_df; // for hs priors only real global_prior_scale; // for hs priors only real slab_df; // for hs prior only real slab_scale; // for hs prior only array[prior_dist == 7 ? K : 0] int num_normals; rstanarm/tests/testthat/stan_files/data/NKX.stan0000644000176200001440000000147614500256225021503 0ustar liggesusers // dimensions int N; // number of observations int K; // number of predictors // data vector[K] xbar; // predictor means int dense_X; // flag for dense vs. sparse array[dense_X] matrix[N,K] X; // centered predictor matrix in the dense case // stuff for the sparse case int nnz_X; // number of non-zero elements in the implicit X matrix vector[nnz_X] w_X; // non-zero elements in the implicit X matrix array[nnz_X] int v_X; // column indices for w_X // where the non-zeros start in each row of X array[dense_X ? 0 : N + 1] int u_X; // smooths int K_smooth; matrix[N,K_smooth] S; array[K_smooth] int smooth_map; rstanarm/tests/testthat/stan_files/data/data_mvmer.stan0000644000176200001440000000524414500256225023157 0ustar liggesusers // population level data array[resp_type[1] == 2 ? 
yNobs[1] : 0] int yInt1; // integer responses array[resp_type[2] == 2 ? yNobs[2] : 0] int yInt2; array[resp_type[3] == 2 ? yNobs[3] : 0] int yInt3; vector[resp_type[1] == 1 ? yNobs[1] : 0] yReal1; // real responses vector[resp_type[2] == 1 ? yNobs[2] : 0] yReal2; vector[resp_type[3] == 1 ? yNobs[3] : 0] yReal3; matrix[yNeta[1],yK[1]] yX1; // fe design matrix matrix[yNeta[2],yK[2]] yX2; matrix[yNeta[3],yK[3]] yX3; vector[yK[1]] yXbar1; // predictor means vector[yK[2]] yXbar2; vector[yK[3]] yXbar3; // family and link (determined by 'append_mvmer_famlink' R function) // 1 = gaussian // 2 = gamma // 3 = inverse gaussian // 4 = bernoulli // 5 = binomial (n>1) // 6 = poisson // 7 = negative binomial array[M] int family; array[M] int link; // varies by family // group level data, group factor 1 array[bK1_len[1]] vector[bK1_len[1] > 0 ? yNeta[1] : 0] y1_Z1; // re design matrix array[bK1_len[2]] vector[bK1_len[2] > 0 ? yNeta[2] : 0] y2_Z1; array[bK1_len[3]] vector[bK1_len[3] > 0 ? yNeta[3] : 0] y3_Z1; array[bK1_len[1] > 0 ? yNeta[1] : 0] int y1_Z1_id; // group indexing for y1_Z1 array[bK1_len[2] > 0 ? yNeta[2] : 0] int y2_Z1_id; // group indexing for y2_Z1 array[bK1_len[3] > 0 ? yNeta[3] : 0] int y3_Z1_id; // group indexing for y3_Z1 // group level data, group factor 2 array[bK2_len[1]] vector[bK2_len[1] > 0 ? yNeta[1] : 0] y1_Z2; // re design matrix array[bK2_len[2]] vector[bK2_len[2] > 0 ? yNeta[2] : 0] y2_Z2; array[bK2_len[3]] vector[bK2_len[3] > 0 ? yNeta[3] : 0] y3_Z2; array[bK2_len[1] > 0 ? yNeta[1] : 0] int y1_Z2_id; // group indexing for y1_Z2 array[bK2_len[2] > 0 ? yNeta[2] : 0] int y2_Z2_id; // group indexing for y2_Z2 array[bK2_len[3] > 0 ? 
yNeta[3] : 0] int y3_Z2_id; // group indexing for y3_Z2 // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso, 7 = product_normal array[3] int y_prior_dist; array[M] int y_prior_dist_for_intercept; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential array[M] int y_prior_dist_for_aux; // prior family: 1 = decov, 2 = lkj int prior_dist_for_cov; // flag indicating whether to draw from the prior int prior_PD; // 1 = yes // offset array[3] int has_offset; // 0 = No, 1 = Yes vector[has_offset[1] ? yNeta[1] : 0] y1_offset; vector[has_offset[2] ? yNeta[2] : 0] y2_offset; vector[has_offset[3] ? yNeta[3] : 0] y3_offset; rstanarm/tests/testthat/stan_files/data/data_glm.stan0000644000176200001440000000142514366062356022617 0ustar liggesusers // flag indicating whether to draw from the prior int prior_PD; // 1 = yes int compute_mean_PPD; // 1 = yes // intercept int has_intercept; // 1 = yes // link function from location to linear predictor int link; // interpretation varies by .stan file // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso, 7 = product_normal int prior_dist; int prior_dist_for_intercept; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential int prior_dist_for_aux; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential int prior_dist_for_smooth; rstanarm/tests/testthat/stan_files/data/weights_offset.stan0000644000176200001440000000033013722762571024062 0ustar liggesusers // weights int has_weights; // 0 = No, 1 = Yes vector[has_weights ? N : 0] weights; // offset int has_offset; // 0 = No, 1 = Yes vector[has_offset ? 
N : 0] offset_; rstanarm/tests/testthat/stan_files/data/hyperparameters_assoc.stan0000644000176200001440000000053713340675562025455 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior vector[a_K] a_prior_mean; vector[a_K] a_prior_scale; vector[a_K] a_prior_df; real a_global_prior_scale; // for hs priors only real a_global_prior_df; real a_slab_df; real a_slab_scale; rstanarm/tests/testthat/stan_files/data/data_event.stan0000644000176200001440000000312613365374540023160 0ustar liggesusers // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso int e_prior_dist; int e_prior_dist_for_intercept; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential int e_prior_dist_for_aux; // prior for basehaz params // data for event submodel real norm_const; // constant shift for log baseline hazard int e_K; // num. of predictors in event submodel int Npat; // num. individuals (equal to l[id_var] - 1) int Nevents; // num. events (ie. not censored) int qnodes; // num. of nodes for Gauss-Kronrod quadrature int Npat_times_qnodes; int basehaz_type; // 1 = weibull, 2 = B-splines, 3 = piecewise int basehaz_df; // df for baseline hazard int e_has_intercept; // 1 = yes int nrow_e_Xq; // num. rows in event predictor matrix at quad points matrix[e_K > 0 ? 
nrow_e_Xq : 0, e_K] e_Xq; // predictor matrix (event submodel) at qpts, centred vector[nrow_e_Xq] e_times; // event times and unstandardised quadrature points matrix[nrow_e_Xq,basehaz_df] basehaz_X; // design matrix (basis terms) for baseline hazard vector[e_K] e_xbar; // predictor means (event submodel) vector[Npat] e_weights; // weights, set to zero if not used vector[Npat_times_qnodes] e_weights_rep; // repeated weights, set to zero if not used vector[Npat_times_qnodes] qwts; // GK quadrature weights with (b-a)/2 scaling rstanarm/tests/testthat/stan_files/functions/0000755000176200001440000000000014505733323021247 5ustar liggesusersrstanarm/tests/testthat/stan_files/functions/count_likelihoods.stan0000644000176200001440000000342514500256225025654 0ustar liggesusers /** * Apply inverse link function to linear predictor * see help(poisson) in R * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_count(vector eta, int link) { if (link == 1) return exp(eta); // log else if (link == 2) return eta; // identity else if (link == 3) return(square(eta)); // sqrt else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector for the Poisson distribution * * @param y The integer array corresponding to the outcome variable. 
* @param eta The vector of linear predictors * @param link An integer indicating the link function * @return A vector */ vector pw_pois(array[] int y, vector eta, int link) { int N = rows(eta); vector[N] ll; if (link == 1) // log for (n in 1:N) ll[n] = poisson_log_lpmf(y[n] | eta[n]); else if (link <= 3) { // link = identity or sqrt vector[N] phi = linkinv_count(eta, link); for (n in 1:N) ll[n] = poisson_lpmf(y[n] | phi[n]) ; } else reject("Invalid link"); return ll; } /** * Pointwise (pw) log-likelihood vector for the negative binomial distribution * * @param y The integer array corresponding to the outcome variable. * @param eta The vector of linear predictors * @param theta The reciprocal_dispersion parameter * @param link An integer indicating the link function * @return A vector */ vector pw_nb(array[] int y, vector eta, real theta, int link) { int N = rows(eta); vector[N] rho = linkinv_count(eta, link); // link checked vector[N] ll; for (n in 1:N) ll[n] = neg_binomial_2_lpmf(y[n] | rho[n], theta); return ll; } rstanarm/tests/testthat/stan_files/functions/SSfunctions.stan0000644000176200001440000001047314505733323024421 0ustar liggesusers/* These functions (without the underscores) are all documented in R See also Appendix C of Pinheiro and Bates https://books.google.com/books?id=3TVDAAAAQBAJ&lpg=PR3&dq=Pinheiro%20and%20Bates&pg=PA511#v=onepage&q&f=false These functions may be numerically unstable */ vector SS_asymp(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = R0, Phi_[,3] = lrc if (rows(Phi_) > 1) { vector[rows(Phi_)] Asym = Phi_[,1]; return Asym + (Phi_[,2] - Asym) .* exp(-exp(Phi_[,3]) .* input); } else { real Asym = Phi_[1,1]; return Asym + (Phi_[1,2] - Asym) * exp(-exp(Phi_[1,3]) * input); } } vector SS_asympOff(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = lrc, Phi_[,3] = c0 if (rows(Phi_) > 1) return Phi_[ ,1] .* (1 - exp(-exp(Phi_[ ,2]) .* (input - Phi_[ ,3]))); else return Phi_[1,1] * (1 - exp(-exp(Phi_[1,2]) * (input - 
Phi_[1,3]))); } vector SS_asympOrig(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = lrc if (rows(Phi_) > 1) return Phi_[ ,1] .* (1 - exp(-exp(Phi_[ ,2]) .* input)); else return Phi_[1,1] * (1 - exp(-exp(Phi_[1,2]) * input)); } vector SS_biexp(vector input, matrix Phi_) { // Phi_[,1] = A1, Phi_[,2] = lrc1, Phi_[,3] = A2, Phi_[,4] = lrc2 if (rows(Phi_) > 1) return Phi_[ ,1] .* exp(-exp(Phi_[ ,2]) .* input) + Phi_[ ,3] .* exp(-exp(Phi_[ ,4]) .* input); else return Phi_[1,1] * exp(-exp(Phi_[1,2]) * input) + Phi_[1,3] * exp(-exp(Phi_[1,4]) * input); } vector SS_fol(vector Dose, vector input, matrix Phi_) { // Phi_[,1] = lKe, Phi_[,2] = lKa, Phi_[,3] = lCl int Phi__rows = rows(Phi_); if (Phi__rows > 1) { vector[Phi__rows] lKe = Phi_[,1]; vector[Phi__rows] lKa = Phi_[,2]; vector[Phi__rows] exp_lKe = exp(lKe); vector[Phi__rows] exp_lKa = exp(lKa); return Dose .* exp(lKe + lKa - Phi_[,3]) .* (exp(-exp_lKe .* input) - exp(-exp_lKa .* input)) ./ (exp_lKa - exp_lKe); } else { real lKe = Phi_[1,1]; real lKa = Phi_[1,2]; real exp_lKe = exp(lKe); real exp_lKa = exp(lKa); return Dose * exp(lKe + lKa - Phi_[1,3]) .* (exp(-exp_lKe * input) - exp(-exp_lKa * input)) / (exp_lKa - exp_lKe); } } vector SS_fpl(vector input, matrix Phi_) { // Phi_[,1] = A, Phi_[,2] = B, Phi_[,3] = xmid, Phi_[,4] = scal // input is generally data so cannot switch signs if (rows(Phi_) > 1) { vector[rows(Phi_)] A = Phi_[,1]; return A + (Phi_[,2] - A) ./ (1 + exp((Phi_[,3] - input) ./ exp(Phi_[,4]))); } else { real A = Phi_[1,1]; return A + rep_vector(Phi_[1,2] - A, rows(input)) ./ (1 + exp((Phi_[1,3] - input) / exp(Phi_[1,4]))); } } vector SS_gompertz(vector x, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = b2, Phi_[,3] = b3 vector[rows(x)] out; if (rows(Phi_) > 1) for (i in 1:rows(x)) out[i] = Phi_[i,1] * exp(-Phi_[i,2] * Phi_[i,3] ^ x[i]); else { real Asym = Phi_[1,1]; real b2 = Phi_[1,2]; real b3 = Phi_[1,3]; for (i in 1:rows(x)) out[i] = Asym * exp(-b2 * b3 ^ x[i]); } return out; } vector 
SS_logis(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = xmid, Phi_[,3] = scal // input is typically data so cannot switch signs of everything if (rows(Phi_) > 1) return Phi_[,1] ./ (1 + exp( (Phi_[,2] - input) ./ exp(Phi_[,3]))); else return rep_vector(Phi_[1,1], rows(input)) ./ (1 + exp( (Phi_[1,2] - input) / exp(Phi_[1,3]))); } vector SS_micmen(vector input, matrix Phi_) { // Phi_[,1] = Vm, Phi_[,2] = K if (rows(Phi_) > 1) return Phi_[ ,1] .* input ./ (Phi_[ ,2] + input); else return Phi_[1,1] * input ./ (Phi_[1,2] + input); } vector SS_weibull(vector x, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = Drop, Phi_[,3] = lrc, Phi_[,4] = pwr vector[rows(x)] out; if (rows(Phi_) > 1) for (i in 1:rows(x)) out[i] = Phi_[i,1] - Phi_[i,2] * exp(-exp(Phi_[i,3]) * x[i] ^ Phi_[i,4]); else { real Asym = Phi_[1,1]; real Drop = Phi_[1,2]; real lrc = Phi_[1,3]; real pwr = Phi_[1,4]; for (i in 1:rows(x)) out[i] = Asym - Drop * exp(-exp(lrc) * x[i] ^ pwr); } return out; } matrix reshape_vec(vector x, int Rows, int Cols) { matrix[Rows, Cols] out; int pos = 1; if (rows(x) != Rows * Cols) reject("x is the wrong length"); for (c in 1:Cols) for (r in 1:Rows) { out[r,c] = x[pos]; pos += 1; } return out; } rstanarm/tests/testthat/stan_files/functions/bernoulli_likelihoods.stan0000644000176200001440000001034414505733323026521 0ustar liggesusers /** * Apply inverse link function to linear predictor * see help(binom) in R * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. 
inverse-link(eta) */ vector linkinv_bern(vector eta, int link) { if (link == 1) return(inv_logit(eta)); // logit else if (link == 2) return(Phi(eta)); // probit else if (link == 3) return(atan(eta) / pi() + 0.5); // cauchit else if (link == 4) return(exp(eta)); // log else if (link == 5) return(inv_cloglog(eta)); // cloglog else reject("Invalid link"); return eta; // never reached } /** * Increment with the unweighted log-likelihood * @param link An integer indicating the link function * @param eta0 A vector of linear predictors | y = 0 * @param eta1 A vector of linear predictors | y = 1 * @param N An integer array of length 2 giving the number of * observations where y = 0 and y = 1 respectively * @return lp__ */ real bern_lpdf(vector eta0, vector eta1, int link, array[] int N) { real lp = 0; if (link == 1) { // logit lp += logistic_lccdf(eta0 | 0, 1); lp += logistic_lcdf( eta1 | 0, 1); } else if (link == 2) { // probit lp += normal_lccdf(eta0 | 0, 1); lp += normal_lcdf( eta1 | 0, 1); } else if (link == 3) { // cauchit lp += cauchy_lccdf(eta0 | 0, 1); lp += cauchy_lcdf( eta1 | 0, 1); } else if(link == 4) { // log lp += sum(log1m_exp(eta0)); lp += sum(eta1); // already in log form } else if(link == 5) { // cloglog lp += sum(log1m_exp(-exp(eta1))); lp += sum(-exp(eta0)); } else reject("Invalid link"); return lp; } /** * Pointwise (pw) log-likelihood vector * * @param y The integer outcome variable. 
Note that function is * called separately with y = 0 and y = 1 * @param eta Vector of linear predictions * @param link An integer indicating the link function * @return A vector */ vector pw_bern(int y, vector eta, int link) { int N = rows(eta); vector[N] ll; if (link == 1) { // logit for (n in 1:N) ll[n] = bernoulli_logit_lpmf(y | eta[n]); } else if (link <= 5) { // link = probit, cauchit, log, or cloglog vector[N] pi = linkinv_bern(eta, link); // may not be stable for (n in 1:N) ll[n] = bernoulli_lpmf(y | pi[n]); } else reject("Invalid link"); return ll; } /** * Log-normalizing constant in the clogit case * * @param N_j Integer number of observations in the j-th group * @param D_j Integer number of successes in the j-th group * @param eta_j Vector of linear predictions in the j-th group * @return A scalar that normalizes the probabilities on the log-scale */ real log_clogit_denom(int N_j, int D_j, vector eta_j) { if (D_j == 1 && N_j == rows(eta_j)) return log_sum_exp(eta_j); if (D_j == 0) return 0; if (N_j == D_j) { if (D_j == 1) return eta_j[N_j]; return sum(segment(eta_j, N_j - 1, 2)); } else { int N_jm1 = N_j - 1; return log_sum_exp(log_clogit_denom(N_jm1, D_j, eta_j), log_clogit_denom(N_jm1, D_j - 1, eta_j) + eta_j[N_j]); } return not_a_number(); // never reaches } /** * Log-likelihood for a clogit model * @param eta0 Linear predictors when y == 0 * @param eta1 Linear predictors when y == 1 * @param successes Integer array with the number of successes in group j * @param failures Integer array with the number of failures in group j * @param observations Integer array with the number of observations in group j * @return lp__ */ real clogit_lpdf(vector eta0, vector eta1, array[] int successes, array[] int failures, array[] int observations) { int J = num_elements(observations); int pos0 = 1; int pos1 = 1; vector[J] summands; for (j in 1:J) { int D_g = successes[j]; int N_g = observations[j]; int F_g = failures[j]; vector[N_g] eta_g = append_row(segment(eta1, 
pos1, D_g), segment(eta0, pos0, F_g)); summands[j] = log_clogit_denom(N_g, D_g, eta_g); pos0 += F_g; pos1 += D_g; } return sum(eta1) - sum(summands); } rstanarm/tests/testthat/stan_files/functions/jm_functions.stan0000644000176200001440000002714514500256225024641 0ustar liggesusers /** * Scale a vector of auxiliary parameters based on prior information * * @param aux_unscaled A vector, the unscaled auxiliary parameters * @param prior_dist Integer, the type of prior distribution * @param prior_mean,prior_scale Vectors, the mean and scale * of the prior distribution * @return A vector, corresponding to the scaled auxiliary parameters */ vector make_basehaz_coef(vector aux_unscaled, int prior_dist, vector prior_mean, vector prior_scale) { vector[rows(aux_unscaled)] aux; if (prior_dist == 0) // none aux = aux_unscaled; else { aux = prior_scale .* aux_unscaled; if (prior_dist <= 2) // normal or student_t aux += prior_mean; } return aux; } /** * Log-prior for baseline hazard parameters * * @param aux_unscaled Vector (potentially of length 1) of unscaled * auxiliary parameter(s) * @param dist Integer specifying the type of prior distribution * @param scale Real specifying the scale for the prior distribution * @param df Real specifying the df for the prior distribution * @return nothing */ real basehaz_lpdf(vector aux_unscaled, int dist, vector scale, vector df) { real lp = 0; if (dist > 0) { if (dist == 1) lp += normal_lpdf(aux_unscaled | 0, 1); else if (dist == 2) lp += student_t_lpdf(aux_unscaled | df, 0, 1); else lp += exponential_lpdf(aux_unscaled | 1); } return lp; } /** * Take the linear predictor and collapse across lower level * units of the grouping factor clustered within patients, using * the function specified by 'grp_assoc' * * @param eta The linear predictor evaluated for all the lower * level units, having some length greater than N. 
* @param grp_idx An N-by-2 two dimensional array providing the * beginning and ending index of the lower level units in eta that * correspond to patient n (where n = 1,...,N). * @param grp_assoc The method for collapsing across the lower * level units; 1=sum, 2=mean, 3=min, 4=max. * @return A vector */ vector collapse_within_groups(vector eta, array[,] int grp_idx, int grp_assoc) { int N = size(grp_idx); vector[N] val; if (grp_assoc == 1) { // sum of lower level clusters for (n in 1:N) val[n] = sum(eta[grp_idx[n,1]:grp_idx[n,2]]); } else if (grp_assoc == 2) { // mean of lower level clusters for (n in 1:N) val[n] = mean(eta[grp_idx[n,1]:grp_idx[n,2]]); } else if (grp_assoc == 3) { // min of lower level clusters for (n in 1:N) val[n] = min(eta[grp_idx[n,1]:grp_idx[n,2]]); } else if (grp_assoc == 4) { // max of lower level clusters for (n in 1:N) val[n] = max(eta[grp_idx[n,1]:grp_idx[n,2]]); } return val; } /** * Create a design matrix for a shared random effects association * structure in the joint model * * @param b Vector of group-specific coefficients * @param l An integer array with the number of levels for the factor(s) on * the RHS of each | * @param p An integer array with the number of variables on the LHS of each | * @param pmat A matrix with the number variables on the LHS of each | in each * longitudinal submodel. The rows correspond to each |, meaning the separate * equations for each grouping variable, and the columns correspond to each * longitudinal submodel. If subject ID is the only grouping variable then the * matrix will have one row. If the joint model only has one longitudinal * submodel then the matrix will have one column. 
* @param Npat Integer specifying number of individuals represented * in vector b * @param qnodes The number of quadrature nodes * @param which_b Integer array specifying the indices * of the random effects to use in the association structure * @param sum_size_which_b Integer specifying total number of * random effects that are to be used in the association structure * @param size_which_b Integer array specifying number of random effects from * each long submodel that are to be used in the association structure * @param t_i Integer specifying the index of the grouping factor that * corresponds to the patient-level * @param M An integer specifying the number of longitudinal submodels * @return A matrix with the desired random effects represented * in columns, and the individuals on the rows; the matrix is * repeated (qnodes + 1) times (bounded by rows) */ matrix make_x_assoc_shared_b( vector b, array[] int l, array[] int p, array[,] int pmat, int Npat, int qnodes, array[] int which_b, int sum_size_which_b, array[] int size_which_b, int t_i, int M) { int prior_shift; // num. ranefs prior to subject-specific ranefs int start_store; int end_store; matrix[Npat,sum_size_which_b] temp; matrix[(Npat*(qnodes+1)),sum_size_which_b] x_assoc_shared_b; if (t_i == 1) prior_shift = 0; else prior_shift = sum(l[1:(t_i-1)]); for (i in 1:Npat) { int mark; int start_collect; // index start of subject-specific ranefs for patient mark = 1; start_collect = prior_shift + (i - 1) * p[t_i]; for (m in 1:M) { if (size_which_b[m] > 0) { int shift; // num. 
subject-specific ranefs in prior submodels int j_shift; // shift in indexing of which_b vector if (m == 1) { shift = 0; j_shift = 0; } else { shift = sum(pmat[t_i, 1:(m-1)]); j_shift = sum(size_which_b[1:(m-1)]); } for (j in 1:size_which_b[m]) { int item_collect; // subject-specific ranefs to select for current submodel item_collect = start_collect + shift + which_b[(j_shift + j)]; temp[i,mark] = b[item_collect]; mark += 1; } } } } for (i in 1:(qnodes+1)) { start_store = (i - 1) * Npat + 1; end_store = i * Npat; x_assoc_shared_b[start_store:end_store,] = temp; } return x_assoc_shared_b; } /** * Create a design matrix for a shared fixed + random effects association * structure in the joint model * * @param b Vector of group-specific coefficients * @param l An integer array with the number of levels for the factor(s) on * the RHS of each | * @param p An integer array with the number of variables on the LHS of each | * @param pmat A matrix with the number variables on the LHS of each | in each * longitudinal submodel. The rows correspond to each |, meaning the separate * equations for each grouping variable, and the columns correspond to each * longitudinal submodel. If subject ID is the only grouping variable then the * matrix will have one row. If the joint model only has one longitudinal * submodel then the matrix will have one column. 
* @param Npat Integer specifying number of individuals represented * in vector b * @param qnodes The number of quadrature nodes * @param which_b Integer array specifying the indices * of the random effects to use in the association structure * @param sum_size_which_b Integer specifying total number of * random effects that are to be used in the association structure * @param size_which_b Integer array specifying number of random effects from * each long submodel that are to be used in the association structure * @param t_i Integer specifying the index of the grouping factor that * corresponds to the patient-level * @param M An integer specifying the number of longitudinal submodels * @return A matrix with the desired random effects represented * in columns, and the individuals on the rows; the matrix is * repeated (qnodes + 1) times (bounded by rows) */ matrix make_x_assoc_shared_coef( vector b, vector beta, array[] int KM, int M, int t_i, array[] int l, array[] int p, array[,] int pmat, int Npat, int qnodes, int sum_size_which_coef, array[] int size_which_coef, array[] int which_coef_zindex, array[] int which_coef_xindex, array[] int has_intercept, array[] int has_intercept_nob, array[] int has_intercept_lob, array[] int has_intercept_upb, array[] real gamma_nob, array[] real gamma_lob, array[] real gamma_upb) { // in the loops below: // t_i should only really ever equal 1 (since shared_coef association // structure is not allowed if there is more than one clustering level) // i = levels (ie, individuals) // j = indices of the shared random effecs // m = models int t_shift; // skip over group-level coefficients for earlier grouping factors int start_store; int end_store; matrix[Npat,sum_size_which_coef] temp; matrix[(Npat*(qnodes+1)),sum_size_which_coef] x_assoc_shared_coef; if (t_i == 1) t_shift = 0; else t_shift = sum(l[1:(t_i-1)]); for (i in 1:Npat) { int mark; // counter for looping over shared coefficients int i_shift; // skip over group-level coefficients 
for earlier levels mark = 1; i_shift = (i - 1) * p[t_i]; for (m in 1:M) { if (size_which_coef[m] > 0) { // if model has shared coefficients int j_shift; // skip over elements of which_coef_zindex vector that are associated with earlier submodels int m_shift; // skip over individual i's group-level coefficients for earlier submodels int shift_nb; int shift_lb; int shift_ub; int shift_beta; if (m == 1) { j_shift = 0; m_shift = 0; shift_nb = 0; shift_lb = 0; shift_ub = 0; shift_beta = 0; } else { j_shift = sum(size_which_coef[1:(m-1)]); m_shift = sum(pmat[t_i, 1:(m-1)]); shift_nb = sum(has_intercept_nob[1:(m-1)]); shift_lb = sum(has_intercept_lob[1:(m-1)]); shift_ub = sum(has_intercept_upb[1:(m-1)]); shift_beta = sum(KM[1:(m-1)]); } for (j in 1:size_which_coef[m]) { int b_collect; // group-level coefficients to extract for current i, j, m int beta_collect_m; // within-submodel index of fixed effect coefficient to extract int beta_collect; // overall index of fixed effect coefficient to extract real coef; b_collect = t_shift + i_shift + m_shift + which_coef_zindex[(j_shift + j)]; beta_collect_m = which_coef_xindex[(j_shift + j)]; beta_collect = shift_beta + beta_collect_m; coef = b[b_collect]; // start with group-level coefficient if ((has_intercept[m] == 1) && (beta_collect == 1)) { // collect intercept if (has_intercept_nob[m] == 1) coef += gamma_nob[sum(has_intercept_nob[1:m])]; else if (has_intercept_lob[m] == 1) coef += gamma_lob[sum(has_intercept_lob[1:m])]; else if (has_intercept_upb[m] == 1) coef += gamma_upb[sum(has_intercept_upb[1:m])]; } else if (has_intercept[m] == 1) { // collect fixed effect whilst recognising intercept term // isn't in beta and correcting for that in the indexing coef += beta[(beta_collect - 1)]; } else coef += beta[beta_collect]; temp[i, mark] = coef; mark += 1; // move to next shared coefficient for individual i } } } } // repeat the temp matrix qnodes times (ie, rbind) for (i in 1:(qnodes+1)) { start_store = (i - 1) * Npat + 1; 
end_store = i * Npat; x_assoc_shared_coef[start_store:end_store, ] = temp; } return x_assoc_shared_coef; } rstanarm/tests/testthat/stan_files/functions/continuous_likelihoods.stan0000644000176200001440000001750613722762571026752 0ustar liggesusers /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_gauss(vector eta, int link) { if (link == 1) return eta; else if (link == 2) return exp(eta); else if (link == 3) return inv(eta); else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. * @param link An integer indicating the link function * @return A vector */ vector pw_gauss(vector y, vector eta, real sigma, int link) { return -0.5 * log(6.283185307179586232 * sigma) - 0.5 * square((y - linkinv_gauss(eta, link)) / sigma); } /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_gamma(vector eta, int link) { if (link == 1) return eta; else if (link == 2) return exp(eta); else if (link == 3) return inv(eta); else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. 
* @param eta A vector of linear predictors * @param shape A real number for the shape parameter * @param link An integer indicating the link function * @param sum_log_y A scalar equal to the sum of log(y) * @return A scalar log-likelihood */ real GammaReg(vector y, vector eta, real shape, int link, real sum_log_y) { real ret = rows(y) * (shape * log(shape) - lgamma(shape)) + (shape - 1) * sum_log_y; if (link == 2) // link is log ret -= shape * sum(eta) + shape * sum(y ./ exp(eta)); else if (link == 1) // link is identity ret -= shape * sum(log(eta)) + shape * sum(y ./ eta); else if (link == 3) // link is inverse ret += shape * sum(log(eta)) - shape * dot_product(eta, y); else reject("Invalid link"); return ret; } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. * @param shape A real number for the shape parameter * @param link An integer indicating the link function * @return A vector */ vector pw_gamma(vector y, vector eta, real shape, int link) { int N = rows(eta); vector[N] ll; if (link == 3) { // link = inverse for (n in 1:N) { ll[n] = gamma_lpdf(y[n] | shape, shape * eta[n]); } } else if (link == 2) { // link = log for (n in 1:N) { ll[n] = gamma_lpdf(y[n] | shape, shape / exp(eta[n])); } } else if (link == 1) { // link = identity for (n in 1:N) { ll[n] = gamma_lpdf(y[n] | shape, shape / eta[n]); } } else reject("Invalid link"); return ll; } /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. 
inverse-link(eta) */ vector linkinv_inv_gaussian(vector eta, int link) { if (link == 1) return eta; else if (link == 2) return exp(eta); else if (link == 3) return inv(eta); else if (link == 4) return inv_sqrt(eta); else reject("Invalid link"); return eta; // never reached } /** * inverse Gaussian log-PDF * * @param y The vector of outcomes * @param mu The vector of conditional means * @param lambda A positive scalar dispersion parameter * @param sum_log_y A scalar equal to the sum of log(y) * @param sqrt_y A vector equal to sqrt(y) * @return A scalar */ real inv_gaussian(vector y, vector mu, real lambda, real sum_log_y, vector sqrt_y) { return 0.5 * rows(y) * log(lambda / 6.283185307179586232) - 1.5 * sum_log_y - 0.5 * lambda * dot_self( (y - mu) ./ (mu .* sqrt_y) ); } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. * @param eta The linear predictors * @param lamba A positive scalar dispersion parameter * @param link An integer indicating the link function * @param log_y A precalculated vector of the log of y * @param sqrt_y A precalculated vector of the square root of y * @return A vector of log-likelihoods */ vector pw_inv_gaussian(vector y, vector eta, real lambda, int link, vector log_y, vector sqrt_y) { vector[rows(y)] mu = linkinv_inv_gaussian(eta, link); // link checked return -0.5 * lambda * square( (y - mu) ./ (mu .* sqrt_y) ) + 0.5 * log(lambda / 6.283185307179586232) - 1.5 * log_y; } /** * PRNG for the inverse Gaussian distribution * * Algorithm from wikipedia * * @param mu The expectation * @param lambda The dispersion * @return A draw from the inverse Gaussian distribution */ real inv_gaussian_rng(real mu, real lambda) { real mu2 = square(mu); real z = uniform_rng(0,1); real y = square(normal_rng(0,1)); real x = mu + ( mu2 * y - mu * sqrt(4 * mu * lambda * y + mu2 * square(y)) ) / (2 * lambda); if (z <= (mu / (mu + x))) return x; else return mu2 / x; } /** * Apply inverse link function to linear 
predictor for beta models * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_beta(vector eta, int link) { if (link == 1) return inv_logit(eta); // logit else if (link == 2) return Phi(eta); // probit else if (link == 3) return inv_cloglog(eta); // cloglog else if (link == 4) return 0.5 + atan(eta) / pi(); // cauchy else if (link == 5) return exp(eta); // log else if (link == 6) return 1 - inv_cloglog(-eta); // loglog else reject("invalid link"); return eta; // never reached } /** * Apply inverse link function to linear predictor for dispersion for beta models * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_beta_z(vector eta, int link) { if (link == 1) return exp(eta); // log else if (link == 2) return eta; // identity else if (link == 3) return square(eta); // sqrt else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector for beta models * * @param y The vector of outcomes * @param eta The linear predictors * @param dispersion Positive dispersion parameter * @param link An integer indicating the link function * @return A vector of log-likelihoods */ vector pw_beta(vector y, vector eta, real dispersion, int link) { vector[rows(y)] ll; vector[rows(y)] mu = linkinv_beta(eta, link); // link checked for (n in 1:rows(y)) { ll[n] = beta_lpdf(y[n] | mu[n] * dispersion, (1 - mu[n]) * dispersion); } return ll; } /** * Pointwise (pw) log-likelihood vector for beta models with z variables * * @param y The vector of outcomes * @param eta The linear predictors (for y) * @param eta_z The linear predictors (for dispersion) * @param link An integer indicating the link function passed to linkinv_beta * @param link_phi An integer indicating the link function passed to linkinv_beta_z * @return A vector of log-likelihoods */ vector 
pw_beta_z(vector y, vector eta, vector eta_z, int link, int link_phi) { vector[rows(y)] ll; vector[rows(y)] mu = linkinv_beta(eta, link); // link checked vector[rows(y)] mu_z = linkinv_beta_z(eta_z, link_phi); // link checked for (n in 1:rows(y)) { ll[n] = beta_lpdf(y[n] | mu[n] * mu_z[n], (1-mu[n]) * mu_z[n]); } return ll; } rstanarm/tests/testthat/stan_files/functions/mvmer_functions.stan0000644000176200001440000003732614500256225025363 0ustar liggesusers /** * Return the required number of local hs parameters * * @param prior_dist An integer indicating the prior distribution * @return An integer */ int get_nvars_for_hs(int prior_dist) { int hs = 0; if (prior_dist == 3) hs = 2; else if (prior_dist == 4) hs = 4; return hs; } /** * Return the lower/upper bound for the specified intercept type * * @param intercept_type An integer specifying the type of intercept; * 0=no intercept, 1=unbounded, 2=lower bounded, 3=upper bounded * @return A real, corresponding to the lower bound */ real lb(int intercept_type) { return intercept_type == 2 ? 0 : negative_infinity(); } real ub(int intercept_type) { return intercept_type == 3 ? 
0 : positive_infinity(); } /** * Get the indices corresponding to the lower tri of a square matrix * * @param dim The number of rows in the square matrix * @return A vector of indices */ array[] int lower_tri_indices(int dim) { array[dim + choose(dim, 2)] int indices; int mark = 1; for (r in 1:dim) { for (c in r:dim) { indices[mark] = (r - 1) * dim + c; mark += 1; } } return indices; } /** * Scale the auxiliary parameter based on prior information * * @param aux_unscaled A real, the unscaled auxiliary parameter * @param prior_dist Integer, the type of prior distribution * @param prior_mean,prior_scale Real scalars, the mean and scale * of the prior distribution * @return A real, corresponding to the scaled auxiliary parameter */ real make_aux(real aux_unscaled, int prior_dist, real prior_mean, real prior_scale) { real aux; if (prior_dist == 0) // none aux = aux_unscaled; else { aux = prior_scale * aux_unscaled; if (prior_dist <= 2) // normal or student_t aux += prior_mean; } return aux; } /** * Scale the primitive population level parameters based on prior information * * @param z_beta A vector of primitive parameters * @param prior_dist Integer, the type of prior distribution * @param prior_mean,prior_scale Vectors of mean and scale parameters * for the prior distributions * @return A vector containing the population level parameters (coefficients) */ vector make_beta(vector z_beta, int prior_dist, vector prior_mean, vector prior_scale, vector prior_df, real global_prior_scale, array[] real global, array[] vector local, array[] real ool, array[] vector mix, array[] real aux, int family, real slab_scale, array[] real caux) { vector[rows(z_beta)] beta; if (prior_dist == 0) beta = z_beta; else if (prior_dist == 1) beta = z_beta .* prior_scale + prior_mean; else if (prior_dist == 2) for (k in 1:rows(prior_mean)) { beta[k] = CFt(z_beta[k], prior_df[k]) * prior_scale[k] + prior_mean[k]; } else if (prior_dist == 3) { real c2 = square(slab_scale) * caux[1]; if (family == 
1) // don't need is_continuous since family == 1 is gaussian in mvmer beta = hs_prior(z_beta, global, local, global_prior_scale, aux[1], c2); else beta = hs_prior(z_beta, global, local, global_prior_scale, 1, c2); } else if (prior_dist == 4) { real c2 = square(slab_scale) * caux[1]; if (family == 1) // don't need is_continuous since family == 1 is gaussian in mvmer beta = hsplus_prior(z_beta, global, local, global_prior_scale, aux[1], c2); else beta = hsplus_prior(z_beta, global, local, global_prior_scale, 1, c2); } else if (prior_dist == 5) // laplace beta = prior_mean + prior_scale .* sqrt(2 * mix[1]) .* z_beta; else if (prior_dist == 6) // lasso beta = prior_mean + ool[1] * prior_scale .* sqrt(2 * mix[1]) .* z_beta; return beta; } /** * Create group-specific coefficients, see section 2 of * https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf * * @param z_b Vector whose elements are iid normal(0,sigma) a priori * @param theta Vector with covariance parameters as defined in lme4 * @param p An integer array with the number variables on the LHS of each | * @param l An integer array with the number of levels for the factor(s) on * the RHS of each | * @param i The index of the grouping factor for which you want to return * the group-specific coefficients for * @return An array of group-specific coefficients for grouping factor i */ matrix make_b_matrix(vector z_b, vector theta_L, array[] int p, array[] int l, int i) { matrix[p[i],l[i]] b_matrix; int nc = p[i]; int b_mark = 1; int theta_L_mark = 1; if (i > 1) { for (j in 1:(i-1)) { theta_L_mark += p[j] + choose(p[j], 2); b_mark += p[j] * l[j]; } } if (nc == 1) { real theta_L_start = theta_L[theta_L_mark]; for (s in b_mark:(b_mark + l[i] - 1)) b_matrix[nc,s] = theta_L_start * z_b[s]; } else { matrix[nc,nc] T_i = rep_matrix(0, nc, nc); for (c in 1:nc) { T_i[c,c] = theta_L[theta_L_mark]; theta_L_mark += 1; for(r in (c+1):nc) { T_i[r,c] = theta_L[theta_L_mark]; theta_L_mark += 1; } } for (j in 1:l[i]) { 
vector[nc] temp = T_i * segment(z_b, b_mark, nc); b_matrix[,j] = temp; b_mark += nc; } } return b_matrix'; } /** * Evaluate the linear predictor for the glmer submodel * * @param X Design matrix for fe * @param Z1 Design matrix for re, for first grouping factor * @param Z2 Design matrix for re, for second grouping factor * @param Z1_id Group indexing for Z1 * @param Z2_id Group indexing for Z2 * @param gamma The intercept parameter * @param beta Vector of population level parameters * @param b1Mat Matrix of group level params for first grouping factor * @param b2Mat Matrix of group level params for second grouping factor * @param b1Mat_colshift,b2Mat_colshift Number of columns in b1Mat/b2Mat * that correpond to group level params from prior glmer submodels * @param intercept_type The type of intercept parameter (0 = none, * 1 = unbounded, 2 = lower bound, 3 = upper bound) * @return A vector containing the linear predictor for the glmer submodel */ vector evaluate_eta(matrix X, array[] vector Z1, array[] vector Z2, array[] int Z1_id, array[] int Z2_id, array[] real gamma, vector beta, matrix b1Mat, matrix b2Mat, int b1Mat_colshift, int b2Mat_colshift, int intercept_type, vector Ti) { int N = rows(X); // num rows in design matrix int K = rows(beta); // num predictors int p1 = size(Z1); // num group level params for group factor 1 int p2 = size(Z2); // num group level params for group factor 2 vector[N] eta; if (K > 0) eta = X * beta; else eta = rep_vector(0.0, N); if (intercept_type > 0) { // submodel has an intercept if (intercept_type == 1) eta += gamma[1]; else if (intercept_type == 2) eta += gamma[1] - max(eta); else if (intercept_type == 3) eta += gamma[1] - min(eta); } if (p1 > 0) { // submodel includes group factor 1 for (k in 1:p1) for (n in 1:N) eta[n] += (b1Mat[Z1_id[n], k+b1Mat_colshift]) * Z1[k,n]; } if (p2 > 0) { // submodel includes group factor 2 for (k in 1:p2) for (n in 1:N) eta[n] += (b2Mat[Z2_id[n], k+b2Mat_colshift]) * Z2[k,n]; } if (rows(Ti) > 0) 
eta = eta + Ti; // add offset value return eta; } /** * Evaluate mu based on eta, family and link * * @param eta Vector of linear predictors * @param family An integer indicating the family * @param link An integer indicating the link function (differs by family) * @return A vector */ vector evaluate_mu(vector eta, int family, int link) { vector[rows(eta)] mu; if (family == 1) mu = linkinv_gauss(eta, link); else if (family == 2) mu = linkinv_gamma(eta, link); else if (family == 3) mu = linkinv_inv_gaussian(eta, link); else if (family == 4) mu = linkinv_bern(eta, link); else if (family == 5) mu = linkinv_binom(eta, link); else if (family == 6 || family == 7 || family == 8) mu = linkinv_count(eta, link); return mu; } /** * Increment the target with the log-likelihood for the glmer submodel * * @param z_beta A vector of primitive parameters * @param prior_dist Integer, the type of prior distribution * @param prior_mean,prior_scale Vectors of mean and scale parameters * for the prior distributions * @return A vector containing the population level parameters (coefficients) */ real glm_lpdf(vector y_real, array[] int y_integer, vector eta, array[] real aux, int family, int link, real sum_log_y, vector sqrt_y, vector log_y) { real lp = 0; if (family == 1) { // gaussian if (link == 1) lp += normal_lpdf(y_real | eta, aux[1]); else if (link == 2) lp += lognormal_lpdf(y_real | eta, aux[1]); else lp += normal_lpdf(y_real | inv(eta), aux[1]); } else if (family == 2) { // gamma lp += GammaReg(y_real, eta, aux[1], link, sum_log_y); } else if (family == 3) { // inverse gaussian lp += inv_gaussian(y_real, linkinv_inv_gaussian(eta, link), aux[1], sum_log_y, sqrt_y); } else if (family == 4) { // bernoulli if (link == 1) lp += bernoulli_logit_lpmf(y_integer | eta); else lp += bernoulli_lpmf(y_integer | linkinv_bern(eta, link)); } else if (family == 5) { // binomial reject("Binomial with >1 trials not allowed."); } else if (family == 6 || family == 8) { // poisson or poisson-gamma if 
(link == 1) lp += poisson_log_lpmf(y_integer | eta); else lp += poisson_lpmf(y_integer | linkinv_count(eta, link)); } else if (family == 7) { // negative binomial if (link == 1) lp += neg_binomial_2_log_lpmf(y_integer | eta, aux[1]); else lp += neg_binomial_2_lpmf(y_integer | linkinv_count(eta, link), aux[1]); } else reject("Invalid family."); return lp; } /** * Log-prior for coefficients * * @param z_beta Vector of primative coefficients * @param prior_dist Integer, the type of prior distribution * @param prior_scale Real, scale for the prior distribution * @param prior_df Real, df for the prior distribution * @param global_prior_df Real, df for the prior for the global hs parameter * @param local Vector of hs local parameters * @param global Real, the global parameter * @param mix Vector of shrinkage parameters * @param one_over_lambda Real * @return nothing */ real beta_custom_lpdf(vector z_beta, int prior_dist, vector prior_scale, vector prior_df, real global_prior_df, array[] vector local, array[] real global, array[] vector mix, array[] real one_over_lambda, real slab_df, array[] real caux) { real lp = 0; if (prior_dist == 1) lp += normal_lpdf(z_beta | 0, 1); else if (prior_dist == 2) lp += normal_lpdf(z_beta | 0, 1); // Student t else if (prior_dist == 3) { // hs lp += normal_lpdf(z_beta | 0, 1); lp += normal_lpdf(local[1] | 0, 1); lp += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df); lp += normal_lpdf(global[1] | 0, 1); lp += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df); lp += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df); } else if (prior_dist == 4) { // hs+ lp += normal_lpdf(z_beta | 0, 1); lp += normal_lpdf(local[1] | 0, 1); lp += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df); lp += normal_lpdf(local[3] | 0, 1); // unorthodox useage of prior_scale as another df hyperparameter lp += inv_gamma_lpdf(local[4] | 0.5 * prior_scale, 0.5 * prior_scale); lp += normal_lpdf(global[1] | 0, 1); lp += 
inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df); lp += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df); } else if (prior_dist == 5) { // laplace lp += normal_lpdf(z_beta | 0, 1); lp += exponential_lpdf(mix[1] | 1); } else if (prior_dist == 6) { // lasso lp += normal_lpdf(z_beta | 0, 1); lp += exponential_lpdf(mix[1] | 1); lp += chi_square_lpdf(one_over_lambda[1] | prior_df[1]); } else if (prior_dist == 7) { // product_normal lp += normal_lpdf(z_beta | 0, 1); } return lp; /* else prior_dist is 0 and nothing is added */ } /** * Log-prior for intercept parameters * * @param gamma Real, the intercept parameter * @param dist Integer, the type of prior distribution * @param mean_ Real, mean of prior distribution * @param scale Real, scale for the prior distribution * @param df Real, df for the prior distribution * @return nothing */ real gamma_custom_lpdf(real gamma, int dist, real mean_, real scale, real df) { real lp = 0; if (dist == 1) // normal lp += normal_lpdf(gamma | mean_, scale); else if (dist == 2) // student_t lp += student_t_lpdf(gamma | df, mean_, scale); /* else dist is 0 and nothing is added */ return lp; } /** * Log-prior for auxiliary parameters * * @param aux_unscaled Vector (potentially of length 1) of unscaled * auxiliary parameter(s) * @param dist Integer specifying the type of prior distribution * @param scale Real specifying the scale for the prior distribution * @param df Real specifying the df for the prior distribution * @return nothing */ real aux_lpdf(real aux_unscaled, int dist, real scale, real df) { real lp = 0; if (dist > 0 && scale > 0) { if (dist == 1) lp += normal_lpdf(aux_unscaled | 0, 1); else if (dist == 2) lp += student_t_lpdf(aux_unscaled | df, 0, 1); else lp += exponential_lpdf(aux_unscaled | 1); } return lp; } /** * Evaluate the mean of the posterior predictive distribution * * @param mu Vector containing the mean of the posterior predictive * distribution for each observation (ie. 
the linear predictor after * applying the inverse link function). * @param real The auxiliary parameter for the glmer submodel. This will be * an empty array if the submodel does not have an auxiliary parameter * @param family An integer specifying the family * @return A real, the mean of the posterior predictive distribution */ real mean_PPD_rng(vector mu, array[] real aux, int family) { int N = rows(mu); real mean_PPD = 0; if (family == 1) { // gaussian for (n in 1:N) mean_PPD += normal_rng(mu[n], aux[1]); } else if (family == 2) { // gamma for (n in 1:N) mean_PPD += gamma_rng(aux[1], aux[1] / mu[n]); } else if (family == 3) { // inverse gaussian for (n in 1:N) mean_PPD += inv_gaussian_rng(mu[n], aux[1]); } else if (family == 4) { // bernoulli for (n in 1:N) mean_PPD += bernoulli_rng(mu[n]); } else if (family == 5) { // binomial reject("Binomial with >1 trials not allowed."); } else if (family == 6 || family == 8) { real poisson_max = pow(2.0, 30.0); for (n in 1:N) { // poisson or poisson-gamma if (mu[n] < poisson_max) mean_PPD += poisson_rng(mu[n]); else mean_PPD += normal_rng(mu[n], sqrt(mu[n])); } } else if (family == 7) { real poisson_max = pow(2.0, 30.0); for (n in 1:N) { // negative binomial real gamma_temp; if (is_inf(aux[1])) gamma_temp = mu[n]; else gamma_temp = gamma_rng(aux[1], aux[1] / mu[n]); if (gamma_temp < poisson_max) mean_PPD += poisson_rng(gamma_temp); else mean_PPD += normal_rng(gamma_temp, sqrt(gamma_temp)); } } mean_PPD /= N; return mean_PPD; } rstanarm/tests/testthat/stan_files/functions/binomial_likelihoods.stan0000644000176200001440000000453714500256225026323 0ustar liggesusers /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. 
inverse-link(eta) */ vector linkinv_binom(vector eta, int link) { if (link == 1) return(inv_logit(eta)); // logit else if (link == 2) return(Phi(eta)); // probit else if (link == 3) return(atan(eta) / pi() + 0.5); // cauchit else if (link == 4) return(exp(eta)); // log else if (link == 5) return(inv_cloglog(eta)); // cloglog else reject("Invalid link"); return eta; // never reached } /** * Increment with the unweighted log-likelihood * @param y An integer array indicating the number of successes * @param trials An integer array indicating the number of trials * @param eta A vector of linear predictors * @param link An integer indicating the link function * @return lp__ */ real binom_lpmf(array[] int y, array[] int trials, vector eta, int link) { real lp = 0; if (link == 1) lp += binomial_logit_lpmf(y | trials, eta); else if (link < 4) lp += binomial_lpmf( y | trials, linkinv_binom(eta, link)); else if (link == 4) { // log for (n in 1:num_elements(y)) { lp += y[n] * eta[n]; lp += (trials[n] - y[n]) * log1m_exp(eta[n]); lp += lchoose(trials[n], y[n]); } } else if (link == 5) { // cloglog for (n in 1:num_elements(y)) { real neg_exp_eta = -exp(eta[n]); lp += y[n] * log1m_exp(neg_exp_eta); lp += (trials[n] - y[n]) * neg_exp_eta; lp += lchoose(trials[n], y[n]); } } else reject("Invalid link"); return lp; } /** * Pointwise (pw) log-likelihood vector * * @param y The integer array corresponding to the outcome variable. 
* @param link An integer indicating the link function * @return A vector */ vector pw_binom(array[] int y, array[] int trials, vector eta, int link) { int N = rows(eta); vector[N] ll; if (link == 1) { // logit for (n in 1:N) ll[n] = binomial_logit_lpmf(y[n] | trials[n], eta[n]); } else if (link <= 5) { // link = probit, cauchit, log, or cloglog vector[N] pi = linkinv_binom(eta, link); // may be unstable for (n in 1:N) ll[n] = binomial_lpmf(y[n] | trials[n], pi[n]) ; } else reject("Invalid link"); return ll; } rstanarm/tests/testthat/stan_files/functions/common_functions.stan0000644000176200001440000002412314500256225025514 0ustar liggesusers /* for multiple .stan files */ /** * Create group-specific block-diagonal Cholesky factor, see section 2 of * https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf * @param len_theta_L An integer indicating the length of returned vector, * which lme4 denotes as m * @param p An integer array with the number variables on the LHS of each | * @param dispersion Scalar standard deviation of the errors, calles sigma by lme4 * @param tau Vector of scale parameters whose squares are proportional to the * traces of the relative covariance matrices of the group-specific terms * @param scale Vector of prior scales that are multiplied by elements of tau * @param zeta Vector of positive parameters that are normalized into simplexes * and multiplied by the trace of the covariance matrix to produce variances * @param rho Vector of radii in the onion method for creating Cholesky factors * @param z_T Vector used in the onion method for creating Cholesky factors * @return A vector that corresponds to theta in lme4 */ vector make_theta_L(int len_theta_L, array[] int p, real dispersion, vector tau, vector scale, vector zeta, vector rho, vector z_T) { vector[len_theta_L] theta_L; int zeta_mark = 1; int rho_mark = 1; int z_T_mark = 1; int theta_L_mark = 1; // each of these is a diagonal block of the implicit Cholesky factor for (i in 
1:size(p)) { int nc = p[i]; if (nc == 1) { // "block" is just a standard deviation theta_L[theta_L_mark] = tau[i] * scale[i] * dispersion; // unlike lme4, theta[theta_L_mark] includes the dispersion term in it theta_L_mark += 1; } else { // block is lower-triangular matrix[nc,nc] T_i; real std_dev; real T21; real trace_T_i = square(tau[i] * scale[i] * dispersion) * nc; vector[nc] pi = segment(zeta, zeta_mark, nc); // gamma(zeta | shape, 1) pi /= sum(pi); // thus dirichlet(pi | shape) // unlike lme4, T_i includes the dispersion term in it zeta_mark += nc; std_dev = sqrt(pi[1] * trace_T_i); T_i[1,1] = std_dev; // Put a correlation into T_i[2,1] and scale by std_dev std_dev = sqrt(pi[2] * trace_T_i); T21 = 2.0 * rho[rho_mark] - 1.0; rho_mark += 1; T_i[2,2] = std_dev * sqrt(1.0 - square(T21)); T_i[2,1] = std_dev * T21; for (r in 2:(nc - 1)) { // scaled onion method to fill T_i int rp1 = r + 1; vector[r] T_row = segment(z_T, z_T_mark, r); real scale_factor = sqrt(rho[rho_mark] / dot_self(T_row)) * std_dev; z_T_mark += r; std_dev = sqrt(pi[rp1] * trace_T_i); for(c in 1:r) T_i[rp1,c] = T_row[c] * scale_factor; T_i[rp1,rp1] = sqrt(1.0 - rho[rho_mark]) * std_dev; rho_mark += 1; } // now vech T_i for (c in 1:nc) for (r in c:nc) { theta_L[theta_L_mark] = T_i[r,c]; theta_L_mark += 1; } } } return theta_L; } /** * Create group-specific coefficients, see section 2 of * https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf * * @param z_b Vector whose elements are iid normal(0,sigma) a priori * @param theta Vector with covariance parameters as defined in lme4 * @param p An integer array with the number variables on the LHS of each | * @param l An integer array with the number of levels for the factor(s) on * the RHS of each | * @return A vector of group-specific coefficients */ vector make_b(vector z_b, vector theta_L, array[] int p, array[] int l) { vector[rows(z_b)] b; int b_mark = 1; int theta_L_mark = 1; for (i in 1:size(p)) { int nc = p[i]; if (nc == 1) { real 
theta_L_start = theta_L[theta_L_mark]; for (s in b_mark:(b_mark + l[i] - 1)) b[s] = theta_L_start * z_b[s]; b_mark += l[i]; theta_L_mark += 1; } else { matrix[nc,nc] T_i = rep_matrix(0, nc, nc); for (c in 1:nc) { T_i[c,c] = theta_L[theta_L_mark]; theta_L_mark += 1; for(r in (c+1):nc) { T_i[r,c] = theta_L[theta_L_mark]; theta_L_mark += 1; } } for (j in 1:l[i]) { vector[nc] temp = T_i * segment(z_b, b_mark, nc); b_mark -= 1; for (s in 1:nc) b[b_mark + s] = temp[s]; b_mark += nc + 1; } } } return b; } /** * Prior on group-specific parameters * * @param z_b A vector of primitive coefficients * @param z_T A vector of primitives for the unit vectors in the onion method * @param rho A vector radii for the onion method * @param zeta A vector of primitives for the simplexes * @param tau A vector of scale parameters * @param regularization A real array of LKJ hyperparameters * @param delta A real array of concentration paramters * @param shape A vector of shape parameters * @param t An integer indicating the number of group-specific terms * @param p An integer array with the number variables on the LHS of each | * @return target() */ real decov_lpdf(vector z_b, vector z_T, vector rho, vector zeta, vector tau, array[] real regularization, array[] real delta, vector shape, int t, array[] int p) { real lp = 0; int pos_reg = 1; int pos_rho = 1; lp += normal_lpdf(z_b | 0, 1); lp += normal_lpdf(z_T | 0, 1); for (i in 1:t) if (p[i] > 1) { vector[p[i] - 1] shape1; vector[p[i] - 1] shape2; real nu = regularization[pos_reg] + 0.5 * (p[i] - 2); pos_reg += 1; shape1[1] = nu; shape2[1] = nu; for (j in 2:(p[i]-1)) { nu -= 0.5; shape1[j] = 0.5 * j; shape2[j] = nu; } lp += beta_lpdf(rho[pos_rho:(pos_rho + p[i] - 2)] | shape1, shape2); pos_rho += p[i] - 1; } lp += gamma_lpdf(zeta | delta, 1); lp += gamma_lpdf(tau | shape, 1); return lp; } /** * Hierarchical shrinkage parameterization * * @param z_beta A vector of primitive coefficients * @param global A real array of positive numbers * 
@param local A vector array of positive numbers * @param global_prior_scale A positive real number * @param error_scale 1 or sigma in the Gaussian case * @param c2 A positive real number * @return A vector of coefficientes */ vector hs_prior(vector z_beta, array[] real global, array[] vector local, real global_prior_scale, real error_scale, real c2) { int K = rows(z_beta); vector[K] lambda = local[1] .* sqrt(local[2]); real tau = global[1] * sqrt(global[2]) * global_prior_scale * error_scale; vector[K] lambda2 = square(lambda); vector[K] lambda_tilde = sqrt( c2 * lambda2 ./ (c2 + square(tau) * lambda2) ); return z_beta .* lambda_tilde * tau; } /** * Hierarchical shrinkage plus parameterization * * @param z_beta A vector of primitive coefficients * @param global A real array of positive numbers * @param local A vector array of positive numbers * @param global_prior_scale A positive real number * @param error_scale 1 or sigma in the Gaussian case * @param c2 A positive real number * @return A vector of coefficientes */ vector hsplus_prior(vector z_beta, array[] real global, array[] vector local, real global_prior_scale, real error_scale, real c2) { int K = rows(z_beta); vector[K] lambda = local[1] .* sqrt(local[2]); vector[K] eta = local[3] .* sqrt(local[4]); real tau = global[1] * sqrt(global[2]) * global_prior_scale * error_scale; vector[K] lambda_eta2 = square(lambda .* eta); vector[K] lambda_tilde = sqrt( c2 * lambda_eta2 ./ ( c2 + square(tau) * lambda_eta2) ); return z_beta .* lambda_tilde * tau; } /** * Cornish-Fisher expansion for standard normal to Student t * * See result 26.7.5 of * https://people.math.sfu.ca/~cbm/aands/page_949.htm * * @param z A scalar distributed standard normal * @param df A scalar degrees of freedom * @return An (approximate) Student t variate with df degrees of freedom */ real CFt(real z, real df) { real z2 = square(z); real z3 = z2 * z; real z5 = z2 * z3; real z7 = z2 * z5; real z9 = z2 * z7; real df2 = square(df); real df3 = df2 * 
df; real df4 = df2 * df2; return z + (z3 + z) / (4 * df) + (5 * z5 + 16 * z3 + 3 * z) / (96 * df2) + (3 * z7 + 19 * z5 + 17 * z3 - 15 * z) / (384 * df3) + (79 * z9 + 776 * z7 + 1482 * z5 - 1920 * z3 - 945 * z) / (92160 * df4); } /** * Return two-dimensional array of group membership * * @param N An integer indicating the number of observations * @param t An integer indicating the number of grouping variables * @param v An integer array with the indices of group membership * @return An two-dimensional integer array of group membership */ array[,] int make_V(int N, int t, array[] int v) { array[t,N] int V; int pos = 1; if (t > 0) for (j in 1:N) for (i in 1:t) { V[i,j] = v[pos]; // + 1 pos += 1; } return V; } /** * Calculate lower bound on intercept * * @param family Integer family code * 1 = gaussian * 2 = gamma * 3 = inv-gaussian * 4 = beta * 5 = binomial * 6 = poisson * 7 = neg-binom * 8 = poisson w/ gamma noise (not currently used but in count.stan) * @param link Integer link code * @return real lower bound */ real make_lower(int family, int link) { if (family == 1) return negative_infinity(); // Gaussian if (family <= 3) { // Gamma or inverse Gaussian if (link == 2) return negative_infinity(); // log return 0; } return negative_infinity(); } /** * Calculate upper bound on intercept * * @param family Integer family code (see make_lower above for codes) * @param link Integer link code * @return real upper bound */ real make_upper(int family, int link) { if (family == 4 && link == 5) return 0; return positive_infinity(); } rstanarm/tests/testthat/stan_files/lm.stan0000644000176200001440000001217614551535205020545 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a Gaussian outcome with no link function functions { /** * Increments the log-posterior with the logarithm of a multivariate normal * likelihood with a scalar standard deviation for all errors * Equivalent to normal_lpdf(y | intercept + Q * R * beta, sigma) but 
faster * @param theta vector of coefficients (excluding intercept), equal to R * beta * @param b precomputed vector of OLS coefficients (excluding intercept) in Q-space * @param intercept scalar (assuming columns of Q have mean zero) * @param ybar precomputed sample mean of the outcome * @param SSR positive precomputed value of the sum of squared OLS residuals * @param sigma positive scalar for the standard deviation of the errors * @param N integer equal to the number of observations */ real mvn_ols_qr_lpdf(vector theta, vector b, real intercept, real ybar, real SSR, real sigma, int N) { return -0.5 * (dot_self(theta - b) + N * square(intercept - ybar) + SSR) / square(sigma) - // 0.91... is log(sqrt(2 * pi())) N * (log(sigma) + 0.91893853320467267); } } data { int has_intercept; // 0 = no, 1 = yes int prior_dist_for_intercept; // 0 = none, 1 = normal real prior_scale_for_intercept; // 0 = by CLT real prior_mean_for_intercept; // expected value for alpha int prior_dist; // 0 = uniform for R^2, 1 = Beta(K/2,eta) int prior_PD; // 0 = no, 1 = yes to drawing from the prior real eta; // shape hyperparameter int J; // number of groups // the rest of these are indexed by group but should work even if J = 1 array[J] int N; // number of observations int K; // number of predictors array[J] vector[K] xbarR_inv; // vector of means of the predictors array[J] real ybar; // sample mean of outcome real center_y; // zero or sample mean of outcome array[J] real s_Y; // standard deviation of the outcome array[J] vector[K] Rb; // OLS coefficients array[J] real SSR; // OLS sum-of-squared residuals array[J] matrix[K, K] R_inv; // inverse R matrices } transformed data { real half_K = 0.5 * K; array[J] real sqrt_inv_N; array[J] real sqrt_Nm1; for (j in 1 : J) { sqrt_inv_N[j] = sqrt(1.0 / N[j]); sqrt_Nm1[j] = sqrt(N[j] - 1.0); } } parameters { // must not call with init="0" // https://github.com/stan-dev/rstanarm/issues/603#issuecomment-1785928224 array[K > 1 ? J : 0] unit_vector[K > 1 ? 
K : 2] u; // primitives for coefficients array[J * has_intercept] real z_alpha; // primitives for intercepts array[J] real 1 ? 0 : -1), upper=1> R2; // proportions of variance explained vector[J * (1 - prior_PD)] log_omega; // under/overfitting factors } transformed parameters { array[J * has_intercept] real alpha; // uncentered intercepts array[J] vector[K] theta; // coefficients in Q-space array[J] real sigma; // error standard deviations for (j in 1 : J) { // marginal standard deviation of outcome for group j real Delta_y = prior_PD == 0 ? s_Y[j] * exp(log_omega[j]) : 1; // coefficients in Q-space if (K > 1) theta[j] = u[j] * sqrt(R2[j]) * sqrt_Nm1[j] * Delta_y; else theta[j][1] = R2[j] * sqrt_Nm1[j] * Delta_y; sigma[j] = Delta_y * sqrt(1 - R2[j]); // standard deviation of errors if (has_intercept == 1) { if (prior_dist_for_intercept == 0) // no information alpha[j] = z_alpha[j]; else if (prior_scale_for_intercept == 0) // central limit theorem alpha[j] = z_alpha[j] * Delta_y * sqrt_inv_N[j] + prior_mean_for_intercept; else // arbitrary informative prior alpha[j] = z_alpha[j] * prior_scale_for_intercept + prior_mean_for_intercept; } } } model { if (prior_PD == 0) for (j in 1 : J) { // likelihood contribution for each group real shift = dot_product(xbarR_inv[j], theta[j]); target += mvn_ols_qr_lpdf(theta[j] | Rb[j], has_intercept == 1 ? 
alpha[j] + shift : shift, ybar[j], SSR[j], sigma[j], N[j]); // implicit: u[j] is uniform on the surface of a hypersphere } if (has_intercept == 1 && prior_dist_for_intercept > 0) target += normal_lpdf(z_alpha | 0, 1); if (prior_dist == 1) { if (K > 1) target += beta_lpdf(R2 | half_K, eta); else { // TODO(Andrew) remove once vectorised abs available in rstan array[J] real R2_abs; for (j in 1:J) { R2_abs[j] = abs(R2[j]); } target += beta_lpdf(square(R2) | half_K, eta) + sum(log(R2_abs)); } } // implicit: log_omega is uniform over the real line for all j } generated quantities { array[J] real mean_PPD; array[J] vector[K] beta; for (j in 1 : J) { real shift; shift = dot_product(xbarR_inv[j], theta[j]); mean_PPD[j] = normal_rng(has_intercept == 1 ? alpha[j] + shift : shift, sigma[j] * sqrt_inv_N[j]); beta[j] = R_inv[j] * theta[j]; } } rstanarm/tests/testthat/stan_files/pre/0000755000176200001440000000000014214414633020022 5ustar liggesusersrstanarm/tests/testthat/stan_files/pre/Columbia_copyright.stan0000644000176200001440000000015613340675562024547 0ustar liggesusers// This file is part of rstanarm. // Copyright (C) 2015, 2016 2017 Trustees of Columbia University rstanarm/tests/testthat/stan_files/pre/Brilleman_copyright.stan0000644000176200001440000000005513340675562024717 0ustar liggesusers// Copyright (C) 2016, 2017 Sam Brilleman rstanarm/tests/testthat/stan_files/pre/license.stan0000644000176200001440000000120614214414633022332 0ustar liggesusers/* rstanarm is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. rstanarm is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with rstanarm. If not, see . */ rstanarm/tests/testthat/stan_files/model/0000755000176200001440000000000014414044166020336 5ustar liggesusersrstanarm/tests/testthat/stan_files/model/make_eta_bern.stan0000644000176200001440000000207314370470372024006 0ustar liggesusers vector[N[1]] eta0; vector[N[2]] eta1; if (K > 0) { if (dense_X) { eta0 = N[1] > 0 ? X0[1] * beta : rep_vector(0.0, 0); eta1 = N[2] > 0 ? X1[1] * beta : rep_vector(0.0, 0); } else { eta0 = csr_matrix_times_vector(N[1], K, w_X0, v_X0, u_X0, beta); eta1 = csr_matrix_times_vector(N[2], K, w_X1, v_X1, u_X1, beta); } } else { eta0 = rep_vector(0.0, N[1]); eta1 = rep_vector(0.0, N[2]); } if (has_intercept == 0 && dense_X) { real tmp = dot_product(xbar, beta); if (N[1] > 0) eta0 += tmp; if (N[2] > 0) eta1 += tmp; } if (has_offset == 1) { if (N[1] > 0) eta0 += offset0; if (N[2] > 0) eta1 += offset1; } if (K_smooth) { if (N[1] > 0) eta0 += S0 * beta_smooth; if (N[2] > 0) eta1 += S1 * beta_smooth; } if (special_case) for (i in 1:t) { if (N[1] > 0) eta0 += b[V0[i]]; if (N[2] > 0) eta1 += b[V1[i]]; } else if (t > 0) { if (N[1] > 0) eta0 += csr_matrix_times_vector(N[1], q, w0, v0, u0, b); if (N[2] > 0) eta1 += csr_matrix_times_vector(N[2], q, w1, v1, u1, b); } rstanarm/tests/testthat/stan_files/model/priors_glm.stan0000644000176200001440000000531614370470367023416 0ustar liggesusers // Log-priors for coefficients if (prior_dist == 1) target += normal_lpdf(z_beta | 0, 1); else if (prior_dist == 2) target += normal_lpdf(z_beta | 0, 1); // Student t via Cornish-Fisher expansion else if (prior_dist == 3) { // hs real log_half = -0.693147180559945286; target += normal_lpdf(z_beta | 0, 1); target += normal_lpdf(local[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df); target += normal_lpdf(global[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df); target += 
inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df); } else if (prior_dist == 4) { // hs+ real log_half = -0.693147180559945286; target += normal_lpdf(z_beta | 0, 1); target += normal_lpdf(local[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df); target += normal_lpdf(local[3] | 0, 1) - log_half; // unorthodox useage of prior_scale as another df hyperparameter target += inv_gamma_lpdf(local[4] | 0.5 * prior_scale, 0.5 * prior_scale); target += normal_lpdf(global[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df); target += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df); } else if (prior_dist == 5) { // laplace target += normal_lpdf(z_beta | 0, 1); target += exponential_lpdf(mix[1] | 1); } else if (prior_dist == 6) { // lasso target += normal_lpdf(z_beta | 0, 1); target += exponential_lpdf(mix[1] | 1); target += chi_square_lpdf(one_over_lambda[1] | prior_df[1]); } else if (prior_dist == 7) { // product_normal target += normal_lpdf(z_beta | 0, 1); } /* else prior_dist is 0 and nothing is added */ // Log-prior for intercept if (has_intercept == 1) { if (prior_dist_for_intercept == 1) // normal target += normal_lpdf(gamma | prior_mean_for_intercept, prior_scale_for_intercept); else if (prior_dist_for_intercept == 2) // student_t target += student_t_lpdf(gamma | prior_df_for_intercept, prior_mean_for_intercept, prior_scale_for_intercept); /* else prior_dist is 0 and nothing is added */ } if (K_smooth) { target += normal_lpdf(z_beta_smooth | 0, 1); if (prior_dist_for_smooth > 0) { real log_half = -0.693147180559945286; if (prior_dist_for_smooth == 1) target += normal_lpdf(smooth_sd_raw | 0, 1) - log_half; else if (prior_dist_for_smooth == 2) target += student_t_lpdf(smooth_sd_raw | prior_df_for_smooth, 0, 1) - log_half; else if (prior_dist_for_smooth == 3) target += exponential_lpdf(smooth_sd_raw | 1); } } 
rstanarm/tests/testthat/stan_files/model/assoc_evaluate.stan0000644000176200001440000003246314370470372024236 0ustar liggesusers // !!! Be careful that indexing of has_assoc matches stan_jm.fit !!! // mark tracks indexing within a_beta vector, which is the // vector of association parameters int mark = 0; // mark2 tracks indexing within a_K_data vector, which is the // vector specifying the number of columns used for each possible // type of association term by data interaction int mark2 = 0; // mark3 tracks indexing within size_which_interactions vector int mark3 = 0; for (m in 1:M) { //----- etavalue and any interactions mark2 += 1; if (has_assoc[1,m] == 1 || // etavalue has_assoc[9,m] == 1 || // etavalue * data has_assoc[13,m] == 1 || // etavalue * etavalue has_assoc[14,m] == 1) { // etavalue * muvalue // declare and define eta at quadpoints for submodel m #include /model/make_eta_tmp.stan // add etavalue and any interactions to event submodel eta if (has_assoc[1,m] == 1) { // etavalue vector[nrow_e_Xq] val; if (has_grp[m] == 0) { // no grouping factor clustered within patients val = eta_tmp; } else { // submodel has a grouping factor clustered within patients val = collapse_within_groups(eta_tmp, grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } if (has_assoc[9,m] == 1) { // etavalue*data int J = a_K_data[mark2]; int j_shift = (mark2 == 1) ? 0 : sum(a_K_data[1:(mark2-1)]); for (j in 1:J) { vector[nrow_e_Xq] val; int sel = j_shift + j; if (has_grp[m] == 0) { val = eta_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel]; } else { val = collapse_within_groups( eta_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel], grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[13,m] == 1) { // etavalue*etavalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 
0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; #include /model/make_eta_tmp2.stan val = eta_tmp .* eta_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[14,m] == 1) { // etavalue*muvalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; vector[nrow_y_Xq[sel]] mu_tmp2; #include /model/make_eta_tmp2.stan mu_tmp2 = evaluate_mu(eta_tmp2, family[sel], link[sel]); val = eta_tmp .* mu_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } } else { mark3 += 2; } //----- etaslope and any interactions mark2 += 1; if ((has_assoc[2,m] == 1) || (has_assoc[10,m] == 1)) { // declare and define etaslope at quadpoints for submodel m vector[nrow_y_Xq[m]] dydt_eta_q; if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; dydt_eta_q = evaluate_eta(y1_xq_eps, y1_z1q_eps, y1_z2q_eps, y1_z1q_id_eps, y1_z2q_id_eps, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 0, y1_offset_eps); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; dydt_eta_q = evaluate_eta(y2_xq_eps, y2_z1q_eps, y2_z2q_eps, y2_z1q_id_eps, y2_z2q_id_eps, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 0, y2_offset_eps); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); dydt_eta_q = evaluate_eta(y3_xq_eps, y3_z1q_eps, y3_z2q_eps, y3_z1q_id_eps, y3_z2q_id_eps, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 0, y3_offset_eps); } // add etaslope and any interactions to event submodel eta if (has_assoc[2,m] == 1) { // etaslope vector[nrow_e_Xq] val; if (has_grp[m] == 0) { val = dydt_eta_q; } else { val = collapse_within_groups(dydt_eta_q, grp_idx, grp_assoc); } mark 
+= 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } if (has_assoc[10,m] == 1) { // etaslope*data int J = a_K_data[mark2]; int j_shift = (mark2 == 1) ? 0 : sum(a_K_data[1:(mark2-1)]); for (j in 1:J) { vector[nrow_e_Xq] val; int sel = j_shift + j; if (has_grp[m] == 0) { val = dydt_eta_q .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel]; } else { val = collapse_within_groups( dydt_eta_q .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel], grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } } //----- etaauc // add etaauc to event submodel eta if (has_assoc[3,m] == 1) { // etaauc vector[nrow_y_Xq_auc] eta_auc_tmp; // eta at all auc quadpoints (for submodel m) vector[nrow_y_Xq[m]] val; // eta following summation over auc quadpoints if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_auc_tmp = evaluate_eta(y1_xq_auc, y1_z1q_auc, y1_z2q_auc, y1_z1q_id_auc, y1_z2q_id_auc, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_auc); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_auc_tmp = evaluate_eta(y2_xq_auc, y2_z1q_auc, y2_z2q_auc, y2_z1q_id_auc, y2_z2q_id_auc, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_auc); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_auc_tmp = evaluate_eta(y3_xq_auc, y3_z1q_auc, y3_z2q_auc, y3_z1q_id_auc, y3_z2q_id_auc, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_auc); } mark += 1; for (r in 1:nrow_y_Xq[m]) { vector[auc_qnodes] val_tmp; vector[auc_qnodes] wgt_tmp; val_tmp = eta_auc_tmp[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; wgt_tmp = auc_qwts[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; val[r] = sum(wgt_tmp .* val_tmp); } e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } //----- muvalue and any interactions mark2 += 1; if 
(has_assoc[4,m] == 1 || // muvalue has_assoc[11,m] == 1 || // muvalue * data has_assoc[15,m] == 1 || // muvalue * etavalue has_assoc[16,m] == 1) { // muvalue * muvalue // declare and define mu for submodel m vector[nrow_y_Xq[m]] mu_tmp; #include /model/make_eta_tmp.stan mu_tmp = evaluate_mu(eta_tmp, family[m], link[m]); // add muvalue and any interactions to event submodel eta if (has_assoc[4,m] == 1) { // muvalue vector[nrow_e_Xq] val; if (has_grp[m] == 0) { val = mu_tmp; } else { val = collapse_within_groups(mu_tmp, grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } if (has_assoc[11,m] == 1) { // muvalue*data int tmp = a_K_data[mark2]; int j_shift = (mark2 == 1) ? 0 : sum(a_K_data[1:(mark2-1)]); for (j in 1:tmp) { vector[nrow_e_Xq] val; int sel = j_shift + j; if (has_grp[m] == 0) { val = mu_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel]; } else { val = collapse_within_groups( mu_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel], grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[15,m] == 1) { // muvalue*etavalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; #include /model/make_eta_tmp2.stan val = mu_tmp .* eta_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[16,m] == 1) { // muvalue*muvalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 
0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; vector[nrow_y_Xq[sel]] mu_tmp2; #include /model/make_eta_tmp2.stan mu_tmp2 = evaluate_mu(eta_tmp2, family[sel], link[sel]); val = mu_tmp .* mu_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } } else { mark3 += 2; } //----- muslope and any interactions mark2 += 1; if (has_assoc[5,m] == 1 || has_assoc[12,m] == 1) { reject("muslope association structure has been removed."); } //----- muauc // add muauc to event submodel eta if (has_assoc[6,m] == 1) { // muauc vector[nrow_y_Xq_auc] eta_auc_tmp; // eta at all auc quadpoints (for submodel m) vector[nrow_y_Xq_auc] mu_auc_tmp; // mu at all auc quadpoints (for submodel m) vector[nrow_y_Xq[m]] val; // mu following summation over auc quadpoints if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_auc_tmp = evaluate_eta(y1_xq_auc, y1_z1q_auc, y1_z2q_auc, y1_z1q_id_auc, y1_z2q_id_auc, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_auc); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_auc_tmp = evaluate_eta(y2_xq_auc, y2_z1q_auc, y2_z2q_auc, y2_z1q_id_auc, y2_z2q_id_auc, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_auc); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_auc_tmp = evaluate_eta(y3_xq_auc, y3_z1q_auc, y3_z2q_auc, y3_z1q_id_auc, y3_z2q_id_auc, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_auc); } mu_auc_tmp = evaluate_mu(eta_auc_tmp, family[m], link[m]); mark += 1; for (r in 1:nrow_y_Xq[m]) { vector[auc_qnodes] val_tmp; vector[auc_qnodes] wgt_tmp; val_tmp = mu_auc_tmp[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; wgt_tmp = auc_qwts[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; val[r] = sum(wgt_tmp .* val_tmp); } e_eta_q += 
a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } //----- shared random effects if (sum_size_which_b > 0) { reject("shared_b has been removed."); } if (sum_size_which_coef > 0) { reject("shared_coef has been removed."); } rstanarm/tests/testthat/stan_files/model/make_eta_z.stan0000644000176200001440000000027013452513210023313 0ustar liggesusers if (family == 4 && z_dim > 0 && link_phi > 0) { eta_z = betareg_z * omega; } else if (family == 4 && z_dim == 0 && has_intercept_z == 1){ eta_z = rep_vector(0.0, N); } rstanarm/tests/testthat/stan_files/model/priors_mvmer.stan0000644000176200001440000000546314414044166023761 0ustar liggesusers // Log-priors, auxiliary params if (has_aux[1] == 1) target += aux_lpdf(yAux1_unscaled[1] | y_prior_dist_for_aux[1], y_prior_scale_for_aux[1], y_prior_df_for_aux[1]); if (M > 1 && has_aux[2] == 1) target += aux_lpdf(yAux2_unscaled[1] | y_prior_dist_for_aux[2], y_prior_scale_for_aux[2], y_prior_df_for_aux[2]); if (M > 2 && has_aux[3] == 1) target += aux_lpdf(yAux3_unscaled[1] | y_prior_dist_for_aux[3], y_prior_scale_for_aux[3], y_prior_df_for_aux[3]); // Log priors, intercepts if (intercept_type[1] > 0) target += gamma_custom_lpdf(yGamma1[1] | y_prior_dist_for_intercept[1], y_prior_mean_for_intercept[1], y_prior_scale_for_intercept[1], y_prior_df_for_intercept[1]); if (M > 1 && intercept_type[2] > 0) target += gamma_custom_lpdf(yGamma2[1] | y_prior_dist_for_intercept[2], y_prior_mean_for_intercept[2], y_prior_scale_for_intercept[2], y_prior_df_for_intercept[2]); if (M > 2 && intercept_type[3] > 0) target += gamma_custom_lpdf(yGamma3[1] | y_prior_dist_for_intercept[3], y_prior_mean_for_intercept[3], y_prior_scale_for_intercept[3], y_prior_df_for_intercept[3]); // Log priors, population level params if (yK[1] > 0) target += beta_custom_lpdf(z_yBeta1 | y_prior_dist[1], y_prior_scale1, y_prior_df1, y_global_prior_df[1], yLocal1, yGlobal1, yMix1, yOol1, y_slab_df[1], y_caux1); if (M > 1 && yK[2] > 0) target += 
beta_custom_lpdf(z_yBeta2 | y_prior_dist[2], y_prior_scale2, y_prior_df2, y_global_prior_df[2], yLocal2, yGlobal2, yMix2, yOol2, y_slab_df[2], y_caux2); if (M > 2 && yK[3] > 0) target += beta_custom_lpdf(z_yBeta3 | y_prior_dist[3], y_prior_scale3, y_prior_df3, y_global_prior_df[3], yLocal3, yGlobal3, yMix3, yOol3, y_slab_df[3], y_caux3); // Log priors, group level terms if (prior_dist_for_cov == 1) { // decov target += decov_lpdf(z_b | z_T, rho, zeta, tau, b_prior_regularization, delta, b_prior_shape, t, p); } else if (prior_dist_for_cov == 2) { // lkj if (bK1 > 0) { // sds for group factor 1 target += student_t_lpdf(bSd1 | b1_prior_df, 0, b1_prior_scale); // primitive coefs for group factor 1 target += normal_lpdf(to_vector(z_bMat1) | 0, 1); // corr matrix for group factor 1 if (bK1 > 1) target += lkj_corr_cholesky_lpdf(bCholesky1 | b1_prior_regularization); } if (bK2 > 0) { // sds for group factor 2 target += student_t_lpdf(bSd2 | b2_prior_df, 0, b2_prior_scale); // primitive coefs for group factor 2 target += normal_lpdf(to_vector(z_bMat2) | 0, 1); // corr matrix for group factor 2 if (bK2 > 1) target += lkj_corr_cholesky_lpdf(bCholesky2 | b2_prior_regularization); } } rstanarm/tests/testthat/stan_files/model/mvmer_lp.stan0000644000176200001440000000305214414044166023046 0ustar liggesusers vector[yNeta[1]] yEta1; // linear predictor vector[yNeta[2]] yEta2; vector[yNeta[3]] yEta3; // Linear predictor for submodel 1 if (M > 0) { int bMat1_colshift = 0; // column shift in bMat1 int bMat2_colshift = 0; // column shift in bMat2 yEta1 = evaluate_eta(yX1, y1_Z1, y1_Z2, y1_Z1_id, y1_Z2_id, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset); } // Linear predictor for submodel 2 if (M > 1) { int bMat1_colshift = bK1_len[1]; // column shift in bMat1 int bMat2_colshift = bK2_len[1]; // column shift in bMat2 yEta2 = evaluate_eta(yX2, y2_Z1, y2_Z2, y2_Z1_id, y2_Z2_id, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 
intercept_type[2], y2_offset); } // Linear predictor for submodel 3 if (M > 2) { int bMat1_colshift = sum(bK1_len[1:2]); // column shift in bMat1 int bMat2_colshift = sum(bK2_len[1:2]); // column shift in bMat2 yEta3 = evaluate_eta(yX3, y3_Z1, y3_Z2, y3_Z1_id, y3_Z2_id, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset); } // Log-likelihoods if (prior_PD == 0) { target += glm_lpdf(yReal1 | yInt1, yEta1, yAux1, family[1], link[1], sum_log_y1, sqrt_y1, log_y1); if (M > 1) target += glm_lpdf(yReal2 | yInt2, yEta2, yAux2, family[2], link[2], sum_log_y2, sqrt_y2, log_y2); if (M > 2) target += glm_lpdf(yReal3 | yInt3, yEta3, yAux3, family[3], link[3], sum_log_y3, sqrt_y3, log_y3); } rstanarm/tests/testthat/stan_files/model/eta_no_intercept.stan0000644000176200001440000000015413365374540024555 0ustar liggesusers // correction to eta if model has no intercept (because X is centered) eta += dot_product(xbar, beta); rstanarm/tests/testthat/stan_files/model/event_lp.stan0000644000176200001440000000212413702655240023040 0ustar liggesusers vector[nrow_e_Xq] log_basehaz; // log baseline hazard AT event time and quadrature points vector[nrow_e_Xq] log_haz_q; // log hazard AT event time and quadrature points vector[Nevents] log_haz_etimes; // log hazard AT the event time only vector[Npat_times_qnodes] log_haz_qtimes; // log hazard AT the quadrature points // Log baseline hazard at event and quad times if (basehaz_type == 1) log_basehaz = norm_const + log(e_aux[1]) + basehaz_X * (e_aux - 1) + e_gamma[1]; else log_basehaz = norm_const + basehaz_X * e_aux; // Log hazard at event and quad times log_haz_q = log_basehaz + e_eta_q; log_haz_etimes = head(log_haz_q, Nevents); log_haz_qtimes = tail(log_haz_q, Npat_times_qnodes); // Log likelihood for event model if (has_weights == 0 && prior_PD == 0) { // unweighted log likelihood target += sum(log_haz_etimes) - dot_product(qwts, exp(log_haz_qtimes)); } else if (prior_PD == 0) { // weighted log 
likelihood target += dot_product(e_weights, log_haz_etimes) - dot_product(e_weights_rep, qwts .* exp(log_haz_qtimes)); } rstanarm/tests/testthat/stan_files/model/make_eta_tmp2.stan0000644000176200001440000000302114370470372023734 0ustar liggesusers vector[nrow_y_Xq[sel]] eta_tmp2; if (sel == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_tmp2 = evaluate_eta(y1_xq_eta, y1_z1q_eta, y1_z2q_eta, y1_z1q_id_eta, y1_z2q_id_eta, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_eta); } else if (sel == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_tmp2 = evaluate_eta(y2_xq_eta, y2_z1q_eta, y2_z2q_eta, y2_z1q_id_eta, y2_z2q_id_eta, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_eta); } else if (sel == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_tmp2 = evaluate_eta(y3_xq_eta, y3_z1q_eta, y3_z2q_eta, y3_z1q_id_eta, y3_z2q_id_eta, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_eta); } rstanarm/tests/testthat/stan_files/model/eta_add_Zb.stan0000644000176200001440000000016014370470372023241 0ustar liggesusers if (special_case) for (i in 1:t) eta += b[V[i]]; else eta += csr_matrix_times_vector(N, q, w, v, u, b); rstanarm/tests/testthat/stan_files/model/make_eta.stan0000644000176200001440000000042014370470372022772 0ustar liggesusers vector[N] eta; // linear predictor if (K > 0) { if (dense_X) eta = X[1] * beta; else eta = csr_matrix_times_vector(N, K, w_X, v_X, u_X, beta); } else eta = rep_vector(0.0, N); if (has_offset == 1) eta += offset_; if (K_smooth) eta += S * beta_smooth; rstanarm/tests/testthat/stan_files/model/priors_betareg.stan0000644000176200001440000000435413340675562024251 0ustar liggesusers // Log-priors for coefficients if (prior_dist_z == 1) target += normal_lpdf(z_omega | 0, 1); else if (prior_dist_z == 2) target += normal_lpdf(z_omega | 0, 1); else 
if (prior_dist_z == 3) { // hs real log_half = -0.693147180559945286; target += normal_lpdf(z_omega | 0, 1); target += normal_lpdf(local_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local_z[2] | 0.5 * prior_df_z, 0.5 * prior_df_z); target += normal_lpdf(global_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global_z[2] | 0.5 * global_prior_df_z, 0.5 * global_prior_df_z); target += inv_gamma_lpdf(caux_z | 0.5 * slab_df_z, 0.5 * slab_df_z); } else if (prior_dist_z == 4) { // hs+ real log_half = -0.693147180559945286; target += normal_lpdf(z_omega | 0, 1); target += normal_lpdf(local_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local_z[2] | 0.5 * prior_df_z, 0.5 * prior_df_z); target += normal_lpdf(local_z[3] | 0, 1) - log_half; // unorthodox useage of prior_scale as another df hyperparameter target += inv_gamma_lpdf(local_z[4] | 0.5 * prior_scale_z, 0.5 * prior_scale_z); target += normal_lpdf(global_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global_z[2] | 0.5, 0.5); target += inv_gamma_lpdf(caux_z | 0.5 * slab_df_z, 0.5 * slab_df_z); } else if (prior_dist_z == 5) { // laplace target += normal_lpdf(z_omega | 0, 1); target += exponential_lpdf(S_z[1] | 1); } else if (prior_dist_z == 6) { // lasso target += normal_lpdf(z_omega | 0, 1); target += exponential_lpdf(S_z[1] | 1); target += chi_square_lpdf(one_over_lambda_z[1] | prior_df_z[1]); } else if (prior_dist_z == 7) { // product_normal target += normal_lpdf(z_omega | 0, 1); } /* else prior_dist is 0 and nothing is added */ // Log-prior for intercept if (has_intercept_z == 1) { if (prior_dist_for_intercept_z == 1) // normal target += normal_lpdf(gamma_z | prior_mean_for_intercept_z, prior_scale_for_intercept_z); else if (prior_dist_for_intercept_z == 2) // student_t target += student_t_lpdf(gamma_z | prior_df_for_intercept_z, prior_mean_for_intercept_z, prior_scale_for_intercept_z); /* else prior_dist is 0 and nothing is added */ } 
rstanarm/tests/testthat/stan_files/model/eta_z_no_intercept.stan0000644000176200001440000000020213365374540025100 0ustar liggesusers if (link_phi > 1) { eta_z += dot_product(zbar, omega) - min(eta_z); } else { eta_z += dot_product(zbar, omega); } rstanarm/tests/testthat/stan_files/model/make_eta_tmp.stan0000644000176200001440000000256314370470372023664 0ustar liggesusers vector[nrow_y_Xq[m]] eta_tmp; if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_tmp = evaluate_eta(y1_xq_eta, y1_z1q_eta, y1_z2q_eta, y1_z1q_id_eta, y1_z2q_id_eta, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_eta); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_tmp = evaluate_eta(y2_xq_eta, y2_z1q_eta, y2_z2q_eta, y2_z1q_id_eta, y2_z2q_id_eta, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_eta); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_tmp = evaluate_eta(y3_xq_eta, y3_z1q_eta, y3_z2q_eta, y3_z1q_id_eta, y3_z2q_id_eta, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_eta); } rstanarm/tests/testthat/stan_files/bernoulli.stan0000644000176200001440000001740314500256225022122 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a Bernoulli outcome functions { #include /functions/common_functions.stan #include /functions/bernoulli_likelihoods.stan } data { // dimensions int K; // number of predictors array[2] int N; // number of observations where y = 0 and y = 1 respectively vector[K] xbar; // vector of column-means of rbind(X0, X1) int dense_X; // flag for dense vs. 
sparse array[dense_X] matrix[N[1], K] X0; // centered (by xbar) predictor matrix | y = 0 array[dense_X] matrix[N[2], K] X1; // centered (by xbar) predictor matrix | y = 1 int clogit; // 1 iff the number of successes is fixed in each stratum int J; // number of strata (possibly zero) array[clogit == 1 ? N[1] + N[2] : 0] int strata; // stuff for the sparse case int nnz_X0; // number of non-zero elements in the implicit X0 matrix vector[nnz_X0] w_X0; // non-zero elements in the implicit X0 matrix array[nnz_X0] int v_X0; // column indices for w_X0 // where the non-zeros start in each row of X0 array[dense_X ? 0 : N[1] + 1] int u_X0; int nnz_X1; // number of non-zero elements in the implicit X1 matrix vector[nnz_X1] w_X1; // non-zero elements in the implicit X1 matrix array[nnz_X1] int v_X1; // column indices for w_X1 // where the non-zeros start in each row of X1 array[dense_X ? 0 : N[2] + 1] int u_X1; // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan int K_smooth; matrix[N[1], K_smooth] S0; matrix[N[2], K_smooth] S1; array[K_smooth] int smooth_map; int family; // weights int has_weights; // 0 = No, 1 = Yes vector[has_weights ? N[1] : 0] weights0; vector[has_weights ? N[2] : 0] weights1; // offset int has_offset; // 0 = No, 1 = Yes vector[has_offset ? N[1] : 0] offset0; vector[has_offset ? 
N[2] : 0] offset1; // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // more glmer stuff array[2] int num_non_zero; // number of non-zero elements in the Z matrices vector[num_non_zero[1]] w0; // non-zero elements in the implicit Z0 matrix vector[num_non_zero[2]] w1; // non-zero elements in the implicit Z1 matrix array[num_non_zero[1]] int v0; // column indices for w0 array[num_non_zero[2]] int v1; // column indices for w1 // where the non-zeros start in each row of Z0 array[t > 0 ? N[1] + 1 : 0] int u0; // where the non-zeros start in each row of Z1 array[t > 0 ? N[2] + 1 : 0] int u1; int special_case; // whether we only have to deal with (1|group) } transformed data { int NN = N[1] + N[2]; real aux = not_a_number(); array[special_case ? t : 0, N[1]] int V0 = make_V(N[1], special_case ? t : 0, v0); array[special_case ? t : 0, N[2]] int V1 = make_V(N[2], special_case ? t : 0, v1); array[clogit ? J : 0] int successes; array[clogit ? J : 0] int failures; array[clogit ? J : 0] int observations; int can_do_bernoullilogitglm = K != 0 && // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398 link == 1 && clogit == 0 && has_offset == 0 && prior_PD == 0 && dense_X == 1 && has_weights == 0 && t == 0; matrix[can_do_bernoullilogitglm ? NN : 0, can_do_bernoullilogitglm ? K + K_smooth : 0] XS; array[can_do_bernoullilogitglm ? NN : 0] int y; // defines hs, len_z_T, len_var_group, delta, pos #include /tdata/tdata_glm.stan for (j in 1 : J) { successes[j] = 0; failures[j] = 0; } if (J > 0) for (i in 1 : N[2]) successes[strata[i]] += 1; if (J > 0) for (i in (N[2] + 1) : NN) failures[strata[i]] += 1; for (j in 1 : J) observations[j] = failures[j] + successes[j]; if (can_do_bernoullilogitglm) { XS = K_smooth > 0 ? 
append_col(append_row(X0[1], X1[1]), append_row(S0, S1)) : append_row(X0[1], X1[1]); y = append_array(rep_array(0, N[1]), rep_array(1, N[2])); } } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan } transformed parameters { // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan if (t > 0) { if (special_case) { int start = 1; theta_L = scale .* tau; if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { theta_L = make_theta_L(len_theta_L, p, 1.0, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (can_do_bernoullilogitglm) { vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta; target += bernoulli_logit_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff); } else if (prior_PD == 0) { // defines eta0, eta1 #include /model/make_eta_bern.stan if (has_intercept == 1) { if (link != 4) { eta0 += gamma[1]; eta1 += gamma[1]; } else { real shift = fmax(max(eta0), max(eta1)); eta0 += gamma[1] - shift; eta1 += gamma[1] - shift; } } // Log-likelihood if (clogit) { target += clogit_lpdf(eta0 | eta1, successes, failures, observations); } else if (has_weights == 0) { target += bern_lpdf(eta0 | eta1, link, N); } else { // weighted log-likelihoods target += dot_product(weights0, pw_bern(0, eta0, link)); target += dot_product(weights1, pw_bern(1, eta1, link)); } } #include /model/priors_glm.stan if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 
0 : negative_infinity(); array[has_intercept] real alpha; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (compute_mean_PPD) { vector[N[1]] pi0; vector[N[2]] pi1; // defines eta0, eta1 #include /model/make_eta_bern.stan if (has_intercept == 1) { if (link != 4) { eta0 += gamma[1]; eta1 += gamma[1]; } else { real shift; shift = fmax(max(eta0), max(eta1)); eta0 += gamma[1] - shift; eta1 += gamma[1] - shift; alpha[1] -= shift; } } if (clogit) for (j in 1 : J) mean_PPD += successes[j]; // fixed by design else { pi0 = linkinv_bern(eta0, link); pi1 = linkinv_bern(eta1, link); for (n in 1 : N[1]) mean_PPD += bernoulli_rng(pi0[n]); for (n in 1 : N[2]) mean_PPD += bernoulli_rng(pi1[n]); } mean_PPD /= NN; } } rstanarm/tests/testthat/stan_files/mvmer.stan0000644000176200001440000000416214500256225021253 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/Brilleman_copyright.stan #include /pre/license.stan // Multivariate GLM with correlated group-specific terms functions { #include /functions/common_functions.stan #include /functions/bernoulli_likelihoods.stan #include /functions/binomial_likelihoods.stan #include /functions/continuous_likelihoods.stan #include /functions/count_likelihoods.stan #include /functions/mvmer_functions.stan } data { // declares: M, has_aux, has_weights, resp_type, intercept_type, // yNobs, yNeta, yK, t, p, l, q, len_theta_L, bN1, bK1, bK1_len // bK1_idx, bN2, bK2, bK2_len, bK2_idx #include /data/dimensions_mvmer.stan // declares: yInt{1,2,3}, yReal{1,2,3}, yX{1,2,3}, yXbar{1,2,3}, // family, link, y{1,2,3}_Z{1,2}, y{1,2,3}_Z{1,2}_id, // y_prior_dist{_for_intercept,_for_aux,_for_cov}, prior_PD #include /data/data_mvmer.stan // declares: y_prior_{mean,scale,df}{1,2,3,_for_intercept,_for_aux}, // y_global_prior_{df,scale}, len_{concentration,regularization}, // b_prior_{shape,scale,concentration,regularization}, // b{1,2}_prior_{scale,df,regularization} #include 
/data/hyperparameters_mvmer.stan } transformed data { // declares: yHs{1,2,3}, len_{z_T,var_group,rho}, pos, delta, // bCov{1,2}_idx, {sqrt,log,sum_log}_y{1,2,3}, #include /tdata/tdata_mvmer.stan } parameters { // declares: yGamma{1,2,3}, z_yBeta{1,2,3}, z_b, z_T, rho, // zeta, tau, bSd{1,2}, z_bMat{1,2}, bCholesky{1,2}, // yAux{1,2,3}_unscaled, yGlobal{1,2,3}, yLocal{1,2,3}, // yOol{1,2,3}, yMix{1,2,3} #include /parameters/parameters_mvmer.stan } transformed parameters { // declares and defines: yBeta{1,2,3}, yAux{1,2,3}, yAuxMaximum, // theta_L, bMat{1,2} #include /tparameters/tparameters_mvmer.stan } model { // Log likelihoods // increments target with mvmer log liks #include /model/mvmer_lp.stan // Log priors // increments target with mvmer priors #include /model/priors_mvmer.stan } generated quantities { // declares and defines: mean_PPD, yAlpha{1,2,3}, b{1,2}, bCov{1,2} #include /gqs/gen_quantities_mvmer.stan } rstanarm/tests/testthat/stan_files/jm.stan0000644000176200001440000001322614500256225020534 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/Brilleman_copyright.stan #include /pre/license.stan // Shared parameter joint model functions { #include /functions/common_functions.stan #include /functions/bernoulli_likelihoods.stan #include /functions/binomial_likelihoods.stan #include /functions/continuous_likelihoods.stan #include /functions/count_likelihoods.stan #include /functions/mvmer_functions.stan #include /functions/jm_functions.stan } data { // declares: M, has_aux, has_weights, resp_type, intercept_type, // yNobs, yNeta, yK, t, p, l, q, len_theta_L, bN1, bK1, bK1_len // bK1_idx, bN2, bK2, bK2_len, bK2_idx #include /data/dimensions_mvmer.stan // declares: yInt{1,2,3}, yReal{1,2,3}, yX{1,2,3}, yXbar{1,2,3}, // family, link, y{1,2,3}_Z{1,2}, y{1,2,3}_Z{1,2}_id, // y_prior_dist{_for_intercept,_for_aux,_for_cov}, prior_PD #include /data/data_mvmer.stan // declares: e_prior_dist{_for_intercept,_for_aux}, // Npat, Nevents, qnodes, 
Npat_times_qnodes, qwts, // basehaz_{type,df,X}, nrow_e_Xq, e_has_intercept, nrow_e_Xq, // e_{K,Xq,times,xbar,weights,weights_rep} #include /data/data_event.stan // declares: a_{K,xbar}, a_prior_dist, assoc, assoc_uses, has_assoc, // {sum_}size_which_b, which_b_zindex, {sum_}size_which_coef, // which_coef_{zindex,xindex}, a_K_data, y_Xq_{eta,eps,lag,auc,data}, // {sum_,sum_size_}which_interactions, idx_q, // nrow_y_Xq{_auc}, auc_{qnodes,qwts}, has_grp, grp_assoc, grp_idx, // y{1,2,3}_xq_{eta,eps,auc}, y{1,2,3}_z{1,2}q_{eta,eps,auc}, // y{1,2,3}_z{1,2}q_id_{eta,eps,auc} #include /data/data_assoc.stan // declares: e_prior_{mean,scale,df}{_for_intercept,for_aux}, // e_global_prior_{scale,df} #include /data/hyperparameters_mvmer.stan #include /data/hyperparameters_event.stan // declares: a_prior_{mean,scale,df}, a_global_prior_{scale,df} #include /data/hyperparameters_assoc.stan } transformed data { int e_hs = get_nvars_for_hs(e_prior_dist); int a_hs = get_nvars_for_hs(a_prior_dist); // declares: yHs{1,2,3}, len_{z_T,var_group,rho}, pos, delta, // bCov{1,2}_idx, {sqrt,log,sum_log}_y{1,2,3}, #include /tdata/tdata_mvmer.stan } parameters { // declares: yGamma{1,2,3}, z_yBeta{1,2,3}, z_b, z_T, rho, // zeta, tau, bSd{1,2}, z_bMat{1,2}, bCholesky{1,2}, // yAux{1,2,3}_unscaled, yGlobal{1,2,3}, yLocal{1,2,3}, // yOol{1,2,3}, yMix{1,2,3} #include /parameters/parameters_mvmer.stan // declares e_{gamma,z_beta,aux_unscaled,global,local,mix,ool} #include /parameters/parameters_event.stan // declares a_{z_beta,global,local,mix,ool} #include /parameters/parameters_assoc.stan } transformed parameters { vector[e_K] e_beta; // log hazard ratios vector[a_K] a_beta; // assoc params vector[basehaz_df] e_aux; // basehaz params //---- Parameters for longitudinal submodels // declares and defines: yBeta{1,2,3}, yAux{1,2,3}, yAuxMaximum, // theta_L, bMat{1,2} #include /tparameters/tparameters_mvmer.stan //---- Parameters for event submodel e_beta = make_beta(e_z_beta, e_prior_dist, 
e_prior_mean, e_prior_scale, e_prior_df, e_global_prior_scale, e_global, e_local, e_ool, e_mix, rep_array(1.0, 0), 0, e_slab_scale, e_caux); a_beta = make_beta(a_z_beta, a_prior_dist, a_prior_mean, a_prior_scale, a_prior_df, a_global_prior_scale, a_global, a_local, a_ool, a_mix, rep_array(1.0, 0), 0, a_slab_scale, a_caux); e_aux = make_basehaz_coef(e_aux_unscaled, e_prior_dist_for_aux, e_prior_mean_for_aux, e_prior_scale_for_aux); } model { //---- Log likelihoods for longitudinal submodels #include /model/mvmer_lp.stan { //---- Log likelihood for event submodel (GK quadrature) vector[nrow_e_Xq] e_eta_q; // eta for event submodel (at event and quad times) // Event submodel: linear predictor at event and quad times if (e_K > 0) e_eta_q = e_Xq * e_beta; else e_eta_q = rep_vector(0.0, nrow_e_Xq); if (assoc == 1) { // declares y_eta_q{_eps,_lag,_auc}, y_eta_qwide{_eps,_lag,_auc}, // y_q_wide{_eps,_lag,_auc}, mark{2,3} #include /model/assoc_evaluate.stan } { // declares log_basehaz, log_{haz_q,haz_etimes,surv_etimes,event} // increments target with event log-lik #include /model/event_lp.stan } } //---- Log priors // increments target with mvmer priors #include /model/priors_mvmer.stan target += beta_custom_lpdf(e_z_beta | e_prior_dist, e_prior_scale, e_prior_df, e_global_prior_df, e_local, e_global, e_mix, e_ool, e_slab_df, e_caux); target += beta_custom_lpdf(a_z_beta | a_prior_dist, a_prior_scale, a_prior_df, a_global_prior_df, a_local, a_global, a_mix, a_ool, a_slab_df, a_caux); target += basehaz_lpdf(e_aux_unscaled | e_prior_dist_for_aux, e_prior_scale_for_aux, e_prior_df_for_aux); if (e_has_intercept == 1) target += gamma_custom_lpdf(e_gamma[1] | e_prior_dist_for_intercept, e_prior_mean_for_intercept, e_prior_scale_for_intercept, e_prior_df_for_intercept); } generated quantities { real e_alpha; // transformed intercept for event submodel // declares and defines: mean_PPD, yAlpha{1,2,3}, b{1,2}, bCov{1,2} #include /gqs/gen_quantities_mvmer.stan // norm_const is a 
constant shift in log baseline hazard if (e_has_intercept == 1) e_alpha = e_gamma[1] + norm_const - dot_product(e_xbar, e_beta) - dot_product(a_xbar, a_beta .* a_scale); else e_alpha = norm_const - dot_product(e_xbar, e_beta) - dot_product(a_xbar, a_beta .* a_scale); } rstanarm/tests/testthat/stan_files/binomial.stan0000644000176200001440000000703614500256225021722 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a binomial outcome functions { #include /functions/common_functions.stan #include /functions/binomial_likelihoods.stan } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan array[N] int y; // outcome: number of successes array[N] int trials; // number of trials // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset #include /data/weights_offset.stan int family; // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_scale_{mean, scale, df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // declares num_not_zero, w, v, u #include /data/glmer_stuff2.stan } transformed data { real aux = not_a_number(); array[special_case ? t : 0, N] int V = make_V(N, special_case ? 
t : 0, v); // defines hs, len_z_T, len_var_group, delta, pos #include /tdata/tdata_glm.stan } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan } transformed parameters { // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan if (t > 0) { if (special_case == 1) { int start = 1; theta_L = scale .* tau; if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { theta_L = make_theta_L(len_theta_L, p, 1.0, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (prior_PD == 0) { #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link != 4) eta += gamma[1]; else eta += gamma[1] - max(eta); } else { #include /model/eta_no_intercept.stan } // Log-likelihood if (has_weights == 0) { // unweighted log-likelihoods target += binom_lpmf(y | trials, eta, link); } else target += dot_product(weights, pw_binom(y, trials, eta, link)); } #include /model/priors_glm.stan if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 
0 : negative_infinity(); array[has_intercept] real alpha; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (compute_mean_PPD) { vector[N] pi; #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link != 4) eta += gamma[1]; else { real shift = max(eta); eta += gamma[1] - shift; alpha[1] -= shift; } } else { #include /model/eta_no_intercept.stan } pi = linkinv_binom(eta, link); for (n in 1 : N) mean_PPD += binomial_rng(trials[n], pi[n]); mean_PPD /= N; } } rstanarm/tests/testthat/test_stan_clogit.R0000644000176200001440000000525014370470372020604 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# this mostly goes through same code as a logit model so only testing the unique stuff suppressPackageStartupMessages(library(rstanarm)) SEED <- 123 ITER <- 100 CHAINS <- 2 CORES <- 1 REFRESH <- 0 threshold <- 0.03 context("stan_clogit") SW(fit <- stan_clogit(case ~ spontaneous + induced, strata = stratum, prior = NULL, data = infert[order(infert$stratum), ], QR = TRUE, init_r = 0.5, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) test_that("stan_clogit is similar to survival::clogit", { expect_equal(c(spontaneous = 1.985876, induced = 1.409012), coef(fit), tol = threshold) }) test_that("stan_clogit runs for infert example", { expect_stanreg(fit) }) test_that("stan_clogit works when y is a factor", { d <- infert[order(infert$stratum), ] d$case <- factor(d$case, labels = c("A", "B")) SW(fit_factor <- stan_clogit(case ~ spontaneous + induced, strata = stratum, prior = NULL, data = infert[order(infert$stratum), ], QR = TRUE, init_r = 0.5, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) expect_equal(coef(fit_factor), coef(fit)) }) test_that("stan_clogit throws error if data are not sorted", { expect_error(update(fit, data = infert), regexp = "Data must be sorted") }) test_that("loo/waic for stan_clogit works", { ll_fun <- rstanarm:::ll_fun expect_equivalent_loo(fit) expect_identical(ll_fun(fit), rstanarm:::.ll_clogit_i) }) context("posterior_predict (stan_clogit)") test_that("compatible with stan_clogit", { PPD1 <- posterior_predict(fit) PPD2 <- posterior_predict(fit, newdata = infert) # order irrelevant expect_identical(rowSums(PPD1), rowSums(PPD2)) expect_equal(rowSums(PPD1), round(rowSums( posterior_linpred(fit, newdata = infert, transform = TRUE)))) }) rstanarm/tests/testthat/Rplots.pdf0000644000176200001440000000044414370470372017072 0ustar liggesusers%PDF-1.4 %ρ\r 1 0 obj << /CreationDate (D:20210508175404) /ModDate (D:20210508175404) /Title (R Graphics Output) /Producer (R 4.0.3) /Creator (R) >> endobj 2 0 obj << /Type /Catalog /Pages 3 0 R >> 
endobj 7 0 obj << /Type /Page /Parent 3 0 R /Contents 8 0 R /Resources 4 0 R >> endobj rstanarm/tests/testthat/test_predict.R0000644000176200001440000001304114370470372017725 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm)) library(betareg) SEED <- 123 set.seed(SEED) CHAINS <- 2 ITER <- 100 REFRESH <- 0 if (!exists("example_model")) { example_model <- run_example_model() } plink <- function(fit, nd = NULL, sef = TRUE) predict(fit, newdata = nd, type = "link", se.fit = sef) presp <- function(fit, nd = NULL, sef = TRUE) predict(fit, newdata = nd, type = "response", se.fit = sef) context("predict") test_that("predict recommends posterior_predict for glmer models", { expect_error(predict(example_model), "Please use the 'posterior_predict' function") }) test_that("predict ok for binomial", { # example from help(predict.glm) ldose <- rep(0:5, 2) numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) sex <- factor(rep(c("M", "F"), c(6, 6))) SF <- cbind(numdead, numalive = 20-numdead) glmfit <- glm(SF ~ sex*ldose, family = binomial) SW({ stanfit <- stan_glm(SF ~ sex*ldose, family = binomial, chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) stanfit_opt <- stan_glm(SF ~ sex*ldose, family = binomial, prior = NULL, prior_intercept = NULL, seed = SEED, refresh = 0, QR = TRUE, algorithm = "optimizing") }) pg <- plink(glmfit) ps <- plink(stanfit) pso <- plink(stanfit_opt) expect_equal(pg$fit, ps$fit, tol = 0.1) expect_equal(pg$fit, pso$fit, tol = 0.05) expect_equal(pg$se.fit, ps$se.fit, tol = 0.2) expect_equal(pg$se.fit, pso$se.fit, tol = 0.1) expect_equal(presp(glmfit)[1:2], presp(stanfit_opt), tol = 0.05) ld <- seq(0, 5, 0.1) newd <- data.frame(ldose = ld, sex = factor(rep("M", length(ld)), levels = levels(sex))) pg <- plink(glmfit, newd) ps <- plink(stanfit, newd) pso <- plink(stanfit_opt, newd) # expect_equal(pg$fit, ps$fit, tol = 0.05) expect_equal(pg$fit, pso$fit, tol = 0.05) expect_equal(pg$se.fit, ps$se.fit, tol = 0.2) expect_equal(pg$se.fit, pso$se.fit, tol = 0.1) expect_equal(presp(glmfit, newd)[1:2], presp(stanfit_opt, newd), tol = 0.1) }) test_that("predict ok for gaussian", { glmfit <- glm(mpg ~ wt, data = mtcars) SW({ stanfit <- 
stan_glm(mpg ~ wt, data = mtcars, chains = CHAINS, iter = 2 * ITER, seed = SEED, refresh = 0) stanfit_opt <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL, prior_intercept = NULL, iter = 2 * ITER, seed = SEED, refresh = 0, algorithm = "optimizing") }) pg <- plink(glmfit) ps <- plink(stanfit) pso <- plink(stanfit_opt) expect_equal(pg$fit, ps$fit, tol = 0.05) expect_equal(pg$fit, pso$fit, tol = 0.05) expect_equal(pg$se.fit, ps$se.fit, tol = 0.3) expect_equal(pg$se.fit, pso$se.fit, tol = 0.1) expect_equal(presp(glmfit)[1:2], presp(stanfit_opt), tol = 0.1) newd <- data.frame(wt = c(1,5)) pg <- plink(glmfit, newd) ps <- plink(stanfit, newd) pso <- plink(stanfit_opt, newd) expect_equal(pg$fit, ps$fit, tol = 0.05) expect_equal(pg$fit, pso$fit, tol = 0.05) expect_equal(pg$se.fit, ps$se.fit, tol = 0.3) expect_equal(pg$se.fit, pso$se.fit, tol = 0.1) expect_equal(presp(glmfit, newd)[1:2], presp(stanfit_opt, newd), tol = 0.1) }) test_that("predict ok for Poisson", { dat <- data.frame(counts = c(18,17,15,20,10,20,25,13,12), outcome = gl(3,1,9), treatment = gl(3,3)) glmfit <- glm(counts ~ outcome + treatment, data = dat, family = poisson()) SW({ stanfit <- stan_glm(counts ~ outcome + treatment, data = dat, family = poisson(), chains = CHAINS, iter = ITER, seed = SEED, refresh = 0) stanfit_opt <- stan_glm(counts ~ outcome + treatment, data = dat, family = poisson(), iter = ITER, seed = SEED, refresh = 0, algorithm = "optimizing") }) pg <- plink(glmfit) ps <- plink(stanfit) pso <- plink(stanfit_opt) expect_equal(pg$fit, ps$fit, tol = 0.05) expect_equal(pg$fit, pso$fit, tol = 0.05) expect_equal(pg$se.fit, ps$se.fit, tol = 0.1) expect_equal(pg$se.fit, pso$se.fit, tol = 0.1) expect_equal(presp(glmfit)[1:2], presp(stanfit_opt), tol = 0.1) expect_equal(plink(stanfit, sef = FALSE), plink(glmfit, sef = FALSE), tol = 0.05) expect_equal(presp(stanfit, sef = FALSE), presp(glmfit, sef = FALSE), tol = 0.05) newd <- dat[1:2, ] pg <- plink(glmfit, newd) ps <- plink(stanfit, newd) pso <- 
plink(stanfit_opt, newd) expect_equal(pg$fit, ps$fit, tol = 0.05) expect_equal(pg$fit, pso$fit, tol = 0.05) expect_equal(pg$se.fit, ps$se.fit, tol = 0.1) expect_equal(pg$se.fit, pso$se.fit, tol = 0.1) expect_equal(presp(glmfit, newd)[1:2], presp(stanfit_opt, newd), tol = 0.1) }) rstanarm/tests/testthat/test_stan_betareg.R0000644000176200001440000003071014370470372020733 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
if (.Platform$OS.type != "windows" && require(betareg)) { suppressPackageStartupMessages(library(rstanarm)) SEED <- 12345 set.seed(SEED) ITER <- 10 CHAINS <- 2 REFRESH <- 0 context("stan_betareg") simple_betareg_data <- function(N, draw_z = FALSE) { x <- rnorm(N, 2, 1) z <- if (draw_z) rnorm(N, 0, 1) else rep(0, N) mu <- binomial(link="logit")$linkinv(1 + 0.2 * x) phi <- 20 y <- rbeta(N, mu * phi, (1 - mu) * phi) data.frame(y,x,z) } dat <- simple_betareg_data(200, draw_z = TRUE) link1 <- c("logit", "probit", "cloglog", "cauchit", "log", "loglog") link2 <- c("log", "identity", "sqrt") # sparse currently not used in stan_betareg test_that("sparse = TRUE errors", { expect_error( stan_betareg(y ~ x, link = "logit", seed = SEED, sparse = TRUE, data = dat), "unknown arguments: sparse" ) }) # test QR test_that("QR errors when number of x and/or z predictors is <= 1", { expect_error( stan_betareg(y ~ x, link = "logit", seed = SEED, QR = TRUE, data = dat), "'QR' can only be specified when there are multiple predictors" ) expect_error( stan_betareg(y ~ x | z, link = "logit", seed = SEED, QR = TRUE, data = dat), "'QR' can only be specified when there are multiple predictors" ) }) test_that("QR works when number of x and/or z predictors is >= 1", { SW(fit1 <- stan_betareg(y ~ x + z, link = "logit", seed = SEED, QR = TRUE, prior = NULL, prior_intercept = NULL, refresh = 0, data = dat, algorithm = "optimizing")) expect_stanreg(fit1) expect_output(print(prior_summary(fit1)), "Q-space") SW(fit2 <- stan_betareg(y ~ x + z | z, link = "logit", seed = SEED, QR = TRUE, prior = NULL, prior_intercept = NULL, refresh = 0, data = dat, algorithm = "optimizing")) expect_stanreg(fit2) }) test_that("stan_betareg returns expected result when modeling x and dispersion", { for (i in 1:length(link1)) { SW(fit <- stan_betareg(y ~ x, link = link1[i], seed = SEED, prior = NULL, prior_intercept = NULL, prior_phi = NULL, refresh = 0, data = dat, algorithm = "optimizing")) expect_stanreg(fit) val <- 
coef(fit) ans <- coef(betareg(y ~ x, link = link1[i], data = dat)) expect_equal(val, ans, tol = 0.1, info = link1[i]) } }) test_that("stan_betareg works with QR = TRUE and algorithm = 'optimizing'", { SW(fit <- stan_betareg(y ~ x + z, link = "logit", seed = SEED, QR = TRUE, prior = NULL, prior_intercept = NULL, prior_phi = NULL, refresh = 0, data = dat, algorithm = "optimizing")) expect_stanreg(fit) val <- coef(fit) ans <- coef(betareg(y ~ x + z, link = "logit", data = dat)) expect_equal(val, ans, tol = 0.1, info = "logit") }) test_that("stan_betareg works with QR = TRUE and algorithm = 'sampling'", { SW(fit <- stan_betareg(y ~ x + z, link = "logit", QR = TRUE, prior = NULL, prior_intercept = NULL, prior_phi = NULL, refresh = 0, iter = 100, chains = 2, data = dat)) expect_stanreg(fit) val <- coef(fit) ans <- coef(betareg(y ~ x + z, link = "logit", data = dat)) expect_equal(val, ans, tol = 0.1) }) test_that("QR recommended if VB and at least 2 predictors", { expect_message( stan_betareg(y ~ x + z, data = dat, link = "logit", algorithm = "meanfield", prior = NULL, prior_intercept = NULL, prior_phi = NULL, refresh = 0), "Setting 'QR' to TRUE can often be helpful when using one of the variational inference algorithms" ) # no message if QR already specified expect_message( stan_betareg(y ~ x + z, data = dat, QR = TRUE, link = "logit", algorithm = "meanfield", prior = NULL, prior_intercept = NULL, prior_phi = NULL, refresh = 0), NA ) # no message if only 1 predictor expect_message( stan_betareg(y ~ x, data = dat, link = "logit", algorithm = "meanfield", prior = NULL, prior_intercept = NULL, prior_phi = NULL, refresh = 0), NA ) }) test_that("stan_betareg ok when modeling x and z (link.phi = 'log')", { N <- 200 dat <- data.frame(x = rnorm(N, 2, 1), z = rnorm(N, 2, 1)) mu <- binomial(link="logit")$linkinv(1 + 0.2 * dat$x) phi <- poisson(link = link2[1])$linkinv(1.5 + 0.4*dat$z) dat$y <- rbeta(N, mu * phi, (1 - mu) * phi) for (i in 1:length(link1)) { SW(fit <- stan_betareg(y 
~ x | z, link = link1[i], link.phi = link2[1], seed = SEED, refresh = 0, prior = NULL, prior_intercept = NULL, prior_z = NULL, prior_intercept_z = NULL, data = dat, algorithm = "optimizing")) expect_stanreg(fit) val <- coef(fit) ans <- coef(betareg(y ~ x | z, link = link1[i], link.phi = link2[1], data = dat)) expect_equal(val, ans, tol = 0.1, info = c(link1[i], link2[1])) } }) # tests use sampling instead of optimizing (the latter fails) test_that("stan_betareg ok when modeling x and z (link.phi = 'identity')", { N <- 200 dat <- data.frame(x = rnorm(N, 2, 1), z = rnorm(N, 2, 1)) mu <- binomial(link = "logit")$linkinv(1 + 0.2*dat$x) phi <- dat$z - min(dat$z) + 5.5 dat$y <- rbeta(N, mu * phi, (1 - mu) * phi) for (i in 1:length(link1)) { SW(fit <- stan_betareg(y ~ x | z, link = link1[i], link.phi = link2[2], prior = NULL, prior_intercept = NULL, prior_z = NULL, prior_intercept_z = NULL, data = dat, algorithm = "optimizing", seed = SEED, refresh = 0)) expect_stanreg(fit) val <- coef(fit) ans <- coef(betareg(y ~ x | z, link = link1[i], link.phi = link2[2], data = dat)) expect_equal(val, ans, tol = 0.15, info = c(link1[i], link2[2])) } }) # sqrt link is unstable so only testing that the model runs. test_that("stan_betareg ok when modeling x and z (link.phi = 'sqrt')", { # skip_on_ci() # seems to segfault sometimes: https://github.com/stan-dev/rstanarm/pull/496/checks?check_run_id=1582276935#step:9:397 for (i in 1:length(link1)) { # FIXME! 
N <- 1000
    dat <- data.frame(x = rnorm(N, 2, 1), z = rep(1, N))
    mu <- binomial(link = "logit")$linkinv(-0.8 + 0.5 * dat$x)
    phi <- poisson(link = "sqrt")$linkinv(8 + 2 * dat$z)
    dat$y <- rbeta(N, mu * phi, (1 - mu) * phi)
    # a single iteration: this only checks that the model runs at all
    SW(fit <- stan_betareg(y ~ x | 1, link = link1[i], link.phi = "sqrt",
                           data = dat, chains = 1, iter = 1, refresh = 0,
                           algorithm = "sampling", seed = SEED))
    expect_stanreg(fit)
  }
})

# test weights/offset (make test more comprehensive once the beta_rng()
# update is in stan math)
test_that("stan_betareg ok when modeling x and dispersion with offset and weights", {
  N <- 200
  weights <- rbeta(N, 2, 2)
  offset <- rep(0.3, N)
  dat <- data.frame(x = rnorm(N, 2, 1))
  mu <- binomial(link = "logit")$linkinv(1 + 0.2 * dat$x)
  phi <- 20
  dat$y <- rbeta(N, mu * phi, (1 - mu) * phi)
  SW(fit <- stan_betareg(y ~ x, link = "logit", seed = SEED,
                         prior = NULL, prior_intercept = NULL,
                         prior_phi = NULL, data = dat,
                         weights = weights, offset = offset,
                         algorithm = "optimizing", iter = 2000, refresh = 0))
  expect_stanreg(fit)
  # compare to the maximum-likelihood fit with the same weights/offset
  val <- coef(fit)
  ans <- coef(betareg(y ~ x, link = "logit", weights = weights,
                      offset = offset, data = dat))
  expect_equal(val, ans, tol = 0.3, info = "logit")
})

test_that("heavy tailed priors work with stan_betareg", {
  # skip_on_ci()
  # a single iteration each: only checks that the priors are accepted
  SW(fit1 <- stan_betareg(y ~ x | z, data = dat, prior = product_normal(),
                          prior_z = product_normal(),
                          chains = 1, iter = 1, refresh = 0))
  expect_stanreg(fit1)
  SW(fit2 <- stan_betareg(y ~ x | z, data = dat, prior = laplace(),
                          prior_z = laplace(),
                          chains = 1, iter = 1, refresh = 0))
  expect_stanreg(fit2)
  SW(fit3 <- stan_betareg(y ~ x | z, data = dat, prior = lasso(),
                          prior_z = lasso(),
                          chains = 1, iter = 1, refresh = 0))
  expect_stanreg(fit3)
})

test_that("loo/waic for stan_betareg works", {
  ll_fun <- rstanarm:::ll_fun
  data("GasolineYield", package = "betareg")
  SW(fit_logit <- stan_betareg(yield ~ batch + temp | temp,
                               data = GasolineYield, link = "logit",
                               chains = CHAINS, iter = ITER, seed = SEED,
                               refresh = 0))
  expect_equivalent_loo(fit_logit)
expect_identical(ll_fun(fit_logit), rstanarm:::.ll_beta_i)
})

test_that("compatible with stan_betareg with z", {
  data("GasolineYield", package = "betareg")
  SW(fit <- stan_betareg(yield ~ pressure + temp | temp,
                         data = GasolineYield, iter = ITER * 5,
                         chains = 2 * CHAINS, seed = SEED, refresh = 0))
  check_for_pp_errors(fit)
  # expect_linpred_equal(fit)
})

test_that("compatible with stan_betareg without z", {
  data("GasolineYield", package = "betareg")
  SW(fit <- stan_betareg(yield ~ temp, data = GasolineYield,
                         iter = ITER, chains = CHAINS, seed = SEED,
                         refresh = 0))
  check_for_pp_errors(fit)
  # expect_linpred_equal(fit)
})

test_that("compatible with betareg with offset", {
  GasolineYield2 <- GasolineYield
  GasolineYield2$offs <- runif(nrow(GasolineYield2))
  # the same model with the offset given as an argument and in the formula
  SW(fit <- stan_betareg(yield ~ temp, data = GasolineYield2, offset = offs,
                         iter = ITER * 5, chains = CHAINS, seed = SEED,
                         refresh = 0))
  SW(fit2 <- stan_betareg(yield ~ temp + offset(offs), data = GasolineYield2,
                          iter = ITER * 5, chains = CHAINS, seed = SEED,
                          refresh = 0))
  # predicting on data that lacks the offset column should warn
  expect_warning(posterior_predict(fit, newdata = GasolineYield), "offset")
  check_for_pp_errors(fit, data = GasolineYield2, offset = GasolineYield2$offs)
  check_for_pp_errors(fit2, data = GasolineYield2, offset = GasolineYield2$offs)
  expect_linpred_equal(fit)
  expect_linpred_equal(fit2)
})

test_that("predict ok for stan_betareg", {
  dat <- list()
  dat$N <- 200
  dat$x <- rnorm(dat$N, 2, 1)
  dat$z <- rnorm(dat$N, 2, 1)
  dat$mu <- binomial(link = "logit")$linkinv(0.5 + 0.2 * dat$x)
  dat$phi <- exp(1.5 + 0.4 * dat$z)
  dat$y <- rbeta(dat$N, dat$mu * dat$phi, (1 - dat$mu) * dat$phi)
  dat <- data.frame(dat$y, dat$x, dat$z)
  colnames(dat) <- c("y", "x", "z")
  betaregfit <- betareg(y ~ x | z, data = dat)
  SW(capture.output(
    stanfit <- stan_betareg(y ~ x | z, data = dat, chains = CHAINS,
                            iter = ITER, seed = SEED, refresh = 0)
  ))
  pb <- predict(betaregfit, type = "response")
  ps <- predict(stanfit, type = "response")
  # expect_equal(pb, ps, tol = 0.05)
  # NOTE(review): presp() is not defined anywhere visible, so this expectation
  # passes because the call itself errors -- confirm that is intentional
  expect_error(presp(stanfit))
  newd <-
data.frame(x = c(300,305)) pb <- predict(betaregfit, newdata = newd, type = "link") ps <- predict(stanfit, newdata = newd, type = "link") # expect_equal(pb, ps, tol = 0.05) }) } rstanarm/tests/testthat/test_stan_mvmer.R0000644000176200001440000003031214406606742020450 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016 Trustees of Columbia University # Copyright (C) 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
suppressPackageStartupMessages(library(rstanarm))
library(lme4)

ITER <- 1000
CHAINS <- 1
SEED <- 12345
REFRESH <- 0L
set.seed(SEED)

# tolerances used when comparing Bayesian estimates to the (g)lmer fits
TOLSCALES <- list(
  lmer_fixef = 0.25,  # how many SEs can stan_jm fixefs be from lmer fixefs
  lmer_ranef = 0.05,  # how many SDs can stan_jm ranefs be from lmer ranefs
  glmer_fixef = 0.3,  # how many SEs can stan_jm fixefs be from glmer fixefs
  glmer_ranef = 0.1   # how many SDs can stan_jm ranefs be from glmer ranefs
)

context("stan_mvmer")

#---- Data (for non-Gaussian families)

pbcLong$ybern <- as.integer(pbcLong$logBili >= mean(pbcLong$logBili))
pbcLong$ybino <- as.integer(rpois(nrow(pbcLong), 5))
pbcLong$ypois <- as.integer(pbcLong$albumin)
pbcLong$ynbin <- as.integer(rnbinom(nrow(pbcLong), 3, .3))
pbcLong$ygamm <- as.numeric(pbcLong$platelet / 10)
pbcLong$xbern <- as.numeric(pbcLong$platelet / 100)
pbcLong$xpois <- as.numeric(pbcLong$platelet / 100)
pbcLong$xgamm <- as.numeric(pbcLong$logBili)

#---- Models

# univariate GLM
fm1 <- logBili ~ year + (year | id)
o <- SW(m1 <- stan_mvmer(fm1, pbcLong, iter = 5, chains = 1, seed = SEED,
                         refresh = 0))

# multivariate GLM
fm2 <- list(
  logBili ~ year + (year | id),
  albumin ~ year + (year | id))
o <- SW(m2 <- stan_mvmer(fm2, pbcLong, iter = 5, chains = 1, seed = SEED,
                         refresh = 0))

#---- Tests for stan_mvmer arguments

test_that("formula argument works", {
  SW(m991 <- update(m1, formula. = list(fm1)))
  expect_identical(as.matrix(m1), as.matrix(m991))  # fm as list
})

test_that("error if outcome is character", {
  expect_error(
    update(m1, formula.
= as.character(logBili) ~ year + (year | id)),
    "Outcome variable can't be type 'character'"
  )
})

test_that("data argument works", {
  SW(m991 <- update(m1, data = list(pbcLong)))
  SW(m992 <- update(m2, data = list(pbcLong, pbcLong)))
  expect_identical(as.matrix(m1), as.matrix(m991))  # data as list
  expect_identical(as.matrix(m2), as.matrix(m992))
})

test_that("family argument works", {
  # gaussian specified three equivalent ways
  expect_output(suppressWarnings(update(m1, family = "gaussian", iter = 5)))
  expect_output(suppressWarnings(update(m1, family = gaussian, iter = 5)))
  expect_output(suppressWarnings(update(m1, family = gaussian(link = identity), iter = 5)))
  # other families
  expect_output(suppressWarnings(update(m1, formula. = ybern ~ ., family = binomial, iter = 5)))
  expect_output(suppressWarnings(update(m1, formula. = ypois ~ ., family = poisson, iter = 5)))
  expect_output(suppressWarnings(update(m1, formula. = ypois ~ ., family = neg_binomial_2, iter = 5)))
  expect_output(suppressWarnings(update(m1, formula. = ygamm ~ ., family = Gamma, init = 0, iter = 5)))
  expect_output(suppressWarnings(update(m1, formula. = ygamm ~ ., family = inverse.gaussian, init = 0, iter = 5)))
  expect_error(update(m1, formula. = ybino ~ ., family = binomial))
  # multivariate model with combinations of family
  expect_output(suppressWarnings(update(m2, formula.
= list(~ ., ybern ~ .), family = list(gaussian, binomial), iter = 5)))
})

test_that("prior_PD argument works", {
  expect_output(suppressWarnings(update(m1, prior_PD = TRUE, iter = 5)))
})

test_that("adapt_delta argument works", {
  expect_output(suppressWarnings(update(m1, adapt_delta = NULL, iter = 5)))
  expect_output(suppressWarnings(update(m1, adapt_delta = 0.8, iter = 5)))
})

test_that("error message occurs for arguments not implemented", {
  expect_error(update(m1, weights = 1:10), "not yet implemented")
  expect_error(update(m1, QR = TRUE), "not yet implemented")
  expect_error(update(m1, sparse = TRUE), "not yet implemented")
})

#---- Check models with multiple grouping factors

test_that("multiple grouping factors are ok", {
  tmpdat <- pbcLong
  tmpdat$practice <- cut(pbcLong$id, c(0, 10, 20, 30, 40))
  tmpfm1 <- logBili ~ year + (year | id) + (1 | practice)
  SW(ok_mod1 <- update(m1, formula. = tmpfm1, data = tmpdat, iter = 1,
                       refresh = 0, init = 0))
  expect_stanmvreg(ok_mod1)
  tmpfm2 <- list(
    logBili ~ year + (year | id) + (1 | practice),
    albumin ~ year + (year | id))
  SW(ok_mod2 <- update(m2, formula. = tmpfm2, data = tmpdat, iter = 1,
                       refresh = 0, init = 0))
  expect_stanmvreg(ok_mod2)
  tmpfm3 <- list(
    logBili ~ year + (year | id) + (1 | practice),
    albumin ~ year + (year | id) + (1 | practice))
  SW(ok_mod3 <- update(m2, formula. = tmpfm3, data = tmpdat, iter = 1,
                       refresh = 0, init = 0))
  expect_stanmvreg(ok_mod3)
  # check reordering grouping factors is ok
  # NB it seems these comparisons must be made using init = 0 and one iteration,
  # probably because the order of the parameters passed to Stan affects the
  # sequence of MCMC samples even when the same seed is used. An alternative
  # would be to test equality of the stanmat colMeans with specified tolerance?
  tmpfm4 <- list(
    logBili ~ year + (1 | practice) + (year | id),
    albumin ~ year + (year | id))
  SW(ok_mod4 <- update(ok_mod2, formula.
= tmpfm4)) expect_identical_sorted_stanmats(ok_mod2, ok_mod4) tmpfm5 <- list( logBili ~ year + (1 | practice) + (year | id), albumin ~ year + (year | id) + (1 | practice)) SW(ok_mod5 <- update(ok_mod3, formula. = tmpfm5)) expect_identical_sorted_stanmats(ok_mod3, ok_mod5) tmpfm6 <- list( logBili ~ year + (1 | practice) + (year | id), albumin ~ year + (1 | practice) + (year | id)) SW(ok_mod6 <- update(ok_mod3, formula. = tmpfm6)) expect_identical_sorted_stanmats(ok_mod3, ok_mod6) }) #---- Compare estimates: univariate stan_mvmer vs stan_glmer if (interactive()) { compare_glmer <- function(fmLong, fam = gaussian, ...) { SW(y1 <- stan_glmer(fmLong, pbcLong, fam, iter = 1000, chains = CHAINS, seed = SEED, refresh = 0)) SW(y2 <- stan_mvmer(fmLong, pbcLong, fam, iter = 1000, chains = CHAINS, seed = SEED, ..., refresh = 0)) tols <- get_tols(y1, tolscales = TOLSCALES) pars <- recover_pars(y1) pars2 <- recover_pars(y2) for (i in names(tols$fixef)) expect_equal(pars$fixef[[i]], pars2$fixef[[i]], tol = tols$fixef[[i]]) for (i in names(tols$ranef)) expect_equal(pars$ranef[[i]], pars2$ranef[[i]], tol = tols$ranef[[i]]) expect_equal(colMeans(log_lik(y1)), colMeans(log_lik(y2)), tol = 0.15) nd <- pbcLong[stats::complete.cases(pbcLong), , drop = FALSE] expect_equal(colMeans(log_lik(y1, newdata = nd)), colMeans(log_lik(y2, newdata = nd)), tol = 0.15) } test_that("coefs same for stan_jm and stan_lmer/coxph", { # fails in many cases # compare_glmer(logBili ~ year + (1 | id), gaussian) }) # fails in some cases # test_that("coefs same for stan_jm and stan_glmer, bernoulli", { # compare_glmer(ybern ~ year + xbern + (1 | id), binomial)}) test_that("coefs same for stan_jm and stan_glmer, poisson", { compare_glmer(ypois ~ year + xpois + (1 | id), poisson, init = 0)}) test_that("coefs same for stan_jm and stan_glmer, negative binomial", { compare_glmer(ynbin ~ year + xpois + (1 | id), neg_binomial_2)}) test_that("coefs same for stan_jm and stan_glmer, Gamma", { compare_glmer(ygamm ~ year + 
xgamm + (1 | id), Gamma(log))}) # test_that("coefs same for stan_jm and stan_glmer, inverse gaussian", { # compare_glmer(ygamm ~ year + xgamm + (1 | id), inverse.gaussian)}) } #---- Check methods and post-estimation functions tmpdat <- pbcLong tmpdat$practice <- cut(pbcLong$id, c(0,10,20,30,40)) o<-SW(f1 <- update(m1, formula. = list(logBili ~ year + (year | id)), data = tmpdat, iter = 5)) o<-SW(f2 <- update(f1, formula. = list(logBili ~ year + (year | id) + (1 | practice)))) o<-SW(f3 <- update(m2, formula. = list(logBili ~ year + (year | id) + (1 | practice), albumin ~ year + (year | id)), data = tmpdat, iter = 5)) o<-SW(f4 <- update(f3, formula. = list(logBili ~ year + (year | id) + (1 | practice), albumin ~ year + (year | id) + (1 | practice)))) o<-SW(f5 <- update(f3, formula. = list(logBili ~ year + (year | id) + (1 | practice), ybern ~ year + (year | id) + (1 | practice)), family = list(gaussian, binomial))) for (j in 1:5) { mod <- get(paste0("f", j)) cat("Checking model:", paste0("f", j), "\n") expect_error(posterior_traj(mod), "stanjm") expect_error(posterior_survfit(mod), "stanjm") test_that("posterior_predict works with estimation data", { pp <- posterior_predict(mod, m = 1) expect_ppd(pp) if (mod$n_markers > 1L) { pp <- posterior_predict(mod, m = 2) expect_ppd(pp) } }) test_that("log_lik works with estimation data", { ll <- log_lik(mod) expect_matrix(ll) expect_identical(ll, log_lik(mod, m = 1)) if (mod$n_markers > 1L) expect_matrix(log_lik(mod, m = 2)) }) nd <- tmpdat[tmpdat$id == 2,] test_that("posterior_predict works with new data (one individual)", { pp <- posterior_predict(mod, m = 1, newdata = nd) expect_ppd(pp) if (mod$n_markers > 1L) { pp <- posterior_predict(mod, m = 2, newdata = nd) expect_ppd(pp) } }) test_that("log_lik works with new data (one individual)", { ll <- log_lik(mod, newdata = nd) expect_matrix(ll) expect_identical(ll, log_lik(mod, m = 1, newdata = nd)) if (mod$n_markers > 1L) expect_matrix(log_lik(mod, m = 2, newdata = nd)) # 
log_lik is only designed for one submodel at a time so passing # newdata as a list should generate an error in validate_newdata expect_error(log_lik(mod, newdata = list(nd)), "data frame") }) nd <- tmpdat[tmpdat$id %in% c(1,2),] test_that("posterior_predict works with new data (multiple individuals)", { pp <- posterior_predict(mod, m = 1, newdata = nd) expect_ppd(pp) if (mod$n_markers > 1L) { pp <- posterior_predict(mod, m = 2, newdata = nd) expect_ppd(pp) } }) test_that("log_lik works with estimation data", { expect_matrix(log_lik(mod, newdata = nd)) if (mod$n_markers > 1L) expect_matrix(log_lik(mod, m = 2, newdata = nd)) }) test_that("loo and waic work", { l <- suppressWarnings(loo(mod)) w <- suppressWarnings(waic(mod)) expect_s3_class(l, "loo") expect_s3_class(w, "loo") expect_s3_class(w, "waic") att_names <- c('names', 'dims', 'class', 'model_name', 'discrete', 'yhash', 'formula') expect_named(attributes(l), att_names) expect_named(attributes(w), att_names) }) test_that("extraction methods work", { M <- mod$n_markers fe <- fixef(mod) re <- ranef(mod) ce <- coef(mod) mf <- model.frame(mod) tt <- terms(mod) fm <- formula(mod) fam <- family(mod) sig <- sigma(mod) expect_is(fe, "list"); expect_identical(length(fe), M) expect_is(re, "list"); expect_identical(length(re), M) expect_is(ce, "list"); expect_identical(length(re), M) expect_is(mf, "list"); expect_identical(length(mf), M); lapply(mf, function(x) expect_is(x, "data.frame")) expect_is(tt, "list"); expect_identical(length(tt), M); lapply(tt, function(x) expect_is(x, "terms")) expect_is(fm, "list"); expect_identical(length(fm), M); lapply(fm, function(x) expect_is(x, "formula")) expect_is(fam,"list"); expect_identical(length(fam),M); lapply(fam, function(x) expect_is(x, "family")) expect_is(sig, "numeric"); }) test_that("these extraction methods are currently disallowed", { expect_error(se(mod), "Not currently implemented") expect_error(fitted(mod), "Not currently implemented") expect_error(residuals(mod), "Not 
currently implemented") }) } rstanarm/tests/testthat/test_stan_glmer.R0000644000176200001440000003037214370470372020434 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. suppressPackageStartupMessages(library(rstanarm)) stopifnot(require(lme4)) # stopifnot(require(gamm4)) stopifnot(require(HSAUR3)) ITER <- 400 CHAINS <- 2 SEED <- 123 REFRESH <- ITER set.seed(SEED) FIXEF_tol <- 0.05 RANEF_tol <- 0.25 if (!exists("example_model")) { example_model <- run_example_model() } SW(fit <- stan_lmer(Reaction / 10 ~ Days + (Days | Subject), data = sleepstudy, refresh = 0, init_r = 0.05, chains = CHAINS, iter = ITER, seed = SEED)) context("stan_glmer") test_that("draws from stan_glmer (gaussian) same as from stan_lmer", { SW(fit1 <- stan_glmer(mpg ~ wt + (1|cyl), data = mtcars, iter = 10, chains = 1, seed = SEED, refresh = 0)) SW(fit2 <- stan_lmer(mpg ~ wt + (1|cyl), data = mtcars, iter = 10, chains = 1, seed = SEED, refresh = 0)) expect_identical(as.matrix(fit1), as.matrix(fit2)) }) test_that("stan_glmer returns expected result for binomial cbpp example", { links <- c("logit", "probit", "cauchit", "log", "cloglog") # for (i in seq_along(links)) { i <- 1L # it seems only logit gives results 
similar to glmer with same link
  fmla <- cbind(incidence, size - incidence) ~ period + (1 | herd)
  SW(fit <- stan_glmer(fmla, data = cbpp, family = binomial(links[i]),
                       chains = CHAINS, iter = ITER, seed = SEED,
                       refresh = 0))
  expect_stanreg(fit)
  ans <- glmer(fmla, data = cbpp, family = binomial(links[i]))
  expect_equal(fixef(fit), fixef(ans), tol = FIXEF_tol)
  expect_equal(ranef(fit), ranef(ans), tol = RANEF_tol,
               check.attributes = FALSE)
  expect_equal(ngrps(fit), ngrps(ans))
  # }
})

context("stan_glmer.nb")
test_that("stan_glmer.nb ok", {
  dd <- expand.grid(f1 = factor(1:3), f2 = LETTERS[1:2], g = 1:9, rep = 1:15,
                    KEEP.OUT.ATTRS = FALSE)
  mu <- 5 * (-4 + with(dd, as.integer(f1) + 4 * as.numeric(f2)))
  dd$y <- rnbinom(nrow(dd), mu = mu, size = 0.5)
  fmla <- as.formula(y ~ f1 * f2 + (1 | g))
  SW(fit <- stan_glmer.nb(formula = fmla, data = dd, init_r = 1, refresh = 0,
                          iter = ITER, seed = SEED, algorithm = "meanfield"))
  expect_stanreg(fit)
  ans <- glmer.nb(formula = fmla, data = dd)
  # ans is messed up
  # expect_equal(fixef(fit), fixef(ans), tol = FIXEF_tol)
  # expect_equal(ranef(fit), ranef(ans), tol = RANEF_tol)
  expect_equal(ngrps(fit), ngrps(ans))
})

context("stan_lmer")
# typo fixed in the test description: "slepstudy" -> "sleepstudy"
test_that("stan_lmer returns expected result for sleepstudy example", {
  # reuses the globally fitted sleepstudy model `fit` from the setup above
  fmla <- formula(fit)
  expect_stanreg(fit)
  ans <- lmer(fmla, data = sleepstudy)
  expect_equal(fixef(fit), fixef(ans), tol = FIXEF_tol)
  # expect_equal(ranef(fit), ranef(ans), tol = RANEF_tol)
  expect_equal(ngrps(fit), ngrps(ans))
})

test_that("stan_lmer returns expected result for Penicillin example", {
  fmla <- as.formula(diameter ~ (1 | plate) + (1 | sample))
  SW(fit <- stan_lmer(fmla, data = Penicillin, chains = CHAINS, iter = ITER,
                      seed = SEED, refresh = 0, sparse = TRUE))
  expect_stanreg(fit)
  ans <- lmer(fmla, data = Penicillin)
  expect_equal(fixef(fit), fixef(ans), tol = FIXEF_tol)
  expect_equal(ranef(fit), ranef(ans), tol = RANEF_tol,
               check.attributes = FALSE)
  expect_identical(ngrps(fit), ngrps(ans))
})

test_that("stan_lmer ok if global intercept forced to 0", {
SW(fit <- stan_lmer(mpg ~ 0 + (1 | cyl), data = mtcars, iter = 10,
                      seed = SEED, refresh = 0))
  expect_stanreg(fit)
})

test_that("stan_lmer returns an error when multiple group-specific terms are specified", {
  expect_error(
    stan_lmer(Reaction / 10 ~ Days + (Days | Subject) + (1 | Subject),
              data = sleepstudy),
    "formulas with duplicate group-specific terms"
  )
})

test_that("stan_lmer returns an error when 'family' specified", {
  expect_error(
    stan_lmer(Reaction / 10 ~ Days + (Days | Subject), family = "gaussian",
              data = sleepstudy),
    "'family' should not be specified"
  )
})

test_that("error if y is character", {
  expect_error(
    stan_lmer(as.character(mpg) ~ wt + (1 | cyl), data = mtcars),
    "Outcome variable can't be type 'character'"
  )
  expect_error(
    stan_glmer.nb(as.character(mpg) ~ wt + (1 | cyl), data = mtcars),
    "Outcome variable can't be type 'character'"
  )
})

context("stan_gamm4")
test_that("stan_gamm4 returns stanreg object", {
  skip_if_not_installed("mgcv")
  sleepstudy$y <- sleepstudy$Reaction / 10
  SW(fit <- stan_gamm4(y ~ s(Days), data = sleepstudy, sparse = TRUE,
                       random = ~(1 | Subject), chains = CHAINS, iter = ITER,
                       seed = SEED, refresh = 0))
  expect_stanreg(fit)
  # ans <- gamm4(Reaction / 10 ~ s(Days), data = sleepstudy,
  #              random = ~(1|Subject))$mer
  # expect_equal(fixef(fit)[-1], fixef(ans)[-1], tol = FIXEF_tol, check.attributes = FALSE)
  # expect_equal(ranef(fit), ranef(ans), tol = RANEF_tol)
  # expect_identical(ngrps(fit), ngrps(ans))
  p1 <- plot_nonlinear(fit)
  p2 <- plot_nonlinear(fit, smooths = "s(Days)")
  expect_gg(p1)
  expect_gg(p2)
})

# typo fixed in the test description: unbalanced quote around 'cc'
test_that("stan_gamm4 doesn't error when bs='cc'", {
  # https://github.com/stan-dev/rstanarm/issues/362
  skip_if_not_installed("mgcv")
  N <- 100
  y <- rnorm(N, 0, 1)
  x <- rep(1:(N / 2), 2)
  x2 <- rnorm(N)
  data <- data.frame(x, x2, y)
  # only run a few iter to make sure it doesn't error
  SW(fit1 <- stan_gamm4(y ~ x2 + s(x, bs = "cc"), data = data, iter = 5,
                        chains = 1, init = 0, refresh = 0))
  expect_stanreg(fit1)
  # with another smooth term
  SW(fit2 <-
stan_gamm4(y ~ s(x2) + s(x, bs = "cc"), data=data, iter = 5, chains = 1, init = 0, refresh = 0)) expect_stanreg(fit2) # with another 'cc' smooth term SW(fit3 <- stan_gamm4(y ~ s(x2, bs = "cc") + s(x, bs = "cc"), data=data, iter = 5, chains = 1, init = 0, refresh = 0)) expect_stanreg(fit3) }) test_that("stan_gamm4 errors if no smooth terms in formula", { dat <- data.frame( y = rnorm(100), x = rnorm(100), id = gl(5, 20) ) expect_error( stan_gamm4(y ~ x, random = ~(1 | id), data = dat), "Formula must have at least one smooth term to use stan_gamm4" ) }) test_that("loo/waic for stan_glmer works", { ll_fun <- rstanarm:::ll_fun # gaussian expect_equivalent_loo(fit) expect_identical(ll_fun(fit), rstanarm:::.ll_gaussian_i) # binomial expect_equivalent_loo(example_model) expect_identical(ll_fun(example_model), rstanarm:::.ll_binomial_i) }) context("posterior_predict (stan_gamm4)") test_that("stan_gamm4 returns expected result for sleepstudy example", { skip_if_not_installed("mgcv") sleepstudy$y <- sleepstudy$Reaction / 10 SW(fit <- stan_gamm4(y ~ s(Days), data = sleepstudy, random = ~(1|Subject), chains = CHAINS, iter = ITER, seed = SEED, refresh = 0)) expect_silent(yrep1 <- posterior_predict(fit)) # expect_equal(dim(yrep1), c(nrow(as.data.frame(fit)), nobs(fit))) expect_silent(yrep2 <- posterior_predict(fit, draws = 1)) # expect_equal(dim(yrep2), c(1, nobs(fit))) expect_silent(posterior_predict(fit, newdata = sleepstudy)) }) context("posterior_predict (stan_(g)lmer)") test_that("compatible with stan_lmer", { check_for_pp_errors(fit) expect_linpred_equal(fit) }) test_that("compatible with stan_glmer (binomial)", { check_for_pp_errors(example_model) expect_linpred_equal(example_model) predprob <- posterior_linpred(example_model, transform = TRUE) expect_true(all(predprob > 0) && all(predprob < 1)) }) test_that("compatible with stan_(g)lmer with transformation in formula", { d <- mtcars d$cyl <- as.factor(d$cyl) args <- list(formula = mpg ~ log1p(wt) + (1|cyl) + (1|gear), 
data = d, iter = 10, chains = 1, seed = SEED, refresh = 0)
  SW(fit1 <- do.call("stan_lmer", args))
  SW(fit2 <- do.call("stan_glmer", args))
  nd <- d[6:10, ]
  nd$wt <- runif(5)
  expect_silent(posterior_predict(fit1))
  expect_silent(posterior_predict(fit2))
  expect_silent(posterior_predict(fit1, newdata = nd))
  expect_silent(posterior_predict(fit2, newdata = nd))
  expect_silent(posterior_linpred(fit1))
  expect_silent(posterior_linpred(fit2))
  expect_silent(posterior_linpred(fit1, newdata = nd))
  expect_silent(posterior_linpred(fit2, newdata = nd))
})

test_that("compatible with stan_lmer with offset", {
  offs <- rnorm(nrow(mtcars))
  SW(fit <- stan_lmer(mpg ~ wt + (1 | cyl) + (1 + wt | gear), data = mtcars,
                      prior = normal(0, 1), iter = 10, chains = 1,
                      seed = SEED, refresh = 0, offset = offs))
  # NOTE(review): "STATS" as the expected warning pattern looks odd -- confirm
  # it matches the warning text actually emitted by posterior_predict
  expect_warning(posterior_predict(fit, newdata = mtcars[1:2, ],
                                   offset = offs),
                 "STATS")
  check_for_pp_errors(fit, offset = offs)
})

# typo fixed in the test description: "predition" -> "prediction"
test_that("prediction with family mgcv::betar doesn't error", {
  test_data <- data.frame(y = c(0.1, 0.3), x = c(TRUE, FALSE))
  SW(fit <- stan_glmer(y ~ (1 | x), family = mgcv::betar(link = "logit"),
                       data = test_data, seed = 101, iter = 10, chains = 1,
                       refresh = 0))
  expect_silent(posterior_linpred(fit, newdata = test_data))
  expect_silent(posterior_predict(fit, newdata = test_data))
})

# compare to lme4 ---------------------------------------------------------

context("posterior_predict (compare to lme4)")
test_that("posterior_predict close to predict.merMod for gaussian", {
  mod1 <- as.formula(mpg ~ wt + (1 | cyl) + (1 | gear))
  mod2 <- as.formula(mpg ~ log1p(wt) + I(disp / 100) + (1 | cyl))
  mod3 <- as.formula(mpg ~ wt + (1 | cyl) + (1 + wt | gear))
  mod4 <- as.formula(log(mpg) ~ wt + (1 + wt | cyl) + (1 + wt + am | gear))
  lfit1 <- lmer(mod1, data = mtcars)
  SW(sfit1 <- stan_glmer(mod1, data = mtcars, iter = 400, chains = CHAINS,
                         seed = SEED, refresh = 0))
  lfit2 <- update(lfit1, formula = mod2)
  SW(sfit2 <- update(sfit1, formula = mod2))
  lfit3 <- update(lfit1, formula = mod3)
  SW(sfit3 <- update(sfit1,
formula = mod3)) lfit4 <- update(lfit1, formula = mod4) SW(sfit4 <- update(sfit1, formula = mod4)) nd <- nd2 <- mtcars[1:5, ] nd2$cyl[2] <- 5 # add new levels nd3 <- nd2 nd3$gear[2] <- 7 nd3$gear[5] <- 1 tol <- 0.3 for (j in 1:4) { expect_equal( colMeans(posterior_predict(get(paste0("sfit", j)), newdata = nd, seed = SEED)), unname(predict(get(paste0("lfit", j)), newdata = nd)), tol = tol, check.attributes = FALSE) expect_equal( colMeans(posterior_predict(get(paste0("sfit", j)), newdata = nd2, seed = SEED, allow.new.levels = TRUE)), unname(predict(get(paste0("lfit", j)), newdata = nd2, allow.new.levels = TRUE)), tol = tol, check.attributes = FALSE) expect_equal( colMeans(posterior_predict(get(paste0("sfit", j)), newdata = nd3, seed = SEED, allow.new.levels = TRUE)), unname(predict(get(paste0("lfit", j)), newdata = nd3, allow.new.levels = TRUE)), tol = tol, check.attributes = FALSE) } }) test_that("posterior_predict close to predict.merMod for binomial", { d <- nd <- lme4::cbpp sfit <- example_model lfit <- glmer(formula(example_model), data = d, family = "binomial") levels(nd$herd) <- c(levels(nd$herd), "99") nd$herd[1:2] <- "99" lpred <- simulate(lfit, newdata = nd, re.form = NULL, allow.new.levels = TRUE, nsim = 500, seed = SEED) for (j in 1:ncol(lpred)) { lpred[, j] <- lpred[, j][, 1] / rowSums(lpred[, j]) } lpred <- t(as.matrix(lpred)) spred <- posterior_predict(sfit, draws = 500, newdata = nd, seed = SEED) spred <- sweep(spred, 2, rowSums(get_y(sfit)), "/") expect_equal(colMeans(spred), unname(colMeans(lpred)), tol = .125, check.attributes = FALSE) }) rstanarm/tests/testthat/test_methods.R0000644000176200001440000011111014476664567017754 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either 
version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. suppressPackageStartupMessages(library(rstanarm)) library(lme4) library(MASS) SEED <- 12345 set.seed(SEED) ITER <- 10 CHAINS <- 2 REFRESH <- 0 if (!exists("example_model")) { example_model <- run_example_model() } N <- 200 x <- rnorm(N, 2, 1) z <- rnorm(N, 2, 1) mu <- binomial(link = "logit")$linkinv(1 + 0.2*x) phi <- exp(1.5 + 0.4*z) y <- rbeta(N, mu * phi, (1 - mu) * phi) fake_dat <- data.frame(y, x, z) remove(N, x, y, z, mu, phi) SW({ stan_glm1 <- stan_glm(mpg ~ wt + cyl, data = mtcars, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0) stan_glm_opt1 <- stan_glm(mpg ~ wt + cyl, data = mtcars, algorithm = "optimizing", seed = SEED, refresh = 0) stan_glm_vb1 <- update(stan_glm_opt1, algorithm = "meanfield", QR = TRUE, iter = 10000) glm1 <- glm(mpg ~ wt + cyl, data = mtcars) lmer1 <- lmer(diameter ~ (1|plate) + (1|sample), data = Penicillin) stan_lmer1 <- stan_lmer(diameter ~ (1|plate) + (1|sample), data = Penicillin, prior_intercept = normal(0, 50, autoscale = FALSE), prior_aux = normal(0, 10), iter = ITER, chains = CHAINS, seed = SEED, refresh = 0) lmer2 <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy) stan_lmer2 <- stan_lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0) stan_polr1 <- stan_polr(tobgp ~ agegp, data = esoph, prior = R2(0.2, "mean"), init_r = 0.1, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0) polr1 <- polr(tobgp ~ agegp, data = esoph, Hess = 
TRUE) stan_gamm41 <- stan_gamm4(mpg ~ s(wt) + cyl, data = mtcars, iter = ITER, chains = CHAINS, seed = SEED, refresh = 0) stan_betareg1 <- stan_betareg(y ~ x | z, data = fake_dat, link = "logit", link.phi = "log", refresh = 0, iter = ITER, chains = CHAINS, seed = SEED) betareg1 <- betareg::betareg(y ~ x | z, data = fake_dat, link = "logit", link.phi = "log") }) att_names <- function(object) { nms <- names(object) att_nms <- names(attributes(object)) att_nms2 <- lapply(object, function(x) sort(names(attributes(x)))) c(nms, att_nms, att_nms2) } check_att_names <- function(x,y) { expect_identical(att_names(x), att_names(y)) } check_sizes <- function(x,y) { expect_equal(length(x), length(y)) expect_equal(lapply(x, dim), lapply(y, dim)) } context("methods for stanreg objects") # extractors -------------------------------------------------------------- test_that("stanreg extractor methods work properly", { expect_equal(resid(stan_glm1), stan_glm1$residuals) expect_equal(coef(stan_glm1), stan_glm1$coefficients) expect_equal(vcov(stan_glm1), stan_glm1$covmat) expect_equal(fitted(stan_glm1), stan_glm1$fitted.values) expect_equal(se(stan_glm1), stan_glm1$ses) expect_equal(resid(stan_polr1), stan_polr1$residuals) expect_equal(coef(stan_polr1), stan_polr1$coefficients) expect_equal(vcov(stan_polr1), stan_polr1$covmat) expect_equal(fitted(stan_polr1), stan_polr1$fitted.values) expect_equal(se(stan_polr1), stan_polr1$ses) expect_equal(vcov(stan_glm_opt1), stan_glm_opt1$covmat) expect_equal(vcov(stan_glm_opt1, correlation = TRUE), cov2cor(stan_glm_opt1$covmat)) expect_equal(resid(stan_glm_opt1), stan_glm_opt1$residuals) expect_equal(coef(stan_glm_opt1), stan_glm_opt1$coefficients) expect_equal(fitted(stan_glm_opt1), stan_glm_opt1$fitted.values) expect_equal(se(stan_glm_opt1), stan_glm_opt1$ses) expect_equal(resid(stan_lmer1), stan_lmer1$residuals) expect_equal(fitted(stan_lmer1), stan_lmer1$fitted.values) expect_equal(se(stan_lmer1), stan_lmer1$ses) 
expect_equal(resid(example_model), example_model$residuals) expect_equal(fitted(example_model), example_model$fitted.values) expect_equal(se(example_model), example_model$ses) # coef and vcov are different for stan_(g)lmer models and are tested # separately later in this file expect_equal(resid(stan_betareg1), stan_betareg1$residuals) expect_equal(coef(stan_betareg1), stan_betareg1$coefficients) expect_equal(vcov(stan_betareg1), stan_betareg1$covmat) expect_equal(fitted(stan_betareg1), stan_betareg1$fitted.values) expect_equal(se(stan_betareg1), stan_betareg1$ses) }) # confint ----------------------------------------------------------------- test_that("confint method returns correct structure", { expect_silent(ci <- confint(stan_glm_opt1)) expect_silent(ci2 <- confint(stan_glm_opt1, parm = "wt", level = 0.9)) expect_equal(rownames(ci), c("(Intercept)", "wt", "cyl")) expect_equal(colnames(ci), c("2.5 %", "97.5 %")) expect_equal(rownames(ci2), c("wt")) expect_equal(colnames(ci2), c("5 %", "95 %")) expect_error(confint(stan_glm1), regexp = "use posterior_interval") expect_error(confint(stan_glm_vb1), regexp = "use posterior_interval") expect_error(confint(stan_polr1), regexp = "use posterior_interval") expect_error(confint(stan_lmer1), regexp = "use posterior_interval") expect_error(confint(stan_lmer2), regexp = "use posterior_interval") expect_error(confint(stan_betareg1), regexp = "use posterior_interval") }) # posterior_interval ----------------------------------------------------- test_that("posterior_interval returns correct structure", { expect_silent(ci <- posterior_interval(stan_glm1, prob = 0.5)) expect_silent(ci2 <- posterior_interval(stan_glm_vb1, pars = "wt", prob = 0.95)) expect_silent(ci3 <- posterior_interval(example_model, prob = 0.95, regex_pars = "herd")) expect_silent(ci4 <- posterior_interval(example_model, prob = 0.8, pars = "(Intercept)", regex_pars = "period")) expect_silent(ci5 <- posterior_interval(stan_polr1, prob = 0.9)) 
expect_identical(rownames(ci), c("(Intercept)", "wt", "cyl", "sigma")) expect_identical(rownames(ci2), "wt") expect_identical(rownames(ci3), c(paste0("b[(Intercept) herd:", 1:15, "]"), "Sigma[herd:(Intercept),(Intercept)]")) expect_identical(rownames(ci4), c("(Intercept)", paste0("period", 2:4))) expect_identical(colnames(ci), c("25%", "75%")) expect_identical(colnames(ci2), c("2.5%", "97.5%")) expect_identical(colnames(ci3), c("2.5%", "97.5%")) expect_identical(colnames(ci4), c("10%", "90%")) expect_identical(colnames(ci5), c("5%", "95%")) expect_silent(ci6 <- posterior_interval(stan_betareg1, prob = 0.5)) expect_identical(colnames(ci6), c("25%", "75%")) expect_error(posterior_interval(stan_glm1, type = "HPD"), regexp = "only option for 'type' is 'central'") expect_identical(colnames(posterior_interval(stan_glm_opt1)), c("5%", "95%")) expect_error(posterior_interval(lm(mpg ~ wt, data = mtcars)), regexp = "should be a matrix") prob_msg <- "'prob' should be a single number greater than 0 and less than 1." 
expect_error(posterior_interval(stan_glm1, prob = c(0.25, 0.75)), regexp = prob_msg) expect_error(posterior_interval(stan_glm1, prob = 0), regexp = prob_msg) expect_error(posterior_interval(stan_glm1, prob = 1), regexp = prob_msg) expect_error(posterior_interval(stan_glm1, prob = 2), regexp = prob_msg) }) # log_lik ----------------------------------------------------------------- test_that("log_lik method works", { expect_silent(log_lik(stan_glm_opt1)) expect_silent(log_lik(stan_glm_vb1)) expect_silent(log_lik(stan_glm1)) expect_silent(log_lik(stan_polr1)) expect_silent(log_lik(stan_gamm41)) expect_equal(dim(log_lik(stan_polr1)), c(ITER, nobs(stan_polr1))) expect_equal(dim(log_lik(stan_lmer1)), c(ITER, nobs(stan_lmer1))) expect_equal(log_lik(stan_betareg1), log_lik(stan_betareg1, newdata = fake_dat)) # Compute log-lik matrix using different method than log_lik.stanreg # and compare samp <- as.matrix(stan_glm1) y <- get_y(stan_glm1) y_new <- y[1:10] + rnorm(10) x <- get_x(stan_glm1) x_new <- cbind(1, x[1:10, 2:3] + rnorm(10)) sigma <- samp[, 4] eta <- tcrossprod(x, samp[, 1:3]) eta_new <- tcrossprod(x_new, samp[, 1:3]) llmat <- matrix(NA, nrow = nrow(samp), ncol = nrow(eta)) llmat_new <- matrix(NA, nrow = nrow(samp), ncol = nrow(eta_new)) for (i in 1:nrow(llmat)) { llmat[i, ] <- dnorm(y, mean = eta[, i], sd = sigma[i], log = TRUE) llmat_new[i, ] <- dnorm(y_new, mean = eta_new[, i], sd = sigma[i], log = TRUE) } expect_equal(log_lik(stan_glm1), llmat, check.attributes = FALSE) nd <- data.frame(mpg = y_new, wt = x_new[, 2], cyl = x_new[, 3]) expect_equal(log_lik(stan_glm1, newdata = nd), llmat_new, check.attributes = FALSE) # make sure log_lik with newdata equals log_lik if newdata is the same as the # data used to fit the model expect_equal(log_lik(example_model), log_lik(example_model, newdata = cbpp)) expect_equal(log_lik(stan_lmer2), log_lik(stan_lmer2, newdata = sleepstudy)) expect_equal(log_lik(stan_glm1), log_lik(stan_glm1, newdata = mtcars)) 
expect_equal(log_lik(stan_polr1), log_lik(stan_polr1, newdata = esoph)) expect_equal(log_lik(stan_gamm41), log_lik(stan_gamm41, newdata = mtcars)) }) # ngrps, nobs ------------------------------------------------------------- test_that("ngrps is right", { expect_equal(ngrps(lmer1), ngrps(stan_lmer1)) expect_equal(ngrps(lmer2), ngrps(stan_lmer2)) expect_error(ngrps(stan_glm1), "stan_glmer and stan_lmer models only") expect_error(ngrps(stan_betareg1), "stan_glmer and stan_lmer models only") expect_equal(nobs(stan_betareg1), nobs(betareg1)) }) test_that("nobs is right", { expect_equal(nobs(lmer1), nobs(stan_lmer1)) expect_equal(nobs(lmer2), nobs(stan_lmer2)) expect_equal(nobs(glm1), nobs(stan_glm_opt1)) expect_equal(nobs(glm1), nobs(stan_glm1)) expect_equal(nobs(polr1), nobs(stan_polr1)) }) # vcov -------------------------------------------------------------- test_that("vcov returns correct structure", { expect_equal(dimnames(vcov(stan_glm1)), dimnames(vcov(glm1))) expect_equal(dimnames(vcov(stan_polr1)), dimnames(vcov(polr1))) expect_equal(dimnames(vcov(stan_lmer1)), dimnames(vcov(lmer1))) expect_equal(dimnames(vcov(stan_lmer2)), dimnames(vcov(lmer2))) expect_equal(dimnames(vcov(stan_betareg1)), dimnames(vcov(betareg1))) }) # sigma -------------------------------------------------------------- test_that("sigma method works", { # need to use :: because sigma is masked by lme4's sigma rsigma <- rstanarm::sigma expect_identical(rsigma(stan_polr1), 1) expect_identical(rsigma(example_model), 1) expect_double <- function(x) expect_type(x, "double") expect_double(sig <- rsigma(stan_lmer1)) expect_false(identical(sig, 1)) expect_double(sig <- rsigma(stan_lmer2)) expect_false(identical(sig, 1)) expect_double(sig <- rsigma(stan_glm1)) expect_false(identical(sig, 1)) expect_double(sig <- rsigma(stan_glm_vb1)) expect_false(identical(sig, 1)) expect_double(sig <- rsigma(stan_glm_opt1)) expect_false(identical(sig, 1)) expect_double(sig <- rsigma(stan_betareg1)) 
expect_true(identical(sig, 1)) }) # VarCorr ----------------------------------------------------------------- test_that("VarCorr returns correct structure", { vc_lmer1 <- VarCorr(lmer1); vc_stan1 <- VarCorr(stan_lmer1) vc_lmer2 <- VarCorr(lmer2); vc_stan2 <- VarCorr(stan_lmer2) expect_s3_class(vc_stan1, class(vc_lmer1)) expect_s3_class(vc_stan2, class(vc_lmer2)) check_att_names(vc_stan1, vc_lmer1) check_att_names(vc_stan2, vc_lmer2) v <- sapply(vc_stan1, "[[", 1) expect_true(length(unique(v)) == length(v)) expect_error(VarCorr(stan_glm1), "stan_glmer and stan_lmer models only") expect_error(VarCorr(stan_betareg1), "stan_glmer and stan_lmer models only") }) # ranef,fixef,coef ----------------------------------------------------------- test_that("ranef returns correct structure", { re_stan1 <- ranef(stan_lmer1); re_lmer1 <- ranef(lmer1) re_stan2 <- ranef(stan_lmer1); re_lmer2 <- ranef(lmer1) expect_s3_class(re_stan1, class(re_lmer1)) expect_s3_class(re_stan2, class(re_lmer2)) check_att_names(re_stan1, re_lmer1) check_att_names(re_stan2, re_lmer2) check_sizes(re_stan1, re_lmer1) check_sizes(re_stan2, re_lmer2) expect_error(ranef(stan_glm1), "stan_glmer and stan_lmer models only") expect_error(ranef(stan_betareg1), "stan_glmer and stan_lmer models only") }) test_that("fixef returns the right coefs", { expect_identical(names(fixef(stan_lmer1)), names(fixef(lmer1))) expect_identical(names(fixef(stan_lmer2)), names(fixef(lmer2))) }) test_that("coef returns the right structure", { coef_stan1 <- coef(stan_lmer1); coef_lmer1 <- coef(lmer1) coef_stan2 <- coef(stan_lmer1); coef_lmer2 <- coef(lmer1) check_att_names(coef_stan1, coef_lmer1) check_att_names(coef_stan2, coef_lmer2) check_sizes(coef_stan1, coef_lmer1) check_sizes(coef_stan2, coef_lmer2) }) test_that("coef ok if any 'ranef' missing from 'fixef'", { SW(stan_lmer3 <- update(stan_lmer2, formula = . ~ (Days | Subject))) lmer3 <- update(lmer2, formula = . 
~ (Days | Subject)) coef_stan3 <- coef(stan_lmer3); coef_lmer3 <- coef(lmer3) check_att_names(coef_stan3, coef_lmer3) check_sizes(coef_stan3, coef_lmer3) }) # as.matrix,as.data.frame,as.array ---------------------------------------- test_that("as.matrix, as.data.frame, as.array methods work for MCMC", { last_dimnames <- rstanarm:::last_dimnames # glm mat <- as.matrix(stan_glm1) df <- as.data.frame(stan_glm1) arr <- as.array(stan_glm1) expect_identical(df, as.data.frame(mat)) expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) expect_equal(dim(mat), c(floor(ITER/2) * CHAINS, 4L)) expect_equal(dim(arr), c(floor(ITER/2), CHAINS, 4L)) expect_identical(last_dimnames(mat), c("(Intercept)", "wt", "cyl", "sigma")) expect_identical(last_dimnames(arr), last_dimnames(mat)) # selecting only 1 parameter mat <- as.matrix(stan_glm1, pars = "wt") df <- as.data.frame(stan_glm1, pars = "wt") arr <- as.array(stan_glm1, pars = "wt") expect_identical(df, as.data.frame(mat)) expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) expect_equal(dim(mat), c(floor(ITER/2) * CHAINS, 1L)) expect_equal(dim(arr), c(floor(ITER/2), CHAINS, 1L)) expect_identical(last_dimnames(mat), "wt") expect_identical(last_dimnames(arr), last_dimnames(mat)) # glmer mat <- as.matrix(example_model) df <- as.data.frame(example_model) arr <- as.array(example_model) expect_identical(df, as.data.frame(mat)) expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) nc <- length(c(fixef(example_model), unlist(ranef(example_model)))) + 1L nr <- rstanarm:::posterior_sample_size(example_model) nms <- rownames(summary(example_model))[seq_len(nc)] expect_equal(dim(mat), c(nr, nc)) expect_equal(dim(arr), c(nr / 2, 2, nc)) expect_identical(last_dimnames(mat), nms) expect_identical(last_dimnames(mat), last_dimnames(arr)) # pars & regex_pars mat <- as.matrix(example_model, pars = "mean_PPD", regex_pars = "period") df <- as.data.frame(example_model, pars = "mean_PPD", regex_pars = "period") arr <- as.array(example_model, pars = "mean_PPD", regex_pars = 
"period") expect_identical(df, as.data.frame(mat)) expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) expect_equal(dim(mat), c(nr, 4L)) expect_equal(dim(arr), c(nr/2, 2, 4L)) expect_identical(last_dimnames(mat), c("mean_PPD", paste0("period", 2:4))) expect_identical(last_dimnames(mat), last_dimnames(arr)) # lmer mat <- as.matrix(stan_lmer2) df <- as.data.frame(stan_lmer2) arr <- as.array(stan_lmer2) expect_identical(df, as.data.frame(mat)) expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) # +1 for "sigma" and +3 for "Sigma" nc <- length(c(fixef(stan_lmer2), unlist(ranef(stan_lmer2)))) + 4 nms <- rownames(summary(stan_lmer2))[seq_len(nc)] expect_equal(dim(mat), c(floor(ITER/2) * CHAINS, nc)) expect_equal(dim(arr), c(floor(ITER/2), CHAINS, nc)) expect_identical(last_dimnames(mat), nms) expect_identical(last_dimnames(mat), last_dimnames(arr)) mat <- as.matrix(stan_lmer2, pars = "(Intercept)", regex_pars = "b\\[Days Subject") df <- as.data.frame(stan_lmer2, pars = "(Intercept)", regex_pars = "b\\[Days Subject") expect_identical(df, as.data.frame(mat)) s <- summary(stan_lmer2, pars = "(Intercept)", regex_pars = "b\\[Days Subject") expect_equal(dim(mat), c(floor(ITER/2) * CHAINS, nrow(s))) expect_identical(colnames(mat), rownames(s)) # polr mat <- as.matrix(stan_polr1) df <- as.data.frame(stan_polr1) arr <- as.array(stan_polr1) expect_identical(df, as.data.frame(mat)) expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) nms <- names(c(stan_polr1$coefficients, stan_polr1$zeta)) expect_equal(dim(mat), c(floor(ITER/2) * CHAINS, length(nms))) expect_equal(dim(arr), c(floor(ITER/2), CHAINS, length(nms))) expect_identical(last_dimnames(mat), nms) expect_identical(last_dimnames(mat), last_dimnames(arr)) mat <- as.matrix(stan_polr1, regex_pars = "agegp") df <- as.data.frame(stan_polr1, regex_pars = "agegp") expect_identical(df, as.data.frame(mat)) # betareg mat <- as.matrix(stan_betareg1) df <- as.data.frame(stan_betareg1) arr <- as.array(stan_betareg1) expect_identical(df, as.data.frame(mat)) 
expect_identical(mat[1:2, 1], arr[1:2, 1, 1]) expect_equal(dim(mat), c(floor(ITER/2) * CHAINS, 4L)) expect_equal(dim(arr), c(floor(ITER/2), CHAINS, 4L)) expect_identical(last_dimnames(mat), c("(Intercept)", "x", "(phi)_(Intercept)", "(phi)_z")) expect_identical(last_dimnames(arr), last_dimnames(mat)) }) test_that("as.matrix and as.data.frame work for optimization and vb", { # optimization mat <- as.matrix(stan_glm_opt1) df <- as.data.frame(stan_glm_opt1) expect_identical(df, as.data.frame(mat)) expect_equal(dim(mat), c(1000L, 4L)) expect_identical(colnames(mat), c("(Intercept)", "wt", "cyl", "sigma")) mat <- as.matrix(stan_glm_opt1, pars = "sigma") df <- as.data.frame(stan_glm_opt1, pars = "sigma") expect_identical(df, as.data.frame(mat)) expect_equal(dim(mat), c(1000, 1L)) expect_identical(colnames(mat), "sigma") # vb mat <- as.matrix(stan_glm_vb1) df <- as.data.frame(stan_glm_vb1) expect_identical(df, as.data.frame(mat)) expect_equal(dim(mat), c(1000L, 4L)) expect_identical(colnames(mat), c("(Intercept)", "wt", "cyl", "sigma")) mat <- as.matrix(stan_glm_vb1, pars = c("(Intercept)", "sigma")) df <- as.data.frame(stan_glm_vb1, pars = c("(Intercept)", "sigma")) expect_identical(df, as.data.frame(mat)) expect_equal(dim(mat), c(1000, 2L)) expect_identical(colnames(mat), c("(Intercept)", "sigma")) }) test_that("as.matrix and as.array errors & warnings", { # optimization and vb errors expect_error(as.array(stan_glm_opt1), regexp = "use 'as.matrix' instead") expect_error(as.array(stan_glm_vb1), regexp = "use 'as.matrix' instead") # pars and regex_pars errors expect_error(as.matrix(stan_glm1, pars = c("bad1", "sigma")), regexp = "No parameter(s) bad1", fixed = TRUE) expect_error(as.matrix(stan_glm1, regex_pars = "not a parameter"), regexp = "No matches for 'regex_pars'") expect_warning(as.matrix(stan_glm_opt1, regex_pars = "wt"), regexp = "'regex_pars' ignored") }) # terms, formula, model.frame, model.matrix, update methods ----------------- context("model.frame methods") 
test_that("model.frame works properly", { expect_identical(model.frame(stan_glm1), model.frame(glm1)) expect_identical(model.frame(stan_glm_opt1), model.frame(glm1)) expect_identical(model.frame(stan_glm_vb1), model.frame(glm1)) expect_identical(model.frame(stan_polr1), model.frame(polr1)) expect_identical(model.frame(stan_lmer1), model.frame(lmer1)) expect_identical(model.frame(stan_lmer2), model.frame(lmer2)) # lme4 is doing something different with the names # expect_identical(model.frame(stan_lmer1, fixed.only = TRUE), # model.frame(lmer1, fixed.only = TRUE)) # expect_identical(model.frame(stan_lmer2, fixed.only = TRUE), # model.frame(lmer2, fixed.only = TRUE)) expect_identical(model.frame(stan_betareg1), model.frame(betareg1)) }) context("terms methods") test_that("terms works properly", { expect_identical(terms(stan_glm1), terms(glm1)) expect_identical(terms(stan_glm_opt1), terms(glm1)) expect_identical(terms(stan_glm_vb1), terms(glm1)) expect_identical(terms(stan_polr1), terms(polr1)) expect_identical(terms(stan_lmer1), terms(lmer1)) expect_identical(terms(stan_lmer2), terms(lmer2)) expect_identical(terms(stan_lmer1, fixed.only = TRUE), terms(lmer1, fixed.only = TRUE)) expect_identical(terms(stan_lmer2, fixed.only = TRUE), terms(lmer2, fixed.only = TRUE)) expect_equal(terms(stan_lmer1, random.only = TRUE), terms(lmer1, random.only = TRUE)) expect_equal(terms(stan_lmer2, random.only = TRUE), terms(lmer2, random.only = TRUE)) expect_error(terms(stan_lmer1, fixed.only = TRUE, random.only = TRUE), regexp = "can't both be TRUE") expect_identical(terms(stan_betareg1), terms(betareg1)) }) context("formula methods") test_that("formula works properly", { expect_identical(formula(stan_glm1), formula(glm1)) expect_identical(formula(stan_glm_opt1), formula(glm1)) expect_identical(formula(stan_glm_vb1), formula(glm1)) expect_identical(formula(stan_betareg1), formula(betareg1)) expect_equal(terms(stan_polr1), formula(polr1)) expect_identical(formula(stan_lmer1), 
formula(lmer1)) expect_identical(formula(stan_lmer2), formula(lmer2)) expect_identical(formula(stan_lmer1, fixed.only = TRUE), formula(lmer1, fixed.only = TRUE)) expect_identical(formula(stan_lmer2, fixed.only = TRUE), formula(lmer2, fixed.only = TRUE)) expect_equal(formula(stan_lmer1, random.only = TRUE), formula(lmer1, random.only = TRUE)) expect_equal(formula(stan_lmer2, random.only = TRUE), formula(lmer2, random.only = TRUE)) expect_error(formula(stan_lmer1, fixed.only = TRUE, random.only = TRUE), regexp = "can't both be TRUE") tmp <- stan_lmer1 tmp$formula <- NULL attr(tmp$glmod$fr, "formula") <- NULL expect_equal(formula(tmp), formula(lmer1)) tmp$call <- NULL expect_error(formula(tmp), regexp = "can't find formula", ignore.case = TRUE) }) context("update methods") test_that("update works properly", { pss <- rstanarm:::posterior_sample_size SW(fit1 <- update(stan_lmer2, iter = ITER * 2, chains = 2 * CHAINS)) SW(fit2 <- update(stan_glm1, iter = ITER * 2, chains = 2 * CHAINS)) SW(fit3 <- update(stan_betareg1, iter = ITER * 2, chains = CHAINS * 2)) expect_equal(pss(fit1), 4 * pss(stan_lmer2)) expect_equal(pss(fit2), 4 * pss(stan_glm1)) expect_equal(pss(fit3), 4 * pss(stan_betareg1)) call_only <- update(fit1, evaluate = FALSE) expect_is(call_only, "call") expect_identical(call_only, getCall(fit1)) # expect_error(fit2 <- update(fit2, algorithm = "optimizing"), # regexp = "unknown arguments: chains") expect_identical(fit2$algorithm, "sampling") fit2$call <- NULL expect_error(update(fit2), regexp = "does not contain a 'call' component") }) # print and summary ------------------------------------------------------- context("print and summary methods") test_that("print and summary methods ok for mcmc and vb", { expect_output(print(example_model, digits = 2), "stan_glmer") expect_output(print(example_model, digits = 2), "Error terms") expect_output(print(stan_lmer1, digits = 2), "stan_lmer") expect_output(print(stan_lmer2), "stan_lmer") expect_output(print(stan_polr1), 
"stan_polr") expect_output(print(stan_polr1), "Cutpoints") expect_output(print(stan_glm_opt1, digits = 5), "stan_glm") expect_output(print(stan_glm_vb1, digits = 5), "stan_glm") expect_output(print(stan_betareg1, digits = 2), "stan_betareg") expect_silent(s <- summary(stan_lmer1, pars = "varying", regex_pars = "Sigma")) expect_silent(s_alt <- summary(stan_lmer1, regex_pars = c("plate", "sample"))) expect_identical(s, s_alt) expect_silent(s <- summary(stan_lmer1)) expect_silent(d <- as.data.frame(s)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_lmer") expect_identical(attr(s, "algorithm"), "sampling") expect_identical(colnames(s), colnames(d)) expect_identical(rownames(s), rownames(d)) expect_silent(s <- summary(example_model, pars = "beta", regex_pars = "herd")) expect_silent(s_alt <- summary(example_model, pars = c("beta", "varying"), regex_pars = "Sigma")) expect_identical(s, s_alt) expect_silent(d <- as.data.frame(s)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_glmer") expect_output( print(s), paste(rstanarm:::posterior_sample_size(example_model)), "(posterior sample size)" ) expect_identical(attr(s, "algorithm"), "sampling") expect_identical(colnames(s), colnames(d)) expect_identical(rownames(s), rownames(d)) expect_silent(s <- summary(stan_polr1, pars = "beta", probs = c(0.25, 0.75))) expect_silent(d <- as.data.frame(s)) expect_identical(colnames(s), c("mean", "mcse", "sd", "25%", "75%", "n_eff", "Rhat")) expect_identical(colnames(s), colnames(d)) expect_identical(rownames(s), rownames(d)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_polr") expect_warning(s <- summary(stan_glm1, pars = "varying"), regexp = "No group-specific parameters. 
'varying' ignored.") expect_silent(s <- summary(stan_glm1, pars = c("alpha", "beta"), digits = 3)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_glm") expect_identical(attr(s, "algorithm"), "sampling") expect_silent(s <- summary(stan_glm_vb1, pars = c("alpha", "beta"))) expect_silent(d <- as.data.frame(s)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_glm") expect_identical(attr(s, "algorithm"), "meanfield") expect_warning(s <- summary(stan_betareg1, pars = "varying"), regexp = "No group-specific parameters. 'varying' ignored.") expect_silent(s <- summary(stan_betareg1, pars = c("alpha", "beta"), digits = 3)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_betareg") expect_identical(attr(s, "algorithm"), "sampling") }) test_that("print and summary include subset information", { SW(fit <- stan_glm(mpg ~ wt, data = mtcars, subset = cyl == 4, iter = 5, chains = 1, refresh = 0)) expect_output(print(fit), "subset: cyl == 4") expect_output(print(summary(fit)), "subset: cyl == 4") SW(fit <- stan_glm(mpg ~ wt, data = mtcars, subset = rep(TRUE, 32), iter = 5, chains = 1, refresh = 0)) expect_output(print(fit), "subset: rep(TRUE, 32)", fixed = TRUE) expect_output(print(summary(fit)), "subset: rep(TRUE, 32)", fixed = TRUE) sub <- mtcars$cyl == 4 SW(fit <- stan_glm(mpg ~ wt, data = mtcars, subset = sub, iter = 5, chains = 1, refresh = 0)) expect_output(print(fit), "subset: sub", fixed = TRUE) expect_output(print(summary(fit)), "subset: sub", fixed = TRUE) }) test_that("print and summary methods ok for optimization", { expect_silent(s <- summary(stan_glm_opt1)) expect_silent(s <- summary(stan_glm_opt1, pars = c("wt", "sigma"), digits = 8)) expect_warning(s <- summary(stan_glm_opt1, regex_pars = c("wt", "sigma")), regexp = "'regex_pars' ignored") expect_silent(d <- as.data.frame(s)) expect_s3_class(s, "summary.stanreg") expect_output(print(s), "stan_glm") expect_identical(attr(s, "algorithm"), "optimizing") 
expect_identical(colnames(s), colnames(d)) expect_identical(rownames(s), rownames(d)) counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3) SW(fit <- stan_glm.nb(counts ~ outcome + treatment, algorithm = "optimizing", seed = SEED, refresh = 0)) expect_output(print(fit), "reciprocal_dispersion") clotting <- data.frame(log_u = log(c(5,10,15,20,30,40,60,80,100)), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12)) SW(fit2 <- stan_glm(lot1 ~ log_u, data = clotting, family = Gamma(link="log"), algorithm = "optimizing", seed = SEED, refresh = 0)) SW(fit3 <- update(fit2, family = inverse.gaussian(link = "log"))) expect_output(print(fit2), "shape") expect_output(print(fit3), "lambda") }) # prior_summary ----------------------------------------------------------- test_that("prior_summary errors if info not found", { tmp <- example_model tmp$prior.info <- NULL expect_message(s <- prior_summary(tmp), "Priors not found in stanreg object") expect_null(s) }) test_that("prior_summary doesn't error", { expect_output(print(prior_summary(example_model, digits = 2)), "Priors for model 'example_model'") expect_output(print(prior_summary(stan_lmer1, digits = 2)), "stan_lmer1") expect_output(print(prior_summary(stan_lmer2)), "stan_lmer2") expect_output(print(prior_summary(stan_polr1)), "stan_polr1") expect_output(print(prior_summary(stan_glm_opt1)), "stan_glm_opt1") expect_output(print(prior_summary(stan_glm_vb1)), "stan_glm_vb1") expect_output(print(prior_summary(stan_betareg1)), "stan_betareg1") }) test_that("prior_summary returns correctly named list", { expect_named(prior_summary(example_model), c("prior", "prior_intercept", "prior_covariance")) expect_named(prior_summary(stan_lmer1), c("prior", "prior_intercept", "prior_covariance", "prior_aux")) expect_named(prior_summary(stan_lmer2), c("prior", "prior_intercept", "prior_covariance", "prior_aux")) expect_named(prior_summary(stan_polr1), c("prior", "prior_counts")) 
expect_named(prior_summary(stan_glm_opt1), c("prior", "prior_intercept", "prior_aux")) expect_named(prior_summary(stan_glm_vb1), c("prior", "prior_intercept", "prior_aux")) expect_named(prior_summary(stan_betareg1), c("prior", "prior_z", "prior_intercept", "prior_intercept_z", "prior_aux")) }) # predictive_error,predictive_interval ------------------------------------ context("predictive error and interval methods") test_that("predictive_error works", { expect_error(predictive_error(stan_glm1, draws = 100), "'draws' should be <= posterior sample size") expect_error(predictive_error(stan_polr1), "not currently available for stan_polr") expect_error(predictive_error(stan_betareg1, draws = 600), "'draws' should be <= posterior sample size") mods <- c("stan_glm1", "stan_glm_vb1", "stan_lmer1", "stan_lmer2", "example_model") for (m in seq_along(mods)) { mod <- get(mods[m]) err <- predictive_error(mod, draws = 5) expect_equal(dim(err), c(5, nobs(mod)), info = mods[m]) } err2 <- predictive_error(stan_glm1, newdata = model.frame(stan_glm1)[1:10, ], draws = 7) expect_equal(dim(err2), c(7, 10)) err3 <- predictive_error(example_model, draws = 5, newdata = data.frame( size = c(10, 20), incidence = c(5, 10), period = factor(c(1,2)), herd = c(1, 15) )) expect_equal(dim(err3), c(5, 2)) }) test_that("predictive_interval works", { expect_error(predictive_interval(stan_glm1, draws = 100), "'draws' should be <= posterior sample size") expect_error(predictive_interval(stan_glm1, prob = c(0.25, 0.76)), "'prob' should be a single number greater than 0 and less than 1") expect_error(predictive_interval(stan_polr1), "not currently available for stan_polr") expect_error(predictive_interval(stan_betareg1, draws = 600), "'draws' should be <= posterior sample size") expect_error(predictive_interval(stan_betareg1, prob = c(0.25, 0.76)), "'prob' should be a single number greater than 0 and less than 1") mods <- c("stan_glm1", "stan_glm_vb1", "stan_lmer1", "stan_lmer2", "example_model") for (m 
in seq_along(mods)) { mod <- get(mods[m]) pint1 <- predictive_interval(mod, draws = 5) expect_equal(dim(pint1), c(nobs(mod), 2), info = mods[m]) expect_identical(colnames(pint1), c("5%", "95%"), info = mods[m]) } pint2 <- predictive_interval(stan_glm1, prob = 0.5, newdata = model.frame(stan_glm1)[1:2, ]) expect_equal(dim(pint2), c(2, 2)) expect_identical(colnames(pint2), c("25%", "75%")) pint3 <- predictive_interval(example_model, prob = 0.8, newdata = lme4::cbpp[1:10, ]) expect_equal(dim(pint3), c(10, 2)) expect_identical(colnames(pint3), c("10%", "90%")) }) test_that("predictive_error stanreg and ppd methods return the same thing", { preds <- posterior_predict(stan_glm1, seed = 123) expect_equal( predictive_error(stan_glm1, seed = 123), predictive_error(preds, y = stan_glm1$y) ) preds <- posterior_predict(stan_betareg1, seed = 123) expect_equal( predictive_error(stan_betareg1, seed = 123), predictive_error(preds, y = stan_betareg1$y) ) }) test_that("predictive_interval stanreg and ppd methods return the same thing", { preds <- posterior_predict(stan_glm1, seed = 123) expect_equal( predictive_interval(stan_glm1, seed = 123), predictive_interval(preds) ) preds <- posterior_predict(stan_betareg1, seed = 123) expect_equal( predictive_interval(stan_betareg1, seed = 123), predictive_interval(preds) ) }) # stanreg lists ----------------------------------------------------------- test_that("stan*_list functions throw proper errors", { expect_error(stanreg_list(), ">= 1 is not TRUE") expect_error(stanreg_list(stan_glm1, glm1), "For stanreg_list") expect_error(stanmvreg_list(stan_glm1, glm1), "For stanmvreg_list") expect_error(stanjm_list(stan_glm1, glm1), "For stanjm_list") }) test_that("stanreg_list works", { list1 <- stanreg_list(stan_lmer1, stan_lmer2) expect_named(list1, c("stan_lmer1", "stan_lmer2")) expect_equivalent(attr(list1, "families"), c("gaussian", "gaussian")) expect_identical(list1$stan_lmer1, stan_lmer1) expect_identical(list1$stan_lmer2, stan_lmer2) }) # 
posterior pkg draws formats --------------------------------------------- test_that("as_draws methods work", { draws <- as_draws_df(stan_lmer1) expect_equal(posterior::variables(draws), colnames(as.matrix(stan_lmer1))) expect_equal(posterior::nvariables(draws), ncol(as.matrix(stan_lmer1))) expect_equal(posterior::ndraws(draws), ITER) expect_equal(posterior::niterations(draws), ITER/CHAINS) expect_equal(posterior::nchains(draws), CHAINS) draws <- as_draws_df(stan_lmer1, pars = "sigma") expect_equal(posterior::variables(draws), "sigma") draws <- as_draws_matrix(stan_lmer1) expect_equal(posterior::variables(draws), colnames(as.matrix(stan_lmer1))) expect_equal(posterior::nvariables(draws), ncol(as.matrix(stan_lmer1))) expect_equal(posterior::ndraws(draws), ITER) expect_equal(posterior::niterations(draws), ITER) expect_equal(posterior::nchains(draws), 1) draws <- as_draws_df(stan_glm_vb1) expect_equal(posterior::variables(draws), colnames(as.matrix(stan_glm_vb1))) expect_equal(posterior::nvariables(draws), ncol(as.matrix(stan_glm_vb1))) expect_equal(posterior::ndraws(draws), 1000) expect_equal(posterior::niterations(draws), 1000) expect_equal(posterior::nchains(draws), 1) draws <- as_draws_df(stan_glm_opt1) expect_equal(posterior::variables(draws), colnames(as.matrix(stan_glm_vb1))) expect_equal(posterior::nvariables(draws), ncol(as.matrix(stan_glm_vb1))) expect_equal(posterior::ndraws(draws), 1000) expect_equal(posterior::niterations(draws), 1000) expect_equal(posterior::nchains(draws), 1) expect_equal( as_draws_list(as_draws_array(stan_polr1)), as_draws_list(stan_polr1) ) expect_error( as_draws_array(stan_glm_opt1), "not fit using MCMC" ) expect_error( as_draws_array(stan_glm_vb1), "not fit using MCMC" ) }) rstanarm/tests/testthat/test_posterior_predict.R0000644000176200001440000002741714414044166022044 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is 
free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. suppressPackageStartupMessages(library(rstanarm)) library(lme4) SEED <- 123 set.seed(SEED) ITER <- 100 CHAINS <- 2 REFRESH <- 0 if (!exists("example_model")) { example_model <- run_example_model() } # Error messages ---------------------------------------------------------- test_that("posterior_predict does not error if model fit using optimization", { fit1 <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED, refresh = 0) expect_silent(posterior_predict(fit1)) expect_silent(posterior_linpred(fit1)) }) test_that("posterior_predict errors if NAs in newdata", { nd <- model.frame(example_model) nd$period[1] <- NA expect_error(posterior_predict(example_model, newdata = nd), regexp = "NAs are not allowed in 'newdata'") expect_error(posterior_linpred(example_model, newdata = nd), regexp = "NAs are not allowed in 'newdata'") }) test_that("posterior_predict errors if draws > posterior sample size", { expect_error(posterior_predict(example_model, draws = 1e6), regexp = "'draws' should be <= posterior sample size") }) # VB ---------------------------------------------------------------------- context("posterior_predict ok for vb") test_that("silent for vb", { SW(fit1 <- stan_glm(mpg ~ wt + cyl + am, data = mtcars, algorithm = "meanfield", refresh = 0)) SW(fit2 <- update(fit1, 
algorithm = "fullrank", refresh = 0)) expect_silent(posterior_predict(fit1)) expect_silent(posterior_predict(fit2)) expect_silent(posterior_linpred(fit1)) expect_silent(posterior_linpred(fit2)) }) # MCMC -------------------------------------------------------------------- test_that("edge cases for posterior_predict work correctly", { dims <- c(nrow(as.matrix(example_model)), nrow(lme4::cbpp)) expect_identical(posterior_predict(example_model, re.form = NA, seed = SEED), posterior_predict(example_model, re.form = ~0, seed = SEED)) expect_identical(posterior_linpred(example_model, re.form = NA), posterior_linpred(example_model, re.form = ~0)) expect_identical(posterior_predict(example_model, seed = SEED), posterior_predict(example_model, newdata = lme4::cbpp, seed = SEED)) expect_identical(posterior_linpred(example_model), posterior_linpred(example_model, newdata = lme4::cbpp)) expect_error(posterior_predict(example_model, re.form = ~1)) expect_error(posterior_predict(example_model, re.form = ~(1|foo))) expect_error(posterior_linpred(example_model, re.form = ~1)) expect_error(posterior_linpred(example_model, re.form = ~(1|foo))) }) test_that("lme4 tests work similarly", { # loosely following predict tests from lme4 sfit <- example_model nd <- lme4::cbpp p1 <- posterior_predict(sfit, seed = SEED) p1b <- posterior_predict(sfit, newdata = nd, seed = SEED) expect_equal(p1, p1b) p2 <- posterior_predict(sfit, re.form = NA, seed = SEED) expect_equal(ncol(p2), nrow(nd)) nd2 <- with(nd, expand.grid(period = unique(period), herd = unique(herd), size = 20)) nd2$incidence <- 0 p3 <- posterior_predict(sfit, nd2, seed = SEED) p4 <- expect_silent(posterior_predict(sfit, nd2, re.form = NA, seed = SEED)) p5 <- posterior_predict(sfit, nd2, re.form = ~(1|herd), seed = SEED) expect_equal(p3, p5) # new levels nd3 <- rbind(nd2, data.frame(period = as.character(1:4), herd = rep("new",4), size = 20, incidence = 0)) p6 <- posterior_predict(sfit, nd3, allow.new.levels = TRUE, seed = SEED) 
expect_equal(colMeans(p3), colMeans(p6[, 1:ncol(p3)]), tol = 0.05) expect_equal(apply(p3, 2, sd), apply(p6[, 1:ncol(p3)], 2, sd), tol = 0.05) # multiple groups lfit <- lmer(diameter ~ (1|plate) + (1|sample), Penicillin) SW(sfit <- stan_lmer(diameter ~ (1|plate) + (1|sample), data = Penicillin, iter = 400, chains = CHAINS, seed = SEED, refresh = 0)) nd <- with(Penicillin, expand.grid(plate=unique(plate), sample=unique(sample))) p1 <- posterior_predict(sfit, re.form = NA, seed = SEED) p2 <- posterior_predict(sfit, nd, seed = SEED) p3 <- posterior_predict(sfit, nd, re.form = NA, seed = SEED) p4 <- posterior_predict(sfit, nd, re.form=~(1|plate)+(~1|sample), seed = SEED) p4b <- posterior_predict(sfit, nd, re.form=~(1|sample)+(~1|plate), seed = SEED) expect_equal(p2,p4) expect_equal(p4,p4b) p5 <- posterior_predict(sfit, nd, re.form=~(1|plate), seed = SEED) }) # spaces in factor levels ------------------------------------------------- context("posterior_linpred/predict with spaces in factor levels") test_that("posterior_linpred not sensitive to spaces in factor levels", { df <- data.frame( y = rnorm(10), fac_nospace = gl(2, 5, labels = c("levelone", "leveltwo")), char_nospace = rep(c("levelone", "leveltwo"), each = 5), fac_space = gl(2, 5, labels = c("level one", "level two")), char_space = rep(c("level one", "level two"), each = 5), fac_mix = gl(2, 5, labels = c("level one", "leveltwo")), char_mix = rep(c("level one", "leveltwo"), each = 5), int = rep(1:2, each = 5) ) SW({ fit1 <- stan_lmer(y ~ (1 | fac_nospace), data = df, seed = 123, chains = 2, iter = 25, refresh = 0) fit2 <- update(fit1, formula. = . ~ (1 | char_nospace)) fit3 <- update(fit1, formula. = . ~ (1 | fac_space)) fit4 <- update(fit1, formula. = . ~ (1 | char_space)) fit5 <- update(fit1, formula. = . ~ (1 | fac_mix)) fit6 <- update(fit1, formula. = . ~ (1 | char_mix)) fit7 <- update(fit1, formula. = . 
~ (1 | int)) }) # not adding a new level nd1 <- df[c(1, 10), ] ans1 <- posterior_linpred(fit1, newdata = nd1) expect_equal(ans1, posterior_linpred(fit2, newdata = nd1)) expect_equal(ans1, posterior_linpred(fit3, newdata = nd1)) expect_equal(ans1, posterior_linpred(fit4, newdata = nd1)) expect_equal(ans1, posterior_linpred(fit5, newdata = nd1)) expect_equal(ans1, posterior_linpred(fit6, newdata = nd1)) expect_equal(ans1, posterior_linpred(fit7, newdata = nd1)) # adding new levels nd2 <- data.frame( fac_nospace = gl(4, 1, labels = c("levelone", "leveltwo", "levelthree", "levelfour")), char_nospace = c("levelone", "leveltwo", "levelthree", "levelfour"), fac_space = gl(4, 1, labels = c("level one", "level two", "level three", "level four")), char_space = c("level one", "level two", "level three", "level four"), fac_mix = gl(4, 1, labels = c("level one", "leveltwo", "level three", "levelfour")), char_mix = c("level one", "leveltwo", "level three", "levelfour"), int = 1:4 ) ans2 <- posterior_linpred(fit1, newdata = nd2) # should be same as ans1 except for cols 3:4 with new levels expect_equal(ans2[, 1:2], ans1, check.attributes = FALSE) expect_equal(ans2, posterior_linpred(fit2, newdata = nd2)) expect_equal(ans2, posterior_linpred(fit3, newdata = nd2)) expect_equal(ans2, posterior_linpred(fit4, newdata = nd2)) expect_equal(ans2, posterior_linpred(fit5, newdata = nd2)) expect_equal(ans2, posterior_linpred(fit6, newdata = nd2)) expect_equal(ans2, posterior_linpred(fit7, newdata = nd2)) }) test_that("posterior_linpred with spaces in factor levels ok with complicated formula", { d <- mtcars d$cyl_fac <- factor(d$cyl, labels = c("cyl 4", "cyl 6", "cyl 8")) d$gear_fac <- factor(d$gear, labels = c("gear 3", "gear 4", "gear 5")) SW({ fit1 <- stan_lmer(mpg ~ (1 + wt|cyl/gear), data = d, iter = 50, chains = 1, seed = 123, refresh = 0) fit2 <- update(fit1, formula. = . 
~ (1 + wt|cyl_fac/gear_fac)) }) expect_equal(posterior_linpred(fit1), posterior_linpred(fit2)) # no new levels, all orig levels present in newdata nd1 <- data.frame(wt = 2, cyl = d$cyl, gear = d$gear) nd2 <- data.frame(wt = 2, cyl_fac = d$cyl_fac, gear_fac = d$gear_fac) expect_equal(posterior_linpred(fit1, newdata = nd1), posterior_linpred(fit2, newdata = nd2)) # no new levels, subset of orig levels present in newdata nd3 <- data.frame(wt = 2, cyl = 4, gear = 3) nd4 <- data.frame(wt = 2, cyl_fac = "cyl 4", gear_fac = factor(3, labels = "gear 3")) expect_equal(posterior_linpred(fit1, newdata = nd3), posterior_linpred(fit2, newdata = nd4)) # with new levels nd5 <- data.frame(wt = 2, cyl = 98, gear = 99) nd6 <- data.frame(wt = 2, cyl_fac = "new cyl", gear_fac = "new gear") expect_equal(posterior_linpred(fit1, newdata = nd5), posterior_linpred(fit2, newdata = nd6)) }) test_that("posterior_predict/epred with newdata works for intercept only model", { SW(fit_intercept <- stan_glm(mpg ~ 1, data = mtcars, refresh = 0, iter = 50, chains = 1)) nd0 <- data.frame() nd1 <- data.frame(row.names = 1) nd2 <- data.frame(row.names = 1:2) expect_equal(ncol(posterior_predict(fit_intercept, newdata = nd1)), 1) expect_equal(ncol(posterior_predict(fit_intercept, newdata = nd2)), 2) expect_error(posterior_predict(fit_intercept, data.frame()), "must have more than 0 rows") expect_equal(ncol(posterior_epred(fit_intercept, newdata = nd1)), 1) expect_equal(ncol(posterior_epred(fit_intercept, newdata = nd2)), 2) expect_error(posterior_epred(fit_intercept, data.frame()), "must have more than 0 rows") }) test_that("posterior_predict can handle empty interaction levels", { d1 <- expand.grid(group1 = c("A", "B"), group2 = c("a", "b", "c"))[1:5,] d1$y <- c(0, 1, 0, 1, 0) SW(fit <- rstanarm::stan_glm(y ~ group1:group2, data = d1, family = "binomial", refresh = 0, iter = 20, chains = 1)) expect_silent(ppd <- posterior_predict(fit)) expect_equal(dim(ppd), c(10, 5)) # make sure it can handle this in 
newdata even if not a problem in original data d2 <- expand.grid(group1 = c("A", "B"), group2 = c("a", "b", "c"))[1:6,] d2$y <- c(0, 1, 0, 1, 0, 0) SW(fit <- rstanarm::stan_glm(y ~ group1:group2, data = d2, family = "binomial", refresh = 0, iter = 20, chains = 1)) expect_silent(posterior_predict(fit)) expect_silent(posterior_predict(fit, newdata = d1)) # make sure it doesn't drop repeated rows in newdata nd <- data.frame(group1 = c("A", "A"), group2 = c("a", "a")) expect_silent(ppd <- posterior_predict(fit, newdata = nd)) expect_equal(ncol(ppd), nrow(nd)) expect_silent(ppd <- posterior_predict(fit, newdata = nd[1, ])) expect_equal(ncol(ppd), 1) }) # helper functions -------------------------------------------------------- context("posterior_predict helper functions") test_that("pp_binomial_trials works", { ppbt <- rstanarm:::pp_binomial_trials # binomial expect_equal(ppbt(example_model), cbpp$size) expect_equal(ppbt(example_model, newdata = cbpp[1:5, ]), cbpp[1:5, "size"]) # bernoulli SW(fit <- stan_glm(I(mpg > 25) ~ wt, data = mtcars, family = binomial, iter = ITER, refresh = 0, chains = CHAINS, seed = SEED)) expect_equal(ppbt(fit), rep(1, nrow(mtcars))) # expect_equal(ppbt(fit, newdata = mtcars[1:5, ]), rep(1, 5)) }) rstanarm/tests/testthat.R0000644000176200001440000000223114551535021015225 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. library(testthat) suppressPackageStartupMessages(library(rstanarm)) Sys.unsetenv("R_TESTS") o <- utils::capture.output(example(example_model, echo = FALSE)) if (.Platform$OS.type != "windows") { # || .Platform$r_arch != "i386" test_check("rstanarm", invert = FALSE, filter = if (Sys.getenv("NOT_CRAN") != "true") "stan_functions") } rstanarm/src/0000755000176200001440000000000014551551771012703 5ustar liggesusersrstanarm/src/init.cpp0000644000176200001440000000271014370470372014346 0ustar liggesusers// This file is part of RStanArm // Copyright (C) 2017 Trustees of Columbia University // // RStan is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // // RStan is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. /* * To register the functions implemented in C++, see * https://cran.r-project.org/doc/manuals/R-exts.html#Registering-native-routines * * But it seems not to work as it is supposed to be in that * they are still working if not registered. 
*/ #include #include #include #include #include static const R_CallMethodDef CallEntries[] = { {NULL, NULL, 0} }; void attribute_visible R_init_rstanarm(DllInfo *dll) { // next line is necessary to avoid a NOTE from R CMD check R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, TRUE); // necessary for .onLoad() to work } rstanarm/src/rstanarm-win.def0000644000176200001440000000004014370470372015773 0ustar liggesusersLIBRARY rstanarm.dll EXPORTS rstanarm/src/Makevars0000644000176200001440000000221314551535215014370 0ustar liggesusersSTANHEADERS_SRC = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "message()" -e "cat(system.file('include', 'src', package = 'StanHeaders', mustWork = TRUE))" -e "message()" | grep "StanHeaders") STANC_FLAGS = -DUSE_STANC3 PKG_CPPFLAGS = -I"../inst/include" -I"$(STANHEADERS_SRC)" -DBOOST_DISABLE_ASSERTS -DEIGEN_NO_DEBUG -DBOOST_MATH_OVERFLOW_ERROR_POLICY=errno_on_error $(STANC_FLAGS) -D_HAS_AUTO_PTR_ETC=0 PKG_CXXFLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::CxxFlags()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::CxxFlags()") $(LTO) PKG_LIBS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::RcppParallelLibs()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::LdFlags()") SHLIB_LDFLAGS = $(SHLIB_CXXLDFLAGS) SHLIB_LD = $(SHLIB_CXXLD) CXX_STD = CXX17 SOURCES = $(wildcard stan_files/*.stan) OBJECTS = $(SOURCES:.stan=.o) init.o all: $(SHLIB) clean: RM -rf stan_files/*.o RM -rf *.so *.o RM -rf stan_files/*.cc RM -rf stan_files/*.hpp %.cc: %.stan "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "source(file.path('..', 'tools', 'make_cc.R')); make_cc(commandArgs(TRUE))" $< rstanarm/src/Makevars.win0000644000176200001440000000232214500262162015155 0ustar liggesusersSTANHEADERS_SRC = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "message()" -e "cat(system.file('include', 'src', package = 'StanHeaders', mustWork = TRUE))" -e "message()" | grep 
"StanHeaders") STANC_FLAGS = -DUSE_STANC3 PKG_CPPFLAGS = -I"../inst/include" -I"$(STANHEADERS_SRC)" -DBOOST_DISABLE_ASSERTS -DEIGEN_NO_DEBUG -DRCPP_PARALLEL_USE_TBB=1 $(STANC_FLAGS) -D_HAS_AUTO_PTR_ETC=0 PKG_CXXFLAGS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::CxxFlags()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::CxxFlags()") PKG_LIBS = $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "RcppParallel::RcppParallelLibs()") $(shell "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "StanHeaders:::LdFlags()") PKG_CXXFLAGS += -flto=jobserver PKG_LIBS += -Wl,--allow-multiple-definition CXX_STD = CXX17 SOURCES = $(wildcard stan_files/*.stan) OBJECTS = $(SOURCES:.stan=.o) init.o AR = gcc-ar -m$(WIN) NM = gcc-nm -m$(WIN) RANLIB = gcc-ranlib -m$(WIN) all: $(SHLIB) clean: RM -rf stan_files/*.o RM -rf *.so *.o RM -rf stan_files/*.cc RM -rf stan_files/*.hpp %.cc: %.stan "$(R_HOME)/bin$(R_ARCH_BIN)/Rscript" -e "source(file.path('..', 'tools', 'make_cc.R')); make_cc(commandArgs(TRUE))" $< .phony: clean rstanarm/src/stan_files/0000755000176200001440000000000014551552005015021 5ustar liggesusersrstanarm/src/stan_files/polr.stan0000644000176200001440000002140714551535205016673 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for an ordinal outcome with coherent priors functions { /** * Evaluate a given CDF * * @param x The point to evaluate the CDF_polr at * @param link An integer indicating the link function * @return A scalar on (0,1) */ real CDF_polr(real x, int link) { // links in MASS::polr() are in a different order than binomial() // logistic, probit, loglog, cloglog, cauchit if (link == 1) return exp(log_inv_logit(x)); else if (link == 2) return exp(std_normal_lcdf(x|)); else if (link == 3) return exp(gumbel_lcdf(x | 0, 1)); else if (link == 4) return inv_cloglog(x); else if (link == 5) return exp(cauchy_lcdf(x | 0, 1)); else reject("Invalid link"); return x; // never reached } /** * Pointwise (pw) 
log-likelihood vector * * @param y The integer outcome variable. * @param eta A vector of linear predictors * @param cutpoints An ordered vector of cutpoints * @param link An integer indicating the link function * @return A vector of log-likelihods */ vector pw_polr(array[] int y, vector eta, vector cutpoints, int link, real alpha) { int N = rows(eta); int J = rows(cutpoints) + 1; vector[N] ll; if (link < 1 || link > 5) reject("Invalid link"); if (alpha == 1) for (n in 1 : N) { if (y[n] == 1) ll[n] = CDF_polr(cutpoints[1] - eta[n], link); else if (y[n] == J) ll[n] = 1 - CDF_polr(cutpoints[J - 1] - eta[n], link); else ll[n] = CDF_polr(cutpoints[y[n]] - eta[n], link) - CDF_polr(cutpoints[y[n] - 1] - eta[n], link); } else for (n in 1 : N) { if (y[n] == 1) ll[n] = CDF_polr(cutpoints[1] - eta[n], link) ^ alpha; else if (y[n] == J) ll[n] = 1 - CDF_polr(cutpoints[J - 1] - eta[n], link) ^ alpha; else reject("alpha not allowed with more than 2 outcome categories"); } return log(ll); } /** * Map from conditional probabilities to cutpoints * * @param probabilities A J-simplex * @param scale A positive scalar * @param link An integer indicating the link function * @return A vector of length J - 1 whose elements are in increasing order */ vector make_cutpoints(vector probabilities, real scale, int link) { int C = rows(probabilities) - 1; vector[C] cutpoints; real running_sum = 0; // links in MASS::polr() are in a different order than binomial() // logistic, probit, loglog, cloglog, cauchit if (link == 1) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = logit(running_sum); } else if (link == 2) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = inv_Phi(running_sum); } else if (link == 3) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = -log(-log(running_sum)); } else if (link == 4) for (c in 1 : C) { running_sum += probabilities[c]; cutpoints[c] = log(-log1m(running_sum)); } else if (link == 5) for (c in 1 : C) { running_sum += 
probabilities[c]; cutpoints[c] = tan(pi() * (running_sum - 0.5)); } else reject("invalid link"); return scale * cutpoints; } /** * Randomly draw a value for utility * * @param low A scalar lower bound * @param high A scalar upper bound * @param eta A scalar linear predictor * @param link An integer indicating the link function * @return A scalar from the appropriate conditional distribution */ real draw_ystar_rng(real low, real high, real eta, int link) { int iter = 0; real ystar = not_a_number(); if (low >= high) reject("low must be less than high"); // links in MASS::polr() are in a different order than binomial() // logistic, probit, loglog, cloglog, cauchit if (link == 1) while (!(ystar > low && ystar < high)) ystar = logistic_rng(eta, 1); else if (link == 2) while (!(ystar > low && ystar < high)) ystar = normal_rng(eta, 1); else if (link == 3) while (!(ystar > low && ystar < high)) ystar = gumbel_rng(eta, 1); else if (link == 4) while (!(ystar > low && ystar < high)) ystar = log(-log1m(uniform_rng( 0, 1))); else if (link == 5) while (!(ystar > low && ystar < high)) ystar = cauchy_rng(eta, 1); else reject("invalid link"); return ystar; } } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan int J; // number of outcome categories, which typically is > 2 array[N] int y; // ordinal outcome // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset #include /data/weights_offset.stan // hyperparameter values real regularization; vector[J] prior_counts; int is_skewed; real shape; real rate; int do_residuals; } transformed data { real half_K = 0.5 * K; real sqrt_Nm1 = sqrt(N - 1.0); int is_constant = 1; vector[0] beta_smooth; // not used for (j in 1 : J) if (prior_counts[j] != 1) is_constant = 0; } parameters { simplex[J] pi; // avoid error by making unit_vector have 2 elements when K <= 1 // 
https://github.com/stan-dev/rstanarm/issues/603#issuecomment-1785928224 array[K > 1] unit_vector[K > 1 ? K : 2] u; real 1 ? 0 : -1), upper=1> R2; array[is_skewed] real alpha; } transformed parameters { vector[K] beta; vector[J - 1] cutpoints; { real Delta_y; if (K > 1) { Delta_y = inv_sqrt(1 - R2); beta = u[1] * sqrt(R2) * Delta_y * sqrt_Nm1; } else { Delta_y = inv_sqrt(1 - square(R2)); beta[1] = R2 * Delta_y * sqrt_Nm1; } cutpoints = make_cutpoints(pi, Delta_y, link); } } model { if (prior_PD == 0) { #include /model/make_eta.stan if (has_weights == 0) { // unweighted log-likelihoods if (is_skewed == 0) target += pw_polr(y, eta, cutpoints, link, 1.0); else target += pw_polr(y, eta, cutpoints, link, alpha[1]); } else { // weighted log-likelihoods if (is_skewed == 0) target += dot_product(weights, pw_polr(y, eta, cutpoints, link, 1.0)); else target += dot_product(weights, pw_polr(y, eta, cutpoints, link, alpha[1])); } } if (is_constant == 0) target += dirichlet_lpdf(pi | prior_counts); // implicit: u is uniform on the surface of a hypersphere if (prior_dist == 1) { if (K > 1) target += beta_lpdf(R2 | half_K, regularization); else target += beta_lpdf(square(R2) | half_K, regularization) + log(abs(R2)); } if (is_skewed == 1) target += gamma_lpdf(alpha | shape, rate); } generated quantities { vector[J > 2 ? J : 1] mean_PPD = rep_vector(0, J > 2 ? J : 1); vector[do_residuals ? 
N : 0] residuals; vector[J - 1] zeta; // xbar is actually post multiplied by R^-1 if (dense_X) zeta = cutpoints + dot_product(xbar, beta); else zeta = cutpoints; if (J == 2) zeta *= -1.0; { #include /model/make_eta.stan for (n in 1 : N) { int y_tilde; vector[J] theta; real previous; real first = CDF_polr(cutpoints[1] - eta[n], link); previous = first; if (is_skewed) theta[1] = first ^ alpha[1]; else theta[1] = first; for (j in 2 : (J - 1)) { real current = CDF_polr(cutpoints[j] - eta[n], link); theta[j] = current - previous; previous = current; } if (is_skewed == 0) theta[J] = 1 - previous; else theta[J] = 1 - previous ^ alpha[1]; if (previous <= 0 || previous >= 1) { // do nothing } else if (J == 2) { mean_PPD[1] += bernoulli_rng(theta[J]); } else { y_tilde = categorical_rng(theta); mean_PPD[y_tilde] += 1; } if (do_residuals) { real ystar; if (y[n] == 1) ystar = draw_ystar_rng(negative_infinity(), cutpoints[1], eta[n], link); else if (y[n] == J) ystar = draw_ystar_rng(cutpoints[J - 1], positive_infinity( ), eta[n], link); else ystar = draw_ystar_rng(cutpoints[y[n] - 1], cutpoints[y[n]], eta[n], link); residuals[n] = ystar - eta[n]; } } mean_PPD /= (N + 0.0); } } rstanarm/src/stan_files/count.stan0000644000176200001440000001654714500256225017054 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a count outcome functions { #include /functions/common_functions.stan #include /functions/count_likelihoods.stan } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan array[N] int y; // count outcome // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset_ #include /data/weights_offset.stan int family; // 6 poisson, 7 neg-binom, (8 poisson with gamma noise at some point?) 
// declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // declares num_not_zero, w, v, u #include /data/glmer_stuff2.stan } transformed data { real poisson_max = pow(2.0, 30.0); array[special_case ? t : 0, N] int V = make_V(N, special_case ? t : 0, v); int can_do_countlogglm = K != 0 && // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398 link == 1 && prior_PD == 0 && dense_X == 1 && has_weights == 0 && t == 0; matrix[can_do_countlogglm ? N : 0, can_do_countlogglm ? K + K_smooth : 0] XS; // defines hs, len_z_T, len_var_group, delta, pos #include /tdata/tdata_glm.stan if (can_do_countlogglm) { XS = K_smooth > 0 ? append_col(X[1], S) : X[1]; } } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan array[family > 6] real aux_unscaled; array[family == 8] vector[N] noise; // do not store this } transformed parameters { real aux = negative_infinity(); // be careful with this in the family = 6 case // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan if (family > 6 && (prior_dist_for_aux == 0 || prior_scale_for_aux <= 0)) aux = aux_unscaled[1]; else if (family > 6) { aux = prior_scale_for_aux * aux_unscaled[1]; if (prior_dist_for_aux <= 2) // normal or student_t aux += prior_mean_for_aux; } if (t > 0) { if (special_case == 1) { int start = 1; theta_L = scale .* (family == 6 ? 
tau : tau * aux); if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { if (family == 6) theta_L = make_theta_L(len_theta_L, p, 1.0, tau, scale, zeta, rho, z_T); else theta_L = make_theta_L(len_theta_L, p, aux, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (can_do_countlogglm) { vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta; if (family != 7) { if (has_offset) { target += poisson_log_glm_lpmf(y | XS, has_intercept ? offset_ + gamma[1] : offset_, coeff); } else { target += poisson_log_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff); } } else { if (has_offset) { target += neg_binomial_2_log_glm_lpmf(y | XS, has_intercept ? offset_ + gamma[1] : offset_, coeff, aux); } else { target += neg_binomial_2_log_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff, aux); } } } else if (prior_PD == 0) { #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link == 1) eta += gamma[1]; else eta += gamma[1] - min(eta); } else { #include /model/eta_no_intercept.stan } if (family == 8) { if (link == 1) eta += log(aux) + log(noise[1]); else if (link == 2) { eta *= aux; eta .*= noise[1]; } else eta += sqrt(aux) + sqrt(noise[1]); } // Log-likelihood if (has_weights == 0) { // unweighted log-likelihoods if (family != 7) { if (link == 1) target += poisson_log_lpmf(y | eta); else target += poisson_lpmf(y | linkinv_count(eta, link)); } else { if (link == 1) target += neg_binomial_2_log_lpmf(y | eta, aux); else target += neg_binomial_2_lpmf(y | linkinv_count(eta, link), aux); } } else if (family != 7) target += dot_product(weights, pw_pois(y, eta, link)); else target += dot_product(weights, pw_nb(y, eta, aux, link)); } // Log-prior for aux if (family > 6 && prior_dist_for_aux > 0 && prior_scale_for_aux > 0) { real log_half = -0.693147180559945286; if 
(prior_dist_for_aux == 1) target += normal_lpdf(aux_unscaled | 0, 1) - log_half; else if (prior_dist_for_aux == 2) target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half; else target += exponential_lpdf(aux_unscaled | 1); } #include /model/priors_glm.stan // Log-prior for noise if (family == 8) target += gamma_lpdf(noise[1] | aux, 1); if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 0 : negative_infinity(); array[has_intercept] real alpha; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (compute_mean_PPD) { vector[N] nu; #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link == 1) eta += gamma[1]; else { real shift = min(eta); eta += gamma[1] - shift; alpha[1] -= shift; } } else { #include /model/eta_no_intercept.stan } if (family == 8) { if (link == 1) eta += log(aux) + log(noise[1]); else if (link == 2) { eta *= aux; eta .*= noise[1]; } else eta += sqrt(aux) + sqrt(noise[1]); } nu = linkinv_count(eta, link); if (family != 7) for (n in 1 : N) { if (nu[n] < poisson_max) mean_PPD += poisson_rng(nu[n]); else mean_PPD += normal_rng(nu[n], sqrt(nu[n])); } else for (n in 1 : N) { real gamma_temp; if (is_inf(aux)) gamma_temp = nu[n]; else gamma_temp = gamma_rng(aux, aux / nu[n]); if (gamma_temp < poisson_max) mean_PPD += poisson_rng(gamma_temp); else mean_PPD += normal_rng(gamma_temp, sqrt(gamma_temp)); } mean_PPD /= N; } } rstanarm/src/stan_files/gqs/0000755000176200001440000000000014500256225015612 5ustar liggesusersrstanarm/src/stan_files/gqs/gen_quantities_mvmer.stan0000644000176200001440000000564414500256225022737 0ustar liggesusers array[M] real mean_PPD; array[intercept_type[1] > 0] real yAlpha1; array[intercept_type[2] > 0] real yAlpha2; array[intercept_type[3] > 0] real yAlpha3; vector[prior_dist_for_cov == 2 
&& bK1 > 0 ? size(bCov1_idx) : 0] bCov1; vector[prior_dist_for_cov == 2 && bK2 > 0 ? size(bCov2_idx) : 0] bCov2; vector[bN1 * bK1] b1 = to_vector(bMat1'); // ensures same order as stan_glmer (make_b) vector[bN2 * bK2] b2 = to_vector(bMat2'); // Evaluate mean_PPD { int bMat1_colshift = 0; // column shift in bMat1 int bMat2_colshift = 0; // column shift in bMat2 // Linear predictor for submodel 1 if (M > 0) { vector[yNeta[1]] yEta1 = evaluate_mu( // linear predictor evaluate_eta(yX1, y1_Z1, y1_Z2, y1_Z1_id, y1_Z2_id, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset), family[1], link[1]); mean_PPD[1] = mean_PPD_rng(yEta1, yAux1, family[1]); } // Linear predictor for submodel 2 if (M > 1) { vector[yNeta[2]] yEta2; bMat1_colshift += bK1_len[1]; bMat2_colshift += bK2_len[1]; yEta2 = evaluate_mu(evaluate_eta(yX2, y2_Z1, y2_Z2, y2_Z1_id, y2_Z2_id, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset), family[2], link[2]); mean_PPD[2] = mean_PPD_rng(yEta2, yAux2, family[2]); } // Linear predictor for submodel 3 if (M > 2) { vector[yNeta[3]] yEta3; bMat1_colshift += bK1_len[2]; bMat2_colshift += bK2_len[2]; yEta3 = evaluate_mu(evaluate_eta(yX3, y3_Z1, y3_Z2, y3_Z1_id, y3_Z2_id, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset), family[3], link[3]); mean_PPD[3] = mean_PPD_rng(yEta3, yAux3, family[3]); } } // Transform intercept parameters if (intercept_type[1] > 0) yAlpha1[1] = yGamma1[1] - dot_product(yXbar1, yBeta1); if (M > 1 && intercept_type[2] > 0) yAlpha2[1] = yGamma2[1] - dot_product(yXbar2, yBeta2); if (M > 2 && intercept_type[3] > 0) yAlpha3[1] = yGamma3[1] - dot_product(yXbar3, yBeta3); // Transform variance-covariance matrices // Grouping factor 1 if (prior_dist_for_cov == 2 && bK1 == 1) { bCov1[1] = bSd1[1] * bSd1[1]; } else if (prior_dist_for_cov == 2 && bK1 > 1) { bCov1 = to_vector(quad_form_diag( 
multiply_lower_tri_self_transpose(bCholesky1), bSd1))[bCov1_idx]; } // Grouping factor 2 if (prior_dist_for_cov == 2 && bK2 == 1) { bCov2[1] = bSd2[1] * bSd2[1]; } else if (prior_dist_for_cov == 2 && bK2 > 1) { bCov2 = to_vector(quad_form_diag( multiply_lower_tri_self_transpose(bCholesky2), bSd2))[bCov2_idx]; } rstanarm/src/stan_files/parameters/0000755000176200001440000000000014500256225017163 5ustar liggesusersrstanarm/src/stan_files/parameters/parameters_event.stan0000644000176200001440000000125314500256225023417 0ustar liggesusers array[e_has_intercept] real e_gamma; // intercept for event submodel vector[e_K] e_z_beta; // primitive log hazard ratios // unscaled basehaz params, either: // - weibull shape parameter // - b-spline coefs on log basehaz // - coefs for piecewise constant basehaz vector[basehaz_df] e_aux_unscaled; // parameters for priors on log haz ratios array[e_hs] real e_global; array[e_hs] vector[(e_hs>0)*e_K] e_local; array[e_hs > 0] real e_caux; array[e_prior_dist == 5 || e_prior_dist == 6] vector[e_K] e_mix; array[e_prior_dist == 6] real e_ool; rstanarm/src/stan_files/parameters/parameters_glm.stan0000644000176200001440000000104214500256225023051 0ustar liggesusers vector[prior_dist == 7 ? sum(num_normals) : K] z_beta; vector[K_smooth] z_beta_smooth; vector[K_smooth > 0 ? 
smooth_map[K_smooth] : 0] smooth_sd_raw; array[hs] real global; array[hs] vector[K] local; array[hs > 0] real caux; array[prior_dist == 5 || prior_dist == 6] vector[K] mix; array[prior_dist == 6] real one_over_lambda; vector[q] z_b; vector[len_z_T] z_T; vector[len_rho] rho; vector[len_concentration] zeta; vector[t] tau; rstanarm/src/stan_files/parameters/parameters_mvmer.stan0000644000176200001440000000507614500256225023433 0ustar liggesusers // intercepts array[intercept_type[1] > 0] real yGamma1; array[intercept_type[2] > 0] real yGamma2; array[intercept_type[3] > 0] real yGamma3; // population level primitive params vector[yK[1]] z_yBeta1; vector[yK[2]] z_yBeta2; vector[yK[3]] z_yBeta3; // group level params, decov prior vector[prior_dist_for_cov == 1 ? q : 0] z_b; vector[prior_dist_for_cov == 1 ? len_z_T : 0] z_T; vector[prior_dist_for_cov == 1 ? len_rho : 0] rho; vector[prior_dist_for_cov == 1 ? len_concentration : 0] zeta; vector[prior_dist_for_cov == 1 ? t : 0] tau; // group level params for first grouping factor // group-level sds vector[prior_dist_for_cov == 2 ? bK1 : 0] bSd1; // unscaled group-level params matrix[prior_dist_for_cov == 2 && bK1 > 0 ? bK1 : 0, bK1 > 0 ? bN1 : 0] z_bMat1; // cholesky factor of corr matrix (if > 1 random effect) cholesky_factor_corr[prior_dist_for_cov == 2 && bK1 > 1 ? bK1 : 0] bCholesky1; // group level params for second grouping factor // group-level sds vector[prior_dist_for_cov == 2 ? bK2 : 0] bSd2; // unscaled group-level params matrix[prior_dist_for_cov == 2 && bK2 > 0 ? bK2 : 0, bK2 > 0 ? bN2 : 0] z_bMat2; // cholesky factor of corr matrix (if > 1 random effect) cholesky_factor_corr[prior_dist_for_cov == 2 && bK2 > 1 ? 
bK2 : 0] bCholesky2; // auxiliary params, interpretation depends on family array[has_aux[1]] real yAux1_unscaled; array[has_aux[2]] real yAux2_unscaled; array[has_aux[3]] real yAux3_unscaled; // params for priors array[yHs1] real yGlobal1; array[yHs2] real yGlobal2; array[yHs3] real yGlobal3; array[yHs1] vector[yK[1]] yLocal1; array[yHs2] vector[yK[2]] yLocal2; array[yHs3] vector[yK[3]] yLocal3; array[yHs1 > 0] real y_caux1; array[yHs2 > 0] real y_caux2; array[yHs3 > 0] real y_caux3; array[y_prior_dist[1] == 6] real yOol1; // one_over_lambda array[y_prior_dist[2] == 6] real yOol2; array[y_prior_dist[3] == 6] real yOol3; array[y_prior_dist[1] == 5 || y_prior_dist[1] == 6] vector[yK[1]] yMix1; array[y_prior_dist[2] == 5 || y_prior_dist[2] == 6] vector[yK[2]] yMix2; array[y_prior_dist[3] == 5 || y_prior_dist[3] == 6] vector[yK[3]] yMix3; rstanarm/src/stan_files/parameters/parameters_assoc.stan0000644000176200001440000000053514500256225023410 0ustar liggesusers vector[a_K] a_z_beta; // primitive assoc params // parameters for priors on assoc params array[a_hs] real a_global; array[a_hs] vector[(a_hs>0)*a_K] a_local; array[a_hs > 0] real a_caux; array[a_prior_dist == 5 || a_prior_dist == 6] vector[a_K] a_mix; array[a_prior_dist == 6] real a_ool; rstanarm/src/stan_files/parameters/parameters_betareg.stan0000644000176200001440000000072714500256225023714 0ustar liggesusers vector[prior_dist_z == 7 ? 
sum(num_normals_z) : z_dim] z_omega; // betareg z variable coefficients array[has_intercept_z] real gamma_z; // betareg intercept array[hs_z] real global_z; array[hs_z] vector[z_dim] local_z; array[hs_z > 0] real caux_z; array[prior_dist_z == 5 || prior_dist_z == 6] vector[z_dim] S_z; array[prior_dist_z == 6] real one_over_lambda_z; rstanarm/src/stan_files/tparameters/0000755000176200001440000000000014500256225017347 5ustar liggesusersrstanarm/src/stan_files/tparameters/tparameters_glm.stan0000644000176200001440000000327414370470367023444 0ustar liggesusers vector[K] beta; vector[K_smooth] beta_smooth; vector[K_smooth > 0 ? smooth_map[K_smooth] : 0] smooth_sd; vector[q] b; vector[len_theta_L] theta_L; if (prior_dist == 0) beta = z_beta; else if (prior_dist == 1) beta = z_beta .* prior_scale + prior_mean; else if (prior_dist == 2) for (k in 1:K) { beta[k] = CFt(z_beta[k], prior_df[k]) * prior_scale[k] + prior_mean[k]; } else if (prior_dist == 3) { real c2 = square(slab_scale) * caux[1]; if (is_continuous == 1 && family == 1) beta = hs_prior(z_beta, global, local, global_prior_scale, aux, c2); else beta = hs_prior(z_beta, global, local, global_prior_scale, 1, c2); } else if (prior_dist == 4) { real c2 = square(slab_scale) * caux[1]; if (is_continuous == 1 && family == 1) beta = hsplus_prior(z_beta, global, local, global_prior_scale, aux, c2); else beta = hsplus_prior(z_beta, global, local, global_prior_scale, 1, c2); } else if (prior_dist == 5) // laplace beta = prior_mean + prior_scale .* sqrt(2 * mix[1]) .* z_beta; else if (prior_dist == 6) // lasso beta = prior_mean + one_over_lambda[1] * prior_scale .* sqrt(2 * mix[1]) .* z_beta; else if (prior_dist == 7) { // product_normal int z_pos = 1; for (k in 1:K) { beta[k] = z_beta[z_pos]; z_pos += 1; for (n in 2:num_normals[k]) { beta[k] *= z_beta[z_pos]; z_pos += 1; } beta[k] *= prior_scale[k] ^ num_normals[k]; beta[k] += prior_mean[k]; } } if (K_smooth) { smooth_sd = prior_mean_for_smooth + prior_scale_for_smooth .* 
smooth_sd_raw; if (is_continuous && family == 1) smooth_sd *= aux; beta_smooth = z_beta_smooth .* smooth_sd[smooth_map]; } rstanarm/src/stan_files/tparameters/tparameters_betareg.stan0000644000176200001440000000222313365374540024266 0ustar liggesusers if (prior_dist_z == 0) omega = z_omega; else if (prior_dist_z == 1) omega = z_omega .* prior_scale_z + prior_mean_z; else if (prior_dist_z == 2) for (k in 1:z_dim) { real left = CFt(omega[k], prior_df_z[k]); omega[k] = left * prior_scale_z[k] + prior_mean_z[k]; } else if (prior_dist_z == 3) omega = hs_prior(z_omega, global_z, local_z, global_prior_scale, 1, square(slab_scale_z) * caux_z[1]); else if (prior_dist_z == 4) omega = hsplus_prior(z_omega, global_z, local_z, global_prior_scale, 1, square(slab_scale_z) * caux_z[1]); else if (prior_dist_z == 5) omega = prior_mean_z + prior_scale_z .* sqrt(2 * S_z[1]) .* z_omega; else if (prior_dist_z == 6) omega = prior_mean_z + one_over_lambda_z[1] * prior_scale_z .* sqrt(2 * S_z[1]) .* z_omega; else if (prior_dist_z == 7) { int z_pos = 1; for (k in 1:z_dim) { omega[k] = z_omega[z_pos]; z_pos += 1; for (n in 2:num_normals_z[k]) { omega[k] *= z_omega[z_pos]; z_pos += 1; } omega[k] *= prior_scale_z[k] ^ num_normals_z[k]; omega[k] += prior_mean_z[k]; } } rstanarm/src/stan_files/tparameters/tparameters_mvmer.stan0000644000176200001440000000606414500256225024001 0ustar liggesusers vector[yK[1]] yBeta1; // population level params vector[yK[2]] yBeta2; vector[yK[3]] yBeta3; array[has_aux[1]] real yAux1; // auxiliary params array[has_aux[2]] real yAux2; array[has_aux[3]] real yAux3; vector[len_theta_L] theta_L; // cov matrix for decov prior real yAuxMaximum = 1.0; // used for scaling in theta_L // group level params matrix[bK1 > 0 ? bN1 : 0, bK1] bMat1; // for grouping factor 1 matrix[bK2 > 0 ? 
bN2 : 0, bK2] bMat2; // for grouping factor 2 // population level params, auxiliary params if (has_aux[1] == 1) { yAux1[1] = make_aux(yAux1_unscaled[1], y_prior_dist_for_aux[1], y_prior_mean_for_aux[1], y_prior_scale_for_aux[1]); if (yAux1[1] > yAuxMaximum) yAuxMaximum = yAux1[1]; } if (yK[1] > 0) yBeta1 = make_beta(z_yBeta1, y_prior_dist[1], y_prior_mean1, y_prior_scale1, y_prior_df1, y_global_prior_scale[1], yGlobal1, yLocal1, yOol1, yMix1, yAux1, family[1], y_slab_scale[1], y_caux1); if (M > 1) { if (has_aux[2] == 1) { yAux2[1] = make_aux(yAux2_unscaled[1], y_prior_dist_for_aux[2], y_prior_mean_for_aux[2], y_prior_scale_for_aux[2]); if (yAux2[1] > yAuxMaximum) yAuxMaximum = yAux2[1]; } if (yK[2] > 0) yBeta2 = make_beta(z_yBeta2, y_prior_dist[2], y_prior_mean2, y_prior_scale2, y_prior_df2, y_global_prior_scale[2], yGlobal2, yLocal2, yOol2, yMix2, yAux2, family[2], y_slab_scale[2], y_caux2); } if (M > 2) { if (has_aux[3] == 1) { yAux3[1] = make_aux(yAux3_unscaled[1], y_prior_dist_for_aux[3], y_prior_mean_for_aux[3], y_prior_scale_for_aux[3]); if (yAux3[1] > yAuxMaximum) yAuxMaximum = yAux3[1]; } if (yK[3] > 0) yBeta3 = make_beta(z_yBeta3, y_prior_dist[3], y_prior_mean3, y_prior_scale3, y_prior_df3, y_global_prior_scale[3], yGlobal3, yLocal3, yOol3, yMix3, yAux3, family[3], y_slab_scale[3], y_caux3); } // group level params, under decov prior if (prior_dist_for_cov == 1) { int mark = 1; // cov matrix theta_L = make_theta_L(len_theta_L, p, yAuxMaximum, tau, b_prior_scale, zeta, rho, z_T); // group-level params for first grouping factor if (bK1 > 0) bMat1 = make_b_matrix(z_b, theta_L, p, l, 1); // group level params for second grouping factor if (bK2 > 0) bMat2 = make_b_matrix(z_b, theta_L, p, l, 2); } // group-level params, under lkj prior else if (prior_dist_for_cov == 2) { // group-level params for first grouping factor if (bK1 == 1) bMat1 = (bSd1[1] * z_bMat1)'; else if (bK1 > 1) bMat1 = (diag_pre_multiply(bSd1, bCholesky1) * z_bMat1)'; // group level params for 
second grouping factor if (bK2 == 1) bMat2 = (bSd2[1] * z_bMat2)'; else if (bK2 > 1) bMat2 = (diag_pre_multiply(bSd2, bCholesky2) * z_bMat2)'; } rstanarm/src/stan_files/continuous.stan0000644000176200001440000003604314500256225020123 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a Gaussian, Gamma, inverse Gaussian, or Beta outcome functions { #include /functions/common_functions.stan #include /functions/continuous_likelihoods.stan #include /functions/SSfunctions.stan /** * Increments the log-posterior with the logarithm of a multivariate normal * likelihood with a scalar standard deviation for all errors * Equivalent to normal_lpdf(y | intercept + X * beta + Z * b, sigma) but faster * @param coeff vector of coefficients (including intercept) * @param OLS precomputed vector of OLS coefficients (including intercept) * @param XtX precomputed matrix equal to crossprod(X) (including intercept) * @param SSR positive precomputed value of the sum of squared OLS residuals * @param sigma positive scalar for the standard deviation of the errors * @param N integer equal to the number of observations */ real mvn_ols_lpdf(vector coeff, vector OLS, matrix XtX, real SSR, real sigma, int N) { return -0.5 * (quad_form(XtX, coeff - OLS) + SSR) / square(sigma) - N * (log(sigma) + log(sqrt(2 * pi()))); } vector CODOLS(matrix X, vector y); // implemented in C++ } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan int len_y; // length of y real lb_y; // lower bound on y real ub_y; // upper bound on y vector[len_y] y; // continuous outcome int family; // 1 gaussian, 2 gamma, 3 inv-gaussian, 4 beta // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset #include /data/weights_offset.stan // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux #include 
/data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // declares num_not_zero, w, v, u #include /data/glmer_stuff2.stan #include /data/data_betareg.stan int SSfun; // nonlinear function indicator, 0 for identity vector[SSfun > 0 ? len_y : 0] input; vector[SSfun == 5 ? len_y : 0] Dose; } transformed data { vector[family == 3 ? len_y : 0] sqrt_y; vector[family == 3 ? len_y : 0] log_y; real sum_log_y = family == 1 ? not_a_number() : sum(log(y)); array[special_case ? t : 0, len_y] int V = make_V(len_y, special_case ? t : 0, v); int hs_z; // for tdata_betareg.stan int can_do_OLS = family == 1 && link == 1 && SSfun == 0 && has_offset == 0 && t == 0 && prior_PD == 0 && dense_X && N > 2 && len_y >= (has_intercept + K + K_smooth); vector[can_do_OLS ? has_intercept + K + K_smooth : 0] OLS; matrix[can_do_OLS ? has_intercept + K + K_smooth : 0, can_do_OLS ? has_intercept + K + K_smooth : 0] XtX; int can_do_normalidglm = K != 0 && // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398 can_do_OLS == 0 && family == 1 && link == 1 && SSfun == 0 && has_offset == 0 && dense_X && prior_PD == 0 && t == 0 && len_y < (has_intercept + K + K_smooth); matrix[can_do_normalidglm ? N : 0, can_do_normalidglm ? K + K_smooth : 0] XS; real SSR = not_a_number(); // defines hs, len_z_T, len_var_group, delta, is_continuous, pos #include /tdata/tdata_glm.stan // defines hs_z #include /tdata/tdata_betareg.stan is_continuous = 1; if (family == 3) { sqrt_y = sqrt(y); log_y = log(y); } if (can_do_OLS) { matrix[N, has_intercept + K + K_smooth] X_ = has_intercept ? append_col(rep_vector( 1.0, N), (K_smooth > 0 ? append_col( X[1], S) : X[1])) : (K_smooth > 0 ? append_col(X[1], S) : X[1]); XtX = crossprod(X_); OLS = CODOLS(X_, y); SSR = dot_self(y - X_ * OLS); } if (can_do_normalidglm) { XS = K_smooth > 0 ? 
append_col(X[1], S) : X[1]; } } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan real aux_unscaled; // interpretation depends on family! #include /parameters/parameters_betareg.stan } transformed parameters { // aux has to be defined first in the hs case real aux = prior_dist_for_aux == 0 ? aux_unscaled : (prior_dist_for_aux <= 2 ? prior_scale_for_aux * aux_unscaled + prior_mean_for_aux : prior_scale_for_aux * aux_unscaled); vector[z_dim] omega; // used in tparameters_betareg.stan // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan #include /tparameters/tparameters_betareg.stan if (prior_dist_for_aux == 0) // none aux = aux_unscaled; else { aux = prior_scale_for_aux * aux_unscaled; if (prior_dist_for_aux <= 2) // normal or student_t aux += prior_mean_for_aux; } if (t > 0) { if (special_case == 1) { int start = 1; theta_L = scale .* tau * aux; if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { theta_L = make_theta_L(len_theta_L, p, aux, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (can_do_OLS) { vector[cols(XtX)] coeff = has_intercept ? append_row(to_vector(gamma), (K_smooth > 0 ? append_row(beta, beta_smooth) : beta)) : (K_smooth > 0 ? append_row(beta, beta_smooth) : beta); target += mvn_ols_lpdf(coeff | OLS, XtX, SSR, aux, N); } else if (can_do_normalidglm) { vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta; target += normal_id_glm_lpdf(y | XS, has_intercept ? gamma[1] : 0.0, coeff, aux); } else if (prior_PD == 0) { vector[link_phi > 0 ? 
N : 0] eta_z; // beta regression linear predictor for phi #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if ((family == 1 || link == 2) || (family == 4 && link != 5)) eta += gamma[1]; else if (family == 4 && link == 5) eta += gamma[1] - max(eta); else eta += gamma[1] - min(eta); } else { #include /model/eta_no_intercept.stan } if (SSfun > 0) { // nlmer matrix[len_y, K] P = reshape_vec(eta, len_y, K); if (SSfun < 5) { if (SSfun <= 2) { if (SSfun == 1) target += normal_lpdf(y | SS_asymp(input, P), aux); else target += normal_lpdf(y | SS_asympOff(input, P), aux); } else if (SSfun == 3) target += normal_lpdf(y | SS_asympOrig(input, P), aux); else { for (i in 1 : len_y) P[i, 1] += exp(P[i, 3]); // ordering constraint target += normal_lpdf(y | SS_biexp(input, P), aux); } } else { if (SSfun <= 7) { if (SSfun == 5) target += normal_lpdf(y | SS_fol(Dose, input, P), aux); else if (SSfun == 6) target += normal_lpdf(y | SS_fpl(input, P), aux); else target += normal_lpdf(y | SS_gompertz(input, P), aux); } else { if (SSfun == 8) target += normal_lpdf(y | SS_logis(input, P), aux); else if (SSfun == 9) target += normal_lpdf(y | SS_micmen(input, P), aux); else target += normal_lpdf(y | SS_weibull(input, P), aux); } } } else if (has_weights == 0) { // unweighted log-likelihoods #include /model/make_eta_z.stan // adjust eta_z according to links if (has_intercept_z == 1) { if (link_phi > 1) { eta_z += gamma_z[1] - min(eta_z); } else { eta_z += gamma_z[1]; } } else { // has_intercept_z == 0 #include /model/eta_z_no_intercept.stan } if (family == 1) { if (link == 1) target += normal_lpdf(y | eta, aux); else if (link == 2) target += normal_lpdf(y | exp(eta), aux); else target += normal_lpdf(y | inv(eta), aux); } else if (family == 2) { target += GammaReg(y, eta, aux, link, sum_log_y); } else if (family == 3) { target += inv_gaussian(y, linkinv_inv_gaussian(eta, link), aux, sum_log_y, sqrt_y); } else if (family == 4 && link_phi == 0) 
{ vector[N] mu; mu = linkinv_beta(eta, link); target += beta_lpdf(y | mu * aux, (1 - mu) * aux); } else if (family == 4 && link_phi > 0) { vector[N] mu; vector[N] mu_z; mu = linkinv_beta(eta, link); mu_z = linkinv_beta_z(eta_z, link_phi); target += beta_lpdf(y | rows_dot_product(mu, mu_z), rows_dot_product(( 1 - mu), mu_z)); } } else { // weighted log-likelihoods vector[N] summands; if (family == 1) summands = pw_gauss(y, eta, aux, link); else if (family == 2) summands = pw_gamma(y, eta, aux, link); else if (family == 3) summands = pw_inv_gaussian(y, eta, aux, link, log_y, sqrt_y); else if (family == 4 && link_phi == 0) summands = pw_beta(y, eta, aux, link); else if (family == 4 && link_phi > 0) summands = pw_beta_z(y, eta, eta_z, link, link_phi); target += dot_product(weights, summands); } } // Log-priors if (prior_dist_for_aux > 0 && prior_scale_for_aux > 0) { real log_half = -0.693147180559945286; if (prior_dist_for_aux == 1) target += normal_lpdf(aux_unscaled | 0, 1) - log_half; else if (prior_dist_for_aux == 2) target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half; else target += exponential_lpdf(aux_unscaled | 1); } #include /model/priors_glm.stan #include /model/priors_betareg.stan if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 
0 : negative_infinity(); array[has_intercept] real alpha; array[has_intercept_z] real omega_int; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (has_intercept_z == 1) { omega_int[1] = gamma_z[1] - dot_product(zbar, omega); // adjust betareg intercept } if (compute_mean_PPD) { vector[N] eta_z; #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (make_lower(family, link) == negative_infinity() && make_upper(family, link) == positive_infinity()) eta += gamma[1]; else if (family == 4 && link == 5) { real max_eta = max(eta); alpha[1] -= max_eta; eta += gamma[1] - max_eta; } else { real min_eta = min(eta); alpha[1] -= min_eta; eta += gamma[1] - min_eta; } } else { #include /model/eta_no_intercept.stan } #include /model/make_eta_z.stan // adjust eta_z according to links if (has_intercept_z == 1) { if (link_phi > 1) { omega_int[1] -= min(eta_z); eta_z += gamma_z[1] - min(eta_z); } else { eta_z += gamma_z[1]; } } else { // has_intercept_z == 0 #include /model/eta_z_no_intercept.stan } if (SSfun > 0) { // nlmer vector[len_y] eta_nlmer; matrix[len_y, K] P; P = reshape_vec(eta, len_y, K); if (SSfun < 5) { if (SSfun <= 2) { if (SSfun == 1) eta_nlmer = SS_asymp(input, P); else eta_nlmer = SS_asympOff(input, P); } else if (SSfun == 3) eta_nlmer = SS_asympOrig(input, P); else eta_nlmer = SS_biexp(input, P); } else { if (SSfun <= 7) { if (SSfun == 5) eta_nlmer = SS_fol(Dose, input, P); else if (SSfun == 6) eta_nlmer = SS_fpl(input, P); else eta_nlmer = SS_gompertz(input, P); } else { if (SSfun == 8) eta_nlmer = SS_logis(input, P); else if (SSfun == 9) eta_nlmer = SS_micmen(input, P); else eta_nlmer = SS_weibull(input, P); } } for (n in 1 : len_y) mean_PPD += normal_rng(eta_nlmer[n], aux); } else if (family == 1) { vector[N] mu = link > 1 ? 
linkinv_gauss(eta, link) : eta; for (n in 1 : len_y) mean_PPD += normal_rng(mu[n], aux); } else if (family == 2) { vector[N] mu = link > 1 ? linkinv_gamma(eta, link) : eta; for (n in 1 : len_y) mean_PPD += gamma_rng(aux, aux / mu[n]); } else if (family == 3) { vector[N] mu = link > 1 ? linkinv_inv_gaussian(eta, link) : eta; for (n in 1 : len_y) mean_PPD += inv_gaussian_rng(mu[n], aux); } else if (family == 4 && link_phi == 0) { vector[N] mu = linkinv_beta(eta, link); for (n in 1 : N) { real mu_n = mu[n]; if (aux <= 0) mean_PPD += bernoulli_rng(0.5); else if (mu_n >= 1) mean_PPD += 1; else if (mu_n > 0) mean_PPD += beta_rng(mu_n * aux, (1 - mu_n) * aux); } } else if (family == 4 && link_phi > 0) { vector[N] mu = linkinv_beta(eta, link); vector[N] phi = linkinv_beta_z(eta_z, link_phi); for (n in 1 : N) { real mu_n = mu[n]; real aux_n = phi[n]; if (aux_n <= 0) mean_PPD += bernoulli_rng(0.5); else if (mu_n >= 1) mean_PPD += 1; else if (mu_n > 0) mean_PPD += beta_rng(mu_n * aux_n, (1 - mu_n) * aux_n); } } mean_PPD /= len_y; } } rstanarm/src/stan_files/tdata/0000755000176200001440000000000014500256225016115 5ustar liggesusersrstanarm/src/stan_files/tdata/tdata_glm.stan0000644000176200001440000000110614500256225020736 0ustar liggesusers int len_z_T = 0; int len_var_group = sum(p) * (t > 0); int len_rho = sum(p) - t; int is_continuous = 0; // changed in continuous.stan int pos = 1; array[len_concentration] real delta; int hs; if (prior_dist <= 2) hs = 0; else if (prior_dist == 3) hs = 2; else if (prior_dist == 4) hs = 4; else hs = 0; for (i in 1:t) { if (p[i] > 1) { for (j in 1:p[i]) { delta[pos] = concentration[j]; pos += 1; } } for (j in 3:p[i]) len_z_T += p[i] - 1; } rstanarm/src/stan_files/tdata/tdata_mvmer.stan0000644000176200001440000000377514500256225021323 0ustar liggesusers // dimensions for hs priors int yHs1 = get_nvars_for_hs(M > 0 ? y_prior_dist[1] : 0); int yHs2 = get_nvars_for_hs(M > 1 ? y_prior_dist[2] : 0); int yHs3 = get_nvars_for_hs(M > 2 ? 
y_prior_dist[3] : 0); // data for decov prior int len_z_T = 0; int len_var_group = sum(p) * (t > 0); int len_rho = sum(p) - t; int pos = 1; array[len_concentration] real delta; // data for lkj prior array[prior_dist_for_cov == 2 ? (bK1 + choose(bK1, 2)) : 0] int bCov1_idx; array[prior_dist_for_cov == 2 ? (bK2 + choose(bK2, 2)) : 0] int bCov2_idx; // transformations of data real sum_log_y1 = M > 0 && (family[1] == 2 || family[1] == 3) ? sum(log(yReal1)) : not_a_number(); real sum_log_y2 = M > 1 && (family[2] == 2 || family[2] == 3) ? sum(log(yReal2)) : not_a_number(); real sum_log_y3 = M > 2 && (family[3] == 2 || family[3] == 3) ? sum(log(yReal3)) : not_a_number(); vector[M > 0 && family[1] == 3 ? yNobs[1] : 0] sqrt_y1; vector[M > 1 && family[2] == 3 ? yNobs[2] : 0] sqrt_y2; vector[M > 2 && family[3] == 3 ? yNobs[3] : 0] sqrt_y3; vector[M > 0 && family[1] == 3 ? yNobs[1] : 0] log_y1; vector[M > 1 && family[2] == 3 ? yNobs[2] : 0] log_y2; vector[M > 2 && family[3] == 3 ? yNobs[3] : 0] log_y3; if (M > 0 && family[1] == 3) { sqrt_y1 = sqrt(yReal1); log_y1 = log(yReal1); } if (M > 1 && family[2] == 3) { sqrt_y2 = sqrt(yReal2); log_y2 = log(yReal2); } if (M > 2 && family[3] == 3) { sqrt_y3 = sqrt(yReal3); log_y3 = log(yReal3); } // data for decov prior if (prior_dist_for_cov == 1) { for (i in 1:t) { if (p[i] > 1) { for (j in 1:p[i]) { delta[pos] = b_prior_concentration[j]; pos += 1; } } for (j in 3:p[i]) len_z_T += p[i] - 1; } } // data for lkj prior if (prior_dist_for_cov == 2) { if (bK1 > 0) bCov1_idx = lower_tri_indices(bK1); if (bK2 > 0) bCov2_idx = lower_tri_indices(bK2); } rstanarm/src/stan_files/tdata/tdata_betareg.stan0000644000176200001440000000020413340675562021600 0ustar liggesusers if (prior_dist_z <= 2) hs_z = 0; else if (prior_dist_z == 3) hs_z = 2; else if (prior_dist_z == 4) hs_z = 4; else hs_z = 0; rstanarm/src/stan_files/data/0000755000176200001440000000000014500256225015731 5ustar 
liggesusersrstanarm/src/stan_files/data/glmer_stuff2.stan0000644000176200001440000000066514500256225021226 0ustar liggesusers int num_non_zero; // number of non-zero elements in the Z matrix vector[num_non_zero] w; // non-zero elements in the implicit Z matrix array[num_non_zero] int v; // column indices for w array[t > 0 ? N + 1 : 0] int u; // where the non-zeros start in each row int special_case; // is the only term (1|group) rstanarm/src/stan_files/data/dimensions_mvmer.stan0000644000176200001440000000265414500256225022205 0ustar liggesusers // population level dimensions int M; // num submodels with data (limit of 3) array[3] int has_aux; // has auxiliary param int has_weights; // has observation weights array[3] int resp_type; // 1=real,2=integer,0=none array[3] int intercept_type; // 1=unbounded,2=lob,3=upb,0=none array[3] int yNobs; // num observations array[3] int yNeta; // required length of eta array[3] int yK; // num predictors // group level dimensions, for decov prior int t; // num. terms (maybe 0) with a | in the glmer formula array[t] int p; // num. variables on the LHS of each | array[t] int l; // num. 
levels for the factor(s) on the RHS of each | int q; // conceptually equals \sum_{i=1}^t p_i \times l_i int len_theta_L; // length of the theta_L vector // group level dimensions, for lkj prior // group factor 1 int bN1; // num groups int bK1; // total num params array[3] int bK1_len; // num params in each submodel array[3,2] int bK1_idx; // beg/end index for group params // group factor 2 int bN2; // num groups int bK2; // total num params array[3] int bK2_len; // num params in each submodel array[3,2] int bK2_idx; // beg/end index for group params rstanarm/src/stan_files/data/data_betareg.stan0000644000176200001440000000162614500256225021227 0ustar liggesusers // betareg data int has_intercept_z; // presence of z intercept int link_phi; // link transformation for eta_z (0 => no z in model) int z_dim; // dimensions of z vars matrix[N, z_dim] betareg_z; // matrix of z vars row_vector[z_dim] zbar; // mean of predictors // betareg hyperparameters int prior_dist_z; int prior_dist_for_intercept_z; vector[z_dim] prior_scale_z; real prior_scale_for_intercept_z; vector[z_dim] prior_mean_z; real prior_mean_for_intercept_z; vector[z_dim] prior_df_z; real prior_df_for_intercept_z; real global_prior_scale_z; real global_prior_df_z; real slab_df_z; real slab_scale_z; array[prior_dist_z == 7 ? z_dim : 0] int num_normals_z; rstanarm/src/stan_files/data/glmer_stuff.stan0000644000176200001440000000144114500256225021135 0ustar liggesusers // glmer stuff, see table 3 of // https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf int t; // num. terms (maybe 0) with a | in the glmer formula array[t] int p; // num. variables on the LHS of each | array[t] int l; // num. 
levels for the factor(s) on the RHS of each | int q; // conceptually equals \sum_{i=1}^t p_i \times l_i int len_theta_L; // length of the theta_L vector // hyperparameters for glmer stuff; if t > 0 priors are mandatory vector[t] shape; vector[t] scale; int len_concentration; array[len_concentration] real concentration; int len_regularization; array[len_regularization] real regularization; rstanarm/src/stan_files/data/hyperparameters_event.stan0000644000176200001440000000133013365374540023242 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior vector[e_K] e_prior_mean; real e_prior_mean_for_intercept; vector[basehaz_df] e_prior_mean_for_aux; vector[e_K] e_prior_scale; real e_prior_scale_for_intercept; vector[basehaz_df] e_prior_scale_for_aux; vector[e_K] e_prior_df; real e_prior_df_for_intercept; vector[basehaz_df] e_prior_df_for_aux; real e_global_prior_scale; // for hs priors only real e_global_prior_df; real e_slab_df; real e_slab_scale; rstanarm/src/stan_files/data/data_assoc.stan0000644000176200001440000002062114500256225020722 0ustar liggesusers // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso int a_prior_dist; //--- dimensions for association structure // num. of association parameters int a_K; // used for centering assoc terms vector[a_K] a_xbar; // used for scaling assoc terms vector[a_K] a_scale; // 0 = no assoc structure, 1 = any assoc structure int assoc; // which components are required to build association terms array[6,3] int assoc_uses; // which association terms does each submodel use array[16,M] int has_assoc; // num. of shared random effects int sum_size_which_b; // num. of shared random effects for each long submodel array[M] int size_which_b; // which random effects are shared for each long submodel array[sum_size_which_b] int which_b_zindex; // num. of shared random effects incl fixed component int sum_size_which_coef; // num. 
of shared random effects incl fixed component for each long submodel array[M] int size_which_coef; // which random effects are shared incl fixed component array[sum_size_which_coef] int which_coef_zindex; // which fixed effects are shared array[sum_size_which_coef] int which_coef_xindex; // total num pars used in assoc*assoc interactions int sum_size_which_interactions; // num pars used in assoc*assoc interactions, by submodel // and by evev/evmv/mvev/mvmv interactions array[M*4] int size_which_interactions; // which terms to interact with array[sum_size_which_interactions] int which_interactions; //---- data for calculating eta in GK quadrature array[3] int nrow_y_Xq; // num. rows in long. predictor matrix at quadpoints // fe design matrix at quadpoints matrix[assoc_uses[1,1] == 1 ? nrow_y_Xq[1] : 0, yK[1]] y1_xq_eta; matrix[assoc_uses[1,2] == 1 ? nrow_y_Xq[2] : 0, yK[2]] y2_xq_eta; matrix[assoc_uses[1,3] == 1 ? nrow_y_Xq[3] : 0, yK[3]] y3_xq_eta; // offset values at quadpoints vector[has_offset[1] && assoc_uses[1,1] == 1 ? nrow_y_Xq[1] : 0] y1_offset_eta; vector[has_offset[2] && assoc_uses[1,2] == 1 ? nrow_y_Xq[2] : 0] y2_offset_eta; vector[has_offset[3] && assoc_uses[1,3] == 1 ? nrow_y_Xq[3] : 0] y3_offset_eta; // re design matrix at quadpoints, group factor 1 array[bK1_len[1]] vector[assoc_uses[1,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] y1_z1q_eta; array[bK1_len[2]] vector[assoc_uses[1,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z1q_eta; array[bK1_len[3]] vector[assoc_uses[1,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] y3_z1q_eta; array[assoc_uses[1,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z1q_id_eta; array[assoc_uses[1,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z1q_id_eta; array[assoc_uses[1,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z1q_id_eta; // re design matrix at quadpoints, group factor 2 array[bK2_len[1]] vector[assoc_uses[1,1] == 1 && bK2_len[1] > 0 ? 
nrow_y_Xq[1] : 0] y1_z2q_eta; array[bK2_len[2]] vector[assoc_uses[1,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z2q_eta; array[bK2_len[3]] vector[assoc_uses[1,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq[3] : 0] y3_z2q_eta; array[assoc_uses[1,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z2q_id_eta; array[assoc_uses[1,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z2q_id_eta; array[assoc_uses[1,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z2q_id_eta; //---- data for calculating derivative of eta in GK quadrature // fe design matrix at quadpoints matrix[assoc_uses[2,1] == 1 ? nrow_y_Xq[1] : 0, yK[1]] y1_xq_eps; matrix[assoc_uses[2,2] == 1 ? nrow_y_Xq[2] : 0, yK[2]] y2_xq_eps; matrix[assoc_uses[2,3] == 1 ? nrow_y_Xq[3] : 0, yK[3]] y3_xq_eps; // offset values at quadpoints vector[has_offset[1] && assoc_uses[2,1] == 1 ? nrow_y_Xq[1] : 0] y1_offset_eps; vector[has_offset[2] && assoc_uses[2,2] == 1 ? nrow_y_Xq[2] : 0] y2_offset_eps; vector[has_offset[3] && assoc_uses[2,3] == 1 ? nrow_y_Xq[3] : 0] y3_offset_eps; // re design matrix at quadpoints, group factor 1 array[bK1_len[1]] vector[assoc_uses[2,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] y1_z1q_eps; array[bK1_len[2]] vector[assoc_uses[2,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z1q_eps; array[bK1_len[3]] vector[assoc_uses[2,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] y3_z1q_eps; array[assoc_uses[2,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z1q_id_eps; array[assoc_uses[2,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z1q_id_eps; array[assoc_uses[2,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z1q_id_eps; // re design matrix at quadpoints, group factor 2 array[bK2_len[1]] vector[assoc_uses[2,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq[1] : 0] y1_z2q_eps; array[bK2_len[2]] vector[assoc_uses[2,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] y2_z2q_eps; array[bK2_len[3]] vector[assoc_uses[2,3] == 1 && bK2_len[3] > 0 ? 
nrow_y_Xq[3] : 0] y3_z2q_eps; array[assoc_uses[2,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq[1] : 0] int y1_z2q_id_eps; array[assoc_uses[2,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq[2] : 0] int y2_z2q_id_eps; array[assoc_uses[2,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq[3] : 0] int y3_z2q_id_eps; //---- data for calculating integral of eta in GK quadrature // num. of nodes for GK quadrature for area under marker trajectory int auc_qnodes; int nrow_y_Xq_auc; // num. rows in long. predictor matrix at auc quadpoints vector[sum(assoc_uses[3,]) > 0 ? nrow_y_Xq_auc : 0] auc_qwts; // fe design matrix at quadpoints matrix[assoc_uses[3,1] == 1 ? nrow_y_Xq_auc : 0, yK[1]] y1_xq_auc; matrix[assoc_uses[3,2] == 1 ? nrow_y_Xq_auc : 0, yK[2]] y2_xq_auc; matrix[assoc_uses[3,3] == 1 ? nrow_y_Xq_auc : 0, yK[3]] y3_xq_auc; // offset values at quadpoints vector[has_offset[1] && assoc_uses[3,1] == 1 ? nrow_y_Xq_auc : 0] y1_offset_auc; vector[has_offset[2] && assoc_uses[3,2] == 1 ? nrow_y_Xq_auc : 0] y2_offset_auc; vector[has_offset[3] && assoc_uses[3,3] == 1 ? nrow_y_Xq_auc : 0] y3_offset_auc; // re design matrix at quadpoints, group factor 1 array[bK1_len[1]] vector[assoc_uses[3,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq_auc : 0] y1_z1q_auc; array[bK1_len[2]] vector[assoc_uses[3,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq_auc : 0] y2_z1q_auc; array[bK1_len[3]] vector[assoc_uses[3,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq_auc : 0] y3_z1q_auc; array[assoc_uses[3,1] == 1 && bK1_len[1] > 0 ? nrow_y_Xq_auc : 0] int y1_z1q_id_auc; array[assoc_uses[3,2] == 1 && bK1_len[2] > 0 ? nrow_y_Xq_auc : 0] int y2_z1q_id_auc; array[assoc_uses[3,3] == 1 && bK1_len[3] > 0 ? nrow_y_Xq_auc : 0] int y3_z1q_id_auc; // re design matrix at quadpoints, group factor 2 array[bK2_len[1]] vector[assoc_uses[3,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq_auc : 0] y1_z2q_auc; array[bK2_len[2]] vector[assoc_uses[3,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq_auc : 0] y2_z2q_auc; array[bK2_len[3]] vector[assoc_uses[3,3] == 1 && bK2_len[3] > 0 ? 
nrow_y_Xq_auc : 0] y3_z2q_auc; array[assoc_uses[3,1] == 1 && bK2_len[1] > 0 ? nrow_y_Xq_auc : 0] int y1_z2q_id_auc; array[assoc_uses[3,2] == 1 && bK2_len[2] > 0 ? nrow_y_Xq_auc : 0] int y2_z2q_id_auc; array[assoc_uses[3,3] == 1 && bK2_len[3] > 0 ? nrow_y_Xq_auc : 0] int y3_z2q_id_auc; //---- data for calculating assoc*data interactions in GK quadrature // num assoc pars used in {ev/es/mv/ms}*data interactions array[M*4] int a_K_data; // design matrix for interacting with ev/es/mv/ms at quadpoints matrix[sum(nrow_y_Xq[1:M]), sum(a_K_data)] y_Xq_data; // indexing specifying the rows of y_Xq_data that correspond to // each submodel array[3,2] int idx_q; //---- data for combining lower level units clustered within patients array[M] int has_grp; // 1 = has clustering below patient level int grp_assoc; // 1=sum, 2=mean, 3=min, 4=max array[nrow_e_Xq,2] int grp_idx; rstanarm/src/stan_files/data/hyperparameters_mvmer.stan0000644000176200001440000000276514500256225023253 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior // coefficients vector[yK[1]] y_prior_mean1; vector[yK[2]] y_prior_mean2; vector[yK[3]] y_prior_mean3; vector[yK[1]] y_prior_scale1; vector[yK[2]] y_prior_scale2; vector[yK[3]] y_prior_scale3; vector[yK[1]] y_prior_df1; vector[yK[2]] y_prior_df2; vector[yK[3]] y_prior_df3; vector[M] y_global_prior_df; // for hs priors only vector[M] y_global_prior_scale; // for hs priors only vector[M] y_slab_df; // for hs priors only vector[M] y_slab_scale; // for hs priors only // intercepts vector[M] y_prior_mean_for_intercept; vector[M] y_prior_scale_for_intercept; vector[M] y_prior_df_for_intercept; // auxiliary params vector[M] y_prior_mean_for_aux; vector[M] y_prior_scale_for_aux; vector[M] y_prior_df_for_aux; // decov prior stuff int len_concentration; int len_regularization; vector[t] b_prior_shape; vector[t] b_prior_scale; array[len_concentration] real b_prior_concentration; array[len_regularization] real b_prior_regularization; // lkj 
prior stuff vector[bK1] b1_prior_scale; vector[bK2] b2_prior_scale; vector[bK1] b1_prior_df; vector[bK2] b2_prior_df; real b1_prior_regularization; real b2_prior_regularization; rstanarm/src/stan_files/data/hyperparameters.stan0000644000176200001440000000156014500256225022035 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior vector[K] prior_scale; real prior_scale_for_intercept; real prior_scale_for_aux; vector[K_smooth > 0 ? max(smooth_map) : 0] prior_scale_for_smooth; vector[K] prior_mean; real prior_mean_for_intercept; real prior_mean_for_aux; vector[K_smooth > 0 ? max(smooth_map) : 0] prior_mean_for_smooth; vector[K] prior_df; real prior_df_for_intercept; real prior_df_for_aux; vector[K_smooth > 0 ? max(smooth_map) : 0] prior_df_for_smooth; real global_prior_df; // for hs priors only real global_prior_scale; // for hs priors only real slab_df; // for hs prior only real slab_scale; // for hs prior only array[prior_dist == 7 ? K : 0] int num_normals; rstanarm/src/stan_files/data/NKX.stan0000644000176200001440000000147614500256225017270 0ustar liggesusers // dimensions int N; // number of observations int K; // number of predictors // data vector[K] xbar; // predictor means int dense_X; // flag for dense vs. sparse array[dense_X] matrix[N,K] X; // centered predictor matrix in the dense case // stuff for the sparse case int nnz_X; // number of non-zero elements in the implicit X matrix vector[nnz_X] w_X; // non-zero elements in the implicit X matrix array[nnz_X] int v_X; // column indices for w_X // where the non-zeros start in each row of X array[dense_X ? 0 : N + 1] int u_X; // smooths int K_smooth; matrix[N,K_smooth] S; array[K_smooth] int smooth_map; rstanarm/src/stan_files/data/data_mvmer.stan0000644000176200001440000000524414500256225020744 0ustar liggesusers // population level data array[resp_type[1] == 2 ? yNobs[1] : 0] int yInt1; // integer responses array[resp_type[2] == 2 ? yNobs[2] : 0] int yInt2; array[resp_type[3] == 2 ? 
yNobs[3] : 0] int yInt3; vector[resp_type[1] == 1 ? yNobs[1] : 0] yReal1; // real responses vector[resp_type[2] == 1 ? yNobs[2] : 0] yReal2; vector[resp_type[3] == 1 ? yNobs[3] : 0] yReal3; matrix[yNeta[1],yK[1]] yX1; // fe design matrix matrix[yNeta[2],yK[2]] yX2; matrix[yNeta[3],yK[3]] yX3; vector[yK[1]] yXbar1; // predictor means vector[yK[2]] yXbar2; vector[yK[3]] yXbar3; // family and link (determined by 'append_mvmer_famlink' R function) // 1 = gaussian // 2 = gamma // 3 = inverse gaussian // 4 = bernoulli // 5 = binomial (n>1) // 6 = poisson // 7 = negative binomial array[M] int family; array[M] int link; // varies by family // group level data, group factor 1 array[bK1_len[1]] vector[bK1_len[1] > 0 ? yNeta[1] : 0] y1_Z1; // re design matrix array[bK1_len[2]] vector[bK1_len[2] > 0 ? yNeta[2] : 0] y2_Z1; array[bK1_len[3]] vector[bK1_len[3] > 0 ? yNeta[3] : 0] y3_Z1; array[bK1_len[1] > 0 ? yNeta[1] : 0] int y1_Z1_id; // group indexing for y1_Z1 array[bK1_len[2] > 0 ? yNeta[2] : 0] int y2_Z1_id; // group indexing for y2_Z1 array[bK1_len[3] > 0 ? yNeta[3] : 0] int y3_Z1_id; // group indexing for y3_Z1 // group level data, group factor 2 array[bK2_len[1]] vector[bK2_len[1] > 0 ? yNeta[1] : 0] y1_Z2; // re design matrix array[bK2_len[2]] vector[bK2_len[2] > 0 ? yNeta[2] : 0] y2_Z2; array[bK2_len[3]] vector[bK2_len[3] > 0 ? yNeta[3] : 0] y3_Z2; array[bK2_len[1] > 0 ? yNeta[1] : 0] int y1_Z2_id; // group indexing for y1_Z2 array[bK2_len[2] > 0 ? yNeta[2] : 0] int y2_Z2_id; // group indexing for y2_Z2 array[bK2_len[3] > 0 ? 
yNeta[3] : 0] int y3_Z2_id; // group indexing for y3_Z2 // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso, 7 = product_normal array[3] int y_prior_dist; array[M] int y_prior_dist_for_intercept; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential array[M] int y_prior_dist_for_aux; // prior family: 1 = decov, 2 = lkj int prior_dist_for_cov; // flag indicating whether to draw from the prior int prior_PD; // 1 = yes // offset array[3] int has_offset; // 0 = No, 1 = Yes vector[has_offset[1] ? yNeta[1] : 0] y1_offset; vector[has_offset[2] ? yNeta[2] : 0] y2_offset; vector[has_offset[3] ? yNeta[3] : 0] y3_offset; rstanarm/src/stan_files/data/data_glm.stan0000644000176200001440000000142514366062356020404 0ustar liggesusers // flag indicating whether to draw from the prior int prior_PD; // 1 = yes int compute_mean_PPD; // 1 = yes // intercept int has_intercept; // 1 = yes // link function from location to linear predictor int link; // interpretation varies by .stan file // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso, 7 = product_normal int prior_dist; int prior_dist_for_intercept; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential int prior_dist_for_aux; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential int prior_dist_for_smooth; rstanarm/src/stan_files/data/weights_offset.stan0000644000176200001440000000033013722762571021647 0ustar liggesusers // weights int has_weights; // 0 = No, 1 = Yes vector[has_weights ? N : 0] weights; // offset int has_offset; // 0 = No, 1 = Yes vector[has_offset ? 
N : 0] offset_; rstanarm/src/stan_files/data/hyperparameters_assoc.stan0000644000176200001440000000053713340675562023242 0ustar liggesusers // hyperparameter values are set to 0 if there is no prior vector[a_K] a_prior_mean; vector[a_K] a_prior_scale; vector[a_K] a_prior_df; real a_global_prior_scale; // for hs priors only real a_global_prior_df; real a_slab_df; real a_slab_scale; rstanarm/src/stan_files/data/data_event.stan0000644000176200001440000000312613365374540020745 0ustar liggesusers // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = hs, 4 = hs_plus, // 5 = laplace, 6 = lasso int e_prior_dist; int e_prior_dist_for_intercept; // prior family: 0 = none, 1 = normal, 2 = student_t, 3 = exponential int e_prior_dist_for_aux; // prior for basehaz params // data for event submodel real norm_const; // constant shift for log baseline hazard int e_K; // num. of predictors in event submodel int Npat; // num. individuals (equal to l[id_var] - 1) int Nevents; // num. events (ie. not censored) int qnodes; // num. of nodes for Gauss-Kronrod quadrature int Npat_times_qnodes; int basehaz_type; // 1 = weibull, 2 = B-splines, 3 = piecewise int basehaz_df; // df for baseline hazard int e_has_intercept; // 1 = yes int nrow_e_Xq; // num. rows in event predictor matrix at quad points matrix[e_K > 0 ? 
nrow_e_Xq : 0, e_K] e_Xq; // predictor matrix (event submodel) at qpts, centred vector[nrow_e_Xq] e_times; // event times and unstandardised quadrature points matrix[nrow_e_Xq,basehaz_df] basehaz_X; // design matrix (basis terms) for baseline hazard vector[e_K] e_xbar; // predictor means (event submodel) vector[Npat] e_weights; // weights, set to zero if not used vector[Npat_times_qnodes] e_weights_rep; // repeated weights, set to zero if not used vector[Npat_times_qnodes] qwts; // GK quadrature weights with (b-a)/2 scaling rstanarm/src/stan_files/functions/0000755000176200001440000000000014505733323017034 5ustar liggesusersrstanarm/src/stan_files/functions/count_likelihoods.stan0000644000176200001440000000342514500256225023441 0ustar liggesusers /** * Apply inverse link function to linear predictor * see help(poisson) in R * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_count(vector eta, int link) { if (link == 1) return exp(eta); // log else if (link == 2) return eta; // identity else if (link == 3) return(square(eta)); // sqrt else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector for the Poisson distribution * * @param y The integer array corresponding to the outcome variable. * @param eta The vector of linear predictors * @param link An integer indicating the link function * @return A vector */ vector pw_pois(array[] int y, vector eta, int link) { int N = rows(eta); vector[N] ll; if (link == 1) // log for (n in 1:N) ll[n] = poisson_log_lpmf(y[n] | eta[n]); else if (link <= 3) { // link = identity or sqrt vector[N] phi = linkinv_count(eta, link); for (n in 1:N) ll[n] = poisson_lpmf(y[n] | phi[n]) ; } else reject("Invalid link"); return ll; } /** * Pointwise (pw) log-likelihood vector for the negative binomial distribution * * @param y The integer array corresponding to the outcome variable. 
* @param eta The vector of linear predictors * @param theta The reciprocal_dispersion parameter * @param link An integer indicating the link function * @return A vector */ vector pw_nb(array[] int y, vector eta, real theta, int link) { int N = rows(eta); vector[N] rho = linkinv_count(eta, link); // link checked vector[N] ll; for (n in 1:N) ll[n] = neg_binomial_2_lpmf(y[n] | rho[n], theta); return ll; } rstanarm/src/stan_files/functions/SSfunctions.stan0000644000176200001440000001047314505733323022206 0ustar liggesusers/* These functions (without the underscores) are all documented in R See also Appendix C of Pinheiro and Bates https://books.google.com/books?id=3TVDAAAAQBAJ&lpg=PR3&dq=Pinheiro%20and%20Bates&pg=PA511#v=onepage&q&f=false These functions may be numerically unstable */ vector SS_asymp(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = R0, Phi_[,3] = lrc if (rows(Phi_) > 1) { vector[rows(Phi_)] Asym = Phi_[,1]; return Asym + (Phi_[,2] - Asym) .* exp(-exp(Phi_[,3]) .* input); } else { real Asym = Phi_[1,1]; return Asym + (Phi_[1,2] - Asym) * exp(-exp(Phi_[1,3]) * input); } } vector SS_asympOff(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = lrc, Phi_[,3] = c0 if (rows(Phi_) > 1) return Phi_[ ,1] .* (1 - exp(-exp(Phi_[ ,2]) .* (input - Phi_[ ,3]))); else return Phi_[1,1] * (1 - exp(-exp(Phi_[1,2]) * (input - Phi_[1,3]))); } vector SS_asympOrig(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = lrc if (rows(Phi_) > 1) return Phi_[ ,1] .* (1 - exp(-exp(Phi_[ ,2]) .* input)); else return Phi_[1,1] * (1 - exp(-exp(Phi_[1,2]) * input)); } vector SS_biexp(vector input, matrix Phi_) { // Phi_[,1] = A1, Phi_[,2] = lrc1, Phi_[,3] = A2, Phi_[,4] = lrc2 if (rows(Phi_) > 1) return Phi_[ ,1] .* exp(-exp(Phi_[ ,2]) .* input) + Phi_[ ,3] .* exp(-exp(Phi_[ ,4]) .* input); else return Phi_[1,1] * exp(-exp(Phi_[1,2]) * input) + Phi_[1,3] * exp(-exp(Phi_[1,4]) * input); } vector SS_fol(vector Dose, vector input, matrix Phi_) { // Phi_[,1] = lKe, 
Phi_[,2] = lKa, Phi_[,3] = lCl int Phi__rows = rows(Phi_); if (Phi__rows > 1) { vector[Phi__rows] lKe = Phi_[,1]; vector[Phi__rows] lKa = Phi_[,2]; vector[Phi__rows] exp_lKe = exp(lKe); vector[Phi__rows] exp_lKa = exp(lKa); return Dose .* exp(lKe + lKa - Phi_[,3]) .* (exp(-exp_lKe .* input) - exp(-exp_lKa .* input)) ./ (exp_lKa - exp_lKe); } else { real lKe = Phi_[1,1]; real lKa = Phi_[1,2]; real exp_lKe = exp(lKe); real exp_lKa = exp(lKa); return Dose * exp(lKe + lKa - Phi_[1,3]) .* (exp(-exp_lKe * input) - exp(-exp_lKa * input)) / (exp_lKa - exp_lKe); } } vector SS_fpl(vector input, matrix Phi_) { // Phi_[,1] = A, Phi_[,2] = B, Phi_[,3] = xmid, Phi_[,4] = scal // input is generally data so cannot switch signs if (rows(Phi_) > 1) { vector[rows(Phi_)] A = Phi_[,1]; return A + (Phi_[,2] - A) ./ (1 + exp((Phi_[,3] - input) ./ exp(Phi_[,4]))); } else { real A = Phi_[1,1]; return A + rep_vector(Phi_[1,2] - A, rows(input)) ./ (1 + exp((Phi_[1,3] - input) / exp(Phi_[1,4]))); } } vector SS_gompertz(vector x, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = b2, Phi_[,3] = b3 vector[rows(x)] out; if (rows(Phi_) > 1) for (i in 1:rows(x)) out[i] = Phi_[i,1] * exp(-Phi_[i,2] * Phi_[i,3] ^ x[i]); else { real Asym = Phi_[1,1]; real b2 = Phi_[1,2]; real b3 = Phi_[1,3]; for (i in 1:rows(x)) out[i] = Asym * exp(-b2 * b3 ^ x[i]); } return out; } vector SS_logis(vector input, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = xmid, Phi_[,3] = scal // input is typically data so cannot switch signs of everything if (rows(Phi_) > 1) return Phi_[,1] ./ (1 + exp( (Phi_[,2] - input) ./ exp(Phi_[,3]))); else return rep_vector(Phi_[1,1], rows(input)) ./ (1 + exp( (Phi_[1,2] - input) / exp(Phi_[1,3]))); } vector SS_micmen(vector input, matrix Phi_) { // Phi_[,1] = Vm, Phi_[,2] = K if (rows(Phi_) > 1) return Phi_[ ,1] .* input ./ (Phi_[ ,2] + input); else return Phi_[1,1] * input ./ (Phi_[1,2] + input); } vector SS_weibull(vector x, matrix Phi_) { // Phi_[,1] = Asym, Phi_[,2] = Drop, Phi_[,3] = lrc, 
Phi_[,4] = pwr vector[rows(x)] out; if (rows(Phi_) > 1) for (i in 1:rows(x)) out[i] = Phi_[i,1] - Phi_[i,2] * exp(-exp(Phi_[i,3]) * x[i] ^ Phi_[i,4]); else { real Asym = Phi_[1,1]; real Drop = Phi_[1,2]; real lrc = Phi_[1,3]; real pwr = Phi_[1,4]; for (i in 1:rows(x)) out[i] = Asym - Drop * exp(-exp(lrc) * x[i] ^ pwr); } return out; } matrix reshape_vec(vector x, int Rows, int Cols) { matrix[Rows, Cols] out; int pos = 1; if (rows(x) != Rows * Cols) reject("x is the wrong length"); for (c in 1:Cols) for (r in 1:Rows) { out[r,c] = x[pos]; pos += 1; } return out; } rstanarm/src/stan_files/functions/bernoulli_likelihoods.stan0000644000176200001440000001034414505733323024306 0ustar liggesusers /** * Apply inverse link function to linear predictor * see help(binom) in R * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_bern(vector eta, int link) { if (link == 1) return(inv_logit(eta)); // logit else if (link == 2) return(Phi(eta)); // probit else if (link == 3) return(atan(eta) / pi() + 0.5); // cauchit else if (link == 4) return(exp(eta)); // log else if (link == 5) return(inv_cloglog(eta)); // cloglog else reject("Invalid link"); return eta; // never reached } /** * Increment with the unweighted log-likelihood * @param link An integer indicating the link function * @param eta0 A vector of linear predictors | y = 0 * @param eta1 A vector of linear predictors | y = 1 * @param N An integer array of length 2 giving the number of * observations where y = 0 and y = 1 respectively * @return lp__ */ real bern_lpdf(vector eta0, vector eta1, int link, array[] int N) { real lp = 0; if (link == 1) { // logit lp += logistic_lccdf(eta0 | 0, 1); lp += logistic_lcdf( eta1 | 0, 1); } else if (link == 2) { // probit lp += normal_lccdf(eta0 | 0, 1); lp += normal_lcdf( eta1 | 0, 1); } else if (link == 3) { // cauchit lp += cauchy_lccdf(eta0 | 0, 1); lp += cauchy_lcdf( eta1 | 0, 1); } 
else if(link == 4) { // log lp += sum(log1m_exp(eta0)); lp += sum(eta1); // already in log form } else if(link == 5) { // cloglog lp += sum(log1m_exp(-exp(eta1))); lp += sum(-exp(eta0)); } else reject("Invalid link"); return lp; } /** * Pointwise (pw) log-likelihood vector * * @param y The integer outcome variable. Note that function is * called separately with y = 0 and y = 1 * @param eta Vector of linear predictions * @param link An integer indicating the link function * @return A vector */ vector pw_bern(int y, vector eta, int link) { int N = rows(eta); vector[N] ll; if (link == 1) { // logit for (n in 1:N) ll[n] = bernoulli_logit_lpmf(y | eta[n]); } else if (link <= 5) { // link = probit, cauchit, log, or cloglog vector[N] pi = linkinv_bern(eta, link); // may not be stable for (n in 1:N) ll[n] = bernoulli_lpmf(y | pi[n]); } else reject("Invalid link"); return ll; } /** * Log-normalizing constant in the clogit case * * @param N_j Integer number of observations in the j-th group * @param D_j Integer number of successes in the j-th group * @param eta_j Vector of linear predictions in the j-th group * @return A scalar that normalizes the probabilities on the log-scale */ real log_clogit_denom(int N_j, int D_j, vector eta_j) { if (D_j == 1 && N_j == rows(eta_j)) return log_sum_exp(eta_j); if (D_j == 0) return 0; if (N_j == D_j) { if (D_j == 1) return eta_j[N_j]; return sum(segment(eta_j, N_j - 1, 2)); } else { int N_jm1 = N_j - 1; return log_sum_exp(log_clogit_denom(N_jm1, D_j, eta_j), log_clogit_denom(N_jm1, D_j - 1, eta_j) + eta_j[N_j]); } return not_a_number(); // never reaches } /** * Log-likelihood for a clogit model * @param eta0 Linear predictors when y == 0 * @param eta1 Linear predictors when y == 1 * @param successes Integer array with the number of successes in group j * @param failures Integer array with the number of failures in group j * @param observations Integer array with the number of observations in group j * @return lp__ */ real 
clogit_lpdf(vector eta0, vector eta1, array[] int successes, array[] int failures, array[] int observations) { int J = num_elements(observations); int pos0 = 1; int pos1 = 1; vector[J] summands; for (j in 1:J) { int D_g = successes[j]; int N_g = observations[j]; int F_g = failures[j]; vector[N_g] eta_g = append_row(segment(eta1, pos1, D_g), segment(eta0, pos0, F_g)); summands[j] = log_clogit_denom(N_g, D_g, eta_g); pos0 += F_g; pos1 += D_g; } return sum(eta1) - sum(summands); } rstanarm/src/stan_files/functions/jm_functions.stan0000644000176200001440000002714514500256225022426 0ustar liggesusers /** * Scale a vector of auxiliary parameters based on prior information * * @param aux_unscaled A vector, the unscaled auxiliary parameters * @param prior_dist Integer, the type of prior distribution * @param prior_mean,prior_scale Vectors, the mean and scale * of the prior distribution * @return A vector, corresponding to the scaled auxiliary parameters */ vector make_basehaz_coef(vector aux_unscaled, int prior_dist, vector prior_mean, vector prior_scale) { vector[rows(aux_unscaled)] aux; if (prior_dist == 0) // none aux = aux_unscaled; else { aux = prior_scale .* aux_unscaled; if (prior_dist <= 2) // normal or student_t aux += prior_mean; } return aux; } /** * Log-prior for baseline hazard parameters * * @param aux_unscaled Vector (potentially of length 1) of unscaled * auxiliary parameter(s) * @param dist Integer specifying the type of prior distribution * @param scale Real specifying the scale for the prior distribution * @param df Real specifying the df for the prior distribution * @return nothing */ real basehaz_lpdf(vector aux_unscaled, int dist, vector scale, vector df) { real lp = 0; if (dist > 0) { if (dist == 1) lp += normal_lpdf(aux_unscaled | 0, 1); else if (dist == 2) lp += student_t_lpdf(aux_unscaled | df, 0, 1); else lp += exponential_lpdf(aux_unscaled | 1); } return lp; } /** * Take the linear predictor and collapse across lower level * units of the 
grouping factor clustered within patients, using * the function specified by 'grp_assoc' * * @param eta The linear predictor evaluated for all the lower * level units, having some length greater than N. * @param grp_idx An N-by-2 two dimensional array providing the * beginning and ending index of the lower level units in eta that * correspond to patient n (where n = 1,...,N). * @param grp_assoc The method for collapsing across the lower * level units; 1=sum, 2=mean, 3=min, 4=max. * @return A vector */ vector collapse_within_groups(vector eta, array[,] int grp_idx, int grp_assoc) { int N = size(grp_idx); vector[N] val; if (grp_assoc == 1) { // sum of lower level clusters for (n in 1:N) val[n] = sum(eta[grp_idx[n,1]:grp_idx[n,2]]); } else if (grp_assoc == 2) { // mean of lower level clusters for (n in 1:N) val[n] = mean(eta[grp_idx[n,1]:grp_idx[n,2]]); } else if (grp_assoc == 3) { // min of lower level clusters for (n in 1:N) val[n] = min(eta[grp_idx[n,1]:grp_idx[n,2]]); } else if (grp_assoc == 4) { // max of lower level clusters for (n in 1:N) val[n] = max(eta[grp_idx[n,1]:grp_idx[n,2]]); } return val; } /** * Create a design matrix for a shared random effects association * structure in the joint model * * @param b Vector of group-specific coefficients * @param l An integer array with the number of levels for the factor(s) on * the RHS of each | * @param p An integer array with the number of variables on the LHS of each | * @param pmat A matrix with the number variables on the LHS of each | in each * longitudinal submodel. The rows correspond to each |, meaning the separate * equations for each grouping variable, and the columns correspond to each * longitudinal submodel. If subject ID is the only grouping variable then the * matrix will have one row. If the joint model only has one longitudinal * submodel then the matrix will have one column. 
 * @param Npat Integer specifying number of individuals represented
 * in vector b
 * @param qnodes The number of quadrature nodes
 * @param which_b Integer array specifying the indices
 * of the random effects to use in the association structure
 * @param sum_size_which_b Integer specifying total number of
 * random effects that are to be used in the association structure
 * @param size_which_b Integer array specifying number of random effects from
 * each long submodel that are to be used in the association structure
 * @param t_i Integer specifying the index of the grouping factor that
 * corresponds to the patient-level
 * @param M An integer specifying the number of longitudinal submodels
 * @return A matrix with the desired random effects represented
 * in columns, and the individuals on the rows; the matrix is
 * repeated (qnodes + 1) times (bounded by rows)
 */
matrix make_x_assoc_shared_b(
  vector b, array[] int l, array[] int p, array[,] int pmat,
  int Npat, int qnodes, array[] int which_b, int sum_size_which_b,
  array[] int size_which_b, int t_i, int M) {
  int prior_shift; // num. ranefs prior to subject-specific ranefs
  int start_store;
  int end_store;
  matrix[Npat,sum_size_which_b] temp;
  matrix[(Npat*(qnodes+1)),sum_size_which_b] x_assoc_shared_b;
  // skip over group-level terms in b belonging to grouping factors
  // that come before the patient-level grouping factor t_i
  if (t_i == 1) prior_shift = 0;
  else prior_shift = sum(l[1:(t_i-1)]);
  for (i in 1:Npat) {
    int mark; // column of temp being filled next
    int start_collect; // index start of subject-specific ranefs for patient
    mark = 1;
    start_collect = prior_shift + (i - 1) * p[t_i];
    for (m in 1:M) {
      if (size_which_b[m] > 0) {
        int shift; // num. subject-specific ranefs in prior submodels
        int j_shift; // shift in indexing of which_b vector
        if (m == 1) {
          shift = 0;
          j_shift = 0;
        }
        else {
          shift = sum(pmat[t_i, 1:(m-1)]);
          j_shift = sum(size_which_b[1:(m-1)]);
        }
        for (j in 1:size_which_b[m]) {
          int item_collect; // subject-specific ranefs to select for current submodel
          item_collect = start_collect + shift + which_b[(j_shift + j)];
          temp[i,mark] = b[item_collect];
          mark += 1;
        }
      }
    }
  }
  // stack (qnodes + 1) copies of temp row-wise: one copy per
  // quadrature node plus one for the event time
  for (i in 1:(qnodes+1)) {
    start_store = (i - 1) * Npat + 1;
    end_store = i * Npat;
    x_assoc_shared_b[start_store:end_store,] = temp;
  }
  return x_assoc_shared_b;
}
/**
 * Create a design matrix for a shared fixed + random effects association
 * structure in the joint model
 *
 * @param b Vector of group-specific coefficients
 * @param l An integer array with the number of levels for the factor(s) on
 * the RHS of each |
 * @param p An integer array with the number of variables on the LHS of each |
 * @param pmat A matrix with the number variables on the LHS of each | in each
 * longitudinal submodel. The rows correspond to each |, meaning the separate
 * equations for each grouping variable, and the columns correspond to each
 * longitudinal submodel. If subject ID is the only grouping variable then the
 * matrix will have one row. If the joint model only has one longitudinal
 * submodel then the matrix will have one column.
* @param Npat Integer specifying number of individuals represented * in vector b * @param qnodes The number of quadrature nodes * @param which_b Integer array specifying the indices * of the random effects to use in the association structure * @param sum_size_which_b Integer specifying total number of * random effects that are to be used in the association structure * @param size_which_b Integer array specifying number of random effects from * each long submodel that are to be used in the association structure * @param t_i Integer specifying the index of the grouping factor that * corresponds to the patient-level * @param M An integer specifying the number of longitudinal submodels * @return A matrix with the desired random effects represented * in columns, and the individuals on the rows; the matrix is * repeated (qnodes + 1) times (bounded by rows) */ matrix make_x_assoc_shared_coef( vector b, vector beta, array[] int KM, int M, int t_i, array[] int l, array[] int p, array[,] int pmat, int Npat, int qnodes, int sum_size_which_coef, array[] int size_which_coef, array[] int which_coef_zindex, array[] int which_coef_xindex, array[] int has_intercept, array[] int has_intercept_nob, array[] int has_intercept_lob, array[] int has_intercept_upb, array[] real gamma_nob, array[] real gamma_lob, array[] real gamma_upb) { // in the loops below: // t_i should only really ever equal 1 (since shared_coef association // structure is not allowed if there is more than one clustering level) // i = levels (ie, individuals) // j = indices of the shared random effecs // m = models int t_shift; // skip over group-level coefficients for earlier grouping factors int start_store; int end_store; matrix[Npat,sum_size_which_coef] temp; matrix[(Npat*(qnodes+1)),sum_size_which_coef] x_assoc_shared_coef; if (t_i == 1) t_shift = 0; else t_shift = sum(l[1:(t_i-1)]); for (i in 1:Npat) { int mark; // counter for looping over shared coefficients int i_shift; // skip over group-level coefficients 
for earlier levels mark = 1; i_shift = (i - 1) * p[t_i]; for (m in 1:M) { if (size_which_coef[m] > 0) { // if model has shared coefficients int j_shift; // skip over elements of which_coef_zindex vector that are associated with earlier submodels int m_shift; // skip over individual i's group-level coefficients for earlier submodels int shift_nb; int shift_lb; int shift_ub; int shift_beta; if (m == 1) { j_shift = 0; m_shift = 0; shift_nb = 0; shift_lb = 0; shift_ub = 0; shift_beta = 0; } else { j_shift = sum(size_which_coef[1:(m-1)]); m_shift = sum(pmat[t_i, 1:(m-1)]); shift_nb = sum(has_intercept_nob[1:(m-1)]); shift_lb = sum(has_intercept_lob[1:(m-1)]); shift_ub = sum(has_intercept_upb[1:(m-1)]); shift_beta = sum(KM[1:(m-1)]); } for (j in 1:size_which_coef[m]) { int b_collect; // group-level coefficients to extract for current i, j, m int beta_collect_m; // within-submodel index of fixed effect coefficient to extract int beta_collect; // overall index of fixed effect coefficient to extract real coef; b_collect = t_shift + i_shift + m_shift + which_coef_zindex[(j_shift + j)]; beta_collect_m = which_coef_xindex[(j_shift + j)]; beta_collect = shift_beta + beta_collect_m; coef = b[b_collect]; // start with group-level coefficient if ((has_intercept[m] == 1) && (beta_collect == 1)) { // collect intercept if (has_intercept_nob[m] == 1) coef += gamma_nob[sum(has_intercept_nob[1:m])]; else if (has_intercept_lob[m] == 1) coef += gamma_lob[sum(has_intercept_lob[1:m])]; else if (has_intercept_upb[m] == 1) coef += gamma_upb[sum(has_intercept_upb[1:m])]; } else if (has_intercept[m] == 1) { // collect fixed effect whilst recognising intercept term // isn't in beta and correcting for that in the indexing coef += beta[(beta_collect - 1)]; } else coef += beta[beta_collect]; temp[i, mark] = coef; mark += 1; // move to next shared coefficient for individual i } } } } // repeat the temp matrix qnodes times (ie, rbind) for (i in 1:(qnodes+1)) { start_store = (i - 1) * Npat + 1; 
end_store = i * Npat; x_assoc_shared_coef[start_store:end_store, ] = temp; } return x_assoc_shared_coef; } rstanarm/src/stan_files/functions/continuous_likelihoods.stan0000644000176200001440000001750613722762571024537 0ustar liggesusers /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_gauss(vector eta, int link) { if (link == 1) return eta; else if (link == 2) return exp(eta); else if (link == 3) return inv(eta); else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. * @param link An integer indicating the link function * @return A vector */ vector pw_gauss(vector y, vector eta, real sigma, int link) { return -0.5 * log(6.283185307179586232 * sigma) - 0.5 * square((y - linkinv_gauss(eta, link)) / sigma); } /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. inverse-link(eta) */ vector linkinv_gamma(vector eta, int link) { if (link == 1) return eta; else if (link == 2) return exp(eta); else if (link == 3) return inv(eta); else reject("Invalid link"); return eta; // never reached } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. 
* @param eta A vector of linear predictors * @param shape A real number for the shape parameter * @param link An integer indicating the link function * @param sum_log_y A scalar equal to the sum of log(y) * @return A scalar log-likelihood */ real GammaReg(vector y, vector eta, real shape, int link, real sum_log_y) { real ret = rows(y) * (shape * log(shape) - lgamma(shape)) + (shape - 1) * sum_log_y; if (link == 2) // link is log ret -= shape * sum(eta) + shape * sum(y ./ exp(eta)); else if (link == 1) // link is identity ret -= shape * sum(log(eta)) + shape * sum(y ./ eta); else if (link == 3) // link is inverse ret += shape * sum(log(eta)) - shape * dot_product(eta, y); else reject("Invalid link"); return ret; } /** * Pointwise (pw) log-likelihood vector * * @param y A vector corresponding to the outcome variable. * @param shape A real number for the shape parameter * @param link An integer indicating the link function * @return A vector */ vector pw_gamma(vector y, vector eta, real shape, int link) { int N = rows(eta); vector[N] ll; if (link == 3) { // link = inverse for (n in 1:N) { ll[n] = gamma_lpdf(y[n] | shape, shape * eta[n]); } } else if (link == 2) { // link = log for (n in 1:N) { ll[n] = gamma_lpdf(y[n] | shape, shape / exp(eta[n])); } } else if (link == 1) { // link = identity for (n in 1:N) { ll[n] = gamma_lpdf(y[n] | shape, shape / eta[n]); } } else reject("Invalid link"); return ll; } /** * Apply inverse link function to linear predictor * * @param eta Linear predictor vector * @param link An integer indicating the link function * @return A vector, i.e. 
inverse-link(eta) */
vector linkinv_inv_gaussian(vector eta, int link) {
  if (link == 1) return eta; // identity
  if (link == 2) return exp(eta); // log
  if (link == 3) return inv(eta); // inverse
  if (link == 4) return inv_sqrt(eta); // 1/mu^2
  reject("Invalid link");
  return eta; // never reached
}
/**
 * Inverse Gaussian log-PDF, summed over observations
 *
 * @param y The vector of outcomes
 * @param mu The vector of conditional means
 * @param lambda A positive scalar dispersion parameter
 * @param sum_log_y A scalar equal to the sum of log(y)
 * @param sqrt_y A vector equal to sqrt(y)
 * @return A scalar, the total log density
 */
real inv_gaussian(vector y, vector mu, real lambda, real sum_log_y,
                  vector sqrt_y) {
  // 0.5 * N * log(lambda / (2*pi)) - 1.5 * sum(log y)
  //   - 0.5 * lambda * sum(((y - mu) / (mu * sqrt(y)))^2)
  real normalizer = 0.5 * rows(y) * log(lambda / 6.283185307179586232);
  real quad_form = dot_self((y - mu) ./ (mu .* sqrt_y));
  return normalizer - 1.5 * sum_log_y - 0.5 * lambda * quad_form;
}
/**
 * Pointwise (pw) log-likelihood vector for the inverse Gaussian model
 *
 * @param y A vector corresponding to the outcome variable.
 * @param eta The linear predictors
 * @param lambda A positive scalar dispersion parameter
 * @param link An integer indicating the link function
 * @param log_y A precalculated vector of the log of y
 * @param sqrt_y A precalculated vector of the square root of y
 * @return A vector of log-likelihoods
 */
vector pw_inv_gaussian(vector y, vector eta, real lambda, int link,
                       vector log_y, vector sqrt_y) {
  vector[rows(y)] mu = linkinv_inv_gaussian(eta, link); // link checked
  vector[rows(y)] std_resid = (y - mu) ./ (mu .* sqrt_y);
  return 0.5 * log(lambda / 6.283185307179586232)
         - 1.5 * log_y
         - 0.5 * lambda * square(std_resid);
}
/**
 * PRNG for the inverse Gaussian distribution
 *
 * Algorithm from wikipedia
 *
 * @param mu The expectation
 * @param lambda The dispersion
 * @return A draw from the inverse Gaussian distribution
 */
real inv_gaussian_rng(real mu, real lambda) {
  real mu2 = square(mu);
  // NB: the uniform draw must precede the normal draw to keep the
  // stream of RNG calls identical to the reference implementation
  real z = uniform_rng(0, 1);
  real y = square(normal_rng(0, 1));
  real x = mu
           + (mu2 * y - mu * sqrt(4 * mu * lambda * y + mu2 * square(y)))
             / (2 * lambda);
  // accept the smaller root with probability mu / (mu + x)
  if (z <= (mu / (mu + x))) return x;
  return mu2 / x;
}
/**
 * Apply inverse link function to linear
predictor for beta models
 *
 * @param eta Linear predictor vector
 * @param link An integer indicating the link function
 * @return A vector, i.e. inverse-link(eta)
 */
vector linkinv_beta(vector eta, int link) {
  if (link == 1) return inv_logit(eta); // logit
  if (link == 2) return Phi(eta); // probit
  if (link == 3) return inv_cloglog(eta); // cloglog
  if (link == 4) return 0.5 + atan(eta) / pi(); // cauchy
  if (link == 5) return exp(eta); // log
  if (link == 6) return 1 - inv_cloglog(-eta); // loglog
  reject("invalid link");
  return eta; // never reached
}
/**
 * Apply inverse link function to linear predictor for dispersion for beta models
 *
 * @param eta Linear predictor vector
 * @param link An integer indicating the link function
 * @return A vector, i.e. inverse-link(eta)
 */
vector linkinv_beta_z(vector eta, int link) {
  if (link == 1) return exp(eta); // log
  if (link == 2) return eta; // identity
  if (link == 3) return square(eta); // sqrt
  reject("Invalid link");
  return eta; // never reached
}
/**
 * Pointwise (pw) log-likelihood vector for beta models
 *
 * @param y The vector of outcomes
 * @param eta The linear predictors
 * @param dispersion Positive dispersion parameter
 * @param link An integer indicating the link function
 * @return A vector of log-likelihoods
 */
vector pw_beta(vector y, vector eta, real dispersion, int link) {
  vector[rows(y)] mu = linkinv_beta(eta, link); // link checked
  vector[rows(y)] ll;
  for (n in 1:rows(y)) {
    // mean/dispersion parameterization: a = mu*phi, b = (1 - mu)*phi
    ll[n] = beta_lpdf(y[n] | mu[n] * dispersion, (1 - mu[n]) * dispersion);
  }
  return ll;
}
/**
 * Pointwise (pw) log-likelihood vector for beta models with z variables
 *
 * @param y The vector of outcomes
 * @param eta The linear predictors (for y)
 * @param eta_z The linear predictors (for dispersion)
 * @param link An integer indicating the link function passed to linkinv_beta
 * @param link_phi An integer indicating the link function passed to linkinv_beta_z
 * @return A vector of log-likelihoods
 */
vector
pw_beta_z(vector y, vector eta, vector eta_z, int link, int link_phi) {
  int N = rows(y);
  vector[N] mu = linkinv_beta(eta, link); // link checked
  vector[N] phi = linkinv_beta_z(eta_z, link_phi); // link checked
  vector[N] ll;
  for (n in 1:N) {
    // observation-specific dispersion: a = mu*phi, b = (1 - mu)*phi
    ll[n] = beta_lpdf(y[n] | mu[n] * phi[n], (1 - mu[n]) * phi[n]);
  }
  return ll;
}
rstanarm/src/stan_files/functions/mvmer_functions.stan0000644000176200001440000003732614500256225023150 0ustar liggesusers
/**
 * Return the required number of local hs parameters
 *
 * @param prior_dist An integer indicating the prior distribution
 *   (3 = horseshoe, 4 = horseshoe+)
 * @return An integer: 2 for hs, 4 for hs_plus, 0 otherwise
 */
int get_nvars_for_hs(int prior_dist) {
  if (prior_dist == 3) return 2; // horseshoe
  if (prior_dist == 4) return 4; // horseshoe+
  return 0;
}
/**
 * Return the lower/upper bound for the specified intercept type
 *
 * @param intercept_type An integer specifying the type of intercept;
 *   0=no intercept, 1=unbounded, 2=lower bounded, 3=upper bounded
 * @return A real, corresponding to the lower bound
 */
real lb(int intercept_type) {
  if (intercept_type == 2) return 0; // lower bounded at zero
  return negative_infinity();
}
real ub(int intercept_type) { return intercept_type == 3 ?
0 : positive_infinity(); }
/**
 * Get the indices corresponding to the lower tri of a square matrix
 * (column-major indices of the on-and-below-diagonal elements)
 *
 * @param dim The number of rows in the square matrix
 * @return A vector of indices
 */
array[] int lower_tri_indices(int dim) {
  array[dim + choose(dim, 2)] int indices;
  int mark = 1;
  for (r in 1:dim) {
    for (c in r:dim) {
      // element at (row c, column r), c >= r, in column-major order
      indices[mark] = (r - 1) * dim + c;
      mark += 1;
    }
  }
  return indices;
}
/**
 * Scale the auxiliary parameter based on prior information
 *
 * @param aux_unscaled A real, the unscaled auxiliary parameter
 * @param prior_dist Integer, the type of prior distribution
 *   (0 = none, 1 = normal, 2 = student_t, otherwise exponential)
 * @param prior_mean,prior_scale Real scalars, the mean and scale
 * of the prior distribution
 * @return A real, corresponding to the scaled auxiliary parameter
 */
real make_aux(real aux_unscaled, int prior_dist,
              real prior_mean, real prior_scale) {
  real aux;
  if (prior_dist == 0) // none
    aux = aux_unscaled;
  else {
    aux = prior_scale * aux_unscaled;
    if (prior_dist <= 2) // normal or student_t
      aux += prior_mean;
  }
  return aux;
}
/**
 * Scale the primitive population level parameters based on prior information
 *
 * @param z_beta A vector of primitive parameters
 * @param prior_dist Integer, the type of prior distribution
 *   (0=none, 1=normal, 2=student_t, 3=hs, 4=hs_plus, 5=laplace, 6=lasso)
 * @param prior_mean,prior_scale Vectors of mean and scale parameters
 * for the prior distributions
 * @return A vector containing the population level parameters (coefficients)
 */
vector make_beta(vector z_beta, int prior_dist, vector prior_mean,
                 vector prior_scale, vector prior_df, real global_prior_scale,
                 array[] real global, array[] vector local, array[] real ool,
                 array[] vector mix, array[] real aux, int family,
                 real slab_scale, array[] real caux) {
  vector[rows(z_beta)] beta;
  if (prior_dist == 0) beta = z_beta;
  else if (prior_dist == 1) beta = z_beta .* prior_scale + prior_mean;
  else if (prior_dist == 2) for (k in 1:rows(prior_mean)) {
    // CFt transforms a standard normal primitive to a student_t variate
    beta[k] = CFt(z_beta[k], prior_df[k]) * prior_scale[k] + prior_mean[k];
  }
  else if (prior_dist == 3) { // horseshoe
    real c2 = square(slab_scale) * caux[1]; // regularizing slab variance
    if (family == 1) // don't need is_continuous since family == 1 is gaussian in mvmer
      beta = hs_prior(z_beta, global, local, global_prior_scale, aux[1], c2);
    else
      beta = hs_prior(z_beta, global, local, global_prior_scale, 1, c2);
  }
  else if (prior_dist == 4) { // horseshoe+
    real c2 = square(slab_scale) * caux[1];
    if (family == 1) // don't need is_continuous since family == 1 is gaussian in mvmer
      beta = hsplus_prior(z_beta, global, local, global_prior_scale, aux[1], c2);
    else
      beta = hsplus_prior(z_beta, global, local, global_prior_scale, 1, c2);
  }
  else if (prior_dist == 5) // laplace
    beta = prior_mean + prior_scale .* sqrt(2 * mix[1]) .* z_beta;
  else if (prior_dist == 6) // lasso
    beta = prior_mean + ool[1] * prior_scale .* sqrt(2 * mix[1]) .* z_beta;
  return beta;
}
/**
 * Create group-specific coefficients, see section 2 of
 * https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf
 *
 * @param z_b Vector whose elements are iid normal(0,sigma) a priori
 * @param theta Vector with covariance parameters as defined in lme4
 * @param p An integer array with the number variables on the LHS of each |
 * @param l An integer array with the number of levels for the factor(s) on
 * the RHS of each |
 * @param i The index of the grouping factor for which you want to return
 * the group-specific coefficients for
 * @return An array of group-specific coefficients for grouping factor i
 */
matrix make_b_matrix(vector z_b, vector theta_L, array[] int p, array[] int l,
                     int i) {
  matrix[p[i],l[i]] b_matrix;
  int nc = p[i];
  int b_mark = 1;
  int theta_L_mark = 1;
  // skip over entries belonging to earlier grouping factors
  if (i > 1) {
    for (j in 1:(i-1)) {
      theta_L_mark += p[j] + choose(p[j], 2);
      b_mark += p[j] * l[j];
    }
  }
  if (nc == 1) {
    // single coefficient per level: theta_L entry is just a std dev
    real theta_L_start = theta_L[theta_L_mark];
    for (s in b_mark:(b_mark + l[i] - 1))
      b_matrix[nc,s] = theta_L_start * z_b[s];
  }
  else {
    // rebuild the lower-triangular Cholesky block T_i from theta_L
    matrix[nc,nc] T_i = rep_matrix(0, nc, nc);
    for (c in 1:nc) {
      T_i[c,c] = theta_L[theta_L_mark];
      theta_L_mark += 1;
      for(r in (c+1):nc) {
        T_i[r,c] = theta_L[theta_L_mark];
        theta_L_mark += 1;
      }
    }
    for (j in 1:l[i]) {
vector[nc] temp = T_i * segment(z_b, b_mark, nc);
      b_matrix[,j] = temp;
      b_mark += nc;
    }
  }
  return b_matrix';
}
/**
 * Evaluate the linear predictor for the glmer submodel
 *
 * @param X Design matrix for fe
 * @param Z1 Design matrix for re, for first grouping factor
 * @param Z2 Design matrix for re, for second grouping factor
 * @param Z1_id Group indexing for Z1
 * @param Z2_id Group indexing for Z2
 * @param gamma The intercept parameter
 * @param beta Vector of population level parameters
 * @param b1Mat Matrix of group level params for first grouping factor
 * @param b2Mat Matrix of group level params for second grouping factor
 * @param b1Mat_colshift,b2Mat_colshift Number of columns in b1Mat/b2Mat
 * that correpond to group level params from prior glmer submodels
 * @param intercept_type The type of intercept parameter (0 = none,
 * 1 = unbounded, 2 = lower bound, 3 = upper bound)
 * @param Ti Offset vector added to eta (may be length 0 for no offset)
 * @return A vector containing the linear predictor for the glmer submodel
 */
vector evaluate_eta(matrix X, array[] vector Z1, array[] vector Z2,
                    array[] int Z1_id, array[] int Z2_id,
                    array[] real gamma, vector beta,
                    matrix b1Mat, matrix b2Mat,
                    int b1Mat_colshift, int b2Mat_colshift,
                    int intercept_type, vector Ti) {
  int N = rows(X); // num rows in design matrix
  int K = rows(beta); // num predictors
  int p1 = size(Z1); // num group level params for group factor 1
  int p2 = size(Z2); // num group level params for group factor 2
  vector[N] eta;
  if (K > 0) eta = X * beta;
  else eta = rep_vector(0.0, N);
  if (intercept_type > 0) { // submodel has an intercept
    if (intercept_type == 1) eta += gamma[1];
    // bounded intercepts: recenter eta so the sampled gamma respects
    // the bound (centered parameterization used by rstanarm)
    else if (intercept_type == 2) eta += gamma[1] - max(eta);
    else if (intercept_type == 3) eta += gamma[1] - min(eta);
  }
  if (p1 > 0) { // submodel includes group factor 1
    for (k in 1:p1)
      for (n in 1:N)
        eta[n] += (b1Mat[Z1_id[n], k+b1Mat_colshift]) * Z1[k,n];
  }
  if (p2 > 0) { // submodel includes group factor 2
    for (k in 1:p2)
      for (n in 1:N)
        eta[n] += (b2Mat[Z2_id[n], k+b2Mat_colshift]) * Z2[k,n];
  }
  if (rows(Ti) > 0)
    eta = eta + Ti; // add offset value
  return eta;
}
/**
 * Evaluate mu based on eta, family and link
 *
 * @param eta Vector of linear predictors
 * @param family An integer indicating the family
 *   (1=gaussian, 2=gamma, 3=inverse gaussian, 4=bernoulli,
 *    5=binomial, 6/7/8=count families)
 * @param link An integer indicating the link function (differs by family)
 * @return A vector
 */
vector evaluate_mu(vector eta, int family, int link) {
  vector[rows(eta)] mu;
  if (family == 1) mu = linkinv_gauss(eta, link);
  else if (family == 2) mu = linkinv_gamma(eta, link);
  else if (family == 3) mu = linkinv_inv_gaussian(eta, link);
  else if (family == 4) mu = linkinv_bern(eta, link);
  else if (family == 5) mu = linkinv_binom(eta, link);
  else if (family == 6 || family == 7 || family == 8)
    mu = linkinv_count(eta, link);
  return mu;
}
/**
 * Increment the target with the log-likelihood for the glmer submodel
 *
 * @param y_real Real-valued outcomes (continuous families)
 * @param y_integer Integer-valued outcomes (discrete families)
 * @param eta Vector of linear predictors
 * @param aux Auxiliary parameter(s); may be empty for some families
 * @param family An integer indicating the family
 * @param link An integer indicating the link function
 * @param sum_log_y Scalar, sum of log(y) (continuous families)
 * @param sqrt_y,log_y Precomputed transformations of y
 * @return The log-likelihood contribution
 */
real glm_lpdf(vector y_real, array[] int y_integer, vector eta,
              array[] real aux, int family, int link,
              real sum_log_y, vector sqrt_y, vector log_y) {
  real lp = 0;
  if (family == 1) { // gaussian
    if (link == 1) lp += normal_lpdf(y_real | eta, aux[1]);
    else if (link == 2) lp += lognormal_lpdf(y_real | eta, aux[1]);
    else lp += normal_lpdf(y_real | inv(eta), aux[1]);
  }
  else if (family == 2) { // gamma
    lp += GammaReg(y_real, eta, aux[1], link, sum_log_y);
  }
  else if (family == 3) { // inverse gaussian
    lp += inv_gaussian(y_real, linkinv_inv_gaussian(eta, link),
                       aux[1], sum_log_y, sqrt_y);
  }
  else if (family == 4) { // bernoulli
    if (link == 1) lp += bernoulli_logit_lpmf(y_integer | eta);
    else lp += bernoulli_lpmf(y_integer | linkinv_bern(eta, link));
  }
  else if (family == 5) { // binomial
    reject("Binomial with >1 trials not allowed.");
  }
  else if (family == 6 || family == 8) { // poisson or poisson-gamma
    if
(link == 1) lp += poisson_log_lpmf(y_integer | eta);
    else lp += poisson_lpmf(y_integer | linkinv_count(eta, link));
  }
  else if (family == 7) { // negative binomial
    if (link == 1) lp += neg_binomial_2_log_lpmf(y_integer | eta, aux[1]);
    else lp += neg_binomial_2_lpmf(y_integer | linkinv_count(eta, link), aux[1]);
  }
  else reject("Invalid family.");
  return lp;
}
/**
 * Log-prior for coefficients
 *
 * @param z_beta Vector of primative coefficients
 * @param prior_dist Integer, the type of prior distribution
 *   (1=normal, 2=student_t, 3=hs, 4=hs_plus, 5=laplace, 6=lasso,
 *    7=product_normal, 0=none)
 * @param prior_scale Vector, scale for the prior distribution
 * @param prior_df Vector, df for the prior distribution
 * @param global_prior_df Real, df for the prior for the global hs parameter
 * @param local Vector of hs local parameters
 * @param global Real, the global parameter
 * @param mix Vector of shrinkage parameters
 * @param one_over_lambda Real
 * @param slab_df,caux Regularized horseshoe slab hyperparameters
 * @return The log-prior contribution
 */
real beta_custom_lpdf(vector z_beta, int prior_dist, vector prior_scale,
                      vector prior_df, real global_prior_df,
                      array[] vector local, array[] real global,
                      array[] vector mix, array[] real one_over_lambda,
                      real slab_df, array[] real caux) {
  real lp = 0;
  if (prior_dist == 1) lp += normal_lpdf(z_beta | 0, 1);
  else if (prior_dist == 2) lp += normal_lpdf(z_beta | 0, 1); // Student t
  else if (prior_dist == 3) { // hs
    lp += normal_lpdf(z_beta | 0, 1);
    lp += normal_lpdf(local[1] | 0, 1);
    lp += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df);
    lp += normal_lpdf(global[1] | 0, 1);
    lp += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df);
    lp += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df);
  }
  else if (prior_dist == 4) { // hs+
    lp += normal_lpdf(z_beta | 0, 1);
    lp += normal_lpdf(local[1] | 0, 1);
    lp += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df);
    lp += normal_lpdf(local[3] | 0, 1);
    // unorthodox useage of prior_scale as another df hyperparameter
    lp += inv_gamma_lpdf(local[4] | 0.5 * prior_scale, 0.5 * prior_scale);
    lp += normal_lpdf(global[1] | 0, 1);
    lp += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df);
    lp += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df);
  }
  else if (prior_dist == 5) { // laplace
    lp += normal_lpdf(z_beta | 0, 1);
    lp += exponential_lpdf(mix[1] | 1);
  }
  else if (prior_dist == 6) { // lasso
    lp += normal_lpdf(z_beta | 0, 1);
    lp += exponential_lpdf(mix[1] | 1);
    lp += chi_square_lpdf(one_over_lambda[1] | prior_df[1]);
  }
  else if (prior_dist == 7) { // product_normal
    lp += normal_lpdf(z_beta | 0, 1);
  }
  return lp; /* else prior_dist is 0 and nothing is added */
}
/**
 * Log-prior for intercept parameters
 *
 * @param gamma Real, the intercept parameter
 * @param dist Integer, the type of prior distribution
 *   (0=none, 1=normal, 2=student_t)
 * @param mean_ Real, mean of prior distribution
 * @param scale Real, scale for the prior distribution
 * @param df Real, df for the prior distribution
 * @return The log-prior contribution
 */
real gamma_custom_lpdf(real gamma, int dist, real mean_, real scale, real df) {
  real lp = 0;
  if (dist == 1) // normal
    lp += normal_lpdf(gamma | mean_, scale);
  else if (dist == 2) // student_t
    lp += student_t_lpdf(gamma | df, mean_, scale);
  /* else dist is 0 and nothing is added */
  return lp;
}
/**
 * Log-prior for auxiliary parameters
 *
 * @param aux_unscaled Vector (potentially of length 1) of unscaled
 * auxiliary parameter(s)
 * @param dist Integer specifying the type of prior distribution
 *   (0=none, 1=normal, 2=student_t, otherwise exponential)
 * @param scale Real specifying the scale for the prior distribution;
 *   a non-positive scale disables the prior
 * @param df Real specifying the df for the prior distribution
 * @return The log-prior contribution
 */
real aux_lpdf(real aux_unscaled, int dist, real scale, real df) {
  real lp = 0;
  if (dist > 0 && scale > 0) {
    if (dist == 1)
      lp += normal_lpdf(aux_unscaled | 0, 1);
    else if (dist == 2)
      lp += student_t_lpdf(aux_unscaled | df, 0, 1);
    else
      lp += exponential_lpdf(aux_unscaled | 1);
  }
  return lp;
}
/**
 * Evaluate the mean of the posterior predictive distribution
 *
 * @param mu Vector containing the mean of the posterior predictive
 * distribution for each observation (ie.
the linear predictor after
 * applying the inverse link function).
 * @param aux The auxiliary parameter for the glmer submodel. This will be
 * an empty array if the submodel does not have an auxiliary parameter
 * @param family An integer specifying the family
 * @return A real, the mean of the posterior predictive distribution
 */
real mean_PPD_rng(vector mu, array[] real aux, int family) {
  int N = rows(mu);
  real mean_PPD = 0;
  if (family == 1) { // gaussian
    for (n in 1:N) mean_PPD += normal_rng(mu[n], aux[1]);
  }
  else if (family == 2) { // gamma
    for (n in 1:N) mean_PPD += gamma_rng(aux[1], aux[1] / mu[n]);
  }
  else if (family == 3) { // inverse gaussian
    for (n in 1:N) mean_PPD += inv_gaussian_rng(mu[n], aux[1]);
  }
  else if (family == 4) { // bernoulli
    for (n in 1:N) mean_PPD += bernoulli_rng(mu[n]);
  }
  else if (family == 5) { // binomial
    reject("Binomial with >1 trials not allowed.");
  }
  else if (family == 6 || family == 8) {
    // poisson_rng overflows for very large means; fall back to the
    // normal approximation above 2^30
    real poisson_max = pow(2.0, 30.0);
    for (n in 1:N) { // poisson or poisson-gamma
      if (mu[n] < poisson_max)
        mean_PPD += poisson_rng(mu[n]);
      else
        mean_PPD += normal_rng(mu[n], sqrt(mu[n]));
    }
  }
  else if (family == 7) {
    real poisson_max = pow(2.0, 30.0);
    for (n in 1:N) { // negative binomial
      // draw as a gamma-poisson mixture; infinite aux means no overdispersion
      real gamma_temp;
      if (is_inf(aux[1]))
        gamma_temp = mu[n];
      else
        gamma_temp = gamma_rng(aux[1], aux[1] / mu[n]);
      if (gamma_temp < poisson_max)
        mean_PPD += poisson_rng(gamma_temp);
      else
        mean_PPD += normal_rng(gamma_temp, sqrt(gamma_temp));
    }
  }
  mean_PPD /= N;
  return mean_PPD;
}
rstanarm/src/stan_files/functions/binomial_likelihoods.stan0000644000176200001440000000453714500256225024110 0ustar liggesusers
/**
 * Apply inverse link function to linear predictor
 *
 * @param eta Linear predictor vector
 * @param link An integer indicating the link function
 * @return A vector, i.e.
inverse-link(eta) */
vector linkinv_binom(vector eta, int link) {
  if (link == 1) return inv_logit(eta); // logit
  if (link == 2) return Phi(eta); // probit
  if (link == 3) return atan(eta) / pi() + 0.5; // cauchit
  if (link == 4) return exp(eta); // log
  if (link == 5) return inv_cloglog(eta); // cloglog
  reject("Invalid link");
  return eta; // never reached
}
/**
 * Increment with the unweighted log-likelihood
 * @param y An integer array indicating the number of successes
 * @param trials An integer array indicating the number of trials
 * @param eta A vector of linear predictors
 * @param link An integer indicating the link function
 * @return lp__
 */
real binom_lpmf(array[] int y, array[] int trials, vector eta, int link) {
  real lp = 0;
  if (link == 1) {
    // logit link: use the numerically stable built-in
    lp += binomial_logit_lpmf(y | trials, eta);
  }
  else if (link < 4) {
    // probit or cauchit: transform to probabilities first
    lp += binomial_lpmf(y | trials, linkinv_binom(eta, link));
  }
  else if (link == 4) { // log: log(p) = eta, log(1 - p) = log1m_exp(eta)
    for (n in 1:num_elements(y)) {
      lp += y[n] * eta[n];
      lp += (trials[n] - y[n]) * log1m_exp(eta[n]);
      lp += lchoose(trials[n], y[n]);
    }
  }
  else if (link == 5) { // cloglog: log(1 - p) = -exp(eta)
    for (n in 1:num_elements(y)) {
      real neg_exp_eta = -exp(eta[n]);
      lp += y[n] * log1m_exp(neg_exp_eta);
      lp += (trials[n] - y[n]) * neg_exp_eta;
      lp += lchoose(trials[n], y[n]);
    }
  }
  else {
    reject("Invalid link");
  }
  return lp;
}
/**
 * Pointwise (pw) log-likelihood vector
 *
 * @param y The integer array corresponding to the outcome variable.
* @param link An integer indicating the link function * @return A vector */ vector pw_binom(array[] int y, array[] int trials, vector eta, int link) { int N = rows(eta); vector[N] ll; if (link == 1) { // logit for (n in 1:N) ll[n] = binomial_logit_lpmf(y[n] | trials[n], eta[n]); } else if (link <= 5) { // link = probit, cauchit, log, or cloglog vector[N] pi = linkinv_binom(eta, link); // may be unstable for (n in 1:N) ll[n] = binomial_lpmf(y[n] | trials[n], pi[n]) ; } else reject("Invalid link"); return ll; } rstanarm/src/stan_files/functions/common_functions.stan0000644000176200001440000002412314500256225023301 0ustar liggesusers /* for multiple .stan files */ /** * Create group-specific block-diagonal Cholesky factor, see section 2 of * https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf * @param len_theta_L An integer indicating the length of returned vector, * which lme4 denotes as m * @param p An integer array with the number variables on the LHS of each | * @param dispersion Scalar standard deviation of the errors, calles sigma by lme4 * @param tau Vector of scale parameters whose squares are proportional to the * traces of the relative covariance matrices of the group-specific terms * @param scale Vector of prior scales that are multiplied by elements of tau * @param zeta Vector of positive parameters that are normalized into simplexes * and multiplied by the trace of the covariance matrix to produce variances * @param rho Vector of radii in the onion method for creating Cholesky factors * @param z_T Vector used in the onion method for creating Cholesky factors * @return A vector that corresponds to theta in lme4 */ vector make_theta_L(int len_theta_L, array[] int p, real dispersion, vector tau, vector scale, vector zeta, vector rho, vector z_T) { vector[len_theta_L] theta_L; int zeta_mark = 1; int rho_mark = 1; int z_T_mark = 1; int theta_L_mark = 1; // each of these is a diagonal block of the implicit Cholesky factor for (i in 1:size(p)) { int 
nc = p[i]; if (nc == 1) { // "block" is just a standard deviation theta_L[theta_L_mark] = tau[i] * scale[i] * dispersion; // unlike lme4, theta[theta_L_mark] includes the dispersion term in it theta_L_mark += 1; } else { // block is lower-triangular matrix[nc,nc] T_i; real std_dev; real T21; real trace_T_i = square(tau[i] * scale[i] * dispersion) * nc; vector[nc] pi = segment(zeta, zeta_mark, nc); // gamma(zeta | shape, 1) pi /= sum(pi); // thus dirichlet(pi | shape) // unlike lme4, T_i includes the dispersion term in it zeta_mark += nc; std_dev = sqrt(pi[1] * trace_T_i); T_i[1,1] = std_dev; // Put a correlation into T_i[2,1] and scale by std_dev std_dev = sqrt(pi[2] * trace_T_i); T21 = 2.0 * rho[rho_mark] - 1.0; rho_mark += 1; T_i[2,2] = std_dev * sqrt(1.0 - square(T21)); T_i[2,1] = std_dev * T21; for (r in 2:(nc - 1)) { // scaled onion method to fill T_i int rp1 = r + 1; vector[r] T_row = segment(z_T, z_T_mark, r); real scale_factor = sqrt(rho[rho_mark] / dot_self(T_row)) * std_dev; z_T_mark += r; std_dev = sqrt(pi[rp1] * trace_T_i); for(c in 1:r) T_i[rp1,c] = T_row[c] * scale_factor; T_i[rp1,rp1] = sqrt(1.0 - rho[rho_mark]) * std_dev; rho_mark += 1; } // now vech T_i for (c in 1:nc) for (r in c:nc) { theta_L[theta_L_mark] = T_i[r,c]; theta_L_mark += 1; } } } return theta_L; } /** * Create group-specific coefficients, see section 2 of * https://cran.r-project.org/web/packages/lme4/vignettes/lmer.pdf * * @param z_b Vector whose elements are iid normal(0,sigma) a priori * @param theta Vector with covariance parameters as defined in lme4 * @param p An integer array with the number variables on the LHS of each | * @param l An integer array with the number of levels for the factor(s) on * the RHS of each | * @return A vector of group-specific coefficients */ vector make_b(vector z_b, vector theta_L, array[] int p, array[] int l) { vector[rows(z_b)] b; int b_mark = 1; int theta_L_mark = 1; for (i in 1:size(p)) { int nc = p[i]; if (nc == 1) { real theta_L_start = 
theta_L[theta_L_mark]; for (s in b_mark:(b_mark + l[i] - 1)) b[s] = theta_L_start * z_b[s]; b_mark += l[i]; theta_L_mark += 1; } else { matrix[nc,nc] T_i = rep_matrix(0, nc, nc); for (c in 1:nc) { T_i[c,c] = theta_L[theta_L_mark]; theta_L_mark += 1; for(r in (c+1):nc) { T_i[r,c] = theta_L[theta_L_mark]; theta_L_mark += 1; } } for (j in 1:l[i]) { vector[nc] temp = T_i * segment(z_b, b_mark, nc); b_mark -= 1; for (s in 1:nc) b[b_mark + s] = temp[s]; b_mark += nc + 1; } } } return b; } /** * Prior on group-specific parameters * * @param z_b A vector of primitive coefficients * @param z_T A vector of primitives for the unit vectors in the onion method * @param rho A vector radii for the onion method * @param zeta A vector of primitives for the simplexes * @param tau A vector of scale parameters * @param regularization A real array of LKJ hyperparameters * @param delta A real array of concentration paramters * @param shape A vector of shape parameters * @param t An integer indicating the number of group-specific terms * @param p An integer array with the number variables on the LHS of each | * @return target() */ real decov_lpdf(vector z_b, vector z_T, vector rho, vector zeta, vector tau, array[] real regularization, array[] real delta, vector shape, int t, array[] int p) { real lp = 0; int pos_reg = 1; int pos_rho = 1; lp += normal_lpdf(z_b | 0, 1); lp += normal_lpdf(z_T | 0, 1); for (i in 1:t) if (p[i] > 1) { vector[p[i] - 1] shape1; vector[p[i] - 1] shape2; real nu = regularization[pos_reg] + 0.5 * (p[i] - 2); pos_reg += 1; shape1[1] = nu; shape2[1] = nu; for (j in 2:(p[i]-1)) { nu -= 0.5; shape1[j] = 0.5 * j; shape2[j] = nu; } lp += beta_lpdf(rho[pos_rho:(pos_rho + p[i] - 2)] | shape1, shape2); pos_rho += p[i] - 1; } lp += gamma_lpdf(zeta | delta, 1); lp += gamma_lpdf(tau | shape, 1); return lp; } /** * Hierarchical shrinkage parameterization * * @param z_beta A vector of primitive coefficients * @param global A real array of positive numbers * @param local A vector 
array of positive numbers * @param global_prior_scale A positive real number * @param error_scale 1 or sigma in the Gaussian case * @param c2 A positive real number * @return A vector of coefficientes */ vector hs_prior(vector z_beta, array[] real global, array[] vector local, real global_prior_scale, real error_scale, real c2) { int K = rows(z_beta); vector[K] lambda = local[1] .* sqrt(local[2]); real tau = global[1] * sqrt(global[2]) * global_prior_scale * error_scale; vector[K] lambda2 = square(lambda); vector[K] lambda_tilde = sqrt( c2 * lambda2 ./ (c2 + square(tau) * lambda2) ); return z_beta .* lambda_tilde * tau; } /** * Hierarchical shrinkage plus parameterization * * @param z_beta A vector of primitive coefficients * @param global A real array of positive numbers * @param local A vector array of positive numbers * @param global_prior_scale A positive real number * @param error_scale 1 or sigma in the Gaussian case * @param c2 A positive real number * @return A vector of coefficientes */ vector hsplus_prior(vector z_beta, array[] real global, array[] vector local, real global_prior_scale, real error_scale, real c2) { int K = rows(z_beta); vector[K] lambda = local[1] .* sqrt(local[2]); vector[K] eta = local[3] .* sqrt(local[4]); real tau = global[1] * sqrt(global[2]) * global_prior_scale * error_scale; vector[K] lambda_eta2 = square(lambda .* eta); vector[K] lambda_tilde = sqrt( c2 * lambda_eta2 ./ ( c2 + square(tau) * lambda_eta2) ); return z_beta .* lambda_tilde * tau; } /** * Cornish-Fisher expansion for standard normal to Student t * * See result 26.7.5 of * https://people.math.sfu.ca/~cbm/aands/page_949.htm * * @param z A scalar distributed standard normal * @param df A scalar degrees of freedom * @return An (approximate) Student t variate with df degrees of freedom */ real CFt(real z, real df) { real z2 = square(z); real z3 = z2 * z; real z5 = z2 * z3; real z7 = z2 * z5; real z9 = z2 * z7; real df2 = square(df); real df3 = df2 * df; real df4 = df2 * 
df2; return z + (z3 + z) / (4 * df) + (5 * z5 + 16 * z3 + 3 * z) / (96 * df2) + (3 * z7 + 19 * z5 + 17 * z3 - 15 * z) / (384 * df3) + (79 * z9 + 776 * z7 + 1482 * z5 - 1920 * z3 - 945 * z) / (92160 * df4); } /** * Return two-dimensional array of group membership * * @param N An integer indicating the number of observations * @param t An integer indicating the number of grouping variables * @param v An integer array with the indices of group membership * @return An two-dimensional integer array of group membership */ array[,] int make_V(int N, int t, array[] int v) { array[t,N] int V; int pos = 1; if (t > 0) for (j in 1:N) for (i in 1:t) { V[i,j] = v[pos]; // + 1 pos += 1; } return V; } /** * Calculate lower bound on intercept * * @param family Integer family code * 1 = gaussian * 2 = gamma * 3 = inv-gaussian * 4 = beta * 5 = binomial * 6 = poisson * 7 = neg-binom * 8 = poisson w/ gamma noise (not currently used but in count.stan) * @param link Integer link code * @return real lower bound */ real make_lower(int family, int link) { if (family == 1) return negative_infinity(); // Gaussian if (family <= 3) { // Gamma or inverse Gaussian if (link == 2) return negative_infinity(); // log return 0; } return negative_infinity(); } /** * Calculate upper bound on intercept * * @param family Integer family code (see make_lower above for codes) * @param link Integer link code * @return real upper bound */ real make_upper(int family, int link) { if (family == 4 && link == 5) return 0; return positive_infinity(); } rstanarm/src/stan_files/lm.stan0000644000176200001440000001217614551535205016332 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a Gaussian outcome with no link function functions { /** * Increments the log-posterior with the logarithm of a multivariate normal * likelihood with a scalar standard deviation for all errors * Equivalent to normal_lpdf(y | intercept + Q * R * beta, sigma) but faster * @param theta vector of 
coefficients (excluding intercept), equal to R * beta * @param b precomputed vector of OLS coefficients (excluding intercept) in Q-space * @param intercept scalar (assuming columns of Q have mean zero) * @param ybar precomputed sample mean of the outcome * @param SSR positive precomputed value of the sum of squared OLS residuals * @param sigma positive scalar for the standard deviation of the errors * @param N integer equal to the number of observations */ real mvn_ols_qr_lpdf(vector theta, vector b, real intercept, real ybar, real SSR, real sigma, int N) { return -0.5 * (dot_self(theta - b) + N * square(intercept - ybar) + SSR) / square(sigma) - // 0.91... is log(sqrt(2 * pi())) N * (log(sigma) + 0.91893853320467267); } } data { int has_intercept; // 0 = no, 1 = yes int prior_dist_for_intercept; // 0 = none, 1 = normal real prior_scale_for_intercept; // 0 = by CLT real prior_mean_for_intercept; // expected value for alpha int prior_dist; // 0 = uniform for R^2, 1 = Beta(K/2,eta) int prior_PD; // 0 = no, 1 = yes to drawing from the prior real eta; // shape hyperparameter int J; // number of groups // the rest of these are indexed by group but should work even if J = 1 array[J] int N; // number of observations int K; // number of predictors array[J] vector[K] xbarR_inv; // vector of means of the predictors array[J] real ybar; // sample mean of outcome real center_y; // zero or sample mean of outcome array[J] real s_Y; // standard deviation of the outcome array[J] vector[K] Rb; // OLS coefficients array[J] real SSR; // OLS sum-of-squared residuals array[J] matrix[K, K] R_inv; // inverse R matrices } transformed data { real half_K = 0.5 * K; array[J] real sqrt_inv_N; array[J] real sqrt_Nm1; for (j in 1 : J) { sqrt_inv_N[j] = sqrt(1.0 / N[j]); sqrt_Nm1[j] = sqrt(N[j] - 1.0); } } parameters { // must not call with init="0" // https://github.com/stan-dev/rstanarm/issues/603#issuecomment-1785928224 array[K > 1 ? J : 0] unit_vector[K > 1 ? 
K : 2] u; // primitives for coefficients array[J * has_intercept] real z_alpha; // primitives for intercepts array[J] real 1 ? 0 : -1), upper=1> R2; // proportions of variance explained vector[J * (1 - prior_PD)] log_omega; // under/overfitting factors } transformed parameters { array[J * has_intercept] real alpha; // uncentered intercepts array[J] vector[K] theta; // coefficients in Q-space array[J] real sigma; // error standard deviations for (j in 1 : J) { // marginal standard deviation of outcome for group j real Delta_y = prior_PD == 0 ? s_Y[j] * exp(log_omega[j]) : 1; // coefficients in Q-space if (K > 1) theta[j] = u[j] * sqrt(R2[j]) * sqrt_Nm1[j] * Delta_y; else theta[j][1] = R2[j] * sqrt_Nm1[j] * Delta_y; sigma[j] = Delta_y * sqrt(1 - R2[j]); // standard deviation of errors if (has_intercept == 1) { if (prior_dist_for_intercept == 0) // no information alpha[j] = z_alpha[j]; else if (prior_scale_for_intercept == 0) // central limit theorem alpha[j] = z_alpha[j] * Delta_y * sqrt_inv_N[j] + prior_mean_for_intercept; else // arbitrary informative prior alpha[j] = z_alpha[j] * prior_scale_for_intercept + prior_mean_for_intercept; } } } model { if (prior_PD == 0) for (j in 1 : J) { // likelihood contribution for each group real shift = dot_product(xbarR_inv[j], theta[j]); target += mvn_ols_qr_lpdf(theta[j] | Rb[j], has_intercept == 1 ? 
alpha[j] + shift : shift, ybar[j], SSR[j], sigma[j], N[j]); // implicit: u[j] is uniform on the surface of a hypersphere } if (has_intercept == 1 && prior_dist_for_intercept > 0) target += normal_lpdf(z_alpha | 0, 1); if (prior_dist == 1) { if (K > 1) target += beta_lpdf(R2 | half_K, eta); else { // TODO(Andrew) remove once vectorised abs available in rstan array[J] real R2_abs; for (j in 1:J) { R2_abs[j] = abs(R2[j]); } target += beta_lpdf(square(R2) | half_K, eta) + sum(log(R2_abs)); } } // implicit: log_omega is uniform over the real line for all j } generated quantities { array[J] real mean_PPD; array[J] vector[K] beta; for (j in 1 : J) { real shift; shift = dot_product(xbarR_inv[j], theta[j]); mean_PPD[j] = normal_rng(has_intercept == 1 ? alpha[j] + shift : shift, sigma[j] * sqrt_inv_N[j]); beta[j] = R_inv[j] * theta[j]; } } rstanarm/src/stan_files/pre/0000755000176200001440000000000014214414633015607 5ustar liggesusersrstanarm/src/stan_files/pre/Columbia_copyright.stan0000644000176200001440000000015613340675562022334 0ustar liggesusers// This file is part of rstanarm. // Copyright (C) 2015, 2016 2017 Trustees of Columbia University rstanarm/src/stan_files/pre/Brilleman_copyright.stan0000644000176200001440000000005513340675562022504 0ustar liggesusers// Copyright (C) 2016, 2017 Sam Brilleman rstanarm/src/stan_files/pre/license.stan0000644000176200001440000000120614214414633020117 0ustar liggesusers/* rstanarm is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. rstanarm is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with rstanarm. If not, see . 
*/ rstanarm/src/stan_files/model/0000755000176200001440000000000014414044166016123 5ustar liggesusersrstanarm/src/stan_files/model/make_eta_bern.stan0000644000176200001440000000207314370470372021573 0ustar liggesusers vector[N[1]] eta0; vector[N[2]] eta1; if (K > 0) { if (dense_X) { eta0 = N[1] > 0 ? X0[1] * beta : rep_vector(0.0, 0); eta1 = N[2] > 0 ? X1[1] * beta : rep_vector(0.0, 0); } else { eta0 = csr_matrix_times_vector(N[1], K, w_X0, v_X0, u_X0, beta); eta1 = csr_matrix_times_vector(N[2], K, w_X1, v_X1, u_X1, beta); } } else { eta0 = rep_vector(0.0, N[1]); eta1 = rep_vector(0.0, N[2]); } if (has_intercept == 0 && dense_X) { real tmp = dot_product(xbar, beta); if (N[1] > 0) eta0 += tmp; if (N[2] > 0) eta1 += tmp; } if (has_offset == 1) { if (N[1] > 0) eta0 += offset0; if (N[2] > 0) eta1 += offset1; } if (K_smooth) { if (N[1] > 0) eta0 += S0 * beta_smooth; if (N[2] > 0) eta1 += S1 * beta_smooth; } if (special_case) for (i in 1:t) { if (N[1] > 0) eta0 += b[V0[i]]; if (N[2] > 0) eta1 += b[V1[i]]; } else if (t > 0) { if (N[1] > 0) eta0 += csr_matrix_times_vector(N[1], q, w0, v0, u0, b); if (N[2] > 0) eta1 += csr_matrix_times_vector(N[2], q, w1, v1, u1, b); } rstanarm/src/stan_files/model/priors_glm.stan0000644000176200001440000000531614370470367021203 0ustar liggesusers // Log-priors for coefficients if (prior_dist == 1) target += normal_lpdf(z_beta | 0, 1); else if (prior_dist == 2) target += normal_lpdf(z_beta | 0, 1); // Student t via Cornish-Fisher expansion else if (prior_dist == 3) { // hs real log_half = -0.693147180559945286; target += normal_lpdf(z_beta | 0, 1); target += normal_lpdf(local[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df); target += normal_lpdf(global[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df); target += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df); } else if (prior_dist == 4) { // hs+ real log_half = -0.693147180559945286; 
target += normal_lpdf(z_beta | 0, 1); target += normal_lpdf(local[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local[2] | 0.5 * prior_df, 0.5 * prior_df); target += normal_lpdf(local[3] | 0, 1) - log_half; // unorthodox useage of prior_scale as another df hyperparameter target += inv_gamma_lpdf(local[4] | 0.5 * prior_scale, 0.5 * prior_scale); target += normal_lpdf(global[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global[2] | 0.5 * global_prior_df, 0.5 * global_prior_df); target += inv_gamma_lpdf(caux | 0.5 * slab_df, 0.5 * slab_df); } else if (prior_dist == 5) { // laplace target += normal_lpdf(z_beta | 0, 1); target += exponential_lpdf(mix[1] | 1); } else if (prior_dist == 6) { // lasso target += normal_lpdf(z_beta | 0, 1); target += exponential_lpdf(mix[1] | 1); target += chi_square_lpdf(one_over_lambda[1] | prior_df[1]); } else if (prior_dist == 7) { // product_normal target += normal_lpdf(z_beta | 0, 1); } /* else prior_dist is 0 and nothing is added */ // Log-prior for intercept if (has_intercept == 1) { if (prior_dist_for_intercept == 1) // normal target += normal_lpdf(gamma | prior_mean_for_intercept, prior_scale_for_intercept); else if (prior_dist_for_intercept == 2) // student_t target += student_t_lpdf(gamma | prior_df_for_intercept, prior_mean_for_intercept, prior_scale_for_intercept); /* else prior_dist is 0 and nothing is added */ } if (K_smooth) { target += normal_lpdf(z_beta_smooth | 0, 1); if (prior_dist_for_smooth > 0) { real log_half = -0.693147180559945286; if (prior_dist_for_smooth == 1) target += normal_lpdf(smooth_sd_raw | 0, 1) - log_half; else if (prior_dist_for_smooth == 2) target += student_t_lpdf(smooth_sd_raw | prior_df_for_smooth, 0, 1) - log_half; else if (prior_dist_for_smooth == 3) target += exponential_lpdf(smooth_sd_raw | 1); } } rstanarm/src/stan_files/model/assoc_evaluate.stan0000644000176200001440000003246314370470372022023 0ustar liggesusers // !!! Be careful that indexing of has_assoc matches stan_jm.fit !!! 
// mark tracks indexing within a_beta vector, which is the // vector of association parameters int mark = 0; // mark2 tracks indexing within a_K_data vector, which is the // vector specifying the number of columns used for each possible // type of association term by data interaction int mark2 = 0; // mark3 tracks indexing within size_which_interactions vector int mark3 = 0; for (m in 1:M) { //----- etavalue and any interactions mark2 += 1; if (has_assoc[1,m] == 1 || // etavalue has_assoc[9,m] == 1 || // etavalue * data has_assoc[13,m] == 1 || // etavalue * etavalue has_assoc[14,m] == 1) { // etavalue * muvalue // declare and define eta at quadpoints for submodel m #include /model/make_eta_tmp.stan // add etavalue and any interactions to event submodel eta if (has_assoc[1,m] == 1) { // etavalue vector[nrow_e_Xq] val; if (has_grp[m] == 0) { // no grouping factor clustered within patients val = eta_tmp; } else { // submodel has a grouping factor clustered within patients val = collapse_within_groups(eta_tmp, grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } if (has_assoc[9,m] == 1) { // etavalue*data int J = a_K_data[mark2]; int j_shift = (mark2 == 1) ? 0 : sum(a_K_data[1:(mark2-1)]); for (j in 1:J) { vector[nrow_e_Xq] val; int sel = j_shift + j; if (has_grp[m] == 0) { val = eta_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel]; } else { val = collapse_within_groups( eta_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel], grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[13,m] == 1) { // etavalue*etavalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 
0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; #include /model/make_eta_tmp2.stan val = eta_tmp .* eta_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[14,m] == 1) { // etavalue*muvalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; vector[nrow_y_Xq[sel]] mu_tmp2; #include /model/make_eta_tmp2.stan mu_tmp2 = evaluate_mu(eta_tmp2, family[sel], link[sel]); val = eta_tmp .* mu_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } } else { mark3 += 2; } //----- etaslope and any interactions mark2 += 1; if ((has_assoc[2,m] == 1) || (has_assoc[10,m] == 1)) { // declare and define etaslope at quadpoints for submodel m vector[nrow_y_Xq[m]] dydt_eta_q; if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; dydt_eta_q = evaluate_eta(y1_xq_eps, y1_z1q_eps, y1_z2q_eps, y1_z1q_id_eps, y1_z2q_id_eps, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 0, y1_offset_eps); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; dydt_eta_q = evaluate_eta(y2_xq_eps, y2_z1q_eps, y2_z2q_eps, y2_z1q_id_eps, y2_z2q_id_eps, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 0, y2_offset_eps); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); dydt_eta_q = evaluate_eta(y3_xq_eps, y3_z1q_eps, y3_z2q_eps, y3_z1q_id_eps, y3_z2q_id_eps, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, 0, y3_offset_eps); } // add etaslope and any interactions to event submodel eta if (has_assoc[2,m] == 1) { // etaslope vector[nrow_e_Xq] val; if (has_grp[m] == 0) { val = dydt_eta_q; } else { val = collapse_within_groups(dydt_eta_q, grp_idx, grp_assoc); } mark 
+= 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } if (has_assoc[10,m] == 1) { // etaslope*data int J = a_K_data[mark2]; int j_shift = (mark2 == 1) ? 0 : sum(a_K_data[1:(mark2-1)]); for (j in 1:J) { vector[nrow_e_Xq] val; int sel = j_shift + j; if (has_grp[m] == 0) { val = dydt_eta_q .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel]; } else { val = collapse_within_groups( dydt_eta_q .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel], grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } } //----- etaauc // add etaauc to event submodel eta if (has_assoc[3,m] == 1) { // etaauc vector[nrow_y_Xq_auc] eta_auc_tmp; // eta at all auc quadpoints (for submodel m) vector[nrow_y_Xq[m]] val; // eta following summation over auc quadpoints if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_auc_tmp = evaluate_eta(y1_xq_auc, y1_z1q_auc, y1_z2q_auc, y1_z1q_id_auc, y1_z2q_id_auc, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_auc); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_auc_tmp = evaluate_eta(y2_xq_auc, y2_z1q_auc, y2_z2q_auc, y2_z1q_id_auc, y2_z2q_id_auc, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_auc); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_auc_tmp = evaluate_eta(y3_xq_auc, y3_z1q_auc, y3_z2q_auc, y3_z1q_id_auc, y3_z2q_id_auc, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_auc); } mark += 1; for (r in 1:nrow_y_Xq[m]) { vector[auc_qnodes] val_tmp; vector[auc_qnodes] wgt_tmp; val_tmp = eta_auc_tmp[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; wgt_tmp = auc_qwts[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; val[r] = sum(wgt_tmp .* val_tmp); } e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } //----- muvalue and any interactions mark2 += 1; if 
(has_assoc[4,m] == 1 || // muvalue has_assoc[11,m] == 1 || // muvalue * data has_assoc[15,m] == 1 || // muvalue * etavalue has_assoc[16,m] == 1) { // muvalue * muvalue // declare and define mu for submodel m vector[nrow_y_Xq[m]] mu_tmp; #include /model/make_eta_tmp.stan mu_tmp = evaluate_mu(eta_tmp, family[m], link[m]); // add muvalue and any interactions to event submodel eta if (has_assoc[4,m] == 1) { // muvalue vector[nrow_e_Xq] val; if (has_grp[m] == 0) { val = mu_tmp; } else { val = collapse_within_groups(mu_tmp, grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } if (has_assoc[11,m] == 1) { // muvalue*data int tmp = a_K_data[mark2]; int j_shift = (mark2 == 1) ? 0 : sum(a_K_data[1:(mark2-1)]); for (j in 1:tmp) { vector[nrow_e_Xq] val; int sel = j_shift + j; if (has_grp[m] == 0) { val = mu_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel]; } else { val = collapse_within_groups( mu_tmp .* y_Xq_data[idx_q[m,1]:idx_q[m,2], sel], grp_idx, grp_assoc); } mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[15,m] == 1) { // muvalue*etavalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; #include /model/make_eta_tmp2.stan val = mu_tmp .* eta_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } mark3 += 1; // count even if assoc type isn't used if (has_assoc[16,m] == 1) { // muvalue*muvalue for (j in 1:size_which_interactions[mark3]) { int j_shift = (mark3 == 1) ? 
0 : sum(size_which_interactions[1:(mark3-1)]); int sel = which_interactions[j+j_shift]; vector[nrow_e_Xq] val; vector[nrow_y_Xq[sel]] mu_tmp2; #include /model/make_eta_tmp2.stan mu_tmp2 = evaluate_mu(eta_tmp2, family[sel], link[sel]); val = mu_tmp .* mu_tmp2; mark += 1; e_eta_q += a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } } else { mark3 += 2; } //----- muslope and any interactions mark2 += 1; if (has_assoc[5,m] == 1 || has_assoc[12,m] == 1) { reject("muslope association structure has been removed."); } //----- muauc // add muauc to event submodel eta if (has_assoc[6,m] == 1) { // muauc vector[nrow_y_Xq_auc] eta_auc_tmp; // eta at all auc quadpoints (for submodel m) vector[nrow_y_Xq_auc] mu_auc_tmp; // mu at all auc quadpoints (for submodel m) vector[nrow_y_Xq[m]] val; // mu following summation over auc quadpoints if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_auc_tmp = evaluate_eta(y1_xq_auc, y1_z1q_auc, y1_z2q_auc, y1_z1q_id_auc, y1_z2q_id_auc, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_auc); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_auc_tmp = evaluate_eta(y2_xq_auc, y2_z1q_auc, y2_z2q_auc, y2_z1q_id_auc, y2_z2q_id_auc, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_auc); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_auc_tmp = evaluate_eta(y3_xq_auc, y3_z1q_auc, y3_z2q_auc, y3_z1q_id_auc, y3_z2q_id_auc, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_auc); } mu_auc_tmp = evaluate_mu(eta_auc_tmp, family[m], link[m]); mark += 1; for (r in 1:nrow_y_Xq[m]) { vector[auc_qnodes] val_tmp; vector[auc_qnodes] wgt_tmp; val_tmp = mu_auc_tmp[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; wgt_tmp = auc_qwts[((r-1) * auc_qnodes + 1):(r * auc_qnodes)]; val[r] = sum(wgt_tmp .* val_tmp); } e_eta_q += 
a_beta[mark] * a_scale[mark] * (val - a_xbar[mark]); } } //----- shared random effects if (sum_size_which_b > 0) { reject("shared_b has been removed."); } if (sum_size_which_coef > 0) { reject("shared_coef has been removed."); } rstanarm/src/stan_files/model/make_eta_z.stan0000644000176200001440000000027013452513210021100 0ustar liggesusers if (family == 4 && z_dim > 0 && link_phi > 0) { eta_z = betareg_z * omega; } else if (family == 4 && z_dim == 0 && has_intercept_z == 1){ eta_z = rep_vector(0.0, N); } rstanarm/src/stan_files/model/priors_mvmer.stan0000644000176200001440000000546314414044166021546 0ustar liggesusers // Log-priors, auxiliary params if (has_aux[1] == 1) target += aux_lpdf(yAux1_unscaled[1] | y_prior_dist_for_aux[1], y_prior_scale_for_aux[1], y_prior_df_for_aux[1]); if (M > 1 && has_aux[2] == 1) target += aux_lpdf(yAux2_unscaled[1] | y_prior_dist_for_aux[2], y_prior_scale_for_aux[2], y_prior_df_for_aux[2]); if (M > 2 && has_aux[3] == 1) target += aux_lpdf(yAux3_unscaled[1] | y_prior_dist_for_aux[3], y_prior_scale_for_aux[3], y_prior_df_for_aux[3]); // Log priors, intercepts if (intercept_type[1] > 0) target += gamma_custom_lpdf(yGamma1[1] | y_prior_dist_for_intercept[1], y_prior_mean_for_intercept[1], y_prior_scale_for_intercept[1], y_prior_df_for_intercept[1]); if (M > 1 && intercept_type[2] > 0) target += gamma_custom_lpdf(yGamma2[1] | y_prior_dist_for_intercept[2], y_prior_mean_for_intercept[2], y_prior_scale_for_intercept[2], y_prior_df_for_intercept[2]); if (M > 2 && intercept_type[3] > 0) target += gamma_custom_lpdf(yGamma3[1] | y_prior_dist_for_intercept[3], y_prior_mean_for_intercept[3], y_prior_scale_for_intercept[3], y_prior_df_for_intercept[3]); // Log priors, population level params if (yK[1] > 0) target += beta_custom_lpdf(z_yBeta1 | y_prior_dist[1], y_prior_scale1, y_prior_df1, y_global_prior_df[1], yLocal1, yGlobal1, yMix1, yOol1, y_slab_df[1], y_caux1); if (M > 1 && yK[2] > 0) target += beta_custom_lpdf(z_yBeta2 | y_prior_dist[2], 
y_prior_scale2, y_prior_df2, y_global_prior_df[2], yLocal2, yGlobal2, yMix2, yOol2, y_slab_df[2], y_caux2); if (M > 2 && yK[3] > 0) target += beta_custom_lpdf(z_yBeta3 | y_prior_dist[3], y_prior_scale3, y_prior_df3, y_global_prior_df[3], yLocal3, yGlobal3, yMix3, yOol3, y_slab_df[3], y_caux3); // Log priors, group level terms if (prior_dist_for_cov == 1) { // decov target += decov_lpdf(z_b | z_T, rho, zeta, tau, b_prior_regularization, delta, b_prior_shape, t, p); } else if (prior_dist_for_cov == 2) { // lkj if (bK1 > 0) { // sds for group factor 1 target += student_t_lpdf(bSd1 | b1_prior_df, 0, b1_prior_scale); // primitive coefs for group factor 1 target += normal_lpdf(to_vector(z_bMat1) | 0, 1); // corr matrix for group factor 1 if (bK1 > 1) target += lkj_corr_cholesky_lpdf(bCholesky1 | b1_prior_regularization); } if (bK2 > 0) { // sds for group factor 2 target += student_t_lpdf(bSd2 | b2_prior_df, 0, b2_prior_scale); // primitive coefs for group factor 2 target += normal_lpdf(to_vector(z_bMat2) | 0, 1); // corr matrix for group factor 2 if (bK2 > 1) target += lkj_corr_cholesky_lpdf(bCholesky2 | b2_prior_regularization); } } rstanarm/src/stan_files/model/mvmer_lp.stan0000644000176200001440000000305214414044166020633 0ustar liggesusers vector[yNeta[1]] yEta1; // linear predictor vector[yNeta[2]] yEta2; vector[yNeta[3]] yEta3; // Linear predictor for submodel 1 if (M > 0) { int bMat1_colshift = 0; // column shift in bMat1 int bMat2_colshift = 0; // column shift in bMat2 yEta1 = evaluate_eta(yX1, y1_Z1, y1_Z2, y1_Z1_id, y1_Z2_id, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset); } // Linear predictor for submodel 2 if (M > 1) { int bMat1_colshift = bK1_len[1]; // column shift in bMat1 int bMat2_colshift = bK2_len[1]; // column shift in bMat2 yEta2 = evaluate_eta(yX2, y2_Z1, y2_Z2, y2_Z1_id, y2_Z2_id, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset); } // Linear predictor for 
submodel 3 if (M > 2) { int bMat1_colshift = sum(bK1_len[1:2]); // column shift in bMat1 int bMat2_colshift = sum(bK2_len[1:2]); // column shift in bMat2 yEta3 = evaluate_eta(yX3, y3_Z1, y3_Z2, y3_Z1_id, y3_Z2_id, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset); } // Log-likelihoods if (prior_PD == 0) { target += glm_lpdf(yReal1 | yInt1, yEta1, yAux1, family[1], link[1], sum_log_y1, sqrt_y1, log_y1); if (M > 1) target += glm_lpdf(yReal2 | yInt2, yEta2, yAux2, family[2], link[2], sum_log_y2, sqrt_y2, log_y2); if (M > 2) target += glm_lpdf(yReal3 | yInt3, yEta3, yAux3, family[3], link[3], sum_log_y3, sqrt_y3, log_y3); } rstanarm/src/stan_files/model/eta_no_intercept.stan0000644000176200001440000000015413365374540022342 0ustar liggesusers // correction to eta if model has no intercept (because X is centered) eta += dot_product(xbar, beta); rstanarm/src/stan_files/model/event_lp.stan0000644000176200001440000000212413702655240020625 0ustar liggesusers vector[nrow_e_Xq] log_basehaz; // log baseline hazard AT event time and quadrature points vector[nrow_e_Xq] log_haz_q; // log hazard AT event time and quadrature points vector[Nevents] log_haz_etimes; // log hazard AT the event time only vector[Npat_times_qnodes] log_haz_qtimes; // log hazard AT the quadrature points // Log baseline hazard at event and quad times if (basehaz_type == 1) log_basehaz = norm_const + log(e_aux[1]) + basehaz_X * (e_aux - 1) + e_gamma[1]; else log_basehaz = norm_const + basehaz_X * e_aux; // Log hazard at event and quad times log_haz_q = log_basehaz + e_eta_q; log_haz_etimes = head(log_haz_q, Nevents); log_haz_qtimes = tail(log_haz_q, Npat_times_qnodes); // Log likelihood for event model if (has_weights == 0 && prior_PD == 0) { // unweighted log likelihood target += sum(log_haz_etimes) - dot_product(qwts, exp(log_haz_qtimes)); } else if (prior_PD == 0) { // weighted log likelihood target += dot_product(e_weights, log_haz_etimes) - 
dot_product(e_weights_rep, qwts .* exp(log_haz_qtimes)); } rstanarm/src/stan_files/model/make_eta_tmp2.stan0000644000176200001440000000302114370470372021521 0ustar liggesusers vector[nrow_y_Xq[sel]] eta_tmp2; if (sel == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_tmp2 = evaluate_eta(y1_xq_eta, y1_z1q_eta, y1_z2q_eta, y1_z1q_id_eta, y1_z2q_id_eta, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_eta); } else if (sel == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_tmp2 = evaluate_eta(y2_xq_eta, y2_z1q_eta, y2_z2q_eta, y2_z1q_id_eta, y2_z2q_id_eta, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_eta); } else if (sel == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_tmp2 = evaluate_eta(y3_xq_eta, y3_z1q_eta, y3_z2q_eta, y3_z1q_id_eta, y3_z2q_id_eta, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_eta); } rstanarm/src/stan_files/model/eta_add_Zb.stan0000644000176200001440000000016014370470372021026 0ustar liggesusers if (special_case) for (i in 1:t) eta += b[V[i]]; else eta += csr_matrix_times_vector(N, q, w, v, u, b); rstanarm/src/stan_files/model/make_eta.stan0000644000176200001440000000042014370470372020557 0ustar liggesusers vector[N] eta; // linear predictor if (K > 0) { if (dense_X) eta = X[1] * beta; else eta = csr_matrix_times_vector(N, K, w_X, v_X, u_X, beta); } else eta = rep_vector(0.0, N); if (has_offset == 1) eta += offset_; if (K_smooth) eta += S * beta_smooth; rstanarm/src/stan_files/model/priors_betareg.stan0000644000176200001440000000435413340675562022036 0ustar liggesusers // Log-priors for coefficients if (prior_dist_z == 1) target += normal_lpdf(z_omega | 0, 1); else if (prior_dist_z == 2) target += normal_lpdf(z_omega | 0, 1); else if (prior_dist_z == 3) { // hs real log_half = -0.693147180559945286; target += normal_lpdf(z_omega | 0, 
1); target += normal_lpdf(local_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local_z[2] | 0.5 * prior_df_z, 0.5 * prior_df_z); target += normal_lpdf(global_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global_z[2] | 0.5 * global_prior_df_z, 0.5 * global_prior_df_z); target += inv_gamma_lpdf(caux_z | 0.5 * slab_df_z, 0.5 * slab_df_z); } else if (prior_dist_z == 4) { // hs+ real log_half = -0.693147180559945286; target += normal_lpdf(z_omega | 0, 1); target += normal_lpdf(local_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(local_z[2] | 0.5 * prior_df_z, 0.5 * prior_df_z); target += normal_lpdf(local_z[3] | 0, 1) - log_half; // unorthodox useage of prior_scale as another df hyperparameter target += inv_gamma_lpdf(local_z[4] | 0.5 * prior_scale_z, 0.5 * prior_scale_z); target += normal_lpdf(global_z[1] | 0, 1) - log_half; target += inv_gamma_lpdf(global_z[2] | 0.5, 0.5); target += inv_gamma_lpdf(caux_z | 0.5 * slab_df_z, 0.5 * slab_df_z); } else if (prior_dist_z == 5) { // laplace target += normal_lpdf(z_omega | 0, 1); target += exponential_lpdf(S_z[1] | 1); } else if (prior_dist_z == 6) { // lasso target += normal_lpdf(z_omega | 0, 1); target += exponential_lpdf(S_z[1] | 1); target += chi_square_lpdf(one_over_lambda_z[1] | prior_df_z[1]); } else if (prior_dist_z == 7) { // product_normal target += normal_lpdf(z_omega | 0, 1); } /* else prior_dist is 0 and nothing is added */ // Log-prior for intercept if (has_intercept_z == 1) { if (prior_dist_for_intercept_z == 1) // normal target += normal_lpdf(gamma_z | prior_mean_for_intercept_z, prior_scale_for_intercept_z); else if (prior_dist_for_intercept_z == 2) // student_t target += student_t_lpdf(gamma_z | prior_df_for_intercept_z, prior_mean_for_intercept_z, prior_scale_for_intercept_z); /* else prior_dist is 0 and nothing is added */ } rstanarm/src/stan_files/model/eta_z_no_intercept.stan0000644000176200001440000000020213365374540022665 0ustar liggesusers if (link_phi > 1) { eta_z += dot_product(zbar, omega) 
- min(eta_z); } else { eta_z += dot_product(zbar, omega); } rstanarm/src/stan_files/model/make_eta_tmp.stan0000644000176200001440000000256314370470372021451 0ustar liggesusers vector[nrow_y_Xq[m]] eta_tmp; if (m == 1) { int bMat1_colshift = 0; int bMat2_colshift = 0; eta_tmp = evaluate_eta(y1_xq_eta, y1_z1q_eta, y1_z2q_eta, y1_z1q_id_eta, y1_z2q_id_eta, yGamma1, yBeta1, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[1], y1_offset_eta); } else if (m == 2) { int bMat1_colshift = bK1_len[1]; int bMat2_colshift = bK2_len[1]; eta_tmp = evaluate_eta(y2_xq_eta, y2_z1q_eta, y2_z2q_eta, y2_z1q_id_eta, y2_z2q_id_eta, yGamma2, yBeta2, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[2], y2_offset_eta); } else if (m == 3) { int bMat1_colshift = sum(bK1_len[1:2]); int bMat2_colshift = sum(bK2_len[1:2]); eta_tmp = evaluate_eta(y3_xq_eta, y3_z1q_eta, y3_z2q_eta, y3_z1q_id_eta, y3_z2q_id_eta, yGamma3, yBeta3, bMat1, bMat2, bMat1_colshift, bMat2_colshift, intercept_type[3], y3_offset_eta); } rstanarm/src/stan_files/bernoulli.stan0000644000176200001440000001740314500256225017707 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a Bernoulli outcome functions { #include /functions/common_functions.stan #include /functions/bernoulli_likelihoods.stan } data { // dimensions int K; // number of predictors array[2] int N; // number of observations where y = 0 and y = 1 respectively vector[K] xbar; // vector of column-means of rbind(X0, X1) int dense_X; // flag for dense vs. sparse array[dense_X] matrix[N[1], K] X0; // centered (by xbar) predictor matrix | y = 0 array[dense_X] matrix[N[2], K] X1; // centered (by xbar) predictor matrix | y = 1 int clogit; // 1 iff the number of successes is fixed in each stratum int J; // number of strata (possibly zero) array[clogit == 1 ? 
N[1] + N[2] : 0] int strata; // stuff for the sparse case int nnz_X0; // number of non-zero elements in the implicit X0 matrix vector[nnz_X0] w_X0; // non-zero elements in the implicit X0 matrix array[nnz_X0] int v_X0; // column indices for w_X0 // where the non-zeros start in each row of X0 array[dense_X ? 0 : N[1] + 1] int u_X0; int nnz_X1; // number of non-zero elements in the implicit X1 matrix vector[nnz_X1] w_X1; // non-zero elements in the implicit X1 matrix array[nnz_X1] int v_X1; // column indices for w_X1 // where the non-zeros start in each row of X1 array[dense_X ? 0 : N[2] + 1] int u_X1; // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan int K_smooth; matrix[N[1], K_smooth] S0; matrix[N[2], K_smooth] S1; array[K_smooth] int smooth_map; int family; // weights int has_weights; // 0 = No, 1 = Yes vector[has_weights ? N[1] : 0] weights0; vector[has_weights ? N[2] : 0] weights1; // offset int has_offset; // 0 = No, 1 = Yes vector[has_offset ? N[1] : 0] offset0; vector[has_offset ? N[2] : 0] offset1; // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // more glmer stuff array[2] int num_non_zero; // number of non-zero elements in the Z matrices vector[num_non_zero[1]] w0; // non-zero elements in the implicit Z0 matrix vector[num_non_zero[2]] w1; // non-zero elements in the implicit Z1 matrix array[num_non_zero[1]] int v0; // column indices for w0 array[num_non_zero[2]] int v1; // column indices for w1 // where the non-zeros start in each row of Z0 array[t > 0 ? N[1] + 1 : 0] int u0; // where the non-zeros start in each row of Z1 array[t > 0 ? 
N[2] + 1 : 0] int u1; int special_case; // whether we only have to deal with (1|group) } transformed data { int NN = N[1] + N[2]; real aux = not_a_number(); array[special_case ? t : 0, N[1]] int V0 = make_V(N[1], special_case ? t : 0, v0); array[special_case ? t : 0, N[2]] int V1 = make_V(N[2], special_case ? t : 0, v1); array[clogit ? J : 0] int successes; array[clogit ? J : 0] int failures; array[clogit ? J : 0] int observations; int can_do_bernoullilogitglm = K != 0 && // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398 link == 1 && clogit == 0 && has_offset == 0 && prior_PD == 0 && dense_X == 1 && has_weights == 0 && t == 0; matrix[can_do_bernoullilogitglm ? NN : 0, can_do_bernoullilogitglm ? K + K_smooth : 0] XS; array[can_do_bernoullilogitglm ? NN : 0] int y; // defines hs, len_z_T, len_var_group, delta, pos #include /tdata/tdata_glm.stan for (j in 1 : J) { successes[j] = 0; failures[j] = 0; } if (J > 0) for (i in 1 : N[2]) successes[strata[i]] += 1; if (J > 0) for (i in (N[2] + 1) : NN) failures[strata[i]] += 1; for (j in 1 : J) observations[j] = failures[j] + successes[j]; if (can_do_bernoullilogitglm) { XS = K_smooth > 0 ? 
append_col(append_row(X0[1], X1[1]), append_row(S0, S1)) : append_row(X0[1], X1[1]); y = append_array(rep_array(0, N[1]), rep_array(1, N[2])); } } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan } transformed parameters { // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan if (t > 0) { if (special_case) { int start = 1; theta_L = scale .* tau; if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { theta_L = make_theta_L(len_theta_L, p, 1.0, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (can_do_bernoullilogitglm) { vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta; target += bernoulli_logit_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff); } else if (prior_PD == 0) { // defines eta0, eta1 #include /model/make_eta_bern.stan if (has_intercept == 1) { if (link != 4) { eta0 += gamma[1]; eta1 += gamma[1]; } else { real shift = fmax(max(eta0), max(eta1)); eta0 += gamma[1] - shift; eta1 += gamma[1] - shift; } } // Log-likelihood if (clogit) { target += clogit_lpdf(eta0 | eta1, successes, failures, observations); } else if (has_weights == 0) { target += bern_lpdf(eta0 | eta1, link, N); } else { // weighted log-likelihoods target += dot_product(weights0, pw_bern(0, eta0, link)); target += dot_product(weights1, pw_bern(1, eta1, link)); } } #include /model/priors_glm.stan if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 
0 : negative_infinity(); array[has_intercept] real alpha; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (compute_mean_PPD) { vector[N[1]] pi0; vector[N[2]] pi1; // defines eta0, eta1 #include /model/make_eta_bern.stan if (has_intercept == 1) { if (link != 4) { eta0 += gamma[1]; eta1 += gamma[1]; } else { real shift; shift = fmax(max(eta0), max(eta1)); eta0 += gamma[1] - shift; eta1 += gamma[1] - shift; alpha[1] -= shift; } } if (clogit) for (j in 1 : J) mean_PPD += successes[j]; // fixed by design else { pi0 = linkinv_bern(eta0, link); pi1 = linkinv_bern(eta1, link); for (n in 1 : N[1]) mean_PPD += bernoulli_rng(pi0[n]); for (n in 1 : N[2]) mean_PPD += bernoulli_rng(pi1[n]); } mean_PPD /= NN; } } rstanarm/src/stan_files/mvmer.stan0000644000176200001440000000416214500256225017040 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/Brilleman_copyright.stan #include /pre/license.stan // Multivariate GLM with correlated group-specific terms functions { #include /functions/common_functions.stan #include /functions/bernoulli_likelihoods.stan #include /functions/binomial_likelihoods.stan #include /functions/continuous_likelihoods.stan #include /functions/count_likelihoods.stan #include /functions/mvmer_functions.stan } data { // declares: M, has_aux, has_weights, resp_type, intercept_type, // yNobs, yNeta, yK, t, p, l, q, len_theta_L, bN1, bK1, bK1_len // bK1_idx, bN2, bK2, bK2_len, bK2_idx #include /data/dimensions_mvmer.stan // declares: yInt{1,2,3}, yReal{1,2,3}, yX{1,2,3}, yXbar{1,2,3}, // family, link, y{1,2,3}_Z{1,2}, y{1,2,3}_Z{1,2}_id, // y_prior_dist{_for_intercept,_for_aux,_for_cov}, prior_PD #include /data/data_mvmer.stan // declares: y_prior_{mean,scale,df}{1,2,3,_for_intercept,_for_aux}, // y_global_prior_{df,scale}, len_{concentration,regularization}, // b_prior_{shape,scale,concentration,regularization}, // b{1,2}_prior_{scale,df,regularization} #include 
/data/hyperparameters_mvmer.stan } transformed data { // declares: yHs{1,2,3}, len_{z_T,var_group,rho}, pos, delta, // bCov{1,2}_idx, {sqrt,log,sum_log}_y{1,2,3}, #include /tdata/tdata_mvmer.stan } parameters { // declares: yGamma{1,2,3}, z_yBeta{1,2,3}, z_b, z_T, rho, // zeta, tau, bSd{1,2}, z_bMat{1,2}, bCholesky{1,2}, // yAux{1,2,3}_unscaled, yGlobal{1,2,3}, yLocal{1,2,3}, // yOol{1,2,3}, yMix{1,2,3} #include /parameters/parameters_mvmer.stan } transformed parameters { // declares and defines: yBeta{1,2,3}, yAux{1,2,3}, yAuxMaximum, // theta_L, bMat{1,2} #include /tparameters/tparameters_mvmer.stan } model { // Log likelihoods // increments target with mvmer log liks #include /model/mvmer_lp.stan // Log priors // increments target with mvmer priors #include /model/priors_mvmer.stan } generated quantities { // declares and defines: mean_PPD, yAlpha{1,2,3}, b{1,2}, bCov{1,2} #include /gqs/gen_quantities_mvmer.stan } rstanarm/src/stan_files/jm.stan0000644000176200001440000001322614500256225016321 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/Brilleman_copyright.stan #include /pre/license.stan // Shared parameter joint model functions { #include /functions/common_functions.stan #include /functions/bernoulli_likelihoods.stan #include /functions/binomial_likelihoods.stan #include /functions/continuous_likelihoods.stan #include /functions/count_likelihoods.stan #include /functions/mvmer_functions.stan #include /functions/jm_functions.stan } data { // declares: M, has_aux, has_weights, resp_type, intercept_type, // yNobs, yNeta, yK, t, p, l, q, len_theta_L, bN1, bK1, bK1_len // bK1_idx, bN2, bK2, bK2_len, bK2_idx #include /data/dimensions_mvmer.stan // declares: yInt{1,2,3}, yReal{1,2,3}, yX{1,2,3}, yXbar{1,2,3}, // family, link, y{1,2,3}_Z{1,2}, y{1,2,3}_Z{1,2}_id, // y_prior_dist{_for_intercept,_for_aux,_for_cov}, prior_PD #include /data/data_mvmer.stan // declares: e_prior_dist{_for_intercept,_for_aux}, // Npat, Nevents, qnodes, 
Npat_times_qnodes, qwts, // basehaz_{type,df,X}, nrow_e_Xq, e_has_intercept, nrow_e_Xq, // e_{K,Xq,times,xbar,weights,weights_rep} #include /data/data_event.stan // declares: a_{K,xbar}, a_prior_dist, assoc, assoc_uses, has_assoc, // {sum_}size_which_b, which_b_zindex, {sum_}size_which_coef, // which_coef_{zindex,xindex}, a_K_data, y_Xq_{eta,eps,lag,auc,data}, // {sum_,sum_size_}which_interactions, idx_q, // nrow_y_Xq{_auc}, auc_{qnodes,qwts}, has_grp, grp_assoc, grp_idx, // y{1,2,3}_xq_{eta,eps,auc}, y{1,2,3}_z{1,2}q_{eta,eps,auc}, // y{1,2,3}_z{1,2}q_id_{eta,eps,auc} #include /data/data_assoc.stan // declares: e_prior_{mean,scale,df}{_for_intercept,for_aux}, // e_global_prior_{scale,df} #include /data/hyperparameters_mvmer.stan #include /data/hyperparameters_event.stan // declares: a_prior_{mean,scale,df}, a_global_prior_{scale,df} #include /data/hyperparameters_assoc.stan } transformed data { int e_hs = get_nvars_for_hs(e_prior_dist); int a_hs = get_nvars_for_hs(a_prior_dist); // declares: yHs{1,2,3}, len_{z_T,var_group,rho}, pos, delta, // bCov{1,2}_idx, {sqrt,log,sum_log}_y{1,2,3}, #include /tdata/tdata_mvmer.stan } parameters { // declares: yGamma{1,2,3}, z_yBeta{1,2,3}, z_b, z_T, rho, // zeta, tau, bSd{1,2}, z_bMat{1,2}, bCholesky{1,2}, // yAux{1,2,3}_unscaled, yGlobal{1,2,3}, yLocal{1,2,3}, // yOol{1,2,3}, yMix{1,2,3} #include /parameters/parameters_mvmer.stan // declares e_{gamma,z_beta,aux_unscaled,global,local,mix,ool} #include /parameters/parameters_event.stan // declares a_{z_beta,global,local,mix,ool} #include /parameters/parameters_assoc.stan } transformed parameters { vector[e_K] e_beta; // log hazard ratios vector[a_K] a_beta; // assoc params vector[basehaz_df] e_aux; // basehaz params //---- Parameters for longitudinal submodels // declares and defines: yBeta{1,2,3}, yAux{1,2,3}, yAuxMaximum, // theta_L, bMat{1,2} #include /tparameters/tparameters_mvmer.stan //---- Parameters for event submodel e_beta = make_beta(e_z_beta, e_prior_dist, 
e_prior_mean, e_prior_scale, e_prior_df, e_global_prior_scale, e_global, e_local, e_ool, e_mix, rep_array(1.0, 0), 0, e_slab_scale, e_caux); a_beta = make_beta(a_z_beta, a_prior_dist, a_prior_mean, a_prior_scale, a_prior_df, a_global_prior_scale, a_global, a_local, a_ool, a_mix, rep_array(1.0, 0), 0, a_slab_scale, a_caux); e_aux = make_basehaz_coef(e_aux_unscaled, e_prior_dist_for_aux, e_prior_mean_for_aux, e_prior_scale_for_aux); } model { //---- Log likelihoods for longitudinal submodels #include /model/mvmer_lp.stan { //---- Log likelihood for event submodel (GK quadrature) vector[nrow_e_Xq] e_eta_q; // eta for event submodel (at event and quad times) // Event submodel: linear predictor at event and quad times if (e_K > 0) e_eta_q = e_Xq * e_beta; else e_eta_q = rep_vector(0.0, nrow_e_Xq); if (assoc == 1) { // declares y_eta_q{_eps,_lag,_auc}, y_eta_qwide{_eps,_lag,_auc}, // y_q_wide{_eps,_lag,_auc}, mark{2,3} #include /model/assoc_evaluate.stan } { // declares log_basehaz, log_{haz_q,haz_etimes,surv_etimes,event} // increments target with event log-lik #include /model/event_lp.stan } } //---- Log priors // increments target with mvmer priors #include /model/priors_mvmer.stan target += beta_custom_lpdf(e_z_beta | e_prior_dist, e_prior_scale, e_prior_df, e_global_prior_df, e_local, e_global, e_mix, e_ool, e_slab_df, e_caux); target += beta_custom_lpdf(a_z_beta | a_prior_dist, a_prior_scale, a_prior_df, a_global_prior_df, a_local, a_global, a_mix, a_ool, a_slab_df, a_caux); target += basehaz_lpdf(e_aux_unscaled | e_prior_dist_for_aux, e_prior_scale_for_aux, e_prior_df_for_aux); if (e_has_intercept == 1) target += gamma_custom_lpdf(e_gamma[1] | e_prior_dist_for_intercept, e_prior_mean_for_intercept, e_prior_scale_for_intercept, e_prior_df_for_intercept); } generated quantities { real e_alpha; // transformed intercept for event submodel // declares and defines: mean_PPD, yAlpha{1,2,3}, b{1,2}, bCov{1,2} #include /gqs/gen_quantities_mvmer.stan // norm_const is a 
constant shift in log baseline hazard if (e_has_intercept == 1) e_alpha = e_gamma[1] + norm_const - dot_product(e_xbar, e_beta) - dot_product(a_xbar, a_beta .* a_scale); else e_alpha = norm_const - dot_product(e_xbar, e_beta) - dot_product(a_xbar, a_beta .* a_scale); } rstanarm/src/stan_files/binomial.stan0000644000176200001440000000703614500256225017507 0ustar liggesusers#include /pre/Columbia_copyright.stan #include /pre/license.stan // GLM for a binomial outcome functions { #include /functions/common_functions.stan #include /functions/binomial_likelihoods.stan } data { // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x #include /data/NKX.stan array[N] int y; // outcome: number of successes array[N] int trials; // number of trials // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept #include /data/data_glm.stan // declares has_weights, weights, has_offset, offset #include /data/weights_offset.stan int family; // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_scale_{mean, scale, df}_for_aux #include /data/hyperparameters.stan // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization #include /data/glmer_stuff.stan // declares num_not_zero, w, v, u #include /data/glmer_stuff2.stan } transformed data { real aux = not_a_number(); array[special_case ? t : 0, N] int V = make_V(N, special_case ? 
t : 0, v); // defines hs, len_z_T, len_var_group, delta, pos #include /tdata/tdata_glm.stan } parameters { array[has_intercept] real gamma; // declares z_beta, global, local, z_b, z_T, rho, zeta, tau #include /parameters/parameters_glm.stan } transformed parameters { // defines beta, b, theta_L #include /tparameters/tparameters_glm.stan if (t > 0) { if (special_case == 1) { int start = 1; theta_L = scale .* tau; if (t == 1) b = theta_L[1] * z_b; else for (i in 1 : t) { int end = start + l[i] - 1; b[start : end] = theta_L[i] * z_b[start : end]; start = end + 1; } } else { theta_L = make_theta_L(len_theta_L, p, 1.0, tau, scale, zeta, rho, z_T); b = make_b(z_b, theta_L, p, l); } } } model { if (prior_PD == 0) { #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link != 4) eta += gamma[1]; else eta += gamma[1] - max(eta); } else { #include /model/eta_no_intercept.stan } // Log-likelihood if (has_weights == 0) { // unweighted log-likelihoods target += binom_lpmf(y | trials, eta, link); } else target += dot_product(weights, pw_binom(y, trials, eta, link)); } #include /model/priors_glm.stan if (t > 0) { target += decov_lpdf(z_b | z_T, rho, zeta, tau, regularization, delta, shape, t, p); } } generated quantities { real mean_PPD = compute_mean_PPD ? 
0 : negative_infinity(); array[has_intercept] real alpha; if (has_intercept == 1) { if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta); else alpha[1] = gamma[1]; } if (compute_mean_PPD) { vector[N] pi; #include /model/make_eta.stan if (t > 0) { #include /model/eta_add_Zb.stan } if (has_intercept == 1) { if (link != 4) eta += gamma[1]; else { real shift = max(eta); eta += gamma[1] - shift; alpha[1] -= shift; } } else { #include /model/eta_no_intercept.stan } pi = linkinv_binom(eta, link); for (n in 1 : N) mean_PPD += binomial_rng(trials[n], pi[n]); mean_PPD /= N; } } rstanarm/vignettes/0000755000176200001440000000000014551551771014124 5ustar liggesusersrstanarm/vignettes/pooling.Rmd0000644000176200001440000017321414500256225016235 0ustar liggesusers--- title: "Hierarchical Partial Pooling for Repeated Binary Trials" author: "Bob Carpenter, Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes toc_depth: 3 --- ```{r, knitr-settings, include=FALSE} stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 7, out.width = "70%", fig.align = "center" ) ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette illustrates the effects on posterior inference of pooling data (a.k.a sharing strength) across units for repeated binary trial data. It provides R code to fit and check predictive models for three situations: (a) complete pooling, which assumes each unit is the same, (b) no pooling, which assumes the units are unrelated, and (c) partial pooling, where the similarity among the units is estimated. 
The note explains with working examples how to (i) fit the models using **rstanarm** and plot the results, (ii) estimate event probabilities, (iii) evaluate posterior predictive densities to evaluate model predictions on held-out data, (iv) rank units by chance of success, (v) perform multiple comparisons in several settings, (vi) replicate new data for posterior $p$-values, and (vii) perform graphical posterior predictive checks. The content of the vignette is based on Bob Carpenter's Stan tutorial *[Hierarchical Partial Pooling for Repeated Binary Trials](https://mc-stan.org/users/documentation/case-studies/pool-binary-trials.html)*, but here we show how to fit the models and carry out predictions and model checking and comparison using **rstanarm**. Most of the text is taken from the original, with some additions and subtractions to make the content more useful for **rstanarm** users. The Stan code from the original tutorial has also been entirely removed, as **rstanarm** will fit all of the models in Stan without the user having to write the underlying Stan programs. The Stan code in the original document is a good reference for anyone interested in how these models are estimated "under-the-hood", though the parameterizations used internally by **rstanarm** differ somewhat from those in the original. # Repeated Binary Trials Suppose that for each of $N$ units $n \in 1{:}N$, we observe $y_n$ successes out of $K_n$ trials. For example, the data may consist of * rat tumor development, with $y_n$ rats developing tumors of $K_n$ total rats in experimental control group $n \in 1{:}N$ (Tarone 1982) * surgical mortality, with $y_n$ surgical patients dying in $K_n$ surgeries for hospitals $n \in 1{:}N$ (Spiegelhalter et al. 
1996) * baseball batting ability, with $y_n$ hits in $K_n$ at bats for baseball players $n \in 1{:}N$ (Efron and Morris 1975; Carpenter 2009) * machine learning system accuracy, with $y_n$ correct classifications out of $K_n$ examples for systems $n \in 1{:}N$ (ML conference proceedings; Kaggle competitions) In this vignette we use the small baseball data set of Efron and Morris (1975), but we also provide the rat control data of Tarone (1982), the surgical mortality data of Spiegelhalter et al. (1996) and the extended baseball data set of Carpenter (2009). ### Baseball Hits (Efron and Morris 1975) As a running example, we will use the data from Table 1 of (Efron and Morris 1975), which is included in **rstanarm** under the name `bball1970` (it was downloaded 24 Dec 2015 from [here](https://www.swarthmore.edu/NatSci/peverso1/Sports%20Data/JamesSteinData/Efron-Morris%20Baseball/EfronMorrisBB.txt)). It is drawn from the 1970 Major League Baseball season (from both leagues). ```{r, load-data} library(rstanarm) data(bball1970) bball <- bball1970 print(bball) ``` ```{r, N-K-y} # A few quantities we'll use throughout N <- nrow(bball) K <- bball$AB y <- bball$Hits K_new <- bball$RemainingAB y_new <- bball$RemainingHits ``` The data separates the outcome from the initial 45 at-bats from the rest of the season. After running this code, `N` is the number of units (players). Then for each unit `n`, `K[n]` is the number of initial trials (at-bats), `y[n]` is the number of initial successes (hits), `K_new[n]` is the remaining number of trials (remaining at-bats), and `y_new[n]` is the number of successes in the remaining trials (remaining hits). The remaining data can be used to evaluate the predictive performance of our models conditioned on the observed data. That is, we will "train" on the first 45 at bats and see how well our various models do at predicting the rest of the season. # Pooling With *complete pooling*, each unit is assumed to have the same chance of success. 
With *no pooling*, each unit is assumed to have a completely unrelated chance of success. With *partial pooling*, each unit is assumed to have a different chance of success, but the data for all of the observed units informs the estimates for each unit. Partial pooling is typically accomplished through hierarchical models. Hierarchical models directly model the population of units. From a population model perspective, no pooling corresponds to infinite population variance, whereas complete pooling corresponds to zero population variance. In the following sections, all three types of pooling models will be fit for the baseball data. # Fitting the Models First we'll create some useful objects to use throughout the rest of this vignette. One of them is a function `batting_avg`, which just formats a number to include three decimal places to the right of zero when printing, as is customary for batting averages. ```{r, create-objects, results="hold"} batting_avg <- function(x) print(format(round(x, digits = 3), nsmall = 3), quote = FALSE) player_avgs <- y / K # player avgs through 45 AB tot_avg <- sum(y) / sum(K) # overall avg through 45 AB cat("Player averages through 45 at-bats:\n") batting_avg(player_avgs) cat("Overall average through 45 at-bats:\n") batting_avg(tot_avg) ``` ## Complete Pooling The complete pooling model assumes a single parameter $\theta$ representing the chance of success for all units (in this case players). Assuming each player's at-bats are independent Bernoulli trials, the probability distribution for each player's number of hits $y_n$ is modeled as \[ p(y_n \, | \, \theta) \ = \ \mathsf{Binomial}(y_n \, | \, K_n, \theta). \] When viewed as a function of $\theta$ for fixed $y_n$, this is called the likelihood function. Assuming each player is independent leads to the complete data likelihood \[ p(y \, | \, \theta) = \prod_{n=1}^N \mathsf{Binomial}(y_n \, | \, K_n, \theta). 
\] Using `family=binomial("logit")`, the `stan_glm` function in **rstanarm** will parameterize the model in terms of the log-odds $\alpha$, which are defined by the logit transform as \[ \alpha = \mathrm{logit}(\theta) = \log \, \frac{\theta}{1 - \theta}. \] For example, $\theta = 0.25$ corresponds to odds of $.25$ to $.75$ (equivalently, $1$ to $3$), or log-odds of $\log .25 / .75 = -1.1$. The model is therefore \[ p(y_n \, | \, K_n, \alpha) \ = \ \mathsf{Binomial}(y_n \, | \, K_n, \ \mathrm{logit}^{-1}(\alpha)) \] The inverse logit function is the logistic [sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) from which logistic regression gets its name because the inverse logit function is also the standard logistic Cumulative Distribution Function (CDF), \[ \mathrm{logit}^{-1}(\alpha) = \frac{1}{1 + \exp(-\alpha)} = \theta. \] By construction, for any $\alpha \in (-\infty, \infty)$, $\mathrm{logit}^{-1}(\alpha) \in (0, 1)$; the sigmoid converts arbitrary log odds back to the probability scale. We will use a normal distribution with mean $-1$ and standard deviation $1$ as the prior on the log-odds $\alpha$. This is a weakly informative prior that places about 95% of the prior probability in the interval $(-3, 1)$, which inverse-logit transforms to the interval $(0.05, 0.73)$. The prior median $-1$ corresponds to a $0.27$ chance of success. In fact, an even narrower prior is actually motivated here from substantial baseball knowledge. The figure below shows both this prior on $\alpha$ as well as the prior it implies on the probability $\theta$. ```{r, echo=FALSE} par(mfrow = c(1,3), las = 1) p_alpha <- function(alpha) { dnorm(alpha, -1, 1) } p_theta <- function(theta) { dnorm(log(theta) - log1p(-theta), -1, 1) / (theta - theta^2) } curve2 <- function(expr, limits, xlab, ...) { curve(expr, from = limits[1], to = limits[2], xlab = xlab, lwd = 3, bty = "l", ylab = "", cex.lab = 1.5, ...) 
} curve2(p_alpha, c(-3, 1), expression(alpha)) text(x = 0.25, y = 0.35, labels = expression(p(alpha)), cex = 1.5) curve2(p_theta, c(0, 1), expression(theta), col = "red", ylim = c(0, 2.5)) text(x = 0.575, y = 1.5, labels = expression(p(theta)), cex = 1.5, col = "red") curve2(p_alpha, c(-3, 1), expression(paste(alpha,", ", theta)), ylim = c(0, 2.5)) curve2(p_theta, c(0,1), col = "red", add = TRUE) text(x = -1, y = 0.65, labels = expression(p(alpha)), cex = 1.5) text(x = -0.5, y = 1.5, labels = expression(p(theta)), cex = 1.5, col = "red") ``` To fit the model we call `stan_glm` with the formula `cbind(Hits, AB - Hits) ~ 1`. The left-hand side of the formula specifies the binomial outcome by providing the number of successes (hits) and failures (at-bats) for each player, and the right-hand side indicates that we want an intercept-only model. ```{r, full-pooling, results="hide"} SEED <- 101 wi_prior <- normal(-1, 1) # weakly informative prior on log-odds fit_pool <- stan_glm(cbind(Hits, AB - Hits) ~ 1, data = bball, family = binomial("logit"), prior_intercept = wi_prior, seed = SEED) ``` The `summary` function will compute all sorts of summary statistics from the fitted model, but here we'll create a small function that will compute just a few posterior summary statistics that we'll want for each of the models we estimate. The `summary_stats` function, defined below, will take a matrix of posterior draws as its input, apply an inverse-logit transformation (to convert from log-odds to probabilities) and then compute the median and 80% interval. 
```{r, summary-stats-function} invlogit <- plogis # function(x) 1/(1 + exp(-x)) summary_stats <- function(posterior) { x <- invlogit(posterior) # log-odds -> probabilities t(apply(x, 2, quantile, probs = c(0.1, 0.5, 0.9))) } pool <- summary_stats(as.matrix(fit_pool)) # as.matrix extracts the posterior draws pool <- matrix(pool, # replicate to give each player the same estimates nrow(bball), ncol(pool), byrow = TRUE, dimnames = list(bball$Player, c("10%", "50%", "90%"))) batting_avg(pool) ``` With more data, such as from more players or from the rest of the season, the posterior approaches a delta function around the maximum likelihood estimate and the posterior interval around the central posterior intervals will shrink. Nevertheless, even if we know a player's chance of success exactly, there is a large amount of uncertainty in running $K$ binary trials with that chance of success; using a binomial model fundamentally bounds our prediction accuracy. Although this model will be a good baseline for comparison, we have good reason to believe from a large amount of prior data (players with as many as 10,000 trials) that it is very unlikely that all baseball players have the same chance of success. ## No Pooling A model with no pooling involves a separate chance-of-success parameter $\theta_n \in [0,1]$ for each player $n$, where the $\theta_n$ are assumed to be independent. **rstanarm** will again parameterize the model in terms of the log-odds, $\alpha_n = \mathrm{logit}(\theta_n)$, so the likelihood then uses the log-odds of success $\alpha_n$ for unit $n$ in modeling the number of successes $y_n$ as \[ p(y_n \, | \, \alpha_n) = \mathsf{Binomial}(y_n \, | \, K_n, \mathrm{logit}^{-1}(\alpha_n)). \] Assuming the $y_n$ are independent (conditional on $\theta$), this leads to the total data likelihood \[ p(y \, | \, \alpha) = \prod_{n=1}^N \mathsf{Binomial}(y_n \, | \, K_n, \mathrm{logit}^{-1}(\alpha_n)). 
\] To fit the model we need only tweak the model formula used for the full pooling model to drop the intercept and instead include as the only predictor the factor variable `Player`. This is equivalent to estimating a separate intercept on the log-odds scale for each player. We'll also use the `prior` (rather than `prior_intercept`) argument since `Player` is considered a predictor rather than an intercept from R's perspective. Using the same weakly informative prior now means that the each $\alpha_n$ gets a $\mathsf{Normal}(-1, 1)$ prior, independent of the others. ```{r, no-pooling, results="hide"} fit_nopool <- update(fit_pool, formula = . ~ 0 + Player, prior = wi_prior) nopool <- summary_stats(as.matrix(fit_nopool)) rownames(nopool) <- as.character(bball$Player) batting_avg(nopool) ``` ```{r, no-pooling-print, echo=FALSE} batting_avg(nopool) ``` Each 80% interval is much wider than the estimated interval for the population in the complete pooling model; this is to be expected---there are only 45 data units for each parameter here as opposed to 810 in the complete pooling case. If the units each had different numbers of trials, the intervals would also vary based on size. As the estimated chance of success goes up toward 0.5, the 80% intervals gets wider. This is to be expected for chance of success parameters, because the variance is maximized when $\theta = 0.5$. Based on our existing knowledge of baseball, the no-pooling model is almost certainly overestimating the high abilities and underestimating lower abilities (Ted Williams, 30 years prior to the year this data was collected, was the last player with a 40% observed success rate over a season, whereas 20% or less is too low for all but a few rare defensive specialists). ## Partial Pooling Complete pooling provides estimated abilities that are too narrowly distributed for the units and removes any chance of modeling population variation. 
Estimating each chance of success separately without any pooling provides estimated abilities that are too broadly distributed for the units and hence too variable. Clearly some amount of pooling between these two extremes is called for. But how much? A hierarchical model treats the players as belonging to a population of players. The properties of this population will be estimated along with player abilities, implicitly controlling the amount of pooling that is applied. The more variable the (estimate of the) population, the less pooling is applied. Mathematically, the hierarchical model places a prior on the abilities with parameters that are themselves estimated. This model can be estimated using the `stan_glmer` function. ```{r, partial-pooling, results="hide"} fit_partialpool <- stan_glmer(cbind(Hits, AB - Hits) ~ (1 | Player), data = bball, family = binomial("logit"), prior_intercept = wi_prior, seed = SEED) ``` Because `stan_glmer` (like `glmer`) estimates the varying intercepts for `Player` by estimating a single global intercept $\alpha_0$ and individual deviations from that intercept for each player $\delta_n = \alpha_n - \alpha_0$, to get the posterior distribution for each $\alpha_n$ we need to shift each of the posterior draws by the corresponding draw for the intercept. We can do this easily using the `sweep` function. ```{r, partial-pooling-shift-draws} # shift each player's estimate by intercept (and then drop intercept) shift_draws <- function(draws) { sweep(draws[, -1], MARGIN = 1, STATS = draws[, 1], FUN = "+") } alphas <- shift_draws(as.matrix(fit_partialpool)) partialpool <- summary_stats(alphas) partialpool <- partialpool[-nrow(partialpool),] rownames(partialpool) <- as.character(bball$Player) batting_avg(partialpool) ``` Here the estimates are less extreme than in the no-pooling case, which we should expect due to the partial pooling. 
It is also clear from the wide posteriors for the $\theta_n$ that there is considerable uncertainty in the estimates of chance-of-success on an unit-by-unit (player-by-player) basis. ## Observed vs. Estimated Chance of Success Figure 5.4 from (Gelman et al. 2013) plots the observed number of successes $y_n$ for the first $K_n$ trials versus the median and 80\% intervals for the estimated chance-of-success parameters $\theta_n$ in the posterior. The following R code reproduces a similar plot for our data. ```{r, plot-observed-vs-estimated} library(ggplot2) models <- c("complete pooling", "no pooling", "partial pooling") estimates <- rbind(pool, nopool, partialpool) colnames(estimates) <- c("lb", "median", "ub") plotdata <- data.frame(estimates, observed = rep(player_avgs, times = length(models)), model = rep(models, each = N), row.names = NULL) ggplot(plotdata, aes(x = observed, y = median, ymin = lb, ymax = ub)) + geom_hline(yintercept = tot_avg, color = "lightpink", size = 0.75) + geom_abline(intercept = 0, slope = 1, color = "skyblue") + geom_linerange(color = "gray60", size = 0.75) + geom_point(size = 2.5, shape = 21, fill = "gray30", color = "white", stroke = 0.2) + facet_grid(. ~ model) + coord_fixed() + scale_x_continuous(breaks = c(0.2, 0.3, 0.4)) + labs(x = "Observed Hits / AB", y = "Predicted chance of hit") + ggtitle("Posterior Medians and 80% Intervals") ``` The horizontal axis is the observed rate of success, broken out by player (the overplotting is from players with the same number of successes---they all had the same number of trials in this data). The dots are the posterior medians with bars extending to cover the central 80% posterior interval. Players with the same observed rates are indistinguishable, any differences in estimates are due to MCMC error. The horizontal red line has an intercept equal to the overall success rate, The overall success rate is also the posterior mode (i.e., maximum likelihood estimate) for the complete pooling model. 
The diagonal blue line has intercept 0 and slope 1. Estimates falling on this line make up the maximum likelihood estimates for the no-pooling model. Overall, the plot makes the amount of pooling toward the prior evident. # Posterior Predictive Distribution After we have fit a model using some "training" data, we are usually interested in the predictions of the fitted model for new data, which we can use to * make predictions for new data points; e.g., predict how many hits will Roberto Clemente get in the rest of the season, * evaluate predictions against observed future data; e.g., how well did we predict how many hits Roberto Clemente actually got in the rest of the season, and * generate new simulated data to validate our model fits. With full Bayesian inference, we do not make a point estimate of parameters and use those prediction---we instead use an average of predictions weighted by the posterior. Given data $y$ and a model with parameters $\theta$, the posterior predictive distribution for new data $\tilde{y}$ is defined by \[ p(\tilde{y} \, | \, y) \ = \ \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta, \] where $\Theta$ is the support of the parameters $\theta$. What an integral of this form says is that $p(\tilde{y} \, | \, y)$ is defined as a weighted average over the legal parameter values $\theta \in \Theta$ of the likelihood function $p(\tilde{y} \, | \, \theta)$, with weights given by the posterior, $p(\theta \, | \, y)$. While we do not want to get sidetracked with the notational and mathematical subtleties of expectations here, the posterior predictive density reduces to the expectation of $p(\tilde{y} \, | \, \theta)$ conditioned on $y$. ### Evaluating Held-Out Data Predictions Because the posterior predictive density is formulated as an expectation over the posterior, it is possible to compute via MCMC. 
With $M$ draws $\theta^{(m)}$ from the posterior $p(\theta \, | \, y)$, the posterior predictive log density for new data $y^{\mathrm{new}}$ is given by the MCMC approximation \[ \log \frac{1}{M} \, \sum_{m=1}^M \ p\left( y^{\mathrm{new}} \, | \, \theta^{(m)} \right). \] In practice, this requires care to prevent underflow in floating point calculations; a robust calculation on the log scale is provided below. ### Simulating Replicated Data It is also straightforward to use forward simulation from the probability distribution of the data $p(y \, | \, \theta)$ to generate replicated data $y^{\mathrm{rep}}$ according to the posterior predictive distribution. (Recall that $p(y \, | \, \theta)$ is called the probability distribution when $\theta$ is fixed and the likelihood when $y$ is fixed.) With $M$ draws $\theta^{(m)}$ from the posterior $p(\theta \, | \, y)$, replicated data can be simulated by drawing a sequence of $M$ simulations according $y^{\mathrm{rep} \ (m)}$ with each drawn according to distribution $p(y \, | \, \theta^{(m)})$. This latter random variate generation can usually be done efficiently (both computationally and statistically) by means of forward simulation from the probability distribution of the data; we provide an example below. ## Prediction for New Trials Efron and Morris's (1975) baseball data includes not only the observed hit rate in the initial 45 at bats, but also includes the data for how the player did for the rest of the season. The question arises as to how well these models predict a player's performance for the rest of the season based on their initial 45 at bats. ### Calibration A well calibrated statistical model is one in which the uncertainty in the predictions matches the uncertainty in further data. That is, if we estimate posterior 50% intervals for predictions on new data (here, number of hits in the rest of the season for each player), roughly 50% of the new data should fall in its predicted 50% interval. 
If the model is true in the sense of correctly describing the generative process of the data, then Bayesian inference is guaranteed to be well calibrated. Given that our models are rarely correct in this deep sense, in practice we are concerned with testing their calibration on quantities of interest. ### Sharpness Given two well calibrated models, the one that makes the more precise predictions in the sense of having narrower intervals is better predictively (Gneiting et al. 2007). To see this in an example, we would rather have a well-calibrated prediction that there's a 90% chance the number of hits for a player in the rest of the season will fall in $(120, 130)$ than a 90% prediction that the number of hits will fall in $(100, 150)$. For the models introduced here, a posterior that is a delta function provides the sharpest predictions. Even so, there is residual uncertainty due to the repeated trials; with $K^{\mathrm{new}}$ further trials and a a fixed $\theta_n$ chance of success, the random variable $Y^{\mathrm{new}}_n$ denoting the number of further successes for unit $n$ has a standard deviation from the repeated binary trials of \[ \mathrm{sd}[Y^{\mathrm{new}}_n] \ = \ \sqrt{K \ \theta \, (1 - \theta)}. \] ### Why Evaluate with the Predictive Posterior? The predictive posterior density directly measures the probability of seeing the new data. The higher the probability assigned to the new data, the better job the model has done at predicting the outcome. In the limit, an ideal model would perfectly predict the new outcome with no uncertainty (probability of 1 for a discrete outcome or a delta function at the true value for the density in a continuous outcome). This notion is related to the notion of sharpness discussed in the previous section, because if the new observations have higher predictive densities, they're probably within narrower posterior intervals (Gneiting et al. 2007). 
### $\log E[p(\tilde{y} \, | \, \theta)]$ vs $E[\log p(\tilde{y} \, | \, \theta)]$ The log of posterior predictive density is defined in the obvious way as \[ \log p(\tilde{y} \, | \, y) = \log \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta. \] This is not a posterior expectation, but rather the log of a posterior expectation. In particular, it should not be confused with the posterior expectation of the log predictive density, which is given by \[ \int_{\Theta} \left( \log p(\tilde{y} \, | \, \theta) \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta. \] Although this is easy to compute in Stan in a stable fashion, it does not produce the same answer (as we show below). Because $-\log(u)$ is convex, a little wrangling with [Jensen's inequality](https://en.wikipedia.org/wiki/Jensen%27s_inequality) shows that the expectation of the log is less than or equal to the log of the expectation, \[ \int_{\Theta} \left( \, \log p(\tilde{y} \, | \, \theta) \, \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta \ \leq \ \log \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta \] We'll compute both expectations and demonstrate Jensen's inequality in our running example. The variables `K_new[n]` and `y_new[n]` hold the number of at bats (trials) and the number of hits (successes) for player (unit) `n`. With the held out data we can compute the log density of each data point using the `log_lik` function, which, like `posterior_predict`, accepts a `newdata` argument. The `log_lik` function will return an $M \times N$ matrix, where $M$ is the size of the posterior sample (the number of draws we obtained from the posterior distribution) and $N$ is the number of data points in `newdata`. We can then take the row sums of this matrix to sum over the data points. 
```{r, log_p_new} newdata <- data.frame(Hits = y_new, AB = K_new, Player = bball$Player) fits <- list(Pooling = fit_pool, NoPooling = fit_nopool, PartialPooling = fit_partialpool) # compute log_p_new matrix with each of the models in 'fits' log_p_new_mats <- lapply(fits, log_lik, newdata = newdata) # for each matrix in the list take the row sums log_p_new <- sapply(log_p_new_mats, rowSums) M <- nrow(log_p_new) head(log_p_new) ``` We now have the distributions of `log_p_new` in a matrix with a column for each model. For each model, the posterior mean for `log_p_new` will give us \[ \int_{\Theta} \left( \log p(\tilde{y} \, | \, \theta) \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}). \] To compute this for each of the models we only need to take the mean of the corresponding column of `log_p_new`. ```{r, log_p_new-mean} mean_log_p_new <- colMeans(log_p_new) round(sort(mean_log_p_new, decreasing = TRUE), digits = 1) ``` From a predictive standpoint, the models are ranked by the amount of pooling they do, with complete pooling being the best, and no pooling being the worst predictively. All of these models do predictions by averaging over their posteriors, with the amount of posterior uncertainty also being ranked in reverse order of the amount of pooling they do. As we will now see, the ranking of the models can change when we compute the posterior expectation of the log predictive density. #### Posterior expectation of the log predictive density The straight path to calculate this would be to define a generated quantity $p(y^{\mathrm{new}} \, | y)$, look at the posterior mean computed by Stan, and takes its log. That is, \[ \log p(y^{\mathrm{new}} \, | \, y) \ \approx \ \log \frac{1}{M} \, \sum_{m=1}^M p(y^{\mathrm{new}} \, | \, \theta^{(m)}). 
\] Unfortunately, this won't work in most cases because when we try to compute $p(y^{\mathrm{new}} \, | \, \theta^{(m)})$ directly, it is prone to underflow. For example, 2000 outcomes $y^{\mathrm{new}}_n$, each with likelihood 0.5 for $\theta^{(m)}$, will underflow, because $0.5^{2000}$ is smaller than the smallest positive number that a computer can represent using standard [double-precision floating point](https://en.wikipedia.org/wiki/IEEE_754-1985) (used by Stan, R, etc.). In contrast, if we work on the log scale, $\log p(y^{\mathrm{new}} \, | \, y)$ will not underflow. It's a sum of a bunch of terms of order 1. But we already saw we can't just average the log to get the log of the average. To avoid underflow, we're going to use the [log-sum-of-exponentials](https://en.wikipedia.org/wiki/LogSumExp) trick, which begins by noting the obvious, \[ \log \frac{1}{M} \, \sum_{m=1}^M \ p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \ = \ \log \frac{1}{M} \, \sum_{m=1}^M \ \exp \left( \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \right). \] We'll then write that last expression as \[ -\log M + \mathrm{log\_sum\_exp \, } \ \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \] We can compute $\mathrm{log\_sum\_exp}$ stably by subtracting the max value. Suppose $u = u_1, \ldots, u_M$, and $\max(u)$ is the largest $u_m$. We can calculate \[ \mathrm{log\_sum\_exp \, } \ u_m \ = \ \log \sum_{m=1}^M \exp(u_m) \ = \ \max(u) + \log \sum_{m=1}^M \exp(u_m - \max(u)). \] Because $u_m - \max(u) \leq 0$, the exponentiations cannot overflow. They may underflow to zero, but this will not lose precision because of the leading $\max(u)$ term; the only way underflow can arise is if $u_m - \max(u)$ is very small, meaning that it won't add significant digits to $\max(u)$ if it had not underflowed. 
We can implement $\mathrm{log\_sum\_exp}$ in R as follows: ```{r, log_sum_exp} log_sum_exp <- function(u) { max_u <- max(u) a <- 0 for (n in 1:length(u)) { a <- a + exp(u[n] - max_u) } max_u + log(a) } # Or equivalently using vectorization log_sum_exp <- function(u) { max_u <- max(u) max_u + log(sum(exp(u - max_u))) } ``` and then include the $-\log M$ term to make it `log_mean_exp`: ```{r, log_mean_exp} log_mean_exp <- function(u) { M <- length(u) -log(M) + log_sum_exp(u) } ``` We can then use it to compute the log posterior predictive densities for each of the models: ```{r comment=NA} new_lps <- lapply(log_p_new_mats, function(x) apply(x, 2, log_mean_exp)) # sum over the data points new_lps_sums <- sapply(new_lps, sum) round(sort(new_lps_sums, decreasing = TRUE), digits = 1) ``` Now the ranking is different! As expected, the values here are greater than the expectation of the log density due to Jensen's inequality. The partial pooling model appears to be making slightly better predictions than the full pooling model, which in turn is making slightly better predictions than the no pooling model. #### Approximating the expected log predictive density Vehtari, Gelman, and Gabry (2016) shows that the expected log predictive density can be approximated using the `loo` function for each model and then compared across models: ```{r, loo} loo_compare(loo(fit_partialpool), loo(fit_pool), loo(fit_nopool)) ``` The third column is the leave-one-out (loo) approximation to the expected log predictive density. This approximation is only asymptotically valid and with only 18 observations in this case, substantially underestimates the expected log predictive densities found in the previous subsection. Nevertheless, the relative ranking of the models is essentially the same with the pooled and partially pooled models being virtually indistinguishable but much better than the no pooling model. 
## Predicting New Observations With **rstanarm** it is straightforward to generate draws from the posterior predictive distribution using the `posterior_predict` function. With this capability, we can either generate predictions for new data or we can apply it to the predictors we already have. There will be two sources of uncertainty in our predictions, the first being the uncertainty in $\theta$ in the posterior $p(\theta \, | \, y)$ and the second being the uncertainty due to the likelihood $p(\tilde{y} \, | \, \theta)$. We let $z_n$ be the number of successes for unit $n$ in $K^{\mathrm{new}}_n$ further trials. It might seem tempting to eliminate that second source of uncertainty and set $z_n^{(m)}$ to its expectation, $\theta_n^{(m)} \, K^{\mathrm{new}}_n$, at each draw $m$ from the posterior rather than simulating a new value. Or it might seem tempting to remove the first source of uncertainty and use the posterior mean (or median or mode or ...) rather than draws from the posterior. Either way, the resulting values would suffice for estimating the posterior mean, but would not capture the uncertainty in the prediction for $y^{\mathrm{new}}_n$ and would thus not be useful in estimating predictive standard deviations or quantiles or as the basis for decision making under uncertainty. In other words, the predictions would not be properly calibrated (in a sense we define below). 
To predict $z$ for each player we can use the following code: ```{r, ppd} newdata <- data.frame(Hits = y_new, AB = K_new, Player = bball$Player) ppd_pool <- posterior_predict(fit_pool, newdata) ppd_nopool <- posterior_predict(fit_nopool, newdata) ppd_partialpool <- posterior_predict(fit_partialpool, newdata) colnames(ppd_pool) <- colnames(ppd_nopool) <- colnames(ppd_partialpool) <- as.character(bball$Player) colMeans(ppd_partialpool) ``` Translating the posterior number of hits into a season batting average, $\frac{y_n + z_n}{K_n + K^{\mathrm{new}}_n}$, we get an 80% posterior interval of ```{r, clemente} z_1 <- ppd_partialpool[, 1] clemente_80pct <- (y[1] + quantile(z_1, prob = c(0.1, 0.9))) / (K[1] + K_new[1]) batting_avg(clemente_80pct) ``` for Roberto Clemente from the partial pooling model. Part of our uncertainty here is due to our uncertainty in Clemente's underlying chance of success, and part of our uncertainty is due to there being 367 remaining trials (at bats) modeled as binomial. In the remaining at bats for the season, Clemente's success rate (batting average) was $127 / 367 = 0.346$. For each model, the following plot shows each player's posterior predictive 50% interval for predicted batting average (success rate) in his remaining at bats (trials); the observed success rate in the remainder of the season is shown as a blue dot. ```{r, ppd-stats} ppd_intervals <- function(x) t(apply(x, 2, quantile, probs = c(0.25, 0.75))) ppd_summaries <- (1 / K_new) * rbind(ppd_intervals(ppd_pool), ppd_intervals(ppd_nopool), ppd_intervals(ppd_partialpool)) df_ppd <- data.frame(player = rep(1:length(y_new), 3), y = rep(y_new / K_new, 3), lb = ppd_summaries[, "25%"], ub = ppd_summaries[, "75%"], model = rep(models, each = length(y_new))) ``` ```{r, plot-ppd} ggplot(df_ppd, aes(x=player, y=y, ymin=lb, ymax=ub)) + geom_linerange(color = "gray60", size = 2) + geom_point(size = 2.5, color = "skyblue4") + facet_grid(. 
~ model) + labs(x = NULL, y = "batting average") + scale_x_continuous(breaks = NULL) + ggtitle(expression( atop("Posterior Predictions for Batting Average in Remainder of Season", atop("50% posterior predictive intervals (gray bars); observed (blue dots)", "")))) ``` We choose to plot 50% posterior intervals as they are a good single point for checking calibration. Rather than plotting the number of hits on the vertical axis, we have standardized all the predictions and outcomes to a success rate. Because each unit (player) has a different number of subsequent trials (at bats), the posterior intervals are relatively wider or narrower within the plots for each model (more trials imply narrower intervals for the average). Because each unit had the same number of initial observed trials, this variation is primarily due to the uncertainty from the binomial model of outcomes. ### Calibration With 50% intervals, we expect half of our estimates to lie outside their intervals in a well-calibrated model. If fewer than the expected number of outcomes lie in their estimated posterior intervals, we have reason to believe the model is not well calibrated---its posterior intervals are too narrow. This is also true if too many outcomes lie in their estimated posterior intervals---in this case the intervals are too broad. Of course, there is variation in the tests as the number of units lying in their intervals is itself a random variable (see the exercises), so in practice we are only looking for extreme values as indicators of miscalibration. Each of the models other than the complete pooling model appears to be reasonably well calibrated, and even the calibration for the complete pooling model is not bad (the variation in chance-of-success among players has low enough variance that the complete pooling model cannot be rejected as a possibility with only the amount of data we used here). 
### Sharpness Consider the width of the posterior predictive intervals for the units across the models. The model with no pooling has the broadest posterior predictive intervals and the complete pooling model the narrowest. This is to be expected given the number of observations used to fit each model; 45 each in the no pooling case and 810 in the complete pooling case, and something in between for the partial pooling models. Because the log odds model is doing more pooling, its intervals are slightly narrower than that of the direct hierarchical model. For two well calibrated models, the one with the narrower posterior intervals is preferable because its predictions are tighter. The term introduced for this by Gneiting et al. (2007) is "sharpness." In the limit, a perfect model would provide a delta function at the true answer with a vanishing posterior interval. ## Estimating Event Probabilities The 80% interval in the partial pooling model coincidentally shows us that our model estimates a roughly 10% chance of Roberto Clemente batting 0.400 or better for the season based on batting 0.400 in his first 45 at bats. Not great, but non-trivial. Rather than fishing for the right quantile and hoping to get lucky, we can write a model to directly estimate event probabilities, such as the probability that Roberto Clemente's batting average is 0.400 or better for the season. Event probabilities are defined as expectations of indicator functions over parameters and data. For example, the probability of player $n$'s batting average being 0.400 or better conditioned on the data $y$ is defined by the conditional event probability \[ \mathrm{Pr}\left[ \frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400 \, \Big| \, y \right] \ = \ \int_{\Theta} \mathrm{I}\left[\frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400\right] \ p(z_n \, | \, \theta_n, K^{\mathrm{new}}_n) \ p(\theta \, | \, y, K) \ \mathrm{d}\theta. 
\] The indicator function $\mathrm{I}[c]$ evaluates to 1 if the condition $c$ is true and 0 if it is false. Because it is just another expectation with respect to the posterior, we can calculate this event probability using MCMC as \[ \mathrm{Pr}\left[\frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400 \, \Big| \, y \right] \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}\left[\frac{(y_n + z_n^{(m)})}{(45 + K^{\mathrm{new}}_n)} \geq 0.400\right]. \] This event is about the season batting average being greater than 0.400. What if we care about ability (chance of success), not batting average (success rate) for the rest of the season? Then we would ask what the value of $\mathrm{Pr}[\theta_n \geq 0.400]$ is. This is defined as a weighted average over the posterior and computed via MCMC as in the previous case. \[ \mathrm{Pr}\left[\theta_n \geq 0.400 \, | \, y \right] \ = \ \int_{\Theta} \mathrm{I}\left[\theta_n \geq 0.400\right] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[\theta_n^{(m)} \geq 0.400]. \] ```{r, event-probabilities, results="hold"} draws_partialpool <- shift_draws(as.matrix(fit_partialpool)) thetas_partialpool <- plogis(draws_partialpool) thetas_partialpool <- thetas_partialpool[,-ncol(thetas_partialpool)] colnames(thetas_partialpool) <- as.character(bball$Player) ability_gt_400 <- thetas_partialpool > 0.4 cat("Pr(theta_n >= 0.400 | y)\n") colMeans(ability_gt_400)[c(1, 5, 10)] some_gt_350 <- apply(thetas_partialpool, 1, function(x) max(x) > 0.35) cat("Pr(at least one theta_n >= 0.350 | y)\n") mean(some_gt_350) ``` ## Multiple Comparisons We snuck in a "multiple comparison" event in the last section, namely whether there was some player with a chance of success for hits of .350 or greater. 
With traditional significance testing over multiple trials, it is common to adjust for falsely rejecting the null hypothesis (a so-called Type I error) by inflating the conventional (and arguably far too low) 5% target for reporting "significance." For example, suppose we have our 18 players with ability parameters $\theta_n$ and we have $N = 18$ null hypotheses of the form $H_0^n: \theta_n < 0.350$. Now suppose we evaluate each of these 18 hypotheses independently at the conventional $p = 0.05$ significance level, giving each a 5% chance of rejecting the null hypothesis in error. When we run all 18 hypothesis tests, the overall chance of falsely rejecting at least one of the null hypotheses is a whopping $1 - (1 - 0.05)^{18} = 0.60$. The traditional solution to this problem is to apply a Bonferroni adjustment to control the false rejection rate; the typical adjustment is to divide the $p$-value by the number of hypothesis tests in the "family" (that is, the collective test being done). Here that sets the rate to $p = 0.05/18$, or approximately $p = 0.003$, and results in a slightly less than 5% chance of falsely rejecting at least one null hypothesis. Although the Bonferroni correction does reduce the overall chance of falsely rejecting a null hypothesis, it also reduces the statistical power of the test to the same degree. This means that many false null hypotheses will erroneously fail to be rejected. Rather than doing classical multiple comparison adjustments to adjust for false-discovery rate, such as a Bonferroni correction, Gelman et al. (2012) suggest using a hierarchical model to perform partial pooling instead. As already shown, hierarchical models partially pool the data, which pulls estimates toward the population mean with a strength determined by the amount of observed variation in the population (see also Figure 2 of (Gelman et al. 2012)). 
This automatically reduces the false-discovery rate, though not in a way that is intrinsically calibrated to false discovery, which is good, because reducing the overall false discovery rate in and of itself reduces the true discovery rate at the same time. The generated quantity `some_ability_gt_350` will be set to 1 if the maximum ability estimate in $\theta$ is greater than 0.35. And thus the posterior mean of this generated quantity will be the event probability \[ \mathrm{Pr}[\mathrm{max}(\theta) > 0.350] \ = \ \int_{\Theta} \mathrm{I}[\mathrm{max}(\theta) > 0.35] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \ \mathrm{I}[\mathrm{max}(\theta^{(m)}) > 0.35] \] where $\theta^{(m)}$ is the sequence of posterior draws for the ability parameter vector. Stan reports this value as the posterior mean of the generated quantity `some_ability_gt_350`, which takes on the value $\mathrm{I}[\mathrm{max}(\theta^{(m)}) > 0.35]$ in each iteration. ```{r, echo=FALSE} thetas_pool <- plogis(as.matrix(fit_pool)) thetas_nopool <- plogis(as.matrix(fit_nopool)) some_gt_350_all <- sapply(list(thetas_pool, thetas_nopool, thetas_partialpool), function(x) apply(x, 1, max) > 0.35) chance_gt_350 <- round(100 * colMeans(some_gt_350_all)) ``` The probability estimate of there being a player with an ability (chance of success) greater than 0.350 is essentially zero in the complete pooling model and is essentially guaranteed in the no pooling model. The partially pooled estimates would not be considered significant at conventional p=0.05 thresholds. One way to get a handle on what's going on is to inspect the posterior 80% intervals for chance-of-success estimates in the first graph above. ## Ranking In addition to multiple comparisons, we can use the simultaneous estimation of the ability parameters to rank the units. In this section, we rank ballplayers by (estimated) chance of success (i.e., batting ability). 
Of course, ranking players by ability makes no sense for the complete pooling model, where every player is assumed to have the same ability. ```{r, ranking} reverse_rank <- function(x) 1 + length(x) - rank(x) # so lower rank is better rank <- apply(thetas_partialpool, 1, reverse_rank) t(apply(rank, 1, quantile, prob = c(0.1, 0.5, 0.9))) ``` It is again abundantly clear from the posterior intervals that our uncertainty is very great after only 45 at bats. In the original Volume I BUGS [example](https://www.mrc-bsu.cam.ac.uk/software/bugs/the-bugs-project-the-bugs-book/bugs-book-examples/the-bugs-book-examples-chapter-2-2-7-1/) of surgical mortality, the posterior distribution over ranks was plotted for each hospital. It is now straightforward to reproduce that figure here for the baseball data. ```{r, plot-ranks} df_rank <- data.frame(name = rep(bball$Player, each = M), rank = c(t(rank))) ggplot(df_rank, aes(rank)) + stat_count(width = 0.8) + facet_wrap(~ name) + scale_x_discrete("Rank", limits = c(1, 5, 10, 15)) + scale_y_discrete("Probability", limits = c(0, 0.1 * M, 0.2 * M), labels = c("0.0", "0.1", "0.2")) + ggtitle("Rankings for Partial Pooling Model") ``` #### Who has the Highest Chance of Success? We can use our ranking statistic to calculate the event probability for unit $n$ that the unit has the highest chance of success using MCMC as \[ \mathrm{Pr}[\theta_n = \max(\theta)] \ = \ \int_{\Theta} \mathrm{I}[\theta_n = \mathrm{max}(\theta)] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[\theta^{(m)}_n = \mathrm{max}(\theta^{(m)})]. \] Like our other models, the partial pooling mitigates the implicit multiple comparisons being done to calculate the probabilities of rankings. Contrast this with an approach that does a pairwise significance test and then applies a false-discovery correction. We can compute this straightforwardly using the rank data we have already computed or we could compute it directly as above. 
Because $\mathrm{Pr}[\theta_n = \theta_{n'}] = 0$ for $n \neq n'$, we don't have to worry about ties. ```{r, plot-best-player} thetas_nopool <- plogis(as.matrix(fit_nopool)) colnames(thetas_nopool) <- as.character(bball$Player) rank_nopool <- apply(thetas_nopool, 1, reverse_rank) is_best_nopool <- rowMeans(rank_nopool == 1) is_best_partialpool <- rowMeans(rank == 1) df_is_best <- data.frame(unit = rep(bball$Player, 2), is_best = c(is_best_partialpool, is_best_nopool), model = rep(c("partial pooling", "no pooling"), each = N)) ggplot(df_is_best, aes(x=unit, y=is_best)) + geom_bar(stat = "identity") + facet_wrap(~ model) + scale_y_continuous(name = "Pr[player is best]") + ggtitle("Who is the Best Player?") + theme(axis.text.x = element_text(angle = -45, vjust = 1, hjust = 0)) ``` This question of which player has the highest chance of success (batting ability) doesn't even make sense in the complete pooling model, because the chance of success parameters are all the same by definition. In the other models, the amount of pooling directly determines the probabilities of being the best player. That is, the probability of being best goes down for high performing players with more pooling, whereas it goes up for below-average players. ## Graphical Posterior Predictive Checks We can simulate data from the predictive distribution and compare it to the original data used for fitting the model. If they are not consistent, then either our model is not capturing the aspects of the data we are probing with test statistics or the measurement we made is highly unlikely. That is, extreme $p$-values lead us to suspect there is something wrong with our model that deserves further exploration. In some cases, we are willing to work with models that are wrong in some measurable aspects, but accurately capture quantities of interest for an application. That is, it's possible for a model to capture some, but not all, aspects of a data set, and still be useful. 
### Test Statistics and Bayesian $p$-Values A test statistic $T$ is a function from data to a real value. Following (Gelman et al. 2013), we will concentrate on four specific test statistics for repeated binary trial data (though these choices are fairly general): minimum value, maximum value, sample mean, and sample standard deviation. Given a test statistic $T$ and data $y$, the Bayesian $p$-value has a direct definition as a probability, \[ p_B = \mathrm{Pr}[T(y^{\mathrm{rep}}) \geq T(y) \, | \, y]. \] Bayesian $p$-values, like their traditional counterparts, are probabilities, but not probabilities that a model is true. They simply measure discrepancies between the observed data and what we would expect if the model is true. Values of Bayesian $p$-values near 0 or 1 indicate that the data $y$ used to estimate the model is unlikely to have been generated by the estimated model. As with other forms of full Bayesian inference, our estimate is the full posterior, not just a point estimate. As with other Bayesian inferences, we average over the posterior rather than working from a point estimate of the parameters. Expanding this as an expectation of an indicator function, \[ p_B \ = \ \int_{\Theta, Y^{\mathrm{rep}}} \mathrm{I}[T(y^{\mathrm{rep}}) \geq T(y)] \ p(y^{\mathrm{rep}} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}y^{\mathrm{rep}} \, \mathrm{d}\theta. \] We treat $y^{\mathrm{rep}}$ as a parameter in parallel with $\theta$, integrating over possible values $y^{\mathrm{rep}} \in Y^{\mathrm{rep}}$. As usual, we use the integration sign in a general way intended to include summation, as with the discrete variable $y^{\mathrm{rep}}$. The formulation as an expectation leads to the obvious MCMC calculation based on posterior draws $y^{\mathrm{rep} (m)}$ for $m \in 1{:}M$, \[ p_B \approx \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[T(y^{\mathrm{rep} \ (m)}) \geq T(y)]. \] Using the `pp_check` in **rstanarm**, we can easily reproduce Figure 6.12 from (Gelman et al. 
2013), which shows the posterior predictive distribution for the test statistic, the observed value as a vertical line, and the $p$-value for each of the tests. First, here is just the plot for the no pooling model using the mean as the test statistic: ```{r, plot-ppc-stats-mean} pp_check(fit_nopool, plotfun = "stat", stat = "mean") ``` The `stat` argument can be the name of any R function (including your own functions defined in the Global Environment) that takes a vector as an input and returns a scalar. To make plots for each of the models for several test statistics we can use the following code, which will create a list of ggplot objects for each model and then arrange everything in a single plot. ```{r, plot-ppc-stats} tstat_plots <- function(model, stats) { lapply(stats, function(stat) { graph <- pp_check(model, plotfun = "stat", stat = stat, seed = SEED) # optional arguments graph + xlab(stat) + theme(legend.position = "none") }) } Tstats <- c("mean", "sd", "min", "max") ppcs_pool <- tstat_plots(fit_pool, Tstats) ppcs_nopool <- tstat_plots(fit_nopool, Tstats) ppcs_partialpool <- tstat_plots(fit_partialpool, Tstats) if (require(gridExtra)) { grid.arrange( arrangeGrob(grobs = ppcs_pool, nrow = 1, left = "Pooling"), arrangeGrob(grobs = ppcs_nopool, nrow = 1, left = "No Pooling"), arrangeGrob(grobs = ppcs_partialpool, nrow = 1, left = "Partial Pooling") ) } ``` The only worrisomely extreme value visible in the plots is the $p$-value for standard deviation in the no-pooling model, where the vast majority of the simulated data sets under the model had standard deviations greater than the actual data. We didn't actually compute this $p$-value because extreme $p$-values are easy to detect visually and whether or not the $p$-value is less than $0.05$ or some other arbitrary value is of little use to us beyond what we can already see in the plot. 
However, if we did want to actually compute the $p$-value we can do so easily: ```{r, p-value} yrep <- posterior_predict(fit_nopool, seed = SEED) # seed is optional Ty <- sd(y) Tyrep <- apply(yrep, 1, sd) # tail-area probability p <- 1 - mean(Tyrep > Ty) print(p) ``` ### Comparing Observed and Replicated Data Following the advice of Gelman et al. (2013), we will take the fitted parameters of the data set and generate replicated data sets, then compare the replicated data sets visually to the observed data we used to fit the model. In this section we'll create the plots for the model using partial pooling, but the same plots can be made for the other models too. Again using **rstanarm**'s `pp_check` function, we can plot some of the simulated data sets along with the original data set to do a visual inspection as suggested by Gelman et al. (2013). For this type of posterior predictive check we set the `check` argument to `"distributions"` and we use `nreps` to specify how many replicated sets of data to generate from the posterior predictive distribution. Because our models have a binomial outcome, instead of plotting the number of successes (hits in this case) on the x-axis, `pp_check` will plot the proportion of successes. ```{r, plot-ppc-y-vs-yrep} pp_check(fit_partialpool, plotfun = "hist", nreps = 15, binwidth = 0.025) + ggtitle("Model: Partial Pooling") ``` These simulations are not unreasonable for a binomial likelihood, but they are more spread out than the actual data. In this case, this may actually have more to do with how the data were selected out of all the major league baseball players than the actual data distribution. Efron and Morris (1975, p 312) write > This sample was chosen because we wanted between 30 and 50 at bats to assure a satisfactory approximation of the binomial by the normal distribution while leaving the bulk of at bats to be estimated. 
We also wanted to include an unusually good hitter (Clemente) to test the method with at least one extreme parameter, a situation expected to be less favorable to Stein's estimator. Stein's estimator requires equal variances, or in this situation, equal at bats, so the remaining 17 players are all whom either the April 26 or May 3 *New York Times* reported with 45 at bats. # Discussion A hierarchical model introduces an estimation bias toward the population mean and the stronger the bias, the less variance there is in the estimates for the units. Exactly how much bias and variance is warranted can be estimated by further calibrating the model and testing where its predictions do not bear out. With very little data, there is very little we can do to gain sharp inferences other than provide more informative priors, which is well worth doing when prior information is available. On the other hand, with more data, the models provide similar results (see the exercises), and in the limit, all of the models (other than complete pooling) converge to posteriors that are delta functions around the empirical chance of success (i.e., the maximum likelihood estimate). Meanwhile, Bayesian inference is allowing us to make more accurate predictions with the data available before we hit that asymptotic regime. # Exercises 1. Generate fake data according to the pooling, no-pooling, and partial pooling models. Fit the model and consider the coverage of the posterior 80% intervals. 1. Try generating data where each player has a different number of at-bats (trials) and then fitting the models. What effect does the number of initial trials have on the posterior? Is there a way to quantify the effect? 1. In the section where we fit the complete pooling model we show a plot of the prior distribution on the probability of success $\theta$ implied by the $\mathsf{Normal}(-1,1)$ prior on the log-odds $\alpha$. 
If $\theta = \mathrm{logit}^{-1}(\alpha)$ and $p(\alpha) = \mathsf{Normal}(\alpha \,|\, -1, 1)$, what is $p(\theta)$? For a hint, see [here](https://en.wikipedia.org/wiki/Probability_density_function#Dependent_variables_and_change_of_variables). 1. How sensitive is the basic no-pooling model to the choice of prior? We used a somewhat informative prior due to our knowledge of baseball, but the prior could be made more or less informative. How, if at all, does this affect posterior inference? 1. What are some other test statistics that might be used to evaluate our model fit to data? Try some out using `pp_check(model, plotfun="stat", stat = "my_test")`, where `my_test` is your function that computes the test statistic. For example, to check the 25% quantile you could first define a function `q25 <- function(x) quantile(x, 0.25)` and then call `pp_check(model, plotfun = "stat", stat = "q25")`. 1. Discuss the difference between batting average and on-base percentage as random variables. Consider particularly the denominator (at-bat versus plate appearance). Is the denominator in these kinds of problems always a random variable itself? Why might this be important in inference? # References * Betancourt, M. and Girolami, M. (2015) Hamiltonian Monte Carlo for hierarchical models. *Current Trends in Bayesian Methodology with Applications* **79**. * Efron, B. and Morris, C. (1975) Data analysis using Stein's estimator and its generalizations. *Journal of the American Statistical Association* **70**(350), 311--319. [ [pdf](https://www.medicine.mcgill.ca/epidemiology/hanley/bios602/MultilevelData/EfronMorrisJASA1975.pdf) ] * Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013) *Bayesian Data Analysis*, 3rd Edition. Chapman & Hall/CRC Press, London. * Gelman, A. and Hill, J. (2007) *Data Analysis Using Regression and Multilevel-Hierarchical Models*. Cambridge University Press, Cambridge, United Kingdom. 
* Gelman, A., Hill, J., and Yajima, M. (2012) Why we (usually) don't have to worry about multiple comparisons. *Journal of Research on Educational Effectiveness* **5**, 189--211. [ [pdf](http://www.stat.columbia.edu/~gelman/research/published/multiple2f.pdf) ] * Gneiting, T., Balabdaoui, F., and Raftery, A. E. (2007) Probabilistic forecasts, calibration and sharpness. *Journal of the Royal Statistical Society: Series B* (Statistical Methodology), **69**(2), 243--268. * Lunn, D., Jackson, C., Best, N., Thomas, A., and Spiegelhalter, D. (2013) *The BUGS Book: A Practical Introduction to Bayesian Analysis*. Chapman & Hall/CRC Press. * Neal, R. M. (2003) Slice sampling. *Annals of Statistics* **31**(3):705--767. * Papaspiliopoulos, O., Roberts, G. O., and Skold, M. (2003) Non-centered parameterisations for hierarchical models and data augmentation. In *Bayesian Statistics 7: Proceedings of the Seventh Valencia International Meeting*, edited by Bernardo, J. M., Bayarri, M. J., Berger, J. O., Dawid, A. P., Heckerman, D., Smith, A. F. M., and West, M. Oxford University Press, Chicago. * Plummer, M., Best, N., Cowles, K., & Vines, K. (2006). CODA: Convergence diagnosis and output analysis for MCMC. *R News*, **6**(1), 7--11. * Spiegelhalter, D., Thomas, A., Best, N., & Gilks, W. (1996) BUGS 0.5 Examples. MRC Biostatistics Unit, Institute of Public health, Cambridge, UK. * Stan Development Team (2015) *Stan Modeling Language User's Guide and Reference Manual*. [ [web page] ](https://mc-stan.org/documentation/) * Tarone, R. E. (1982) The use of historical control information in testing for a trend in proportions. *Biometrics* **38**(1):215--220. * Vehtari, A, Gelman, A., & Gabry, J. (2016) Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. [ [pdf](https://arxiv.org/abs/1507.04544) ] # Additional Data Sets The following additional data sets have a similar structure to the baseball data used in this vignette and are included with **rstanarm**. 
#### Rat tumors (N = 71) Tarone (1982) provides a data set of tumor incidence in historical control groups of rats; specifically endometrial stromal polyps in female lab rats of type F344. The data set is taken from the book site for Gelman et al. (2013): * To load: `data(tumors, package = "rstanarm")` * Data source: [http://www.stat.columbia.edu/~gelman/book/data/rats.asc](http://www.stat.columbia.edu/~gelman/book/data/rats.asc) #### Surgical mortality (N = 12) Spiegelhalter et al. (1996) provide a data set of mortality rates in 12 hospitals performing cardiac surgery in babies. We just manually entered the data from the paper; it is also available in the Stan example models repository in R format. * To load: `data(mortality, package = "rstanarm")` * Data source: Unknown #### Baseball hits 2006 AL (N = 308) Carpenter (2009) updates Efron and Morris's (1975) data set for the entire set of players for the entire 2006 American League season of Major League Baseball. The data was originally downloaded from seanlahman.com, which is currently not working. * To load: `data(bball2006, package = "rstanarm")` * Data Source: [https://web.archive.org/web/20220618114439/https://lingpipe-blog.com/2009/09/23/](https://web.archive.org/web/20220618114439/https://lingpipe-blog.com/2009/09/23/) rstanarm/vignettes/jm.Rmd0000644000176200001440000020131014370470372015167 0ustar liggesusers--- title: "Estimating Joint Models for Longitudinal and Time-to-Event Data with rstanarm" author: "Sam Brilleman" date: "`r Sys.Date()`" output: html_vignette: toc: true number_sections: false --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` ```{r setup_jm, include=FALSE, message=FALSE} knitr::opts_chunk$set(fig.width=10, fig.height=4) library(rstanarm) ``` # Preamble This vignette provides an introduction to the `stan_jm` modelling function in the __rstanarm__ package.
The `stan_jm` function allows the user to estimate a shared parameter joint model for longitudinal and time-to-event data under a Bayesian framework. # Introduction Joint modelling can be broadly defined as the simultaneous estimation of two or more statistical models which traditionally would have been separately estimated. When we refer to a shared parameter joint model for longitudinal and time-to-event data, we generally mean the joint estimation of: 1) a longitudinal mixed effects model which analyses patterns of change in an outcome variable that has been measured repeatedly over time (for example, a clinical biomarker) and 2) a survival or time-to-event model which analyses the time until an event of interest occurs (for example, death or disease progression). Joint estimation of these so-called "submodels" is achieved by assuming they are correlated via individual-specific parameters (i.e. individual-level random effects). Over the last two decades the joint modelling of longitudinal and time-to-event data has received a significant amount of attention [1-5]. Methodological developments in the area have been motivated by a growing awareness of the benefits that a joint modelling approach can provide. In clinical or epidemiological research it is common for a clinical biomarker to be repeatedly measured over time on a given patient. In addition, it is common for time-to-event data, such as the patient-specific time from a defined origin (e.g. time of diagnosis of a disease) until a terminating clinical event such as death or disease progression to also be collected. The figure below shows observed longitudinal measurements (i.e. observed "trajectories") of log serum bilirubin for a small sample of patients with primary biliary cirrhosis. Solid lines are used for those patients who were still alive at the end of follow up, while dashed lines are used for those patients who died. 
From the plots, we can observe between-patient variation in the longitudinal trajectories for log serum bilirubin, with some patients showing an increase in the biomarker over time, others decreasing, and some remaining stable. Moreover, there is variation between patients in terms of the frequency and timing of the longitudinal measurements. \ ```{r traj_figure, echo=FALSE} # Plot observed longitudinal trajectories for log serum bilirubin ids <- c(25,31:33,36,38:40) pbcLong_subset <- pbcLong[pbcLong$id %in% ids, ] pbcLong_subset <- merge(pbcLong_subset, pbcSurv) pbcLong_subset$Died <- factor(pbcLong_subset$death, labels = c("No", "Yes")) patient_labels <- paste("Patient", 1:8) names(patient_labels) <- ids ggplot() + geom_line(aes(y = logBili, x = year, lty = Died), data = pbcLong_subset) + facet_wrap(~ id, ncol = 4, labeller = labeller(id = patient_labels)) + theme_bw() + ylab("Log serum bilirubin") + xlab("Time (years)") ``` From the perspective of clinical risk prediction, we may be interested in asking whether the between-patient variation in the log serum bilirubin trajectories provides meaningful prognostic information that can help us differentiate patients with regard to some clinical event of interest, such as death. Alternatively, from an epidemiological perspective we may wish to explore the potential for etiological associations between changes in log serum bilirubin and mortality. Joint modelling approaches provide us with a framework under which we can begin to answer these types of clinical and epidemiological questions. More formally, the motivations for undertaking a joint modelling analysis of longitudinal and time-to-event data might include one or more of the following: - One may be interested in how *underlying changes in the biomarker influence the occurrence of the event*. However, including the observed biomarker measurements directly into a time-to-event model as time-varying covariates poses several problems. 
For example, if the widely used Cox proportional hazards model is assumed for the time-to-event model then biomarker measurements need to be available for all patients at all failure times, which is unlikely to be the case [3]. If simple methods of imputation are used, such as the "last observation carried forward" method, then these are likely to induce bias [6]. Furthermore, the observed biomarker measurements may be subject to measurement error and therefore their inclusion as time-varying covariates may result in biased and inefficient estimates. In most cases, the measurement error will result in parameter estimates which are shrunk towards the null [7]. On the other hand, joint modelling approaches allow us to estimate the association between the biomarker (or some function of the biomarker trajectory, such as rate of change in the biomarker) and the risk of the event, whilst allowing for both the discrete time and measurement-error aspects of the observed biomarker. - One may be interested primarily in the evolution of the clinical biomarker but *may wish to account for what is known as informative dropout*. If the value of future (unobserved) biomarker measurements are related to the occurrence of the terminating event, then those unobserved biomarker measurements will be "missing not at random" [8,9]. In other words, biomarker measurements for patients who have an event will differ from those who do not have an event. Under these circumstances, inference based solely on observed measurements of the biomarker will be subject to bias. A joint modelling approach can help to adjust for informative dropout and has been shown to reduce bias in the estimated parameters associated with longitudinal changes in the biomarker [1,9,10]. - Joint models are naturally suited to the task of *dynamic risk prediction*. 
For example, joint modelling approaches have been used to develop prognostic models where predictions of event risk can be updated as new longitudinal biomarker measurements become available. Taylor et al. [11] jointly modelled longitudinal measurements of the prostate specific antigen (PSA) and time to clinical recurrence of prostate cancer. The joint model was then used to develop a web-based calculator which could provide real-time predictions of the probability of recurrence based on a patient's up to date PSA measurements. In this vignette, we describe the __rstanarm__ package's `stan_jm` modelling function. This modelling function allows users to fit a shared parameter joint model for longitudinal and time-to-event data under a Bayesian framework, with the backend estimation carried out using Stan. In Section 2 we describe the formulation of the joint model used by `stan_jm`. In Section 3 we present a variety of examples showing the usage of `stan_jm`. Note that some aspects of the estimation are covered in other vignettes, such as the priors [vignette](priors.html) which contains details on the prior distributions available for regression coefficients. # Technical details ## Model formulation A shared parameter joint model consists of related submodels which are specified separately for each of the longitudinal and time-to-event outcomes. These are therefore commonly referred to as the *longitudinal submodel(s)* and the *event submodel*. The longitudinal and event submodels are linked using shared individual-specific parameters, which can be parameterised in a number of ways. We describe each of these submodels below. ### Longitudinal submodel(s) We assume $y_{ijm}(t) = y_{im}(t_{ij})$ corresponds to the observed value of the $m^{th}$ $(m = 1,...,M)$ biomarker for individual $i$ $(i = 1,...,N)$ taken at time point $t_{ij}$, $j = 1,...,n_{im}$. 
We specify a (multivariate) generalised linear mixed model that assumes $y_{ijm}(t)$ follows a distribution in the exponential family with mean $\mu_{ijm}(t)$ and linear predictor $$ \eta_{ijm}(t) = g_m(\mu_{ijm}(t)) = \boldsymbol{x}^T_{ijm}(t) \boldsymbol{\beta}_m + \boldsymbol{z}^T_{ijm}(t) \boldsymbol{b}_{im} $$ where $\boldsymbol{x}^T_{ijm}(t)$ and $\boldsymbol{z}^T_{ijm}(t)$ are both row-vectors of covariates (which likely include some function of time, for example a linear slope, cubic splines, or polynomial terms) with associated vectors of fixed and individual-specific parameters $\boldsymbol{\beta}_m$ and $\boldsymbol{b}_{im}$, respectively, and $g_m$ is some known link function. The distribution and link function are allowed to differ over the $M$ longitudinal submodels. We let the vector $\boldsymbol{\beta} = \{ \boldsymbol{\beta}_m ; m = 1,...,M\}$ denote the collection of population-level parameters across the $M$ longitudinal submodels. We assume that the dependence across the different longitudinal submodels (i.e. the correlation between the different longitudinal biomarkers) is captured through a shared multivariate normal distribution for the individual-specific parameters; that is, we assume $$ \begin{pmatrix} \boldsymbol{b}_{i1} \\ \vdots \\ \boldsymbol{b}_{iM} \end{pmatrix} = \boldsymbol{b}_i \sim \mathsf{Normal} \left( 0 , \boldsymbol{\Sigma} \right) $$ for some unstructured variance-covariance matrix $\boldsymbol{\Sigma}$. ### Event submodel We assume that we also observe an event time $T_i = \mathsf{min} \left( T^*_i , C_i \right)$ where $T^*_i$ denotes the so-called "true" event time for individual $i$ (potentially unobserved) and $C_i$ denotes the censoring time. We define an event indicator $d_i = I(T^*_i \leq C_i)$. 
We then model the hazard of the event using a parametric proportional hazards regression model of the form $$ h_i(t) = h_0(t; \boldsymbol{\omega}) \mathsf{exp} \left( \boldsymbol{w}^T_i(t) \boldsymbol{\gamma} + \sum_{m=1}^M \sum_{q=1}^{Q_m} f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) \right) $$ where $h_i(t)$ is the hazard of the event for individual $i$ at time $t$, $h_0(t; \boldsymbol{\omega})$ is the baseline hazard at time $t$ given parameters $\boldsymbol{\omega}$, $\boldsymbol{w}^T_i(t)$ is a row-vector of individual-specific covariates (possibly time-dependent) with an associated vector of regression coefficients $\boldsymbol{\gamma}$ (log hazard ratios), $f_{mq}(.)$ are a set of known functions for $m=1,...,M$ and $q=1,...,Q_m$, and the $\alpha_{mq}$ are regression coefficients (log hazard ratios). The longitudinal and event submodels are assumed to be related via an "association structure", which is a set of functions each $\{ f_{mq} ; m = 1,...,M, q = 1,...,Q_m \}$ that may each be conditional on the population-level parameters from the longitudinal submodel $\boldsymbol{\beta}$, the individual-specific parameters $\boldsymbol{b}_{i}$, and the population-level parameters $\alpha_{mq}$ for $m=1,...,M$ and $q=1,...,Q_m$. That is, the association structure of the joint model is captured via the $\sum_{m=1}^M \sum_{q=1}^{Q_m} f_{mq}(\boldsymbol{\beta}_m, \boldsymbol{b}_{im}, \alpha_{mq}; t)$ term in the linear predictor of the event submodel. The $\alpha_{mq}$ are referred to as the "association parameters" since they quantify the strength of the association between the longitudinal and event processes. The various ways in which the association structure can be specified are described in the next section.
The probability of individual $i$ still being event-free at time $t$, often referred to as the "survival probability", is defined as $$ S_i(t) = \text{Prob} \Big( T_i^* \geq t \Big) = \exp \Big( -H_i(t) \Big) $$ where $H_i(t) = \int_{s=0}^t h_i(s) ds$ is the cumulative hazard for individual $i$. We assume that the baseline hazard $h_0(t; \boldsymbol{\omega})$ is modelled parametrically. In the `stan_jm` modelling function the baseline hazard can be specified as either: an approximation using B-splines on the log hazard scale (the default); a Weibull distribution; or an approximation using a piecewise constant function on the log hazard scale (sometimes referred to as piecewise exponential). The choice of baseline hazard can be made via the `basehaz` argument. In the case of the B-splines or piecewise constant baseline hazard, the user can control the flexibility by specifying the knots or degrees of freedom via the `basehaz_ops` argument. (Note that currently there is slightly limited post-estimation functionality available for models estimated with a piecewise constant baseline hazard, so this is perhaps the least preferable choice). ### Association structures As mentioned in the previous section, the dependence between the longitudinal and event submodels is captured through the association structure, which can be specified in a number of ways. The simplest association structure is likely to be $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{im}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) $$ and this is often referred to as a *current value* association structure since it assumes that the log hazard of the event at time $t$ is linearly associated with the value of the longitudinal submodel's linear predictor also evaluated at time $t$. This is the most common association structure used in the joint modelling literature to date. In the situation where the longitudinal submodel is based on an identity link function and normal error distribution (i.e.
a linear mixed model) the *current value* association structure can be viewed as a method for including the underlying "true" value of the biomarker as a time-varying covariate in the event submodel.^[By "true" value of the biomarker, we mean the value of the biomarker which is not subject to measurement error or discrete time observation. Of course, for the expected value from the longitudinal submodel to be considered the so-called "true" underlying biomarker value, we would need to have specified the longitudinal submodel appropriately!] However, other association structures are also possible. For example, we could assume the log hazard of the event is linearly associated with the *current slope* (i.e. rate of change) of the longitudinal submodel's linear predictor, that is $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\eta_{im}(t)}{dt} $$ There are in fact a whole range of possible association structures, many of which have been discussed in the literature [14-16]. 
The `stan_jm` modelling function in the __rstanarm__ package allows for the following association structures, which are specified via the `assoc` argument: Current value (of the linear predictor or expected value) $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \mu_{im}(t) $$ Current slope (of the linear predictor or expected value) $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\eta_{im}(t)}{dt} \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\mu_{im}(t)}{dt} $$ Area under the curve (of the linear predictor or expected value) $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \int_0^t \eta_{im}(u) du \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \int_0^t \mu_{im}(u) du $$ Interactions between different biomarkers $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \eta_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \mu_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \mu_{im}(t) \mu_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' $$ Interactions between the biomarker (or its slope) and observed data $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \eta_{im}(t) \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \mu_{im}(t) \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \frac{d\eta_{im}(t)}{dt} \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta},
\boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \frac{d\mu_{im}(t)}{dt} \text{ for some covariate value } c_{i}(t) $$ As well as using lagged values for any of the above. That is, replacing $t$ with $t-u$ where $u$ is some lag time, such that the hazard of the event at time $t$ is assumed to be associated with some function of the longitudinal submodel parameters at time $t-u$. Lastly, we can specify some time-independent function of the random effects, possibly including the fixed effect component. For example, $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \boldsymbol{b}_{im0} $$ or $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \Big( \boldsymbol{\beta}_{m0} + \boldsymbol{b}_{im0} \Big) $$ where $\boldsymbol{\beta}_{m0}$ is the population-level intercept for the $m^{th}$ longitudinal submodel and $\boldsymbol{b}_{im0}$ is the $i^{th}$ individual's random deviation from the population-level intercept for the $m^{th}$ longitudinal submodel. Note that more than one association structure can be specified, however, not all possible combinations are allowed. Moreover, if you are fitting a multivariate joint model (i.e. more than one longitudinal outcome) then you can optionally choose to use a different association structure(s) for linking each longitudinal submodel to the event submodel. To do this you can pass a list of length $M$ to the `assoc` argument. ### Assumptions Here we define a set of assumptions for the multivariate shared parameter joint model. 
The so-called conditional independence assumption of the shared parameter joint model postulates $$ y_{im}(t) \perp y_{im'}(t) \mid \boldsymbol{b}_i, \boldsymbol{\theta} \\ y_{im}(t) \perp y_{im}(t') \mid \boldsymbol{b}_i, \boldsymbol{\theta} \\ y_{im}(t) \perp T_i^* \mid \boldsymbol{b}_i, \boldsymbol{\theta} $$ for some $m \neq m'$ and $t \neq t'$, and where $\boldsymbol{\theta}$ denotes the combined vector of all remaining population-level parameters in the model. That is, conditional on the individual-specific parameters $\boldsymbol{b}_i$ and population-level parameters $\boldsymbol{\theta}$, the following are assumed: (i) any biomarker measurement for individual $i$ is independent of that individual's true event time $T_i^*$; (ii) any two measurements of the $m^{th}$ biomarker taken on the $i^{th}$ individual at two distinct time points $t$ and $t'$ (i.e. longitudinal or repeated measurements) are independent of one another; and (iii) any two measurements of two different biomarkers, taken on the $i^{th}$ individual at some time point $t$ are independent of one another. These conditional independence assumptions allow for a convenient factorisation of the full likelihood for joint model into the likelihoods for each of the component parts (i.e. the likelihood for the longitudinal submodel, the likelihood for the event submodel, and the likelihood for the distribution of the individual-specific parameters), which facilitates the estimation of the model. Moreover, we require two additional assumptions: (i) that the censoring process for the event outcome is independent of the true event time, that is $C_i \perp T_i^* \mid \boldsymbol{\theta}$ (i.e. uninformative censoring); and (ii) that the visiting process by which the observation times $t_{ijm}$ are determined is independent of the true event time $T_i^*$ and all missing future unobserved longitudinal biomarker measurements. 
### Log posterior distribution Under the conditional independence assumption, the log posterior for the $i^{th}$ individual can be specified as $$ \log p(\boldsymbol{\theta}, \boldsymbol{b}_{i} \mid \boldsymbol{y}_{i}, T_i, d_i) \propto \log \Bigg[ \Bigg( \prod_{m=1}^M \prod_{j=1}^{n_i} p(y_{ijm}(t) \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) \Bigg) p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta}) p(\boldsymbol{\theta}) \Bigg] $$ where $\boldsymbol{y}_i = \{ y_{ijm}(t); j = 1,...,n_i, m = 1,...,M \}$ denotes the collection of longitudinal biomarker data for individual $i$ and $\boldsymbol{\theta}$ denotes all remaining population-level parameters in the model. We can rewrite this log posterior as $$ \log p(\boldsymbol{\theta}, \boldsymbol{b}_{i} \mid \boldsymbol{y}_{i}, T_i, d_i) \propto \Bigg( \sum_{m=1}^M \sum_{j=1}^{n_i} \log p(y_{ijm}(t) \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) \Bigg) + \log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) + \log p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta}) + \log p(\boldsymbol{\theta}) $$ where $\sum_{j=1}^{n_{im}} \log p(y_{ijm} \mid \boldsymbol{b}_{i}, \boldsymbol{\theta})$ is the log likelihood for the $m^{th}$ longitudinal submodel, $\log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta})$ is the log likelihood for the event submodel, $\log p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta})$ is the log likelihood for the distribution of the group-specific parameters (i.e. random effects), and $\log p(\boldsymbol{\theta})$ represents the log likelihood for the joint prior distribution across all remaining unknown parameters.^[We refer the reader to the priors [vignette](priors.html) for a discussion of the possible prior distributions.] 
We can rewrite the log likelihood for the event submodel as $$ \log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) = d_i * \log h_i(T_i) - \int_0^{T_i} h_i(s) ds $$ and then use Gauss-Kronrod quadrature with $Q$ nodes to approximate $\int_0^{T_i} h_i(s) ds$, such that $$ \int_0^{T_i} h_i(s) ds \approx \frac{T_i}{2} \sum_{q=1}^{Q} w_q h_i \bigg( \frac{T_i(1+s_q)}{2} \bigg) $$ where $w_q$ and $s_q$, respectively, are the standardised weights and locations ("abscissa") for quadrature node $q$ $(q=1,...,Q)$ [17]. The default for the `stan_jm` modelling function is to use $Q=15$ quadrature nodes, however if the user wishes, they can choose between $Q=15$, $11$, or $7$ quadrature nodes (specified via the `qnodes` argument). Therefore, once we have an individual's event time $T_i$ we can evaluate the design matrices for the event submodel and longitudinal submodels at the $Q+1$ necessary time points (which are the event time $T_i$ and the quadrature points $\frac{T_i(1+s_q)}{2}$ for $q=1,...,Q$) and then pass these to Stan's data block. We can then evaluate the log likelihood for the event submodel by simply calculating the hazard $h_i(t)$ at those $Q+1$ time points and summing the quantities appropriately. This calculation will need to be performed each time we iterate through Stan's model block. A simplified example of the underlying Stan code used to fit the joint model can be found in [Brilleman et al. (2018)](https://github.com/stan-dev/stancon_talks/blob/master/2018/Contributed-Talks/03_brilleman/notebook.pdf) [12]. ## Model predictions Before discussing the methods by which we can generate posterior predictions, first let us define some additional relevant quantities. Let $\mathcal{D} = \{ \boldsymbol{y}_i, T_i, d_i; i = 1,...,N \}$ be the entire collection of outcome data in the sample. We will refer to this sample as the "training data". 
Let $T_{max} = \max \{ T_i; i = 1,...,N \}$ denote the maximum event or censoring time across the $i = 1,...,N$ individuals in our training data. ### Individual-specific predictions for in-sample individuals (for $0 \leq t \leq T_i$) We can generate posterior predictions for the longitudinal and time-to-event outcomes in the following manner. For the $i^{th}$ individual in our training data, a predicted value for the $m^{th}$ longitudinal biomarker at time $t$, denoted $y^*_{im}(t)$, can be generated from the posterior predictive distribution $$ p \Big( y^{*}_{im}(t) \mid \mathcal{D} \Big) = \int \int p \Big( y^{*}_{im}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) \space d \boldsymbol{b}_i \space d \boldsymbol{\theta} $$ and, similarly, a predicted probability of the $i^{th}$ individual being event-free at time $t$, denoted $S^*_i(t)$, can be generated from the posterior predictive distribution $$ p \Big( S^{*}_{i}(t) \mid \mathcal{D} \Big) = \int \int p \Big( S^{*}_i(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} $$ Note that for simplicity we have ignored the implicit conditioning on covariates; $\boldsymbol{x}_{im}(t)$ and $\boldsymbol{z}_{im}(t)$, for $m = 1,...,M$, and $\boldsymbol{w}_{i}(t)$. Since individual $i$ is included in the training data, it is easy for us to approximate these posterior predictive distributions by drawing from $p(y^{*}_{im}(t) \mid \boldsymbol{\theta}^{(l)}, \boldsymbol{b}_i^{(l)})$ and $p(S^{*}_i(t) \mid \boldsymbol{\theta}^{(l)}, \boldsymbol{b}_i^{(l)})$ where $\boldsymbol{\theta}^{(l)}$ and $\boldsymbol{b}_i^{(l)}$ are the $l^{th}$ $(l = 1,...,L)$ MCMC draws from the joint posterior distribution $p(\boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D})$. 
These draws from the posterior predictive distributions can be used for assessing the fit of the model. For example, - the draws from $p(y^{*}_{im}(t) \mid \mathcal{D})$ for $0 \leq t \leq T_i$ can be used to evaluate the fit of the longitudinal trajectory for the $m^{th}$ biomarker for the $i^{th}$ individual, and - the draws from $p(S^{*}_{i}(t) \mid \mathcal{D})$ for $0 \leq t \leq T_{max}$ can be averaged across the $N$ individuals to obtain a standardised survival curve (discussed in greater detail in later sections) which can then be compared to the observed survival curve, for example, the Kaplan-Meier curve. ### Individual-specific predictions for in-sample individuals (for $t > C_i$) However, given that we know the event or censoring time for each individual in our training data, it may make more sense to consider what will happen to censored individuals in our study when we look beyond their last known survival time (i.e. extrapolation). For an individual $i$, who was in our training data, and who was known to be event-free up until their censoring time $C_i$, we wish to draw from the conditional posterior predictive distribution for their longitudinal outcome at some time $t > C_i$, that is $$ p \Big( y^{*}_{im}(t) \mid \mathcal{D}, t > C_i \Big) = \int \int p \Big( y^{*}_{im}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i, t > C_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} $$ and the conditional posterior predictive distribution for their survival probability at some time $t > C_i$, that is $$ \begin{aligned} p \Big( S^{*}_{i}(t) \mid \mathcal{D}, t > C_i, T_i^* > C_i \Big) & = \frac {p \Big( S^{*}_{i}(t) \mid \mathcal{D} \Big)} {p \Big( S^{*}_{i}(C_i) \mid \mathcal{D} \Big)} \\ & = \int \int \frac {p \Big( S^{*}_i(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big)} {p \Big( S^{*}_i(C_i) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big)} \space p \Big( \boldsymbol{\theta}, 
\boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} \end{aligned} $$ These draws from the conditional posterior predictive distributions can be used to extrapolate into the future for individual $i$, conditional on their longitudinal biomarker data collected between baseline and their censoring time $C_i$. For example, - the draws from $p(y^{*}_{im}(t) \mid \mathcal{D}, t > C_i)$ for $C_i \leq t \leq T_{max}$ can be used to show the forecasted longitudinal trajectory for the $m^{th}$ biomarker for the $i^{th}$ individual, and - the draws from $p(S^{*}_{i}(t) \mid \mathcal{D}, t > C_i, T_i^* > C_i)$ for $C_i \leq t \leq T_{max}$ can be used to show the estimated conditional probability of individual $i$ remaining event-free into the future. ### Individual-specific predictions for out-of-sample individuals (i.e. dynamic predictions) **TBC.** Describe dynamic predictions under the framework of Rizopoulos (2011) [18]. These types of individual-specific predictions can be obtained using the `posterior_traj` and `posterior_survfit` functions by providing prediction data and specifying `dynamic = TRUE` (which is the default); see the examples provided below. ### Population-level (i.e. marginal) predictions We can also generate posterior predictions for the longitudinal and time-to-event outcomes that do not require any conditioning on observed outcome data for a specific individual. Here, we will discuss two ways in which this can be done. The first way is to "marginalise" over the distribution of the individual-specific parameters. We wish to generate a predicted value for the $m^{th}$ longitudinal biomarker at time $t$ for a new individual $k$ for whom we do not have any observed data.
We will denote this prediction $y^*_{km}(t)$ and note that it can be generated from the posterior predictive distribution for the longitudinal outcome $$ \begin{aligned} p \Big( y^{*}_{km}(t) \mid \mathcal{D} \Big) & = \int \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \mid \mathcal{D} \Big) \space d \boldsymbol{\tilde{b}}_{k} \space d \boldsymbol{\theta} \\ & = \int \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\tilde{b}}_k \mid \boldsymbol{\theta} \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) \space d \boldsymbol{\tilde{b}}_{k} \space d \boldsymbol{\theta} \end{aligned} $$ and similarly for the survival probability $$ \begin{aligned} p \Big( S^{*}_{k}(t) \mid \mathcal{D} \Big) & = \int \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \mid \mathcal{D} \Big) d \boldsymbol{b}_k \space d \boldsymbol{\theta} \\ & = \int \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\tilde{b}}_k \mid \boldsymbol{\theta} \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{b}_k \space d \boldsymbol{\theta} \\ \end{aligned} $$ We can obtain draws for $\boldsymbol{\tilde{b}}_k$ in the same manner as for the individual-specific parameters $\boldsymbol{b}_i$. That is, at the $l^{th}$ iteration of the MCMC sampler we draw $\boldsymbol{\tilde{b}}_k^{(l)}$ and store it^[These random draws from the posterior distribution of the group-specific parameters are stored each time a joint model is estimated using `stan_glmer`, `stan_mvmer`, or `stan_jm`; they are saved under an ID value called `"_NEW_"`]. 
However, individual $k$ did not provide any contribution to the training data and so we are effectively taking random draws from the posterior distribution for the individual-specific parameters. We are therefore effectively marginalising over the distribution of the group-specific coefficients when we obtain predictions using the draws $\boldsymbol{\tilde{b}}_k^{(l)}$ for $l = 1,\dots,L$. In other words, we are predicting for a new individual for whom we have no information except that they are drawn from the same population as the $i = 1,...,N$ individuals in the training data. Because these predictions will incorporate all the uncertainty associated with between-individual variation our 95% credible intervals are likely to be very wide. These types of marginal predictions can be obtained using the `posterior_traj` and `posterior_survfit` functions by providing prediction data and specifying `dynamic = FALSE`; see the examples provided below. The second way is to effectively ignore the group-level structure in the model. That is, to predict using only the population-level parameters contributing to the model. For example, under an identity link function and normal error distribution (i.e. linear mixed effect longitudinal submodel), we would obtain draws from the distribution $y^{(l)}_{km}(t) \sim N \Big( \boldsymbol{x}^T_{km}(t) \boldsymbol{\beta}_m^{(l)}, \sigma_m^{(l)} \Big)$ where $\boldsymbol{\beta}_m^{(l)}$ and $\sigma_m^{(l)}$ are the population-level parameters and residual error standard deviation, respectively, for the $l^{th}$ draw of the MCMC samples. However, referring to this as a "marginal" prediction is somewhat misleading since we are not explicitly conditioning on the individual-specific parameters but we are implicitly assuming that we know they are equal to zero with absolute certainty. 
That is, we are actually drawing from the posterior predictive distribution for the longitudinal outcome $$ \begin{aligned} p \Big( y^{*}_{km}(t) \mid \mathcal{D} \Big) & = \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_k = 0 \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\theta} \\ \end{aligned} $$ and similarly for the survival probability $$ p \Big( S^{*}_{k}(t) \mid \mathcal{D} \Big) = \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_k = 0 \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\theta} \\ $$ These types of so-called "marginal" predictions can not currently be obtained using the `posterior_traj` and `posterior_survfit` functions. ### Standardised survival probabilities All of the previously discussed population-level (i.e. marginal) predictions assumed implicit conditioning on some covariate values for the longitudinal submodel, $\boldsymbol{x}_{im}(t)$ and $\boldsymbol{z}_{im}(t)$ for $m = 1,...,M$, and for the event submodel, $\boldsymbol{w}_{i}(t)$. Even though we marginalise over the distribution of the individual-specific parameters we were still assuming that we obtained predictions for some known values of the covariates. However, sometimes we wish to marginalise (i.e. average) over the observed distribution of covariates as well. Here we discuss a method by which we can do that for the predicted survival probabilities. At any time $t$, it is possible to obtain a standardised survival probability by averaging the individual-specific survival probabilities. That is, we can obtain $$ S^*(t) = \frac{\sum_{i=1}^{N^{pred}} S_i^*(t)}{N^{pred}} $$ where $S_i^*(t)$ is the predicted survival probability for individual $i$ ($i = 1,\dots,N^{pred}$) at time $t$, and $N^{pred}$ is the number of individuals included in the prediction dataset. We refer to these predictions as *standardised survival probabilities*. Note however, that if $N^{pred}$ is not sufficiently large (e.g. 
we pass new data with just 2 individuals, say) then marginalising over their covariate distribution may not be meaningful and, similarly, their joint random effects distribution may be a poor representation of the random effects distribution for the entire population. It is better to calculate these standardised survival probabilities using a larger prediction dataset where, say, $N^{pred}$ is equal to the total number of individuals in the training data. ## Model extensions ### Delayed entry (left-truncation) **TBC.** ### Multilevel clustering **TBC.** ## Model comparison ### LOO/WAIC in the context of joint models **TBC.** # Usage examples ## Dataset used in the examples We use the Mayo Clinic's primary biliary cirrhosis (PBC) dataset in the examples below. The dataset contains 312 individuals with primary biliary cirrhosis who participated in a randomised placebo controlled trial of D-penicillamine conducted at the Mayo Clinic between 1974 and 1984 [19]. However, to ensure the examples run quickly, we use a small random subset of just 40 patients from the full data. These example data are contained in two separate data frames. 
The first data frame contains multiple-row per patient longitudinal biomarker information, as shown in ```{r pbcLong} head(pbcLong) ``` while the second data frame contains single-row per patient survival information, as shown in ```{r pbcSurv} head(pbcSurv) ``` The variables included across the two datasets can be defined as follows: - `age` in years - `albumin` serum albumin (g/dl) - `logBili` logarithm of serum bilirubin - `death` indicator of death at endpoint - `futimeYears` time (in years) between baseline and the earliest of death, transplantation or censoring - `id` numeric ID unique to each individual - `platelet` platelet count - `sex` gender (m = male, f = female) - `status` status at endpoint (0 = censored, 1 = transplant, 2 = dead) - `trt` binary treatment code (0 = placebo, 1 = D-penicillamine) - `year` time (in years) of the longitudinal measurements, taken as time since baseline A description of the example datasets can be found by accessing the following help documentation: ```{r datasets_help, eval = FALSE} help("datasets", package = "rstanarm") ``` ## Fitting the models ### Univariate joint model (current value association structure) In this example we fit a simple univariate joint model, with one normally distributed longitudinal marker, an association structure based on the current value of the linear predictor, and B-splines baseline hazard. To fit the model we use the joint (longitudinal and time-to-event) modelling function in the **rstanarm** package: `stan_jm`. When calling `stan_jm` we must, at a minimum, specify a formula object for each of the longitudinal and event submodels (through the arguments `formulaLong` and `formulaEvent`), the data frames which contain the variables for each of the longitudinal and event submodels (through the arguments `dataLong` and `dataEvent`), and the name of the variable representing time in the longitudinal submodel (through the argument `time_var`). 
The formula for the longitudinal submodel is specified using the **lme4** package formula style. That is `y ~ x + (random_effects | grouping_factor)`. In this example we specify that log serum bilirubin (`logBili`) follows a subject-specific linear trajectory. To do this we include a fixed intercept and fixed slope (`year`), as well as a random intercept and random slope for each subject `id` (`(year | id)`). The formula for the event submodel is specified using the **survival** package formula style. That is, the outcome on the left of the `~` needs to be of the format `Surv(event_time, event_indicator)` for single row per individual data, or `Surv(start_time, stop_time, event_indicator)` for multiple row per individual data. The latter allows for exogenous time-varying covariates to be included in the event submodel. In this example we assume that the log hazard of death is linearly related to gender (`sex`) and an indicator of treatment with D-penicillamine (`trt`). ```{r univariate_fit, results = "hold", message = FALSE, warning = FALSE} library(rstanarm) mod1 <- stan_jm(formulaLong = logBili ~ sex + trt + year + (year | id), dataLong = pbcLong, formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, time_var = "year", chains = 1, refresh = 2000, seed = 12345) ``` The argument `refresh = 2000` was specified so that Stan didn't provide us with excessive progress updates whilst fitting the model. However, if you are fitting a model that will take several minutes or hours to fit, then you may wish to request progress updates quite regularly, for example setting `refresh = 20` for every 20 iterations (by default the refresh argument is set to 1/10th of the total number of iterations). The fitted model is returned as an object of the S3 class `stanjm`. 
We have a variety of methods and post-estimation functions available for this class, including: `print`, `summary`, `plot`, `fixef`, `ranef`, `coef`, `VarCorr`, `posterior_interval`, `update`, and more. Here, we will examine the most basic output for the fitted joint model by typing `print(mod1)`: ```{r print, echo = FALSE} alpha_mod1 <- as.data.frame(mod1)[["Assoc|Long1|etavalue"]] alpha_median <- round(median(alpha_mod1), 3) print(mod1) ``` The "Long1|etavalue" row under "Event submodel" is our $\alpha_{mq}$ parameter ($m = 1$, $q = 1$). The estimated median of the posterior distribution tells us that for each one unit increase in an individual's underlying level of log serum bilirubin, their estimated log hazard of death increases by some amount. The mean absolute deviation (MAD) is provided as a more robust estimate of the standard deviation of the posterior distribution. In this case the MAD_SD for the association parameter indicates there is quite large uncertainty around the estimated association between log serum bilirubin and risk of death (recall this is a small dataset). If we wanted some slightly more detailed output for each of the model parameters, as well as further details regarding the model estimation (for example computation time, number of longitudinal observations, number of individuals, type of baseline hazard, etc) we can instead use the `summary` method: ```{r summary} summary(mod1, probs = c(.025,.975)) ``` The easiest way to extract the correlation matrix for the random effects (aside from viewing the `print` output) is to use the `VarCorr` function (modelled on the `VarCorr` function from the **lme4** package). 
If you wish to extract the variances and covariances (instead of the standard deviations and correlations) then you can type the following to return a data frame with all of the relevant information: ```{r VarCorr} as.data.frame(VarCorr(mod1)) ``` ### Univariate joint model (current value and current slope association structure) In the previous example we were fitting a shared parameter joint model which assumed that the log hazard of the event (in this case the log hazard of death) at time *t* was linearly related to the subject-specific expected value of the longitudinal marker (in this case the expected value of log serum bilirubin) also at time *t*. This is the default association structure, although it could be explicitly specified by setting the `assoc = "etavalue"` argument. However, let's suppose we believe that the log hazard of death is actually related to both the *current value* of log serum bilirubin and the current *rate of change* in log serum bilirubin. To estimate this joint model we need to indicate that we want to also include the subject-specific slope (at time *t*) from the longitudinal submodel as part of the association structure. We do this by setting the `assoc` argument equal to a character vector `c("etavalue", "etaslope")` which indicates our desired association structure: ```{r assoc_etaslope, eval = FALSE} mod2 <- stan_jm(formulaLong = logBili ~ sex + trt + year + (year | id), dataLong = pbcLong, formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, assoc = c("etavalue", "etaslope"), time_var = "year", chains = 1, refresh = 2000, seed = 12345) ``` In this example the subject-specific slope is actually constant across time *t* since we have a linear trajectory. Note however that we could still use the `"etaslope"` association structure even if we had a non-linear subject specific trajectory (for example modelled using cubic splines or polynomials). 
### Multivariate joint model (current value association structures) Suppose instead that we were interested in *two* repeatedly measured clinical biomarkers, log serum bilirubin and serum albumin, and their association with the risk of death. We may wish to model these two biomarkers, allowing for the correlation between them, and estimating their respective associations with the log hazard of death. We will fit a linear mixed effects submodel (identity link, normal distribution) for each biomarker with a patient-specific intercept and linear slope but no other covariates. In the event submodel we will include gender (`sex`) and treatment (`trt`) as baseline covariates. Each biomarker is assumed to be associated with the log hazard of death at time $t$ via its expected value at time $t$ (i.e. a *current value* association structure). The model we are going to fit can therefore be specified as: $$ y_{im}(t_{ijm}) \sim N(\mu_{im}(t_{ijm}), \sigma_m) $$ $$ \eta_{im}(t) = \mu_{im}(t) = \beta_{0m} + \beta_{1m} t + b_{0mi} + b_{1mi} t $$ $$ h_i(t) = h_0(t; \boldsymbol{\omega}) \exp(\gamma_1 w_{1i} + \gamma_2 w_{2i} + \alpha_{1} \mu_{i1}(t) + \alpha_{2} \mu_{i2}(t)) $$ where $t$ is time in years, and $w_{1i}$ and $w_{2i}$ are, respectively, the gender and treatment indicators for individual $i$. (Note that due to the very small sample size, the clinical findings from this analysis should not be overinterpreted!). 
```{r fitmodel_mv_ev_ev, warning=FALSE, message=FALSE} mod3 <- stan_jm( formulaLong = list( logBili ~ sex + trt + year + (year | id), albumin ~ sex + trt + year + (year | id)), formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataLong = pbcLong, dataEvent = pbcSurv, time_var = "year", chains = 1, refresh = 2000, seed = 12345) ``` We can now examine the output from the fitted model, for example \ ```{r results_print} print(mod3) ``` or we can examine the summary output for the association parameters alone: \ ```{r results_summary} summary(mod3, pars = "assoc") ``` ## Posterior predictions We can also access the range of post-estimation functions (described in the `stan_jm` and related help documentation; see for example `help(posterior_traj)` or `help(posterior_survfit)`). ### Predicted individual-specific longitudinal trajectory for in-sample individuals Predicted individual-specific biomarker values can be obtained using either the `posterior_traj` or `posterior_predict` function. The `posterior_traj` is preferable, because it can be used to obtain the biomarker values at a series of evenly spaced time points between baseline and the individual's event or censoring time by using the default `interpolate = TRUE` option. Whereas, the `posterior_predict` function only provides the predicted biomarker values at the observed time points, or the time points in the new data. Predicting the biomarker values at a series of evenly spaced time points can be convenient because they can be easily used for plotting the longitudinal trajectory. Moreover, by default the `posterior_traj` returns a data frame with variables corresponding to the individual ID, the time, the predicted mean biomarker value, the limits for the 95% credible interval (i.e. uncertainty interval for the predicted mean biomarker value), and limits for the 95% prediction interval (i.e. 
uncertainty interval for a predicted biomarker data point), where the level for the uncertainty intervals can be changed via the `prob` argument. Conversely, the `posterior_predict` function returns an $S$ by $N$ matrix of predictions where $S$ is the number of posterior draws and $N$ is the number of prediction time points (note that this return type can also be obtained for `posterior_traj` by specifying the argument `return_matrix = TRUE`). As an example, let's plot the predicted individual-specific longitudinal trajectories for each of the two biomarkers (log serum bilirubin and serum albumin) in the multivariate joint model estimated above. We will do this for three individuals (IDs 6, 7 and 8) who were included in the model estimation. Here are the plots for log serum bilirubin: ```{r plots_872312} p1 <- posterior_traj(mod3, m = 1, ids = 6:8) pp1 <- plot(p1, plot_observed = TRUE) pp1 ``` and here are the plots for serum albumin: ```{r plots_555762} p2 <- posterior_traj(mod3, m = 2, ids = 6:8) pp2 <- plot(p2, plot_observed = TRUE) pp2 ``` The `m` argument specifies which biomarker we want to predict for (only relevant for a multivariate joint model). The `ids` argument is optional, and specifies a subset of individuals for whom we want to predict. In the plotting method, the `plot_observed = TRUE` specifies that we want to include the observed biomarker values in the plot of the longitudinal trajectory. If we wanted to extrapolate the trajectory forward from the event or censoring time for each individual, then this can be easily achieved by specifying `extrapolate = TRUE` in the `posterior_traj` call. 
For example, here is the plot for log serum bilirubin with extrapolation: ```{r plots_65662} p3 <- posterior_traj(mod3, m = 1, ids = 6:8, extrapolate = TRUE) pp3 <- plot(p3, plot_observed = TRUE, vline = TRUE) pp3 ``` and for serum albumin with extrapolation: ```{r plots_998889} p4 <- posterior_traj(mod3, m = 2, ids = 6:8, extrapolate = TRUE) pp4 <- plot(p4, plot_observed = TRUE, vline = TRUE) pp4 ``` Here, we included the `vline = TRUE` which adds a vertical dashed line at the timing of the individual's event or censoring time. The interpolation and extrapolation of the biomarker trajectory can be further controlled through the `control` argument to the `posterior_traj` function; for example, we could specify the number of time points at which to predict, the distance by which to extrapolate, and so on. We could customize these plots further, for example, by using any of the __ggplot2__ functionality or using the additional arguments described in `help(plot.predict.stanjm)`. ### Predicted individual-specific survival curves for in-sample individuals Predicted individual-specific survival probabilities and/or survival curves can be obtained using the `posterior_survfit` function. The function by default returns a data frame with the individual ID, the time, and the predicted survival probability (posterior mean and limits for the 95% credible interval). The uncertainty level for the credible interval can be changed via the `prob` argument. By default, individual-specific survival probabilities are calculated *conditional* on the individual's last known survival time. When we are predicting survival probabilities for individuals that were used in the estimation of the model (i.e. in-sample individuals, where no new covariate data is provided), then the individual's "last known survival time" will be their event or censoring time. 
(Note that if we didn't want to condition on the individual's last known survival time, then we could specify `condition = FALSE`, but we probably wouldn't want to do this unless we were calculating marginal or standardised survival probabilities, which are discussed later). The default argument `extrapolate = TRUE` specifies that the individual-specific conditional survival probabilities will be calculated at evenly spaced time points between the individual's last known survival time and the maximum follow up time that was observed in the estimation sample. The behaviour of the extrapolation can be further controlled via the `control` argument. If we were to specify `extrapolate = FALSE` then the survival probabilities would only be calculated at one time point, which could be specified in the `times` argument (or otherwise would default to the individual's last known survival time). As an example, let's plot the predicted individual-specific conditional survival curve for the same three individuals that were used in the previous example. The predicted survival curve will be obtained under the multivariate joint model estimated above. \ ```{r plots_23812} p5 <- posterior_survfit(mod3, ids = 6:8) pp5 <- plot(p5) pp5 ``` We could customize the plot further, for example, by using any of the __ggplot2__ functionality or using the additional arguments described in `help(plot.survfit.stanjm)`. ### Combined plot of longitudinal trajectories and survival curves The package also provides a convenience plotting function, which combines plots of the individual-specific longitudinal trajectories, and the individual-specific survival function. We can demonstrate this by replotting the predictions for the three individuals in the previous example: ```{r plots_987321, fig.height=13} plot_stack_jm(yplot = list(pp3, pp4), survplot = pp5) ``` Here we can see the strong relationship between the underlying values of the biomarkers and mortality. 
Patient `8` who, relative to patients `6` and `7`, has a higher underlying value for log serum bilirubin and a lower underlying value for serum albumin at the end of their follow up has a far worse predicted probability of survival. ### Predicted individual-specific longitudinal trajectory and survival curve for out-of-sample individuals (i.e. dynamic predictions) Let us take an individual from our training data, in this case the individual with subject ID value `8`. However, we will pretend this individual was not a member of our training data and rather that they are a new individual for whom we have obtained new biomarker measurements. Our goal is to obtain predictions for the longitudinal trajectory for this individual, and their conditional survival curve, conditional on the biomarker measurements we currently have available for them. First, let's extract the data for subject `8` and then rename their subject ID value so that they appear to be an individual who was not included in our training dataset: ```{r newdata_23188} ndL <- pbcLong[pbcLong$id == 8, , drop = FALSE] ndE <- pbcSurv[pbcSurv$id == 8, , drop = FALSE] ndL$id <- paste0("new_patient") ndE$id <- paste0("new_patient") ``` Note that we have both the longitudinal data and event data for this new individual. We require data for both submodels because we are going to generate *dynamic predictions* that require drawing new individual-specific parameters (i.e. random effects) for this individual conditional on their observed data. That means we need to evaluate the likelihood for the full joint model and that requires both the longitudinal and event data (note however that the status indicator `death` will be ignored, since it is assumed that the individual we are predicting for is still alive at the time we wish to generate the predictions). 
Now we can pass this data to the `posterior_traj` function in the same way as for the in-sample individuals, except we will now specify the `newdataLong` and `newdataEvent` arguments. We will also specify the `last_time` argument so that the function knows which variable in the event data specifies the individual's last known survival time (the default behaviour is to use the time of the last biomarker measurement). Our predictions for this new individual for the log serum bilirubin trajectory can be obtained using: ```{r plots_999333} p6 <- posterior_traj(mod3, m = 1, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp6 <- plot(p6, plot_observed = TRUE, vline = TRUE) pp6 ``` and for the serum albumin trajectory: ```{r plots_122223} p7 <- posterior_traj(mod3, m = 2, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp7 <- plot(p7, plot_observed = TRUE, vline = TRUE) pp7 ``` For the conditional survival probabilities we use similar information, provided to the `posterior_survfit` function: ```{r plots_65401} p8 <- posterior_survfit(mod3, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp8 <- plot(p8) pp8 ``` We can then use the `plot_stack_jm` function, as we saw in a previous example, to stack the plots of the longitudinal trajectory and the conditional survival curve: ```{r plots_0089231, fig.height=13} plot_stack_jm(yplot = list(pp6, pp7), survplot = pp8) ``` Here we see that the predicted longitudinal trajectories and conditional survival curve for this individual, obtained using the dynamic predictions approach, are similar to the predictions we obtained when we used their individual-specific parameters from the original model estimation. This is because in both situations we are conditioning on the same outcome data. **Side note:** We can even compare the estimated individual specific parameters obtained under the two approaches. 
For example, here is the posterior mean for the estimated individual-specific parameters for individual `8` from the fitted model: ```{r b_pars_23123} c(ranef(mod3)[["Long1"]][["id"]][8,], ranef(mod3)[["Long2"]][["id"]][8,]) ``` and here is the mean of the draws for the individual-specific parameters for individual `8` under the dynamic predictions approach: ```{r b_pars_5436765} colMeans(attr(p6, "b_new")) ``` ### Predicted population-level longitudinal trajectory Suppose we wanted to predict the longitudinal trajectory for each of the biomarkers, marginalising over the distribution of the individual-specific parameters. To do this, we can pass a new data frame with the covariate values we want to use in the predictions. Here, we will demonstrate this by obtaining the predicted trajectory for log serum bilirubin, under the multivariate joint model that was estimated previously. Our prediction data will require the variables `year`, `sex` and `trt`, since these were the covariates used in the longitudinal submodel. We will predict the value of log serum bilirubin at years 0 through 10, for each combination of `sex` and `trt`. We also need to include the `id` variable in our prediction data because this is relevant to the longitudinal submodel. Since we want to marginalise over the individual-specific parameters (i.e. individual-level random effects) we need to note two things: - First, the values for the `id` variable **must not** match any individual used in the model estimation. Here, we use the following `id` values: `"male_notrt"`, `"female_notrt"`, `"male_trt"`, and `"female_trt"`, since each individual in our prediction data represents a different combination of `sex` and `trt`. However, we could have given the individuals any `id` value just as long as it didn't match an individual who was used in the model estimation - Second, we need to specify the argument `dynamic = FALSE` when calling `posterior_traj`. 
This specifies that we do not want to draw new individual-specific parameters conditional on outcome data observed up to some time $t$. Instead, we want predictions that marginalise over the distribution of individual-specific parameters and are therefore conditional *only on covariates* and not conditional on outcome data for the new individuals. Here is our prediction data: ```{r newdata_19213} ndL <- expand.grid(year = seq(0, 10, 1), sex = c("m", "f"), trt = 0:1) ndL$id <- rep(c("male_notrt", "female_notrt", "male_trt", "female_trt"), each = 11) ndL <- ndL[, c(4,1,2,3)] str(ndL) ``` And to predict the marginal longitudinal trajectory for log serum bilirubin under each covariate profile and plot it we can type: ```{r plot_traj_218391} p1 <- posterior_traj(mod3, m = 1, newdataLong = ndL, dynamic = FALSE) plot(p1) + ggplot2::coord_cartesian(ylim = c(-10,15)) ``` Because we are marginalising over the distribution of the individual-specific parameters, we are incorporating all the variation related to between-individual differences, and therefore the prediction interval is wide (shown by the shaded area around the marginal longitudinal trajectory). The magnitude of the effects of both `sex` and `trt` are relatively small compared to the population-level effect of `year` and the between-individual variation in the intercept and slope. For example, here are the point estimates for the population-level effects of `sex`, `trt`, and `year`: ```{r fixef_2132} fixef(mod3)$Long1 ``` and here are the standard deviations for the individual-level random effects: ```{r ranef_5664} VarCorr(mod3) ``` This shows us that the point estimates for the population-level effects of `sex` and `trt` are 0.57 and -0.10, respectively, whereas the standard deviation for the individual-specific intercept and slope parameters are 1.24 and 0.19; hence, any differences due to the population-level effects of gender and treatment (i.e. 
differences in the black line across the four panels of the plot) are swamped by the width of the uncertainty intervals (i.e. the grey shaded areas). ### Standardised survival curves In this example we show how a standardised survival curve can be obtained, where the $i = 1,...,N^{pred}$ individuals used in generating the standardised survival curve are the same individuals that were used in estimating the model. We will obtain the survival curve for the multivariate joint model estimated in an earlier example (`mod3`). The `standardise = TRUE` argument to `posterior_survfit` specifies that we want to obtain individual-specific predictions of the survival curve and then average these. Because, in practical terms, we need to obtain survival probabilities at time $t$ for each individual and then average them we want to explicitly specify the values of $t$ we want to use (and the same values of $t$ will be used for individuals). We specify the values of $t$ to use via the `times` argument; here we will predict the standardised survival curve at time 0 and then for convenience we can just specify `extrapolate = TRUE` (which is the default anyway) which will mean we automatically predict at 10 evenly spaced time points between 0 and the maximum event or censoring time. ```{r standsurv} p1 <- posterior_survfit(mod3, standardise = TRUE, times = 0) head(p1) # data frame with standardised survival probabilities plot(p1) # plot the standardised survival curve ``` # References 1. Henderson R, Diggle P, Dobson A. Joint modelling of longitudinal measurements and event time data. *Biostatistics* 2000;**1**(4):465-80. 2. Wulfsohn MS, Tsiatis AA. A joint model for survival and longitudinal data measured with error. *Biometrics* 1997;**53**(1):330-9. 3. Tsiatis AA, Davidian M. Joint modeling of longitudinal and time-to-event data: An overview. *Stat Sinica* 2004;**14**(3):809-34. 4. Gould AL, Boye ME, Crowther MJ, Ibrahim JG, Quartey G, Micallef S, et al. 
Joint modeling of survival and longitudinal non-survival data: current methods and issues. Report of the DIA Bayesian joint modeling working group. *Stat Med*. 2015;**34**(14):2181-95.
5. Rizopoulos D. *Joint Models for Longitudinal and Time-to-Event Data: With Applications in R* CRC Press; 2012.
6. Liu G, Gould AL. Comparison of alternative strategies for analysis of longitudinal trials with dropouts. *J Biopharm Stat* 2002;**12**(2):207-26.
7. Prentice RL. Covariate Measurement Errors and Parameter-Estimation in a Failure Time Regression-Model. *Biometrika* 1982;**69**(2):331-42.
8. Baraldi AN, Enders CK. An introduction to modern missing data analyses. *J Sch Psychol* 2010;**48**(1):5-37.
9. Philipson PM, Ho WK, Henderson R. Comparative review of methods for handling drop-out in longitudinal studies. *Stat Med* 2008;**27**(30):6276-98.
10. Pantazis N, Touloumi G. Bivariate modelling of longitudinal measurements of two human immunodeficiency type 1 disease progression markers in the presence of informative drop-outs. *Applied Statistics* 2005;**54**:405-23.
11. Taylor JM, Park Y, Ankerst DP, et al. Real-time individual predictions of prostate cancer recurrence using joint models. *Biometrics* 2013;**69**(1):206-13.
12. Brilleman SL, Crowther MJ, Moreno-Betancur M, Buros Novik J, Wolfe R. Joint longitudinal and time-to-event models via Stan. *In: Proceedings of StanCon 2018.* https://github.com/stan-dev/stancon_talks
13. Stan Development Team. *rstanarm: Bayesian applied regression modeling via Stan.* R package version 2.14.1. https://mc-stan.org/. 2016.
14. R Core Team. *R: A language and environment for statistical computing.* Vienna, Austria: R Foundation for Statistical Computing; 2015.
15. Crowther MJ, Lambert PC, Abrams KR. Adjusting for measurement error in baseline prognostic biomarkers included in a time-to-event analysis: a joint modelling approach. *BMC Med Res Methodol* 2013;**13**.
16. Hickey GL, Philipson P, Jorgensen A, Kolamunnage-Dona R. Joint modelling of time-to-event and multivariate longitudinal outcomes: recent developments and issues. *BMC Med Res Methodol* 2016;**16**(1):117.
17. Rizopoulos D, Ghosh P. A Bayesian semiparametric multivariate joint model for multiple longitudinal outcomes and a time-to-event. *Stat Med*. 2011;**30**(12):1366-80.
18. Laurie DP. Calculation of Gauss-Kronrod quadrature rules. *Math Comput* 1997;**66**(219):1133-45.
19. Rizopoulos D. Dynamic Predictions and Prospective Accuracy in Joint Models for Longitudinal and Time-to-Event Data. *Biometrics* 2011;**67**(3):819-829.
20. Therneau T, Grambsch P. *Modeling Survival Data: Extending the Cox Model* Springer-Verlag, New York; 2000. ISBN: 0-387-98784-3
rstanarm/vignettes/mrp.Rmd0000644000176200001440000010314413722762571015372 0ustar liggesusers---
title: "MRP with rstanarm"
author: "Lauren Kennedy and Jonah Gabry"
date: "`r Sys.Date()`"
output:
  html_vignette:
    toc: yes
bibliography: mrp-files/mrp.bib
---

```{r, child="children/SETTINGS-knitr.txt"}
```

```{r packages-1, message=FALSE}
library(rstanarm)
library(ggplot2)
library(bayesplot)
theme_set(bayesplot::theme_default())
# options(mc.cores = 4)
```

```{r packages-2, eval=FALSE, message=FALSE}
library(dplyr)
library(tidyr)
```

Inference about the population is one of the main aims of statistical methodology. Multilevel regression and post-stratification (MRP) [@little1993post; @lax2009should; @park2004bayesian] has been shown to be an effective method of adjusting the sample to be more representative of the population for a set of key variables. Recent work has demonstrated the effectiveness of MRP when there are a number of suspected interactions between these variables [@ghitza2013deep], replicated by @lei20172008. While @ghitza2013deep use approximate marginal maximum likelihood estimates, @lei20172008 implement a fully Bayesian approach through Stan.
The **rstanarm** package allows the user to conduct complicated regression analyses in Stan with the simplicity of standard formula notation in R. The purpose of this vignette is to demonstrate the utility of **rstanarm** when conducting MRP analyses. We will not delve into the details of conducting logistic regression with rstanarm as this is already covered in [other vignettes](https://mc-stan.org/rstanarm/articles/). Most of the code for data manipulation and plotting is not shown in the text but is available in the R markdown [source code on GitHub](https://github.com/stan-dev/rstanarm/blob/master/vignettes/mrp.Rmd).

```{r, include=FALSE, collapse=TRUE}
# Simulate the three data sets used throughout this vignette:
#   * sample:    n survey respondents (one row per respondent)
#   * poststrat: the full post-stratification table with population counts N
#   * true_popn: the true population cat preference for each poststrat cell
#
# Arguments:
#   n - number of respondents to draw for the sample
#
# Returns a named list with elements `sample`, `poststrat`, and `true_popn`.
simulate_mrp_data <- function(n) {
  # Number of levels per factor: male (coded 0/1), eth, age, income, state
  J <- c(2, 3, 7, 3, 50)
  # Post-stratification matrix: one row per cell, plus one column for cell size
  poststrat <- as.data.frame(array(NA, c(prod(J), length(J) + 1)))
  colnames(poststrat) <- c("male", "eth", "age", "income", "state", "N")
  count <- 0
  for (i1 in seq_len(J[1])) {
    for (i2 in seq_len(J[2])) {
      for (i3 in seq_len(J[3])) {
        for (i4 in seq_len(J[4])) {
          for (i5 in seq_len(J[5])) {
            count <- count + 1
            # Fill them in so we know what category we are referring to
            # (male is coded 0/1, the other factors 1..J)
            poststrat[count, 1:5] <- c(i1 - 1, i2, i3, i4, i5)
          }
        }
      }
    }
  }

  # Population proportion of each category within every factor
  p_male <- c(0.52, 0.48)
  p_eth <- c(0.5, 0.2, 0.3)
  p_age <- c(0.2, .1, 0.2, 0.2, 0.10, 0.1, 0.1)
  p_income <- c(.50, .35, .15)
  p_state_tmp <- runif(50, 10, 20)
  p_state <- p_state_tmp / sum(p_state_tmp)

  # Population size N of each poststrat cell, assuming the factors are
  # independent and a total population of 250 million
  poststrat$N <- 0
  for (j in seq_len(prod(J))) {
    poststrat$N[j] <- round(250e6 * p_male[poststrat[j, 1] + 1] *
      p_eth[poststrat[j, 2]] *
      p_age[poststrat[j, 3]] * p_income[poststrat[j, 4]] *
      p_state[poststrat[j, 5]])
  }

  # Now let's adjust for the probability of response
  p_response_baseline <- 0.01
  p_response_male <- c(2, 0.8) / 2.8
  p_response_eth <- c(1, 1.2, 2.5) / 4.7
  p_response_age <- c(1, 0.4, 1, 1.5, 3, 5, 7) / 18.9
  p_response_inc <- c(1, 0.9, 0.8) / 2.7
  p_response_state <- rbeta(50, 1, 1)
  p_response_state <- p_response_state / sum(p_response_state)
  p_response <- rep(NA, prod(J))
  for (j in seq_len(prod(J))) {
    p_response[j] <-
      p_response_baseline * p_response_male[poststrat[j, 1] + 1] *
      p_response_eth[poststrat[j, 2]] *
      p_response_age[poststrat[j, 3]] *
      p_response_inc[poststrat[j, 4]] *
      p_response_state[poststrat[j, 5]]
  }

  ## For respondent i, people[i] is that person's poststrat cell,
  ## some number between 1 and prod(J) (= 6300 here)
  people <- sample(prod(J), n, replace = TRUE, prob = poststrat$N * p_response)
  n_cell <- rep(NA, prod(J))
  for (j in seq_len(prod(J))) {
    n_cell[j] <- sum(people == j)
  }

  # True (logit-scale) effects of each factor on cat preference; the first
  # level of each factor is the reference level with coefficient 0.
  # NOTE(review): coef_age has 8 entries but only 7 age levels; the last
  # entry is never indexed — confirm whether it is intentional.
  coef_male <- c(0, -0.3)
  coef_eth <- c(0, 0.6, 0.9)
  coef_age <- c(0, -0.2, -0.3, 0.4, 0.5, 0.7, 0.8, 0.9)
  coef_income <- c(0, -0.2, 0.6)
  coef_state <- c(0, round(rnorm(49, 0, 1), 1))
  # Interaction between age and male (rows: male = 0/1; columns: age level)
  coef_age_male <- t(cbind(c(0, .1, .23, .3, .43, .5, .6),
                           c(0, -.1, -.23, -.5, -.43, -.5, -.6)))

  # True population preference for each poststrat cell
  true_popn <- data.frame(poststrat[, 1:5], cat_pref = rep(NA, prod(J)))
  for (j in seq_len(prod(J))) {
    true_popn$cat_pref[j] <- plogis(
      coef_male[poststrat[j, 1] + 1] +
        coef_eth[poststrat[j, 2]] +
        coef_age[poststrat[j, 3]] +
        coef_income[poststrat[j, 4]] +
        coef_state[poststrat[j, 5]] +
        coef_age_male[poststrat[j, 1] + 1, poststrat[j, 3]]
    )
  }

  # Simulate each respondent's binary outcome and record their demographics
  # (male or not, eth, age, income level, state)
  y <- rbinom(n, 1, true_popn$cat_pref[people])
  male <- poststrat[people, 1]
  eth <- poststrat[people, 2]
  age <- poststrat[people, 3]
  income <- poststrat[people, 4]
  state <- poststrat[people, 5]
  sample <- data.frame(cat_pref = y,
                       male, age, eth, income, state,
                       id = seq_along(people))

  # Make all columns numeric:
  for (i in seq_len(ncol(poststrat))) {
    poststrat[, i] <- as.numeric(poststrat[, i])
  }
  for (i in seq_len(ncol(true_popn))) {
    true_popn[, i] <- as.numeric(true_popn[, i])
  }
  for (i in seq_len(ncol(sample))) {
    sample[, i] <- as.numeric(sample[, i])
  }

  list(
    sample = sample,
    poststrat = poststrat,
    true_popn = true_popn
  )
}
```

# The Data

Three data sets are simulated by the function `simulate_mrp_data()`, which is defined in the [source
code](https://github.com/stan-dev/rstanarm/blob/master/vignettes/mrp.Rmd) for this R markdown document (and printed in the appendix). The first, `sample`, contains $n$ observations from the individuals that form our sample (i.e., $n$ rows). For each individual we have their age (recorded as membership within a specific age bracket), ethnicity, income level (recorded as membership within a specific bracket), and gender. Participants were randomly sampled from a state. MRP is often used for dichotomous fixed choice questions (e.g., McCain's share of two party vote [@ghitza2013deep]; support for George W Bush, [@park2004bayesian]; or support for the death penalty [@shirley2015hierarchical]), so we will use a binary variable as the outcome in this vignette. However, MRP can also be used if there are more than two categories or if the outcome is continuous. As this is a simple toy example, we will describe the proportion of the population who would choose to adopt a cat over a dog, given the opportunity. We will simulate data using a function that is included in the appendix of this document. The `simulate_mrp_data()` function simulates a sample from a much larger population. It returns a list including the sample, population poststratification matrix and the true population preference for cats. ```{r include=FALSE, eval=FALSE} mrp_sim <- simulate_mrp_data(n=1200) save(mrp_sim, file = "mrp-files/mrp_sim.rda", version = 2) ``` ```{r eval=FALSE} mrp_sim <- simulate_mrp_data(n=1200) str(mrp_sim) ``` ```{r, echo=FALSE} load("mrp-files/mrp_sim.rda") str(mrp_sim) ``` ```{r, message=FALSE} sample <- mrp_sim[["sample"]] rbind(head(sample), tail(sample)) ``` The variables describing the individual (age, ethnicity, income level and gender) will be used to match the sample to the population of interest. To do this we will need to form a post-stratification table, which contains the number of people in each possible combination of the post-stratification variables. 
We have 4 variables with 2 (male), 7 (age), 3 (ethnicity) and 3 (income) levels, so there are 2x7x3x3 different levels. Participants are also selected from a state (50), increasing the number of possible levels to $6300$. To make inference about the population, we will also need the proportion of individuals in each post stratification cell at the *population* level. We will use this information to update the estimate of our outcome variable from the sample so that it is more representative of the population. This is particularly helpful if there is a belief that the sample has some bias (e.g., a greater proportion of females responded than males), and that the bias impacts the outcome variable (e.g., maybe women are more likely to pick a cat than men). For each possible combination of factors, the post-stratification table shows the proportion/number of the population in that cell (rather than the proportion/number in the sample in the cell). Below we read in the poststrat data from our simulated data list.

```{r message=FALSE}
poststrat <- mrp_sim[["poststrat"]]
rbind(head(poststrat), tail(poststrat))
```

One of the benefits of using a simulated data set for this example is that the actual population level probability of cat preference is known for each post-stratification cell. In real world data analysis, we don't have this luxury, but we will use it later in this case study to check the predictions of the model. Details regarding the simulation of this data are available in the appendix.

```{r message=FALSE}
true_popn <- mrp_sim[["true_popn"]]
rbind(head(true_popn), tail(true_popn))
```

# Exploring Graphically

Before we begin with the MRP analysis, we first explore the data set with some basic visualizations.

## Comparing sample to population

The aim of this analysis is to obtain a *population* estimation of cat preference given our sample of $1200$. We can see in the following plot the difference in proportions between the sample and the population.
Horizontal panels represent each variable. Bars represent the proportion of the sample (solid) and population (dashed) in each category (represented by colour and the x-axis). For ease of viewing, we ordered the states in terms of the proportion of the sample in that state that was observed. We will continue this formatting choice thoughout this vignette. ```{r order-states} sample$state <- factor(sample$state, levels=1:50) sample$state <- with(sample, factor(state, levels=order(table(state)))) true_popn$state <- factor(true_popn$state,levels = levels(sample$state)) poststrat$state <- factor(poststrat$state,levels = levels(sample$state)) ``` ```{r state-and-pop-data-for-plots, eval=FALSE, include=FALSE} # not evaluated to avoid tidyverse dependency income_popn <- poststrat %>% group_by(income) %>% summarize(Num=sum(N)) %>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Income',CAT=income) %>% ungroup() income_data <- sample %>% group_by(income) %>% summarise(Num=n()) %>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Income',CAT=income) %>% ungroup() income<-rbind(income_data[,2:6],income_popn[,2:6]) age_popn <- poststrat%>% group_by(age)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Age',CAT=age)%>% ungroup() age_data <- sample%>% group_by(age)%>% summarise(Num=n())%>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Age',CAT=age)%>% ungroup() age <- rbind(age_data[,2:6],age_popn[,2:6] ) eth_popn <- poststrat%>% group_by(eth)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Ethnicity',CAT=eth)%>% ungroup() eth_data <- sample%>% group_by(eth)%>% summarise(Num=n())%>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Ethnicity',CAT=eth)%>% ungroup() eth<-rbind(eth_data[,2:6],eth_popn[,2:6]) male_popn <- poststrat%>% group_by(male)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Male',CAT=male)%>% ungroup() male_data <- sample%>% group_by(male)%>% summarise(Num=n())%>% 
mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Male',CAT=male)%>% ungroup() male <- rbind(male_data[,2:6],male_popn[,2:6]) state_popn <- poststrat%>% group_by(state)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(poststrat$N),TYPE='Popn',VAR='State',CAT=state)%>% ungroup() state_plot_data <- sample%>% group_by(state)%>% summarise(Num=n())%>% mutate(PROP=Num/nrow(sample),TYPE='Sample',VAR='State',CAT=state)%>% ungroup() state_plot_data <- rbind(state_plot_data[,2:6],state_popn[,2:6]) state_plot_data$TYPE <- factor(state_plot_data$TYPE, levels = c("Sample","Popn")) plot_data <- rbind(male,eth,age,income) plot_data$TYPE <- factor(plot_data$TYPE, levels = c("Sample","Popn")) save(state_plot_data, file = "mrp-files/state_plot_data.rda", version = 2) save(plot_data, file = "mrp-files/plot_data.rda", version = 2) ``` ```{r plot-data, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"} load("mrp-files/plot_data.rda") # created in previous chunk ggplot(data=plot_data, aes(x=as.factor(CAT), y=PROP, group=as.factor(TYPE), linetype=as.factor(TYPE))) + geom_point(stat="identity",colour='black')+ geom_line()+ facet_wrap( ~ VAR, scales = "free",nrow=1,ncol=5)+ theme_bw()+ scale_fill_manual(values=c('#1f78b4','#33a02c', '#e31a1c','#ff7f00','#8856a7'),guide=FALSE)+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c('0%','25%',"50%","75%","100%"))+ scale_alpha_manual(values=c(1, .3))+ ylab('Proportion')+ labs(alpha='')+ theme(legend.position="bottom", axis.title.y=element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), legend.text=element_text(size=10), axis.text=element_text(size=10), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) load("mrp-files/state_plot_data.rda") # created in previous chunk ggplot(data=state_plot_data, aes(x=as.factor(CAT), y=PROP, group=as.factor(TYPE), linetype=as.factor(TYPE))) + geom_point(stat="identity",colour='black')+ geom_line()+ facet_wrap( ~ VAR)+ theme_bw()+ 
scale_fill_manual(values=c('#1f78b4','#33a02c',
                           '#e31a1c','#ff7f00','#8856a7'),guide=FALSE)+
  scale_y_continuous(breaks=c(0,.025,.05,1),
                     labels=c('0%','2.5%',"5%","100%"),expand=c(0,0),limits=c(0,.06))+
  scale_alpha_manual(values=c(1, .3))+
  ylab('Proportion')+
  labs(alpha='')+
  theme(legend.position="bottom",
        axis.title.y=element_blank(),
        axis.title.x=element_blank(),
        legend.title=element_blank(),
        legend.text=element_text(size=10),
        axis.text.y=element_text(size=10),
        axis.text.x=element_text(size=8,angle=90),
        strip.text=element_text(size=10),
        strip.background = element_rect(fill='grey92'))
```

# Effect of the post-stratification variable on preference for cats

Secondly, we consider the evidence of different proportions across different levels of a post-stratification variable, which we should consider for each of the post-stratification variables. Here we break down the proportion of individuals who would prefer a cat (*y-axis*) by different levels (*x-axis*) of the post-stratification variable (*horizontal panels*). We can see from this figure that there appear to be differences in cat preference for the different levels of post-stratification variables. Given the previous figure, which suggested that the sample was different to the population in the share of different levels of these variables, this should suggest that using the sample to estimate cat preference may not give accurate estimates of cat preference in the population.
```{r, eval=FALSE, echo=FALSE} # not evaluated to avoid dependency on tidyverse #Summarise summary_by_poststrat_var <- sample %>% gather(variable,category,c("income","eth","age","male")) %>% group_by(variable,category) %>% #Wald confidence interval summarise(y_mean=mean(cat_pref),y_sd=sqrt(mean(cat_pref)*(1-mean(cat_pref))/n())) %>% ungroup() summary_by_poststrat_var$variable <- as.factor(summary_by_poststrat_var$variable) levels(summary_by_poststrat_var$variable) <- list('Age'='age','Ethnicity'='eth','Income'='income','Male'='male') save(summary_by_poststrat_var, file = "mrp-files/summary_by_poststrat_var.rda", version = 2) ``` ```{r plot-summary-by-poststrat-var, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"} load("mrp-files/summary_by_poststrat_var.rda") # created in previous chunk ggplot(data=summary_by_poststrat_var, aes(x=as.factor(category), y=y_mean,group=1)) + geom_errorbar(aes(ymin=y_mean-y_sd, ymax=y_mean+y_sd), width=0)+ geom_line()+ geom_point()+ scale_colour_manual(values=c('#1f78b4','#33a02c','#e31a1c','#ff7f00', '#8856a7'))+theme_bw()+ facet_wrap(~variable,scales = "free_x",nrow=1,ncol=5)+ scale_y_continuous(breaks=c(.5,.75,1), labels=c("50%","75%", "100%"), limits=c(0.4-.4*.05,.9),expand = c(0,0))+ labs(x="",y="Cat preference")+ theme(legend.position="none", axis.title.y=element_text(size=10), axis.title.x=element_blank(), axis.text=element_text(size=10), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ``` ## Interaction effect Thirdly, we demonstrate visually that there is an interaction between age and gender and compare to a case where there is no interaction. Here a simulated interaction effect between age (*x-axis*) and gender (*color*), right panel, is contrasted with no interaction effect (*left panel*). While both panels demonstrate a difference between the genders on the outcome variable (*y-axis*), only the second panel shows this difference changing with the variable on the x-axis. 
```{r interaction-summary, eval=FALSE, echo=FALSE} # not evaluated to avoid dependency on tidyverse #Summarise interaction <- sample %>% gather(variable, category, c("age", "eth")) %>% group_by(variable, category, male) %>% summarise(y_mean = mean(cat_pref), y_sd = sqrt(mean(cat_pref) * (1 - mean(cat_pref)) / n())) %>% ungroup() #Tidy for nice facet labels interaction$variable <- as.factor(interaction$variable) levels(interaction$variable) <- list('Ethnicity' = 'eth', 'Age' = 'age') save(interaction, file = "mrp-files/interaction.rda", version = 2) ``` ```{r plot-interaction, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"} load("mrp-files/interaction.rda") # created in previous chunk ggplot(data=interaction, aes(x=as.factor(category), y=y_mean, colour=as.factor(male),group=as.factor(male))) + geom_errorbar(aes(ymin=y_mean-y_sd, ymax=y_mean+y_sd),width=0 )+ geom_line(aes(x=as.factor(category), y=y_mean,colour=as.factor(male)))+ geom_point()+ facet_wrap(~variable,scales = "free_x",nrow=1,ncol=2)+ labs(x="",y="Cat preference",colour='Gender')+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%",'25%',"50%","75%", "100%"), limits=c(0,1),expand=c(0,0))+ scale_colour_manual(values=c('#4575b4','#d73027'))+theme_bw()+ theme(axis.title=element_text(size=10), axis.text=element_text(size=10), legend.position='none', strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ``` ## Design effect Lastly we look at the difference in cat preference between states, which will form the basis for the multi-level component of our analysis. Participants were randomly selected from particular states. Plotting the state (*x-axis*) against the overall proportion of participants who prefer cats (*y-axis*) demonstrates state differences. The downward slope is because we ordered the x-axis by the proportion of cat preference for ease of viewing. 
We also include second plot with a horizontal line to represent the overall preference for cats in the total population, according to the sample. ```{r, eval=FALSE, echo=FALSE} # not evaluated to avoid dependency on tidyverse #Summarise by state preference_by_state <- sample %>% group_by(state) %>% summarise(y_mean = mean(cat_pref), y_sd = sqrt(mean(cat_pref) * (1 - mean(cat_pref)) / n())) %>% ungroup() save(preference_by_state, file = "mrp-files/preference_by_state.rda", version = 2) ``` ```{r, echo=FALSE, fig.height = 4, fig.width = 8, fig.align = "center"} load("mrp-files/preference_by_state.rda") compare <- ggplot(data=preference_by_state, aes(x=state, y=y_mean,group=1)) + geom_ribbon(aes(ymin=y_mean-y_sd,ymax=y_mean+y_sd,x=state),fill='lightgrey',alpha=.7)+ geom_line(aes(x=state, y=y_mean))+ geom_point()+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%","25%","50%","75%","100%"), limits=c(0,1), expand=c(0,0))+ scale_x_discrete(drop=FALSE)+ scale_colour_manual(values=c('#1f78b4','#33a02c','#e31a1c','#ff7f00', '#8856a7'))+ theme_bw()+ labs(x="States",y="Cat preference")+ theme(legend.position="none", axis.title=element_text(size=10), axis.text.y=element_text(size=10), axis.text.x=element_text(angle=90,size=8), legend.title=element_text(size=10), legend.text=element_text(size=10)) compare2 <- ggplot()+ geom_hline(yintercept = mean(sample$cat_pref),size=.8)+ geom_text(aes(x = 5.2, y = mean(sample$cat_pref)+.025, label = "Sample"))+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%","25%","50%","75%","100%"), limits=c(-0.25,1.25),expand=c(0,0))+ theme_bw()+ labs(x="Popn",y="")+ theme(legend.position="none", axis.title.y=element_blank(), axis.title.x=element_text(size=10), axis.text=element_blank(), axis.ticks=element_blank(), legend.title=element_text(size=10), legend.text=element_text(size=10)) bayesplot_grid(compare,compare2, grid_args = list(nrow=1, widths = c(8,1))) ``` # MRP with rstanarm From visual inspection, it appears that different 
levels of the post-stratification variables have different preferences for cats. Our survey also appears to have sampling bias, indicating that some groups were over/under sampled relative to the population. The net effect of this is that we could not make good population level estimates of cat preference straight from our sample. Our aim is to infer the preference for cats in the *population* using the post-stratification variables to account for systematic differences between the sample and population. Using rstanarm, this becomes a simple procedure.

The first step is to use a multi-level logistic regression model to predict preference for cats in the sample given the variables that we will use to post-stratify. Note that we actually have more rows in the post-stratification matrix than we have observed units, so there are some cells in the poststrat matrix that we don't observe. We can use a multi-level model to partially pool information across the different levels within each variable to assist with this. In the model described below, we use a fixed intercept for gender, and hierarchically modeled varying intercepts for each of the other factors.

Let $\theta_{j}$ denote the preference for cats in the $j$th poststratification cell. The non-hierarchical part of the model can be written as

$$\theta_j= logit^{-1}(X_{j}\beta),$$

where here $X$ only contains an indicator for male or female and an interaction term with age.
Adding the varying intercepts for the other variables the model becomes $$ \theta_j = logit^{-1}( X_{j}\beta + \alpha_{\rm state[j]}^{\rm state} + \alpha_{\rm age[j]}^{\rm age} + \alpha_{\rm eth[j]}^{\rm eth} + \alpha_{\rm inc[j]}^{\rm inc} ) $$ with $$ \begin{align*} \alpha_{\rm state[j]}^{\rm state} & \sim N(0,\sigma^{\rm state}) \\ \alpha_{\rm age[j]}^{\rm age} & \sim N(0,\sigma^{\rm age})\\ \alpha_{\rm eth[j]}^{\rm eth} & \sim N(0,\sigma^{\rm eth})\\ \alpha_{\rm inc[j]}^{\rm inc} &\sim N(0,\sigma^{\rm inc}) \\ \end{align*} $$ Each of $\sigma^{\rm state}$, $\sigma^{\rm age}$, $\sigma^{\rm eth}$, and $\sigma^{\rm inc}$ are estimated from the data (in this case using rstanarm's default priors), which is beneficial as it means we share information between the levels of each variable and we can prevent levels with with less data from being too sensitive to the few observed values. This also helps with the levels we don't observe at all it will use information from the levels that we do observe. For more on the benefits of this type of model, see @gelman2005analysis, and see @ghitza2013deep and @si2017bayesian for more complicated extensions that involve deep interactions and structured prior distributions. Here is the model specified using the `stan_glmer()` function in rstanarm, which uses the same formula syntax as the `glmer()` function from the lme4 package: ```{r, message=FALSE, warning=FALSE, results='hide'} fit <- stan_glmer( cat_pref ~ factor(male) + factor(male) * factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | income), family = binomial(link = "logit"), data = sample ) ``` ```{r} print(fit) ``` As a first pass to check whether the model is performing well, note that there are no warnings about divergences, failure to converge or tree depth. If these errors do occur, more information on how to alleviate them is provided [here](https://mc-stan.org/rstanarm/articles/rstanarm.html#step-3-criticize-the-model). 
## Population Estimate From this we get a summary of the baseline log odds of cat preference at the first element of each factor (i.e., male = 0, age = 1) for each state, plus estimates on variability of the intercept for state, ethnicity, age and income. While this is interesting, currently all we have achieved is a model that predicts cat preference given a number of factor-type predictors in a sample. What we would like to do is estimate cat preference in the population by accounting for differences between our sample and the population. We use the `posterior_linpred()` function to obtain posterior estimates for cat preference given the proportion of people in the *population* in each level of the factors included in the model. ```{r, message=FALSE} posterior_prob <- posterior_linpred(fit, transform = TRUE, newdata = poststrat) poststrat_prob <- posterior_prob %*% poststrat$N / sum(poststrat$N) model_popn_pref <- c(mean = mean(poststrat_prob), sd = sd(poststrat_prob)) round(model_popn_pref, 3) ``` We can compare this to the estimate we would have made if we had just used the sample: ```{r, message=FALSE} sample_popn_pref <- mean(sample$cat_pref) round(sample_popn_pref, 3) ``` We can also add it to the last figure to graphically represent the difference between the sample and population estimate. ```{r, message=FALSE,fig.height = 4, fig.width = 8, fig.align = "center"} compare2 <- compare2 + geom_hline(yintercept = model_popn_pref[1], colour = '#2ca25f', size = 1) + geom_text(aes(x = 5.2, y = model_popn_pref[1] + .025), label = "MRP", colour = '#2ca25f') bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ``` As this is simulated data, we can look directly at the preference for cats that we simulated from to consider how good our estimate is. ```{r, message=FALSE} true_popn_pref <- sum(true_popn$cat_pref * poststrat$N) / sum(poststrat$N) round(true_popn_pref, 3) ``` Which we will also add to the figure. 
```{r, echo=FALSE, message=FALSE,fig.height = 4, fig.width = 8, fig.align = "center"} compare2 <- compare2 + geom_hline(yintercept = mean(true_popn_pref), linetype = 'dashed', size = .8) + geom_text(aes(x = 5.2, y = mean(true_popn_pref) - .025), label = "True") bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ``` Our MRP estimate is barely off, while our sample estimate is off by more than 10 percentage points. This indicates that using MRP helps to make estimates for the population from our sample that are more accurate. ## Estimates for states One of the nice benefits of using MRP to make inference about the population is that we can change the population of interest. In the previous paragraph we inferred the preference for cats in the whole population. We can also infer the preference for cats in a single state. In the following code we post-stratify for each state in turn. Note that we can reuse the predictive model from the previous step and update for different population demographics. This is particularly useful for complicated cases or large data sets where the model takes some time to fit. As before, first we use the proportion of the population in each combination of the post-stratification groups to estimate the proportion of people who preferred cats in the population, only in this case the population of interest is the state. 
```{r, message=FALSE} state_df <- data.frame( State = 1:50, model_state_sd = rep(-1, 50), model_state_pref = rep(-1, 50), sample_state_pref = rep(-1, 50), true_state_pref = rep(-1, 50), N = rep(-1, 50) ) for(i in 1:length(levels(as.factor(poststrat$state)))) { poststrat_state <- poststrat[poststrat$state == i, ] posterior_prob_state <- posterior_linpred( fit, transform = TRUE, draws = 1000, newdata = as.data.frame(poststrat_state) ) poststrat_prob_state <- (posterior_prob_state %*% poststrat_state$N) / sum(poststrat_state$N) #This is the estimate for popn in state: state_df$model_state_pref[i] <- round(mean(poststrat_prob_state), 4) state_df$model_state_sd[i] <- round(sd(poststrat_prob_state), 4) #This is the estimate for sample state_df$sample_state_pref[i] <- round(mean(sample$cat_pref[sample$state == i]), 4) #And what is the actual popn? state_df$true_state_pref[i] <- round(sum(true_popn$cat_pref[true_popn$state == i] * poststrat_state$N) / sum(poststrat_state$N), digits = 4) state_df$N[i] <- length(sample$cat_pref[sample$state == i]) } state_df[c(1,3:6)] state_df$State <- factor(state_df$State, levels = levels(sample$state)) ``` Here we similar findings to when we considered the population as whole. While estimates for cat preference (in percent) using the sample are off by ```{r} round(100 * c( mean = mean(abs(state_df$sample_state_pref-state_df$true_state_pref), na.rm = TRUE), max = max(abs(state_df$sample_state_pref-state_df$true_state_pref), na.rm = TRUE) )) ``` the MRP based estimates are much closer to the actual percentage, ```{r} round(100 * c( mean = mean(abs(state_df$model_state_pref-state_df$true_state_pref)), max = max(abs(state_df$model_state_pref-state_df$true_state_pref)) )) ``` and especially when the sample size for that population is relatively small. This is easier to see graphically, so we will continue to add additional layers to the previous figure. 
Here we add model estimates,represented by triangles, and the true population cat preference, represented as transparent circles. ```{r, message=FALSE, echo=FALSE, fig.height = 4, fig.width = 8, fig.align = "center",warning=FALSE, fig.align = "center"} #Summarise by state compare <- compare + geom_point(data=state_df, mapping=aes(x=State, y=model_state_pref), inherit.aes=TRUE,colour='#238b45')+ geom_line(data=state_df, mapping=aes(x=State, y=model_state_pref,group=1), inherit.aes=TRUE,colour='#238b45')+ geom_ribbon(data=state_df,mapping=aes(x=State,ymin=model_state_pref-model_state_sd, ymax=model_state_pref+model_state_sd,group=1), inherit.aes=FALSE,fill='#2ca25f',alpha=.3)+ geom_point(data=state_df, mapping=aes(x=State, y=true_state_pref), alpha=.5,inherit.aes=TRUE)+ geom_line(data=state_df, mapping=aes(x=State, y=true_state_pref), inherit.aes = TRUE,linetype='dashed') bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ``` # Other formats ## Alternate methods of modelling Previously we used a binary outcome variable. An alternative form of this model is to aggregate the data to the poststrat cell level and model the number of successes (or endorsement of cat preference in this case) out of the total number of people in that cell. To do this we need to create two n x 1 outcome variables, `N_cat_pref` (number in cell who prefer cats) and `N` (number in the poststrat cell). ```{r, eval=FALSE} # not evaluated to avoid dependency on tidyverse sample_alt <- sample %>% group_by(male, age, income, state, eth) %>% summarise(N_cat_pref = sum(cat_pref), N = n()) %>% ungroup() ``` ```{r, include=FALSE} load("mrp-files/sample_alt.rda") ``` We then can use these two outcome variables to model the data using the binomial distribution. 
```{r, message=FALSE, warning=FALSE, results='hide'} fit2 <- stan_glmer( cbind(N_cat_pref, N - N_cat_pref) ~ factor(male) + factor(male) * factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | income), family = binomial("logit"), data = sample_alt, refresh = 0 ) ``` ```{r} print(fit2) ``` Like before, we can use the `posterior_linpred()` function to obtain an estimate of the preference for cats in the population. ```{r, message=FALSE} posterior_prob_alt <- posterior_linpred(fit2, transform = TRUE, newdata = poststrat) poststrat_prob_alt <- posterior_prob_alt %*% poststrat$N / sum(poststrat$N) model_popn_pref_alt <- c(mean = mean(poststrat_prob_alt), sd = sd(poststrat_prob_alt)) round(model_popn_pref_alt, 3) ``` As we should, we get the same answer as when we fit the model using the binary outcome. The two ways are equivalent, so we can use whichever form is most convenient for the data at hand. More details on these two forms of binomial models are available [here](https://mc-stan.org/rstanarm/articles/binomial.html). # Appendix ### Examples of other formulas The formulas for fitting so-called "mixed-effects" models in **rstanarm** are the same as those in the **lme4** package. A table of examples can be found in Table 2 of the vignette for the **lme4** package, available [here](https://CRAN.R-project.org/package=lme4/vignettes/lmer.pdf). ### Code to simulate the data Here is the source code for the `simulate_mrp_function()`, which is based off of some code provided by Aki Vehtari. 
```{r} print(simulate_mrp_data) ``` # References rstanarm/vignettes/binomial.Rmd0000644000176200001440000004131414214422264016353 0ustar liggesusers--- title: "Estimating Generalized Linear Models for Binary and Binomial Data with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate generalized linear models (GLMs) for binary (Bernoulli) and Binomial response variables using the `stan_glm` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of conditionally independent binomial distributions (possibly with only one trial per observation). # Likelihood For a binomial GLM the likelihood for one observation $y$ can be written as a conditionally binomial PMF $$\binom{n}{y} \pi^{y} (1 - \pi)^{n - y},$$ where $n$ is the known number of trials, $\pi = g^{-1}(\eta)$ is the probability of success and $\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor. For a sample of size $N$, the likelihood of the entire sample is the product of $N$ individual likelihood contributions. Because $\pi$ is a probability, for a binomial model the _link_ function $g$ maps between the unit interval (the support of $\pi$) and the set of all real numbers $\mathbb{R}$. When applied to a linear predictor $\eta$ with values in $\mathbb{R}$, the inverse link function $g^{-1}(\eta)$ therefore returns a valid probability between 0 and 1. The two most common link functions used for binomial GLMs are the [logit](https://en.wikipedia.org/wiki/Logit) and [probit](https://en.wikipedia.org/wiki/Probit) functions. 
With the logit (or log-odds) link function $g(x) = \ln{\left(\frac{x}{1-x}\right)}$, the likelihood for a single observation becomes $$\binom{n}{y}\left(\text{logit}^{-1}(\eta)\right)^y \left(1 - \text{logit}^{-1}(\eta)\right)^{n-y} = \binom{n}{y} \left(\frac{e^{\eta}}{1 + e^{\eta}}\right)^{y} \left(\frac{1}{1 + e^{\eta}}\right)^{n - y}$$ and the probit link function $g(x) = \Phi^{-1}(x)$ yields the likelihood $$\binom{n}{y} \left(\Phi(\eta)\right)^{y} \left(1 - \Phi(\eta)\right)^{n - y},$$ where $\Phi$ is the CDF of the standard normal distribution. The differences between the logit and probit functions are minor and -- if, as __rstanarm__ does by default, the probit is scaled so its slope at the origin matches the logit's -- the two link functions should yield similar results. With `stan_glm`, binomial models with a logit link function can typically be fit slightly faster than the identical model with a probit link because of how the two models are implemented in Stan. Unless the user has a specific reason to prefer the probit link, we recommend the logit simply because it will be slightly faster and more numerically stable. In theory, there are infinitely many possible link functions, although in practice only a few are typically used. Other common choices are the `cauchit` and `cloglog` functions, which can also be used with `stan_glm` (every link function compatible with`glm` will work with `stan_glm`). 
# Priors ```{r, child="children/stan_glm_priors.txt"} ``` # Posterior With independent prior distributions, the joint posterior distribution for $\alpha$ and $\boldsymbol{\beta}$ is proportional to the product of the priors and the $N$ likelihood contributions: $$f\left(\alpha,\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N { g^{-1}\left(\eta_i\right)^{y_i} \left(1 - g^{-1}\left(\eta_i\right)\right)^{n_i-y_i}}.$$ This is posterior distribution that `stan_glm` will draw from when using MCMC. # Logistic Regression Example When the logit link function is used the model is often referred to as a logistic regression model (the inverse logit function is the CDF of the standard logistic distribution). As an example, here we will show how to carry out a few parts of the analysis from Chapter 5.4 of [Gelman and Hill (2007)](http://www.stat.columbia.edu/~gelman/arm/) using `stan_glm`. Gelman and Hill describe a survey of 3200 residents in a small area of Bangladesh suffering from arsenic contamination of groundwater. Respondents with elevated arsenic levels in their wells had been encouraged to switch their water source to a safe public or private well in the nearby area and the survey was conducted several years later to learn which of the affected residents had switched wells. The goal of the analysis presented by Gelman and Hill is to learn about the factors associated with switching wells. To start, we'll use `dist` (the distance from the respondent's house to the nearest well with safe drinking water) as the only predictor of `switch` (1 if switched, 0 if not). Then we'll expand the model by adding the arsenic level of the water in the resident's own well as a predictor and compare this larger model to the original. After loading the `wells` data, we first rescale the `dist` variable (measured in meters) so that it is measured in units of 100 meters. 
If we leave `dist` in its original units then the corresponding regression coefficient will represent the effect of the marginal meter, which is too small to have a useful interpretation. ```{r binom-arsenic-data} library(rstanarm) data(wells) wells$dist100 <- wells$dist / 100 ``` Before estimating any models we can visualize the distribution of `dist100` in the data: ```{r, binom-arsenic-plot-dist100, fig.height=3} ggplot(wells, aes(x = dist100, y = ..density.., fill = switch == 1)) + geom_histogram() + scale_fill_manual(values = c("gray30", "skyblue")) ``` In the plot above the blue bars correspond to the `r sum(rstanarm::wells$switch == 1)` residents who said they switched wells and darker bars show the distribution of `dist100` for the `r sum(rstanarm::wells$switch == 0)` residents who didn't switch. As we would expect, for the residents who switched wells, the distribution of `dist100` is more concentrated at smaller distances. A Bayesian version of Gelman and Hill's initial logistic regression model can be estimated using the `stan_glm` function. Here we'll use a Student t prior with 7 degrees of freedom and a scale of 2.5, which, as discussed above, is a reasonable default prior when coefficients should be close to zero but have some chance of being large. ```{r, binom-arsenic-mcmc, results="hide"} t_prior <- student_t(df = 7, location = 0, scale = 2.5) fit1 <- stan_glm(switch ~ dist100, data = wells, family = binomial(link = "logit"), prior = t_prior, prior_intercept = t_prior, cores = 2, seed = 12345) ``` ```{r, binom-arsenic-print, echo=FALSE} (coef_fit1 <- round(coef(fit1), 3)) ``` The `formula`, `data` and `family` arguments to `stan_glm` are specified in exactly the same way as for `glm`. We've also added the optional additional arguments `chains` (how many chains we want to execute), `cores` (how many cores we want the computer to utilize) and `seed` (for reproducibility). 
You can read about other possible arguments in the `stan_glm` documentation (`help(stan_glm, package = 'rstanarm')`). To get a sense for the uncertainty in our estimates we can use the `posterior_interval` function to get Bayesian uncertainty intervals. The uncertainty intervals are computed by finding the relevant quantiles of the draws from the posterior distribution. For example, to compute 50% intervals we use: ```{r, binom-arsenic-ci} round(posterior_interval(fit1, prob = 0.5), 2) ``` For more on `posterior_interval` and interpreting the parameter estimates from a Bayesian model see Step 2 in the ["How to Use the __rstanarm__ Package"](rstanarm.html) vignette. Using the coefficient estimates we can plot the predicted probability of `switch = 1` (as a function of `dist100`) together with the observed outcomes: ```{r, binom-arsenic-plot-model} # Predicted probability as a function of x pr_switch <- function(x, ests) plogis(ests[1] + ests[2] * x) # A function to slightly jitter the binary data jitt <- function(...) { geom_point(aes_string(...), position = position_jitter(height = 0.05, width = 0.1), size = 2, shape = 21, stroke = 0.2) } ggplot(wells, aes(x = dist100, y = switch, color = switch)) + scale_y_continuous(breaks = c(0, 0.5, 1)) + jitt(x="dist100") + stat_function(fun = pr_switch, args = list(ests = coef(fit1)), size = 2, color = "gray35") ``` The plot shows that under this model the predicted probability of switching is a decent bit above 50% for residents living very close to wells with safe drinking water. As expected, larger values of `dist100` are associated with lower predicted probabilities of switching. At the extreme ($\approx 300$ meters), the probability is about 25%. Next, we incorporate an additional predictor into the model: the arsenic level of water in the respondent's well. 
According to Gelman and Hill, "At the levels present in the Bangladesh drinking water, the health risks from arsenic are roughly proportional to exposure, and so we would expect switching to be more likely from wells with high arsenic levels" (pg. 90). We only need to change the formula, so we can use the `update` function: ```{r binom-arsenic-mcmc2, results="hide"} fit2 <- update(fit1, formula = switch ~ dist100 + arsenic) ``` ```{r} (coef_fit2 <- round(coef(fit2), 3)) ``` As expected the coefficient on `arsenic` is positive. The plot below shows distance on the x-axis and arsenic level on the y-axis with the predicted probability of well-switching mapped to the color of the background tiles (the lighter the color the higher the probability). The observed value of `switch` is indicated by the color of the points. ```{r,echo=FALSE} theme_update(legend.position = "right") ``` ```{r, binom-arsenic-plot-model2} pr_switch2 <- function(x, y, ests) plogis(ests[1] + ests[2] * x + ests[3] * y) grid <- expand.grid(dist100 = seq(0, 4, length.out = 100), arsenic = seq(0, 10, length.out = 100)) grid$prob <- with(grid, pr_switch2(dist100, arsenic, coef(fit2))) ggplot(grid, aes(x = dist100, y = arsenic)) + geom_tile(aes(fill = prob)) + geom_point(data = wells, aes(color = factor(switch)), size = 2, alpha = 0.85) + scale_fill_gradient() + scale_color_manual("switch", values = c("white", "black"), labels = c("No", "Yes")) ``` We can see that the black points (`switch=1`) are predominantly clustered in the upper-left region of the plot where the predicted probability of switching is highest. Another way we can visualize the data and model is to follow Gelman and Hill and create separate plots for varying the arsenic level and distance. Here we'll plot curves representing the predicted probability of switching for the minimum, maximum and quartile values of both variables. 
```{r,echo=FALSE} theme_update(legend.position = "none") ``` ```{r, binom-arsenic-plot-model2-alt} # Quantiles q_ars <- quantile(wells$dist100, seq(0, 1, 0.25)) q_dist <- quantile(wells$arsenic, seq(0, 1, 0.25)) base <- ggplot(wells) + xlim(c(0, NA)) + scale_y_continuous(breaks = c(0, 0.5, 1)) vary_arsenic <- base + jitt(x="arsenic", y="switch", color="switch") vary_dist <- base + jitt(x="dist100", y="switch", color="switch") for (i in 1:5) { vary_dist <- vary_dist + stat_function(fun = pr_switch2, color = "gray35", args = list(ests = coef(fit2), y = q_dist[i])) vary_arsenic <- vary_arsenic + stat_function(fun = pr_switch2, color = "gray35", args = list(ests = coef(fit2), x = q_ars[i])) } bayesplot_grid(vary_dist, vary_arsenic, grid_args = list(ncol = 2)) ``` We can compare our two models (with and without `arsenic`) using an approximation to Leave-One-Out (LOO) cross-validation, which is a method for estimating out of sample predictive performance and is implemented by the `loo` function in the __loo__ package: ```{r, binom-arsenic-loo} (loo1 <- loo(fit1)) (loo2 <- loo(fit2)) loo_compare(loo1, loo2) ``` These results favor `fit2` over `fit1`, as the estimated difference in `elpd` (the expected log pointwise predictive density for a new dataset) is so much larger than its standard error. LOO penalizes models for adding additional predictors (this helps counter overfitting), but in this case `fit2` represents enough of an improvement over `fit1` that the penalty for including `arsenic` is negligible (as it should be if `arsenic` is an important predictor). The [vignette](lm.html) for the `stan_lm` function also has an example of using the `loo` function where the results are quite a bit different from what we see here and some important additional considerations are discussed. # Conditional Logit Models The previous example relies on the fact that observations are plausibly conditionally independent. 
In contrast, so-called "case-control studies" require that there are a fixed number of successes and failures within each stratum, and the question is _which_ members of each stratum succeed and fail? The `stan_clogit` function estimates such a model and is very similar to the `clogit` function in the **survival** package. The main syntactical difference is that the `clogit` function requires that the user call the `strata` function in the model formula, whereas the `stan_clogit` function has a required `strata` argument. In addition, in the `stan_clogit` case the data must be sorted by the variable passed to `strata`. The advantage to these changes is that `stan_clogit` can optionally utilize the multilevel syntax from the **lme4** package to specify group-specific terms, rather than the more limited multilevel structure supported by the `frailty` function in the **survival** package. The [vignette](glmer.html) for the `stan_glmer` function discusses the lme4-style syntax in more detail. For example, ```{r, results = "hide"} post <- stan_clogit(case ~ spontaneous + induced + (1 | parity), data = infert[order(infert$stratum), ], # order necessary strata = stratum, QR = TRUE, cores = 2, seed = 12345) ``` ```{r} post ``` The posterior predictions are also constrained such that there is exactly one success (in this case) for each of the strata and thus the posterior distribution of the probabilities are also so constrained: ```{r} PPD <- posterior_predict(post) stopifnot(rowSums(PPD) == max(infert$stratum)) PLP <- posterior_linpred(post, transform = TRUE) stopifnot(round(rowSums(PLP)) == max(infert$stratum)) ``` # Binomial Models Although the example in this vignette focused on a binary response variable, we can use nearly identical code if we have the sum of multiple binary variables. For example, image a hypothetical dataset similar to the well-switching data but spanning multiple villages. 
Each observation (each row) of this `data.frame` corresponds to an entire village: `switch[i]` is the number of 'yes' responses to the well-switching question for village `i`, `dist100[i]` is the average distance to the closest well with clean water for village `i`, etc. We also now have a variable `n` where `n[i]` is the number of respondents from village `i`. For this data we can estimate a similar model to the one we used in the binary case by changing the formula to `cbind(switch, n - switch) ~ dist100 + arsenic` The left-hand side is now a 2-column matrix where the first column is the number of 'yes' responses and the second column is the number of 'no' responses (or more generally, the number of successes and number of failures). The same model can also be specified using the proportion of 'yes' responses and the total number of responses in each village. This corresponds to the formula `prop_switch ~ dist100 + arsenic` where `prop_switch = switch / n` is the proportion of 'yes' responses. The total number of responses is provided using the `weights` argument. In this case we would add `weights = n` to the call to `stan_glm`. An example of a similar model can also be found in __Step 1__ of the ["How to Use the __rstanarm__ Package"](rstanarm.html) vignette. # Going Further In the hypothetical scenario above, if we also have access to the observations for each individual in all of the villages (not just the aggregate data), then a natural extension would be to consider a multilevel model that takes advantage of the inherent multilevel structure of the data (individuals nested within villages). The [vignette](glmer.html) for the `stan_glmer` function discusses these models. # References Gelman, A. and Hill, J. (2007). _Data Analysis Using Regression and Multilevel/Hierarchical Models._ Cambridge University Press, Cambridge, UK. 
rstanarm/vignettes/mrp-files/0000755000176200001440000000000013540753420016012 5ustar liggesusersrstanarm/vignettes/mrp-files/mrp.bib0000644000176200001440000000634613540072577017306 0ustar liggesusers@article{little1993post, title={Post-stratification: a modeler's perspective}, author={Little, Roderick JA}, journal={Journal of the American Statistical Association}, volume={88}, number={423}, pages={1001--1012}, year={1993}, publisher={Taylor \& Francis Group} } @article{park2004bayesian, title={Bayesian multilevel estimation with poststratification: state-level estimates from national polls}, author={Park, David K and Gelman, Andrew and Bafumi, Joseph}, journal={Political Analysis}, volume={12}, number={4}, pages={375--385}, year={2004}, publisher={Cambridge University Press} } @article{shirley2015hierarchical, title={Hierarchical models for estimating state and demographic trends in US death penalty public opinion}, author={Shirley, Kenneth E and Gelman, Andrew}, journal={Journal of the Royal Statistical Society: Series A (Statistics in Society)}, volume={178}, number={1}, pages={1--28}, year={2015}, publisher={Wiley Online Library} } @article{barr2013random, title={Random effects structure for confirmatory hypothesis testing: Keep it maximal}, author={Barr, Dale J and Levy, Roger and Scheepers, Christoph and Tily, Harry J}, journal={Journal of memory and language}, volume={68}, number={3}, pages={255--278}, year={2013}, publisher={Elsevier} } @article{ghitza2013deep, title={Deep interactions with MRP: Election turnout and voting patterns among small electoral subgroups}, author={Ghitza, Yair and Gelman, Andrew}, journal={American Journal of Political Science}, volume={57}, number={3}, pages={762--776}, year={2013}, publisher={Wiley Online Library} } @article{lei20172008, title={The 2008 election: A preregistered replication analysis}, author={Lei, Rayleigh and Gelman, Andrew and Ghitza, Yair}, journal={Statistics and Public Policy}, pages={1--8}, year={2017}, 
publisher={Taylor \& Francis} } @article{gelman2007struggles, title={Struggles with survey weighting and regression modeling}, author={Gelman, Andrew}, journal={Statistical Science}, pages={153--164}, year={2007}, publisher={JSTOR} } @article{lax2009should, title={How should we estimate public opinion in the states?}, author={Lax, Jeffrey R and Phillips, Justin H}, journal={American Journal of Political Science}, volume={53}, number={1}, pages={107--121}, year={2009}, publisher={Wiley Online Library} } @article{park2004bayesian, title={Bayesian multilevel estimation with poststratification: state-level estimates from national polls}, author={Park, David K and Gelman, Andrew and Bafumi, Joseph}, journal={Political Analysis}, volume={12}, number={4}, pages={375--385}, year={2004}, publisher={Cambridge University Press} } @article{gelman2005analysis, title={Analysis of variance-why it is more important than ever}, author={Gelman, Andrew and others}, journal={The annals of statistics}, volume={33}, number={1}, pages={1--53}, year={2005}, publisher={Institute of Mathematical Statistics} } @article{si2017bayesian, title={Bayesian hierarchical weighting adjustment and survey inference}, author={Si, Yajuan and Trangucci, Rob and Gabry, Jonah Sol and Gelman, Andrew}, journal={arXiv preprint arXiv:1707.08220}, year={2017} }rstanarm/vignettes/mrp-files/summary_by_poststrat_var.rda0000644000176200001440000000104113540753420023660 0ustar liggesusers r0b```b`fcd`b2Y# '(.M,O//.).)J,/K,b``)Ҽ@7fBHQdI-K)QfT(ӵ$#/393/9?&⛘f&krNb1HEi%@0b>7?0PZJK@iK.qt .?wXkOv>Zns751 *SЌ,/׃ JL @! CdYK@ɕXV4 |Mrstanarm/vignettes/mrp-files/preference_by_state.rda0000644000176200001440000000202513540753420022511 0ustar liggesusers]}L[UkY`"C7kLCd[e+miJ!Ѩ"M椓MSD pMPT6d, 戴ݗxs{=Ԡ*U1 #c L2is8>a-m~x Q| ! f@@DT|*_FB |9$ YCAE#}AGr H لnۺ8pՐ $!o-Z4h+fq4W@-3] .eZ]ioJ5~M4FKi)z>-J+襭tqb!T&1HK#191) mHp4.GN RU}IK9+*\U}h"g16? 
4> ~Gw'^ EEݎ%/'xf\I|!FM'a)će늇`d&Z)Ų̌51&_춭ZAz 0Wk*xa&sOW|K]/,LM'L:=cďaB'0w5#\T>۸w ;z^{8|rI, Spevb\Q{VX[tBCwG Q%O}*uV6? nC4 Sr(}յ:1sC2qy<_/wYN#ntN"[`&rstanarm/vignettes/mrp-files/sample_alt.rda0000644000176200001440000000503413540753420020625 0ustar liggesusers]Ys7=l \ r>/d\vmW%bBuG7n{l½<$vyR) O>ՁzPϐ;[^D6[Wq~(x17e/0|&L_Aˎ y`><~aV<-"㤘]#Xggjk#js[0RYlv;s{bna,܆`s;q>4Qou8 GŞ0֠ga24 ]'{|~)([&k%+2Z8x뀯FCWZx-y=}1I퇵tbjaبQ̼KkF^y0I^O,Y 1kyCڃ}G[ݾ8)bY[gu5އ>LSF\ɻ]k(hm]%cqxgZ~]+u*S?Ӆ4QށN:rss>0.+ Z+z^kƛ  G!GV>NX\-.PڸʾbXg~uָ9kkjzkvU݈wcAëu-^447Ɠ\ +y{4?֮~=gfuJxAAN9.}ԑsWFS}-ǫS O0ޚm޳[s;ԯ<~pw?yxh \|=80~BZ<4kŗ)8U5ySqICɳʕ淕)3Zƅ76zܯ8ld0.Q_L/up`r\q߶?Ug}7XYojB8Ky5 KqX72jtrMǵw1X=:G#=xNp |/zleo_mM}193IʅưgqMcKU>cLOy,I3#_Y+N~wNhZﵸ9 eզ8?>Ӥf7-`6U54ڦ|m77|\7{[z?/gWmo} ))=_xYG~zy7^eL}Ƴrstanarm/vignettes/mrp-files/state_plot_data.rda0000644000176200001440000000264313540753420021656 0ustar liggesusersSUTxY%$ 4dw(-n^mh2,&)Hªir(IRyԒ4L$ɬ~`jty{E=gvsְ8CqSx85Cƹq#[M,Кc8N1ݑĜq8S(}A? gK Ε0XB C))#(K%BhX =Es RW()8Iz]E{BqG`ꂺ3=%|k#cS$_ t|=ľm3O>O%K{H>÷gחO~Χ1>" ռN}|/s6y" Y[i3rl<oϿΏV{5|1yqڃ|ܸ~ޮ~ڊ9/#!<ϏϟC~O`ߦ bΦ}] h_mM|wKr:^mޥՏ\! UƷk7)ګvZ,ǗܗѾj֝ݛqi&_$q_ υ7=LOJ@((!cvQTrp&n9yiUʨu@CW6tCUwK_puڑ=PeSL: _{NAmaGHp8tZC}N=q ;cnݨ&GVVTmZX -bp&k Ǖٶ֋wmjIm N*w@u7e+:E96ߵzʢI?@=w8i޿oB?G8 ށwFN[Fmv]qno X\ S:r{`6T]XuS?Tn z`WQd'PwVPQw!o>pBqnzWF _=χχxs7䚋QkKFSŜK{nBK=l*X"H6) <+N>@kREhCZPb6 P&5,&™*sB3,D) c6VJLRCXRA%N0Fɘ)Tʂd쑨Y611+=Ѱ *U%._zOH?\aN 3v aHzAh.MDQTX,D6100()P!h]m"mzvP,z?+Lrstanarm/vignettes/mrp-files/interaction.rda0000644000176200001440000000113313540753420021017 0ustar liggesusers r0b```b`fcd`b2Y# '+I-JL.c`` i^ -FfĎQfI-K) %yə%PfT490pҀ/1F;0h-FK24 B>$<w:ho`u.ۂ%OLV:? 
KH9_}y'2˾ܛ}So?H~fWbJ{ы]OV4;2ͺI,=mmKק-~u(n ɰR[O\5%'hՕV=!mJ5]_s5zÙ;oU_rsܭ x.~iuO;Ϳ9~vUدs=38>-v_%ylv"+-.?_ԙ1&AaO=}߬~.Гs^bn*,9B92rRaĒ"X`M˱U&d*SЬ,/׃YL @o CgY˃@ɕXV4fz8frstanarm/vignettes/mrp-files/mrp_sim.rda0000644000176200001440000006562613540753420020167 0ustar liggesusers E@h@s7D$A/"p#"yˌ{uEYT&B2Y&d2Yطy=}g&o=7]U9U9㤳?E>ώ{D~qa+ݑݎ|vwW6D;8#)/6|SەT>mh[[__R:m'՛$T}ӌkGRI5w=Sq(.+ޥmGSB~)_ǵ+zsæ=򷴽J{{:W7m)kvZ?Z*>'Ŷ+I^s3#I(ŖO:Jsz*Xo.y{Sl{hv(5I7W=MO5כJI+/qGSRvSh<gR:jb5Վiۑ6׾b\z+')R=Ii.;&o)=%#.KOjWUSXK٧v5UNSmOSck_jw_-n8L[zX{'\IhR7hj)6c'K[?*'htMN/mM[V?9vJs[(jW5m9T9ebgv455ul\j~TZz4W'mMWSY;Q:-e檯RRoSi쿧{r3MGK$(|6n$mz|=5?T|m.yŖ+6\.zsR{J^s;9劯qGT$\s׷Sn{7󅦖kuSS-5W_5|ZesO*WnK;}rbͽoj-5Rqoy;m,~v;T*V/rַ㶗\bOIn-iҿ;܇;`4|q}O[`Ƕ1>oo\A?rr*\80]14ۑdO]lyW|2ŵ7N8XO[G^O#ڟ4=bш^/4|=qI8/z^٧?3>ǕwQmOLH{[NsR~$=Ώ,珸ǍꋙgzΣq+ipy n qLs.q*qq1qs}ҼNٞXy.]McgZ91od87|;.i=ꍍ1kzQL}qcn롔u'?ĵ&|9 19Mqt\Cz3vŬO$֙wR܉[71M/m%#I"z:&Ώq??\?Iv|\q0mK{?W.;v=N9Fϸ_ۓK~\uoG^i9Iz&Sᘤw?q&iW~4ޒ]WEL=^N}@qK\;x'nqV[w=}Fr:Ǖ/zckOvž.>M͟Z(e(%v%!}oSR뗐>X'ٱbK'ţ3uq iH_:nGL_(}O\MҎ[|7w>)N^_Ĕ/yމ :~&ħRIbיiSy3?O|)QƽoZ}tVb/9ΦRWE-5ZץΧ%W,\ѣv͗~J_캮d{9.Q,5ɍvM;/Q4}by81Ǵ/Jh_L-Mw8O/ҎS#e\N;RRb환?}F˗O?RI~/^I/mK='_Rh0u{Ŏb6g_?mMbsq1?kP8Mݟ;_t,튻8dUw=A^O;;/vO?]|]I9SOَb_y"]1S}>e;Q-m.OףB=?DOvjb}H?F)a7SE1LYoIrK_jʢCLT?-Jӵ+Q^t=xNjGZ+m\*Z^"]bcWb+5>/%7qL;^7^OJE!1ޖx说 ?wפIr:/b͟}*~84.Ӟoo׭IkI+F6_Tכ$'} վ|\:M'g/oORS"7R?}~\eb}7._v]6Χ|^hߤ8ϋ6{)9AiG<.1.pߚgm?|mjS߄I&i^/uݞoq:~cگ/I+u#wBED'.>< t%mc{E|koۡ NnslӗdPo:vu}\0?uqI/v`=yKx<ҕϔqz_'sdsGlt#;9R.U~Q^W`=~>qh/Gݗ|? ! Ѻs|g|b9?G>x/a?6c=7w?.O!^b~?*\@7aqڇr%Ŀ|ali/x^Wtqh}$}wvvf=ϳ㤿/ |a~!}wNcyc~z |a9?GݓO{ gyG~]y?CA:եI_?kE~~}Ǎ.ykw>oa?^}>(HR?ΎK[׭wLnhG/c;Zf:o/?]g{+@\߷.:xvRo__2;-q'k>^ll |a9?/~zڻߜhW]s~opv %8/ Bx?'s5|?>ڱoЏw~Gs|1n{;Gҝ>טzp_pv9|a:!_X˷wy߇q? EQ=p7}w:y{;! 
@?d~+ wܘG1^|G{lٝG{1:_߄Is7?8姿*\G99'Ϸϯ>\ Ǔ\9\Y?W?O:SNISyzr<_ߏIPXx o$o+v\ڳU3ceԋveaL#ڇ{z۟~_Ҟo' 1q~_~!_E@"/dc7PwCn( 廡(1_(1_%( _%( _()_()_|wQ;wG(廣e(_e(_(9_(9_(_W(_W(%_W(%_WU(_WU(_W(5_W(5_W5( _נ5( _נ(-_ע(-_עu(_סu(_ס(=_ף(=_ף|Q@tAq?y=f`o ȿo ȿo ȿo  =!'{B~O =!'{B~O ^ {A~/ ^ {A~/ !7ސ{C~o !7ސ{C~o} >@~} >@~ȿo!Fȿo!Fȿo!Fȿo!F }!/B~_ }!/B~_ ~A~? ~A~?ȿ o &ȿ o &ȿ o &ȿ o &!?C~!?C~ȿo!fȿo!fȿo!fȿo!fȿo[ ȿo[ ȿo[ ȿo[  ?@ ?@ȿo[!Vȿo[!Vȿo[!Vȿo[!V! ?B@! ?B@C~-vۃhOP廂hgl_~ ڟ B/A=/? ͯAAAAAAAAAAAAAAAA0 C0 C0 C0 C0 C0 C0 C0 C!@!@!@!@!@!@!@!@( B( B( B( B( B( B( B( BaAaAaAaAaAaAaAaA8C8C8C8C8C8C8C8CG@G@G@G@G@G@G@G@$ GB$ GB$ GB$ GB$ GB$ GB$ GB$ GBQGAQGAQGAQGAQGAQGAQGAQGA4 GC4 GC4 GC4 GC4 GC4 GC4 GC4 GC1 @1 @1 @1 @1 @1 @1 @1 @, B, B, B, B, B, B, B, BqAqAqAqAqAqAqAqA<C<C<C<C<C<C<C<C '@ '@ '@ '@ '@ '@ '@ '@bv 1@vx0L;M<~&~́'gKAy5`7ÕCAaXp"xL ?MM| >39 :q Xu`1VE _WU _WU@ :_W@ N* *WUU*j"WU _WU_WM* U_UU_U_5_ WW5_ Wwj5_j>j" _ Wy _ Ww5_ W5_=9;W&?t]kjbdKG!ntǿvygW^y%_ ջ[o&R's=r?ALow5zqB"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8#DQ;ۃC/_;G_~< <1cǁǃ`;#>>铐> 铑>.HwAOA+]>G(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`ޘ8wь߼vn;GMto+Y{z<v_9fw?SPZho/l@=VvOߙґ;Ldy֎wXOik3gXE'M*kLewݖ䇦ܽ/v]֎5}^MkQUaq{s?{7.SNnˣv~F/=n~瞝m}ڲy;ݸRkǛszG}}ϟzwnd׿fq]ehtuRzg1?d7t2{SV~Kc][ML띭SM V~&EVϛ5;ߵvlQcηr.=lu׍550˞,(wqEvk`kkn[|2#_kG \Vn%vo^ZhW[r]bz7|ڸ[7ʭzWmv,?@{X5_r-Yr-^r6 ^xنxUn;&WW6v>.lQ]ߎxuo K4˿jk/gVU\īMr[^yr.4ZmnzxrcVjū\Նv~զ*k6W7C.զo@?īWu_4AW"^j܈xUxUxj=JīU{[5W+ߍz[WαֽaZxUnīU߆īu_ ^ա?kZ=VYz˷ j U Ζo%jīՈW[5ƺ^W^F{a g ^q>(x5j. 
īWӿf_Ax5\e^ ^ʹKn吳ҳnf?30g"^Ax+?ҋ>mʽ(_g ^Fx5W3深!^-!0+7J뫻-W_+W+ݖ_+^YW&/cJw[~Z~Z4yT+^Y~+W)^y+xe,ɍWO`p;,}~iW6_}mN ;_;>S&omPk`rɍsڶn4m[MV_L7x;=v`߷[_oWboAY}&Y=n3^c\[oX;+WT kϳ-]r/ zVv2ke!$/EU[t˿ra>Yv/i9WC oAv}U^Y yaU+k0YC( r_zJؔ[dr+;L5ў[,B\K/S1Χ8V~=~zq;*떞~aqm|tb}z 3ug/k!^Gxt3ɮ/ \cu|īe^ā]#܋CM,īyW/#^MGx5iīWs eī|^Bq|īfc>x5- ī=H1v#Cv sï_!^BN&ī9W~W ^=|{6D8ÈWOc4!1īG=#ѮMb]-Z ī|=x5|j`43WO!^=s{zl}:F;A?zzrqgZ_Wh`+]W2*^YZ lt?~~lr?x+^Y~+W |WV|Wxe!W v@n7:mNN;ـ%lHu/r'ݿ[34`I5;7V~~*7؎%[{W{-nb{7֎-_&@uOOYzUa]߆Vڵmc9hV˿֮X}![Y-_Ry`UGXk/vad18G5[{05_:<ZoauGX^ճP|h>e~ڷ]c[v~;Y~{)&~Sr扵Y߁3n\gb{̳tç7#P/;:dq>%-ž߷r x^}Λ^~0pqZw&A-?g =]|?|sWC;B-x_~ @Ў`o~gysOE]}j}(7wxj<īW5FZ.Cfg#^ͳ4 ^FZIAZx닥W a_RīEW5W?`]=j+!^EZx56˿j6bGZ9xUx_ī1x埾_b ^=B?{鳖~jSOS[zZԃxVK?xӈW/AzY x_Ջ/SE M^y'!?+gZځuW^WOî"^={_Dv'x,3Z_^LމJSU2y'Z:+JVD4yTzWW)^╵WU^VavnʽvW3ƓY?Nwvܫ'UZlz8/69,rk{wh;gL~&\M Lottn CHk=֮G VNfM_Ͽ޾4oν>7{n{밝5gw[0{6Tޜ`ox됻wrM[M-[-cM^O,ߖDc]yfǭe_շ i^noߘj 9ۭB|Wi%_`*Ƚ[լ n_fvk-^NMr[WZuWkhvX[|W,^͵rj!^;Dk][Qnū\WލWuO@U~W=L.F[یxU{;x۲Nnxkx{߀x~߄oXx볍W{-r[,^l@pܚVn=jī [+j#^-Cڀxr[xjXK/]_x2 W5땈WkVaՈWkVo_W:kNˎoCZu@X[lWbܯjjV nzCZrV"^D7oLīyKZz:9n<Ws^/W/!^@x5o`_līWV#;cYiوW3"^x5~)KW  ^MGx |>l<ī~ī9W.^Ak}eel}x+^Y~+X ~ШAA+˧xWF+xeL/+xeLnLZF`ƵYantwqw ~bmXxIY}]q3;+Uا~;_{pտ@};֮;ڳ+څ&跏x8f5}u,*ħ5ܹLoٴꭝvEE׶KLzjOj$dW4.42- =' ?7`XUx۸t_'l7;|\5r1,{|H;_۳z?@gՇ` v|п_qݦ ˲*˿jžͲN,V̙/gu?mFV߳3<q<;MQ yY=CgW^A?6Ю|~ɟ?r]q㬽-L|h6P9Ʃ5c1; ~K67"uW oRīWˎΞnqxt7v[9W3C4jFZx{շlgռ-X>j 9].īGf"^-ks +.Ft0wZ@.Vī٧]gxj1?g͝h:ZzZ_Gx5E엜x5q*īf!^8vA}' ?#^E}3^B~2 īY3ng#^M9Ov~jռ3qi,ī>9OCNސx2sQ/s^FzvƧGzjCWOGy %a|qÈW{]GXӰˤ},#wO?:8=A.գ~=a ^M؇]'E}5L2 X={W_+˯իJҊWȯxe*^+"-^_zcv~-d7aXڿ'&ooP/{:t>VϢGL#3[EWFE+,P ,ӯzġ'Xz>̆r9a`?r;B| ' /v`?a_ܱ~<|g&l㷨T~K ԋQ5?#x5j9j9W5j + jVwj/%GZ9Ks?jf}Ӯ:˝xj ԋxv}>օ Aj.ɐx|qUrӒw_||#^-Dx5jV~ ڿjo,=嚌SXOCzv})ī>|WODgrGߏzj8#^=x"SW#^M9C(x̃8x,K0r;#~Iī^@SBz뻧o|v,Az)ī) WSyՔ+NCjZ_Y~^L^WW _+^YW&/cJ5__VM^j,WzL╝WW&O+^v͎;Q}6n\>{_vgKݭ7{jZ?p|6 
|nqYn~0QζDю`H:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСCG:*DQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQQrCEQEQEQEQEQEQEQE3((((((((fmEQEQEQEQEQEQEQEQrBEQEQEQEQEQEQEQE3((((((((fN(((((((("8 QD(e.EQl%zOE1((EQl%B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fb(jG|`{`xKa`'H7oGG<6x,8x̑ycg;#}' }"'"}'!}2'#.H)HwE+ҧZZa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0F3QKT.Ώ{밍3>3^ܓW_5rw<Uq*W?~`K:\uh}?PO5Kp~S\ݲɿݯ|ym?WޮϐPm<%'~aAwO s7wxa5|^ć*_zof⎠_mn|{5_ᐟzS>۟S|Uύ𕪭 va~ʡݘt̲WNyvmU?:T.yeXŜO.\:2I=+Xӎԟ~CA6=soNd;Yc?|N>CG]B=3^8ceO)gïj?;T>G+qpAC0?nLNl/=!`Oll>#l{T zoAþ7cW=Y/? 3?=Y١]ЏϠ7l:Gv١ >OM.Ӵ?!vO0n/^|/20zW=/ԛ0!]9YI/ߏwڏ~jC0?nL{08~IkWO\|3O=22?@ڡk#/v~ԛr|aq~`C0?nL~9^N_ϳ}Onvs|u//z9n'|@8oOk/0Nup^rƏyF6WVtߏY苣Wqsnt}G6󤭕.T٭rS:mGGdvk>$w)_:UwH'ϬKsP=׆zoSkʥ%Ի[UrofU>s?Pς;ܶ8 ^qmrY_}~U9>5?'ޔ񈋷?poi.O9Ӵ;x`17\qGjsK=]93>zO7j'vҮԟa=O5wLϩeb9C>ڑK}{h z[?{^ާ@~)#Yp_]6cڅl/g!88kos\lgz˩!n缸g<ậ'n uaM =sv!^s_jC=ߌO)_סn׺iӕ~%+ vcvb{9yUmu.Ϝ|;iN?iG\OP?g$z]\'ٯ7l{m衍C0?nL~߳QP/Shl>#~IwPO+ԏWޔSzÏ?7x#%7^O[u!^s`}yCg90_Od?<7F&Я^N_ϳ}#s]zoG~kz9n6p1AOC;u!֍kg/lg>wO_r+ ǯ3bvb{?!  vf=~==?ӯi_<9~O~y_Q a>wuOwǼ_4V?&cOq}`A=X/S쏮pF}`G^77Is|`Ws|\r6~z=&.Ǘd{8l-|;Q;iWvyӎԟ'h70N:|_B=3;hG#r{kЏ_7t3}m}Ua>q>>s]?7Ġۡb` '? ~ٯPoW]Lz~ǥ/5!{ڑ׹O}(r>#|הFiX/ܮ>s4ܧݿ1ؓ~ q>7c rsn|{z_\}l' P#s=v$zo7?z9nA='53r6_wj. rh?bv a}muo' Pge?9f=ޮ U*%f=Å ߣ:ɿc?d-wvzx>;}"Xgg6uwkz9n~ͯkúB~.c?!g7ܐB|~%6L?\؉96[k3揻ZA=X/˅! mvuߟ\ۿ5vV'a|q_ʹ7su'p ̍/9g識^.G| z{;KoQ]x}`ڵ}ei:K&5&>k&QsЯ9~籸L;:egzmM[>;~K?_ ~_LuB=^\9w\{?PC(Ws` ~nLN>sѯs?ܮ@>Sy?"M,q?ܻe~~>%ztԣOk<O a?uZ|9{kyw|>}߅a>>󿋳ا|z9n+2]Q ~A1]G0vҮOi8ԟ|ˌ x ko|}v~)Oۂomvan JC\o^ ~vz׹/>#||߳xϴb0>gh/_ӿ 0ݎ%>߽1nU/gwZ_|;~[vS~ujg:p >F3s]?d;}=ᾍN?c=9nq?k;3r6_Ysp_n|Y8o1Ά܇a?n߇'v?0O;W}F6^P!nEطD/l=S+ ]vمÍ vb{wD8)<i?Ё!1U? 
ٸ}ٮ}at;SU.y[yϯ\?q^s]MܟvO린^юĬο¸x{8{g3{Mo(\gh|lǡ/??F~ܩڋ|~~{Xqմ/{ ǿ-pc}`2G?}3=n=¿#fЛrߗ`څ)vcv*{|q2?~޹??w+$Fgli7#Grߥg!.YK>rW׫o0Uov ~O+C0?nLNl/0~I0A;2n]m4ϻ-/!yiC=C;h7~N9 Sƴ; ~?jF5?jx ;8}UJOSTʧvaWؾ_{;_c#7|rOU.<~ύB=s?gʿ=ĤFzo,86跴gl>G?r+yvy!?˟suZXW~v.O9Ӵ|pչƲg+Wn:W^9c_>i>VvI+Xӎԟj7@0al#/KH#6R_<{з {ho v'ޔS=7rt|C0?nLNl/=!`O,G_;;?:vߑ7a m&֝`OqǓ.]wWГԏx_zSΆ׿_u*׽[%ȡ]ߏ/i'0_xs?Nzr|$w] /\܁4au]bڅ)vcگ{ybgW޽'f;jw/r[߭wqݮnzwJ޽'؍eC-wYK>s~Ynu=߷\zRѤFߗ0cϓXk9"!C;yϓ9{7ynU8> qw] }dA,gIƉ~ RKv}e/iWK6[k9> v󌏛q]g=>v4!록 ߛL _/ƧǢ]p?įCJ~/cdMQߏ?J]K}>6%>oi?\wv}v_Gs>Aƴn>f}-a~vҮ~C#uۯ:%g'NJp!록_oҎO)K=.;`=[|/ؑ~|o]?'eg?d5؟X.<wz <ܗv Ym:SXP/ {}$} )7*|Fiډ8v=ov=oo-ۛguqH3S]Ew%N?>_7'?S-7Z;+C=ޮ ׋Ǫ v~<~̦hpKX/ Ͼ eg?d3a9Ca_ofo߬{;3N۵r}o7vEط~FBNmt؇^ῂ]߯Ϙ}ͻq{k"K/us]Ao?~?~`_M/#{=RƫfW[Lv:!^ūʄxU2^U&īʔjvujx5ū~W =HAEm7*vo&\>{_vgKݭ7m/=_fA{6}nqYn;\^ݮ{ջg<ѻguVG L)D rstanarm/vignettes/mrp-files/plot_data.rda0000644000176200001440000000115713540753420020455 0ustar liggesusers r0b```b`fcd`b2Y# ',/OI,Id`` i9 P<L;73Ǽ$^u" M8_o.G6@ԙ1@H:*_/{1h ]~i2]vhɀS579=p!}0?_pwuO6?X|Տ,vi1vyA#P{P>йBڟ9s_ecc9[h|=l?OIvدc`*elL[v_@МYj3wΜqsv<.N?̼`69 i-/t@'}R?{_KK;fCac#n' 78Q@03bCGLx0u( -',5BEs rR<<4}90mpKA (7F&"#ZYRIKfdKϥ aT܁ ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(collapse = TRUE) knitr::opts_chunk$set(eval = identical(Sys.getenv("NOT_CRAN"), "true")) library(rstanarm) ``` ## Abstract This case study shows how basic A/B testing using Stan and Bayesian methods can be used to facilitate business decisions. In practice, we find this approach useful given its ability to quantify domain-specific business knowledge and hypotheses through the use of prior distributions. Instead of using p-values and confidence intervals, we are able to perform inference with _probability_ intervals estimated from posterior predictions. In addition to the results being highly interpretable, this approach allows us to quantify business risk. ## Introduction A/B testing is an experiment. 
It is essentially a randomized controlled trial in an industry setting. The test or experiment is conducted on a subset of users in order to determine if a change in service (e.g. user experience) will have a positive impact on the business, before rolling out that change to all the users. (Here we consider _static_ A/B testing where inference is performed after the experiment. For A/B testing where the users are _dynamically_ allocated to the outperforming group during the experiment consider _multi-arm bandits_.) Here are a few stylized scenarios where A/B testing could provide useful insight: * If you change the order in which information is displayed on a service will users spend more time on the service? * If you personalize the cover art for streaming content are users more likely to stream that content? * Are users more responsive to a revised recommendation system? * Is there a noticeable difference between the three point accuracy of two basketball players? * In a drug trial, is there evidence that the treatment is better than the placebo? Typically, A/B testing involves one group of people being served the existing content (control group, group A) while another group is served different content (treatment group, group B) and, through a measurable indicator, the business wants to determine if there is a difference in reaction between the two groups. If we compare the two groups and find that the difference in the indicator is large (relative to our uncertainty) then we can argue that the different content drove that change. Conversely, if the change was minimal then we may be hesitant to conclude that the different content resulted in any change in behavior at all. In that situation perhaps the content needs to be redesigned and retested. Most A/B testing approaches used in practice typically rely on frequentist hypothesis testing methods. Not only are the results of these methods difficult to interpret, but they can also be misleading. 
Terms such as "p-values" and "confidence intervals" are often misinterpreted as probabilities directly related to the quantity of interest (e.g. the difference in means between two groups). P-values are also often used as cutoffs for business decisions. In other words, reaching a statistically significant result is often sufficient to convince a business to move forward with a particular decision. We argue that these decisions should not be reductively derived from arbitrary cutoffs (e.g. a p-value of less than 0.05). Instead they should be determined by domain-specific experts who understand the industry, with statisticians providing interpretable results that can help these experts make more informed decisions. This case study provides a way for domain-specific experts to apply their knowledge to the statistical inference process of A/B testing through prior distributions. Additionally, the experts can quantify the risk they are willing to take and probabilistically incorporate this into the inference. Some key benefits to the Bayesian approach outlined in this case study include, * Allowing domain-specific experts to apply their knowledge and appetite for risk to statistical inference. * Modeling the data rather than defining/computing a test statistic from the data. (This allows us to perform inference on the (predicted) data instead of the parameters.) * The ability to describe differences in groups probabilistically rather than using traditional hypothesis testing methods. * Quantifying null hypotheses in priors. We use simple examples to show how to apply Bayesian inference to A/B testing using continuous and count data. The examples used here are analogous to the t-test and the Fisher's exact test, but the methodology discussed can be applied to data that follow other distributions. 
The first section considers continuous data (assumed to be generated from the normal distribution) and the second section considers count data (assumed to be generated from the binomial distribution). (If you need a refresher on how convoluted hypothesis testing is, Appendix A goes over the interpretation of p-values using a two-sample t-test as an example.) At a high-level, we stress that frequentist methods focus on the distribution of the test statistic as opposed to the quantity of interest (i.e. predictions or parameters). In such methods inference is done by understanding how the observed test statistic compares to the distribution of the test statistic under the null hypothesis. Alternatively, the Bayesian approach proposed here allows the statistician to perform inference directly on the quantity of interest (in this case predicted data), which is more transparent and informative in the context of A/B testing. ## Continuous Data _This example is analogous to the two sample t-test (specifically Welch's t-test) where the statistician is interested in testing if there is a noticeable difference between the means of two different samples._ Suppose an online streaming company is interested in testing whether ads affect the consumption of their service. The hypothesis is that reducing ads will increase hourly streaming consumption. Since this decision can be costly if a significant amount of revenue is derived from ads, it would be useful to conduct a test to evaluate the impact of ad reduction. One way to test this is to draw two random samples from the user base, serve them with different levels of ad content, and see if there is a substantial difference in streaming consumption (say hours per day). Suppose we treat the two groups in the following way, * Group A (control): streaming service contains ads. * Group B (treatment): streaming service contains no ads. The data collected might look something like the following below.
Each observation is a user's average daily streaming consumption in hours. Suppose we also have an additional (binary) variable `hc` which defines whether a user is predisposed to being a _high consumer_ of streaming content (a value of 1 represents a high consumer and a value of -1 represents a low consumer). ```{r} set.seed(123) group <- c(rep(1,10), rep(2,12)) group <- factor(c(rep("A",10), rep("B",12))) N <- length(group) hc <- sample(c(-1,1), N, replace = TRUE) effect <- c(3,5) lp <- effect[group] + 0.7*hc y <- rnorm(N, lp, 0.5) experiment <- data.frame(y = y, group = factor(group), hc = hc) experiment ``` In order to determine if there is a difference between the groups we need to define a model that predicts the outcome for each group. The data has been generated from the normal distribution so it is appropriate to specify a normal likelihood. (Often we do not know how the data is generated and have to make an assumption about which distribution should be used to model the likelihood.) Since we are _modeling_ the outcome we can include other variables, such as the high consumer indicator `hc`. Traditional hypothesis testing methods are focused on comparing the outcome of two groups. Here we model the outcome before comparing the groups. This allows us to include additional information in the model which will enable us to perform more granular inferences. Next we need to specify prior distributions on each of these parameters. This is where the domain-specific expert can provide valuable input. For example, they may believe that (due to poor sampling) the sampled average of daily streaming hours is too low for each group. In such a situation a prior can be applied to coerce the estimated average closer to the value they feel is more appropriate and representative of the population. Putting these pieces together gives us the model below. $y$ is the outcome (average streaming hours) and $\sigma$ is the residual standard deviation (i.e.
the standard deviation of $y$ conditional on the parameters and the data). $\mu$ is the parameter associated with the variable $group$ which defines group membership, and $\beta$ is the parameter associated with the high consumer indicator. One limitation of this approach is that $\sigma$ does not vary among groups. However, in this case it is sufficient to assume that the outcome of both groups has the same standard deviation. (In order to allow the standard deviation to vary among groups the model would have to be fit in [**rstan**](https://mc-stan.org/rstan/), which would require defining the model in a Stan file.) $$ \begin{align*} y_i \sim &\mathcal{N}(\mu_A \cdot groupA_i + \mu_B \cdot groupB_i + \beta \cdot high\_consumer_i, \sigma) \\ \mu_A \sim& \mathcal{N}(3,1) \\ \mu_B \sim& \mathcal{N}(3,1) \\ \beta \sim& \mathcal{N}(0,1) \\ & \mbox{(default prior specified on } \sigma \mbox{)} \end{align*} $$ With regard to priors, we have applied $\mathcal{N}(3,1)$ distributions on both group effects. The reasoning behind this is twofold: 1. Based on prior knowledge (past data and/or domain specific experts) we believe that users spend around three hours per day on the service (regardless of what our random sample says). 2. We allow the hyperparameters for both groups to be identical to quantify our belief that group B (which received the treatment) is not substantially different from group A. This can be interpreted as incorporating the belief underlying our null hypothesis into the prior. More importantly, this approach allows us to be more conservative when we do our inference. If we end up concluding that the two groups are different, we can say that the difference in behavior was so strong that it overcame our prior belief that the two groups are identical. Now that we have established our model, we need to fit the model to the data so that we can estimate the parameters.
We can do this using the [**rstanarm**](https://mc-stan.org/rstanarm/) package which can fit a Bayesian linear regression model (using the `stan_glm()` function) without an intercept, and with group membership and additional variables as parameters. We fit the model below. ```{r results='hide'} fit <- stan_glm(y ~ 0 + group + hc, data = experiment, family = gaussian(link="identity"), prior = normal(c(3,3,0), 1), seed = 123) ``` Recall that Stan uses a sampling algorithm to estimate the joint posterior distribution of the parameters which means that we have samples instead of point estimates for the parameter values. The medians for each parameter are provided below. ```{r} c(coef(fit), sigma = sigma(fit)) ``` With these estimates it looks like Group A had an average consumption of about 3 hours while Group B had an average consumption of about 5 hours. This gives us a difference in consumption of approximately 2 hours. Unfortunately, this assessment does not say anything about how uncertain this difference is. We would like to be able to say something like "we are $p\%$ sure that the two groups are different enough". We can quantify the uncertainty of how different the two estimates are by computing sample quantiles on the posterior predictive distribution. This is often referred to as a credible interval, although the preferred term is _predictive interval_ when describing predictions (and _posterior interval_ when describing parameters). If we compute the $90\%$ predictive interval then we can say that $90\%$ of the posterior predictions for that group lie between that interval. In order for us to evaluate whether the two groups are different enough we can compute the [overlap coefficient](https://en.wikipedia.org/wiki/Overlap_coefficient), which describes the overlap of the prediction intervals for each group as a proportion. For example, suppose there is a $15\%$ overlap between the $90\%$ prediction intervals in each of the two groups. 
This allows us to say, given that we are $90\%$ certain about where the predictions lie, there's a $15\%$ chance that the two groups are similar. The functions below compute the proportion of overlap between the two groups.

```{r}
#' Quantify Overlapping Proportion
#'
#' Compute how much of the smaller distribution overlaps with the larger
#' (i.e. wider) distribution.
#' @param large Posterior predictive samples that have larger range than \code{small}.
#' @param small Posterior predictive samples that have smaller range than \code{large}.
#' @param p Probability to compute prediction interval.
#' @return A proportion between 0 and 1 indicating how much of \code{small} is
#'   contained in \code{large} given the credible interval specification.
overlap_prop <- function(large, small, p = 1) {
  # central interval of `large` with total probability p
  p_lwr <- (1 - p) / 2
  p_upr <- 1 - p_lwr
  large_ci <- quantile(large, probs = c(p_lwr, p_upr))
  # proportion of `small` draws falling inside that interval
  # (mean of a logical vector is the proportion of TRUEs)
  mean(small >= min(large_ci) & small <= max(large_ci))
}

#' Quantify Overlapping Posterior Predictive Distributions
#'
#' Quantify the overlap between posterior samples from two distributions.
#' @param a Group A posterior predictive samples.
#' @param b Group B posterior predictive samples.
#' @param p Probability to compute credible interval.
#' @return A proportion between 0 and 1 indicating how much of the credible
#'   intervals for \code{a} and \code{b} overlap with one another.
overlap <- function(a, b, p = 1) {
  # diff(range(x)) is the width of the sample range; the original used
  # dist(range(x)), which misuses stats::dist and returns a "dist" object
  length_a <- diff(range(a))
  length_b <- diff(range(b))
  # always measure the narrower sample against the wider one's interval;
  # a plain `else` covers the remaining case, so `out` is always defined
  if (length_a >= length_b) {
    overlap_prop(a, b, p)
  } else {
    overlap_prop(b, a, p)
  }
}
```

Below we compute the $0.9$ prediction interval for both groups. Note that the prediction interval choice is arbitrary, and may vary depending on the applied context and the appetite for uncertainty. This is also where we recommend getting input from domain-specific experts.
In this case we are willing to accept a $10\%$ chance of being wrong about where the predictions lie. The closer the prediction interval is to $1$ the more risk averse the business is with regards to inference. ```{r fig.align='center', fig.height=8, fig.width=6} pp_a <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = experiment$hc)) pp_b <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = experiment$hc)) pp_a_quant <- quantile(pp_a, probs = c(0.05,0.95)) pp_b_quant <- quantile(pp_b, probs = c(0.05,0.95)) overlap(pp_a, pp_b, p = 0.9) par(mfrow=c(2,1)) # group A hist(pp_a, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a_quant[1], lwd = 2, col = "red") abline(v = pp_a_quant[2], lwd = 2, col = "red") # group B hist(pp_b, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b_quant[1], lwd = 2, col = "red") abline(v = pp_b_quant[2], lwd = 2, col = "red") ``` After computing the $90\%$ prediction interval for both groups we find an overlap proportion of approximately $0.25$. Thus, given that we are $90\%$ sure about our posterior predictions for the two groups, we are about $75\%$ sure that the two groups are in fact different. Going back to the business context, we can conclude that we are $75\%$ sure that reducing ads increases daily streaming consumption given our acceptable risk of being $10\%$ wrong about daily streaming consumption. Since we modeled the outcome using a predictor (in addition to group membership variables) we can vary the predictor as well as group membership for an observation for more detailed inference. Below we plot the prediction intervals for each group and high consumer variable combination. This allows to us compare the difference in average streaming hours among the two groups for those individuals that were categorized as high/low consumers. 
```{r fig.align='center', fig.height=6, fig.width=10} pp_a0 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = -1)) pp_b0 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = -1)) pp_a1 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = 1)) pp_b1 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = 1)) pp_a0_quant <- quantile(pp_a0, probs = c(0.05,0.95)) pp_b0_quant <- quantile(pp_b0, probs = c(0.05,0.95)) pp_a1_quant <- quantile(pp_a1, probs = c(0.05,0.95)) pp_b1_quant <- quantile(pp_b1, probs = c(0.05,0.95)) par(mfrow=c(2,2)) # group A, x = 0 hist(pp_a0, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A (hc=-1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a0_quant[1], lwd = 2, col = "red") abline(v = pp_a0_quant[2], lwd = 2, col = "red") # group B, x = 0 hist(pp_b0, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B (hc=-1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b0_quant[1], lwd = 2, col = "red") abline(v = pp_b0_quant[2], lwd = 2, col = "red") # group A, x = 1 hist(pp_a1, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A (hc=1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a1_quant[1], lwd = 2, col = "red") abline(v = pp_a1_quant[2], lwd = 2, col = "red") # group B, x = 1 hist(pp_b1, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B (hc=1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b1_quant[1], lwd = 2, col = "red") abline(v = pp_b1_quant[2], lwd = 2, col = "red") ``` In the plot below we show how the overlap proportion will vary as the prediction interval varies. To put it differently, it shows how the probabilistic difference between groups varies as risk varies. Notice that the more risk we take when defining our prediction interval (i.e. 
the closer the prediction interval is to 0) the lower the overlap proportion, and consequentially the more apparent the difference between the two groups. ```{r fig.align='center', fig.height=5, fig.width=5} # prediction interval probabilities ci_p <- seq(0.1,1, by = 0.05) # compute proportions overlap_ab <- sapply(ci_p, function(s){overlap(pp_a, pp_b, s)}) # plot plot(ci_p, overlap_ab, type = "o", pch = 20, xaxt = "n", yaxt = "n", main = "Group A vs Group B", xlab = "Prediction Interval Probability (1-Risk)", ylab = "Overlap Proportion (Group Similarity)") axis(1, seq(0,1,by=0.1), cex.axis = 0.8) axis(2, seq(0,1,by=0.1), cex.axis = 0.8) abline(v = 0.5, lty = 2) ``` ## Count Data _This example is analogous to Fisher's exact test where the statistician is interested in testing differences in proportions (particularly in the form of a contingency table)._ Now, suppose that the business wants to know whether a product sells better if there is a change to the online user interface (UI) that users interact with to buy the product. They run an experiment on two groups and obtain the following results, * Group C (control): 10 users out of a sample of 19 purchased the product with the default UI. * Group D (treatment): 14 users out of a sample of 22 purchased the product with the alternative UI. Here we can assume that the data is binomially distributed, in which case we can define the model for the two groups as follows, $$ y_i \sim \mbox{Bin}(\mbox{logit}^{-1}(\mu_C \cdot groupC_i + \mu_D \cdot groupD_i), N_i)\\ $$ where $\mu$ is the parameter for each group, $group$ is a binary variable indicating group membership, $y$ is the number of users that purchased the product and $N$ is the total number of users in each group. Below we fit this model to the data.
```{r results='hide', message=FALSE, warning=FALSE} experiment_bin <- data.frame(group = factor(c("C","D")), y = c(10,14), trials = c(19,22)) fit_group_bin <- stan_glm(cbind(y, trials - y) ~ 0 + group, data = experiment_bin, family = binomial(link="logit"), seed = 123) ``` Similar to the method described in the previous section we compute and plot the $90\%$ prediction intervals for the posterior predictions in each group. We also compute the overlap proportion of these two sets of predictions. ```{r fig.align='center', fig.height=5, fig.width=10} # pp_c <- posterior_linpred(fit_group_bin, newdata = data.frame(group = factor("C")), transform = TRUE) # pp_d <- posterior_linpred(fit_group_bin, newdata = data.frame(group = factor("D")), transform = TRUE) # below doesn't work as expected (predictions are bigger than the number of trials) # pp_c <- posterior_predict(fit_group_bin, newdata = data.frame(group = factor("C"), trials = 19)) # pp_d <- posterior_predict(fit_group_bin, newdata = data.frame(group = factor("D"), trials = 22)) pp <- posterior_predict(fit_group_bin) pp_c <- pp[,1] pp_d <- pp[,2] pp_c_quant <- quantile(pp_c, probs = c(0.05,0.95)) pp_d_quant <- quantile(pp_d, probs = c(0.05,0.95)) # compute overlap overlap(pp_c, pp_d, p = 0.9) # plot # group C par(mfrow=c(1,2)) hist(pp_c, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group C", xlab = "Product Consumption", xlim = c(0,25)) abline(v = pp_c_quant[1], lwd = 2, col = "red") abline(v = pp_c_quant[2], lwd = 2, col = "red") # group D hist(pp_d, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group D", xlab = "Product Consumption", xlim = c(0,25)) abline(v = pp_d_quant[1], lwd = 2, col = "red") abline(v = pp_d_quant[2], lwd = 2, col = "red") ``` Looking at the histograms it's clear that there's quite a bit of overlap between the two groups. The overlap proportion is about 0.7. 
So under our $90\%$ prediction interval, there is a $70\%$ chance that there is no difference in behavior when the UI changes. This might suggest that we don't have strong evidence that the UI change encouraged a change in behavior. Below we show how the overlap proportion varies based on the amount of risk we're willing to take when we define our prediction intervals. Similar to the continuous example in the previous section, risk is inversely related to group similarity. ```{r fig.align='center', fig.height=5, fig.width=5} # prediction interval probabilities ci_p <- rev(seq(0.1,1, by = 0.05)) # compute proportions overlap_cd <- sapply(ci_p, function(s){overlap(pp_c, pp_d, s)}) # plot plot(ci_p, overlap_cd, type = "o", pch = 20, xaxt = "n", yaxt = "n", main = "Group C vs Group D", xlab = "Prediction Interval Probability (1-Risk)", ylab = "Overlap Proportion (Group Similarity)") axis(1, seq(0,1,by=0.1), cex.axis = 0.8) axis(2, seq(0,1,by=0.1), cex.axis = 0.8) abline(v = 0.5, lty = 2) ``` Note, this example involved a really small data set (only one observation for each group). But the same model can easily be extended to many observations within each group. Also, just as we described in the continuous example, we can define a more comprehensive model for the outcome if we had additional predictors. ## Benefits of Bayesian Methods The key benefits that we have discussed include the ability to probabilistically interpret the results of our inference, and the ability to incorporate prior beliefs (i.e. business knowledge and hypotheses) into our models. **Interpretation of probability** With regards to interpretation, there are some advantages with taking a Bayesian inference approach to A/B testing using Stan: 1. The ability to communicate our results using the intuitive concept of probability. 2. The ability to quantify business risk using probability when doing inference.
Quantifying our uncertainty probabilistically enables us to make statements like "based on the data collected, the model specified, and the risk we are willing to take; we are 80% certain that the two groups are different." This is much more interpretable than statements like 'with a p-value of less than 0.2 we can reject the null hypothesis that the two groups are identical'. While this is not exclusively a Bayesian benefit (i.e. we could have completely excluded priors from our models, estimating the parameters solely from the likelihood of the data), we took advantage of the fact that appropriately implemented Bayesian computational methods rely on robust sampling methods. These samples can then be transformed and used to make probabilistic statements about the posterior predictive distribution, and consequentially about the question being asked. **Incorporating prior beliefs** The ability to define a prior distribution on your parameters is a useful feature of Bayesian methods. Prior information can be incorporated in your model with two choices: the type of the distribution and how the distribution is parametrized. The type of distribution relates to which distribution you choose to define on the parameters. In the continuous data example we chose the normal distribution. But, since the underlying data (hours streamed per day) cannot be negative, it might be more sensible to define a truncated normal distribution as the prior (which is straightforward to implement in rstan). This gives us the opportunity to model the data generation process more appropriately. How the prior distribution is parameterized reflects your belief on the value that parameter takes. This gives us the opportunity to quantify business knowledge in prior distributions. In the continuous data example we showed how we parameterized the prior distribution for each group's parameter to capture our prior belief that the two groups are similar. 
A similar approach can be taken for the treatment group in the count data example. With these types of priors, if we concluded that the two groups are in fact different then we could really be sure that the treatment actually changed the treatment group's behavior. In other words, the treatment group's observed behavior overcame our prior belief. We could also tune this belief to be more or less strong by adjusting where most of the density/mass of the prior distribution sits. Applying this type of prior would help mitigate false-positive conclusions from this type of analysis. ## Conclusion Below is an abstracted summary of the inference process we've gone through to compare groups involved in A/B testing. 1. Model the indicator that is being measured to track the difference between the two groups. 2. Compute the prediction interval $p$ over the posterior predictions of the two groups. $1-p$ quantifies how much risk the business is willing to take in regards to the predicted indicator. The value of $p$ should be driven by domain-specific experts. 3. Compute the proportion $o$ of how much each interval overlaps with one another. $o$ defines the similarity between the two groups. After implementing the steps above, we can construct the following conclusion: given there is a $(1-p) \cdot 100$ percent chance that we are wrong about the predictions from our model, there is a $(1-o) \cdot 100$ percent chance that the two groups are different. The Bayesian methods outlined in this case study focused on modeling the data generation process and performing inference on the posterior predictive distribution of two groups. We did not need to worry about computing test statistics and determining the distribution of these statistics under the null hypothesis. Nor did we need to calculate p-values to figure out whether the groups involved in the A/B test are different. Instead we performed inference directly on the posterior predictions. 
By constructing prediction intervals and computing the overlap of these intervals we are able to probabilistically convey how sure we are about the difference between the two groups. Bayesian inference gives statisticians the ability to quantify business information/risk and enables them to communicate uncertainty unambiguously to decision makers, allowing more informed decisions to be made. ## Acknowledgements Thanks to Jonah Gabry and Charles Zhou for feedback on initial drafts. ## References Fisher's exact test. Wikipedia. Available from https://en.wikipedia.org/wiki/Fisher%27s_exact_test. Gallo, A. (2017) A Refresher on A/B Testing. _Harvard Business Review_. https://hbr.org/2017/06/a-refresher-on-ab-testing. Goodrich, B., Gabry, J., Ali, I. & Brilleman, S. (2019). rstanarm: Bayesian applied regression modeling via Stan. R package version 2.17.4. https://mc-stan.org/. Kruschke, J.K. (2015). _Doing Bayesian Data Analysis - A Tutorial with R, JAGS, and Stan_. Elsevier, New York, 2nd edition. Overlap coefficient. Wikipedia. Available from https://en.wikipedia.org/wiki/Overlap_coefficient. Stan Development Team (2019). RStan: the R interface to Stan. R package version 2.19.2. https://mc-stan.org/. Student's t-test. Wikipedia. Available from https://en.wikipedia.org/wiki/Student's_t-test. Welch's t-test. Wikipedia. Available from https://en.wikipedia.org/wiki/Welch%27s_t-test. ## Appendix A: Refresher on p-values Recall that frequentist methods of hypothesis testing involve constructing a test statistic with the available data. Then, using the distribution of that test statistic under the null hypothesis, you can determine the probability of observing statistics that are more extreme than the one calculated. This is known as a p-value. A small p-value suggests a small probability of observing a more extreme test statistic, which in turn means that it is unlikely for that statistic to have been generated under the null hypothesis. 
Since the statistic is computed from the data this suggests that the data itself is unlikely to have been generated under the null hypothesis. The value of how small a p-value should be to arrive at this conclusion is up to the statistician. As an example consider the data associated with Group A and Group B in the continuous data section. The null hypothesis is that the two groups have equal means. Below we compute Welch's test statistic and p-value given the data. ```{r} group_a <- experiment$y[experiment$group == "A"] group_b <- experiment$y[experiment$group == "B"] # Relevant dplyr code # group_a <- experiment %>% filter(group == "A") %>% select(y) %>% unlist %>% unname # group_b <- experiment %>% filter(group == "B") %>% select(y) %>% unlist %>% unname t_test <- t.test(x=group_a, y=group_b) t_stat <- abs(t_test$statistic) p_value <- t_test$p.value print(p_value) # You can manually compute the p-value with the following code # p_value <- pt(-t_stat, t_test$parameter)*2 # you can manually compute the confidence intervals with the following code # group_a_mean <- mean(group_a) # group_b_mean <- mean(group_b) # v <- sqrt((var(group_a)/length(group_a)) + (var(group_b)/length(group_b))) # ci_lwr <- (group_a_mean - group_b_mean) - abs(qt(0.025, t_test$parameter[['df']])*v) # ci_upr <- (group_a_mean - group_b_mean) + abs(qt(0.025, t_test$parameter[['df']])*v) ``` The p-value in this case is really small, approximately zero. We can visualize this result. Since we know that the test statistic is t-distributed we can plot the distribution of the test statistic under the null, along with the test statistic calculated with the observed data. This is illustrated below. The red lines are the (two-tailed) test statistics calculated from the data. 
```{r fig.align='center', fig.height=5, fig.width=5} dof <- t_test$parameter[["df"]] x <- seq(-10,10,length.out = 1e3) plot(x, dt(x, dof), type = "l", main = "Distribution of Test Statistics Under Null Hypothesis", xlab = "t-statistic value", ylab = "t-distribution density") abline(v=-t_stat, col="red", lwd=2) abline(v=t_stat, col="red", lwd=2) ``` Given the small p-value we can make the following sequence of conclusions: 1. The computed test statistic is unlikely to occur under the null hypothesis. 2. The data used to compute this statistic is unlikely to have been generated under the null hypothesis. 3. Therefore the null hypothesis must be invalid and can be rejected, allowing us to conclude that the two groups are different. Notice how far removed we are from the data and the observed data generation process. Once we calculate the test statistic we step away from the distribution of the data itself and start dealing with the distribution of the test statistic under the null. We were also unable to encode any prior belief or business knowledge into our inference. ## Appendix B: Hierarchical Example Here we show how to use hierarchical (or multilevel) models as an alternative modeling approach when performing A/B tests. Using the data in the continuous example we want to build a model where we account for group-level intercepts while allowing information to be shared among the groups. $$ \begin{align*} y_{i=A} \sim &\mathcal{N}(\mu_A + \beta \cdot high\_consumer_{i=A}, \sigma) \\ y_{i=B} \sim &\mathcal{N}(\mu_B + \beta \cdot high\_consumer_{i=B}, \sigma) \\ \beta \sim& \mathcal{N}(0,1) \\ & \mbox{(default priors specified on covariance matrix and } \sigma \mbox{)} \end{align*} $$ Below we fit the model. 
```{r results='hide'} fit_hier <- stan_glmer(y ~ 0 + (1 | group) + hc, prior = normal(0, 1), data = experiment, family = gaussian(link="identity"), seed = 123) ``` ```{r} coef(fit_hier) fixef(fit_hier) ranef(fit_hier) ``` With this modeling approach we can perform the same inferences as we have shown above while accounting for the hierarchical nature of the data. rstanarm/vignettes/count.Rmd0000644000176200001440000002477014214422264015720 0ustar liggesusers--- title: "Estimating Generalized Linear Models for Count Data with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate generalized linear models (GLMs) for count data using the `stan_glm` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 for Poisson and negative binomial regression models using the `stan_glm` function. # Likelihood If the outcome for a single observation $y$ is assumed to follow a Poisson distribution, the likelihood for one observation can be written as a conditionally Poisson PMF $$\tfrac{1}{y!} \lambda^y e^{-\lambda},$$ where $\lambda = E(y | \mathbf{x}) = g^{-1}(\eta)$ and $\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor. For the Poisson distribution it is also true that $\lambda = Var(y | \mathbf{x})$, i.e. the mean and variance are both $\lambda$. Later in this vignette we also show how to estimate a negative binomial regression, which relaxes this assumption of equal conditional mean and variance of $y$. 
Because the rate parameter $\lambda$ must be positive, for a Poisson GLM the _link_ function $g$ maps between the positive real numbers $\mathbb{R}^+$ (the support of $\lambda$) and the set of all real numbers $\mathbb{R}$. When applied to a linear predictor $\eta$ with values in $\mathbb{R}$, the inverse link function $g^{-1}(\eta)$ therefore returns a positive real number. Although other link functions are possible, the canonical link function for a Poisson GLM is the log link $g(x) = \ln{(x)}$. With the log link, the inverse link function is simply the exponential function and the likelihood for a single observation becomes $$\frac{g^{-1}(\eta)^y}{y!} e^{-g^{-1}(\eta)} = \frac{e^{\eta y}}{y!} e^{-e^\eta}.$$ # Priors ```{r, child="children/stan_glm_priors.txt"} ``` # Posterior With independent prior distributions, the joint posterior distribution for $\alpha$ and $\boldsymbol{\beta}$ in the Poisson model is proportional to the product of the priors and the $N$ likelihood contributions: $$f\left(\alpha,\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N { \frac{g^{-1}(\eta_i)^{y_i}}{y_i!} e^{-g^{-1}(\eta_i)}}.$$ This is the posterior distribution that `stan_glm` will draw from when using MCMC. # Poisson and Negative Binomial Regression Example This example comes from Chapter 8.3 of [Gelman and Hill (2007)](http://www.stat.columbia.edu/~gelman/arm/). We want to make inferences about the efficacy of a certain pest management system at reducing the number of roaches in urban apartments. Here is how Gelman and Hill describe the experiment (pg. 161): > [...] the treatment and control were applied to 160 and 104 apartments, respectively, and the outcome measurement $y_i$ in each apartment $i$ was the number of roaches caught in a set of traps. Different apartments had traps for different numbers of days [...] 
In addition to an intercept, the regression predictors for the model are the pre-treatment number of roaches `roach1`, the treatment indicator `treatment`, and a variable indicating whether the apartment is in a building restricted to elderly residents `senior`. Because the number of days for which the roach traps were used is not the same for all apartments in the sample, we include it as an exposure, which slightly changes the model described in the __Likelihood__ section above in that the rate parameter $\lambda_i = exp(\eta_i)$ is multiplied by the exposure $u_i$ giving us $y_i \sim Poisson(u_i \lambda_i)$. This is equivalent to adding $\ln{(u_i)}$ to the linear predictor $\eta_i$ and it can be specified using the `offset` argument to `stan_glm`. ```{r, count-roaches-mcmc, results="hide"} library(rstanarm) data(roaches) # Rescale roaches$roach1 <- roaches$roach1 / 100 # Estimate original model glm1 <- glm(y ~ roach1 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson) # Estimate Bayesian version with stan_glm stan_glm1 <- stan_glm(y ~ roach1 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson, prior = normal(0, 2.5), prior_intercept = normal(0, 5), seed = 12345) ``` The `formula`, `data`, `family`, and `offset` arguments to `stan_glm` can be specified in exactly the same way as for `glm`. The `poisson` family function defaults to using the log link, but to write code readable to someone not familiar with the defaults we should be explicit and use `family = poisson(link = "log")`. Other optional arguments are also available: the `chains` argument controls how many Markov chains are executed, and the `cores` argument controls the number of cores utilized by the computer when fitting the model. Above we provided a seed so that we have the option to deterministically reproduce these results at any time. 
The `stan_glm` function has many other optional arguments that allow for more user control over the way estimation is performed. The documentation for `stan_glm` has more information about these controls as well as other topics related to GLM estimation. Here are the point estimates and uncertainties from the `glm` fit and `stan_glm` fit, which we see are nearly identical: ```{r, count-roaches-comparison} round(rbind(glm = coef(glm1), stan_glm = coef(stan_glm1)), digits = 2) round(rbind(glm = summary(glm1)$coefficients[, "Std. Error"], stan_glm = se(stan_glm1)), digits = 3) ``` (Note: the dataset we have is slightly different from the one used in Gelman and Hill (2007), which leads to slightly different parameter estimates than those shown in the book even when copying the `glm` call verbatim. Also, we have rescaled the `roach1` predictor. For the purposes of this example, the actual estimates are less important than the process.) Gelman and Hill next show how to compare the observed data to replicated datasets from the model to check the quality of the fit. Here we don't show the original code used by Gelman and Hill because it's many lines, requiring several loops and some care to get the matrix multiplications right (see pg. 161-162). On the other hand, the __rstanarm__ package makes this easy. We can generate replicated datasets with a single line of code using the `posterior_predict` function: ```{r, count-roaches-posterior_predict} yrep <- posterior_predict(stan_glm1) ``` By default `posterior_predict` will generate a dataset for each set of parameter draws from the posterior distribution. That is, `yrep` will be an $S \times N$ matrix, where $S$ is the size of the posterior sample and $N$ is the number of data points. Each row of `yrep` represents a full dataset generated from the posterior predictive distribution. For more about the importance of the `posterior_predict` function, see the ["How to Use the __rstanarm__ Package"](rstanarm.html) vignette. 
Gelman and Hill take the simulated datasets and for each of them compute the proportion of zeros and compare to the observed proportion in the original data. We can do this easily using the `pp_check` function, which generates graphical comparisons of the data `y` and replicated datasets `yrep`. ```{r, count-roaches-plot-pp_check1} prop_zero <- function(y) mean(y == 0) (prop_zero_test1 <- pp_check(stan_glm1, plotfun = "stat", stat = "prop_zero", binwidth = .005)) ``` The value of the test statistic (in this case the proportion of zeros) computed from the sample `y` is the dark blue vertical line. More than 30% of these observations are zeros, whereas the replicated datasets all contain less than 1% zeros (light blue histogram). This is a sign that we should consider a model that more accurately accounts for the large proportion of zeros in the data. Gelman and Hill show how we can do this using an overdispersed Poisson regression. To illustrate the use of a different `stan_glm` model, here we will instead try [negative binomial](https://en.wikipedia.org/wiki/Negative_binomial_distribution) regression, which is also used for overdispersed or zero-inflated count data. The negative binomial distribution allows the (conditional) mean and variance of $y$ to differ unlike the Poisson distribution. To fit the negative binomial model we can either use the `stan_glm.nb` function or, equivalently, change the `family` we specify in the call to `stan_glm` to `neg_binomial_2` instead of `poisson`. 
To do the latter we can just use `update`: ```{r, count-roaches-negbin, results="hide"} stan_glm2 <- update(stan_glm1, family = neg_binomial_2) ``` We now use `pp_check` again, this time to check the proportion of zeros in the replicated datasets under the negative binomial model: ```{r, count-roaches-plot-pp_check2, fig.width=7, out.width="80%"} prop_zero_test2 <- pp_check(stan_glm2, plotfun = "stat", stat = "prop_zero", binwidth = 0.01) # Show graphs for Poisson and negative binomial side by side bayesplot_grid(prop_zero_test1 + ggtitle("Poisson"), prop_zero_test2 + ggtitle("Negative Binomial"), grid_args = list(ncol = 2)) ``` This is a much better fit, as the proportion of zeros in the data falls nicely near the center of the distribution of the proportion of zeros among the replicated datasets. The observed proportion of zeros is quite plausible under this model. We could have also made these plots manually without using the `pp_check` function because we have the `yrep` datasets created by `posterior_predict`. The `pp_check` function takes care of this for us, but `yrep` can be used directly to carry out other posterior predictive checks that aren't automated by `pp_check`. When comparing the models using the __loo__ package we also see a clear preference for the negative binomial model ```{r, count-roaches-loo} loo1 <- loo(stan_glm1, cores = 2) loo2 <- loo(stan_glm2, cores = 2) loo_compare(loo1, loo2) ``` which is not surprising given the better fit we've already observed from the posterior predictive checks. # References Gelman, A. and Hill, J. (2007). _Data Analysis Using Regression and Multilevel/Hierarchical Models._ Cambridge University Press, Cambridge, UK. 
rstanarm/vignettes/aov.Rmd0000644000176200001440000001662713722762571015372 0ustar liggesusers--- title: "Estimating ANOVA Models with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate ANalysis Of VAriance (ANOVA) models using the `stan_aov` function in the __rstanarm__ package ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of independent normal distributions. We also demonstrate that Step 2 is not entirely automatic because it is sometimes necessary to specify some additional tuning parameters in order to obtain optimally efficient results. # Likelihood The likelihood for one observation under a linear model can be written as a conditionally normal PDF $$\frac{1}{\sigma_{\epsilon} \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma_{\epsilon}}\right)^2},$$ where $\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor and $\sigma_{\epsilon}$ is the standard deviation of the error in predicting the outcome, $y$. The likelihood of the entire sample is the product of $N$ individual likelihood contributions. An ANOVA model can be considered a special case of the above linear regression model where each of the $K$ predictors in $\mathbf{x}$ is a dummy variable indicating membership in a group. An equivalent linear predictor can be written as $\mu_j = \alpha + \alpha_j$, which expresses the conditional expectation of the outcome in the $j$-th group as the sum of a common mean, $\alpha$, and a group-specific deviation from the common mean, $\alpha_j$. 
# Priors If we view the ANOVA model as a special case of a linear regression model with only dummy variables as predictors, then the model could be estimated using the prior specification in the `stan_lm` function. In fact, this is exactly how the `stan_aov` function is coded. These functions require the user to specify a value for the prior location (by default the mode) of the $R^2$, the proportion of variance in the outcome attributable to the predictors under a linear model. This prior specification is appealing in an ANOVA context because of the fundamental identity $$SS_{\mbox{total}} = SS_{\mbox{model}} + SS_{\mbox{error}},$$ where $SS$ stands for sum-of-squares. If we normalize this identity, we obtain the tautology $1 = R^2 + \left(1 - R^2\right)$ but it is reasonable to expect a researcher to have a plausible guess for $R^2$ before conducting an ANOVA. See the [vignette](lm.html) for the `stan_lm` function (regularized linear models) for more information on this approach. If we view the ANOVA model as a difference of means, then the model could be estimated using the prior specification in the `stan_lmer` function. In the syntax popularized by the __lme4__ package, `y ~ 1 + (1|group)` represents a likelihood where $\mu_j = \alpha + \alpha_j$ and $\alpha_j$ is normally distributed across the $J$ groups with mean zero and some unknown standard deviation. The `stan_lmer` function specifies that this standard deviation has a Gamma prior with, by default, both its shape and scale parameters equal to $1$, which is just a standard exponential distribution. However, the shape and scale parameters can be specified as other positive values. This approach also requires specifying a prior distribution on the standard deviation of the errors that is independent of the prior distribution for each $\alpha_j$. See the [vignette](glmer.html) for the `stan_glmer` function (__lme4__-style models using __rstanarm__) for more information on this approach. 
# Example We will utilize an example from the __HSAUR3__ package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book _A Handbook of Statistical Analyses Using R (3rd Edition)_ (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results. The model in section 4.3.1 analyzes an experiment where rats were subjected to different diets in order to see how much weight they gained. The experimental factors were whether their diet had low or high protein and whether the protein was derived from beef or cereal. Before seeing the data, one might expect that a moderate proportion of the variance in weight gain might be attributed to protein (source) in the diet. The frequentist ANOVA estimates can be obtained: ```{r aov-weightgain-aov} data("weightgain", package = "HSAUR3") coef(aov(weightgain ~ source * type, data = weightgain)) ``` To obtain Bayesian estimates we can prepend `stan_` to `aov` and specify the prior location of the $R^2$ as well as optionally the number of cores that the computer is allowed to utilize: ```{r aov-weightgain-mcmc, results="hide"} library(rstanarm) post1 <- stan_aov(weightgain ~ source * type, data = weightgain, prior = R2(location = 0.5), adapt_delta = 0.999, seed = 12345) post1 ``` ```{r, echo=FALSE} print(post1) ``` Here we have specified `adapt_delta = 0.999` to decrease the stepsize and largely prevent divergent transitions. See the Troubleshooting section in the main rstanarm [vignette](rstanarm.html) for more details about `adapt_delta`. Also, our prior guess that $R^2 = 0.5$ was overly optimistic. However, the frequentist estimates presumably overfit the data even more. 
Alternatively, we could prepend `stan_` to `lmer` and specify the corresponding priors ```{r, aov-weightgain-stan_lmer, eval=FALSE} post2 <- stan_lmer(weightgain ~ 1 + (1|source) + (1|type) + (1|source:type), data = weightgain, prior_intercept = cauchy(), prior_covariance = decov(shape = 2, scale = 2), adapt_delta = 0.999, seed = 12345) ``` Comparing these two models using the `loo` function in the __loo__ package reveals a negligible preference for the first approach that is almost entirely due to its having a smaller number of effective parameters as a result of the more regularizing priors. However, the difference is so small that it may seem advantageous to present the second results which are more in line with a mainstream Bayesian approach to an ANOVA model. # Conclusion This vignette has compared and contrasted two approaches to estimating an ANOVA model with Bayesian techniques using the __rstanarm__ package. They both have the same likelihood, so the (small in this case) differences in the results are attributable to differences in the priors. The `stan_aov` approach just calls `stan_lm` and thus only requires a prior location on the $R^2$ of the linear model. This seems rather easy to do in the context of an ANOVA decomposition of the total sum-of-squares in the outcome into model sum-of-squares and residual sum-of-squares. The `stan_lmer` approach just calls `stan_glmer` but specifies a normal prior with mean zero for the deviations from $\alpha$ across groups. This is more in line with what most Bayesians would do naturally --- particularly if the factors were considered "random" --- but also requires a prior for $\alpha$, $\sigma$, and the standard deviation of the normal prior on the group-level intercepts. The `stan_lmer` approach is very flexible and might be more appropriate for more complicated experimental designs. 
rstanarm/vignettes/lm.Rmd0000644000176200001440000004417513722762571015214 0ustar liggesusers--- title: "Estimating Regularized Linear Models with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate linear models using the `stan_lm` function in the __rstanarm__ package. ```{r, child = "children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of independent normal distributions. The goal of the __rstanarm__ package is to make Bayesian estimation of common regression models routine. That goal can be partially accomplished by providing interfaces that are similar to the popular formula-based interfaces to frequentist estimators of those regression models. But fully accomplishing that goal sometimes entails utilizing priors that applied researchers are unaware that they prefer. These priors are intended to work well for any data that a user might pass to the interface that was generated according to the assumptions of the likelihood function. It is important to distinguish between priors that are easy for applied researchers to _specify_ and priors that are easy for applied researchers to _conceptualize_. The prior described below emphasizes the former but we outline its derivation so that applied researchers may feel more comfortable utilizing it. 
# Likelihood The likelihood for one observation under a linear model can be written as a conditionally normal PDF $$\frac{1}{\sigma_{\epsilon} \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma_{\epsilon}}\right)^2},$$ where $\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor and $\sigma_{\epsilon}$ is the standard deviation of the error in predicting the outcome, $y$. The likelihood of the entire sample is the product of $N$ individual likelihood contributions. It is well-known that the likelihood of the sample is maximized when the sum-of-squared residuals is minimized, which occurs when $$ \widehat{\boldsymbol{\beta}} = \left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}, $$ $$ \widehat{\alpha} = \overline{y} - \overline{\mathbf{x}}^\top \widehat{\boldsymbol{\beta}}, $$ $$ \widehat{\sigma}_{\epsilon}^2 = \frac{\left(\mathbf{y} - \widehat{\alpha} - \mathbf{X} \widehat{ \boldsymbol{\beta}}\right)^\top \left(\mathbf{y} - \widehat{\alpha} - \mathbf{X} \widehat{ \boldsymbol{\beta}}\right)}{N},$$ where $\overline{\mathbf{x}}$ is a vector that contains the sample means of the $K$ predictors, $\mathbf{X}$ is a $N \times K$ matrix of _centered_ predictors, $\mathbf{y}$ is a $N$-vector of outcomes and $\overline{y}$ is the sample mean of the outcome. # QR Decomposition The `lm` function in R actually performs a QR decomposition of the design matrix, $\mathbf{X} = \mathbf{Q}\mathbf{R}$, where $\mathbf{Q}^\top \mathbf{Q} = \mathbf{I}$ and $\mathbf{R}$ is upper triangular. Thus, the OLS solution for the coefficients can be written as $\left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y} = \mathbf{R}^{-1} \mathbf{Q}^\top \mathbf{y}$. The `lm` function utilizes the QR decomposition for numeric stability reasons, but the QR decomposition is also useful for thinking about priors in a Bayesian version of the linear model. 
In addition, writing the likelihood in terms of $\mathbf{Q}$ allows it to be evaluated in a very efficient manner in Stan. # Priors The key innovation in the `stan_lm` function in the __rstanarm__ package is the prior for the parameters in the QR-reparameterized model. To understand this prior, think about the equations that characterize the maximum likelihood solutions before observing the data on $\mathbf{X}$ and especially $\mathbf{y}$. What would the prior distribution of $\boldsymbol{\theta} = \mathbf{Q}^\top \mathbf{y}$ be? We can write its $k$-th element as $\theta_k = \rho_k \sigma_Y \sqrt{N - 1}$ where $\rho_k$ is the correlation between the $k$th column of $\mathbf{Q}$ and the outcome, $\sigma_Y$ is the standard deviation of the outcome, and $\frac{1}{\sqrt{N-1}}$ is the standard deviation of the $k$-th column of $\mathbf{Q}$. Then let $\boldsymbol{\rho} = \sqrt{R^2}\mathbf{u}$ where $\mathbf{u}$ is a unit vector that is uniformly distributed on the surface of a hypersphere. Consequently, $R^2 = \boldsymbol{\rho}^\top \boldsymbol{\rho}$ is the familiar coefficient of determination for the linear model. An uninformative prior on $R^2$ would be standard uniform, which is a special case of a Beta distribution with both shape parameters equal to $1$. A non-uniform prior on $R^2$ is somewhat analogous to ridge regression, which is popular in data mining and produces better out-of-sample predictions than least squares because it penalizes $\boldsymbol{\beta}^\top \boldsymbol{\beta}$, usually after standardizing the predictors. An informative prior on $R^2$ effectively penalizes $\boldsymbol{\rho}^\top \boldsymbol{\rho}$, which encourages $\boldsymbol{\beta} = \mathbf{R}^{-1} \boldsymbol{\theta}$ to be closer to the origin. 
Lewandowski, Kurowicka, and Joe (2009) derives a distribution for a correlation matrix that depends on a single shape parameter $\eta > 0$, which implies the variance of one variable given the remaining $K$ variables is $\mathrm{Beta}\left(\eta,\frac{K}{2}\right)$. Thus, the $R^2$ is distributed $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ and any prior information about the location of $R^2$ can be used to choose a value of the hyperparameter $\eta$. The `R2(location, what)` function in the __rstanarm__ package supports four ways of choosing $\eta$: 1. `what = "mode"` and `location` is some prior mode on the $\left(0,1\right)$ interval. This is the default but since the mode of a $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ distribution is $\frac{\frac{K}{2} - 1}{\frac{K}{2} + \eta - 2}$ the mode only exists if $K > 2$. If $K \leq 2$, then the user must specify something else for `what`. 2. `what = "mean"` and `location` is some prior mean on the $\left(0,1\right)$ interval, where the mean of a $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ distribution is $\frac{\frac{K}{2}}{\frac{K}{2} + \eta}$. 3. `what = "median"` and `location` is some prior median on the $\left(0,1\right)$ interval. The median of a $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ distribution is not available in closed form but if $K > 2$ it is approximately equal to $\frac{\frac{K}{2} - \frac{1}{3}}{\frac{K}{2} + \eta - \frac{2}{3}}$. Regardless of whether $K > 2$, the `R2` function can numerically solve for the value of $\eta$ that is consistent with a given prior median utilizing the quantile function. 4. `what = "log"` and `location` is some (negative) prior value for $\mathbb{E} \ln R^2 = \psi\left(\frac{K}{2}\right)- \psi\left(\frac{K}{2}+\eta\right)$, where $\psi\left(\cdot\right)$ is the `digamma` function. Again, given a prior value for the left-hand side it is easy to numerically solve for the corresponding value of $\eta$. 
There is no default value for the `location` argument of the `R2` function. This is an _informative_ prior on $R^2$, which must be chosen by the user in light of the research project. However, specifying `location = 0.5` is often safe, in which case $\eta = \frac{K}{2}$ regardless of whether `what` is `"mode"`, `"mean"`, or `"median"`. In addition, it is possible to specify `NULL`, in which case a standard uniform on $R^2$ is utilized. We set $\sigma_y = \omega s_y$ where $s_y$ is the sample standard deviation of the outcome and $\omega > 0$ is an unknown scale parameter to be estimated. The only prior for $\omega$ that does not contravene Bayes' theorem in this situation is Jeffreys prior, $f\left(\omega\right) \propto \frac{1}{\omega}$, which is proportional to a Jeffreys prior on the unknown $\sigma_y$, $f\left(\sigma_y\right) \propto \frac{1}{\sigma_y} = \frac{1}{\omega \widehat{\sigma}_y} \propto \frac{1}{\omega}$. This parameterization and prior makes it easy for Stan to work with any continuous outcome variable, no matter what its units of measurement are. It would seem that we need a prior for $\sigma_{\epsilon}$, but our prior beliefs about $\sigma_{\epsilon} = \omega s_y \sqrt{1 - R^2}$ are already implied by our prior beliefs about $\omega$ and $R^2$. That only leaves a prior for $\alpha = \overline{y} - \overline{\mathbf{x}}^\top \mathbf{R}^{-1} \boldsymbol{\theta}$. The default choice is an improper uniform prior, but a normal prior can also be specified such as one with mean zero and standard deviation $\frac{\sigma_y}{\sqrt{N}}$. # Posterior The previous sections imply a posterior distribution for $\omega$, $\alpha$, $\mathbf{u}$, and $R^2$. 
The parameters of interest can then be recovered as generated quantities: * $\sigma_y = \omega s_y$ * $\sigma_{\epsilon} = \sigma_y \sqrt{1 - R^2}$ * $\boldsymbol{\beta} = \mathbf{R}^{-1} \mathbf{u} \sigma_y \sqrt{R^2 \left(N-1\right)}$ The implementation actually utilizes an improper uniform prior on $\ln \omega$. Consequently, if $\ln \omega = 0$, then the marginal standard deviation of the outcome _implied by the model_ is the same as the sample standard deviation of the outcome. If $\ln \omega > 0$, then the marginal standard deviation of the outcome implied by the model exceeds the sample standard deviation, so the model overfits the data. If $\ln \omega < 0$, then the marginal standard deviation of the outcome implied by the model is less than the sample standard deviation, so the model _underfits_ the data or that the data-generating process is nonlinear. Given the regularizing nature of the prior on $R^2$, a minor underfit would be considered ideal if the goal is to obtain good out-of-sample predictions. If the model badly underfits or overfits the data, then you may want to reconsider the model. # Example We will utilize an example from the __HSAUR3__ package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book _A Handbook of Statistical Analyses Using R (3rd Edition)_ (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results. The model in section 5.3.1 analyzes an experiment where clouds were seeded with different amounts of silver iodide to see if there was increased rainfall. This effect could vary according to covariates, which (except for `time`) are interacted with the treatment variable. Most people would probably be skeptical that cloud hacking could explain very much of the variation in rainfall and thus the prior mode of the $R^2$ would probably be fairly small. 
The frequentist estimator of this model can be replicated by executing ```{r lm-clouds-ols} data("clouds", package = "HSAUR3") ols <- lm(rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + time, data = clouds) round(coef(ols), 3) ``` Note that we have _not_ looked at the estimated $R^2$ or $\sigma$ for the ordinary least squares model. We can estimate a Bayesian version of this model by prepending `stan_` to the `lm` call, specifying a prior mode for $R^2$, and optionally specifying how many cores the computer may utilize: ```{r lm-clouds-mcmc, results='hide'} library(rstanarm) post <- stan_lm( rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + time, data = clouds, prior = R2(location = 0.2), seed = 12345 ) post ``` ```{r, echo=FALSE} print(post) ``` In this case, the "Bayesian point estimates", which are represented by the posterior medians, appear quite different from the ordinary least squares estimates. However, the `log-fit_ratio` (i.e. $\ln \omega$) is quite small, indicating that the model only slightly overfits the data when the prior derived above is utilized. Thus, it would be safe to conclude that the ordinary least squares estimator considerably overfits the data since there are only $24$ observations to estimate $12$ parameters with and no prior information on the parameters. Also, it is not obvious what the estimated average treatment effect is since the treatment variable, `seeding`, is interacted with four other correlated predictors. However, it is easy to estimate or visualize the average treatment effect (ATE) using __rstanarm__'s `posterior_predict` function. 
```{r lm-clouds-ate-plot} clouds_cf <- clouds clouds_cf$seeding[] <- "yes" y1_rep <- posterior_predict(post, newdata = clouds_cf) clouds_cf$seeding[] <- "no" y0_rep <- posterior_predict(post, newdata = clouds_cf) qplot(x = c(y1_rep - y0_rep), geom = "histogram", xlab = "Estimated ATE") ``` As can be seen, the treatment effect is not estimated precisely and is as almost as likely to be negative as it is to be positive. # Alternative Approach The prior derived above works well in many situations and is quite simple to _use_ since it only requires the user to specify the prior location of the $R^2$. Nevertheless, the implications of the prior are somewhat difficult to _conceptualize_. Thus, it is perhaps worthwhile to compare to another estimator of a linear model that simply puts independent Cauchy priors on the regression coefficients. This simpler approach can be executed by calling the `stan_glm` function with `family = gaussian()` and specifying the priors: ```{r lm-clouds-simple, results="hide"} simple <- stan_glm( rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + time, data = clouds, family = gaussian(), prior = cauchy(), prior_intercept = cauchy(), seed = 12345 ) ``` We can compare the two approaches using an approximation to Leave-One-Out (LOO) cross-validation, which is implemented by the `loo` function in the __loo__ package. ```{r lm-clouds-loo, warning=TRUE} (loo_post <- loo(post)) loo_compare(loo_post, loo(simple)) ``` The results indicate that the first approach is expected to produce better out-of-sample predictions but the Warning messages are at least as important. Many of the estimated shape parameters for the Generalized Pareto distribution are above $0.5$ in the model with Cauchy priors, which indicates that these estimates are only going to converge slowly to the true out-of-sample deviance measures. Thus, with only $24$ observations, they should not be considered reliable. 
The more complicated prior derived above is stronger --- as evidenced by the fact that the effective number of parameters is about half of that in the simpler approach and $12$ for the maximum likelihood estimator --- and only has a few of the $24$ Pareto shape estimates in the "danger zone". We might want to reexamine these observations ```{r lm-clouds-plot-loo} plot(loo_post, label_points = TRUE) ``` because the posterior is sensitive to them but, overall, the results seem tolerable. In general, we would expect the joint prior derived here to work better when there are many predictors relative to the number of observations. Placing independent, heavy-tailed priors on the coefficients neither reflects the beliefs of the researcher nor conveys enough information to stabilize all the computations. # Conclusion This vignette has discussed the prior distribution utilized in the `stan_lm` function, which has the same likelihood and a similar syntax as the `lm` function in R but adds the ability to expression prior beliefs about the location of the $R^2$, which is the familiar proportion of variance in the outcome variable that is attributable to the predictors under a linear model. Since the $R^2$ is a well-understood bounded scalar, it is easy to specify prior information about it, whereas other Bayesian approaches require the researcher to specify a joint prior distribution for the regression coefficients (and the intercept and error variance). However, most researchers have little inclination to specify all these prior distributions thoughtfully and take a short-cut by specifying one prior distribution that is taken to apply to all the regression coefficients as if they were independent of each other (and the intercept and error variance). This short-cut is available in the `stan_glm` function and is described in more detail in other __rstanarm__ vignettes for Generalized Linear Models (GLMs), which can be found by navigating up one level. 
We are optimistic that this prior on the $R^2$ will greatly help in accomplishing our goal for __rstanarm__ of making Bayesian estimation of regression models routine. The same approach is used to specify a prior in ANOVA models (see `stan_aov`) and proportional-odds models for ordinal outcomes (see `stan_polr`). Finally, the `stan_biglm` function can be used when the design matrix is too large for the `qr` function to process. The `stan_biglm` function inputs the output of the `biglm` function in the __biglm__ package, which utilizes an incremental QR decomposition that does not require the entire dataset to be loaded into memory simultaneously. However, the `biglm` function needs to be called in a particular way in order to work with `stan_biglm`. In particular, The means of the columns of the design matrix, the sample mean of the outcome, and the sample standard deviation of the outcome all need to be passed to the `stan_biglm` function, as well as a flag indicating whether the model really does include an intercept. Also, the number of columns of the design matrix currently cannot exceed the number of rows. Although `stan_biglm` should run fairly quickly and without much memory, the resulting object is a fairly plain `stanfit` object rather than an enhanced `stanreg` object like that produced by `stan_lm`. Many of the enhanced capabilities of a `stanreg` object depend on being able to access the full design matrix, so doing posterior predictions, posterior checks, etc. with the output of `stan_biglm` would require some custom R code. # References Lewandowski, D., Kurowicka D., and Joe, H. (2009). Generating random correlation matrices based on vines and extended onion method. _Journal of Multivariate Analysis_. __100__(9), 1989--2001. 
rstanarm/vignettes/polr.Rmd0000644000176200001440000003134713722762571015555 0ustar liggesusers--- title: "Estimating Ordinal Regression Models with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate models for ordinal outcomes using the `stan_polr` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1. One of the strengths of doing MCMC with Stan --- as opposed to a Gibbs sampler --- is that reparameterizations are essentially costless, which allows the user to specify priors on parameters that are either more intuitive, numerically stable, or computationally efficient without changing the posterior distribution of the parameters that enter the likelihood. Advantageous parameterizations are already built into the Stan programs used in the __rstanarm__ package, so it is just a matter of using these vignettes to explain how the priors work in the context of these reparameterizations. # Likelihood Ordinal outcomes fall in one of $J$ categories. One way to motivate an ordinal model is to introduce a latent variable, $y^\ast$, that is related to the observed outcomes via an observation mechanism: $$y=\begin{cases} 1 & \mbox{if }y^{\ast}<\zeta_{1}\\ 2 & \mbox{if }\zeta_{1}\leq y^{\ast}<\zeta_{2}\\ \vdots\\ J & \mbox{if }\zeta_{J-1}\leq y^{\ast} \end{cases},$$ where $\boldsymbol{\zeta}$ is a vector of cutpoints of length $J-1$. Then $y^\ast$ is modeled as a linear function of $K$ predictors $$y^\ast = \mu + \epsilon = \mathbf{x}^\top \boldsymbol{\beta} + \epsilon,$$ where $\epsilon$ has mean zero and unit scale but can be specified as being drawn from one of several distributions. 
Note that there is no "intercept" in this model since the data cannot distinguish an intercept from the cutpoints. However, if $J = 2$, then $\zeta_1$ can be referred to as either the cutpoint or the intercept. A Bayesian can treat $y^\ast$ as another unknown parameter, although for computational efficiency the Stan code essentially integrates each $y^\ast$ out of the posterior distribution, leaving the posterior distribution of $\boldsymbol{\beta}$ and $\boldsymbol{\zeta}$. Nevertheless, it is useful to motivate the model theoretically as if $y^\ast$ were just an unknown parameter with a distribution truncated by the relevant element(s) of $\boldsymbol{\zeta}$. # Priors If $y^\ast$ were observed we would simply have a linear regression model for it, and the description of the priors in the vignette entitled ["Estimating Linear Models with the __rstanarm__ Package"](lm.html) would apply directly. Another way to say the same thing is _conditional_ on a realization of $y^\ast$, we have a linear regression model and the description of the priors in the other [vignette](lm.html) does apply (and should be read before continuing with this subsection). The `stan_lm` function essentially specifies a prior on $\boldsymbol{\theta} = \mathbf{R}^{-1} \boldsymbol{\beta}$, where $\mathbf{R}$ is the upper triangular matrix in the QR decomposition of the design matrix, $\mathbf{X} = \mathbf{Q} \mathbf{R}$. Furthermore, in `stan_lm`, $\sigma_{\epsilon} = \sigma_y \sqrt{1 - R^2}$ where $R^2$ is the proportion of variance in the outcome that is attributable to the coefficients in a linear model. The main difference in the context of a model for an ordinal outcome is that the scale of $y^\ast$ is not identified by the data. Thus, the ordinal model specifies that $\sigma_{\epsilon} = 1$, which implies that $\sigma_{y^\ast} = 1 / \sqrt{1 - R^2}$ is an intermediate parameter rather than a primitive parameter. 
It is somewhat more difficult to specify a prior value for the $R^2$ in an ordinal model because $R^2$ refers to the proportion of variance in the \emph{unobservable} $y^\ast$ that is attributable to the predictors under a linear model. In general, the $R^2$ tends to be lower in an ordinal model than in a linear model where the continuous outcome is observed. The other difference is that an ordinal model does not have a global intercept but rather a vector of $J-1$ cutpoints. The implied prior on these cutpoints used by the __rstanarm__ package is somewhat novel. The user instead specifies a Dirichlet prior on $\Pr\left(y=j \, \left.\right| \, \overline{\mathbf{x}} \right)$, which is to say the prior probability of the outcome falling in each of the $J$ categories given that the predictors are at their sample means. The Dirichlet prior is for a simplex random variable, whose elements are non-negative and sum to $1$. The Dirichlet PDF can be written as $$f\left(\boldsymbol{\pi}|\boldsymbol{\alpha}\right) \propto \prod_{j=1}^J{\pi_j^{\alpha_j - 1}}, $$ where $\boldsymbol{\pi}$ is a simplex vector such that $\pi_j = \Pr\left(y=j \, \left.\right| \, \overline{\mathbf{x}} \right)$. The Dirichlet prior is one of the easiest to specify because the so-called "concentration" hyperparameters $\boldsymbol{\alpha}$ can be interpreted as prior counts, i.e., prior observations for each of the J categories (although they need not be integers). If $\alpha_j = 1$ for every $j$ (the default used by __rstanarm__) then the Dirichlet prior is jointly uniform over the space of these simplexes. This corresponds to a prior count of one observation falling in each of the $J$ ordinal categories when the predictors are at their sample means and conveys the reasonable but weak prior information that no category has probability zero. 
If, for each $j$, $\alpha_j = \alpha > 1$ then the prior mode is that the $J$ categories are equiprobable, with prior probability $1/J$ of the outcome falling in each of the $J$ categories. The larger the value of $\alpha$ the more sharply peaked the distribution is at the mode. The $j$-th cutpoint $\zeta_j$ is then given by $$\zeta_j = F_{y^\ast}^{-1}\left(\sum_{i=1}^j{\pi_i}\right),$$ where $F_{y^\ast}^{-1}$ is an inverse CDF function, which depends on the assumed distribution of $y^\ast$. Common choices include the normal and logistic distributions. The scale parameter of this distribution is again $\sigma_{y^\ast} = 1/\sqrt{1 - R^2}$. In short, by making each $\zeta_j$ a function of $\boldsymbol{\pi}$, it allows us to specify a Dirichlet prior on $\boldsymbol{\pi}$, which is simpler than specifying a prior on $\boldsymbol{\zeta}$ directly. # Example In this section, we start with an ordinal model of tobacco consumption as a function of age and alcohol consumption. Frequentist estimates can be obtained using the `polr` function in the __MASS__ package: ```{r polr-tobgp-mass} library(MASS) print(polr(tobgp ~ agegp + alcgp, data = esoph), digits = 1) ``` To obtain Bayesian estimates, we prepend `stan_` and specify the priors: ```{r polr-tobgp-mcmc, results="hide"} library(rstanarm) post0 <- stan_polr(tobgp ~ agegp + alcgp, data = esoph, prior = R2(0.25), prior_counts = dirichlet(1), seed = 12345) ``` ```{r} print(post0, digits = 1) ``` ```{r, polr-tobgp-cutpoints, echo=FALSE} zeta_medians <- round(apply(rstan::extract(post0$stanfit, pars = "zeta")[[1]], 2, median), digits = 2) ``` The point estimates, represented by the posterior medians, are qualitatively similar to the maximum-likelihood estimates but are somewhat shrunk toward zero due to the regularizing prior on the coefficients. Since these cutpoints are actually _known_, it would be more appropriate for the model to take that into account, but `stan_polr` does not currently support that. 
Next, we utilize an example from the __MASS__ package where low birthweight is the binary outcome of interest. First, we recode some of the variables: ```{r polr-birthwt-recodes} data("birthwt", package = "MASS") birthwt$race <- factor(birthwt$race, levels = 1:3, labels = c("white", "black", "other")) birthwt$bwt <- birthwt$bwt / 1000 # convert from grams to kilograms birthwt$low <- factor(birthwt$low, levels = 0:1, labels = c("no", "yes")) ``` It is usually a good idea to rescale variables by constants so that all the numbers are in single or double digits. We start by estimating a linear model for birthweight in kilograms, flipping the sign so that positive coefficients are associated with _lower_ birthweights. ```{r polr-stan_lm, results="hide"} post1 <- stan_lm(-bwt ~ smoke + age + race + ptl + ht + ftv, data = birthwt, prior = R2(0.5), seed = 12345) ``` ```{r} print(post1) ``` Next, we estimate an "ordinal" model for the incidence of low birthweight, which is defined as a birth weight of less than $2.5$ kilograms. Even though this outcome is binary, a binary variable is a special case of an ordinal variable with $J=2$ categories and is acceptable to `stan_polr`. We can think of `bwt` as something proportional to $y^\ast$ and pretend that it is not observed, forcing us to estimate an ordinal model. ```{r polr-birthwt-mcmc, results="hide"} post2 <- stan_polr(low ~ smoke + age + race + ptl + ht + ftv, data = birthwt, prior = R2(0.5), prior_counts = dirichlet(c(1,1)), method = "probit", seed = 12345) ``` ```{r, polr-loo-plot} plot(loo(post2)) ``` This prior seems to have worked well in this case because none of the points in the plot are above $0.5$, which would have indicated the the posterior is very sensitive to those observations. 
If we compare the estimated coefficients, ```{r polr-birthwt-comparison} round(cbind(Linear = coef(post1), Ordinal = coef(post2), Rescaled = coef(post1) / sigma(post1)), 3) ``` they have the same signs and similar magnitudes, with the exception of the "Intercept". In an ordinal model where the outcome only has $J=2$ categories, this "Intercept" is actually $\zeta_1$, but it is more conventional to call it the "Intercept" so that it agrees with `stan_glm` when `family = binomial(link = 'probit')`. Recall that $\sigma_{\epsilon} = 1$ in an ordinal model, so if we rescale the coefficients from a linear model by dividing by the posterior median of $\sigma$, the resulting coefficients are even closer to those of the ordinal model. This illustrates the fundamental similarity between a linear model for a continuous observed outcome and a linear model for a latent $y^\ast$ that generates an ordinal observed outcome. The main difference is when the outcome is continuous and observed, we can estimate the scale of the errors meaningfully. When the outcome is ordinal, we can only fix the scale of the latent errors to $1$ arbitrarily. Finally, when $J = 2$, the `stan_polr` function allows you to specify non-`NULL` values of the `shape` and `rate` arguments, which implies a "scobit" likelihood where the probability of success is given by $F\left(y^\ast \right)^\alpha$, where $F\left(\right)$ is the logistic CDF and $\alpha > 0$ is a skewing parameter that has a gamma prior with a given `shape` and `rate`. If $\alpha \neq 1$, then the relationship between $y^\ast$ and the probability of success is asymmetric. In principle, it seems appropriate to estimate $\alpha$ but in practice, a lot of data is needed to estimate $\alpha$ with adequate precision. 
In the previous example, if we specify `shape = 2` and `rate = 2` to reflect the prior beliefs that $\alpha$ is expected to be $1$ but has a variance of $\frac{1}{2}$, then the `loo` calculation yields many Pareto shape parameters that are excessively large. However, with more than $189$ observations, such a model may be more fruitful. # Conclusion The posterior distribution for an ordinal model requires priors on the coefficients and the cutpoints. The priors used by the `stan_polr` function are unconventional but should work well for a variety of problems. The prior on the coefficients is essentially the same as that used by the `stan_lm` function but omits a scale parameter because the standard deviation of the latent $y^\ast$ is not identified by the data. The cutpoints are conditionally deterministic given a simplex vector for the probability of falling in each of the $J$ ordinal categories given that the predictors are at their sample means. Thus, a Dirichlet prior --- which is relatively easy to specify and has a good default of jointly uniform --- on this simplex completes the posterior distribution. This approach provides an alternative to `stan_glm` with `family = binomial()` even if the outcome variable has only two categories. The `stan_glm` function has more options for the prior on the coefficients and the prior on the intercept (which can be interpreted as the first cutpoint when $J = 2$). However, it may be more difficult to obtain efficient sampling with those priors. 
rstanarm/vignettes/.install_extras0000644000176200001440000000001313340675562017153 0ustar liggesusers^children rstanarm/vignettes/children/0000755000176200001440000000000013722762571015715 5ustar liggesusersrstanarm/vignettes/children/SETTINGS-gg.txt0000644000176200001440000000016013540537566020310 0ustar liggesusers```{r, SETTINGS-gg, include=TRUE} library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ``` rstanarm/vignettes/children/stan_glm_priors.txt0000644000176200001440000000244513340675562021664 0ustar liggesusersA full Bayesian analysis requires specifying prior distributions $f(\alpha)$ and $f(\boldsymbol{\beta})$ for the intercept and vector of regression coefficients. When using `stan_glm`, these distributions can be set using the `prior_intercept` and `prior` arguments. The `stan_glm` function supports a variety of prior distributions, which are explained in the __rstanarm__ documentation (`help(priors, package = 'rstanarm')`). As an example, suppose we have $K$ predictors and believe --- prior to seeing the data --- that $\alpha, \beta_1, \dots, \beta_K$ are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give $\alpha$ and each of the $\beta$s this prior (with a scale of 1, say), in the call to `stan_glm` we would include the arguments `prior_intercept = normal(0,1)` and `prior = normal(0,1)`. If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. __Step 1__ in the "How to Use the __rstanarm__ Package" vignette discusses one such example. 
rstanarm/vignettes/children/SETTINGS-knitr.txt0000644000176200001440000000045513722762571021047 0ustar liggesusers```{r, SETTINGS-knitr, include=FALSE} stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ``` rstanarm/vignettes/children/four_steps.txt0000644000176200001440000000137013340675562020647 0ustar liggesusersThe four steps of a Bayesian analysis are 1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data 2. Draw from posterior distribution using Markov Chain Monte Carlo (MCMC). 3. Evaluate how well the model fits the data and possibly revise the model. 4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s). rstanarm/vignettes/continuous.Rmd0000644000176200001440000004027314214422264016772 0ustar liggesusers--- title: "Estimating Generalized Linear Models for Continuous Data with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate linear and generalized linear models (GLMs) for continuous response variables using the `stan_glm` function in the __rstanarm__ package. For GLMs for discrete outcomes see the vignettes for [binary/binomial](binomial.html) and [count](count.html) outcomes. 
```{r, child="children/four_steps.txt"} ``` This vignette primarily focuses on Steps 1 and 2 when the likelihood is the product of conditionally independent continuous distributions. Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html), although this vignette does also give a few examples of model checking and generating predictions. # Likelihood In the simplest case a GLM for a continuous outcome is simply a linear model and the likelihood for one observation is a conditionally normal PDF $$\frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma}\right)^2},$$ where $\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor and $\sigma$ is the standard deviation of the error in predicting the outcome, $y$. More generally, a linear predictor $\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ can be related to the conditional mean of the outcome via a link function $g$ that serves as a map between the range of values on which the outcome is defined and the space on which the linear predictor is defined. For the linear model described above no transformation is needed and so the link function is taken to be the identity function. However, there are cases in which a link function is used for Gaussian models; the log link, for example, can be used to log transform the (conditional) expected value of the outcome when it is constrained to be positive. Like the `glm` function, the `stan_glm` function uses R's family objects. The family objects for continuous outcomes compatible with `stan_glm` are the `gaussian`, `Gamma`, and `inverse.gaussian` distributions. All of the link functions provided by these family objects are also compatible with `stan_glm`. For example, for a Gamma GLM, where we assume that observations are conditionally independent Gamma random variables, common link functions are the log and inverse links. 
Regardless of the distribution and link function, the likelihood for the entire sample is the product of the likelihood contributions of the individual observations. # Priors ```{r, child="children/stan_glm_priors.txt"} ``` # Posterior With independent prior distributions, the joint posterior distribution for $\alpha$ and $\boldsymbol{\beta}$ is proportional to the product of the priors and the $N$ likelihood contributions: $$f\left(\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N {f(y_i|\eta_i)},$$ where $\mathbf{X}$ is the matrix of predictors and $\eta$ the linear predictor. This is the posterior distribution that `stan_glm` will draw from when using MCMC. # Linear Regression Example The `stan_lm` function, which has its own [vignette](lm.html), fits regularized linear models using a novel means of specifying priors for the regression coefficients. Here we focus using the `stan_glm` function, which can be used to estimate linear models with independent priors on the regression coefficients. To illustrate the usage of `stan_glm` and some of the post-processing functions in the __rstanarm__ package we'll use a simple example from Chapter 3 of [Gelman and Hill (2007)](http://www.stat.columbia.edu/~gelman/arm/): > We shall fit a series of regressions predicting cognitive test scores of three- and four-year-old children given characteristics of their mothers, using data from a survey of adult American women and their children (a subsample from the National Longitudinal Survey of Youth). Using two predictors -- a binary indicator for whether the mother has a high-school degree (`mom_hs`) and the mother's score on an IQ test (`mom_iq`) -- we will fit four contending models. The first two models will each use just one of the predictors, the third will use both, and the fourth will also include a term for the interaction of the two predictors. 
For these models we'll use the default weakly informative priors for `stan_glm`, which are currently set to `normal(0,10)` for the intercept and `normal(0,5)` for the other regression coefficients. For an overview of the many other available prior distributions see `help("prior", package = "rstanarm")`. ```{r, continuous-kidiq-mcmc,results="hide"} library(rstanarm) data(kidiq) post1 <- stan_glm(kid_score ~ mom_hs, data = kidiq, family = gaussian(link = "identity"), seed = 12345) post2 <- update(post1, formula = . ~ mom_iq) post3 <- update(post1, formula = . ~ mom_hs + mom_iq) (post4 <- update(post1, formula = . ~ mom_hs * mom_iq)) ``` ```{r, continuous-kidiq-print, echo=FALSE} print(post4) ``` Following Gelman and Hill's example, we make some plots overlaying the estimated regression lines on the data. ```{r, continuous-kidiq-plot1a} base <- ggplot(kidiq, aes(x = mom_hs, y = kid_score)) + geom_point(size = 1, position = position_jitter(height = 0.05, width = 0.1)) + scale_x_continuous(breaks = c(0,1), labels = c("No HS", "HS")) base + geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], color = "skyblue4", size = 1) ``` There several ways we could add the uncertainty in our estimates to the plot. One way is to also plot the estimated regression line at each draw from the posterior distribution. 
To do this we can extract the posterior draws from the fitted model object using the `as.matrix` or `as.data.frame` methods: ```{r, continuous-kidiq-plot1b} draws <- as.data.frame(post1) colnames(draws)[1:2] <- c("a", "b") base + geom_abline(data = draws, aes(intercept = a, slope = b), color = "skyblue", size = 0.2, alpha = 0.25) + geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], color = "skyblue4", size = 1) ``` For the second model we can make the same plot but the x-axis will show the continuous predictor `mom_iq`: ```{r, continuous-kidiq-plot2} draws <- as.data.frame(as.matrix(post2)) colnames(draws)[1:2] <- c("a", "b") ggplot(kidiq, aes(x = mom_iq, y = kid_score)) + geom_point(size = 1) + geom_abline(data = draws, aes(intercept = a, slope = b), color = "skyblue", size = 0.2, alpha = 0.25) + geom_abline(intercept = coef(post2)[1], slope = coef(post2)[2], color = "skyblue4", size = 1) ``` For the third and fourth models, each of which uses both predictors, we can plot the continuous `mom_iq` on the x-axis and use color to indicate which points correspond to the different subpopulations defined by `mom_hs`. 
We also now plot two regression lines, one for each subpopulation: ```{r, continuous-kidiq-plot3} reg0 <- function(x, ests) cbind(1, 0, x) %*% ests reg1 <- function(x, ests) cbind(1, 1, x) %*% ests args <- list(ests = coef(post3)) kidiq$clr <- factor(kidiq$mom_hs, labels = c("No HS", "HS")) lgnd <- guide_legend(title = NULL) base2 <- ggplot(kidiq, aes(x = mom_iq, fill = relevel(clr, ref = "HS"))) + geom_point(aes(y = kid_score), shape = 21, stroke = .2, size = 1) + guides(color = lgnd, fill = lgnd) + theme(legend.position = "right") base2 + stat_function(fun = reg0, args = args, aes(color = "No HS"), size = 1.5) + stat_function(fun = reg1, args = args, aes(color = "HS"), size = 1.5) ``` ```{r, continuous-kidiq-plot4} reg0 <- function(x, ests) cbind(1, 0, x, 0 * x) %*% ests reg1 <- function(x, ests) cbind(1, 1, x, 1 * x) %*% ests args <- list(ests = coef(post4)) base2 + stat_function(fun = reg0, args = args, aes(color = "No HS"), size = 1.5) + stat_function(fun = reg1, args = args, aes(color = "HS"), size = 1.5) ``` ## Model comparison One way we can compare the four contending models is to use an approximation to Leave-One-Out (LOO) cross-validation, which is implemented by the `loo` function in the __loo__ package: ```{r, continuous-kidiq-loo} # Compare them with loo loo1 <- loo(post1, cores = 2) loo2 <- loo(post2, cores = 2) loo3 <- loo(post3, cores = 2) loo4 <- loo(post4, cores = 2) (comp <- loo_compare(loo1, loo2, loo3, loo4)) ``` In this case the fourth model is preferred as it has the highest expected log predicted density (`elpd_loo`) or, equivalently, the lowest value of the LOO Information Criterion (`looic`). The fourth model is preferred by a lot over the first model ```{r, continuous-kidiq-loo-2} loo_compare(loo1, loo4) ``` because the difference in `elpd` is so much larger than the standard error. 
However, the preference of the fourth model over the others isn't as strong: ```{r, continuous-kidiq-loo-3} loo_compare(loo3, loo4) loo_compare(loo2, loo4) ``` ## The posterior predictive distribution The posterior predictive distribution is the distribution of the outcome implied by the model after using the observed data to update our beliefs about the unknown parameters. When simulating observations from the posterior predictive distribution we use the notation $y^{\rm rep}$ (for _replicate_) when we use the same observations of $X$ that were used to estimate the model parameters. When $X$ contains new observations we use the notation $\tilde{y}$ to refer to the posterior predictive simulations. Simulating data from the posterior predictive distribution using the observed predictors is useful for checking the fit of the model. Drawing from the posterior predictive distribution at interesting values of the predictors also lets us visualize how a manipulation of a predictor affects (a function of) the outcome(s). ### Graphical posterior predictive checks The `pp_check` function generates a variety of plots comparing the observed outcome $y$ to simulated datasets $y^{\rm rep}$ from the posterior predictive distribution using the same observations of the predictors $X$ as we used to fit the model. Here we show a few of the possible displays. The documentation at `help("pp_check.stanreg", package = "rstanarm")` has details on all of the `pp_check` options. First we'll look at a plot directly comparing the distributions of $y$ and $y^{\rm rep}$. The following call to `pp_check` will create a plot juxtaposing the histogram of $y$ and histograms of five $y^{\rm rep}$ datasets: ```{r, continuous-kidiq-pp_check1} pp_check(post4, plotfun = "hist", nreps = 5) ``` The idea is that if the model is a good fit to the data we should be able to generate data $y^{\rm rep}$ from the posterior predictive distribution that looks a lot like the observed data $y$. 
That is, given $y$, the $y^{\rm rep}$ we generate should be plausible. Another useful plot we can make using `pp_check` shows the distribution of a test quantity $T(y^{\rm rep})$ compared to $T(y)$, the value of the quantity in the observed data. When the argument `plotfun = "stat"` is specified, `pp_check` will simulate $S$ datasets $y_1^{\rm rep}, \dots, y_S^{\rm rep}$, each containing $N$ observations. Here $S$ is the size of the posterior sample (the number of MCMC draws from the posterior distribution of the model parameters) and $N$ is the length of $y$. We can then check if $T(y)$ is consistent with the distribution of $\left(T(y_1^{\rm rep}), \dots, T(y_S^{\rm rep})\right)$. In the plot below we see that the mean of the observations is plausible when compared to the distribution of the means of the $S$ $y^{\rm rep}$ datasets: ```{r, continuous-kidiq-pp_check2} pp_check(post4, plotfun = "stat", stat = "mean") ``` Using `plotfun="stat_2d"` we can also specify two test quantities and look at a scatterplot: ```{r, continuous-kidiq-pp_check3} pp_check(post4, plotfun = "stat_2d", stat = c("mean", "sd")) ``` ### Generating predictions The `posterior_predict` function is used to generate replicated data $y^{\rm rep}$ or predictions for future observations $\tilde{y}$. Here we show how to use `posterior_predict` to generate predictions of the outcome `kid_score` for a range of different values of `mom_iq` and for both subpopulations defined by `mom_hs`. ```{r, continuous-kidiq-posterior_predict} IQ_SEQ <- seq(from = 75, to = 135, by = 5) y_nohs <- posterior_predict(post4, newdata = data.frame(mom_hs = 0, mom_iq = IQ_SEQ)) y_hs <- posterior_predict(post4, newdata = data.frame(mom_hs = 1, mom_iq = IQ_SEQ)) dim(y_hs) ``` We now have two matrices, `y_nohs` and `y_hs`. Each matrix has as many columns as there are values of `IQ_SEQ` and as many rows as the size of the posterior sample. 
One way to show the predictors is to plot the predictions for the two groups of kids side by side: ```{r, continuous-kidiq-plot-predict, fig.width=7} par(mfrow = c(1:2), mar = c(5,4,2,1)) boxplot(y_hs, axes = FALSE, outline = FALSE, ylim = c(10,170), xlab = "Mom IQ", ylab = "Predicted Kid IQ", main = "Mom HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) axis(2, las = 1) boxplot(y_nohs, outline = FALSE, col = "red", axes = FALSE, ylim = c(10,170), xlab = "Mom IQ", ylab = NULL, main = "Mom No HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) ``` ```{r, continuous-kidiq-validation, eval=FALSE, include=FALSE} # # External Validation # source(paste0(ROOT, "ARM/Ch.3/kids_before1987.data.R"), # local = kidiq, verbose = FALSE) # source(paste0(ROOT, "ARM/Ch.3/kids_after1987.data.R"), # local = kidiq, verbose = FALSE) # post5 <- stan_lm(ppvt ~ hs + afqt, data = kidiq, # prior = R2(location = 0.25, what = "mean"), seed = SEED) # y_ev <- posterior_predict(post5, newdata = # data.frame(hs = kidiq$hs_ev, afqt = kidiq$afqt_ev)) # par(mfrow = c(1,1)) # hist(-sweep(y_ev, 2, STATS = kidiq$ppvt_ev, FUN = "-"), prob = TRUE, # xlab = "Predictive Errors in ppvt", main = "", las = 2) ``` # Gamma Regression Example Gamma regression is often used when the response variable is continuous and positive, and the _coefficient of variation_ (rather than the variance) is constant. We'll use one of the standard examples of Gamma regression, which is taken from McCullagh & Nelder (1989). This example is also given in the documentation for R's `glm` function. The outcome of interest is the clotting time of blood (in seconds) for "normal plasma diluted to nine different percentage concentrations with prothrombin-free plasma; clotting was induced by two lots of thromboplastin" (p. 300). 
The help page for R's `glm` function presents the example as follows: ```{r, continuous-clotting-mle, results='hide'} clotting <- data.frame( u = c(5,10,15,20,30,40,60,80,100), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12)) summary(glm(lot1 ~ log(u), data = clotting, family = Gamma)) summary(glm(lot2 ~ log(u), data = clotting, family = Gamma)) ``` To fit the analogous Bayesian models we can simply substitute `stan_glm` for `glm` above. However, instead of fitting separate models we can also reshape the data slightly and fit a model interacting lot with plasma concentration: ```{r, continuous-clotting-mcmc, results="hide"} clotting2 <- with(clotting, data.frame( log_plasma = rep(log(u), 2), clot_time = c(lot1, lot2), lot_id = factor(rep(c(1,2), each = length(u))) )) fit <- stan_glm(clot_time ~ log_plasma * lot_id, data = clotting2, family = Gamma, prior_intercept = normal(0, 1, autoscale = TRUE), prior = normal(0, 1, autoscale = TRUE), seed = 12345) ``` ```{r} print(fit, digits = 3) ``` In the output above, the estimate reported for `shape` is for the shape parameter of the Gamma distribution. The _reciprocal_ of the shape parameter can be interpreted similarly to what `summary.glm` refers to as the dispersion parameter. # References Gelman, A. and Hill, J. (2007). _Data Analysis Using Regression and Multilevel/Hierarchical Models._ Cambridge University Press, Cambridge, UK. McCullagh, P. and Nelder, J. A. (1989). _Generalized Linear Models._ Chapman and Hall/CRC Press, New York. 
rstanarm/vignettes/glmer.Rmd0000644000176200001440000004771714370470372015712 0ustar liggesusers--- title: "Estimating Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to use the `stan_lmer`, `stan_glmer`, `stan_nlmer`, and `stan_gamm4` functions in the __rstanarm__ package to estimate linear and generalized (non-)linear models with parameters that may vary across groups. Before continuing, we recommend reading the vignettes (navigate up one level) for the various ways to use the `stan_glm` function. The _Hierarchical Partial Pooling_ vignette also has examples of both `stan_glm` and `stan_glmer`. # GLMs with group-specific terms Models with this structure are refered to by many names: multilevel models, (generalized) linear mixed (effects) models (GLMM), hierarchical (generalized) linear models, etc. In the simplest case, the model for an outcome can be written as $$\mathbf{y} = \alpha + \mathbf{X} \boldsymbol{\beta} + \mathbf{Z} \mathbf{b} + \boldsymbol{\epsilon},$$ where $\mathbf{X}$ is a matrix predictors that is analogous to that in Generalized Linear Models and $\mathbf{Z}$ is a matrix that encodes deviations in the predictors across specified groups. The terminology for the unknowns in the model is diverse. To frequentists, the error term consists of $\mathbf{Z}\mathbf{b} + \boldsymbol{\epsilon}$ and the observations within each group are _not_ independent conditional on $\mathbf{X}$ alone. Since, $\mathbf{b}$ is considered part of the random error term, frequentists allow themselves to make distributional assumptions about $\mathbf{b}$, invariably that it is distributed multivariate normal with mean vector zero and structured covariance matrix $\boldsymbol{\Sigma}$. 
If $\epsilon_i$ is also distributed (univariate) normal with mean zero and standard deviation $\sigma$, then $\mathbf{b}$ can be integrated out, which implies $$\mathbf{y} \thicksim \mathcal{N}\left(\alpha + \mathbf{X}\boldsymbol{\beta}, \sigma^2 \mathbf{I}+\mathbf{Z} \boldsymbol{\Sigma} \mathbf{Z}^\top \right),$$ and it is possible to maximize this likelihood function by choosing proposals for the parameters $\alpha$, $\boldsymbol{\beta}$, and (the free elements of) $\boldsymbol{\Sigma}$. Consequently, frequentists refer to $\mathbf{b}$ as the _random effects_ because they capture the random deviation in the effects of predictors from one group to the next. In contradistinction, $\alpha$ and $\boldsymbol{\beta}$ are referred to as _fixed effects_ because they are the same for all groups. Moreover, $\alpha$ and $\boldsymbol{\beta}$ persist in the model in hypothetical replications of the analysis that draw the members of the groups afresh every time, whereas $\mathbf{b}$ would differ from one replication to the next. Consequently, $\mathbf{b}$ is not a "parameter" to be estimated because parameters are unknown constants that are fixed in repeated sampling. Bayesians condition on the data in-hand without reference to repeated sampling and describe their _beliefs_ about the unknowns with prior distributions before observing the data. Thus, the likelihood in a simple hierarchical model in __rstanarm__ is $$\mathbf{y} \thicksim \mathcal{N}\left(\alpha + \mathbf{X}\boldsymbol{\beta} + \mathbf{Z}\mathbf{b}, \sigma^2 \mathbf{I}\right)$$ and the observations are independent conditional on $\mathbf{X}$ and $\mathbf{Z}$. 
In this formulation, there are * intercept(s) and coefficients that are _common across groups_ * deviations in the intercept(s) and / or coefficients that _vary across groups_ Bayesians are compelled to state their prior beliefs about all unknowns and the usual assumption (which is maintained in __rstanarm__) is that $\mathbf{b} \thicksim \mathcal{N}\left(\mathbf{0},\boldsymbol{\Sigma}\right),$ but it is then necessary to state prior beliefs about $\boldsymbol{\Sigma}$, in addition to $\alpha$, $\boldsymbol{\beta}$, and $\sigma$. One of the many challenges of fitting models to data comprising multiple groupings is confronting the tradeoff between validity and precision. An analysis that disregards between-group heterogeneity can yield parameter estimates that are wrong if there is between-group heterogeneity but would be relatively precise if there actually were no between-group heterogeneity. Group-by-group analyses, on the other hand, are valid but produce estimates that are relatively imprecise. While complete pooling or no pooling of data across groups is sometimes called for, models that ignore the grouping structures in the data tend to underfit or overfit (Gelman et al., 2013). Hierarchical modeling provides a compromise by allowing parameters to vary by group at lower levels of the hierarchy while estimating common parameters at higher levels. Inference for each group-level parameter is informed not only by the group-specific information contained in the data but also by the data for other groups as well. This is commonly referred to as _borrowing strength_ or _shrinkage_. In __rstanarm__, these models can be estimated using the `stan_lmer` and `stan_glmer` functions, which are similar in syntax to the `lmer` and `glmer` functions in the __lme4__ package. However, rather than performing (restricted) maximum likelihood (RE)ML estimation, Bayesian estimation is performed via MCMC. 
The Bayesian model adds independent prior distributions on the regression coefficients (in the same way as `stan_glm`) as well as priors on the terms of a decomposition of the covariance matrices of the group-specific parameters. These priors are discussed in greater detail below. # Priors on covariance matrices In this section we discuss a flexible family of prior distributions for the unknown covariance matrices of the group-specific coefficients. ### Overview For each group, we assume the vector of varying slopes and intercepts is a zero-mean random vector following a multivariate Gaussian distribution with an unknown covariance matrix to be estimated. Unfortunately, expressing prior information about a covariance matrix is not intuitive and can also be computationally challenging. When the covariance matrix is not $1\times 1$, it is often both much more intuitive and efficient to work instead with the __correlation__ matrix and variances. When the covariance matrix is $1\times 1$, we still denote it as $\boldsymbol{\Sigma}$ but most of the details in this section do not apply. The variances are in turn decomposed into the product of a simplex vector (probability vector) and the trace of the implied covariance matrix, which is defined as the sum of its diagonal elements. Finally, this trace is set equal to the product of the order of the matrix and the square of a scale parameter. This implied prior on a covariance matrix is represented by the `decov` (short for decomposition of covariance) function in __rstanarm__. ### Details Using the decomposition described above, the prior used for a correlation matrix $\Omega$ is called the LKJ distribution and has a probability density function proportional to the determinant of the correlation matrix raised to a power of $\zeta$ minus one: $$ f(\Omega | \zeta) \propto \text{det}(\Omega)^{\zeta - 1}, \quad \zeta > 0. 
$$ The shape of this prior depends on the value of the regularization parameter, $\zeta$ in the following ways: * If $\zeta = 1$ (the default), then the LKJ prior is jointly uniform over all correlation matrices of the same dimension as $\Omega$. * If $\zeta > 1$, then the mode of the distribution is the identity matrix. The larger the value of $\zeta$ the more sharply peaked the density is at the identity matrix. * If $0 < \zeta < 1$, then the density has a trough at the identity matrix. The $J \times J$ covariance matrix $\Sigma$ of a random vector $\boldsymbol{\theta} = (\theta_1, \dots, \theta_J)$ has diagonal entries ${\Sigma}_{jj} = \sigma^2_j = \text{var}(\theta_j)$. Therefore, the trace of the covariance matrix is equal to the sum of the variances. We set the trace equal to the product of the order of the covariance matrix and the square of a positive scale parameter $\tau$: $$\text{tr}(\Sigma) = \sum_{j=1}^{J} \Sigma_{jj} = J\tau^2.$$ The vector of variances is set equal to the product of a simplex vector $\boldsymbol{\pi}$ --- which is non-negative and sums to 1 --- and the scalar trace: $J \tau^2 \boldsymbol{\pi}$. Each element $\pi_j$ of $\boldsymbol{\pi}$ then represents the proportion of the trace (total variance) attributable to the corresponding variable $\theta_j$. For the simplex vector $\boldsymbol{\pi}$ we use a symmetric Dirichlet prior, which has a single _concentration_ parameter $\gamma > 0$: * If $\gamma = 1$ (the default), then the prior is jointly uniform over the space of simplex vectors with $J$ elements. * If $\gamma > 1$, then the prior mode corresponds to all variables having the same (proportion of total) variance, which can be used to ensure that the posterior variances are not zero. As the concentration parameter approaches infinity, this mode becomes more pronounced. * If $0 < \gamma < 1$, then the variances are more polarized. 
If all the elements of $\boldsymbol{\theta}$ were multiplied by the same number $k$, the trace of their covariance matrix would increase by a factor of $k^2$. For this reason, it is sensible to use a scale-invariant prior for $\tau$. We choose a Gamma distribution, with shape and scale parameters both set to $1$ by default, implying a unit-exponential distribution. Users can set the shape hyperparameter to some value greater than one to ensure that the posterior trace is not zero. In the case where $\boldsymbol{\Sigma}$ is $1\times 1$, $\tau$ is the cross-group standard deviation in the parameters and its square is the variance (so the Gamma prior with its shape and scale directly applies to the cross-group standard deviation in the parameters). # Comparison with __lme4__ There are several advantages to estimating these models using __rstanarm__ rather than the __lme4__ package. There are also a few drawbacks. In this section we briefly discuss what we find to be the two most important advantages as well as an important disadvantage. ### Advantage: better uncertainty estimates While __lme4__ uses (restricted) maximum likelihood (RE)ML estimation, __rstanarm__ enables full Bayesian inference via MCMC to be performed. It is well known that (RE)ML tends to underestimate uncertainties because it relies on point estimates of hyperparameters. Full Bayes, on the other hand, propagates the uncertainty in the hyperparameters throughout all levels of the model and provides more appropriate estimates of uncertainty for models that consist of a mix of common and group-specific parameters. ### Advantage: incorporate prior information The `stan_glmer` and `stan_lmer` functions allow the user to specify prior distributions over the regression coefficients as well as any unknown covariance matrices. There are various reasons to specify priors, from helping to stabilize computation to incorporating important information into an analysis that does not enter through the data. 
### Disadvantage: speed The benefits of full Bayesian inference (via MCMC) come with a cost. Fitting models with (RE)ML will tend to be much faster than fitting a similar model using MCMC. Speed comparable to __lme4__ can be obtained with __rstanarm__ using approximate Bayesian inference via the mean-field and full-rank variational algorithms (see `help("rstanarm-package", "rstanarm")` for details). These algorithms can be useful to narrow the set of candidate models in large problems, but MCMC should always be used for final statistical inference. # Relationship to `glmer` In the __lme4__ package, there is a fundamental distinction between the way that Linear Mixed Models and Generalized Linear Mixed Models are estimated. In Linear Mixed Models, $\mathbf{b}$ can be integrated out analytically, leaving a likelihood function that can be maximized over proposals for the parameters. To estimate a Linear Mixed Model, one can call the `lmer` function. Generalized Linear Mixed Models are appropriate when the conditional mean of the outcome is determined by an inverse link function, $\boldsymbol{\mu} = g\left(\alpha + \mathbf{X} \boldsymbol{\beta} + \mathbf{Z}\mathbf{b}\right)$. If $g\left(\cdot\right)$ is not the identity function, then it is not possible to integrate out $\mathbf{b}$ analytically and numerical integration must be used. To estimate a Generalized Linear Mixed Model, one can call the `glmer` function and specify the `family` argument. In the __rstanarm__ package, there is no such fundamental distinction; in fact `stan_lmer` simply calls `stan_glmer` with `family = gaussian(link = "identity")`. Bayesians do not (have to) integrate $\mathbf{b}$ out of the likelihood and if $\mathbf{b}$ is not of interest, then the margins of its posterior distribution can simply be ignored. 
# Relationship to `gamm4` The __rstanarm__ package includes a `stan_gamm4` function that is similar to the `gamm4` function in the __gamm4__ package, which is in turn similar to the `gamm` function in the __mgcv__ package. The substring `gamm` stands for Generalized Additive Mixed Models, which differ from Generalized Additive Models (GAMs) due to the presence of group-specific terms that can be specified with the syntax of __lme4__. Both GAMs and GAMMs include nonlinear functions of (non-categorical) predictors called "smooths". In the example below, so-called "thin-plate splines" are used to model counts of roaches where we might fear that the number of roaches in the current period is an exponentially increasing function of the number of roaches in the previous period. Unlike `stan_glmer`, in `stan_gamm4` it is necessary to specify group-specific terms as a one-sided formula that is passed to the `random` argument as in the `lme` function in the __nlme__ package. ```{r, results = "hide"} library(rstanarm) data(roaches) roaches$roach1 <- roaches$roach1 / 100 roaches$log_exposure2 <- log(roaches$exposure2) post <- stan_gamm4( y ~ s(roach1) + treatment + log_exposure2, random = ~(1 | senior), data = roaches, family = neg_binomial_2, QR = TRUE, cores = 2, chains = 2, adapt_delta = 0.99, seed = 12345 ) ``` ```{r} plot_nonlinear(post) ``` Here we see that the relationship between past and present roaches is estimated to be nonlinear. For a small number of past roaches, the function is steep and then it appears to flatten out, although we become highly uncertain about the function in the rare cases where the number of past roaches is large. # Relationship to `nlmer` The `stan_gamm4` function allows designated predictors to have a nonlinear effect on what would otherwise be called the "linear" predictor in Generalized Linear Models. 
The `stan_nlmer` function is similar to the `nlmer` function in the __lme4__ package, and essentially allows a wider range of nonlinear functions that relate the linear predictor to the conditional expectation of a Gaussian outcome. To estimate an example model with the `nlmer` function in the __lme4__ package, we start by rescaling the outcome and main predictor(s) by a constant ```{r} data("Orange", package = "datasets") Orange$age <- Orange$age / 100 Orange$circumference <- Orange$circumference / 100 ``` Although doing so has no substantive effect on the inferences obtained, it is numerically much easier for Stan and for __lme4__ to work with variables whose units are such that the estimated parameters tend to be single-digit numbers that are not too close to zero. The `nlmer` function requires that the user pass starting values to the ironically-named self-starting non-linear function: ```{r, warning=TRUE} startvec <- c(Asym = 2, xmid = 7.25, scal = 3.5) library(lme4) nm1 <- nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, start = startvec) summary(nm1) ``` Note the warning messages indicating difficulty estimating the variance-covariance matrix. Although __lme4__ has a fallback mechanism, the need to utilize it suggests that the sample is too small to sustain the asymptotic assumptions underlying the maximum likelihood estimator. In the above example, we use the `SSlogis` function, which is a lot like the logistic CDF, but with an additional `Asym` argument that need not be one and indicates what value the function approaches for large values of the first argument. In this case, we can interpret the asymptote as the maximum possible circumference for an orange. 
However, this asymptote is allowed to vary from tree to tree using the `Asym | Tree` syntax, which reflects an assumption that the asymptote for a randomly-selected tree deviates from the asymptote for the population of orange trees in a Gaussian fashion with mean zero and an unknown standard deviation. The `nlmer` function supports user-defined non-linear functions, whereas the `stan_nlmer` function only supports the pre-defined non-linear functions starting with `SS` in the __stats__ package, which are ```{r, echo = FALSE} grep("^SS[[:lower:]]+", ls("package:stats"), value = TRUE) ``` To fit essentially the same model using Stan's implementation of MCMC, we add a `stan_` prefix ```{r, results = "hide"} post1 <- stan_nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, cores = 2, seed = 12345, init_r = 0.5) ``` ```{r} post1 ``` In `stan_nlmer`, it is not necessary to supply starting values; however, in this case it was necessary to specify the `init_r` argument so that the randomly-chosen starting values were not more than $0.5$ away from zero (in the unconstrained parameter space). The default value of $2.0$ produced suboptimal results. As can be seen, the posterior medians and estimated standard deviations in the MCMC case are quite similar to the maximum likelihood estimates and estimated standard errors. However, `stan_nlmer` produces uncertainty estimates for the tree-specific deviations in the asymptote, which are considerable. 
```{r} plot(post1, regex_pars = "^[b]") ``` As can be seen, the age of the tree has a non-linear effect on the predicted circumference of the tree (here for an out-of-sample tree): ```{r} nd <- data.frame(age = 1:20, Tree = factor("6", levels = 1:6)) PPD <- posterior_predict(post1, newdata = nd) PPD_df <- data.frame(age = as.factor(rep(1:20, each = nrow(PPD))), circumference = c(PPD)) ggplot(PPD_df, aes(age, circumference)) + geom_boxplot() ``` If we were pharmacologists, we could evaluate drug concentration using a first-order compartment model, such as ```{r, eval = FALSE} post3 <- stan_nlmer(conc ~ SSfol(Dose, Time, lKe, lKa, lCl) ~ (0 + lKe + lKa + lCl | Subject), data = Theoph, cores = 2, seed = 12345, QR = TRUE, init_r = 0.25, adapt_delta = 0.999) pairs(post3, regex_pars = "^l") pairs(post3, regex_pars = "igma") ``` However, in this case the posterior distribution is bimodal. Thus, you should always be running many chains when using Stan, especially `stan_nlmer`. # Conclusion There are model fitting functions in the **rstanarm** package that can do essentially all of what can be done in the **lme4** and **gamm4** packages --- in the sense that they can fit models with multilevel structure and / or nonlinear relationships --- and propagate the uncertainty in the parameter estimates to the predictions and other functions of interest. The documentation of **lme4** and **gamm4** has various warnings that acknowledge that the estimated standard errors, confidence intervals, etc. are not entirely correct, even from a frequentist perspective. A frequentist point estimate would also completely miss the second mode in the last example with `stan_nlmer`. Thus, there is considerable reason to prefer the **rstanarm** variants of these functions for regression modeling. The only disadvantage is the execution time required to produce an answer that properly captures the uncertainty in the estimates of complicated models such as these. 
rstanarm/vignettes/betareg.Rmd0000644000176200001440000002252113722762571016204 0ustar liggesusers--- title: "Modeling Rates/Proportions using Beta Regression with rstanarm" author: "Imad Ali, Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to model continuous outcomes on the open unit interval using the `stan_betareg` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of beta distributions. # Likelihood Beta regression uses the beta distribution as the likelihood for the data, $$ f(y_i | a, b) = \frac{y_i^{(a-1)}(1-y_i)^{(b-1)}}{B(a,b)} $$ where $B(\cdot)$ is the beta function. The shape parameters for the distribution are $a$ and $b$ and enter into the model according to the following transformations, $$ a = \mu\cdot\phi \\ b = (1-\mu)\cdot\phi $$ Let $g_1(\cdot)$ be some link function. Then, in the specification of the shape parameters above, $\mu = g_1^{-1}(\mathbf{X}\boldsymbol{\beta})$, where $\boldsymbol{X}$ is a $N\times K$ dimensional matrix of predictors, and $\boldsymbol{\beta}$ is a $K$ dimensional vector of parameters associated with each predictor. In the simplest case (with only one set of regressors), $\phi$ is a scalar parameter. Alternatively, it is possible to model $\phi$ using a second set of regressors $\mathbf{Z}$. In this context let $g_2(\cdot)$ be some link function that is not necessarily identical to $g_1(\cdot)$. Then $\phi = g_2^{-1}(\mathbf{Z}\boldsymbol{\gamma})$, where $\boldsymbol{\gamma}$ is a $J$ dimensional vector of parameters associated with the $N\times J$ dimensional matrix of predictors $\mathbf{Z}$. 
After substituting the shape parameter values in, the likelihood used in beta regression takes the following form, $$ f(y_i | \mu, \phi) = \frac{y_i^{(\mu\phi-1)}(1-y_i)^{((1-\mu)\phi-1)}}{B(\mu\phi,(1-\mu)\phi)} $$ # Priors A full Bayesian analysis requires specifying prior distributions $f(\boldsymbol{\beta})$ and $f(\phi)$ for the vector of regression coefficients and $\phi$. When using `stan_betareg`, these distributions can be set using the `prior_intercept`, `prior`, and `prior_phi` arguments. The `stan_betareg` function supports a variety of prior distributions, which are explained in the __rstanarm__ documentation (`help(priors, package = 'rstanarm')`). When modeling $\phi$ with a linear predictor a full Bayesian analysis requires specifying the prior distributions $f(\boldsymbol{\beta})$ and $f(\boldsymbol{\gamma})$. In `stan_betareg` the prior distributions on $\boldsymbol{\gamma}$ can be set using the `prior_intercept_z` and `prior_z` arguments. As an example, suppose we have $K$ predictors and believe --- prior to seeing the data --- that $\beta_1, \dots, \beta_K$ and $\phi$ are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give $\phi$ and each of the $\beta$s this prior (with a scale of 1, say), in the call to `stan_betareg` we would include the arguments `prior_intercept = normal(0,1)`, `prior = normal(0,1)`, and `prior_phi = normal(0,1)`. If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. __Step 1__ in the "How to Use the __rstanarm__ Package" vignette discusses one such example. 
After fitting the model we can use the `prior_summary` function to print information about the prior distributions used when fitting the model. # Posterior When using only a *single set of regressors*, the posterior distribution of $\boldsymbol{\beta}$ and $\phi$ is proportional to the product of the likelihood contributions, the $K$ priors on the $\beta_k$ parameters, and $\phi$, $$ f(\boldsymbol{\beta},\phi|\mathbf{y},\mathbf{X}) \propto \prod_{i=1}^N f(y_i | a, b) \times \prod_{k=1}^K f(\beta_k) \times f(\phi) $$ When using *two sets of regressors*, the posterior distribution of $\boldsymbol{\beta}$ and $\boldsymbol{\gamma}$ is proportional to the product of the likelihood contribution, the $K$ priors on the $\beta_k$ parameters, and the $J$ priors on the $\gamma_j$ parameters, $$ f(\boldsymbol{\beta},\boldsymbol{\gamma}|\mathbf{y},\mathbf{X}) \propto \prod_{i=1}^N f(y_i | a, b) \times \prod_{k=1}^K f(\beta_k) \times \prod_{j=1}^J f(\gamma_j) $$ # An Example Using Simulated Data In this example the outcome variable $\mathbf{y}$ is simulated in a way that warrants the use of beta regression. It is worth mentioning that the data generation process is quite convoluted, which is apparent in the identification of the likelihood above. The data simulated below uses the logistic link function on the first set of regressors and the log link function on the second set of regressors. ```{r simulated-data, fig.height=5} SEED <- 1234 set.seed(SEED) eta <- c(1, -0.2) gamma <- c(1.8, 0.4) N <- 200 x <- rnorm(N, 2, 2) z <- rnorm(N, 0, 2) mu <- binomial(link = logit)$linkinv(eta[1] + eta[2]*x) phi <- binomial(link = log)$linkinv(gamma[1] + gamma[2]*z) y <- rbeta(N, mu * phi, (1 - mu) * phi) dat <- data.frame(cbind(y, x, z)) hist(dat$y, col = "darkgrey", border = F, main = "Distribution of Outcome Variable", xlab = "y", breaks = 20, freq = F) ``` The model can be fit by calling `stan_betareg`, using the appropriate link functions. 
```{r simulated-fit, results = "hide"} library(rstanarm) fit1 <- stan_betareg(y ~ x | z, data = dat, link = "logit", link.phi = "log", cores = 2, seed = 12345) fit2 <- stan_betareg(y ~ -1 + x , data = dat, link = "logit", link.phi = "log", cores = 2, seed = 12345) round(coef(fit1), 2) round(coef(fit2), 2) ``` ``` {r simulated-fit-print, echo=FALSE} round(coef(fit1), 2) round(coef(fit2), 2) ``` For clarity we can use `prior_summary` to print the information about the prior distributions used to fit the models. The priors used in `fit1` are provided below. ``` {r print-priors} prior_summary(fit1) ``` The usual posterior analyses are available in **rstanarm**. The plots below illustrate simulated values of the outcome variable. The incorrect model noticeably fails to capture the top of the distribution consistently in comparison to the true model. ```{r simulated-analysis, fig.height=5} library(ggplot2) library(bayesplot) bayesplot_grid( pp_check(fit1), pp_check(fit2), xlim = c(0,1), ylim = c(0,4), titles = c("True Model: y ~ x | z", "False Model: y ~ x - 1"), grid_args = list(ncol = 2) ) ``` We can also compare models by evaluating the expected log pointwise predictive density (`elpd`), which can be calculated using the `loo` method, which provides an interface for __rstanarm__ models to the functionality in the __loo__ package. ``` {r simulated-loo} loo1 <- loo(fit1) loo2 <- loo(fit2) loo_compare(loo1, loo2) ``` The difference in `elpd` is negative indicating that the expected predictive accuracy for the first model is higher. # An Example Using Gasoline Data In some applied contexts it may be necessary to work with an outcome variable that is a proportion. If the proportion is bound on the open unit interval then beta regression can be considered a reasonable estimation method. The `betareg` package provides a dataset on the proportion of crude oil converted to gasoline after distillation and fractionation. This variable is defined as yield. 
Below `stan_betareg` is used to model yield as a function of temperature, pressure, and the batch of conditions. ```{r, gas-fit, results="hide"} library(rstanarm) data("GasolineYield", package = "betareg") gas_fit1 <- stan_betareg(yield ~ temp + batch, data = GasolineYield, link = "logit", seed = 12345) gas_fit2 <- stan_betareg(yield ~ temp + batch | pressure, data = GasolineYield, link = "logit", seed = 12345) round(coef(gas_fit1), 2) round(coef(gas_fit2), 2) ``` ``` {r, gas-print, echo=FALSE} round(coef(gas_fit1), 2) round(coef(gas_fit2), 2) ``` The plots below illustrate simulated values of gasoline yield. While the first model accounts for variation in batch conditions its predictions looks somewhat uniform rather than resembling the peaked and right-skewed behavior of the true data. The second model does a somewhat better job at capturing the shape of the distribution, however its location is off as it is centered around 0.50 rather than 0.20. ```{r gas-analysis, fig.height=5} library(ggplot2) bayesplot_grid( pp_check(gas_fit1), pp_check(gas_fit2), xlim = c(0,1), ylim = c(0,5), titles = c("gas_fit1", "gas_fit2"), grid_args = list(ncol = 2) ) ``` ``` {r, gas-loo} gas_loo1 <- loo(gas_fit1) gas_loo2 <- loo(gas_fit2) loo_compare(gas_loo1, gas_loo2) ``` Evaluating the expected log predictive distribution using `loo` reveals that the second of the two models is preferred. # References Ferrari, SLP and Cribari-Neto, F (2004) "Beta Regression for Modeling Rates and Proportions". _Journal of Applied Statistics._ Vol. 31, No. 07, p799-815. 
rstanarm/vignettes/rstanarm.Rmd0000644000176200001440000006310214370470372016415 0ustar liggesusers--- title: "How to Use the rstanarm Package" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette provides an _overview_ of how to use the functions in the __rstanarm__ package that focuses on commonalities. The other __rstanarm__ vignettes go into the particularities of each of the individual model-estimating functions. The goal of the __rstanarm__ package is to make Bayesian estimation _routine_ for the most common regression models that applied researchers use. This will enable researchers to avoid the counter-intuitiveness of the frequentist approach to probability and statistics with only minimal changes to their existing R scripts. ```{r, child="children/four_steps.txt"} ``` Step 1 is necessarily model-specific and is covered in more detail in the other vignettes that cover specific forms of the marginal prior distribution and likelihood of the outcome. It is somewhat more involved than the corresponding first step of a frequentist analysis, which only requires that the likelihood of the outcome be specified. However, the default priors in the __rstanarm__ package should work well in the majority of cases. Steps 2, 3, and 4 are the focus of this vignette because they are largely not specific to how the joint distribution in Step 1 is specified. The key concept in Step 3 and Step 4 is the posterior predictive distribution, which is the distribution of the outcome implied by the model after having used the observed data to update our beliefs about the unknown parameters. Frequentists, by definition, have no posterior predictive distribution and frequentist predictions are subtly different from what applied researchers seek. 
Maximum likelihood estimates do _not_ condition on the observed outcome data and so the uncertainty in the estimates pertains to the variation in the sampling distribution of the estimator, i.e. the distribution of estimates that would occur if we could repeat the process of drawing a random sample from a well-defined population and apply the estimator to each sample. It is possible to construct a distribution of predictions under the frequentist paradigm but it evokes the hypothetical of repeating the process of drawing a random sample, applying the estimator each time, and generating point predictions of the outcome. In contrast, the posterior predictive distribution conditions on the observed outcome data in hand to update beliefs about the unknowns and the variation in the resulting distribution of predictions reflects the remaining uncertainty in our beliefs about the unknowns. # Step 1: Specify a posterior distribution For the sake of discussion, we need some posterior distribution to draw from. We will utilize an example from the __HSAUR3__ package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book _A Handbook of Statistical Analyses Using R (3rd Edition)_ (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results. The model in section 6.3.2 pertains to whether a survey respondent agrees or disagrees with a conservative statement about the role of women in society, which is modeled as a function of the gender and education of the respondents. 
The posterior distribution --- with independent priors --- can be written as $$f\left(\alpha,\beta_1,\beta_2|\mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) f\left(\beta_1\right) f\left(\beta_2\right) \times \prod_{i=1}^J { g^{-1}\left(\eta_i\right)^{y_i} \left(1 - g^{-1}\left(\eta_i\right)\right)^{n_i-y_i}},$$ where $\eta_i = \alpha + \beta_1 \mbox{education}_i + \beta_2 \mbox{Female}_i$ is the linear predictor and a function of an intercept $\left(\alpha\right)$, a coefficient on the years of education $\left(\beta_1\right)$, and an intercept-shift $\left(\beta_2\right)$ for the case where the respondent is female. These data are organized such that $y_i$ is the number of respondents who agree with the statement that have the same level of education and the same gender, and $n_i - y_i$ is the number of such people who disagree with the statement. The inverse link function, $p = g^{-1}\left(\eta_i \right)$, for a binomial likelihood can be one of several Cumulative Distribution Functions (CDFs) but in this case is the standard logistic CDF, $g^{-1}\left(\eta_i \right)=\frac{1}{1 + e^{-\eta_i}}$. Suppose we believe --- prior to seeing the data --- that $\alpha$, $\beta_1$, and $\beta_2$ are probably close to zero, are as likely to be positive as they are to be negative, but have a small chance of being quite far from zero. These beliefs can be represented by Student t distributions with a few degrees of freedom in order to produce moderately heavy tails. In particular, we will specify seven degrees of freedom. Note that these purported beliefs may well be more skeptical than your actual beliefs, which are probably that women and people with more education have less conservative societal views. ### Note on "prior beliefs" and default priors In this vignette we use the term "prior beliefs" to refer in generality to the information content of the prior distribution (conditional on the model). 
Sometimes previous research on the topic of interest motivates beliefs about model parameters, but other times such work may not exist or several studies may make contradictory claims. Regardless, we nearly always have _some_ knowledge that should be reflected in our choice of prior distributions. For example, no one believes a logistic regression coefficient will be greater than five in absolute value if the predictors are scaled reasonably. You may also have seen examples of so-called "non-informative" (or "vague", "diffuse", etc.) priors like a normal distribution with a variance of 1000. When data are reasonably scaled, these priors are almost always a bad idea for various reasons (they give non-trivial weight to extreme values, reduce computational efficiency, etc). The default priors in __rstanarm__ are designed to be _weakly informative_, by which we mean that they avoid placing unwarranted prior weight on nonsensical parameter values and provide some regularization to avoid overfitting, but also do allow for extreme values if warranted by the data. If additional information is available, the weakly informative defaults can be replaced with more informative priors. # Step 2: Draw from the posterior distribution The likelihood for the sample is just the product over the $J$ groups of $$g^{-1}\left(\eta_i \right)^{y_i} \left(1 - g^{-1}\left(\eta_i \right)\right)^{n_i-y_i},$$ which can be maximized over $\alpha$, $\beta_1$, and $\beta_2$ to obtain frequentist estimates by calling ```{r rstanarm-mle, eval = TRUE} data("womensrole", package = "HSAUR3") womensrole$total <- womensrole$agree + womensrole$disagree womensrole_glm_1 <- glm(cbind(agree, disagree) ~ education + gender, data = womensrole, family = binomial(link = "logit")) round(coef(summary(womensrole_glm_1)), 3) ``` The p-value for the null hypothesis that $\beta_1 = 0$ is very small, while the p-value for the null hypothesis that $\beta_2 = 0$ is very large. 
However, frequentist p-values are awkward because they do not pertain to the probability that a scientific hypothesis is true but rather to the probability of observing a $z$-statistic that is so large (in magnitude) if the null hypothesis were true. The desire to make probabilistic statements about a scientific hypothesis is one reason why many people are drawn to the Bayesian approach. A model with the same likelihood but Student t priors with seven degrees of freedom can be specified using the __rstanarm__ package in a similar way by prepending `stan_` to the `glm` call and specifying priors (and optionally the number of cores on your computer to utilize): ```{r rstanarm-mcmc, results="hide", eval = TRUE} library(rstanarm) womensrole_bglm_1 <- stan_glm(cbind(agree, disagree) ~ education + gender, data = womensrole, family = binomial(link = "logit"), prior = student_t(df = 7, 0, 5), prior_intercept = student_t(df = 7, 0, 5), cores = 2, seed = 12345) womensrole_bglm_1 ``` ```{r, echo=FALSE, eval = TRUE} print(womensrole_bglm_1) ``` As can be seen, the "Bayesian point estimates" --- which are represented by the posterior medians --- are very similar to the maximum likelihood estimates. Frequentists would ask whether the point estimate is greater in magnitude than double the estimated standard deviation of the sampling distribution. But here we simply have estimates of the standard deviation of the marginal posterior distributions, which are based on a scaling of the Median Absolute Deviation (MAD) from the posterior medians to obtain a robust estimator of the posterior standard deviation. 
In addition, we can use the `posterior_interval` function to obtain a Bayesian uncertainty interval for $\beta_1$: ```{r rstanarm-ci, eval = TRUE} ci95 <- posterior_interval(womensrole_bglm_1, prob = 0.95, pars = "education") round(ci95, 2) ``` Unlike frequentist confidence intervals --- which are _not_ interpretable in terms of post-data probabilities --- the Bayesian uncertainty interval indicates we believe after seeing the data that there is a $0.95$ probability that $\beta_2$ is between `ci95[1,1]` and `ci95[1,2]`. Alternatively, we could say that there is essentially zero probability that $\beta_2 > 0$, although frequentists cannot make such a claim coherently. Many of the post-estimation methods that are available for a model that is estimated by `glm` are also available for a model that is estimated by `stan_glm`. For example, ```{r rstanarm-methods, eval = TRUE} cbind(Median = coef(womensrole_bglm_1), MAD_SD = se(womensrole_bglm_1)) summary(residuals(womensrole_bglm_1)) # not deviance residuals cov2cor(vcov(womensrole_bglm_1)) ``` __rstanarm__ does provide a `confint` method, although it is reserved for computing confidence intervals in the case that the user elects to estimate a model by (penalized) maximum likelihood. When using full Bayesian inference (the __rstanarm__ default) or approximate Bayesian inference the `posterior_interval` function should be used to obtain Bayesian uncertainty intervals. # Step 3: Criticize the model The `launch_shinystan` function in the __shinystan__ package provides almost all the tools you need to visualize the posterior distribution and diagnose any problems with the Markov chains. In this case, the results are fine and to verify that, you can call ```{r rstanarm-shinystan, eval = FALSE} launch_shinystan(womensrole_bglm_1, ppd = FALSE) ``` which will open a web browser that drives the visualizations. For the rest of this subsection, we focus on what users can do programmatically to evaluate whether a model is adequate. 
A minimal requirement for Bayesian estimates is that the model should fit the data that the estimates conditioned on. The key function here is `posterior_predict`, which can be passed a new `data.frame` to predict out-of-sample, but in this case is omitted to obtain in-sample posterior predictions: ```{r rstanarm-posterior_predict, eval = TRUE} y_rep <- posterior_predict(womensrole_bglm_1) dim(y_rep) ``` The resulting matrix has rows equal to the number of posterior simulations, which in this case is $2000$ and columns equal to the number of observations in the original dataset, which is $42$ combinations of education and gender. Each element of this matrix is a predicted number of respondents with that value of education and gender who agreed with the survey question and thus should be reasonably close to the observed proportion of agreements in the data. We can create a plot to check this: ```{r rstanarm-criticism-plot, fig.width=8, out.width="90%", fig.cap="Posterior predictive boxplots vs. 
observed datapoints", eval = TRUE} par(mfrow = 1:2, mar = c(5,3.7,1,0) + 0.1, las = 3) boxplot(sweep(y_rep[,womensrole$gender == "Male"], 2, STATS = womensrole$total[womensrole$gender == "Male"], FUN = "/"), axes = FALSE, main = "Male", pch = NA, xlab = "Years of Education", ylab = "Proportion of Agrees") with(womensrole, axis(1, at = education[gender == "Male"] + 1, labels = 0:20)) axis(2, las = 1) with(womensrole[womensrole$gender == "Male",], points(education + 1, agree / (agree + disagree), pch = 16, col = "red")) boxplot(sweep(y_rep[,womensrole$gender == "Female"], 2, STATS = womensrole$total[womensrole$gender == "Female"], FUN = "/"), axes = FALSE, main = "Female", pch = NA, xlab = "Years of Education", ylab = "") with(womensrole, axis(1, at = education[gender == "Female"] + 1, labels = 0:20)) with(womensrole[womensrole$gender == "Female",], points(education + 1, agree / (agree + disagree), pch = 16, col = "red")) ``` Here the boxplots provide the median, interquartile range, and hinges of the posterior predictive distribution for a given gender and level of education, while the red points represent the corresponding observed data. As can be seen, the model predicts the observed data fairly well for six to sixteen years of education but predicts less well for very low or very high levels of education where there are less data. Consequently, we might consider a model where education has a quadratic effect on agreement, which is easy to specify using R's formula-based syntax. ```{r, rstanarm-update, results="hide", eval = TRUE} (womensrole_bglm_2 <- update(womensrole_bglm_1, formula. = . ~ . + I(education^2))) ``` ```{r, echo=FALSE} print(womensrole_bglm_2) ``` Frequentists would test the null hypothesis that the coefficient on the squared level of education is zero. Bayesians might ask whether such a model is expected to produce better out-of-sample predictions than a model with only the level of education. 
The latter question can be answered using leave-one-out cross-validation or the approximation thereof provided by the `loo` function in the __loo__ package, for which a method is provided by the __rstanarm__ package. ```{r rstanarm-loo, eval = TRUE} loo_bglm_1 <- loo(womensrole_bglm_1) loo_bglm_2 <- loo(womensrole_bglm_2) ``` First, we verify that the posterior is not too sensitive to any particular observation in the dataset. ```{r rstanarm-loo-plot, fig.width=7, out.width="70%", eval = TRUE} par(mfrow = 1:2, mar = c(5,3.8,1,0) + 0.1, las = 3) plot(loo_bglm_1, label_points = TRUE) plot(loo_bglm_2, label_points = TRUE) ``` There are only one or two moderate outliers (whose statistics are greater than $0.5$), which should not have too much of an effect on the resulting model comparison: ```{r, rstanarm-loo-compare, eval = TRUE} loo_compare(loo_bglm_1, loo_bglm_2) ``` In this case, there is little difference in the expected log pointwise deviance between the two models, so we are essentially indifferent between them after taking into account that the second model estimates an additional parameter. The "LOO Information Criterion (LOOIC)" ```{r, rstanarm-loo-print, eval = TRUE} loo_bglm_1 ``` has the same purpose as the Akaike Information Criterion (AIC) that is used by frequentists. Both are intended to estimate the expected log predicted density (ELPD) for a new dataset. However, the AIC ignores priors and assumes that the posterior distribution is multivariate normal, whereas the functions from the loo package used here do not assume that the posterior distribution is multivariate normal and integrate over uncertainty in the parameters. This only assumes that any one observation can be omitted without having a major effect on the posterior distribution, which can be judged using the plots above. 
# Step 4: Analyze manipulations of predictors Frequentists attempt to interpret the estimates of the model, which is difficult except when the model is linear, has no inverse link function, and contains no interaction terms. Bayesians can avoid this difficulty simply by inspecting the posterior predictive distribution at different levels of the predictors. For example, ```{r, rstanarm-posterior_predict-manipulate, eval = TRUE} # note: in newdata we want agree and disagree to sum to the number of people we # want to predict for. the values of agree and disagree don't matter so long as # their sum is the desired number of trials. we need to explicitly imply the # number of trials like this because our original data are aggregate. if we had # bernoulli data then it would be a given we wanted to predict for single # individuals. newdata <- data.frame(agree = c(0,0), disagree = c(100,100), education = c(12,16), gender = factor("Female", levels = c("Male", "Female"))) y_rep <- posterior_predict(womensrole_bglm_2, newdata) summary(apply(y_rep, 1, diff)) ``` As can be seen, out of $100$ women who have a college degree versus $100$ women with only a high school degree, we would expect about $20$ fewer college-educated women to agree with the question. There is an even chance that the difference is between $24$ and $16$, a one-in-four chance that it is greater, and one-in-four chance that it is less. # Troubleshooting This section provides suggestions for how to proceed when you encounter warning messages generated by the modeling functions in the __rstanarm__ package. The example models below are used just for the purposes of concisely demonstrating certain difficulties and possible remedies (we won't worry about the merit of the models themselves). The references at the end provide more information on the relevant issues. ### Markov chains did not converge __Recommendation:__ run the chains for more iterations.
By default, all __rstanarm__ modeling functions will run four randomly initialized Markov chains, each for 2000 iterations (including a warmup period of 1000 iterations that is discarded). All chains must converge to the target distribution for inferences to be valid. For most models, the default settings are sufficient, but if you see a warning message about Markov chains not converging, the first thing to try is increasing the number of iterations. This can be done by specifying the `iter` argument (e.g. `iter = 3000`). One way to monitor whether a chain has converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. This is the motivation for the Gelman and Rubin potential scale reduction statistic Rhat. The Rhat statistic measures the ratio of the average variance of the draws within each chain to the variance of the pooled draws across chains; if all chains are at equilibrium, these will be the same and Rhat will be one. If the chains have not converged to a common distribution, the Rhat statistic will tend to be greater than one. Gelman and Rubin's recommendation is that the independent Markov chains be initialized with diffuse starting values for the parameters and sampled until all values for Rhat are below 1.1. When any Rhat values are above 1.1 __rstanarm__ will print a warning message like this: Markov chains did not converge! Do not analyze results! To illustrate how to check the Rhat values after fitting a model using __rstanarm__ we'll fit two models and run them for different numbers of iterations. ```{r, rstanarm-rhat-fit, results='hide', warning=TRUE, eval = TRUE} bad_rhat <- stan_glm(mpg ~ ., data = mtcars, iter = 20, chains = 2, seed = 12345) good_rhat <- update(bad_rhat, iter = 1000, chains = 2, seed = 12345) ``` Here the first model leads to the warning message about convergence but the second model does not. 
Indeed, we can see that many Rhat values are much bigger than 1 for the first model: ```{r, rstasnarm-rhat-bad, eval = TRUE} rhat <- summary(bad_rhat)[, "Rhat"] rhat[rhat > 1.1] ``` Since we didn't get a warning for the second model we shouldn't find any parameters with an Rhat far from 1: ```{r, rstasnarm-rhat-good, eval = TRUE} any(summary(good_rhat)[, "Rhat"] > 1.1) ``` Details on the computation of Rhat and some of its limitations can be found in [Stan Modeling Language User's Guide and Reference Manual](https://mc-stan.org/users/documentation/). ### Divergent transitions __Recommendation:__ increase the target acceptance rate `adapt_delta`.
Hamiltonian Monte Carlo (HMC), the MCMC algorithm used by [Stan](https://mc-stan.org), works by simulating the evolution of a Hamiltonian system. Stan uses a [symplectic integrator](https://en.wikipedia.org/wiki/Symplectic_integrator) to approximate the exact solution of the Hamiltonian dynamics. When the step size parameter is too large relative to the curvature of the log posterior this approximation can diverge and threaten the validity of the sampler. __rstanarm__ will print a warning if there are any divergent transitions after the warmup period, in which case the posterior sample may be biased. The recommended method is to increase the `adapt_delta` parameter -- target average proposal acceptance probability in the adaptation -- which will in turn reduce the step size. Each of the modeling functions accepts an `adapt_delta` argument, so to increase `adapt_delta` you can simply change the value from the default value to a value closer to $1$. To reduce the frequency with which users need to manually set `adapt_delta`, the default value depends on the prior distribution used (see `help("adapt_delta", package = "rstanarm")` for details). The downside to increasing the target acceptance rate -- and, as a consequence, decreasing the step size -- is that sampling will tend to be slower. Intuitively, this is because a smaller step size means that more steps are required to explore the posterior distribution. Since the validity of the estimates is not guaranteed if there are any post-warmup divergent transitions, the slower sampling is a minor cost. ### Maximum treedepth exceeded __Recommendation:__ increase the maximum allowed treedepth `max_treedepth`.
Configuring the No-U-Turn-Sampler (the variant of HMC used by Stan) involves putting a cap on the depth of the trees that it evaluates during each iteration. This is controlled through a maximum depth parameter `max_treedepth`. When the maximum allowed tree depth is reached it indicates that NUTS is terminating prematurely to avoid excessively long execution time. If __rstanarm__ prints a warning about transitions exceeding the maximum treedepth you should try increasing the `max_treedepth` parameter using the optional `control` argument. For example, to increase `max_treedepth` to 20 (the default used __rstanarm__ is 15) you can provide the argument `control = list(max_treedepth = 20)` to any of the __rstanarm__ modeling functions. If you do not see a warning about hitting the maximum treedepth (which is rare), then you do not need to worry. # Conclusion In this vignette, we have gone through the four steps of a Bayesian analysis. The first step --- specifying the posterior distribution --- varies considerably from one analysis to the next because the likelihood function employed differs depending on the nature of the outcome variable and our prior beliefs about the parameters in the model varies not only from situation to situation but from researcher to researcher. However, given a posterior distribution and given that this posterior distribution can be drawn from using the __rstanarm__ package, the remaining steps are conceptually similar across analyses. The key is to draw from the posterior predictive distribution of the outcome, which is the what the model predicts the outcome to be after having updated our beliefs about the unknown parameters with the observed data. Posterior predictive distributions can be used for model checking and for making inferences about how manipulations of the predictors would affect the outcome. Of course, all of this assumes that you have obtained draws from the posterior distribution faithfully. 
The functions in the __rstanarm__ package will throw warnings if there is evidence that the draws are tainted, and we have discussed some steps to remedy these problems. For the most part, the model-fitting functions in the __rstanarm__ package are unlikely to produce many such warnings, but they may appear in more complicated models. If the posterior distribution that you specify in the first step cannot be sampled from using the __rstanarm__ package, then it is often possible to create a hand-written program in the the Stan language so that the posterior distribution can be drawn from using the __rstan__ package. See the documentation for the __rstan__ package or https://mc-stan.org for more details about this more advanced usage of Stan. However, many relatively simple models can be fit using the __rstanarm__ package without writing any code in the Stan language, which is illustrated for each estimating function in the __rstanarm__ package in the other [vignettes](index.html). # References Betancourt, M. J., & Girolami, M. (2013). Hamiltonian Monte Carlo for hierarchical models. [arXiv preprint](https://arxiv.org/abs/1312.0906). Stan Development Team. (2015). _Stan modeling language user's guide and reference manual, Version 2.9.0_. https://mc-stan.org/documentation/. See the 'Hamiltonian Monte Carlo Sampling' chapter. Gelman, A., & Rubin, D. B. (1992). Inference from iterative simulation using multiple sequences. _Statistical Science_, 7(4), 457 -- 472. Gelman, A., & Shirley, K. (2011). Inference from simulations and monitoring convergence. In S. Brooks, A. Gelman, G. Jones, & X. Meng (Eds.), _Handbook of Markov chain Monte Carlo_. Boca Raton: Chapman & Hall/CRC. 
rstanarm/vignettes/priors.Rmd0000644000176200001440000003741014370470372016107 0ustar liggesusers--- title: "Prior Distributions for rstanarm Models" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # July 2020 Update As of July 2020 there are a few changes to prior distributions: * Except for in default priors, `autoscale` now defaults to `FALSE`. This means that when specifying custom priors you no longer need to manually set `autoscale=FALSE` every time you use a distribution. * There are minor changes to the default priors on the intercept and (non-hierarchical) regression coefficients. See **Default priors and scale adjustments** below. We recommend the new book [Regression and Other Stories](https://avehtari.github.io/ROS-Examples/), which discusses the background behind the default priors in **rstanarm** and also provides examples of specifying non-default priors. # Introduction This vignette provides an overview of how the specification of prior distributions works in the __rstanarm__ package. It is still a work in progress and more content will be added in future versions of __rstanarm__. Before reading this vignette it is important to first read the [How to Use the __rstanarm__ Package](rstanarm.html) vignette, which provides a general overview of the package. Every modeling function in __rstanarm__ offers a subset of the arguments in the table below which are used for specifying prior distributions for the model parameters.
| Argument | Used in | Applies to | | ------------- | ------------- | ------------- | | `prior_intercept` | All modeling functions except `stan_polr` and `stan_nlmer`| Model intercept, after centering predictors.| | `prior` | All modeling functions| Regression coefficients. Does _not_ include coefficients that vary by group in a multilevel model (see `prior_covariance`).| | `prior_aux` | `stan_glm`\*, `stan_glmer`\*, `stan_gamm4`, `stan_nlmer`| Auxiliary parameter, e.g. error SD (interpretation depends on the GLM).| | `prior_covariance` | `stan_glmer`\*, `stan_gamm4`, `stan_nlmer`| Covariance matrices in multilevel models with varying slopes and intercepts. See the [`stan_glmer` vignette](https://mc-stan.org/rstanarm/articles/glmer.html) for details on this prior.| \* `stan_glm` also implies `stan_glm.nb`. `stan_glmer` implies `stan_lmer` and `stan_glmer.nb`.
The `stan_polr`, `stan_betareg`, and `stan_gamm4` functions also provide additional arguments specific only to those models: | Argument | Used only in | Applies to | | ------------- | ------------- | ------------- | | `prior_smooth` | `stan_gamm4` | Prior for hyperparameters in GAMs (lower values yield less flexible smooth functions). | | `prior_counts` | `stan_polr` | Prior counts of an _ordinal_ outcome (when predictors at sample means). | | `prior_z` | `stan_betareg`| Coefficients in the model for `phi`.| | `prior_intercept_z` | `stan_betareg`| Intercept in the model for `phi`. | | `prior_phi` | `stan_betareg`| `phi`, if not modeled as function of predictors. |
To specify these arguments the user provides a call to one of the various available functions for specifying priors (e.g., `prior = normal(0, 1)`, `prior = cauchy(c(0, 1), c(1, 2.5))`). The documentation for these functions can be found at `help("priors")`. The __rstanarm__ documentation and the other [vignettes](index.html) provide many examples of using these arguments to specify priors and the documentation for these arguments on the help pages for the various __rstanarm__ modeling functions (e.g., `help("stan_glm")`) also explains which distributions can be used when specifying each of the prior-related arguments.
# Default (Weakly Informative) Prior Distributions With very few exceptions, the default priors in __rstanarm__ ---the priors used if the arguments in the tables above are untouched--- are _not_ flat priors. Rather, the defaults are intended to be _weakly informative_. That is, they are designed to provide moderate regularization and help stabilize computation. For many (if not most) applications the defaults will perform well, but this is not guaranteed (there are no default priors that make sense for every possible model specification). The way __rstanarm__ attempts to make priors weakly informative by default is to internally adjust the scales of the priors. How this works (and, importantly, how to turn it off) is explained below, but first we can look at the default priors in action by fitting a basic linear regression model with the `stan_glm` function. For specifying priors, the `stan_glm` function accepts the arguments `prior_intercept`, `prior`, and `prior_aux`. To use the default priors we just leave those arguments at their defaults (i.e., we don't specify them): ```{r, default-prior-1, results="hide"} library("rstanarm") default_prior_test <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1) ``` The `prior_summary` function provides a concise summary of the priors used: ```{r, default-prior-summary} prior_summary(default_prior_test) ``` ```{r, echo=FALSE} priors <- prior_summary(default_prior_test) fr2 <- function(x) format(round(x, 2), nsmall = 2) ``` Starting from the bottom up, we can see that: * __Auxiliary__: `sigma`, the error standard deviation, has a default prior that is $\mathsf{exponential}(1)$. However, as a result of the automatic rescaling, the actual scale used was 6.03. * __Coefficients__: By default the regression coefficients (in this case the coefficients on the `wt` and `am` variables) are treated as a priori independent with normal priors centered at 0 and with scale (standard deviation) $2.5$. 
Like for `sigma`, in order for the default to be weakly informative __rstanarm__ will adjust the scales of the priors on the coefficients. As a result, the prior scales actually used were 15.40 and 30.20. * __Intercept__: For the intercept, the default prior is normal with mean $0$ and standard deviation $2.5$, but in this case the standard deviation was adjusted to 15.07. There is also a note in parentheses informing you that the prior applies to the intercept after all predictors have been centered (a similar note can be found in the documentation of the `prior_intercept` argument). In many cases the value of $y$ when $x=0$ is not meaningful and it is easier to think about the value when $x = \bar{x}$. Therefore placing a prior on the intercept after centering the predictors typically makes it easier to specify a reasonable prior for the intercept. (Note: the user does _not_ need to manually center the predictors.) To disable the centering of the predictors, you need to omit the intercept from the model `formula` and include a column of ones as a predictor (which cannot be named `"(Intercept)"` in the `data.frame`). Then you can specify a prior "coefficient" for the column of ones. The next two subsections describe how the rescaling works and how to easily disable it if desired. ### Default priors and scale adjustments Automatic scale adjustments happen in two cases: 1. When the default priors are used. 2. When the user sets `autoscale=TRUE` when specifying their own prior (e.g., `normal(0, 3, autoscale=TRUE)`). See `help("priors")` for a list of distributions to see which have an `autoscale` argument. Here we describe how the default priors work for the intercept, regression coefficients, and (if applicable) auxiliary parameters. Autoscaling when not using default priors works analogously (if `autoscale=TRUE`). Assume we have outcome $y$ and predictors $x_1,\ldots,x_k$ and our model has linear predictor $$ \alpha + \beta_1 x_1 + \dots + \beta_K x_K. 
$$ #### Regression coefficients The default prior on regression coefficients $\beta_k$ is $$ \beta_k \sim \mathsf{Normal}(0, \, 2.5 \cdot s_y/s_x) $$ where $s_x = \text{sd}(x)$ and $$ s_y = \begin{cases} \text{sd}(y) & \text{if } \:\: {\tt family=gaussian(link)}, \\ 1 & \text{otherwise}. \end{cases} $$ This corresponds to `prior = normal(0, 2.5, autoscale = TRUE)` in **rstanarm** code. #### Intercept The intercept is assigned a prior indirectly. The `prior_intercept` argument refers to the intercept after all predictors have been centered (internally by **rstanarm**). That is, instead of placing the prior on the expected value of $y$ when $x=0$, we place a prior on the expected value of $y$ when $x = \bar{x}$. The default prior for this centered intercept, say $\alpha_c$, is $$ \alpha_c \sim \mathsf{Normal}(m_y, \, 2.5 \cdot s_y) $$ where $$ m_y = \begin{cases} \bar{y} & \text{if } \:\: {\tt family=gaussian(link="identity")}, \\ 0 & \text{otherwise} \end{cases} $$ and $s_y$ is the same as above (either 1 or $\text{sd(y)}$). #### Auxiliary parameters The default prior on the auxiliary parameter (residual standard deviation for Gaussian, shape for gamma, reciprocal dispersion for negative binomial, etc.) is an exponential distribution with rate $1/s_y$ $$ \text{aux} \sim \mathsf{Exponential}(1/s_y) $$ where $s_y$ is the same as above (either 1 or $\text{sd(y)}$). This corresponds to `prior_aux = exponential(1, autoscale=TRUE)` in **rstanarm** code. #### Note on data-based priors Because the scaling is based on the scales of the predictors (and possibly the outcome) these are technically data-dependent priors. However, since these priors are quite wide (and in most cases rather conservative), the amount of information used is weak and mainly takes into account the order of magnitude of the variables. This enables __rstanarm__ to offer defaults that are reasonable for many models. 
### Disabling prior scale adjustments To disable automatic rescaling simply specify a prior other than the default. **rstanarm** versions up to and including version `2.19.3` used to require you to explicitly set the `autoscale` argument to `FALSE`, but now autoscaling only happens by default for the default priors. To use autoscaling with manually specified priors you have to set `autoscale = TRUE`. For example, this prior specification will not include any autoscaling: ```{r, no-autoscale, results="hide"} test_no_autoscale <- update( default_prior_test, prior = normal(0, 5), prior_intercept = student_t(4, 0, 10), prior_aux = cauchy(0, 3) ) ``` We can verify that the prior scales weren't adjusted by checking `prior_summary`: ```{r, no-autoscale-prior-summary} prior_summary(test_no_autoscale) ```
# How to Specify Flat Priors (and why you typically shouldn't) ### Uninformative is usually unwarranted and unrealistic (flat is frequently frivolous and fictional) When "non-informative" or "uninformative" is used in the context of prior distributions, it typically refers to a flat (uniform) distribution or a nearly flat distribution. Sometimes it may also be used to refer to the parameterization-invariant Jeffreys prior. Although __rstanarm__ does not prevent you from using very diffuse or flat priors, unless the data is very strong it is wise to avoid them. Rarely is it appropriate in any applied setting to use a prior that gives the same (or nearly the same) probability mass to values near zero as it gives values bigger than the age of the universe in nanoseconds. Even a much narrower prior than that, e.g., a normal distribution with $\sigma = 500$, will tend to put much more probability mass on unreasonable parameter values than reasonable ones. In fact, using the prior $\theta \sim \mathsf{Normal(\mu = 0, \sigma = 500)}$ implies some strange prior beliefs. For example, you believe a priori that $P(|\theta| < 250) < P(|\theta| > 250)$, which can easily be verified by doing the calculation with the normal CDF ```{r} p <- 1 - 2 * pnorm(-250, mean = 0, sd = 500) print(paste("Pr(-250 < theta < 250) =", round(p, 2))) ``` or via approximation with Monte Carlo draws: ```{r, fig.cap="_There is much more probability mass outside the interval (-250, 250)._"} theta <- rnorm(1e5, mean = 0, sd = 500) p_approx <- mean(abs(theta) < 250) print(paste("Pr(-250 < theta < 250) =", round(p_approx, 2))) d <- data.frame(theta, clr = abs(theta) > 250) library(ggplot2) ggplot(d, aes(x = theta, fill = clr)) + geom_histogram(binwidth = 5, show.legend = FALSE) + scale_y_continuous(name = "", labels = NULL, expand = c(0,0)) + scale_x_continuous(name = expression(theta), breaks = c(-1000, -250, 250, 1000)) ```
This will almost never correspond to the prior beliefs of a researcher about a parameter in a well-specified applied regression model and yet priors like $\theta \sim \mathsf{Normal(\mu = 0, \sigma = 500)}$ (and more extreme) remain quite popular. Even when you know very little, a flat or very wide prior will almost never be the best approximation to your beliefs about the parameters in your model that you can express using __rstanarm__ (or other software). _Some_ amount of prior information will be available. For example, even if there is nothing to suggest a priori that a particular coefficient will be positive or negative, there is almost always enough information to suggest that different orders of magnitude are not equally likely. Making use of this information when setting a prior scale parameter is simple ---one heuristic is to set the scale an order of magnitude bigger than you suspect it to be--- and has the added benefit of helping to stabilize computations. A more in-depth discussion of non-informative vs weakly informative priors is available in the case study [_How the Shape of a Weakly Informative Prior Affects Inferences_](https://mc-stan.org/users/documentation/case-studies/weakly_informative_shapes.html). ### Specifying flat priors __rstanarm__ will use flat priors if `NULL` is specified rather than a distribution. For example, to use a flat prior on regression coefficients you would specify `prior=NULL`: ```{r, flat-prior-1, echo=FALSE, results="hide"} flat_prior_test <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL, iter = 10, chains = 1) ``` ```{r, flat-prior-2, eval=FALSE} flat_prior_test <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL) ``` In this case we let __rstanarm__ use the default priors for the intercept and error standard deviation (we could change that if we wanted), but the coefficient on the `wt` variable will have a flat prior. 
To double check that indeed a flat prior was used for the coefficient on `wt` we can call `prior_summary`: ```{r, flat-prior-summary} prior_summary(flat_prior_test) ```
# Informative Prior Distributions Although the default priors tend to work well, prudent use of more informative priors is encouraged. For example, suppose we have a linear regression model $$y_i \sim \mathsf{Normal}\left(\alpha + \beta_1 x_{1,i} + \beta_2 x_{2,i}, \, \sigma\right)$$ and we have evidence (perhaps from previous research on the same topic) that approximately $\beta_1 \in (-15, -5)$ and $\beta_2 \in (-1, 1)$. An example of an informative prior for $\boldsymbol{\beta} = (\beta_1, \beta_2)'$ could be $$ \boldsymbol{\beta} \sim \mathsf{Normal} \left( \begin{pmatrix} -10 \\ 0 \end{pmatrix}, \begin{pmatrix} 5^2 & 0 \\ 0 & 2^2 \end{pmatrix} \right), $$ which sets the prior means at the midpoints of the intervals and then allows for some wiggle room on either side. If the data are highly informative about the parameter values (enough to overwhelm the prior) then this prior will yield similar results to a non-informative prior. But as the amount of data and/or the signal-to-noise ratio decrease, using a more informative prior becomes increasingly important. If the variables `y`, `x1`, and `x2` are in the data frame `dat` then this model can be specified as ```{r, eval=FALSE} my_prior <- normal(location = c(-10, 0), scale = c(5, 2)) stan_glm(y ~ x1 + x2, data = dat, prior = my_prior) ``` We left the priors for the intercept and error standard deviation at their defaults, but informative priors can be specified for those parameters in an analogous manner. rstanarm/R/0000755000176200001440000000000014551535215012310 5ustar liggesusersrstanarm/R/stan_aov.R0000644000176200001440000001717513722762571014266 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # Copyright (C) 1995-2015 The R Core Team # Copyright (C) 1998 B. D. 
Ripley # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' @rdname stan_lm #' @export #' @param projections For \code{stan_aov}, a logical scalar (defaulting to #' \code{FALSE}) indicating whether \code{\link[stats]{proj}} should be called #' on the fit. #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' op <- options(contrasts = c("contr.helmert", "contr.poly")) #' fit_aov <- stan_aov(yield ~ block + N*P*K, data = npk, #' prior = R2(0.5), seed = 12345) #' options(op) #' print(fit_aov) #' } #' } stan_aov <- function(formula, data, projections = FALSE, contrasts = NULL, ..., prior = R2(stop("'location' must be specified")), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL) { # parse like aov() does Terms <- if (missing(data)) terms(formula, "Error") else terms(formula, "Error", data = data) indError <- attr(Terms, "specials")$Error ## NB: this is only used for n > 1, so singular form makes no sense ## in English. But some languages have multiple plurals. 
if(length(indError) > 1L) stop(sprintf(ngettext(length(indError), "there are %d Error terms: only 1 is allowed", "there are %d Error terms: only 1 is allowed"), length(indError)), domain = NA) lmcall <- Call <- match.call() ## need rstanarm:: for non-standard evaluation lmcall[[1L]] <- quote(stan_lm) lmcall$singular.ok <- TRUE if (projections) qr <- lmcall$qr <- TRUE lmcall$projections <- NULL if (is.null(indError)) { ## no Error term fit <- eval(lmcall, parent.frame()) fit$terms <- Terms fit$qr <- qr(model.matrix(Terms, data = fit$data, contrasts.arg = contrasts)) R <- qr.R(fit$qr) beta <- extract(fit$stanfit, pars = "beta", permuted = FALSE) pnames <- dimnames(beta)$parameters rownames(R) <- colnames(R) R <- R[pnames, pnames, drop = FALSE] effects <- apply(beta, 1:2, FUN = function(x) R %*% x) if (length(dim(effects)) == 2) { dim(effects) <- c(1L, dim(effects)) } effects <- aperm(effects, c(2,3,1)) fit$effects <- effects class(fit) <- c("stanreg", "aov", "lm") if (projections) fit$projections <- proj(fit) fit$call <- Call fit$stan_function <- "stan_aov" return(fit) } else { # nocov start stop("Error terms not supported yet") if(pmatch("weights", names(match.call()), 0L)) stop("weights are not supported in a multistratum aov() fit") ## Helmert contrasts can be helpful: do we want to force them? ## this version does for the Error model. 
opcons <- options("contrasts") options(contrasts = c("contr.helmert", "contr.poly")) on.exit(options(opcons)) allTerms <- Terms errorterm <- attr(Terms, "variables")[[1 + indError]] eTerm <- deparse(errorterm[[2L]], width.cutoff = 500L, backtick = TRUE) intercept <- attr(Terms, "intercept") ecall <- lmcall ecall$formula <- as.formula(paste(deparse(formula[[2L]], width.cutoff = 500L, backtick = TRUE), "~", eTerm, if(!intercept) "- 1"), env = environment(formula)) ecall$method <- "qr" ecall$qr <- TRUE ecall$contrasts <- NULL er.fit <- eval(ecall, parent.frame()) options(opcons) nmstrata <- attr(terms(er.fit), "term.labels") ## remove backticks from simple labels for strata (only) nmstrata <- sub("^`(.*)`$", "\\1", nmstrata) nmstrata <- c("(Intercept)", nmstrata) qr.e <- er.fit$qr rank.e <- er.fit$rank if(rank.e < NROW(er.fit$coefficients)) warning("Error() model is singular") qty <- er.fit$residuals maov <- is.matrix(qty) asgn.e <- er.fit$assign[qr.e$pivot[1L:rank.e]] ## we want this to label the rows of qtx, not cols of x. maxasgn <- length(nmstrata) - 1L nobs <- NROW(qty) len <- if(nobs > rank.e) { asgn.e[(rank.e+1):nobs] <- maxasgn + 1L nmstrata <- c(nmstrata, "Within") maxasgn + 2L } else maxasgn + 1L result <- setNames(vector("list", len), nmstrata) lmcall$formula <- form <- update(formula, paste(". 
~ .-", deparse(errorterm, width.cutoff = 500L, backtick = TRUE))) Terms <- terms(form) lmcall$method <- "model.frame" mf <- eval(lmcall, parent.frame()) xlev <- .getXlevels(Terms, mf) resp <- model.response(mf) qtx <- model.matrix(Terms, mf, contrasts) cons <- attr(qtx, "contrasts") dnx <- colnames(qtx) asgn.t <- attr(qtx, "assign") if(length(wts <- model.weights(mf))) { wts <- sqrt(wts) resp <- resp * wts qtx <- qtx * wts } qty <- as.matrix(qr.qty(qr.e, resp)) if((nc <- ncol(qty)) > 1L) { dny <- colnames(resp) if(is.null(dny)) dny <- paste0("Y", 1L:nc) dimnames(qty) <- list(seq(nrow(qty)), dny) } else dimnames(qty) <- list(seq(nrow(qty)), NULL) qtx <- qr.qty(qr.e, qtx) dimnames(qtx) <- list(seq(nrow(qtx)) , dnx) for(i in seq_along(nmstrata)) { select <- asgn.e == (i-1L) ni <- sum(select) if(!ni) next ## helpful to drop constant columns. xi <- qtx[select, , drop = FALSE] cols <- colSums(xi^2) > 1e-5 if(any(cols)) { xi <- xi[, cols, drop = FALSE] attr(xi, "assign") <- asgn.t[cols] fiti <- lm.fit(xi, qty[select,,drop=FALSE]) fiti$terms <- Terms } else { y <- qty[select,,drop=FALSE] fiti <- list(coefficients = numeric(), residuals = y, fitted.values = 0 * y, weights = wts, rank = 0L, df.residual = NROW(y)) } if(projections) fiti$projections <- proj(fiti) class(fiti) <- c(if(maov) "maov", "aov", oldClass(er.fit)) result[[i]] <- fiti } ## drop empty strata result <- result[!sapply(result, is.null)] class(result) <- c("aovlist", "listof") if(qr) attr(result, "error.qr") <- qr.e attr(result, "call") <- Call if(length(wts)) attr(result, "weights") <- wts attr(result, "terms") <- allTerms attr(result, "contrasts") <- cons attr(result, "xlevels") <- xlev result } # nocov end } rstanarm/R/stan_biglm.R0000644000176200001440000001107414370470372014556 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the 
terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian regularized linear but big models via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' This is the same model as with \code{\link{stan_lm}} but it utilizes the #' output from \code{\link[biglm]{biglm}} in the \pkg{biglm} package in order to #' proceed when the data is too large to fit in memory. #' #' @export #' @param biglm The list output by \code{\link[biglm]{biglm}} in the \pkg{biglm} #' package. #' @param xbar A numeric vector of column means in the implicit design matrix #' excluding the intercept for the observations included in the model. #' @param ybar A numeric scalar indicating the mean of the outcome for the #' observations included in the model. #' @param s_y A numeric scalar indicating the unbiased sample standard deviation #' of the outcome for the observations included in the model. #' @template args-dots #' @param prior Must be a call to \code{\link{R2}} with its \code{location} #' argument specified or \code{NULL}, which would indicate a standard uniform #' prior for the \eqn{R^2}. 
#' @inheritParams stan_lm #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' #' @details The \code{stan_biglm} function is intended to be used in the same #' circumstances as the \code{\link[biglm]{biglm}} function in the \pkg{biglm} #' package but with an informative prior on the \eqn{R^2} of the regression. #' Like \code{\link[biglm]{biglm}}, the memory required to estimate the model #' depends largely on the number of predictors rather than the number of #' observations. However, \code{stan_biglm} and \code{stan_biglm.fit} have #' additional required arguments that are not necessary in #' \code{\link[biglm]{biglm}}, namely \code{xbar}, \code{ybar}, and \code{s_y}. #' If any observations have any missing values on any of the predictors or the #' outcome, such observations do not contribute to these statistics. #' #' @return The output of both \code{stan_biglm} and \code{stan_biglm.fit} is an #' object of \code{\link[rstan]{stanfit-class}} rather than #' \code{\link{stanreg-objects}}, which is more limited and less convenient #' but necessitated by the fact that \code{stan_biglm} does not bring the full #' design matrix into memory. Without the full design matrix, some of the #' elements of a \code{\link{stanreg-objects}} object cannot be calculated, #' such as residuals. Thus, the functions in the \pkg{rstanarm} package that #' input \code{\link{stanreg-objects}}, such as #' \code{\link{posterior_predict}} cannot be used. 
#' stan_biglm <- function(biglm, xbar, ybar, s_y, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL) { if (!requireNamespace("biglm", quietly = TRUE)) { stop("Please install the biglm package to use this function.") } if (!inherits(biglm, "biglm") || is.null(biglm$qr) || !inherits(biglm$qr, "bigqr") || is.null(biglm$terms)) { stop("'biglm' must be of S3 class biglm as defined by the biglm package.") } b <- coef(biglm) R <- diag(length(b)) R[upper.tri(R)] <- biglm$qr$rbar R <- sqrt(biglm$qr$D) * R if (identical(attr(biglm$terms, "intercept"), 1L)) { b <- b[-1] R <- R[-1,-1] has_intercept <- TRUE } else { has_intercept <- FALSE } stan_biglm.fit(b, R, SSR = biglm$qr$ss, N = biglm$n, xbar, ybar, s_y, has_intercept, ..., prior = prior, prior_intercept = prior_intercept, prior_PD = prior_PD, algorithm = match.arg(algorithm), adapt_delta = adapt_delta) } rstanarm/R/doc-adapt_delta.R0000644000176200001440000000515713537747601015457 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#' \code{adapt_delta}: Target average acceptance probability #' #' Details about the \code{adapt_delta} argument to \pkg{rstanarm}'s modeling #' functions. #' #' @name adapt_delta #' @template reference-stan-manual #' @references Brief Guide to Stan's Warnings: #' \url{https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup} #' #' #' @details For the No-U-Turn Sampler (NUTS), the variant of Hamiltonian Monte #' Carlo used used by \pkg{rstanarm}, \code{adapt_delta} is the target average #' proposal acceptance probability during Stan's adaptation period. #' \code{adapt_delta} is ignored by \pkg{rstanarm} if the \code{algorithm} argument #' is not set to \code{"sampling"}. #' #' The default value of \code{adapt_delta} is 0.95, except when the prior for #' the regression coefficients is \code{\link{R2}}, \code{\link{hs}}, or #' \code{\link{hs_plus}}, in which case the default is 0.99. #' #' These defaults are higher (more conservative) than the default of #' \code{adapt_delta=0.8} used in the \pkg{rstan} package, which may result in #' slower sampling speeds but will be more robust to posterior distributions #' with high curvature. #' #' In general you should not need to change \code{adapt_delta} unless you see #' a warning message about divergent transitions, in which case you can #' increase \code{adapt_delta} from the default to a value \emph{closer} to 1 #' (e.g. from 0.95 to 0.99, or from 0.99 to 0.999, etc). The step size used by #' the numerical integrator is a function of \code{adapt_delta} in that #' increasing \code{adapt_delta} will result in a smaller step size and fewer #' divergences. Increasing \code{adapt_delta} will typically result in a #' slower sampler, but it will always lead to a more robust sampler. 
NULL rstanarm/R/doc-datasets.R0000644000176200001440000001675714551535215015026 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # #' Datasets for rstanarm examples #' #' Small datasets for use in \pkg{rstanarm} examples and vignettes. #' #' @name rstanarm-datasets #' @aliases kidiq roaches wells bball1970 bball2006 mortality tumors radon pbcLong pbcSurv #' @format #' \describe{ #' \item{\code{bball1970}}{ #' Data on hits and at-bats from the 1970 Major League Baseball season for 18 #' players. #' #' Source: Efron and Morris (1975). #' #' 18 obs. of 5 variables #' \itemize{ #' \item \code{Player} Player's last name #' \item \code{Hits} Number of hits in the first 45 at-bats of the season #' \item \code{AB} Number of at-bats (45 for all players) #' \item \code{RemainingAB} Number of remaining at-bats (different for most players) #' \item \code{RemainingHits} Number of remaining hits #' } #' } #' \item{\code{bball2006}}{ #' Hits and at-bats for the entire 2006 American League season of Major League #' Baseball. #' #' Source: Carpenter (2009) #' #' 302 obs. 
of 2 variables #' \itemize{ #' \item \code{y} Number of hits #' \item \code{K} Number of at-bats #' } #' } #' \item{\code{kidiq}}{ #' Data from a survey of adult American women and their children #' (a subsample from the National Longitudinal Survey of Youth). #' #' Source: Gelman and Hill (2007) #' #' 434 obs. of 4 variables #' \itemize{ #' \item \code{kid_score} Child's IQ score #' \item \code{mom_hs} Indicator for whether the mother has a high school degree #' \item \code{mom_iq} Mother's IQ score #' \item \code{mom_age} Mother's age #' } #' } #' \item{\code{mortality}}{ #' Surgical mortality rates in 12 hospitals performing cardiac surgery #' in babies. #' #' Source: Spiegelhalter et al. (1996). #' #' 12 obs. of 2 variables #' \itemize{ #' \item \code{y} Number of deaths #' \item \code{K} Number of surgeries #' } #' } #' \item{\code{pbcLong,pbcSurv}}{ #' Longitudinal biomarker and time-to-event survival data for 40 patients #' with primary biliary cirrhosis who participated in a randomised #' placebo controlled trial of D-penicillamine conducted at the Mayo #' Clinic between 1974 and 1984. #' #' Source: Therneau and Grambsch (2000) #' #' 304 obs. of 8 variables (\code{pbcLong}) and 40 obs. 
of 7 variables (\code{pbcSurv}) #' \itemize{ #' \item \code{age} in years #' \item \code{albumin} serum albumin (g/dl) #' \item \code{logBili} logarithm of serum bilirubin #' \item \code{death} indicator of death at endpoint #' \item \code{futimeYears} time (in years) between baseline and #' the earliest of death, transplantion or censoring #' \item \code{id} numeric ID unique to each individual #' \item \code{platelet} platelet count #' \item \code{sex} gender (m = male, f = female) #' \item \code{status} status at endpoint (0 = censored, #' 1 = transplant, 2 = dead) #' \item \code{trt} binary treatment code (0 = placebo, 1 = #' D-penicillamine) #' \item \code{year} time (in years) of the longitudinal measurements, #' taken as time since baseline) #' } #' } #' #' \item{\code{radon}}{ #' Data on radon levels in houses in the state of Minnesota. #' #' Source: Gelman and Hill (2007) #' #' 919 obs. of 4 variables #' \itemize{ #' \item \code{log_radon} Radon measurement from the house (log scale) #' \item \code{log_uranium} Uranium level in the county (log scale) #' \item \code{floor} Indicator for radon measurement made on the first floor of #' the house (0 = basement, 1 = first floor) #' \item \code{county} County name (\code{\link{factor}}) #' } #' } #' \item{\code{roaches}}{ #' Data on the efficacy of a pest management system at reducing the number of #' roaches in urban apartments. #' #' Source: Gelman and Hill (2007) #' #' 262 obs. of 6 variables #' \itemize{ #' \item \code{y} Number of roaches caught #' \item \code{roach1} Pretreatment number of roaches #' \item \code{treatment} Treatment indicator #' \item \code{senior} Indicator for only elderly residents in building #' \item \code{exposure2} Number of days for which the roach traps were used #' } #' } #' \item{\code{tumors}}{ #' Tarone (1982) provides a data set of tumor incidence in historical #' control groups of rats; specifically endometrial stromal polyps in #' female lab rats of type F344. 
#' #' Source: Gelman and Hill (2007) #' #' 71 obs. of 2 variables #' \itemize{ #' \item \code{y} Number of rats with tumors #' \item \code{K} Number of rats #' } #' } #' \item{\code{wells}}{ #' A survey of 3200 residents in a small area of Bangladesh suffering from #' arsenic contamination of groundwater. Respondents with elevated arsenic #' levels in their wells had been encouraged to switch their water source to a #' safe public or private well in the nearby area and the survey was conducted #' several years later to learn which of the affected residents had switched #' wells. #' #' Souce: Gelman and Hill (2007) #' #' 3020 obs. of 5 variables #' \itemize{ #' \item \code{switch} Indicator for well-switching #' \item \code{arsenic} Arsenic level in respondent's well #' \item \code{dist} Distance (meters) from the respondent's house to the #' nearest well with safe drinking water. #' \item \code{assoc} Indicator for member(s) of household participate #' in community organizations #' \item \code{educ} Years of education (head of household) #' } #' } #' } #' #' @references #' Carpenter, B. (2009) Bayesian estimators for the beta-binomial model of #' batting ability. \url{https://web.archive.org/web/20220618114439/https://lingpipe-blog.com/2009/09/23/} #' #' Efron, B. and Morris, C. (1975) Data analysis using Stein's estimator and its #' generalizations. \emph{Journal of the American Statistical Association} #' \strong{70}(350), 311--319. #' #' @templateVar armRef \url{https://stat.columbia.edu/~gelman/arm/} #' @template reference-gelman-hill #' #' @references #' Spiegelhalter, D., Thomas, A., Best, N., & Gilks, W. (1996) BUGS 0.5 #' Examples. MRC Biostatistics Unit, Institute of Public health, Cambridge, UK. #' #' Tarone, R. E. (1982) The use of historical control information in testing for #' a trend in proportions. \emph{Biometrics} \strong{38}(1):215--220. #' #' Therneau, T. and Grambsch, P. (2000) \emph{Modeling Survival Data: Extending #' the Cox Model}. 
Springer-Verlag, New York, US. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' # Using 'kidiq' dataset #' fit <- stan_lm(kid_score ~ mom_hs * mom_iq, data = kidiq, #' prior = R2(location = 0.30, what = "mean"), #' # the next line is only to make the example go fast enough #' chains = 1, iter = 500, seed = 12345) #' pp_check(fit, nreps = 20) #' \donttest{ #' bayesplot::color_scheme_set("brightblue") #' pp_check(fit, plotfun = "stat_grouped", stat = "median", #' group = factor(kidiq$mom_hs, labels = c("No HS", "HS"))) #' } #' } NULL rstanarm/R/draws.R0000644000176200001440000000646214476664567013606 0ustar liggesusers#' Create a \code{draws} object from a \code{stanreg} object #' #' Convert a \code{stanreg} object to a format supported by the #' \pkg{\link[posterior:posterior-package]{posterior}} package. #' #' @name stanreg-draws-formats #' @aliases as_draws as_draws_matrix as_draws_array as_draws_df as_draws_rvars as_draws_list #' #' @param x A \code{stanreg} object returned by one of the \pkg{rstanarm} #' modeling functions. #' @param ... Arguments (e.g., \code{pars}, \code{regex_pars}) passed internally to #' \code{\link{as.matrix.stanreg}} or \code{as.array.stanreg}. #' #' @details To subset iterations, chains, or draws, use #' \code{\link[posterior:subset_draws]{subset_draws}} after making the #' \code{draws} object. To subset variables use \code{...} to pass the \code{pars} #' and/or \code{regex_pars} arguments to \code{as.matrix.stanreg} or #' \code{as.array.stanreg} (these are called internally by #' \code{as_draws.stanreg}), or use #' \code{\link[posterior:subset_draws]{subset_draws}} after making the #' \code{draws} object. #' #' @return A \code{draws} object from the #' \pkg{\link[posterior:posterior-package]{posterior}} package. See the #' \pkg{posterior} package documentation and vignettes for details on working #' with these objects. 
#' #' @examples #' fit <- stan_glm(mpg ~ wt + as.factor(cyl), data = mtcars) #' as_draws_matrix(fit) # matrix format combines all chains #' as_draws_df(fit, regex_pars = "cyl") #' posterior::summarize_draws(as_draws_array(fit)) #' NULL #' @rdname stanreg-draws-formats #' @importFrom posterior as_draws #' @method as_draws stanreg #' @export #' @export as_draws as_draws.stanreg <- function(x, ...) { as_draws_df(x, ...) } #' @rdname stanreg-draws-formats #' @importFrom posterior as_draws_matrix #' @method as_draws_matrix stanreg #' @export #' @export as_draws_matrix as_draws_matrix.stanreg <- function(x, ...) { posterior::as_draws_matrix( as.matrix.stanreg(x, ...) ) } #' @rdname stanreg-draws-formats #' @importFrom posterior as_draws_array #' @method as_draws_array stanreg #' @export #' @export as_draws_array as_draws_array.stanreg <- function(x, ...) { if (used.sampling(x)) { posterior::as_draws_array( as.array.stanreg(x, ...) ) } else { stop("For models not fit using MCMC use 'as_draws_matrix' instead of 'as_draws_array'", call. = FALSE) } } #' @rdname stanreg-draws-formats #' @importFrom posterior as_draws_df #' @method as_draws_df stanreg #' @export #' @export as_draws_df as_draws_df.stanreg <- function(x, ...) { posterior::as_draws_df( if (used.sampling(x)) { as.array.stanreg(x, ...) } else { as.matrix.stanreg(x, ...) } ) } #' @rdname stanreg-draws-formats #' @importFrom posterior as_draws_list #' @method as_draws_list stanreg #' @export #' @export as_draws_list as_draws_list.stanreg <- function(x, ...) { posterior::as_draws_list( if (used.sampling(x)) { as.array.stanreg(x, ...) } else { as.matrix.stanreg(x, ...) } ) } #' @rdname stanreg-draws-formats #' @importFrom posterior as_draws_rvars #' @method as_draws_rvars stanreg #' @export #' @export as_draws_rvars as_draws_rvars.stanreg <- function(x, ...) { posterior::as_draws_rvars( if (used.sampling(x)) { as.array.stanreg(x, ...) } else { as.matrix.stanreg(x, ...) 
} ) } rstanarm/R/stan_glmer.R0000644000176200001440000002407314370470372014575 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian generalized linear models with group-specific terms via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Bayesian inference for GLMs with group-specific coefficients that have #' unknown covariance matrices with flexible priors. #' #' @export #' @templateVar armRef (Ch. 11-15) #' @templateVar fun stan_glmer, stan_lmer, stan_glmer.nb #' @templateVar pkg lme4 #' @templateVar pkgfun glmer #' @template return-stanreg-object #' @template see-also #' @template args-prior_intercept #' @template args-priors #' @template args-prior_aux #' @template args-prior_covariance #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' @template args-QR #' @template args-sparse #' @template reference-gelman-hill #' @template reference-muth #' #' @param formula,data Same as for \code{\link[lme4]{glmer}}. \emph{We #' strongly advise against omitting the \code{data} argument}. 
Unless #' \code{data} is specified (and is a data frame) many post-estimation #' functions (including \code{update}, \code{loo}, \code{kfold}) are not #' guaranteed to work properly. #' @param family Same as for \code{\link[lme4]{glmer}} except it is also #' possible to use \code{family=mgcv::betar} to estimate a Beta regression #' with \code{stan_glmer}. #' @param subset,weights,offset Same as \code{\link[stats]{glm}}. #' @param na.action,contrasts Same as \code{\link[stats]{glm}}, but rarely #' specified. #' @param ... For \code{stan_glmer}, further arguments passed to #' \code{\link[rstan:stanmodel-method-sampling]{sampling}} (e.g. \code{iter}, \code{chains}, #' \code{cores}, etc.) or to \code{\link[rstan:stanmodel-method-vb]{vb}} (if \code{algorithm} is #' \code{"meanfield"} or \code{"fullrank"}). For \code{stan_lmer} and #' \code{stan_glmer.nb}, \code{...} should also contain all relevant arguments #' to pass to \code{stan_glmer} (except \code{family}). #' #' @details The \code{stan_glmer} function is similar in syntax to #' \code{\link[lme4]{glmer}} but rather than performing (restricted) maximum #' likelihood estimation of generalized linear models, Bayesian estimation is #' performed via MCMC. The Bayesian model adds priors on the #' regression coefficients (in the same way as \code{\link{stan_glm}}) and #' priors on the terms of a decomposition of the covariance matrices of the #' group-specific parameters. See \code{\link{priors}} for more information #' about the priors. #' #' The \code{stan_lmer} function is equivalent to \code{stan_glmer} with #' \code{family = gaussian(link = "identity")}. #' #' The \code{stan_glmer.nb} function, which takes the extra argument #' \code{link}, is a wrapper for \code{stan_glmer} with \code{family = #' \link{neg_binomial_2}(link)}. #' #' @return A list with classes \code{stanreg}, \code{glm}, \code{lm}, #' and \code{lmerMod}. 
The conventions for the parameter names are the #' same as in the lme4 package with the addition that the standard #' deviation of the errors is called \code{sigma} and the variance-covariance #' matrix of the group-specific deviations from the common parameters is #' called \code{Sigma}, even if this variance-covariance matrix only has #' one row and one column (in which case it is just the group-level variance). #' #' #' @seealso The vignette for \code{stan_glmer} and the \emph{Hierarchical #' Partial Pooling} vignette. \url{https://mc-stan.org/rstanarm/articles/} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' # see help(example_model) for details on the model below #' if (!exists("example_model")) example(example_model) #' print(example_model, digits = 1) #' } #' @importFrom lme4 glFormula #' @importFrom Matrix Matrix t stan_glmer <- function(formula, data = NULL, family = gaussian, subset, weights, na.action = getOption("na.action", "na.omit"), offset, contrasts = NULL, ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale=TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE) { call <- match.call(expand.dots = TRUE) mc <- match.call(expand.dots = FALSE) data <- validate_data(data) #, if_missing = environment(formula)) family <- validate_family(family) mc[[1]] <- quote(lme4::glFormula) mc$control <- make_glmerControl( ignore_lhs = prior_PD, ignore_x_scale = prior$autoscale %ORifNULL% FALSE ) mc$data <- data mc$prior <- mc$prior_intercept <- mc$prior_covariance <- mc$prior_aux <- mc$prior_PD <- mc$algorithm <- mc$scale <- mc$concentration <- mc$shape <- mc$adapt_delta <- mc$... 
<- mc$QR <- mc$sparse <- NULL glmod <- eval(mc, parent.frame()) X <- glmod$X if ("b" %in% colnames(X)) { stop("stan_glmer does not allow the name 'b' for predictor variables.", call. = FALSE) } if (prior_PD && !has_outcome_variable(formula)) { y <- NULL } else { y <- glmod$fr[, as.character(glmod$formula[2L])] if (is.matrix(y) && ncol(y) == 1L) { y <- as.vector(y) } } offset <- model.offset(glmod$fr) %ORifNULL% double(0) weights <- validate_weights(as.vector(model.weights(glmod$fr))) if (binom_y_prop(y, family, weights)) { y1 <- as.integer(as.vector(y) * weights) y <- cbind(y1, y0 = weights - y1) weights <- double(0) } if (is.null(prior_covariance)) stop("'prior_covariance' can't be NULL.", call. = FALSE) group <- glmod$reTrms group$decov <- prior_covariance algorithm <- match.arg(algorithm) stanfit <- stan_glm.fit(x = X, y = y, weights = weights, offset = offset, family = family, prior = prior, prior_intercept = prior_intercept, prior_aux = prior_aux, prior_PD = prior_PD, algorithm = algorithm, adapt_delta = adapt_delta, group = group, QR = QR, sparse = sparse, mean_PPD = !prior_PD, ...) 
add_classes <- "lmerMod" # additional classes to eventually add to stanreg object if (family$family == "Beta regression") { add_classes <- c(add_classes, "betareg") family$family <- "beta" } sel <- apply(X, 2L, function(x) !all(x == 1) && length(unique(x)) < 2) X <- X[ , !sel, drop = FALSE] Z <- pad_reTrms(Ztlist = group$Ztlist, cnms = group$cnms, flist = group$flist)$Z colnames(Z) <- b_names(names(stanfit), value = TRUE) fit <- nlist(stanfit, family, formula, offset, weights, x = cbind(X, Z), y = y, data, call, terms = NULL, model = NULL, na.action = attr(glmod$fr, "na.action"), contrasts, algorithm, glmod, stan_function = "stan_glmer") out <- stanreg(fit) class(out) <- c(class(out), add_classes) return(out) } #' @rdname stan_glmer #' @export stan_lmer <- function(formula, data = NULL, subset, weights, na.action = getOption("na.action", "na.omit"), offset, contrasts = NULL, ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale=TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE) { if ("family" %in% names(list(...))) { stop( "'family' should not be specified. ", "To specify a family use stan_glmer instead of stan_lmer." ) } mc <- call <- match.call(expand.dots = TRUE) if (!"formula" %in% names(call)) names(call)[2L] <- "formula" mc[[1L]] <- quote(stan_glmer) mc$REML <- NULL mc$family <- "gaussian" out <- eval(mc, parent.frame()) out$call <- call out$stan_function <- "stan_lmer" return(out) } #' @rdname stan_glmer #' @export #' @param link For \code{stan_glmer.nb} only, the link function to use. See #' \code{\link{neg_binomial_2}}. 
#' stan_glmer.nb <- function(formula, data = NULL, subset, weights, na.action = getOption("na.action", "na.omit"), offset, contrasts = NULL, link = "log", ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale=TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE) { if ("family" %in% names(list(...))) stop("'family' should not be specified.") mc <- call <- match.call(expand.dots = TRUE) if (!"formula" %in% names(call)) names(call)[2L] <- "formula" mc[[1]] <- quote(stan_glmer) mc$REML <- mc$link <- NULL mc$family <- neg_binomial_2(link = link) out <- eval(mc, parent.frame()) out$call <- call out$stan_function <- "stan_glmer.nb" return(out) } rstanarm/R/predictive_interval.R0000644000176200001440000000776414370470372016514 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Predictive intervals #' #' For models fit using MCMC (\code{algorithm="sampling"}) or one of the #' variational approximations (\code{"meanfield"} or \code{"fullrank"}), the #' \code{predictive_interval} function computes Bayesian predictive intervals. 
#' The method for stanreg objects calls \code{\link{posterior_predict}} #' internally, whereas the method for matrices accepts the matrix returned by #' \code{posterior_predict} as input and can be used to avoid multiple calls to #' \code{posterior_predict}. #' #' @export #' @aliases predictive_interval #' #' @param object Either a fitted model object returned by one of the #' \pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg #' object}) or, for the matrix method, a matrix of draws from the #' posterior predictive distribution returned by #' \code{\link{posterior_predict}}. #' @template args-dots-ignored #' @inheritParams posterior_interval.stanreg #' @param newdata,draws,fun,offset,re.form,seed Passed to #' \code{\link[=posterior_predict]{posterior_predict}}. #' #' @return A matrix with two columns and as many rows as are in \code{newdata}. #' If \code{newdata} is not provided then the matrix will have as many rows as #' the data used to fit the model. For a given value of \code{prob}, \eqn{p}, #' the columns correspond to the lower and upper \eqn{100p}\% central interval #' limits and have the names \eqn{100\alpha/2}\% and \eqn{100(1 - #' \alpha/2)}\%, where \eqn{\alpha = 1-p}. For example, if \code{prob=0.9} is #' specified (a \eqn{90}\% interval), then the column names will be #' \code{"5\%"} and \code{"95\%"}, respectively. 
#' #' @seealso \code{\link{predictive_error}}, \code{\link{posterior_predict}}, #' \code{\link{posterior_interval}} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 300) #' predictive_interval(fit) #' predictive_interval(fit, newdata = data.frame(wt = range(mtcars$wt)), #' prob = 0.5) #' #' # stanreg vs matrix methods #' preds <- posterior_predict(fit, seed = 123) #' all.equal( #' predictive_interval(fit, seed = 123), #' predictive_interval(preds) #' ) #' } predictive_interval.stanreg <- function(object, prob = 0.9, newdata = NULL, draws = NULL, re.form = NULL, fun = NULL, seed = NULL, offset = NULL, ...) { if (used.optimizing(object)) STOP_not_optimizing("posterior_interval") if (inherits(object, "polr")) stop("'predictive_interval' is not currently available for stan_polr.") ytilde <- posterior_predict( object, newdata = newdata, draws = draws, seed = seed, re.form = re.form, offset = offset, fun = fun ) predictive_interval(ytilde, prob = prob) } #' @rdname predictive_interval.stanreg #' @export predictive_interval.matrix <- function(object, prob = 0.9, ...) { NextMethod("predictive_interval") } #' @rdname predictive_interval.stanreg #' @export predictive_interval.ppd <- function(object, prob = 0.9, ...) { predictive_interval(unclass(object), prob = prob, ...) } rstanarm/R/doc-example_jm.R0000644000176200001440000000412013722762571015322 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Example joint longitudinal and time-to-event model #' #' A model for use in the \pkg{rstanarm} examples related to \code{\link{stan_jm}}. #' #' @name example_jm #' @format Calling \code{example("example_jm")} will run the model in the #' Examples section, below, and the resulting stanmvreg object will then be #' available in the global environment. The \code{chains} and \code{iter} #' arguments are specified to make this example be small in size. In practice, #' we recommend that they be left unspecified in order to use the default #' values or increased if there are convergence problems. The \code{cores} #' argument is optional and on a multicore system, the user may well want #' to set that equal to the number of chains being executed. #' #' @examples #' # set.seed(123) #' if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") #' example_jm <- #' stan_jm(formulaLong = logBili ~ year + (1 | id), #' dataLong = pbcLong[1:101,], #' formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, #' dataEvent = pbcSurv[1:15,], #' time_var = "year", #' # this next line is only to keep the example small in size! 
#' chains = 1, seed = 12345, iter = 100, refresh = 0) #' #' NULL rstanarm/R/loo.R0000644000176200001440000006775214406606742013250 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Information criteria and cross-validation #' #' @description For models fit using MCMC, compute approximate leave-one-out #' cross-validation (LOO, LOOIC) or, less preferably, the Widely Applicable #' Information Criterion (WAIC) using the \pkg{\link[=loo-package]{loo}} #' package. (For \eqn{K}-fold cross-validation see \code{\link{kfold.stanreg}}.) #' Functions for model comparison, and model weighting/averaging are also #' provided. #' #' \strong{Note}: these functions are not guaranteed to work #' properly unless the \code{data} argument was specified when the model was #' fit. Also, as of \pkg{loo} version \code{2.0.0} the default number of cores #' is now only 1, but we recommend using as many (or close to as many) cores #' as possible by setting the \code{cores} argument or using #' \code{options(mc.cores = VALUE)} to set it for an entire session. 
#' #' @aliases loo #' @importFrom loo loo loo.function loo.matrix is.loo #' @export #' @method loo stanreg #' @template reference-loo #' @template reference-bayesvis #' #' @param x For \code{loo} and \code{waic}, a fitted model object returned by #' one of the rstanarm modeling functions. See \link{stanreg-objects}. #' #' For the \code{loo_model_weights} method, \code{x} should be a #' "stanreg_list" object, which is a list of fitted model objects created by #' \code{\link{stanreg_list}}. \code{loo_compare} also allows \code{x} to be a #' single stanreg object, with the remaining objects passed via \code{...}, or #' a single \code{stanreg_list} object. #' #' @param ... For \code{loo_compare.stanreg}, \code{...} can contain objects #' returned by the \code{loo}, \code{\link[=kfold.stanreg]{kfold}}, or #' \code{waic} method (see the \strong{Examples} section, below). #' #' For \code{loo_model_weights}, \code{...} should contain arguments (e.g. #' \code{method}) to pass to the default \code{\link[loo]{loo_model_weights}} #' method from the \pkg{loo} package. #' #' @param cores,save_psis Passed to \code{\link[loo]{loo}}. #' @param k_threshold Threshold for flagging estimates of the Pareto shape #' parameters \eqn{k} estimated by \code{loo}. See the \emph{How to proceed #' when \code{loo} gives warnings} section, below, for details. #' #' @return The structure of the objects returned by \code{loo} and \code{waic} #' methods are documented in detail in the \strong{Value} section in #' \code{\link[loo]{loo}} and \code{\link[loo]{waic}} (from the \pkg{loo} #' package). #' #' @section Approximate LOO CV: The \code{loo} method for stanreg objects #' provides an interface to the \pkg{\link[=loo-package]{loo}} package for #' approximate leave-one-out cross-validation (LOO). The LOO Information #' Criterion (LOOIC) has the same purpose as the Akaike Information Criterion #' (AIC) that is used by frequentists. 
Both are intended to estimate the #' expected log predictive density (ELPD) for a new dataset. However, the AIC #' ignores priors and assumes that the posterior distribution is multivariate #' normal, whereas the functions from the \pkg{loo} package do not make this #' distributional assumption and integrate over uncertainty in the parameters. #' This only assumes that any one observation can be omitted without having a #' major effect on the posterior distribution, which can be judged using the #' diagnostic plot provided by the \code{\link[loo:pareto-k-diagnostic]{plot.loo}} method and the #' warnings provided by the \code{\link[loo]{print.loo}} method (see the #' \emph{How to Use the rstanarm Package} vignette for an example of this #' process). #' #' \subsection{How to proceed when \code{loo} gives warnings (k_threshold)}{ #' The \code{k_threshold} argument to the \code{loo} method for \pkg{rstanarm} #' models is provided as a possible remedy when the diagnostics reveal #' problems stemming from the posterior's sensitivity to particular #' observations. Warnings about Pareto \eqn{k} estimates indicate observations #' for which the approximation to LOO is problematic (this is described in #' detail in Vehtari, Gelman, and Gabry (2017) and the #' \pkg{\link[=loo-package]{loo}} package documentation). The #' \code{k_threshold} argument can be used to set the \eqn{k} value above #' which an observation is flagged. If \code{k_threshold} is not \code{NULL} #' and there are \eqn{J} observations with \eqn{k} estimates above #' \code{k_threshold} then when \code{loo} is called it will refit the #' original model \eqn{J} times, each time leaving out one of the \eqn{J} #' problematic observations. The pointwise contributions of these observations #' to the total ELPD are then computed directly and substituted for the #' previous estimates from these \eqn{J} observations that are stored in the #' object created by \code{loo}. 
Another option to consider is K-fold #' cross-validation, which is documented on a separate page (see #' \code{\link[=kfold.stanreg]{kfold}}). #' #' \strong{Note}: in the warning messages issued by \code{loo} about large #' Pareto \eqn{k} estimates we recommend setting \code{k_threshold} to at #' least \eqn{0.7}. There is a theoretical reason, explained in Vehtari, #' Gelman, and Gabry (2017), for setting the threshold to the stricter value #' of \eqn{0.5}, but in practice they find that errors in the LOO #' approximation start to increase non-negligibly when \eqn{k > 0.7}. #' } #' #' @seealso #' \itemize{ #' \item The new \href{https://mc-stan.org/loo/articles/}{\pkg{loo} package vignettes} #' and various \href{https://mc-stan.org/rstanarm/articles/}{\pkg{rstanarm} vignettes} #' for more examples using \code{loo} and related functions with \pkg{rstanarm} models. #' \item \code{\link[loo]{pareto-k-diagnostic}} in the \pkg{loo} package for #' more on Pareto \eqn{k} diagnostics. #' \item \code{\link{log_lik.stanreg}} to directly access the pointwise #' log-likelihood matrix. 
#' }
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \donttest{
#' fit1 <- stan_glm(mpg ~ wt, data = mtcars, refresh = 0)
#' fit2 <- stan_glm(mpg ~ wt + cyl, data = mtcars, refresh = 0)
#'
#' # (for bigger models use as many cores as possible)
#' loo1 <- loo(fit1, cores = 1)
#' print(loo1)
#' loo2 <- loo(fit2, cores = 1)
#' print(loo2)
#'
#' # when comparing models the loo objects can be passed to loo_compare
#' # as individual arguments or as a list of loo objects
#' loo_compare(loo1, loo2)
#' loo_compare(list(loo1, loo2))
#'
#' # if the fitted model objects contain a loo object in the component "loo"
#' # then the model objects can be passed directly or as a stanreg_list
#' fit1$loo <- loo1
#' fit2$loo <- loo2
#' loo_compare(fit1, fit2)
#'
#' # if the fitted model objects contain a loo object _and_ a waic or kfold
#' # object, then the criterion argument determines which of them the comparison
#' # is based on
#' fit1$waic <- waic(fit1)
#' fit2$waic <- waic(fit2)
#' loo_compare(fit1, fit2, criterion = "waic")
#'
#' # the models can also be combined into a stanreg_list object, and more
#' # informative model names can be provided to use when printing
#' model_list <- stanreg_list(fit1, fit2, model_names = c("Fewer predictors", "More predictors"))
#' loo_compare(model_list)
#'
#' fit3 <- stan_glm(mpg ~ disp * as.factor(cyl), data = mtcars, refresh = 0)
#' loo3 <- loo(fit3, cores = 2, k_threshold = 0.7)
#' loo_compare(loo1, loo2, loo3)
#'
#' # setting detail=TRUE will also print model formulas if used with
#' # loo_compare.stanreg or loo_compare.stanreg_list
#' fit3$loo <- loo3
#' model_list <- stanreg_list(fit1, fit2, fit3)
#' loo_compare(model_list, detail=TRUE)
#'
#' # Computing model weights
#' #
#' # if the objects in model_list already have 'loo' components then those
#' # will be used. otherwise loo will be computed for each model internally
#' # (in which case the 'cores' argument may also be used and is passed to loo())
#' loo_model_weights(model_list) # defaults to method="stacking"
#' loo_model_weights(model_list, method = "pseudobma")
#' loo_model_weights(model_list, method = "pseudobma", BB = FALSE)
#'
#' # you can also pass precomputed loo objects directly to loo_model_weights
#' loo_list <- list(A = loo1, B = loo2, C = loo3) # names optional (affects printing)
#' loo_model_weights(loo_list)
#' }
#' }
loo.stanreg <-
  function(x,
           ...,
           cores = getOption("mc.cores", 1),
           save_psis = FALSE,
           k_threshold = NULL) {
    # Exact LOO is required when observations carry weights (PSIS-LOO is
    # not valid in that case), so error out with a recommendation.
    if (model_has_weights(x))
      recommend_exact_loo(reason = "model has weights")

    # Track whether the user supplied a threshold: if so, problematic
    # observations are refit exactly (via reloo) instead of only warned about.
    user_threshold <- !is.null(k_threshold)
    if (user_threshold) {
      validate_k_threshold(k_threshold)
    } else {
      k_threshold <- 0.7
    }

    if (used.sampling(x)) {
      # chain_id to pass to loo::relative_eff
      chain_id <- chain_id_for_loo(x)
    } else {
      # ir_idx (importance-resampling indices) to pass to ...
      if (exists("ir_idx", x)) {
        ir_idx <- x$ir_idx
      } else if ("diagnostics" %in% names(x$stanfit@sim) &&
                 "ir_idx" %in% names(x$stanfit@sim$diagnostics)) {
        ir_idx <- x$stanfit@sim$diagnostics$ir_idx
      } else {
        stop("loo not available for models fit using algorithm='",
             x$algorithm, "' and importance_resampling=FALSE.",
             call. = FALSE)
      }
    }

    if (is.stanjm(x)) {
      # Joint models: log-lik matrix is computed directly.
      ll <- log_lik(x)
      r_eff <- loo::relative_eff(exp(ll), chain_id = chain_id, cores = cores)
      loo_x <- suppressWarnings(loo.matrix(
        ll,
        r_eff = r_eff,
        cores = cores,
        save_psis = save_psis
      ))
    } else if (is.stanmvreg(x)) {
      # Multivariate models: column-bind the log-lik of each submodel.
      M <- get_M(x)
      ll <- do.call("cbind", lapply(seq_len(M), function(m) log_lik(x, m = m)))
      r_eff <- loo::relative_eff(exp(ll), chain_id = chain_id, cores = cores)
      loo_x <- suppressWarnings(loo.matrix(
        ll,
        r_eff = r_eff,
        cores = cores,
        save_psis = save_psis
      ))
    } else if (is_clogit(x)) {
      ll <- log_lik.stanreg(x)
      # Strata with (numerically) constant log-lik carry no information and
      # break the PSIS computations, so drop them with a message.
      cons <- apply(ll, MARGIN = 2, FUN = function(y) sd(y) < 1e-15)
      if (any(cons)) {
        message(
          "The following strata were dropped from the ",
          "loo calculation because log-lik is constant: ",
          paste(which(cons), collapse = ", ")
        )
        ll <- ll[, !cons, drop = FALSE]
      }
      r_eff <- loo::relative_eff(exp(ll), chain_id = chain_id, cores = cores)
      loo_x <- suppressWarnings(loo.matrix(
        ll,
        r_eff = r_eff,
        cores = cores,
        save_psis = save_psis
      ))
    } else {
      # Default case: use the memory-efficient function method of loo.
      args <- ll_args(x)
      llfun <- ll_fun(x)
      likfun <- function(data_i, draws) {
        exp(llfun(data_i, draws))
      }
      if (used.sampling(x)) {
        r_eff <- loo::relative_eff(
          # using function method
          x = likfun,
          chain_id = chain_id,
          data = args$data,
          draws = args$draws,
          cores = cores,
          ...
        )
      } else {
        # Approximate relative efficiency from the importance-resampling
        # indices (weights w_ir are the resampling frequencies).
        w_ir <- as.numeric(table(ir_idx)) / length(ir_idx)
        ir_uidx <- which(!duplicated(ir_idx))
        draws <- args$draws
        data <- args$data
        r_eff <- pmin(
          vapply(seq_len(nrow(data)), function(i) {
            lik_i <- likfun(data[i, ], draws)[ir_uidx]
            var(lik_i) / (sum(w_ir^2 * (lik_i - mean(lik_i))^2))
          }, numeric(1)),
          length(ir_uidx)
        ) / length(ir_idx)
      }
      loo_x <- suppressWarnings(
        loo.function(
          llfun,
          data = args$data,
          draws = args$draws,
          r_eff = r_eff,
          ...,
          cores = cores,
          save_psis = save_psis
        )
      )
    }

    # Observations whose Pareto k exceeds the threshold are unreliable.
    bad_obs <- loo::pareto_k_ids(loo_x, k_threshold)
    n_bad <- length(bad_obs)

    out <- structure(
      loo_x,
      model_name = deparse(substitute(x)),
      discrete = is_discrete(x),
      yhash = hash_y(x),
      formula = loo_model_formula(x)
    )

    if (!length(bad_obs)) {
      if (user_threshold) {
        message(
          "All pareto_k estimates below user-specified threshold of ",
          k_threshold, ". \nReturning loo object."
        )
      }
      return(out)
    }

    if (!user_threshold) {
      # No threshold supplied: only advise the user what to do next.
      if (n_bad > 10) {
        recommend_kfold(n_bad)
      } else {
        recommend_reloo(n_bad)
      }
      return(out)
    }

    # Threshold supplied: refit the model leaving out each problematic
    # observation and patch the loo object with exact ELPD contributions.
    reloo_out <- reloo(x, loo_x, obs = bad_obs)
    structure(
      reloo_out,
      model_name = attr(out, "model_name"),
      discrete = attr(out, "discrete"),
      yhash = attr(out, "yhash"),
      formula = loo_model_formula(x)
    )
  }

# WAIC #

#' @rdname loo.stanreg
#' @aliases waic
#' @importFrom loo waic waic.function waic.matrix is.waic
#' @export
#'
waic.stanreg <- function(x, ...) {
  # WAIC requires posterior draws, so only MCMC fits are supported.
  if (!used.sampling(x))
    STOP_sampling_only("waic")
  if (is.stanjm(x)) {
    out <- waic.matrix(log_lik(x))
  } else if (is.stanmvreg(x)) {
    # Combine the log-lik matrices of all submodels.
    M <- get_M(x)
    ll <- do.call("cbind", lapply(seq_len(M), function(m) log_lik(x, m = m)))
    out <- waic.matrix(ll)
  } else if (is_clogit(x)) {
    out <- waic.matrix(log_lik(x))
  } else {
    args <- ll_args(x)
    out <- waic.function(ll_fun(x), data = args$data, draws = args$draws)
  }
  # Attach the same metadata that loo.stanreg attaches so the objects can
  # be validated against each other in loo_compare.
  structure(out,
            class = c("waic", "loo"),
            model_name = deparse(substitute(x)),
            discrete = is_discrete(x),
            yhash = hash_y(x),
            formula = loo_model_formula(x))
}

#' @rdname loo.stanreg
#' @aliases loo_compare
#' @importFrom loo loo_compare
#' @export
#'
#' @param detail For \code{loo_compare.stanreg} and
#'   \code{loo_compare.stanreg_list}, if \code{TRUE} then extra information
#'   about each model (currently just the model formulas) will be printed with
#'   the output.
#' @param criterion For \code{loo_compare.stanreg} and
#'   \code{loo_compare.stanreg_list}, should the comparison be based on LOO-CV
#'   (\code{criterion="loo"}), K-fold-CV (\code{criterion="kfold"}), or WAIC
#'   (\code{criterion="waic"}). The default is LOO-CV. See the \strong{Comparing
#'   models} and \strong{Examples} sections below.
#'
#' @return \code{loo_compare} returns a matrix with class 'compare.loo'. See the
#'   \strong{Comparing models} section below for more details.
#'
#' @section Comparing models: "loo" (or "waic" or "kfold") objects can be passed
#'   to the \code{\link[loo]{loo_compare}} function in the \pkg{loo} package to
#'   perform model comparison. \pkg{rstanarm} also provides a
#'   \code{loo_compare.stanreg} method that can be used if the "loo" (or "waic"
#'   or "kfold") object has been added to the fitted model object (see the
#'   \strong{Examples} section below for how to do this). This second method
#'   allows \pkg{rstanarm} to perform some extra checks that can't be done by
#'   the \pkg{loo} package itself (e.g., verifying that all models to be
#'   compared were fit using the same outcome variable).
#'
#' \code{loo_compare} will return a matrix with one row per model and columns
#' containing the ELPD difference and the standard error of the difference. In
#' the first row of the matrix will be the model with the largest ELPD
#' (smallest LOOIC) and will contain zeros (there is no difference between
#' this model and itself). For each of the remaining models the ELPD
#' difference and SE are reported relative to the model with the best ELPD
#' (the first row). See the \strong{Details} section at the
#' \code{\link[loo]{loo_compare}} page in the \pkg{loo} package for more
#' information.
#'
loo_compare.stanreg <- function(x, ...,
                                criterion = c("loo", "kfold", "waic"),
                                detail = FALSE) {
  criterion <- match.arg(criterion)
  # The first model plus any others supplied via '...'.
  fits <- c(list(x), list(...))
  .loo_comparison(fits, criterion = criterion, detail = detail)
}

#' @rdname loo.stanreg
#' @export
loo_compare.stanreg_list <- function(x, ...,
                                     criterion = c("loo", "kfold", "waic"),
                                     detail = FALSE) {
  criterion <- match.arg(criterion)
  .loo_comparison(x, criterion = criterion, detail = detail)
}

# Shared worker for the two loo_compare methods.
#
# @param fits list of fitted model objects, each expected to carry a
#   precomputed criterion ("loo"/"kfold"/"waic") component.
# @param criterion which component to extract and compare.
# @param detail if TRUE, model formulas are stored for printing.
# @return matrix of class c("compare_rstanarm_loos", "compare.loo", ...).
.loo_comparison <- function(fits, criterion, detail = FALSE) {
  loos <- lapply(fits, "[[", criterion)
  missing_component <- sapply(loos, is.null)
  if (any(missing_component)) {
    stop("Not all objects have a ", criterion, " component.", call. = FALSE)
  }
  loos <- validate_loos(loos)
  comp <- loo::loo_compare(x = loos)

  if (detail) {
    formulas <- lapply(loos, attr, "formula")
    names(formulas) <- sapply(loos, attr, "model_name")
  } else {
    formulas <- NULL
  }

  # Note : rows of comp are ordered by ELPD, but formulas are in same order as
  # as initial order of models when passed in by user
  structure(
    comp,
    class = c("compare_rstanarm_loos", class(comp)),
    formulas = formulas,
    criterion = criterion
  )
}

#' @keywords internal
#' @export
#' @method print compare_rstanarm_loos
print.compare_rstanarm_loos <- function(x, ...) {
  # Translate the stored criterion code into a printable label.
  crit_attr <- attr(x, "criterion")
  if (is.null(crit_attr)) {
    criterion <- NA
  } else {
    criterion <- switch(
      crit_attr,
      "loo" = "LOO-CV",
      "kfold" = "K-fold-CV",
      "waic" = "WAIC"
    )
  }

  formulas <- attr(x, "formulas")
  if (is.null(formulas) && !is.na(criterion)) {
    cat("Model comparison based on", paste0(criterion, ":"), "\n")
  } else {
    # detail=TRUE: list each model's formula before the comparison table.
    cat("Model formulas: ")
    nms <- names(formulas)
    for (j in seq_len(NROW(x))) {
      cat("\n", paste0(nms[j], ": "), formula_string(formulas[[j]]))
    }
    if (!is.na(criterion)) {
      cat("\n\nModel comparison based on", paste0(criterion, ":"), "\n")
    }
  }

  # Delegate the table printing to loo's own print method.
  xcopy <- x
  class(xcopy) <- "compare.loo"
  print(xcopy, ...)
  return(invisible(x))
}

#' @rdname loo.stanreg
#' @aliases loo_model_weights
#'
#' @importFrom loo loo_model_weights
#' @export loo_model_weights
#'
#' @export
#'
#'
#' @section Model weights: The \code{loo_model_weights} method can be used to
#'   compute model weights for a \code{"stanreg_list"} object, which is a list
#'   of fitted model objects made with \code{\link{stanreg_list}}. The end of
#'   the \strong{Examples} section has a demonstration. For details see the
#'   \code{\link[loo]{loo_model_weights}} documentation in the \pkg{loo}
#'   package.
#'
loo_model_weights.stanreg_list <- function(x, ...,
                                           cores = getOption("mc.cores", 1),
                                           k_threshold = NULL) {
  loos <- lapply(x, function(object) object[["loo"]])
  no_loo <- sapply(loos, is.null)

  if (!any(no_loo)) {
    # Every model already carries a precomputed 'loo' component.
    loo_list <- loos
  } else if (all(no_loo)) {
    # None do: compute LOO for each model now.
    message("Computing approximate LOO-CV (models do not already have 'loo' components). ")
    loo_list <- vector(mode = "list", length = length(x))
    for (j in seq_along(x)) {
      loo_list[[j]] <- loo.stanreg(x[[j]], cores = cores, k_threshold = k_threshold)
    }
  } else {
    # A mix is ambiguous, so refuse.
    stop("Found some models with 'loo' components and some without, ",
         "but either all or none should have 'loo' components.")
  }

  wts <- loo::loo_model_weights.default(x = loo_list, ...)
  setNames(wts, names(x))
}

# internal ----------------------------------------------------------------

# Validate a user-supplied Pareto-k threshold: must be a single
# non-negative number; values above 1 trigger a warning.
validate_k_threshold <- function(k) {
  if (!is.numeric(k) || length(k) != 1) {
    stop("'k_threshold' must be a single numeric value.", call. = FALSE)
  } else if (k < 0) {
    stop("'k_threshold' < 0 not allowed.", call. = FALSE)
  } else if (k > 1) {
    warning(
      "Setting 'k_threshold' > 1 is not recommended.",
      "\nFor details see the PSIS-LOO section in help('loo-package', 'loo').",
      call. = FALSE
    )
  }
}

# Warn that K-fold CV is preferable when many observations are problematic.
recommend_kfold <- function(n) {
  warning(
    "Found ", n, " observations with a pareto_k > 0.7. ",
    "With this many problematic observations we recommend calling ",
    "'kfold' with argument 'K=10' to perform 10-fold cross-validation ",
    "rather than LOO.\n",
    call. = FALSE
  )
}

# Warn that rerunning loo with k_threshold=0.7 (i.e. reloo) is advisable.
recommend_reloo <- function(n) {
  warning(
    "Found ", n, " observation(s) with a pareto_k > 0.7. ",
    "We recommend calling 'loo' again with argument 'k_threshold = 0.7' ",
    "in order to calculate the ELPD without the assumption that ",
    "these observations are negligible. ",
    "This will refit the model ", n,
    " times to compute the ELPDs for the problematic observations directly.\n",
    call. = FALSE
  )
}

# Error telling the user to do exact LOO-CV via kfold.
recommend_exact_loo <- function(reason) {
  stop(
    "'loo' is not supported if ", reason, ". ",
    "If refitting the model 'nobs(x)' times is feasible, ",
    "we recommend calling 'kfold' with K equal to the ",
    "total number of observations in the data to perform exact LOO-CV.\n",
    call. = FALSE
  )
}

# Refit model leaving out specific observations
#
# @param x stanreg object
# @param loo_x the result of loo(x)
# @param obs vector of observation indexes. the model will be refit length(obs)
#   times, each time leaving out one of the observations specified in 'obs'.
# @param ... unused currently
# @param refit logical, to toggle whether refitting actually happens (only used
#   to avoid refitting in tests)
#
# @return A modified version of 'loo_x'.
# @importFrom utils capture.output
reloo <- function(x, loo_x, obs, ..., refit = TRUE) {
  if (is.stanmvreg(x))
    STOP_if_stanmvreg("reloo")
  stopifnot(!is.null(x$data), is.loo(loo_x))

  J <- length(obs)
  d <- kfold_and_reloo_data(x)
  lls <- vector("list", J)
  message(
    J, " problematic observation(s) found.",
    "\nModel will be refit ", J, " times."
  )
  if (!refit)
    return(NULL)

  for (j in seq_len(J)) {
    message(
      "\nFitting model ", j, " out of ", J,
      " (leaving out observation ", obs[j], ")"
    )
    omitted <- obs[j]
    if (is_clogit(x)) {
      # For clogit the whole stratum containing the observation is held out.
      strata_id <- model.weights(model.frame(x))
      omitted <- which(strata_id == strata_id[obs[j]])
    }

    # Build (but don't yet evaluate) the update() call that refits the
    # model without the omitted observation(s).
    if (used.optimizing(x)) {
      fit_j_call <- update(
        x,
        data = d[-omitted, , drop = FALSE],
        subset = rep(TRUE, nrow(d) - length(omitted)),
        evaluate = FALSE
      )
    } else {
      fit_j_call <- update(
        x,
        data = d[-omitted, , drop = FALSE],
        subset = rep(TRUE, nrow(d) - length(omitted)),
        evaluate = FALSE,
        refresh = 0,
        open_progress = FALSE
      )
    }
    # Force evaluation now so the call doesn't capture stale promises.
    fit_j_call$subset <- eval(fit_j_call$subset)
    fit_j_call$data <- eval(fit_j_call$data)
    if (!is.null(getCall(x)$offset)) {
      fit_j_call$offset <- x$offset[-omitted]
    }
    capture.output(
      fit_j <- suppressWarnings(eval(fit_j_call))
    )

    # Log-lik of the held-out observation(s) under the refitted posterior.
    lls[[j]] <- log_lik.stanreg(
      fit_j,
      newdata = d[omitted, , drop = FALSE],
      offset = x$offset[omitted],
      newx = get_x(x)[omitted, , drop = FALSE],
      # NULL other than for some stan_betareg models
      newz = x[["z"]][omitted, , drop = FALSE],
      stanmat = as.matrix.stanreg(fit_j)
    )
  }

  # compute elpd_{loo,j} for each of the held out observations
  elpd_loo <- unlist(lapply(lls, log_mean_exp))

  # compute \hat{lpd}_j for each of the held out observations (using log-lik
  # matrix from full posterior, not the leave-one-out posteriors)
  ll_x <- log_lik(
    object = x,
    newdata = d[obs, , drop = FALSE],
    offset = x$offset[obs]
  )
  hat_lpd <- apply(ll_x, 2, log_mean_exp)

  # compute effective number of parameters
  p_loo <- hat_lpd - elpd_loo

  # replace parts of the loo object with these computed quantities
  sel <- c("elpd_loo", "p_loo", "looic")
  loo_x$pointwise[obs, sel] <- cbind(elpd_loo, p_loo, -2 * elpd_loo)
  loo_x$estimates[sel, "Estimate"] <- with(loo_x, colSums(pointwise[, sel]))
  loo_x$estimates[sel, "SE"] <- with(loo_x, {
    N <- nrow(pointwise)
    sqrt(N * apply(pointwise[, sel], 2, var))
  })
  # The refit observations no longer rely on PSIS, so their k is undefined.
  loo_x$diagnostics$pareto_k[obs] <- NA
  return(loo_x)
}

# Numerically stable log(exp(a) + exp(b)) for two scalars.
log_sum_exp2 <- function(a, b) {
  m <- max(a, b)
  m + log(sum(exp(c(a, b) - m)))
}

# Numerically stable log(sum(exp(x))).
# @param x numeric vector
log_sum_exp <- function(x) {
  max_x <- max(x)
  max_x + log(sum(exp(x - max_x)))
}

# log_mean_exp (just log_sum_exp(x) - log(length(x)))
log_mean_exp <- function(x) {
  log_sum_exp(x) - log(length(x))
}

# Get correct data to use for kfold and reloo
#
# @param x stanreg object
# @return data frame
kfold_and_reloo_data <- function(x) {
  d <- x[["data"]] # either data frame or environment
  form <- formula(x)
  if (!inherits(form, "formula")) {
    # may be a string
    form <- as.formula(form, env = NULL)
  }

  # Honor any 'subset' argument from the original call.
  sub <- getCall(x)[["subset"]]
  if (!is.null(sub)) {
    keep <- eval(substitute(sub), envir = d)
  }

  if (is.environment(d)) {
    # make data frame
    d <- get_all_vars(form, data = d)
  } else {
    # already a data frame
    all_vars <- all.vars(form)
    if (isTRUE(x$stan_function == "stan_gamm4")) {
      # see https://github.com/stan-dev/rstanarm/issues/435
      all_vars <- c(all_vars, all.vars(getCall(x)[["random"]]))
    }
    if ("." %in% all_vars) {
      all_vars <- seq_len(ncol(d))
    }
    d <- d[, all_vars, drop = FALSE]
  }
  if (!is.null(sub)) {
    d <- d[keep, , drop = FALSE]
  }
  d <- na.omit(d)

  if (is_clogit(x)) {
    # Keep the stratum identifiers alongside the data.
    strata_var <- as.character(getCall(x)$strata)
    d[[strata_var]] <- model.weights(model.frame(x))
  }
  return(d)
}

# Calculate a SHA1 hash of y
# @param x stanreg object
# @param ... Passed to digest::sha1
#
hash_y <- function(x, ...) {
  if (!requireNamespace("digest", quietly = TRUE))
    stop("Please install the 'digest' package.")
  validate_stanreg_object(x)
  y <- get_y(x)
  # Strip attributes so the hash depends only on the values.
  attributes(y) <- NULL
  digest::sha1(x = y, ...)
}

# check if discrete or continuous
# @param object stanreg object
is_discrete <- function(object) {
  if (inherits(object, "polr"))
    return(TRUE)
  if (inherits(object, "stanmvreg")) {
    # One logical per submodel family.
    fams <- fetch(family(object), "family")
    res <- sapply(fams, function(x) is.binomial(x) || is.poisson(x) || is.nb(x))
    return(res)
  }
  fam <- family(object)$family
  is.binomial(fam) || is.poisson(fam) || is.nb(fam)
}

# validate objects for model comparison
validate_loos <- function(loos = list()) {
  if (utils::packageVersion("loo") <= "2.1.0") {
    # will be checked by loo in later versions
    yhash <- lapply(loos, attr, which = "yhash")
    yhash_check <- sapply(yhash, function(x) {
      isTRUE(all.equal(x, yhash[[1]]))
    })
    if (!all(yhash_check)) {
      warning("Not all models have the same y variable.", call. = FALSE)
    }
  }
  discrete <- sapply(loos, attr, which = "discrete")
  if (!all(discrete == discrete[1])) {
    stop("Discrete and continuous observation models can't be compared.",
         call. = FALSE)
  }
  setNames(loos, nm = lapply(loos, attr, which = "model_name"))
}

# chain_id to pass to loo::relative_eff
chain_id_for_loo <- function(object) {
  dims <- dim(object$stanfit)[1:2]
  n_iter <- dims[1]
  n_chain <- dims[2]
  rep(1:n_chain, each = n_iter)
}

# model formula to store in loo object
# @param x stanreg object
loo_model_formula <- function(x) {
  form <- try(formula(x), silent = TRUE)
  if (inherits(form, "try-error") || is.null(form)) {
    # Fall back to a placeholder string rather than erroring.
    form <- "formula not found"
  }
  return(form)
}

# deprecated --------------------------------------------------------------

#' @rdname loo.stanreg
#' @param loos a list of objects produced by the \code{\link{loo}} function
#' @export
compare_models <- function(..., loos = list(), detail = FALSE) {
  .Deprecated("loo_compare")
  dots <- list(...)
  if (length(dots) && length(loos)) {
    stop("'...' and 'loos' can't both be specified.", call. = FALSE)
  } else if (length(dots)) {
    loos <- dots
  } else {
    stopifnot(is.list(loos))
  }
  loos <- validate_loos(loos)
  comp <- loo::compare(x = loos)
  structure(
    comp,
    class = c("compare_rstanarm_loos", class(comp)),
    model_names = names(loos),
    formulas = if (!detail) NULL else lapply(loos, attr, "formula")
  )
}
rstanarm/R/predictive_error.R0000644000176200001440000002407414370470372016012 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' In-sample or out-of-sample predictive errors
#'
#' This is a convenience function for computing \eqn{y - y^{rep}}{y - yrep}
#' (in-sample, for observed \eqn{y}) or \eqn{y - \tilde{y}}{y - ytilde}
#' (out-of-sample, for new or held-out \eqn{y}). The method for stanreg objects
#' calls \code{\link{posterior_predict}} internally, whereas the method for
#' matrices accepts the matrix returned by \code{posterior_predict} as input and
#' can be used to avoid multiple calls to \code{posterior_predict}.
#'
#' @aliases predictive_error
#' @export
#'
#' @param object Either a fitted model object returned by one of the
#'   \pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg
#'   object}) or, for the matrix method, a matrix of draws from the
#'   posterior predictive distribution returned by
#'   \code{\link{posterior_predict}}.
#' @param newdata,draws,seed,offset,re.form Optional arguments passed to
#'   \code{\link{posterior_predict}}. For binomial models, please see the
#'   \strong{Note} section below if \code{newdata} will be specified.
#' @template args-dots-ignored
#'
#' @return A \code{draws} by \code{nrow(newdata)} matrix. If \code{newdata} is
#'   not specified then it will be \code{draws} by \code{nobs(object)}.
#'
#' @note The \strong{Note} section in \code{\link{posterior_predict}} about
#'   \code{newdata} for binomial models also applies for
#'   \code{predictive_error}, with one important difference. For
#'   \code{posterior_predict} if the left-hand side of the model formula is
#'   \code{cbind(successes, failures)} then the particular values of
#'   \code{successes} and \code{failures} in \code{newdata} don't matter, only
#'   that they add to the desired number of trials. \strong{This is not the case
#'   for} \code{predictive_error}. For \code{predictive_error} the particular
#'   value of \code{successes} matters because it is used as \eqn{y} when
#'   computing the error.
#'
#' @seealso \code{\link[=posterior_predict.stanreg]{posterior_predict}} to draw
#'   from the posterior predictive distribution without computing predictive
#'   errors.
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' if (!exists("example_model")) example(example_model)
#' err1 <- predictive_error(example_model, draws = 50)
#' hist(err1)
#'
#' # Using newdata with a binomial model
#' formula(example_model)
#' nd <- data.frame(
#'  size = c(10, 20),
#'  incidence = c(5, 10),
#'  period = factor(c(1,2)),
#'  herd = c(1, 15)
#' )
#' err2 <- predictive_error(example_model, newdata = nd, draws = 10, seed = 1234)
#'
#' # stanreg vs matrix methods
#' fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 300)
#' preds <- posterior_predict(fit, seed = 123)
#' all.equal(
#'   predictive_error(fit, seed = 123),
#'   predictive_error(preds, y = fit$y)
#' )
#' }
predictive_error.stanreg <-
  function(object,
           newdata = NULL,
           draws = NULL,
           re.form = NULL,
           seed = NULL,
           offset = NULL,
           ...) {
    # Predictive errors require posterior draws, so optimization fits and
    # ordinal (polr) models are not supported.
    if (used.optimizing(object))
      STOP_not_optimizing("predictive_error")
    if (inherits(object, "polr"))
      stop("'predictive_error' is not currently available for stan_polr.")
    if ("y" %in% names(list(...)))
      stop("Argument 'y' should not be specified if 'object' is a stanreg object.")

    # Observed outcome: from the fit, or evaluated from the formula's LHS
    # in the supplied newdata.
    y <- if (is.null(newdata))
      get_y(object)
    else
      eval(formula(object)[[2L]], newdata)

    # For cbind(successes, failures) binomial outcomes use the successes.
    fam <- family(object)$family
    if (is.binomial(fam) && NCOL(y) == 2)
      y <- y[, 1]

    ytilde <- posterior_predict(
      object,
      newdata = newdata,
      draws = draws,
      offset = offset,
      seed = seed,
      re.form = re.form
    )
    predictive_error(ytilde, y = y)
  }

#' @rdname predictive_error.stanreg
#' @export
#' @param y For the matrix method only, a vector of \eqn{y} values the
#'   same length as the number of columns in the matrix used as \code{object}.
#'   The method for stanreg objects takes \code{y} directly from the fitted
#'   model object.
#'
predictive_error.matrix <- function(object, y, ...) {
  NextMethod("predictive_error")
}

#' @rdname predictive_error.stanreg
#' @export
predictive_error.ppd <- function(object, y, ...) {
  # Drop the "ppd" class and dispatch to the matrix method.
  predictive_error(unclass(object), y = y, ...)
}

# @rdname predictive_error.stanreg
# @export
# @param m For \code{stanmvreg} models, the submodel for which to calculate
#   the prediction error. Can be an integer, or for \code{\link{stan_mvmer}}
#   models it can be \code{"y1"}, \code{"y2"}, etc, or for \code{\link{stan_jm}}
#   models it can be \code{"Event"}, \code{"Long1"}, \code{"Long2"}, etc.
# @param t,u Only relevant for \code{\link{stan_jm}} models and when \code{m = "Event"}.
#   The argument \code{t} specifies the time up to which individuals must have survived
#   as well as being the time up to which the longitudinal data in \code{newdata}
#   is available. The argument \code{u} specifies the time at which the
#   prediction error should be calculated (i.e. the time horizon).
#
predictive_error.stanmvreg <-
  function(object,
           newdataLong = NULL,
           newdataEvent = NULL,
           m = "Event",
           draws = NULL,
           re.form = NULL,
           seed = NULL,
           offset = NULL,
           t,
           u,
           lossfn = "square",
           ...) {
    # NOTE: intentionally disabled; the code below is kept for future work
    # and is unreachable until this stop() is removed.
    stop("This function is not yet implemented for stanmvreg objects.")
    if ("y" %in% names(list(...)))
      stop("Argument 'y' should not be specified if 'object' is a stanmvreg object.")
    if (!is.jm(object))
      stop("This function is currently only implemented for stan_jm models.")
    if (missing(t))
      t <- NULL
    if (missing(u))
      u <- NULL
    M <- get_M(object)
    if (m == "Event") {
      # prediction error for event submodel
      if (!is.surv(object))
        stop("No event submodel was found in the fitted object.")
      if (is.null(t) || is.null(u))
        stop("'t' and 'u' must be specified when calculating the ",
             "prediction error for the event submodel.")
      if (u <= t)
        stop("'u' must be greater than 't'.")

      # Construct prediction data
      #   ndL: dataLong to be used in predictions
      #   ndE: dataEvent to be used in predictions
      if (!identical(is.null(newdataLong), is.null(newdataEvent)))
        stop("Both newdataLong and newdataEvent must be supplied together.")
      if (is.null(newdataLong)) {
        # user did not specify newdata
        dats <- get_model_data(object)
        ndL <- dats[1:M]
        ndE <- dats[["Event"]]
      } else {
        # user specified newdata
        newdatas <- validate_newdatas(object, newdataLong, newdataEvent)
        ndL <- newdatas[1:M]
        ndE <- newdatas[["Event"]]
      }

      # Subset prediction data to only include
      # observations prior to time t
      fm_LHS <- formula(object, m = "Event")[[2L]]
      event_tvar <- as.character(fm_LHS[[length(fm_LHS) - 1L]])
      sel <- which(ndE[[event_tvar]] > t)
      ndE <- ndE[sel, , drop = FALSE]
      ndL <- lapply(ndL, function(x) {
        sel <- which(x[[object$time_var]] > t)
        x[sel, , drop = FALSE]
      })
      id_var <- object$id_var
      ids <- ndE[[id_var]]
      for (i in 1:length(ndL))
        ids <- intersect(ndL[[i]][[id_var]], ids)
      if (!length(ids))
        stop("No individuals still at risk at time 't' and ",
             "with longitudinal measurements prior to 't'.")
      ndE <- ndE[ndE[[id_var]] %in% ids, , drop = FALSE]
      ndL <- lapply(ndL, function(x) {
        x[x[[id_var]] %in% ids, , drop = FALSE]
      })

      # Observed y: event status at time u
      event_dvar <- as.character(fm_LHS[[length(fm_LHS)]])
      y <- ndE[, c(id_var, event_tvar, event_dvar), drop = FALSE]

      # Predicted y: conditional survival probability at time u
      ytilde <- posterior_survfit(
        object,
        newdataLong = ndL,
        newdataEvent = ndE,
        times = u,
        last_time = t,
        last_time2 = event_tvar,
        condition = TRUE,
        extrapolate = FALSE,
        draws = draws,
        seed = seed)
      ytilde <- ytilde[, c(id_var, "survpred", "survpred_eventtime"), drop = FALSE]
      y <- merge(y, ytilde, by = id_var)
      loss <- switch(lossfn,
                     square = function(x) {x*x},
                     absolute = function(x) {abs(x)})
      y$dummy <- as.integer(y[[event_tvar]] > u)
      y$status <- as.integer(y[[event_dvar]])
      y$res <-
        y$dummy * loss(1 - y$survpred) +
        y$status * (1 - y$dummy) * loss(0 - y$survpred) +
        (1 - y$status) * (1 - y$dummy) * (
          y$survpred_eventtime * loss(1 - y$survpred) +
            (1 - y$survpred_eventtime) + loss(0 - y$survpred)
        )
      return(list(PE = mean(y$res), N = nrow(y)))
    } else {
      # prediction error for longitudinal submodel
      y <- if (is.null(newdataLong))
        get_y(object, m = m)
      else
        eval(formula(object, m = m)[[2L]], newdataLong)
      fam <- family(object, m = m)$family
      if (is.binomial(fam) && NCOL(y) == 2)
        y <- y[, 1]
      ytilde <- posterior_predict(
        object,
        m = m,
        newdata = newdataLong,
        draws = draws,
        offset = offset,
        seed = seed,
        re.form = re.form
      )
      return(predictive_error(ytilde, y = y))
    }
  }
rstanarm/R/zzz.R0000644000176200001440000000300214370470372013264 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Load the compiled Stan model modules when the package namespace is loaded.
.onLoad <- function(libname, pkgname) {
  modules <- paste0("stan_fit4", names(stanmodels), "_mod")
  for (m in modules) loadModule(m, what = TRUE)
}

# Print version and configuration advice when the package is attached.
.onAttach <- function(...) {
  ver <- utils::packageVersion("rstanarm")
  packageStartupMessage("This is rstanarm version ", ver)
  packageStartupMessage("- See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!")
  packageStartupMessage("- Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.")
  packageStartupMessage("- For execution on a local, multicore CPU with excess RAM we recommend calling")
  packageStartupMessage("  options(mc.cores = parallel::detectCores())")
}
rstanarm/R/stanmvreg-methods.R0000644000176200001440000004742313722762571016110 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Methods for stanmvreg objects
#'
#' S3 methods for \link[=stanreg-objects]{stanmvreg} objects. There are also
#' several methods (listed in \strong{See Also}, below) with their own
#' individual help pages.
#' The main difference between these methods and the
#' \link[=stanreg-methods]{stanreg} methods is that the methods described here
#' generally include an additional argument \code{m} which allows the user to
#' specify which submodel they wish to return the result for. If the argument
#' \code{m} is set to \code{NULL} then the result will generally be a named list
#' with each element of the list containing the result for one of the submodels.
#'
#' @name stanmvreg-methods
#'
#' @templateVar stanmvregArg object,x
#' @templateVar mArg m
#' @template args-stanmvreg-object
#' @template args-m
#' @template args-remove-stub
#' @param ... Ignored, except by the \code{update} method. See
#'   \code{\link{update}}.
#'
#' @details Most of these methods are similar to the methods defined for objects
#'   of class 'lm', 'glm', 'glmer', etc. However there are a few exceptions:
#'
#' \describe{
#' \item{\code{coef}}{
#' Medians are used for point estimates. See the \emph{Point estimates} section
#' in \code{\link{print.stanmvreg}} for more details. \code{coef} returns a list
#' equal to the length of the number of submodels. The first
#' elements of the list are the coefficients from each of the fitted longitudinal
#' submodels and are the same layout as those returned by \code{coef} method of the
#' \pkg{lme4} package, that is, the sum of the random and fixed effects coefficients
#' for each explanatory variable for each level of each grouping factor. The final
#' element of the returned list is a vector of fixed effect coefficients from the
#' event submodel.
#' }
#' \item{\code{se}}{
#' The \code{se} function returns standard errors based on
#' \code{\link{mad}}. See the \emph{Uncertainty estimates} section in
#' \code{\link{print.stanmvreg}} for more details.
#' }
#' \item{\code{confint}}{
#' Not supplied, since the \code{\link{posterior_interval}} function should
#' be used instead to compute Bayesian uncertainty intervals.
#' }
#' \item{\code{residuals}}{
#' Residuals are \emph{always} of type \code{"response"} (not \code{"deviance"}
#' residuals or any other type).
#' } #' } #' #' @seealso #' \itemize{ #' \item The \code{\link[=print.stanmvreg]{print}}, #' \code{\link[=summary.stanmvreg]{summary}}, and \code{\link{prior_summary}} #' methods for \code{stanmvreg} objects for information on the fitted model. #' \item The \code{\link[=plot.stanreg]{plot}} method to plot estimates and #' diagnostics. #' \item The \code{\link{pp_check}} method for graphical posterior predictive #' checking of the longitudinal or glmer submodels. #' \item The \code{\link{ps_check}} method for graphical posterior predictive #' checking of the event submodel. #' \item The \code{\link{posterior_traj}} for predictions for the longitudinal #' submodel (for models estimated using \code{\link{stan_jm}}), as well as #' it's associated \code{\link[=plot.predict.stanjm]{plot}} method. #' \item The \code{\link{posterior_survfit}} for predictions for the event #' submodel, including so-called "dynamic" predictions (for models estimated #' using \code{\link{stan_jm}}), as well as #' it's associated \code{\link[=plot.survfit.stanjm]{plot}} method. #' \item The \code{\link{posterior_predict}} for predictions for the glmer #' submodel (for models estimated using \code{\link{stan_mvmer}}). #' \item The \code{\link{posterior_interval}} for uncertainty intervals for #' model parameters. #' \item The \code{\link[=loo.stanreg]{loo}}, #' and \code{\link[=log_lik.stanmvreg]{log_lik}} methods for leave-one-out #' model comparison, and computing the log-likelihood of (possibly new) data. #' \item The \code{\link[=as.matrix.stanreg]{as.matrix}}, \code{as.data.frame}, #' and \code{as.array} methods to access posterior draws. #' } #' #' Other S3 methods for stanmvreg objects, which have separate documentation, #' including \code{\link{print.stanmvreg}}, and \code{\link{summary.stanmvreg}}. 
#'
#' Also \code{\link{posterior_interval}} for an alternative to \code{confint},
#' and \code{posterior_predict}, \code{posterior_traj} and
#' \code{posterior_survfit} for predictions based on the fitted joint model.
#'
NULL

#' @rdname stanmvreg-methods
#' @export
#'
coef.stanmvreg <- function(object, m = NULL, ...) {
  M <- get_M(object)
  if (length(list(...)))
    warning("Arguments named \"", paste(names(list(...)), collapse = ", "),
            "\" ignored.", call. = FALSE)
  # Fixed effects for each submodel, stored as one-row data frames so they
  # can be replicated across the levels of each grouping factor below.
  fef <- lapply(fixef(object), function(x)
    data.frame(rbind(x), check.names = FALSE))
  ref <- ranef(object)
  refnames <- lapply(ref, function(x) unlist(lapply(x, colnames)))
  # Pad the fixed effects with zero columns for any terms that appear only
  # among the random effects, so random + fixed can be summed columnwise.
  missnames <- lapply(1:M, function(m) setdiff(refnames[[m]], names(fef[[m]])))
  nmiss <- sapply(missnames, length)
  if (any(nmiss > 0))
    for (x in 1:M) {
      if (nmiss[x] > 0) {
        fillvars <- setNames(data.frame(rbind(rep(0, nmiss[x]))),
                             missnames[[x]])
        fef[[x]] <- cbind(fillvars, fef[[x]])
      }
    }
  # Replicate the (padded) fixed effects once per level of each grouping
  # factor, then add the group-specific deviations.
  val <- lapply(1:M, function(m)
    lapply(ref[[m]], function(x)
      fef[[m]][rep.int(1L, nrow(x)), , drop = FALSE]))
  for (x in 1:M) { # loop over number of markers
    # was seq(a = val[[x]]): relied on fragile partial matching of 'a' to
    # 'along.with'; seq_along() is the explicit, idiomatic equivalent
    for (i in seq_along(val[[x]])) { # loop over number of grouping factors
      refi <- ref[[x]][[i]]
      row.names(val[[x]][[i]]) <- row.names(refi)
      nmsi <- colnames(refi)
      if (!all(nmsi %in% names(fef[[x]])))
        stop("Unable to align random and fixed effects.", call. = FALSE)
      # Sum of the group-level and population-level coefficients, as in lme4
      for (nm in nmsi)
        val[[x]][[i]][[nm]] <- val[[x]][[i]][[nm]] + refi[, nm]
    }
  }
  val <- lapply(val, function(x) structure(x, class = "coef.mer"))
  # For joint models the event submodel has only fixed effects; append them.
  if (is.jm(object))
    val <- c(val, list(fixef(object)$Event))
  if (is.null(m)) list_nms(val, M, stub = get_stub(object)) else val[[m]]
}

#' @rdname stanmvreg-methods
#' @export
#'
fitted.stanmvreg <- function(object, m = NULL, ...) {
  # Not yet supported; the code after stop() is the intended implementation
  # once object$fitted.values is populated upstream.
  stop("Not currently implemented.")
  M <- get_M(object)
  stub <- get_stub(object)
  if (is.null(m)) list_nms(object$fitted.values, M, stub = stub) else
    object$fitted.values[[m]]
}

#' @rdname stanmvreg-methods
#' @export
residuals.stanmvreg <- function(object, m = NULL, ...)
{
  # Not yet supported; the code after stop() is the intended implementation
  # once object$residuals is populated upstream.
  stop("Not currently implemented.")
  M <- get_M(object)
  stub <- get_stub(object)
  if (is.null(m)) list_nms(object$residuals, M, stub = stub) else
    object$residuals[[m]]
}

#' @rdname stanmvreg-methods
#' @export
se.stanmvreg <- function(object, m = NULL, ...) {
  # Not yet supported; the code after stop() is the intended implementation
  # once object$ses is populated upstream.
  stop("Not currently implemented.")
  M <- get_M(object)
  stub <- get_stub(object)
  if (is.null(m)) list_nms(object$ses, M, stub = stub) else
    object$ses[[m]]
}

#' @rdname stanmvreg-methods
#' @export
#' @param fixed.only A logical specifying whether to only retain the fixed effect
#'   part of the longitudinal submodel formulas
#' @param random.only A logical specifying whether to only retain the random effect
#'   part of the longitudinal submodel formulas
formula.stanmvreg <- function (x, fixed.only = FALSE, random.only = FALSE,
                               m = NULL, ...) {
  # If the user asked only for the random part, the fixed part is dropped.
  if (missing(fixed.only) && random.only) fixed.only <- FALSE
  if (fixed.only && random.only)
    stop("'fixed.only' and 'random.only' can't both be TRUE.", call. = FALSE)
  M <- get_M(x)
  form <- x$formula
  if (is.null(form)) stop2("Could not find formula in stanmvreg object.")
  if (fixed.only) {
    # Strip the bar terms (random effects) from the RHS of each submodel
    # formula; the RHS is the last element of the formula object.
    for (i in 1:M)
      form[[i]][[length(form[[i]])]] <-
        lme4::nobars(form[[i]][[length(form[[i]])]])
  }
  if (random.only) {
    # Keep only the random-effects part, retaining the response.
    for (i in 1:M)
      form[[i]] <- justRE(form[[i]], response = TRUE)
  }
  if (is.null(m)) return(list_nms(form, M, stub = get_stub(x))) else
    return(form[[m]])
}

#' terms method for stanmvreg objects
#' @export
#' @keywords internal
#' @templateVar mArg m
#' @template args-m
#' @param x,fixed.only,random.only,... See lme4:::terms.merMod.
#'
terms.stanmvreg <- function(x, fixed.only = TRUE, random.only = FALSE,
                            m = NULL, ...) {
  # Fall through to the default method for non-stanmvreg input.
  if (!is.stanmvreg(x)) return(NextMethod("terms"))
  if (missing(fixed.only) && random.only) fixed.only <- FALSE
  if (fixed.only && random.only)
    stop("'fixed.only' and 'random.only' can't both be TRUE.", call.
= FALSE)
  Terms <- list()
  if (is.mvmer(x)) {
    M <- get_M(x)
    mvmer_terms <- fetch(x$glmod, "terms")
    if (fixed.only) {
      # Terms for the fixed-effects-only formula of each submodel, carrying
      # over the stored "predvars.fixed" attribute.
      Terms <- lapply(seq(M), function(i) {
        fe_form <- formula.stanmvreg(x, fixed.only = TRUE, m = i)
        tt <- terms.formula(fe_form)
        attr(tt, "predvars") <- attr(mvmer_terms[[i]], "predvars.fixed")
        tt
      })
    } else if (random.only) {
      # Terms for the random-effects-only formula, with bars substituted so
      # terms.formula() can parse it; carries over "predvars.random".
      Terms <- lapply(seq(M), function(i) {
        re_form <- formula.stanmvreg(x, random.only = TRUE, m = i)
        tt <- terms.formula(lme4::subbars(re_form))
        attr(tt, "predvars") <- attr(mvmer_terms[[i]], "predvars.random")
        tt
      })
    } else {
      Terms[1:M] <- mvmer_terms
    }
    Terms <- list_nms(Terms, M, stub = get_stub(x))
  }
  if (is.surv(x)) {
    Terms$Event <- terms(x$terms$Event)
  }
  if (is.null(m)) Terms else Terms[[m]]
}

#' @rdname stanmvreg-methods
#' @export
#' @method update stanmvreg
#' @param formula. An updated formula for the model. For a multivariate model
#'   \code{formula.} should be a list of formulas, as described for the
#'   \code{formula} argument in \code{\link{stan_mvmer}}.
#' @param evaluate See \code{\link[stats]{update}}.
#'
update.stanmvreg <- function(object, formula., ..., evaluate = TRUE) {
  call <- getCall(object)
  M <- get_M(object)
  if (is.null(call))
    stop2("'object' does not contain a 'call' component.")
  extras <- match.call(expand.dots = FALSE)$...
  fm <- formula(object)
  if (!missing(formula.)) {
    if (M > 1) {
      if (!is.list(formula.))
        stop2("To update the formula for a multivariate model ",
              "'formula.' should be a list of formula objects. Use ",
              "'~ .' if you do not wish to alter the formula for one or ",
              "more of the submodels.")
      if (length(formula.) != M)
        stop2(paste0("The list provided in 'formula.' appears to be the ",
                     "incorrect length; should be length ", M))
    } else {
      if (!is.list(formula.)) formula. <- list(formula.)
    }
    # Guards the M == 1 case where the user supplied a list of wrong length.
    if (length(formula.) != M)
      stop2("The length of 'formula.' must be equal to the number of ",
            "glmer submodels in the original model, which was ", M, ".")
    fm_mvmer <- lapply(1:M, function(m) update.formula(fm[[m]], formula.[[m]]))
    names(fm_mvmer) <- NULL
    # Rebuild the 'formula' call argument as an unevaluated list() call.
    fm_mvmer <- as.call(c(quote(list), fm_mvmer))
    call$formula <- fm_mvmer
  }
  if (length(extras)) {
    # Overwrite matching arguments in the original call; append new ones.
    existing <- !is.na(match(names(extras), names(call)))
    for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
    if (any(!existing)) {
      call <- c(as.list(call), extras[!existing])
      call <- as.call(call)
    }
  }
  if (!evaluate) return(call)
  # do this like lme4 update.merMod instead of update.default: try the
  # formula environment, then the first sys.frame, then the parent frame
  ff <- environment(formula(object))
  pf <- parent.frame()
  sf <- sys.frames()[[1L]]
  tryCatch(eval(call, envir = ff),
           error = function(e) {
             tryCatch(eval(call, envir = sf),
                      error = function(e) {
                        eval(call, pf)
                      })
           })
}

#' @rdname stanmvreg-methods
#' @export
#' @method update stanjm
#' @param formulaLong.,formulaEvent. An updated formula for the longitudinal
#'   or event submodel, when \code{object} was estimated using
#'   \code{\link{stan_jm}}. For a multivariate joint model \code{formulaLong.}
#'   should be a list of formulas, as described for the \code{formulaLong}
#'   argument in \code{\link{stan_jm}}.
#'
update.stanjm <- function(object, formulaLong., formulaEvent., ...,
                          evaluate = TRUE) {
  call <- getCall(object)
  M <- get_M(object)
  if (is.null(call))
    stop2("'object' does not contain a 'call' component.")
  if ("formula." %in% names(list(...)))
    # (message fixed: the argument is spelled 'formulaEvent.')
    stop2("'formula.' should not be specified for joint models. ",
          "Specify 'formulaLong.' and 'formulaEvent.' instead.")
  extras <- match.call(expand.dots = FALSE)$...
  fm <- formula(object)
  if (!missing(formulaLong.)) {
    if (!is.jm(object))
      stop("'formulaLong.' should only be specified for joint models estimated ",
           "using stan_jm. Specify 'formula.' instead.", call. = FALSE)
    if (M > 1) {
      if (!is.list(formulaLong.))
        stop("To update the formula for a multivariate joint model ",
             "'formulaLong.' should be a list of formula objects. Use ",
             "'~ .' if you do not wish to alter the formula for one or ",
             "more of the longitudinal submodels.", call. = FALSE)
      if (length(formulaLong.) != M)
        stop(paste0("The list provided in 'formulaLong.' appears to be the ",
                    "incorrect length; should be length ", M), call. = FALSE)
    } else {
      if (!is.list(formulaLong.)) formulaLong. <- list(formulaLong.)
    }
    # Guards the M == 1 case where the user supplied a list of wrong length.
    if (length(formulaLong.) != M)
      stop2("The length of 'formulaLong.' must be equal to the number of ",
            "longitudinal submodels in the original model, which was ", M, ".")
    fm_long <- lapply(1:M, function(m)
      update.formula(fm[[m]], formulaLong.[[m]]))
    names(fm_long) <- NULL
    fm_long <- as.call(c(quote(list), fm_long))
    call$formulaLong <- fm_long
  }
  if (!missing(formulaEvent.)) {
    if (!is.jm(object))
      stop("'formulaEvent.' should only be specified for joint models estimated ",
           "using stan_jm.", call. = FALSE)
    # The event formula is stored as the last element of formula(object).
    call$formulaEvent <- update.formula(fm[[length(fm)]], formulaEvent.)
  }
  if (length(extras)) {
    existing <- !is.na(match(names(extras), names(call)))
    for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
    if (any(!existing)) {
      call <- c(as.list(call), extras[!existing])
      call <- as.call(call)
    }
  }
  if (!evaluate) return(call)
  # do this like lme4 update.merMod instead of update.default
  ff <- environment(formula(object))
  pf <- parent.frame()
  sf <- sys.frames()[[1L]]
  tryCatch(eval(call, envir = ff),
           error = function(e) {
             tryCatch(eval(call, envir = sf),
                      error = function(e) {
                        eval(call, pf)
                      })
           })
}

#' @rdname stanmvreg-methods
#' @export
#' @export fixef
#' @importFrom lme4 fixef
#'
fixef.stanmvreg <- function(object, m = NULL, remove_stub = TRUE, ...)
{
  M <- get_M(object)
  coefs <- object$coefficients
  # Keep only the fixed effects (drop the b parameters).
  coefs <- lapply(coefs, function(x) x[b_names(names(x), invert = TRUE)])
  if (remove_stub) {
    # was 1:length(coefs): seq_along() is safe if coefs is ever empty
    for (i in seq_along(coefs))
      names(coefs[[i]]) <- rm_stub(names(coefs[[i]]))
  }
  if (is.null(m)) list_nms(coefs, M, stub = get_stub(object)) else coefs[[m]]
}

#' @rdname stanmvreg-methods
#' @export
#' @export ngrps
#' @importFrom lme4 ngrps
#'
ngrps.stanmvreg <- function(object, ...) {
  object$n_grps
}

#' @rdname stanmvreg-methods
#' @export
#' @export ranef
#' @importFrom lme4 ranef
#'
ranef.stanmvreg <- function(object, m = NULL, ...) {
  M <- get_M(object)
  stub <- get_stub(object)
  all_names <- if (used.optimizing(object))
    rownames(object$stan_summary) else object$stanfit@sim$fnames_oi
  ans_list <- lapply(1:M, function(x) {
    sel <- b_names_M(all_names, x, stub = stub)
    ans <- object$stan_summary[sel, select_median(object$algorithm)]
    # avoid returning the extra levels that were included
    ans <- ans[!grepl("_NEW_", names(ans), fixed = TRUE)]
    fl <- .flist(object, m = x)
    levs <- lapply(fl, levels)
    asgn <- attr(fl, "assign")
    cnms <- .cnms(object, m = x)
    # nc = number of terms per grouping-factor block; nb = number of b
    # parameters per block (terms x levels).
    nc <- vapply(cnms, length, 1L)
    nb <- nc * vapply(levs, length, 1L)[asgn]
    nbseq <- rep.int(seq_along(nb), nb)
    ml <- split(ans, nbseq)
    for (i in seq_along(ml)) {
      ml[[i]] <- matrix(ml[[i]], ncol = nc[i], byrow = TRUE,
                        dimnames = list(NULL, cnms[[i]]))
    }
    # One data frame per grouping factor: levels as rows, terms as columns,
    # matching the layout of lme4::ranef().
    ans <- lapply(seq_along(fl), function(i) {
      data.frame(do.call(cbind, ml[asgn == i]), row.names = levs[[i]],
                 check.names = FALSE)
    })
    names(ans) <- names(fl)
    # was c("ranef.mer"): single-element c() is redundant
    class(ans) <- "ranef.mer"
    ans
  })
  if (is.null(m)) list_nms(ans_list, M, stub = get_stub(object)) else
    ans_list[[m]]
}

#' @rdname stanmvreg-methods
#' @export
#' @export sigma
#' @rawNamespace if(getRversion()>='3.3.0') importFrom(stats, sigma) else
#' importFrom(lme4,sigma)
#'
sigma.stanmvreg <- function(object, m = NULL, ...)
{
  stub <- get_stub(object)
  # Build regex pattern(s) for the sigma parameter names in stan_summary,
  # e.g. "Long1|sigma" or "y2|sigma".
  if (is.null(m)) {
    # was "[1-9]": a single-digit class missed submodels numbered >= 10;
    # "[1-9][0-9]*" matches any positive submodel index
    nms <- paste0("^", stub, "[1-9][0-9]*\\|sigma")
  } else if (is.numeric(m)) {
    nms <- paste0("^", stub, m, "\\|sigma")
  } else if (is.character(m)) {
    nms <- paste0(m, "\\|sigma")
  } else {
    stop("Invalid 'm' argument.")
  }
  sel <- sapply(nms, grep, rownames(object$stan_summary), value = TRUE)
  # No sigma parameters (e.g. no gaussian submodels): return 1, consistent
  # with treating the dispersion as fixed.
  if (!length(sel)) return(1)
  sigma <- object$stan_summary[sel, select_median(object$algorithm)]
  new_nms <- gsub("\\|sigma", "", sel)
  names(sigma) <- new_nms
  return(sigma)
}

# Exported but doc kept internal ----------------------------------------------

#' family method for stanmvreg objects
#'
#' @keywords internal
#' @export
#' @templateVar mArg m
#' @template args-m
#' @param object,... See \code{\link[stats]{family}}.
family.stanmvreg <- function(object, m = NULL, ...) {
  M <- get_M(object)
  stub <- get_stub(object)
  if (!is.null(m)) object$family[[m]] else
    list_nms(object$family, M, stub = stub)
}

#' model.frame method for stanmvreg objects
#'
#' @keywords internal
#' @export
#' @templateVar mArg m
#' @template args-m
#' @param formula,... See \code{\link[stats]{model.frame}}.
#' @param fixed.only See \code{\link[lme4:merMod-class]{model.frame.merMod}}.
#'
model.frame.stanmvreg <- function(formula, fixed.only = FALSE, m = NULL, ...) {
  # Note: per the model.frame generic, 'formula' is the fitted object here.
  if (is.stanmvreg(formula)) {
    M <- get_M(formula)
    fr <- fetch(formula$glmod, "model_frame")
    if (fixed.only) {
      # Restrict each submodel's frame to the variables that appear in the
      # fixed-effects part of its formula.
      fr <- lapply(seq(M), function(i) {
        ff <- formula(formula, fixed.only = TRUE, m = i)
        vars <- rownames(attr(terms.formula(ff), "factors"))
        fr[[i]][vars]
      })
    }
    fr$Event <- formula$survmod$model_frame
    if (is.null(m)) return(list_nms(fr, M, stub = get_stub(formula))) else
      return(fr[[m]])
  }
  NextMethod("model.frame")
}

#' @rdname stanreg-methods
#' @export
nobs.stanmvreg <- function(object, ...)
{
  # Number of rows in the model frame of the first (m = 1) submodel.
  nrow(model.frame(object, m = 1))
}

# internal ----------------------------------------------------------------

# Throw an error unless 'object' is a stanmvreg object.
.stanmvreg_check <- function(object) {
  if (!is.stanmvreg(object))
    stop("This method is for stanmvreg objects only.", call. = FALSE)
}

# Group-specific coefficient names: all submodels combined (m = NULL, taken
# from object$cnms) or one submodel's reTrms (m given).
.cnms.stanmvreg <- function(object, m = NULL, remove_stub = FALSE, ...) {
  .stanmvreg_check(object)
  cnms <- if (is.null(m)) object$cnms else object$glmod[[m]]$reTrms$cnms
  if (remove_stub) lapply(cnms, rm_stub) else cnms
}

# Grouping-factor list for submodel m; m = NULL is not supported.
.flist.stanmvreg <- function(object, m = NULL, ...) {
  .stanmvreg_check(object)
  if (is.null(m)) {
    stop("'m = NULL' cannot currently be handled by .flist.stanmvreg method.")
  } else as.list(fetch(object$glmod, "reTrms", "flist")[[m]])
}

# Number of group-specific terms per grouping factor.
.p <- function(object) {
  .stanmvreg_check(object)
  sapply(object$cnms, length)
}
rstanarm/R/doc-example_model.R0000644000176200001440000000403313722762571016017 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Example model
#'
#' A model for use in \pkg{rstanarm} examples.
#'
#' @name example_model
#' @format Calling \code{example("example_model")} will run the model in the
#' Examples section, below, and the resulting stanreg object will then be
#' available in the global environment.
The \code{chains} and \code{iter} #' arguments are specified to make this example be small in size. In practice, #' we recommend that they be left unspecified in order to use the default #' values (4 and 2000 respectively) or increased if there are convergence #' problems. The \code{cores} argument is optional and on a multicore system, #' the user may well want to set that equal to the number of chains being #' executed. #' #' @seealso \code{\link[lme4]{cbpp}} for a description of the data. #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' example_model <- #' stan_glmer(cbind(incidence, size - incidence) ~ size + period + (1|herd), #' data = lme4::cbpp, family = binomial, QR = TRUE, #' # this next line is only to keep the example small in size! #' chains = 2, cores = 1, seed = 12345, iter = 1000, refresh = 0) #' example_model #' } NULL rstanarm/R/stanmvreg.R0000644000176200001440000001211614370470372014443 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # Copyright (C) 2016, 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# Function to create a stanmvreg object (fitted model object) # # @param object A list returned by a call to any of: stan_jm, stan_mvmer # @return A stanmvreg object # stanmvreg <- function(object) { opt <- object$algorithm == "optimizing" stanfit <- object$stanfit M <- object$M is_mvmer <- is.mvmer(object) is_surv <- is.surv(object) is_jm <- is.jm(object) stub <- if (is_jm) "Long" else "y" if (opt) { stop("Optimisation not implemented for stanmvreg objects.") } else { stan_summary <- make_stan_summary(stanfit) nms <- collect_nms(rownames(stan_summary), M, stub = get_stub(object)) coefs <- list() ses <- list() # Coefs and SEs for longitudinal submodel(s) if (is_mvmer) { y_coefs <- lapply(1:M, function(m) stan_summary[c(nms$y[[m]], nms$y_b[[m]]), select_median(object$algorithm)]) y_stanmat <- lapply(1:M, function(m) as.matrix(stanfit)[, c(nms$y[[m]], nms$y_b[[m]]), drop = FALSE]) y_ses <- lapply(y_stanmat, function(m) apply(m, 2L, mad)) y_covmat <- lapply(y_stanmat, cov) for (m in 1:M) { rownames(y_covmat[[m]]) <- colnames(y_covmat[[m]]) <- rownames(stan_summary)[c(nms$y[[m]], nms$y_b[[m]])] } # Remove padding coefs[1:M] <- list_nms(lapply(y_coefs, unpad_reTrms.default), M, stub = stub) ses[1:M] <- list_nms(lapply(y_ses, unpad_reTrms.default), M, stub = stub) } # Coefs and SEs for event submodel if (is_surv) { e_coefs <- stan_summary[c(nms$e, nms$a), select_median(object$algorithm)] if (length(e_coefs) == 1L) names(e_coefs) <- rownames(stan_summary)[c(nms$e, nms$a)[1L]] e_stanmat <- as.matrix(stanfit)[, c(nms$e, nms$a), drop = FALSE] e_ses <- apply(e_stanmat, 2L, mad) e_covmat <- cov(e_stanmat) rownames(e_covmat) <- colnames(e_covmat) <- rownames(stan_summary)[c(nms$e, nms$a)] coefs$Event <- e_coefs ses$Event <- e_ses } # Covariance matrix for fixed effects stanmat <- as.matrix(stanfit)[, c(nms$alpha, nms$beta), drop = FALSE] covmat <- cov(stanmat) if (object$algorithm == "sampling") { # for MCMC fits only # Check Rhats for all parameters check_rhats(stan_summary[, 
"Rhat"]) # Run time (mins) times <- round((rstan::get_elapsed_time(object$stanfit))/60, digits = 1) times <- cbind(times, total = rowSums(times)) } } out <- nlist( formula = list_nms(object$formula, M, stub), terms = list_nms(object$terms, M, stub), coefficients = coefs, ses = ses, covmat = covmat, prior.weights = object$weights, prior.info = object$prior.info, algorithm = object$algorithm, call = object$call, stan_function = object$stan_function, runtime = if (object$algorithm == "sampling") times else NULL, stan_summary, stanfit ) if (is_mvmer) { out$cnms <- object$cnms out$flevels <- object$flevels out$n_markers <- object$M out$n_grps <- object$n_grps out$n_yobs <- list_nms(object$n_yobs, M, stub) out$family <- list_nms(object$family, M, stub) out$glmod <- list_nms(object$glmod, M, stub) out$data <- if (!is_jm) list_nms(object$data, M, stub) else NULL classes <- c("stanmvreg", "stanreg", "lmerMod") } if (is_jm) { out$id_var <- object$id_var out$time_var <- object$time_var out$n_subjects<- object$n_subjects out$n_events <- sum(object$survmod$status > 0) out$eventtime <- object$survmod$eventtime out$status <- object$survmod$status > 0 out$basehaz <- object$basehaz out$survmod <- object$survmod out$qnodes <- object$qnodes out$epsilon <- object$epsilon out$assoc <- object$assoc out$assocmod <- list_nms(object$assocmod, M, stub) out$scale_assoc <- object$scale_assoc out$dataLong <- list_nms(object$dataLong, M, stub) out$dataEvent <- object$dataEvent out$grp_stuff <- object$grp_stuff out$fr <- object$fr classes <- c("stanjm", classes) } out <- rm_null(out, recursive = FALSE) structure(out, class = classes) } rstanarm/R/posterior_predict.R0000644000176200001440000004702114406606742016202 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as 
published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Draw from posterior predictive distribution #' #' The posterior predictive distribution is the distribution of the outcome #' implied by the model after using the observed data to update our beliefs #' about the unknown parameters in the model. Simulating data from the posterior #' predictive distribution using the observed predictors is useful for checking #' the fit of the model. Drawing from the posterior predictive distribution at #' interesting values of the predictors also lets us visualize how a #' manipulation of a predictor affects (a function of) the outcome(s). With new #' observations of predictor variables we can use the posterior predictive #' distribution to generate predicted outcomes. #' #' @aliases posterior_predict #' @export #' #' @templateVar stanregArg object #' @template args-stanreg-object #' @param newdata Optionally, a data frame in which to look for variables with #' which to predict. If omitted, the model matrix is used. If \code{newdata} #' is provided and any variables were transformed (e.g. rescaled) in the data #' used to fit the model, then these variables must also be transformed in #' \code{newdata}. This only applies if variables were transformed before #' passing the data to one of the modeling functions and \emph{not} if #' transformations were specified inside the model formula. 
Also see the Note #' section below for a note about using the \code{newdata} argument with with #' binomial models. #' @param draws An integer indicating the number of draws to return. The default #' and maximum number of draws is the size of the posterior sample. #' @param re.form If \code{object} contains \code{\link[=stan_glmer]{group-level}} #' parameters, a formula indicating which group-level parameters to #' condition on when making predictions. \code{re.form} is specified in the #' same form as for \code{\link[lme4]{predict.merMod}}. The default, #' \code{NULL}, indicates that all estimated group-level parameters are #' conditioned on. To refrain from conditioning on any group-level parameters, #' specify \code{NA} or \code{~0}. The \code{newdata} argument may include new #' \emph{levels} of the grouping factors that were specified when the model #' was estimated, in which case the resulting posterior predictions #' marginalize over the relevant variables. #' @param fun An optional function to apply to the results. \code{fun} is found #' by a call to \code{\link{match.fun}} and so can be specified as a function #' object, a string naming a function, etc. #' @param seed An optional \code{\link[=set.seed]{seed}} to use. #' @param offset A vector of offsets. Only required if \code{newdata} is #' specified and an \code{offset} argument was specified when fitting the #' model. #' @param ... For \code{stanmvreg} objects, argument \code{m} can be specified #' indicating the submodel for which you wish to obtain predictions. #' #' @return A \code{draws} by \code{nrow(newdata)} matrix of simulations from the #' posterior predictive distribution. Each row of the matrix is a vector of #' predictions generated using a single draw of the model parameters from the #' posterior distribution. 
#' #' @note For binomial models with a number of trials greater than one (i.e., not #' Bernoulli models), if \code{newdata} is specified then it must include all #' variables needed for computing the number of binomial trials to use for the #' predictions. For example if the left-hand side of the model formula is #' \code{cbind(successes, failures)} then both \code{successes} and #' \code{failures} must be in \code{newdata}. The particular values of #' \code{successes} and \code{failures} in \code{newdata} do not matter so #' long as their sum is the desired number of trials. If the left-hand side of #' the model formula were \code{cbind(successes, trials - successes)} then #' both \code{trials} and \code{successes} would need to be in \code{newdata}, #' probably with \code{successes} set to \code{0} and \code{trials} specifying #' the number of trials. See the Examples section below and the #' \emph{How to Use the rstanarm Package} for examples. #' @note For models estimated with \code{\link{stan_clogit}}, the number of #' successes per stratum is ostensibly fixed by the research design. Thus, when #' doing posterior prediction with new data, the \code{data.frame} passed to #' the \code{newdata} argument must contain an outcome variable and a stratifying #' factor, both with the same name as in the original \code{data.frame}. Then, the #' posterior predictions will condition on this outcome in the new data. #' #' @seealso \code{\link{pp_check}} for graphical posterior predictive checks. #' Examples of posterior predictive checking can also be found in the #' \pkg{rstanarm} vignettes and demos. #' #' \code{\link{predictive_error}} and \code{\link{predictive_interval}}. 
#' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' if (!exists("example_model")) example(example_model) #' yrep <- posterior_predict(example_model) #' table(yrep) #' #' \donttest{ #' # Using newdata #' counts <- c(18,17,15,20,10,20,25,13,12) #' outcome <- gl(3,1,9) #' treatment <- gl(3,3) #' dat <- data.frame(counts, treatment, outcome) #' fit3 <- stan_glm( #' counts ~ outcome + treatment, #' data = dat, #' family = poisson(link="log"), #' prior = normal(0, 1, autoscale = FALSE), #' prior_intercept = normal(0, 5, autoscale = FALSE), #' refresh = 0 #' ) #' nd <- data.frame(treatment = factor(rep(1,3)), outcome = factor(1:3)) #' ytilde <- posterior_predict(fit3, nd, draws = 500) #' print(dim(ytilde)) # 500 by 3 matrix (draws by nrow(nd)) #' #' ytilde <- data.frame( #' count = c(ytilde), #' outcome = rep(nd$outcome, each = 500) #' ) #' ggplot2::ggplot(ytilde, ggplot2::aes(x=outcome, y=count)) + #' ggplot2::geom_boxplot() + #' ggplot2::ylab("predicted count") #' #' #' # Using newdata with a binomial model. #' # example_model is binomial so we need to set #' # the number of trials to use for prediction. #' # This could be a different number for each #' # row of newdata or the same for all rows. #' # Here we'll use the same value for all. #' nd <- lme4::cbpp #' print(formula(example_model)) # cbind(incidence, size - incidence) ~ ... #' nd$size <- max(nd$size) + 1L # number of trials #' nd$incidence <- 0 # set to 0 so size - incidence = number of trials #' ytilde <- posterior_predict(example_model, newdata = nd) #' #' #' # Using fun argument to transform predictions #' mtcars2 <- mtcars #' mtcars2$log_mpg <- log(mtcars2$mpg) #' fit <- stan_glm(log_mpg ~ wt, data = mtcars2, refresh = 0) #' ytilde <- posterior_predict(fit, fun = exp) #' } #' } posterior_predict.stanreg <- function(object, newdata = NULL, draws = NULL, re.form = NULL, fun = NULL, seed = NULL, offset = NULL, ...) 
{ if (!is.null(seed)) set.seed(seed) if (!is.null(fun)) fun <- match.fun(fun) dots <- list(...) if (is.stanmvreg(object)) { m <- dots[["m"]] # submodel to predict for stanmat <- dots[["stanmat"]] # possibly incl. new b pars (dynamic preds) if (is.null(m)) STOP_arg_required_for_stanmvreg(m) } else { m <- NULL stanmat <- NULL } newdata <- validate_newdata(object, newdata = newdata, m = m) pp_data_args <- c(list(object, newdata = newdata, re.form = re.form, offset = offset), dots) dat <- do.call("pp_data", pp_data_args) if (is_scobit(object)) { data <- pp_eta(object, dat, NULL) if (!is.null(draws)) { S <- posterior_sample_size(object) if (draws > S) { err <- paste0("'draws' should be <= posterior sample size (", S, ").") stop(err) } samp <- sample(S, draws) data$eta <- data$eta[samp, , drop = FALSE] ppargs <- pp_args(object, data) ppargs$alpha <- ppargs$alpha[samp] } else { ppargs <- pp_args(object, data, m = m) } } else if (is.stanjm(object)) { ppargs <- pp_args(object, data = pp_eta(object, dat, draws, m = m, stanmat = stanmat), m = m) } else { if (!is.null(newdata) && is_clogit(object)) { y <- eval(formula(object)[[2L]], newdata) strata <- as.factor(eval(object$call$strata, newdata)) formals(object$family$linkinv)$g <- strata formals(object$family$linkinv)$successes <- aggregate(y, by = list(strata), FUN = sum)$x } ppargs <- pp_args(object, data = pp_eta(object, dat, draws, m = m), m = m) } if (is_clogit(object)) { if (is.null(newdata)) ppargs$strata <- model.frame(object)[,"(weights)"] else ppargs$strata <- eval(object$call$strata, newdata) ppargs$strata <- as.factor(ppargs$strata) } else if (!is_polr(object) && is.binomial(family(object, m = m)$family)) { ppargs$trials <- pp_binomial_trials(object, newdata, m = m) } ppfun <- pp_fun(object, m = m) ytilde <- do.call(ppfun, ppargs) if ((is.null(newdata) && nobs(object) == 1L) || (!is.null(newdata) && nrow(newdata) == 1L)) { ytilde <- t(ytilde) } if (!is.null(fun)) ytilde <- do.call(fun, list(ytilde)) if 
(is_polr(object) && !is_scobit(object)) ytilde <- matrix(levels(get_y(object))[ytilde], nrow(ytilde), ncol(ytilde)) if (is.null(newdata)) colnames(ytilde) <- rownames(model.frame(object, m = m)) else colnames(ytilde) <- rownames(newdata) # if function is called from posterior_traj then add mu as attribute fn <- tryCatch(sys.call(-3)[[1]], error = function(e) NULL) if (!is.null(fn) && grepl("posterior_traj", deparse(fn), fixed = TRUE)) return(structure(ytilde, mu = ppargs$mu, class = class(ytilde))) ytilde } #' @rdname posterior_predict.stanreg #' @export #' @templateVar mArg m #' @template args-m #' posterior_predict.stanmvreg <- function(object, m = 1, newdata = NULL, draws = NULL, re.form = NULL, fun = NULL, seed = NULL, offset = NULL, ...) { validate_stanmvreg_object(object) dots <- list(...) if ("newdataLong" %in% names(dots)) stop2("'newdataLong' should not be specified for posterior_predict.") if ("newdataEvent" %in% names(dots)) stop2("'newdataEvent' should not be specified for posterior_predict.") out <- posterior_predict.stanreg(object, newdata = newdata, draws = draws, re.form = re.form, fun = fun, seed = seed, offset = offset, m = m, ...) 
  out
}

# internal ----------------------------------------------------------------

# functions to draw from the various posterior predictive distributions

# Look up the .pp_* sampling function for the model: special-cased for polr
# and clogit models, otherwise chosen by the family name.
pp_fun <- function(object, m = NULL) {
  suffix <- if (is_polr(object)) "polr" else
    if (is_clogit(object)) "clogit" else family(object, m = m)$family
  get(paste0(".pp_", suffix), mode = "function")
}

# In the .pp_* helpers below, each row of 'mu' (and of the auxiliary
# parameter vectors) corresponds to one posterior draw, each column to one
# observation; each helper returns a matrix of simulated outcomes with the
# same draws-by-observations layout.

.pp_gaussian <- function(mu, sigma) {
  t(sapply(1:nrow(mu), function(s) {
    rnorm(ncol(mu), mu[s, ], sigma[s])
  }))
}

.pp_binomial <- function(mu, trials) {
  t(sapply(1:nrow(mu), function(s) {
    rbinom(ncol(mu), size = trials, prob = mu[s, ])
  }))
}

.pp_clogit <- function(mu, strata) {
  # One multinomial draw per stratum, conditioning on one success per
  # stratum within each posterior draw.
  t(sapply(1:nrow(mu), function(s) {
    unlist(by(mu[s, ], INDICES = list(strata), FUN = rmultinom, n = 1,
              size = 1))
  }))
}

.pp_beta <- function(mu, phi) {
  # Mean/precision parameterization: shape1 = mu*phi, shape2 = (1-mu)*phi.
  t(sapply(1:nrow(mu), function(s) {
    rbeta(ncol(mu), mu[s, ] * phi[s], (1 - mu[s, ]) * phi[s])
  }))
}

.pp_poisson <- function(mu) {
  t(sapply(1:nrow(mu), function(s) {
    rpois(ncol(mu), mu[s, ])
  }))
}

.pp_neg_binomial_2 <- function(mu, size) {
  t(sapply(1:nrow(mu), function(s) {
    rnbinom(ncol(mu), size = size[s], mu = mu[s, ])
  }))
}

.pp_Gamma <- function(mu, shape) {
  # rate = shape / mu so that the mean of the draws is mu.
  t(sapply(1:nrow(mu), function(s) {
    rgamma(ncol(mu), shape = shape[s], rate = shape[s] / mu[s, ])
  }))
}

.rinvGauss <- function(n, mu, lambda) {
  # draw from inverse gaussian distribution
  mu2 <- mu^2
  y <- rnorm(n)^2
  z <- runif(n)
  tmp <- (mu2 * y - mu * sqrt(4 * mu * lambda * y + mu2 * y^2))
  x <- mu + tmp / (2 * lambda)
  ifelse(z <= (mu / (mu + x)), x, mu2 / x)
}

.pp_inverse.gaussian <- function(mu, lambda) {
  t(sapply(1:nrow(mu), function(s) {
    .rinvGauss(ncol(mu), mu = mu[s, ], lambda = lambda[s])
  }))
}

.pp_polr <- function(eta, zeta, linkinv, alpha = NULL) {
  n <- ncol(eta)
  q <- ncol(zeta)
  if (!is.null(alpha)) {
    # 'alpha' branch corresponds to scobit models (see pp_args): binary
    # outcome with the link inverse raised to the power alpha.
    pr <- linkinv(eta)^alpha
    if (NROW(eta) == 1) {
      pr <- matrix(pr, nrow = 1)
    }
    t(sapply(1:NROW(eta), FUN = function(s) {
      rbinom(NCOL(eta), size = 1, prob = pr[s, ])
    }))
  } else {
    # Ordinal case: cumulative probabilities from the cutpoints (zeta) minus
    # the linear predictor, differenced to per-category probabilities.
    t(sapply(1:NROW(eta), FUN = function(s) {
      tmp <- matrix(zeta[s, ], n,
q, byrow = TRUE) - eta[s, ]
      # cumulative probabilities at the cutpoints; successive differences
      # (padded with 0 and 1) give the per-category probabilities
      cumpr <- matrix(linkinv(tmp), ncol = q)
      fitted <- t(apply(cumpr, 1L, function(x) diff(c(0, x, 1))))
      apply(fitted, 1, function(p) which(rmultinom(1, 1, p) == 1))
    }))
  }
}

# create list of arguments to pass to the function returned by pp_fun
#
# @param object stanreg or stanmvreg object
# @param data output from pp_eta (named list with eta and stanmat)
# @param m optional integer specifying the submodel for stanmvreg objects
# @return named list
pp_args <- function(object, data, m = NULL) {
  stanmat <- data$stanmat
  eta <- data$eta
  stopifnot(is.stanreg(object), is.matrix(stanmat))
  if (is.stanmvreg(object) && is.null(m))
    STOP_arg_required_for_stanmvreg(m)
  inverse_link <- linkinv(object, m = m)
  # for nlmer models the inverse link was already applied in pp_eta,
  # so use the identity here
  if (is.nlmer(object))
    inverse_link <- function(x) return(x)
  if (is_polr(object)) {
    # cutpoint parameters are the columns whose names contain a literal "|"
    zeta <- stanmat[, grep("|", colnames(stanmat), value = TRUE, fixed = TRUE)]
    args <- nlist(eta, zeta, linkinv = inverse_link)
    if ("alpha" %in% colnames(stanmat)) # scobit
      args$alpha <- stanmat[, "alpha"]
    return(args)
  } else if (is_clogit(object)) {
    return(list(mu = inverse_link(eta)))
  }
  args <- list(mu = inverse_link(eta))
  famname <- family(object, m = m)$family
  m_stub <- get_m_stub(m, stub = get_stub(object))
  # attach the draw-specific auxiliary parameter the family requires
  if (is.gaussian(famname)) {
    args$sigma <- stanmat[, paste0(m_stub, "sigma")]
  } else if (is.gamma(famname)) {
    args$shape <- stanmat[, paste0(m_stub, "shape")]
  } else if (is.ig(famname)) {
    args$lambda <- stanmat[, paste0(m_stub, "lambda")]
  } else if (is.nb(famname)) {
    args$size <- stanmat[, paste0(m_stub, "reciprocal_dispersion")]
  } else if (is.beta(famname)) {
    args$phi <- data$phi
    if (is.null(args$phi)) {
      # phi was modeled with its own linear predictor; map it back to the
      # precision scale via the phi-model's inverse link
      args$phi <- linkinv(object$family_phi)(data$phi_linpred)
    }
  }
  args
}

# create eta and stanmat (matrix of posterior draws)
#
# @param object A stanreg or stanmvreg object
# @param data Output from pp_data()
# @param draws Number of draws
# @param m Optional integer specifying the submodel for stanmvreg objects
# @param stanmat Optionally pass a stanmat that has been
amended to include
#   new b parameters for individuals in the prediction data but who were not
#   included in the model estimation; relevant for dynamic predictions for
#   stan_jm objects only
# @return Linear predictor "eta" and matrix of posterior draws "stanmat". For
#   some stan_betareg models "phi" (or "phi_linpred") is also included in
#   the list.
pp_eta <- function(object, data, draws = NULL, m = NULL, stanmat = NULL) {
  x <- data$x
  # drop design-matrix columns that were removed when the model was fit
  if (!is.null(object$dropped_cols)) {
    x <- x[, !(colnames(x) %in% object$dropped_cols), drop = FALSE]
  }
  S <- if (is.null(stanmat)) posterior_sample_size(object) else nrow(stanmat)
  if (is.null(draws))
    draws <- S
  if (draws > S) {
    err <- paste0("'draws' should be <= posterior sample size (", S, ").")
    stop(err)
  }
  # if fewer draws than S were requested, subsample the posterior draws
  some_draws <- isTRUE(draws < S)
  if (some_draws)
    samp <- sample(S, draws)
  if (is.stanmvreg(object)) {
    if (is.null(m)) STOP_arg_required_for_stanmvreg(m)
    M <- get_M(object)
  }
  if (is.null(stanmat)) {
    # models with group-specific terms read from the stanfit directly so the
    # padded "_NEW_" columns are still present and can be handled below
    stanmat <- if (is.null(data$Zt))
      as.matrix.stanreg(object) else
      as.matrix(object$stanfit)
  }
  nms <- if (is.stanmvreg(object))
    collect_nms(colnames(stanmat), M, stub = get_stub(object)) else NULL
  beta_sel <- if (is.null(nms)) seq_len(ncol(x)) else nms$y[[m]]
  beta <- stanmat[, beta_sel, drop = FALSE]
  if (some_draws)
    beta <- beta[samp, , drop = FALSE]
  eta <- linear_predictor(beta, x, data$offset)
  if (!is.null(data$Zt)) {
    # add the group-specific contribution b %*% Zt
    b_sel <- if (is.null(nms)) grepl("^b\\[", colnames(stanmat)) else nms$y_b[[m]]
    b <- stanmat[, b_sel, drop = FALSE]
    if (some_draws)
      b <- b[samp, , drop = FALSE]
    if (is.null(data$Z_names)) {
      b <- b[, !grepl("_NEW_", colnames(b), fixed = TRUE), drop = FALSE]
    } else {
      # reorder/select b columns to line up with the requested Z_names
      b <- pp_b_ord(b, data$Z_names)
    }
    eta <- eta + as.matrix(b %*% data$Zt)
  }
  if (is.nlmer(object)) {
    # nonlinear mixed models: the "inverse link" is the nonlinear function
    # itself, possibly taking extra arguments supplied via pp_data
    if (is.null(data$arg1))
      eta <- linkinv(object)(eta) else
      eta <- linkinv(object)(eta, data$arg1, data$arg2)
    eta <- t(eta)
  }
  out <- nlist(eta, stanmat)
  if (inherits(object, "betareg")) {
    # NOTE(review): "(phi)" here is a regex (parentheses form a group), so
    # this matches any column name containing "phi", not just literal
    # "(phi)" names -- presumably only the phi coefficients match in
    # practice; confirm before tightening to fixed = TRUE
    z_vars <- colnames(stanmat)[grepl("(phi)", colnames(stanmat))]
    omega <- stanmat[, z_vars]
    if (length(z_vars)
== 1 && z_vars == "(phi)") { out$phi <- stanmat[, "(phi)"] } else { out$phi_linpred <- linear_predictor(as.matrix(omega), as.matrix(data$z_betareg), data$offset) } } return(out) } pp_b_ord <- function(b, Z_names) { b_ord <- function(x) { m <- grep(paste0("b[", x, "]"), colnames(b), fixed = TRUE) len <- length(m) if (len == 1) return(m) if (len > 1) stop("multiple matches bug") m <- grep(paste0("b[", sub(" (.*):.*$", " \\1:_NEW_\\1", x), "]"), colnames(b), fixed = TRUE) len <- length(m) if (len == 1) return(m) if (len > 1) stop("multiple matches bug") x <- strsplit(x, split = ":", fixed = TRUE)[[1]] stem <- strsplit(x[[1]], split = " ", fixed = TRUE)[[1]] x <- paste(x[1], x[2], paste0("_NEW_", stem[2]), x[2], sep = ":") m <- grep(paste0("b[", x, "]"), colnames(b), fixed = TRUE) len <- length(m) if (len == 1) return(m) if (len > 1) stop("multiple matches bug") x <- paste(paste(stem[1], stem[2]), paste0("_NEW_", stem[2]), sep = ":") m <- grep(paste0("b[", x, "]"), colnames(b), fixed = TRUE) len <- length(m) if (len == 1) return(m) if (len > 1) stop("multiple matches bug") stop("no matches bug") } ord <- sapply(Z_names, FUN = b_ord) b[, ord, drop = FALSE] } # Number of trials for binomial models pp_binomial_trials <- function(object, newdata = NULL, m = NULL) { if (is.stanmvreg(object) && is.null(m)) { STOP_arg_required_for_stanmvreg(m) } y <- get_y(object, m) is_bernoulli <- NCOL(y) == 1L if (is_bernoulli) { trials <- if (is.null(newdata)) rep(1, NROW(y)) else rep(1, NROW(newdata)) } else { trials <- if (is.null(newdata)) rowSums(y) else rowSums(eval(formula(object, m = m)[[2L]], newdata)) } return(trials) } rstanarm/R/as.matrix.stanreg.R0000644000176200001440000001245414476664567016034 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as 
published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Extract the posterior sample #' #' For models fit using MCMC (\code{algorithm="sampling"}), the posterior sample #' ---the post-warmup draws from the posterior distribution--- can be extracted #' from a fitted model object as a matrix, data frame, or array. The #' \code{as.matrix} and \code{as.data.frame} methods merge all chains together, #' whereas the \code{as.array} method keeps the chains separate. For models fit #' using optimization (\code{"optimizing"}) or variational inference #' (\code{"meanfield"} or \code{"fullrank"}), there is no posterior sample but #' rather a matrix (or data frame) of 1000 draws from either the asymptotic #' multivariate Gaussian sampling distribution of the parameters or the #' variational approximation to the posterior distribution. #' #' @method as.matrix stanreg #' @export #' @templateVar stanregArg x #' @template args-stanreg-object #' @template args-pars #' @template args-regex-pars #' @param ... Ignored. #' #' @return A matrix, data.frame, or array, the dimensions of which depend on #' \code{pars} and \code{regex_pars}, as well as the model and estimation #' algorithm (see the Description section above). 
#'
#' @seealso \code{\link{stanreg-draws-formats}}, \code{\link{stanreg-methods}}
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \donttest{
#' if (!exists("example_model")) example(example_model)
#' # Extract posterior sample after MCMC
#' draws <- as.matrix(example_model)
#' print(dim(draws))
#'
#' # For example, we can see that the median of the draws for the intercept
#' # is the same as the point estimate rstanarm uses
#' print(median(draws[, "(Intercept)"]))
#' print(example_model$coefficients[["(Intercept)"]])
#'
#' # The as.array method keeps the chains separate
#' draws_array <- as.array(example_model)
#' print(dim(draws_array)) # iterations x chains x parameters
#'
#' # Extract draws from asymptotic Gaussian sampling distribution
#' # after optimization
#' fit <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing")
#' draws <- as.data.frame(fit)
#' print(colnames(draws))
#' print(nrow(draws)) # 1000 draws are taken
#'
#' # Extract draws from variational approximation to the posterior distribution
#' fit2 <- update(fit, algorithm = "meanfield")
#' draws <- as.data.frame(fit2, pars = "wt")
#' print(colnames(draws))
#' print(nrow(draws)) # 1000 draws are taken
#' }
#' }
as.matrix.stanreg <- function(x, ..., pars = NULL, regex_pars = NULL) {
  pars <- collect_pars(x, pars, regex_pars)
  user_pars <- !is.null(pars)
  if (used.optimizing(x)) {
    # optimization: draws come from the stored asymptotic sampling
    # distribution rather than the stanfit
    mat <- x$asymptotic_sampling_dist
    if (is.null(mat))
      STOP_no_draws()
    if (!user_pars) {
      # default selection: coefficients first, then whichever auxiliary
      # parameters are present in the draws
      aux <- c("sigma", "scale", "shape", "lambda", "reciprocal_dispersion")
      pars <- c(names(coef(x)), # return with coefficients first
                aux[which(aux %in% colnames(mat))])
    }
  } else { # used mcmc or vb
    mat <- as.matrix(x$stanfit)
    if (!user_pars)
      pars <- exclude_lp_and_ppd(colnames(mat))
  }
  if (user_pars)
    check_missing_pars(mat, pars)
  mat <- mat[, pars, drop = FALSE]
  if (!is.mer(x))
    return(mat)
  # models with group-specific terms: drop the padding columns
  unpad_reTrms(mat)
}

#' @rdname as.matrix.stanreg
#' @method as.array stanreg
#' @export
as.array.stanreg <-
function(x, ..., pars = NULL, regex_pars = NULL) {
    # keeping chains separate only makes sense for MCMC fits
    pars <- collect_pars(x, pars, regex_pars)
    if (!used.sampling(x))
      stop(
        "For models not fit using MCMC ",
        "use 'as.matrix' instead of 'as.array'"
      )
    arr <- as.array(x$stanfit)
    if (identical(arr, numeric(0)))
      STOP_no_draws()
    if (!is.null(pars)) {
      check_missing_pars(arr, pars)
    } else {
      pars <- exclude_lp_and_ppd(last_dimnames(arr))
    }
    arr <- arr[, , pars, drop = FALSE]
    if (!is.mer(x))
      return(arr)
    # models with group-specific terms: drop the padding columns
    unpad_reTrms(arr)
  }

#' @rdname as.matrix.stanreg
#' @method as.data.frame stanreg
#' @export
as.data.frame.stanreg <- function(x, ..., pars = NULL, regex_pars = NULL) {
  # delegate parameter selection to the as.matrix method, then convert
  mat <- as.matrix.stanreg(x, pars = pars, regex_pars = regex_pars, ...)
  as.data.frame(mat)
}

# internal ----------------------------------------------------------------

# Consistent error used when an object holds no draws.
STOP_no_draws <- function() stop("No draws found.", call. = FALSE)

# Stop if any requested parameter name is absent from x's last dimnames.
check_missing_pars <- function(x, pars) {
  notfound <- which(!pars %in% last_dimnames(x))
  if (length(notfound))
    stop(
      "No parameter(s) ",
      paste(pars[notfound], collapse = ", "),
      call. = FALSE
    )
}

# Drop the mean_PPD and log-posterior "parameters" from a name vector.
exclude_lp_and_ppd <- function(pars) {
  grep(
    pattern = "mean_PPD|log-posterior",
    x = pars,
    invert = TRUE,
    value = TRUE
  )
}
rstanarm/R/doc-modeling-functions.R0000644000176200001440000001132214406606742017004 0ustar liggesusers#' Modeling functions available in \pkg{rstanarm}
#'
#' @name available-models
#'
#' @section Modeling functions:
#' The model estimating functions are described in greater detail in their
#' individual help pages and vignettes. Here we provide a very brief
#' overview:
#'
#' \describe{
#' \item{\code{\link{stan_lm}}, \code{stan_aov}, \code{stan_biglm}}{
#' Similar to \code{\link[stats]{lm}} or \code{\link[stats]{aov}} but with
#' novel regularizing priors on the model parameters that are driven by prior
#' beliefs about \eqn{R^2}, the proportion of variance in the outcome
#' attributable to the predictors in a linear model.
#' } #' \item{\code{\link{stan_glm}}, \code{stan_glm.nb}}{ #' Similar to \code{\link[stats]{glm}} but with various possible prior #' distributions for the coefficients and, if applicable, a prior distribution #' for any auxiliary parameter in a Generalized Linear Model (GLM) that is #' characterized by a \code{\link[stats]{family}} object (e.g. the shape #' parameter in Gamma models). It is also possible to estimate a negative #' binomial model in a similar way to the \code{\link[MASS]{glm.nb}} function #' in the \pkg{MASS} package. #' } #' \item{\code{\link{stan_glmer}}, \code{stan_glmer.nb}, \code{stan_lmer}}{ #' Similar to the \code{\link[lme4]{glmer}}, \code{\link[lme4]{glmer.nb}} and #' \code{\link[lme4]{lmer}} functions in the \pkg{lme4} package in that GLMs #' are augmented to have group-specific terms that deviate from the common #' coefficients according to a mean-zero multivariate normal distribution with #' a highly-structured but unknown covariance matrix (for which \pkg{rstanarm} #' introduces an innovative prior distribution). MCMC provides more #' appropriate estimates of uncertainty for models that consist of a mix of #' common and group-specific parameters. #' } #' \item{\code{\link{stan_nlmer}}}{ #' Similar to \code{\link[lme4]{nlmer}} in the \pkg{lme4} package for #' nonlinear "mixed-effects" models, but the group-specific coefficients #' have flexible priors on their unknown covariance matrices. #' } #' \item{\code{\link{stan_gamm4}}}{ #' Similar to \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package, which #' augments a GLM (possibly with group-specific terms) with nonlinear smooth #' functions of the predictors to form a Generalized Additive Mixed Model #' (GAMM). 
Rather than calling \code{\link[lme4]{glmer}} like #' \code{\link[gamm4]{gamm4}} does, \code{\link{stan_gamm4}} essentially calls #' \code{\link{stan_glmer}}, which avoids the optimization issues that often #' crop up with GAMMs and provides better estimates for the uncertainty of the #' parameter estimates. #' } #' \item{\code{\link{stan_polr}}}{ #' Similar to \code{\link[MASS]{polr}} in the \pkg{MASS} package in that it #' models an ordinal response, but the Bayesian model also implies a prior #' distribution on the unknown cutpoints. Can also be used to model binary #' outcomes, possibly while estimating an unknown exponent governing the #' probability of success. #' } #' \item{\code{\link{stan_betareg}}}{ #' Similar to \code{\link[betareg]{betareg}} in that it models an outcome that #' is a rate (proportion) but, rather than performing maximum likelihood #' estimation, full Bayesian estimation is performed by default, with #' customizable prior distributions for all parameters. #' } #' \item{\code{\link{stan_clogit}}}{ #' Similar to \code{\link[survival]{clogit}} in that it models an binary outcome #' where the number of successes and failures is fixed within each stratum by #' the research design. There are some minor syntactical differences relative #' to \code{\link[survival]{clogit}} that allow \code{stan_clogit} to accept #' group-specific terms as in \code{\link{stan_glmer}}. #' } #' \item{\code{\link{stan_mvmer}}}{ #' A multivariate form of \code{\link{stan_glmer}}, whereby the user can #' specify one or more submodels each consisting of a GLM with group-specific #' terms. If more than one submodel is specified (i.e. there is more than one #' outcome variable) then a dependence is induced by assuming that the #' group-specific terms for each grouping factor are correlated across submodels. #' } #' \item{\code{\link{stan_jm}}}{ #' Estimates shared parameter joint models for longitudinal and time-to-event #' (i.e. survival) data. 
The joint model can be univariate (i.e. one longitudinal #' outcome) or multivariate (i.e. more than one longitudinal outcome). A variety #' of parameterisations are available for linking the longitudinal and event #' processes (i.e. a variety of association structures). #' } #' } #' #' @seealso \url{https://mc-stan.org/rstanarm/} #' NULL rstanarm/R/pp_validate.R0000644000176200001440000002131713722762571014735 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2016, 2017 Trustees of Columbia University # Copyright (C) 2005 Samantha Cook # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # #' Model validation via simulation #' #' The \code{pp_validate} function is based on the methods described in #' Cook, Gelman, and Rubin (2006) for validating software developed to fit #' particular Bayesian models. Here we take the perspective that models #' themselves are software and thus it is useful to apply this validation #' approach to individual models. #' #' @export #' @templateVar stanregArg object #' @template args-stanreg-object #' @param nreps The number of replications to be performed. \code{nreps} must be #' sufficiently large so that the statistics described below in Details are #' meaningful. 
Depending on the model and the size of the data, running #' \code{pp_validate} may be slow. See also the Note section below for advice #' on avoiding numerical issues. #' @param seed A seed passed to Stan to use when refitting the model. #' @param ... Currently ignored. #' #' @details #' We repeat \code{nreps} times the process of simulating parameters and data #' from the model and refitting the model to this simulated data. For each of #' the \code{nreps} replications we do the following: #' \enumerate{ #' \item Refit the model but \emph{without} conditioning on the data (setting #' \code{prior_PD=TRUE}), obtaining draws \eqn{\theta^{true}}{\theta_true} #' from the \emph{prior} distribution of the model parameters. #' \item Given \eqn{\theta^{true}}{\theta_true}, simulate data \eqn{y^\ast}{y*} #' from the \emph{prior} predictive distribution (calling #' \code{\link{posterior_predict}} on the fitted model object obtained in step #' 1). #' \item Fit the model to the simulated outcome \eqn{y^\ast}{y*}, obtaining #' parameters \eqn{\theta^{post}}{\theta_post}. #' } #' For any individual parameter, the quantile of the "true" parameter value with #' respect to its posterior distribution \emph{should} be uniformly distributed. #' The validation procedure entails looking for deviations from uniformity by #' computing statistics for a test that the quantiles are uniformly distributed. #' The absolute values of the computed test statistics are plotted for batches #' of parameters (e.g., non-varying coefficients are grouped into a batch called #' "beta", parameters that vary by group level are in batches named for the #' grouping variable, etc.). See Cook, Gelman, and Rubin (2006) for more details #' on the validation procedure. #' #' @note In order to make it through \code{nreps} replications without running #' into numerical difficulties you may have to restrict the range for randomly #' generating initial values for parameters when you fit the \emph{original} #' model. 
With any of \pkg{rstanarm}'s modeling functions this can be done by #' specifying the optional argument \code{init_r} as some number less than the #' default of \eqn{2}. #' #' @return A ggplot object that can be further customized using the #' \pkg{ggplot2} package. #' #' @references #' Cook, S., Gelman, A., and Rubin, D. #' (2006). Validation of software for Bayesian models using posterior quantiles. #' \emph{Journal of Computational and Graphical Statistics}. 15(3), 675--692. #' #' @seealso #' \code{\link{pp_check}} for graphical posterior predictive checks and #' \code{\link{posterior_predict}} to draw from the posterior predictive #' distribution. #' #' \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}} to change the color scheme of the #' plot. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \dontrun{ #' if (!exists("example_model")) example(example_model) #' try(pp_validate(example_model)) # fails with default seed / priors #' } #' } #' @importFrom ggplot2 rel geom_point geom_segment scale_x_continuous element_line #' pp_validate <- function(object, nreps = 20, seed = 12345, ...) 
{
  # based on Samantha Cook's BayesValidate::validate

  # Posterior quantile of the "true" value: row 1 of the matrix passed to
  # apply() below holds theta_true (see rbind(theta_true, theta_draws)),
  # so its zero-based rank among the draws, shifted by 0.5 and scaled by
  # the number of draws, is the quantile.
  quant <- function(draws) {
    n <- length(draws)
    rank_theta <- c(1:n)[order(draws) == 1] - 1
    quants <- (rank_theta + 0.5) / n
    return(quants)
  }

  validate_stanreg_object(object)
  if (is.stanmvreg(object))
    STOP_if_stanmvreg("'pp_validate'")
  if (nreps < 2)
    stop("'nreps' must be at least 2.")

  # sizes of the parameter batches to be summarized together
  dims <- object$stanfit@par_dims[c("alpha", "beta", "b", "aux",
                                    "cutpoints", "theta_L")]
  dims <- dims[!sapply(dims, is.null)]
  dims <- sapply(dims, prod)
  dims <- dims[dims > 0]
  if ("b" %in% names(dims)) {
    # replace the single "b" count with one count per grouping factor
    mark <- which(names(dims) == "b")
    vals <- sapply(ranef(object), function(x) length(as.matrix(x)))
    dims <- append(dims, values = vals, after = mark)
    dims <- dims[-mark]
  }
  names(dims)[which(names(dims) == "theta_L")] <- "Sigma"
  batches <- dims
  params_batch <- names(dims)
  num_batches <- length(batches)
  num_params <- sum(dims)
  # batch_ind[i] is the cumulative parameter count before batch i
  batch_ind <- rep(0, num_batches + 1)
  plot_batch <- rep(1, batches[1])
  for (i in 1:num_batches)
    batch_ind[i+1] <- batch_ind[i] + batches[i]
  for (i in 2:num_batches)
    plot_batch <- c(plot_batch, rep(i, batches[i]))
  quantile_theta <- matrix(NA_real_, nrow = nreps, ncol = num_params + num_batches)

  # Step 1: draw theta_true from the prior (prior_PD = TRUE); one chain per
  # replication, keeping a single post-warmup iteration from each chain
  post <- suppressWarnings(update(object, prior_PD = TRUE, seed = seed,
                                  algorithm = "sampling", warmup = 1000,
                                  iter = 1000 + 1, chains = nreps))
  post_mat <- as.matrix(post)
  # Step 2: simulate outcomes y* from the prior predictive distribution
  data_mat <- posterior_predict(post)
  constant <- apply(data_mat, 1, FUN = function(x) all(duplicated(x)[-1L]))
  if (any(constant))
    stop("'pp_validate' cannot proceed because some simulated outcomes are constant. ",
         "Try again with better priors on the parameters.")
  y <- get_y(object)
  # Step 3: refit the model to each simulated outcome and record the
  # posterior quantiles of theta_true
  for (reps in 1:nreps) {
    theta_true <- post_mat[reps, ]
    data_rep <- data_mat[reps, ]
    mf <- model.frame(object)
    if (NCOL(mf[, 1]) == 2) { # binomial models
      # rebuild the (successes, failures) outcome from the simulated counts
      new_f <- update.formula(formula(object), cbind(ynew_1s, ynew_0s) ~ .)
ynew <- c(data_rep) mf2 <- data.frame(mf[, -1], ynew_1s = ynew, ynew_0s = rowSums(y) - ynew) mf <- get_all_vars(new_f, data = mf2) } else { new_f <- NULL if (is.factor(y)) mf[, 1] <- factor(data_rep, levels = levels(y), ordered = is.ordered(y)) else mf[, 1] <- c(data_rep) } update_args <- nlist(object, data = mf, seed) if (!is.null(new_f)) update_args$formula <- new_f theta_draws <- as.matrix(do.call("update", update_args)) if (!is.null(batches)){ for (i in 1:num_batches) { if (batches[i] > 1) { sel <- (batch_ind[i]+1):batch_ind[(i+1)] theta_draws <- cbind(theta_draws, apply(theta_draws[, sel], 1, mean)) theta_true <- c(theta_true, mean(theta_true[sel])) } else { theta_draws <- cbind(theta_draws, theta_draws[, (batch_ind[i]+1)]) theta_true <- c(theta_true, theta_true[(batch_ind[i]+1)]) } } } theta_draws <- rbind(theta_true, theta_draws) quantile_theta[reps, ] <- apply(theta_draws, 2, quant) } quantile_trans <- (apply(quantile_theta, 2, qnorm))^2 q_trans <- apply(quantile_trans, 2, sum) p_vals <- pchisq(q_trans, df = nreps, lower.tail = FALSE) z_stats <- abs(qnorm(p_vals)) if (is.null(batches)) { adj_min_p <- num_params * min(p_vals) } else { z_batch <- z_stats[(num_params + 1):length(p_vals)] p_batch <- p_vals[(num_params + 1):length(p_vals)] adj_min_p <- num_batches * min(p_batch) } upper_lim <- max(max(z_stats + 1), 3.5) plotdata <- data.frame(x = z_batch, y = params_batch) scheme <- bayesplot::color_scheme_get() ggplot(plotdata, aes_string(x = "x", y = "y")) + geom_segment( aes_string(x = "0", xend = "x", y = "y", yend = "y"), color = scheme[["mid"]], size = rel(1) ) + geom_point( size = rel(3), shape = 21, fill = scheme[["dark"]], color = scheme[["dark_highlight"]] ) + scale_x_continuous(limits = c(0, upper_lim), expand = c(0, 0)) + xlab(expression("Absolute " * z[theta] * " Statistics")) + theme_default() + yaxis_title(FALSE) + grid_lines(color = "gray", size = 0.1) } rstanarm/R/launch_shinystan.R0000644000176200001440000001333413722762571016017 0ustar 
liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Using the ShinyStan GUI with rstanarm models #' #' The ShinyStan interface provides visual and numerical summaries of model #' parameters and convergence diagnostics. #' #' @aliases launch_shinystan #' @export #' #' @templateVar stanregArg object #' @template args-stanreg-object #' @inheritParams shinystan::launch_shinystan #' @param ppd Should \pkg{rstanarm} draw from the posterior predictive #' distribution before launching ShinyStan? The default is \code{TRUE}, #' although for very large objects it can be convenient to set it to #' \code{FALSE} as drawing from the posterior predictive distribution can be #' time consuming. If \code{ppd} is \code{TRUE} then graphical posterior #' predictive checks are available when ShinyStan is launched. #' @param seed Passed to \link[=pp_check]{pp_check} if #' \code{ppd} is \code{TRUE}. #' @param model_name,note Optional arguments passed to #' \code{\link[shinystan]{as.shinystan}}. #' #' @details The \code{\link[shinystan]{launch_shinystan}} function will accept a #' \code{\link[=stanreg-objects]{stanreg}} object as input. 
Currently, almost #' any model fit using one of \pkg{rstanarm}'s model-fitting functions can be #' used with ShinyStan. The only exception is that ShinyStan does not #' currently support \pkg{rstanarm} models fit using #' \code{algorithm='optimizing'}. See the #' \pkg{\link[=shinystan-package]{shinystan}} package documentation for more #' information. #' #' @section Faster launch times: #' For some \pkg{rstanarm} models ShinyStan may take a very long time to launch. #' If this is the case with one of your models you may be able to speed up #' \code{launch_shinystan} in one of several ways: #' \describe{ #' \item{Prevent ShinyStan from preparing graphical posterior predictive #' checks:}{ #' When used with a \code{\link[=stanreg-objects]{stanreg}} object #' (\pkg{rstanarm} model object) ShinyStan will draw from the posterior #' predictive distribution and prepare graphical posterior predictive checks #' before launching. That way when you go to the PPcheck page the plots are #' immediately available. This can be time consuming for models fit to very #' large datasets and you can prevent this behavior by creating a shinystan #' object before calling \code{launch_shinystan}. To do this use #' \code{\link[shinystan]{as.shinystan}} with optional argument \code{ppd} set #' to \code{FALSE} (see the Examples section below). When you then launch #' ShinyStan and go to the PPcheck page the plots will no longer be #' automatically generated and you will be presented with the standard #' interface requiring you to first specify the appropriate \eqn{y} and #' \eqn{yrep}, which can be done for many but not all \pkg{rstanarm} models. #' } #' \item{Use a shinystan object:}{ #' Even if you don't want to prevent ShinyStan from preparing graphical #' posterior predictive checks, first creating a shinystan object using #' \code{\link[shinystan]{as.shinystan}} can reduce \emph{future} launch #' times. 
That is, \code{launch_shinystan(sso)} will be faster than #' \code{launch_shinystan(fit)}, where \code{sso} is a shinystan object and #' \code{fit} is a stanreg object. It still may take some time for #' \code{as.shinystan} to create \code{sso} initially, but each time you #' subsequently call \code{launch_shinystan(sso)} it will reuse \code{sso} #' instead of internally creating a shinystan object every time. See the #' Examples section below.} #' } #' #' @template reference-bayesvis #' @template reference-muth #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \dontrun{ #' if (!exists("example_model")) example(example_model) #' #' # Launch the ShinyStan app without saving the resulting shinystan object #' if (interactive()) launch_shinystan(example_model) #' #' # Launch the ShinyStan app (saving resulting shinystan object as sso) #' if (interactive()) sso <- launch_shinystan(example_model) #' #' # First create shinystan object then call launch_shinystan #' sso <- shinystan::as.shinystan(example_model) #' if (interactive()) launch_shinystan(sso) #' #' # Prevent ShinyStan from preparing graphical posterior predictive checks that #' # can be time consuming. example_model is small enough that it won't matter #' # much here but in general this can help speed up launch_shinystan #' sso <- shinystan::as.shinystan(example_model, ppd = FALSE) #' if (interactive()) launch_shinystan(sso) #' } #' } launch_shinystan.stanreg <- function(object, ppd = TRUE, seed = 1234, model_name = NULL, note = NULL, rstudio = getOption("shinystan.rstudio"), ...) { sso <- shinystan::as.shinystan( object, ppd = ppd, seed = seed, model_name = model_name, note = note ) shinystan::launch_shinystan(sso, rstudio = rstudio, ...) 
} rstanarm/R/predict.R0000644000176200001440000000621413722762571014076 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Predict method for stanreg objects #' #' This method is primarily intended to be used only for models fit using #' optimization. For models fit using MCMC or one of the variational #' approximations, see \code{\link{posterior_predict}}. #' #' @export #' @templateVar stanregArg object #' @template args-stanreg-object #' @param ... Ignored. #' @param newdata Optionally, a data frame in which to look for variables with #' which to predict. If omitted, the model matrix is used. #' @param type The type of prediction. The default \code{'link'} is on the scale #' of the linear predictors; the alternative \code{'response'} is on the scale #' of the response variable. #' @param se.fit A logical scalar indicating if standard errors should be #' returned. The default is \code{FALSE}. #' #' @return A vector if \code{se.fit} is \code{FALSE} and a list if \code{se.fit} #' is \code{TRUE}. 
#'
#' @seealso \code{\link{posterior_predict}}
#'
predict.stanreg <- function(object,
                            ...,
                            newdata = NULL,
                            type = c("link", "response"),
                            se.fit = FALSE) {
  # models with group-specific terms are not supported here
  if (is.mer(object)) {
    stop(
      "'predict' is not available for models fit with ",
      object$stan_function,
      ". Please use the 'posterior_predict' function instead.",
      call. = FALSE
    )
  }
  type <- match.arg(type)
  # fast path: the stored linear predictors / fitted values suffice when no
  # standard errors and no new data are requested
  if (!se.fit && is.null(newdata)) {
    preds <- if (type == "link")
      object$linear.predictors else object$fitted.values
    return(preds)
  }
  if (isTRUE(object$stan_function == "stan_betareg") && !is.null(newdata)) {
    # avoid false positive warnings about missing z variables in newdata
    zvars <- all.vars(object$terms$precision)
    for (var in zvars) {
      if (!var %in% colnames(newdata))
        newdata[[var]] <- NA
    }
  }
  dat <- pp_data(object, newdata)
  stanmat <- as.matrix.stanreg(object)
  beta <- stanmat[, seq_len(ncol(dat$x))]
  eta <- linear_predictor(beta, dat$x, dat$offset)
  if (type == "response") {
    inverse_link <- linkinv(object)
    eta <- inverse_link(eta)
    # scobit-style polr: raise the success probability to the power alpha
    if (is(object, "polr") && ("alpha" %in% colnames(stanmat)))
      eta <- apply(eta, 1L, FUN = `^`, e2 = stanmat[, "alpha"])
  }
  # point prediction = mean over draws; se = sd over draws
  fit <- colMeans(eta)
  if (!se.fit)
    return(fit)
  se.fit <- apply(eta, 2L, sd)
  nlist(fit, se.fit)
}
rstanarm/R/priors.R0000644000176200001440000010021014370470372013746 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Prior distributions and options #' #' @name priors #' @description The functions described on this page are used to specify the #' prior-related arguments of the various modeling functions in the #' \pkg{rstanarm} package (to view the priors used for an existing model see #' \code{\link{prior_summary}}). #' #' The default priors used in the various \pkg{rstanarm} modeling functions #' are intended to be \emph{weakly informative} in that they provide moderate #' regularization and help stabilize computation. For many applications the #' defaults will perform well, but prudent use of more informative priors is #' encouraged. Uniform prior distributions are possible (e.g. by setting #' \code{\link{stan_glm}}'s \code{prior} argument to \code{NULL}) but, unless #' the data is very strong, they are not recommended and are \emph{not} #' non-informative, giving the same probability mass to implausible values as #' plausible ones. #' #' More information on priors is available in the vignette #' \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior #' Distributions for rstanarm Models}} as well as the vignettes for the #' various modeling functions. For details on the #' priors used for multilevel models in particular see the vignette #' \href{https://mc-stan.org/rstanarm/articles/glmer.html}{\emph{Estimating #' Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm}} #' and also the \strong{Covariance matrices} section lower down on this page. #' #' #' @param location Prior location. In most cases, this is the prior mean, but #' for \code{cauchy} (which is equivalent to \code{student_t} with #' \code{df=1}), the mean does not exist and \code{location} is the prior #' median. 
The default value is \eqn{0}, except for \code{R2} which has no #' default value for \code{location}. For \code{R2}, \code{location} pertains #' to the prior location of the \eqn{R^2} under a Beta distribution, but the #' interpretation of the \code{location} parameter depends on the specified #' value of the \code{what} argument (see the \emph{R2 family} section in #' \strong{Details}). #' @param scale Prior scale. The default depends on the family (see #' \strong{Details}). #' @param df,df1,df2 Prior degrees of freedom. The default is \eqn{1} for #' \code{student_t}, in which case it is equivalent to \code{cauchy}. For the #' hierarchical shrinkage priors (\code{hs} and \code{hs_plus}) the degrees of #' freedom parameter(s) default to \eqn{1}. For the \code{product_normal} #' prior, the degrees of freedom parameter must be an integer (vector) that is #' at least \eqn{2} (the default). #' @param global_df,global_scale,slab_df,slab_scale Optional arguments for the #' hierarchical shrinkage priors. See the \emph{Hierarchical shrinkage family} #' section below. #' @param what A character string among \code{'mode'} (the default), #' \code{'mean'}, \code{'median'}, or \code{'log'} indicating how the #' \code{location} parameter is interpreted in the \code{LKJ} case. If #' \code{'log'}, then \code{location} is interpreted as the expected #' logarithm of the \eqn{R^2} under a Beta distribution. Otherwise, #' \code{location} is interpreted as the \code{what} of the \eqn{R^2} #' under a Beta distribution. If the number of predictors is less than #' or equal to two, the mode of this Beta distribution does not exist #' and an error will prompt the user to specify another choice for #' \code{what}. #' @param autoscale If \code{TRUE} then the scales of the priors on the #' intercept and regression coefficients may be additionally modified #' internally by \pkg{rstanarm} in the following cases. 
First, for Gaussian #' models only, the prior scales for the intercept, coefficients, and the #' auxiliary parameter \code{sigma} (error standard deviation) are multiplied #' by \code{sd(y)}. Additionally --- not only for Gaussian models --- if the #' \code{QR} argument to the model fitting function (e.g. \code{stan_glm}) is #' \code{FALSE} then we also divide the prior scale(s) by \code{sd(x)}. #' Prior autoscaling is also discussed in the vignette #' \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior #' Distributions for rstanarm Models}} #' #' #' @details The details depend on the family of the prior being used: #' \subsection{Student t family}{ #' Family members: #' \itemize{ #' \item \code{normal(location, scale)} #' \item \code{student_t(df, location, scale)} #' \item \code{cauchy(location, scale)} #' } #' Each of these functions also takes an argument \code{autoscale}. #' #' For the prior distribution for the intercept, \code{location}, #' \code{scale}, and \code{df} should be scalars. For the prior for the other #' coefficients they can either be vectors of length equal to the number of #' coefficients (not including the intercept), or they can be scalars, in #' which case they will be recycled to the appropriate length. As the #' degrees of freedom approaches infinity, the Student t distribution #' approaches the normal distribution and if the degrees of freedom are one, #' then the Student t distribution is the Cauchy distribution. #' #' If \code{scale} is not specified it will default to \eqn{2.5}, unless the #' probit link function is used, in which case these defaults are scaled by a #' factor of \code{dnorm(0)/dlogis(0)}, which is roughly \eqn{1.6}. #' #' If the \code{autoscale} argument is \code{TRUE}, then the #' scales will be further adjusted as described above in the documentation of #' the \code{autoscale} argument in the \strong{Arguments} section. 
#' } #' \subsection{Hierarchical shrinkage family}{ #' Family members: #' \itemize{ #' \item \code{hs(df, global_df, global_scale, slab_df, slab_scale)} #' \item \code{hs_plus(df1, df2, global_df, global_scale, slab_df, slab_scale)} #' } #' #' The hierarchical shrinkage priors are normal with a mean of zero and a #' standard deviation that is also a random variable. The traditional #' hierarchical shrinkage prior utilizes a standard deviation that is #' distributed half Cauchy with a median of zero and a scale parameter that is #' also half Cauchy. This is called the "horseshoe prior". The hierarchical #' shrinkage (\code{hs}) prior in the \pkg{rstanarm} package instead utilizes #' a regularized horseshoe prior, as described by Piironen and Vehtari (2017), #' which recommends setting the \code{global_scale} argument equal to the ratio #' of the expected number of non-zero coefficients to the expected number of #' zero coefficients, divided by the square root of the number of observations. #' #' The hierarhical shrinkpage plus (\code{hs_plus}) prior is similar except #' that the standard deviation that is distributed as the product of two #' independent half Cauchy parameters that are each scaled in a similar way #' to the \code{hs} prior. #' #' The hierarchical shrinkage priors have very tall modes and very fat tails. #' Consequently, they tend to produce posterior distributions that are very #' concentrated near zero, unless the predictor has a strong influence on the #' outcome, in which case the prior has little influence. Hierarchical #' shrinkage priors often require you to increase the #' \code{\link{adapt_delta}} tuning parameter in order to diminish the number #' of divergent transitions. For more details on tuning parameters and #' divergent transitions see the Troubleshooting section of the \emph{How to #' Use the rstanarm Package} vignette. 
#' } #' \subsection{Laplace family}{ #' Family members: #' \itemize{ #' \item \code{laplace(location, scale)} #' \item \code{lasso(df, location, scale)} #' } #' Each of these functions also takes an argument \code{autoscale}. #' #' The Laplace distribution is also known as the double-exponential #' distribution. It is a symmetric distribution with a sharp peak at its mean #' / median / mode and fairly long tails. This distribution can be motivated #' as a scale mixture of normal distributions and the remarks above about the #' normal distribution apply here as well. #' #' The lasso approach to supervised learning can be expressed as finding the #' posterior mode when the likelihood is Gaussian and the priors on the #' coefficients have independent Laplace distributions. It is commonplace in #' supervised learning to choose the tuning parameter by cross-validation, #' whereas a more Bayesian approach would be to place a prior on \dQuote{it}, #' or rather its reciprocal in our case (i.e. \emph{smaller} values correspond #' to more shrinkage toward the prior location vector). We use a chi-square #' prior with degrees of freedom equal to that specified in the call to #' \code{lasso} or, by default, 1. The expectation of a chi-square random #' variable is equal to this degrees of freedom and the mode is equal to the #' degrees of freedom minus 2, if this difference is positive. #' #' It is also common in supervised learning to standardize the predictors #' before training the model. We do not recommend doing so. Instead, it is #' better to specify \code{autoscale = TRUE}, which #' will adjust the scales of the priors according to the dispersion in the #' variables. See the documentation of the \code{autoscale} argument above #' and also the \code{\link{prior_summary}} page for more information. 
#' } #' \subsection{Product-normal family}{ #' Family members: #' \itemize{ #' \item \code{product_normal(df, location, scale)} #' } #' The product-normal distribution is the product of at least two independent #' normal variates each with mean zero, shifted by the \code{location} #' parameter. It can be shown that the density of a product-normal variate is #' symmetric and infinite at \code{location}, so this prior resembles a #' \dQuote{spike-and-slab} prior for sufficiently large values of the #' \code{scale} parameter. For better or for worse, this prior may be #' appropriate when it is strongly believed (by someone) that a regression #' coefficient \dQuote{is} equal to the \code{location}, parameter even though #' no true Bayesian would specify such a prior. #' #' Each element of \code{df} must be an integer of at least \eqn{2} because #' these \dQuote{degrees of freedom} are interpreted as the number of normal #' variates being multiplied and then shifted by \code{location} to yield the #' regression coefficient. Higher degrees of freedom produce a sharper #' spike at \code{location}. #' #' Each element of \code{scale} must be a non-negative real number that is #' interpreted as the standard deviation of the normal variates being #' multiplied and then shifted by \code{location} to yield the regression #' coefficient. In other words, the elements of \code{scale} may differ, but #' the k-th standard deviation is presumed to hold for all the normal deviates #' that are multiplied together and shifted by the k-th element of #' \code{location} to yield the k-th regression coefficient. The elements of #' \code{scale} are not the prior standard deviations of the regression #' coefficients. The prior variance of the regression coefficients is equal to #' the scale raised to the power of \eqn{2} times the corresponding element of #' \code{df}. Thus, larger values of \code{scale} put more prior volume on #' values of the regression coefficient that are far from zero. 
#' } #' \subsection{Dirichlet family}{ #' Family members: #' \itemize{ #' \item \code{dirichlet(concentration)} #' } #' #' The Dirichlet distribution is a multivariate generalization of the beta #' distribution. It is perhaps the easiest prior distribution to specify #' because the concentration parameters can be interpreted as prior counts #' (although they need not be integers) of a multinomial random variable. #' #' The Dirichlet distribution is used in \code{\link{stan_polr}} for an #' implicit prior on the cutpoints in an ordinal regression model. More #' specifically, the Dirichlet prior pertains to the prior probability of #' observing each category of the ordinal outcome when the predictors are at #' their sample means. Given these prior probabilities, it is straightforward #' to add them to form cumulative probabilities and then use an inverse CDF #' transformation of the cumulative probabilities to define the cutpoints. #' #' If a scalar is passed to the \code{concentration} argument of the #' \code{dirichlet} function, then it is replicated to the appropriate length #' and the Dirichlet distribution is symmetric. If \code{concentration} is a #' vector and all elements are \eqn{1}, then the Dirichlet distribution is #' jointly uniform. If all concentration parameters are equal but greater than #' \eqn{1} then the prior mode is that the categories are equiprobable, and #' the larger the value of the identical concentration parameters, the more #' sharply peaked the distribution is at the mode. The elements in #' \code{concentration} can also be given different values to represent that #' not all outcome categories are a priori equiprobable. 
#' } #' \subsection{Covariance matrices}{ #' Family members: #' \itemize{ #' \item \code{decov(regularization, concentration, shape, scale)} #' \item \code{lkj(regularization, scale, df)} #' } #' (Also see vignette for \code{stan_glmer}, #' \href{https://mc-stan.org/rstanarm/articles/glmer.html}{\emph{Estimating #' Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm}}) #' #' Covariance matrices are decomposed into correlation matrices and #' variances. The variances are in turn decomposed into the product of a #' simplex vector and the trace of the matrix. Finally, the trace is the #' product of the order of the matrix and the square of a scale parameter. #' This prior on a covariance matrix is represented by the \code{decov} #' function. #' #' The prior for a correlation matrix is called LKJ whose density is #' proportional to the determinant of the correlation matrix raised to the #' power of a positive regularization parameter minus one. If #' \code{regularization = 1} (the default), then this prior is jointly #' uniform over all correlation matrices of that size. If #' \code{regularization > 1}, then the identity matrix is the mode and in the #' unlikely case that \code{regularization < 1}, the identity matrix is the #' trough. #' #' The trace of a covariance matrix is equal to the sum of the variances. We #' set the trace equal to the product of the order of the covariance matrix #' and the \emph{square} of a positive scale parameter. The particular #' variances are set equal to the product of a simplex vector --- which is #' non-negative and sums to \eqn{1} --- and the scalar trace. In other words, #' each element of the simplex vector represents the proportion of the trace #' attributable to the corresponding variable. 
#' #' A symmetric Dirichlet prior is used for the simplex vector, which has a #' single (positive) \code{concentration} parameter, which defaults to #' \eqn{1} and implies that the prior is jointly uniform over the space of #' simplex vectors of that size. If \code{concentration > 1}, then the prior #' mode corresponds to all variables having the same (proportion of total) #' variance, which can be used to ensure the the posterior variances are not #' zero. As the \code{concentration} parameter approaches infinity, this #' mode becomes more pronounced. In the unlikely case that #' \code{concentration < 1}, the variances are more polarized. #' #' If all the variables were multiplied by a number, the trace of their #' covariance matrix would increase by that number squared. Thus, it is #' reasonable to use a scale-invariant prior distribution for the positive #' scale parameter, and in this case we utilize a Gamma distribution, whose #' \code{shape} and \code{scale} are both \eqn{1} by default, implying a #' unit-exponential distribution. Set the \code{shape} hyperparameter to some #' value greater than \eqn{1} to ensure that the posterior trace is not zero. #' #' If \code{regularization}, \code{concentration}, \code{shape} and / or #' \code{scale} are positive scalars, then they are recycled to the #' appropriate length. Otherwise, each can be a positive vector of the #' appropriate length, but the appropriate length depends on the number of #' covariance matrices in the model and their sizes. A one-by-one covariance #' matrix is just a variance and thus does not have \code{regularization} or #' \code{concentration} parameters, but does have \code{shape} and #' \code{scale} parameters for the prior standard deviation of that #' variable. #' #' Note that for \code{\link{stan_mvmer}} and \code{\link{stan_jm}} models an #' additional prior distribution is provided through the \code{lkj} function. 
#' This prior is in fact currently used as the default for those modelling #' functions (although \code{decov} is still available as an option if the user #' wishes to specify it through the \code{prior_covariance} argument). The #' \code{lkj} prior uses the same decomposition of the covariance matrices #' into correlation matrices and variances, however, the variances are not #' further decomposed into a simplex vector and the trace; instead the #' standard deviations (square root of the variances) for each of the group #' specific parameters are given a half Student t distribution with the #' scale and df parameters specified through the \code{scale} and \code{df} #' arguments to the \code{lkj} function. The scale parameter default is 10 #' which is then autoscaled, whilst the df parameter default is 1 #' (therefore equivalent to a half Cauchy prior distribution for the #' standard deviation of each group specific parameter). This prior generally #' leads to similar results as the \code{decov} prior, but it is also likely #' to be **less** diffuse compared with the \code{decov} prior; therefore it #' sometimes seems to lead to faster estimation times, hence why it has #' been chosen as the default prior for \code{\link{stan_mvmer}} and #' \code{\link{stan_jm}} where estimation times can be long. #' } #' \subsection{R2 family}{ #' Family members: #' \itemize{ #' \item \code{R2(location, what)} #' } #' #' The \code{\link{stan_lm}}, \code{\link{stan_aov}}, and #' \code{\link{stan_polr}} functions allow the user to utilize a function #' called \code{R2} to convey prior information about all the parameters. #' This prior hinges on prior beliefs about the location of \eqn{R^2}, the #' proportion of variance in the outcome attributable to the predictors, #' which has a \code{\link[stats]{Beta}} prior with first shape #' hyperparameter equal to half the number of predictors and second shape #' hyperparameter free. 
By specifying \code{what} to be the prior mode (the #' default), mean, median, or expected log of \eqn{R^2}, the second shape #' parameter for this Beta distribution is determined internally. If #' \code{what = 'log'}, location should be a negative scalar; otherwise it #' should be a scalar on the \eqn{(0,1)} interval. #' #' For example, if \eqn{R^2 = 0.5}, then the mode, mean, and median of #' the \code{\link[stats]{Beta}} distribution are all the same and thus the #' second shape parameter is also equal to half the number of predictors. #' The second shape parameter of the \code{\link[stats]{Beta}} distribution #' is actually the same as the shape parameter in the LKJ prior for a #' correlation matrix described in the previous subsection. Thus, the smaller #' is \eqn{R^2}, the larger is the shape parameter, the smaller are the #' prior correlations among the outcome and predictor variables, and the more #' concentrated near zero is the prior density for the regression #' coefficients. Hence, the prior on the coefficients is regularizing and #' should yield a posterior distribution with good out-of-sample predictions #' \emph{if} the prior location of \eqn{R^2} is specified in a reasonable #' fashion. #' } #' @return A named list to be used internally by the \pkg{rstanarm} model #' fitting functions. #' @seealso The various vignettes for the \pkg{rstanarm} package also discuss #' and demonstrate the use of some of the supported prior distributions. #' #' @templateVar bdaRef \url{https://stat.columbia.edu/~gelman/book/} #' @template reference-bda #' #' @references #' Gelman, A., Jakulin, A., Pittau, M. G., and Su, Y. (2008). A weakly #' informative default prior distribution for logistic and other regression #' models. \emph{Annals of Applied Statistics}. 2(4), 1360--1383. 
#' #' @template reference-piironen-vehtari #' @template reference-stan-manual #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' fmla <- mpg ~ wt + qsec + drat + am #' #' # Draw from prior predictive distribution (by setting prior_PD = TRUE) #' prior_pred_fit <- stan_glm(fmla, data = mtcars, prior_PD = TRUE, #' chains = 1, seed = 12345, iter = 250, # for speed only #' prior = student_t(df = 4, 0, 2.5), #' prior_intercept = cauchy(0,10), #' prior_aux = exponential(1/2)) #' plot(prior_pred_fit, "hist") #' #' \donttest{ #' # Can assign priors to names #' N05 <- normal(0, 5) #' fit <- stan_glm(fmla, data = mtcars, prior = N05, prior_intercept = N05) #' } #' #' # Visually compare normal, student_t, cauchy, laplace, and product_normal #' compare_priors <- function(scale = 1, df_t = 2, xlim = c(-10, 10)) { #' dt_loc_scale <- function(x, df, location, scale) { #' 1/scale * dt((x - location)/scale, df) #' } #' dlaplace <- function(x, location, scale) { #' 0.5 / scale * exp(-abs(x - location) / scale) #' } #' dproduct_normal <- function(x, scale) { #' besselK(abs(x) / scale ^ 2, nu = 0) / (scale ^ 2 * pi) #' } #' stat_dist <- function(dist, ...) { #' ggplot2::stat_function(ggplot2::aes_(color = dist), ...) 
#' } #' ggplot2::ggplot(data.frame(x = xlim), ggplot2::aes(x)) + #' stat_dist("normal", size = .75, fun = dnorm, #' args = list(mean = 0, sd = scale)) + #' stat_dist("student_t", size = .75, fun = dt_loc_scale, #' args = list(df = df_t, location = 0, scale = scale)) + #' stat_dist("cauchy", size = .75, linetype = 2, fun = dcauchy, #' args = list(location = 0, scale = scale)) + #' stat_dist("laplace", size = .75, linetype = 2, fun = dlaplace, #' args = list(location = 0, scale = scale)) + #' stat_dist("product_normal", size = .75, linetype = 2, fun = dproduct_normal, #' args = list(scale = 1)) #' } #' # Cauchy has fattest tails, followed by student_t, laplace, and normal #' compare_priors() #' #' # The student_t with df = 1 is the same as the cauchy #' compare_priors(df_t = 1) #' #' # Even a scale of 5 is somewhat large. It gives plausibility to rather #' # extreme values #' compare_priors(scale = 5, xlim = c(-20,20)) #' #' # If you use a prior like normal(0, 1000) to be "non-informative" you are #' # actually saying that a coefficient value of e.g. 
-500 is quite plausible #' compare_priors(scale = 1000, xlim = c(-1000,1000)) #' } NULL #' @rdname priors #' @export normal <- function(location = 0, scale = NULL, autoscale = FALSE) { validate_parameter_value(scale) nlist(dist = "normal", df = NA, location, scale, autoscale) } #' @rdname priors #' @export student_t <- function(df = 1, location = 0, scale = NULL, autoscale = FALSE) { validate_parameter_value(scale) validate_parameter_value(df) nlist(dist = "t", df, location, scale, autoscale) } #' @rdname priors #' @export cauchy <- function(location = 0, scale = NULL, autoscale = FALSE) { student_t(df = 1, location = location, scale = scale, autoscale) } #' @rdname priors #' @export hs <- function(df = 1, global_df = 1, global_scale = 0.01, slab_df = 4, slab_scale = 2.5) { validate_parameter_value(df) validate_parameter_value(global_df) validate_parameter_value(global_scale) validate_parameter_value(slab_df) validate_parameter_value(slab_scale) nlist(dist = "hs", df, location = 0, scale = 1, global_df, global_scale, slab_df, slab_scale) } #' @rdname priors #' @export hs_plus <- function(df1 = 1, df2 = 1, global_df = 1, global_scale = 0.01, slab_df = 4, slab_scale = 2.5) { validate_parameter_value(df1) validate_parameter_value(df2) validate_parameter_value(global_df) validate_parameter_value(global_scale) validate_parameter_value(slab_df) validate_parameter_value(slab_scale) # scale gets used as a second df hyperparameter nlist(dist = "hs_plus", df = df1, location = 0, scale = df2, global_df, global_scale, slab_df, slab_scale) } #' @rdname priors #' @export laplace <- function(location = 0, scale = NULL, autoscale = FALSE) { nlist(dist = "laplace", df = NA, location, scale, autoscale) } #' @rdname priors #' @export lasso <- function(df = 1, location = 0, scale = NULL, autoscale = FALSE) { nlist(dist = "lasso", df, location, scale, autoscale) } #' @rdname priors #' @export product_normal <- function(df = 2, location = 0, scale = 1) { validate_parameter_value(df) 
stopifnot(all(df >= 1), all(df == as.integer(df))) validate_parameter_value(scale) nlist(dist = "product_normal", df, location, scale) } #' @rdname priors #' @export #' @param rate Prior rate for the exponential distribution. Defaults to #' \code{1}. For the exponential distribution, the rate parameter is the #' \emph{reciprocal} of the mean. #' exponential <- function(rate = 1, autoscale = FALSE) { stopifnot(length(rate) == 1) validate_parameter_value(rate) nlist(dist = "exponential", df = NA, location = NA, scale = 1/rate, autoscale) } #' @rdname priors #' @export #' @param regularization Exponent for an LKJ prior on the correlation matrix in #' the \code{decov} or \code{lkj} prior. The default is \eqn{1}, implying a #' joint uniform prior. #' @param concentration Concentration parameter for a symmetric Dirichlet #' distribution. The default is \eqn{1}, implying a joint uniform prior. #' @param shape Shape parameter for a gamma prior on the scale parameter in the #' \code{decov} prior. If \code{shape} and \code{scale} are both \eqn{1} (the #' default) then the gamma prior simplifies to the unit-exponential #' distribution. 
decov <- function(regularization = 1, concentration = 1, shape = 1, scale = 1) {
  validate_parameter_value(regularization)
  validate_parameter_value(concentration)
  validate_parameter_value(shape)
  validate_parameter_value(scale)
  nlist(dist = "decov", regularization, concentration, shape, scale)
}

#' @rdname priors
#' @export
lkj <- function(regularization = 1, scale = 10, df = 1, autoscale = TRUE) {
  validate_parameter_value(regularization)
  validate_parameter_value(scale)
  validate_parameter_value(df)
  nlist(dist = "lkj", regularization, scale, df, autoscale)
}

#' @rdname priors
#' @export
dirichlet <- function(concentration = 1) {
  validate_parameter_value(concentration)
  nlist(dist = "dirichlet", concentration)
}

#' @rdname priors
#' @export
R2 <- function(location = NULL, what = c("mode", "mean", "median", "log")) {
  what <- match.arg(what)
  validate_R2_location(location, what)
  # df and scale are placeholders here; eta is computed later via make_eta()
  list(dist = "R2", location = location, what = what, df = 0, scale = 0)
}

#' @rdname priors
#' @export
#' @param family Not currently used.
default_prior_intercept <- function(family) {
  # family arg not used, but we can use in the future to do different things
  # based on family if necessary
  out <- normal(0, 2.5, autoscale = TRUE)
  out$location <- NULL # not determined yet
  out$default <- TRUE
  out$version <- utils::packageVersion("rstanarm")
  out
}

#' @rdname priors
#' @export
default_prior_coef <- function(family) {
  # family arg not used, but we can use in the future to do different things
  # based on family if necessary
  out <- normal(0, 2.5, autoscale = TRUE)
  out$default <- TRUE
  out$version <- utils::packageVersion("rstanarm")
  out
}

# internal ----------------------------------------------------------------

# Check for positive scale or df parameter (NULL ok)
#
# @param x The value to check.
# @return Either an error is thrown or \code{TRUE} is returned invisibly.
validate_parameter_value <- function(x) { nm <- deparse(substitute(x)) if (!is.null(x)) { if (!is.numeric(x)) stop(nm, " should be NULL or numeric", call. = FALSE) if (any(x <= 0)) stop(nm, " should be positive", call. = FALSE) } invisible(TRUE) } # Throw informative error if 'location' isn't valid for the particular 'what' # specified or isn't the right length. # # @param location,what User's location and what arguments to R2() # @return Either an error is thrown or TRUE is returned invisibly. # validate_R2_location <- function(location = NULL, what) { stopifnot(is.numeric(location)) if (length(location) > 1) stop( "The 'R2' function only accepts a single value for 'location', ", "which applies to the prior R^2. ", "If you are trying to put different priors on different coefficients ", "rather than specify a joint prior via 'R2', you can use stan_glm ", "which accepts a wider variety of priors, many of which allow ", "specifying arguments as vectors.", call. = FALSE ) if (what == "log") { if (location >= 0) stop("If 'what' is 'log' then location must be negative.", call. = FALSE) } else if (what == "mode") { if (location <= 0 || location > 1) stop("If 'what' is 'mode', location must be in (0,1].", call. = FALSE) } else { # "mean", "median" if (location <= 0 || location >= 1) stop("If 'what' is 'mean' or 'median', location must be in (0,1).", call. = FALSE) } invisible(TRUE) } # For the R2 prior, calculate LKJ shape eta # # @param location,what User's R2 prior arguments. # @param K number of predictors. # @return A positive scalar. # make_eta <- function(location, what = c("mode", "mean", "median", "log"), K) { stopifnot(length(location) == 1, is.numeric(location)) stopifnot(is.numeric(K), K == as.integer(K)) if (K == 0) stop("R2 prior is not applicable when there are no covariates.", call. 
= FALSE) what <- match.arg(what) half_K <- K / 2 if (what == "mode") { stopifnot(location > 0, location <= 1) if (K <= 2) stop(paste("R2 prior error.", "The mode of the beta distribution does not exist", "with fewer than three predictors.", "Specify 'what' as 'mean', 'median', or 'log' instead."), call. = FALSE) eta <- (half_K - 1 - location * half_K + location * 2) / location } else if (what == "mean") { stopifnot(location > 0, location < 1) eta <- (half_K - location * half_K) / location } else if (what == "median") { stopifnot(location > 0, location < 1) FUN <- function(eta) qbeta(0.5, half_K, qexp(eta)) - location eta <- qexp(uniroot(FUN, interval = 0:1)$root) } else { # what == "log" stopifnot(location < 0) FUN <- function(eta) digamma(half_K) - digamma(half_K + qexp(eta)) - location eta <- qexp(uniroot(FUN, interval = 0:1, f.lower = -location, f.upper = -.Machine$double.xmax)$root) } return(eta) } rstanarm/R/stanreg-methods.R0000644000176200001440000004232214406606742015545 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#' Methods for stanreg objects #' #' The methods documented on this page are actually some of the least important #' methods defined for \link[=stanreg-objects]{stanreg} objects. The most #' important methods are documented separately, each with its own page. Links to #' those pages are provided in the \strong{See Also} section, below. #' #' @name stanreg-methods #' @aliases VarCorr fixef ranef ngrps sigma nsamples #' #' @templateVar stanregArg object,x #' @template args-stanreg-object #' @param ... Ignored, except by the \code{update} method. See #' \code{\link{update}}. #' #' @details The methods documented on this page are similar to the methods #' defined for objects of class 'lm', 'glm', 'glmer', etc. However there are a #' few key differences: #' #' \describe{ #' \item{\code{residuals}}{ #' Residuals are \emph{always} of type \code{"response"} (not \code{"deviance"} #' residuals or any other type). However, in the case of \code{\link{stan_polr}} #' with more than two response categories, the residuals are the difference #' between the latent utility and its linear predictor. #' } #' \item{\code{coef}}{ #' Medians are used for point estimates. See the \emph{Point estimates} section #' in \code{\link{print.stanreg}} for more details. #' } #' \item{\code{se}}{ #' The \code{se} function returns standard errors based on #' \code{\link{mad}}. See the \emph{Uncertainty estimates} section in #' \code{\link{print.stanreg}} for more details. #' } #' \item{\code{confint}}{ #' For models fit using optimization, confidence intervals are returned via a #' call to \code{\link[stats:confint]{confint.default}}. If \code{algorithm} is #' \code{"sampling"}, \code{"meanfield"}, or \code{"fullrank"}, the #' \code{confint} will throw an error because the #' \code{\link{posterior_interval}} function should be used to compute Bayesian #' uncertainty intervals. 
#' } #' \item{\code{nsamples}}{ #' The number of draws from the posterior distribution obtained #' } #' } #' #' @seealso #' \itemize{ #' \item The \code{\link[=print.stanreg]{print}}, #' \code{\link[=summary.stanreg]{summary}}, and \code{\link{prior_summary}} #' methods for stanreg objects for information on the fitted model. #' \item \code{\link{launch_shinystan}} to use the ShinyStan GUI to explore a #' fitted \pkg{rstanarm} model. #' \item The \code{\link[=plot.stanreg]{plot}} method to plot estimates and #' diagnostics. #' \item The \code{\link{pp_check}} method for graphical posterior predictive #' checking. #' \item The \code{\link{posterior_predict}} and \code{\link{predictive_error}} #' methods for predictions and predictive errors. #' \item The \code{\link{posterior_interval}} and \code{\link{predictive_interval}} #' methods for uncertainty intervals for model parameters and predictions. #' \item The \code{\link[=loo.stanreg]{loo}}, \code{\link{kfold}}, and #' \code{\link{log_lik}} methods for leave-one-out or K-fold cross-validation, #' model comparison, and computing the log-likelihood of (possibly new) data. #' \item The \code{\link[=as.matrix.stanreg]{as.matrix}}, \code{as.data.frame}, #' and \code{as.array} methods to access posterior draws. #' } #' NULL #' @rdname stanreg-methods #' @export coef.stanreg <- function(object, ...) { if (is.mer(object)) return(coef_mer(object, ...)) object$coefficients } #' @rdname stanreg-methods #' @export #' @param parm For \code{confint}, an optional character vector of parameter #' names. #' @param level For \code{confint}, a scalar between \eqn{0} and \eqn{1} #' indicating the confidence level to use. #' confint.stanreg <- function(object, parm, level = 0.95, ...) { if (!used.optimizing(object)) { stop("For models fit using MCMC or a variational approximation please use ", "posterior_interval() to obtain Bayesian interval estimates.", call. = FALSE) } confint.default(object, parm, level, ...) 
} #' @rdname stanreg-methods #' @export fitted.stanreg <- function(object, ...) { object$fitted.values } #' @rdname stanreg-methods #' @export nobs.stanreg <- function(object, ...) { nrow(model.frame(object)) } #' @rdname stanreg-methods #' @export residuals.stanreg <- function(object, ...) { object$residuals } #' Extract standard errors #' #' Generic function for extracting standard errors from fitted models. #' #' @export #' @keywords internal #' @param object A fitted model object. #' @param ... Arguments to methods. #' @return Standard errors of model parameters. #' @seealso \code{\link{se.stanreg}} #' se <- function(object, ...) UseMethod("se") #' @rdname stanreg-methods #' @export se.stanreg <- function(object, ...) { object$ses } #' @rdname stanreg-methods #' @export #' @method update stanreg #' @param formula.,evaluate See \code{\link[stats]{update}}. #' update.stanreg <- function(object, formula., ..., evaluate = TRUE) { call <- getCall(object) if (is.null(call)) stop("'object' does not contain a 'call' component.", call. = FALSE) extras <- match.call(expand.dots = FALSE)$... if (!missing(formula.)) call$formula <- update.formula(formula(object), formula.) if (length(extras)) { existing <- !is.na(match(names(extras), names(call))) for (a in names(extras)[existing]) call[[a]] <- extras[[a]] if (any(!existing)) { call <- c(as.list(call), extras[!existing]) call <- as.call(call) } } if (!evaluate) return(call) # do this like lme4 update.merMod instead of update.default ff <- environment(formula(object)) pf <- parent.frame() sf <- sys.frames()[[1L]] tryCatch(eval(call, envir = ff), error = function(e) { tryCatch(eval(call, envir = sf), error = function(e) { eval(call, pf) }) }) } #' @rdname stanreg-methods #' @export #' @param correlation For \code{vcov}, if \code{FALSE} (the default) the #' covariance matrix is returned. If \code{TRUE}, the correlation matrix is #' returned instead. #' vcov.stanreg <- function(object, correlation = FALSE, ...) 
{ out <- object$covmat if (!correlation) return(out) cov2cor(out) } #' @rdname stanreg-methods #' @export #' @export fixef #' @importFrom lme4 fixef #' fixef.stanreg <- function(object, ...) { coefs <- object$coefficients coefs[b_names(names(coefs), invert = TRUE)] } #' @rdname stanreg-methods #' @export #' @export ngrps #' @importFrom lme4 ngrps #' ngrps.stanreg <- function(object, ...) { vapply(.flist(object), nlevels, 1) } #' @rdname stanreg-methods #' @export #' @export nsamples #' @importFrom rstantools nsamples nsamples.stanreg <- function(object, ...) { posterior_sample_size(object) } #' @rdname stanreg-methods #' @export #' @export ranef #' @importFrom lme4 ranef #' ranef.stanreg <- function(object, ...) { .glmer_check(object) point_estimates <- object$stan_summary[, select_median(object$algorithm)] out <- ranef_template(object) group_vars <- names(out) for (j in seq_along(out)) { tmp <- out[[j]] pars <- colnames(tmp) levs <- rownames(tmp) levs <- gsub(" ", "_", levs) for (p in seq_along(pars)) { stan_pars <- paste0("b[", pars[p], " ", group_vars[j], ":", levs, "]") tmp[[pars[p]]] <- unname(point_estimates[stan_pars]) } out[[j]] <- tmp } out } # Call lme4 to get the right structure for ranef objects #' @importFrom lme4 lmerControl glmerControl nlmerControl lmer glmer nlmer ranef_template <- function(object) { stan_fun <- object$stan_function %ORifNULL% "stan_glmer" if (stan_fun != "stan_gamm4") { new_formula <- formula(object) } else { # remove the part of the formula with s() terms just so we can call lme4 # to get the ranef template without error new_formula_rhs <- as.character(object$call$random)[2] new_formula_lhs <- as.character(formula(object))[2] new_formula <- as.formula(paste(new_formula_lhs, "~", new_formula_rhs)) } if (stan_fun != "stan_nlmer" && (is.gaussian(object$family$family) || is.beta(object$family$family))) { stan_fun <- "stan_lmer" } lme4_fun <- switch( stan_fun, "stan_lmer" = "lmer", "stan_nlmer" = "nlmer", "glmer" # for stan_glmer, 
stan_glmer.nb, stan_gamm4 (unless gaussian or beta) ) cntrl_args <- list(optimizer = "Nelder_Mead", optCtrl = list(maxfun = 1)) if (lme4_fun != "nlmer") { # nlmerControl doesn't allow these cntrl_args$check.conv.grad <- "ignore" cntrl_args$check.conv.singular <- "ignore" cntrl_args$check.conv.hess <- "ignore" cntrl_args$check.nlev.gtreq.5 <- "ignore" cntrl_args$check.nobs.vs.rankZ <- "ignore" cntrl_args$check.nobs.vs.nlev <- "ignore" cntrl_args$check.nobs.vs.nRE <- "ignore" if (lme4_fun == "glmer") { cntrl_args$check.response.not.const <- "ignore" } } cntrl <- do.call(paste0(lme4_fun, "Control"), cntrl_args) fit_args <- list( formula = new_formula, data = object$data, control = cntrl ) if (lme4_fun == "nlmer") { # create starting values to avoid error fit_args$start <- unlist(getInitial( object = as.formula(as.character(formula(object))[2]), data = object$data, control = list(maxiter = 0, warnOnly = TRUE) )) } family <- family(object) fam <- family$family if (!(fam %in% c("gaussian", "beta"))) { if (fam == "neg_binomial_2") { family <- stats::poisson() } else if (fam == "beta_binomial") { family <- stats::binomial() } else if (fam == "binomial" && family$link == "clogit") { family <- stats::binomial() } fit_args$family <- family } lme4_fit <- suppressWarnings(do.call(lme4_fun, args = fit_args)) ranef(lme4_fit) } #' @rdname stanreg-methods #' @export #' @export sigma #' @rawNamespace if(getRversion()>='3.3.0') importFrom(stats, sigma) else #' importFrom(lme4,sigma) #' sigma.stanreg <- function(object, ...) { if (!("sigma" %in% rownames(object$stan_summary))) return(1) object$stan_summary["sigma", select_median(object$algorithm)] } #' @rdname stanreg-methods #' @param sigma Ignored (included for compatibility with #' \code{\link[nlme]{VarCorr}}). #' @export #' @export VarCorr #' @importFrom nlme VarCorr #' @importFrom stats cov2cor VarCorr.stanreg <- function(x, sigma = 1, ...) { dots <- list(...) 
# used to pass stanmat with a single draw for posterior_survfit mat <- if ("stanmat" %in% names(dots)) as.matrix(dots$stanmat) else as.matrix(x) cnms <- .cnms(x) useSc <- "sigma" %in% colnames(mat) if (useSc) sc <- mat[,"sigma"] else sc <- 1 Sigma <- colMeans(mat[,grepl("^Sigma\\[", colnames(mat)), drop = FALSE]) nc <- vapply(cnms, FUN = length, FUN.VALUE = 1L) nms <- names(cnms) ncseq <- seq_along(nc) if (length(Sigma) == sum(nc * nc)) { # stanfit contains all Sigma entries spt <- split(Sigma, rep.int(ncseq, nc * nc)) ans <- lapply(ncseq, function(i) { Sigma <- matrix(0, nc[i], nc[i]) Sigma[,] <- spt[[i]] rownames(Sigma) <- colnames(Sigma) <- cnms[[i]] stddev <- sqrt(diag(Sigma)) corr <- cov2cor(Sigma) structure(Sigma, stddev = stddev, correlation = corr) }) } else { # stanfit contains lower tri Sigma entries spt <- split(Sigma, rep.int(ncseq, (nc * (nc + 1)) / 2)) ans <- lapply(ncseq, function(i) { Sigma <- matrix(0, nc[i], nc[i]) Sigma[lower.tri(Sigma, diag = TRUE)] <- spt[[i]] Sigma <- Sigma + t(Sigma) diag(Sigma) <- diag(Sigma) / 2 rownames(Sigma) <- colnames(Sigma) <- cnms[[i]] stddev <- sqrt(diag(Sigma)) corr <- cov2cor(Sigma) structure(Sigma, stddev = stddev, correlation = corr) }) } names(ans) <- nms structure(ans, sc = mean(sc), useSc = useSc, class = "VarCorr.merMod") } # Exported but doc kept internal ---------------------------------------------- #' family method for stanreg objects #' #' @keywords internal #' @export #' @param object,... See \code{\link[stats]{family}}. family.stanreg <- function(object, ...) object$family #' model.frame method for stanreg objects #' #' @keywords internal #' @export #' @param formula,... See \code{\link[stats]{model.frame}}. #' @param fixed.only See \code{\link[lme4:merMod-class]{model.frame.merMod}}. #' model.frame.stanreg <- function(formula, fixed.only = FALSE, ...) 
{ if (is.mer(formula)) { fr <- formula$glmod$fr if (fixed.only) { ff <- formula(formula, fixed.only = TRUE) vars <- rownames(attr(terms.formula(ff), "factors")) fr <- fr[vars] } return(fr) } NextMethod("model.frame") } #' model.matrix method for stanreg objects #' #' @keywords internal #' @export #' @param object,... See \code{\link[stats]{model.matrix}}. #' model.matrix.stanreg <- function(object, ...) { if (inherits(object, "gamm4")) return(object$jam$X) if (is.mer(object)) return(object$glmod$X) NextMethod("model.matrix") } #' formula method for stanreg objects #' #' @keywords internal #' @export #' @param x A stanreg object. #' @param ... Can contain \code{fixed.only} and \code{random.only} arguments #' that both default to \code{FALSE}. #' formula.stanreg <- function(x, ..., m = NULL) { if (is.mer(x) && !isTRUE(x$stan_function == "stan_gamm4")) return(formula_mer(x, ...)) x$formula } #' terms method for stanreg objects #' @export #' @keywords internal #' @param x,fixed.only,random.only,... See lme4:::terms.merMod. #' terms.stanreg <- function(x, ..., fixed.only = TRUE, random.only = FALSE) { if (!is.mer(x)) return(NextMethod("terms")) fr <- x$glmod$fr if (missing(fixed.only) && random.only) fixed.only <- FALSE if (fixed.only && random.only) stop("'fixed.only' and 'random.only' can't both be TRUE.", call. = FALSE) Terms <- attr(fr, "terms") if (fixed.only) { Terms <- terms.formula(formula(x, fixed.only = TRUE)) attr(Terms, "predvars") <- attr(terms(fr), "predvars.fixed") } if (random.only) { Terms <- terms.formula(lme4::subbars(formula.stanreg(x, random.only = TRUE))) attr(Terms, "predvars") <- attr(terms(fr), "predvars.random") } return(Terms) } # internal ---------------------------------------------------------------- .glmer_check <- function(object) { if (!is.mer(object)) stop("This method is for stan_glmer and stan_lmer models only.", call. = FALSE) } .cnms <- function(object, ...) UseMethod(".cnms") .cnms.stanreg <- function(object, ...) 
{ .glmer_check(object) object$glmod$reTrms$cnms } .flist <- function(object, ...) UseMethod(".flist") .flist.stanreg <- function(object, ...) { .glmer_check(object) as.list(object$glmod$reTrms$flist) } coef_mer <- function(object, ...) { if (length(list(...))) warning("Arguments named \"", paste(names(list(...)), collapse = ", "), "\" ignored.", call. = FALSE) fef <- data.frame(rbind(fixef(object)), check.names = FALSE) ref <- ranef(object) refnames <- unlist(lapply(ref, colnames)) missnames <- setdiff(refnames, names(fef)) nmiss <- length(missnames) if (nmiss > 0) { fillvars <- setNames(data.frame(rbind(rep(0, nmiss))), missnames) fef <- cbind(fillvars, fef) } val <- lapply(ref, function(x) fef[rep.int(1L, nrow(x)), , drop = FALSE]) for (i in seq(a = val)) { refi <- ref[[i]] row.names(val[[i]]) <- row.names(refi) nmsi <- colnames(refi) if (!all(nmsi %in% names(fef))) stop("Unable to align random and fixed effects.", call. = FALSE) for (nm in nmsi) val[[i]][[nm]] <- val[[i]][[nm]] + refi[, nm] } structure(val, class = "coef.mer") } justRE <- function(f, response = FALSE) { response <- if (response && length(f) == 3) f[[2]] else NULL reformulate(paste0("(", vapply(lme4::findbars(f), function(x) paste(deparse(x, 500L), collapse = " "), ""), ")"), response = response) } formula_mer <- function (x, fixed.only = FALSE, random.only = FALSE, ...) { if (missing(fixed.only) && random.only) fixed.only <- FALSE if (fixed.only && random.only) stop("'fixed.only' and 'random.only' can't both be TRUE.", call. = FALSE) fr <- x$glmod$fr if (is.null(form <- attr(fr, "formula"))) { if (!grepl("lmer$", deparse(getCall(x)[[1L]]))) stop("Can't find formula stored in model frame or call.", call. 
= FALSE) form <- as.formula(formula(getCall(x), ...)) } if (fixed.only) { form <- attr(fr, "formula") form[[length(form)]] <- lme4::nobars(form[[length(form)]]) } if (random.only) form <- justRE(form, response = TRUE) return(form) } rstanarm/R/stanmodels.R0000644000176200001440000000327713365374540014621 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# This file is only intended to be used during the installation process # nocov start MODELS_HOME <- "src" if (!file.exists(MODELS_HOME)) MODELS_HOME <- sub("R$", "src", getwd()) stan_files <- dir(file.path(MODELS_HOME, "stan_files"), pattern = "stan$", full.names = TRUE) stanmodels <- lapply(stan_files, function(f) { model_cppname <- sub("\\.stan$", "", basename(f)) stanfit <- rstan::stanc(f, allow_undefined = TRUE, obfuscate_model_name = FALSE) stanfit$model_cpp <- list(model_cppname = stanfit$model_name, model_cppcode = stanfit$cppcode) return(do.call(methods::new, args = c(stanfit[-(1:3)], Class = "stanmodel", mk_cppmodule = function(x) get(paste0("model_", model_cppname))))) } ) names(stanmodels) <- sub("\\.stan$", "", basename(stan_files)) rm(MODELS_HOME) # nocov end rstanarm/R/stan_biglm.fit.R0000644000176200001440000002534613722762571015353 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#' @rdname stan_biglm #' @export #' @param b A numeric vector of OLS coefficients, excluding the intercept #' @param R A square upper-triangular matrix from the QR decomposition of the #' design matrix, excluding the intercept #' @param SSR A numeric scalar indicating the sum-of-squared residuals for OLS #' @param N A integer scalar indicating the number of included observations #' @param has_intercept A logical scalar indicating whether to add an intercept #' to the model when estimating it. #' @param importance_resampling Logical scalar indicating whether to use #' importance resampling when approximating the posterior distribution with #' a multivariate normal around the posterior mode, which only applies #' when \code{algorithm} is \code{"optimizing"} but defaults to \code{TRUE} #' in that case #' @param keep_every Positive integer, which defaults to 1, but can be higher #' in order to thin the importance sampling realizations and also only #' apples when \code{algorithm} is \code{"optimizing"} but defaults to #' \code{TRUE} in that case #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' # create inputs #' ols <- lm(mpg ~ wt + qsec + am, data = mtcars, # all row are complete so ... 
#' na.action = na.exclude) # not necessary in this case #' b <- coef(ols)[-1] #' R <- qr.R(ols$qr)[-1,-1] #' SSR <- crossprod(ols$residuals)[1] #' not_NA <- !is.na(fitted(ols)) #' N <- sum(not_NA) #' xbar <- colMeans(mtcars[not_NA,c("wt", "qsec", "am")]) #' y <- mtcars$mpg[not_NA] #' ybar <- mean(y) #' s_y <- sd(y) #' post <- stan_biglm.fit(b, R, SSR, N, xbar, ybar, s_y, prior = R2(.75), #' # the next line is only to make the example go fast #' chains = 1, iter = 500, seed = 12345) #' cbind(lm = b, stan_lm = rstan::get_posterior_mean(post)[13:15,]) # shrunk #' } stan_biglm.fit <- function(b, R, SSR, N, xbar, ybar, s_y, has_intercept = TRUE, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank", "optimizing"), adapt_delta = NULL, importance_resampling = TRUE, keep_every = 1) { if (prior_PD && is.null(prior_intercept)) { msg <- "The default flat prior on the intercept is not recommended when 'prior_PD' is TRUE." warning(msg, call. = FALSE, immediate. = TRUE) warning(msg, call. = FALSE, immediate. 
= FALSE) } J <- 1L N <- array(N, c(J)) K <- ncol(R) cn <- names(xbar) if (is.null(cn)) cn <- names(b) R_inv <- backsolve(R, diag(K)) JK <- c(J, K) xbarR_inv <- array(c(xbar %*% R_inv), JK) Rb <- array(R %*% b, JK) SSR <- array(SSR, J) s_Y <- array(s_y, J) center_y <- if (isTRUE(all.equal(matrix(0, J, K), xbar))) ybar else 0.0 ybar <- array(ybar, J) if (!length(prior)) { prior_dist <- 0L eta <- 0 } else { prior_dist <- 1L eta <- prior$eta <- make_eta(prior$location, prior$what, K = K) } if (!length(prior_intercept)) { prior_dist_for_intercept <- 0L prior_mean_for_intercept <- 0 prior_scale_for_intercept <- 0 } else { if (!identical(prior_intercept$dist, "normal")) stop("'prior_intercept' must be 'NULL' or a call to 'normal'.") prior_dist_for_intercept <- 1L prior_mean_for_intercept <- prior_intercept$location prior_scale_for_intercept <- prior_intercept$scale if (is.null(prior_scale_for_intercept)) prior_scale_for_intercept <- 0 # also add scale back to prior_intercept to pass to summarize_lm_prior later prior_intercept$scale <- prior_scale_for_intercept } dim(R_inv) <- c(J, dim(R_inv)) # initial values R2 <- array(1 - SSR[1] / ((N - 1) * s_Y^2), J) log_omega <- array(0, ifelse(prior_PD == 0, J, 0)) init_fun <- function(chain_id) { out <- list(R2 = R2, log_omega = log_omega) if (has_intercept == 0L) out$z_alpha <- double() return(out) } stanfit <- stanmodels$lm standata <- nlist(K, has_intercept, prior_dist, prior_dist_for_intercept, prior_mean_for_intercept, prior_scale_for_intercept, prior_PD, eta, J, N, xbarR_inv, ybar, center_y, s_Y, Rb, SSR, R_inv) pars <- c(if (has_intercept) "alpha", "beta", "sigma", if (prior_PD == 0) "log_omega", "R2", "mean_PPD") algorithm <- match.arg(algorithm) if (algorithm == "optimizing") { optimizing_args <- list(...) 
if (is.null(optimizing_args$draws)) optimizing_args$draws <- 1000L optimizing_args$object <- stanfit optimizing_args$data <- standata optimizing_args$constrained <- TRUE optimizing_args$importance_resampling <- importance_resampling if (is.null(optimizing_args$tol_rel_grad)) optimizing_args$tol_rel_grad <- 10000L out <- do.call(optimizing, args = optimizing_args) check <- check_stanfit(out) if (!isTRUE(check)) return(standata) if (K == 1) out$theta_tilde[,'R2[1]'] <- (out$theta_tilde[,'R2[1]']) ^ 2 pars_idx <- unlist(sapply(1:length(pars), function(i) { which(grepl(paste('^', pars[i], sep=''), names(out$par))) })) nrows <- dim(out$theta_tilde)[1] out$theta_tilde <- out$theta_tilde[,pars_idx] dim(out$theta_tilde) <- c(nrows, length(pars_idx)) new_names <- c(if (has_intercept) "(Intercept)", cn, "sigma", if (prior_PD == 0) "log-fit_ratio", "R2", "mean_PPD") colnames(out$theta_tilde) <- new_names if (optimizing_args$draws > 0) { # begin: psis diagnostics and importance resampling lr <- out$log_p-out$log_g lr[lr == -Inf] <- -800 p <- suppressWarnings(loo::psis(lr, r_eff = 1)) p$log_weights <- p$log_weights - log_sum_exp(p$log_weights) theta_pareto_k <- suppressWarnings(apply(out$theta_tilde, 2L, function(col) { if (all(is.finite(col))) loo::psis(log1p(col ^ 2) / 2 + lr, r_eff = 1)$diagnostics$pareto_k else NaN })) ## todo: change fixed threshold to an option if (any(theta_pareto_k > 0.7, na.rm = TRUE)) { warning("Some Pareto k diagnostic values are too high. Resampling disabled.", "Decreasing tol_rel_grad may help if optimization has terminated prematurely.", " Otherwise consider using sampling instead of optimizing.", call. = FALSE, immediate. = TRUE) importance_resampling <- FALSE } else if (any(theta_pareto_k > 0.5, na.rm = TRUE)) { warning("Some Pareto k diagnostic values are slightly high.", " Increasing the number of draws or decreasing tol_rel_grad may help.", call. = FALSE, immediate. 
= TRUE) } out$psis <- nlist(pareto_k = p$diagnostics$pareto_k, n_eff = p$diagnostics$n_eff / keep_every) } else { theta_pareto_k <- rep(NaN, length(new_names)) importance_resampling <- FALSE } if (importance_resampling) { ir_idx <- .sample_indices(exp(p$log_weights), n_draws = ceiling(optimizing_args$draws / keep_every)) out$theta_tilde <- out$theta_tilde[ir_idx,] out$ir_idx <- ir_idx ## SIR mcse and n_eff w_sir <- as.numeric(table(ir_idx)) / length(ir_idx) mcse <- apply(out$theta_tilde[!duplicated(ir_idx),], 2L, function(col) { if (all(is.finite(col))) sqrt(sum(w_sir ^ 2 * (col-mean(col)) ^ 2)) else NaN }) n_eff <- round(apply(out$theta_tilde[!duplicated(ir_idx),], 2L, var) / (mcse^2), digits = 0) } else { out$ir_idx <- NULL mcse <- rep(NaN, length(theta_pareto_k)) n_eff <- rep(NaN, length(theta_pareto_k)) } out$diagnostics <- cbind(mcse, theta_pareto_k, n_eff) colnames(out$diagnostics) <- c("mcse", "khat", "n_eff") ## end: psis diagnostics and SIR out$stanfit <- suppressMessages(sampling(stanfit, data = standata, chains = 0)) prior_info <- summarize_lm_prior(prior, prior_intercept) return(structure(out, prior.info = prior_info)) } else if (algorithm %in% c("meanfield", "fullrank")) { stanfit <- rstan::vb(stanfit, data = standata, pars = pars, algorithm = algorithm, ...) 
} else { sampling_args <- set_sampling_args( object = stanfit, prior = prior, user_dots = list(...), user_adapt_delta = adapt_delta, init = init_fun, data = standata, pars = pars, show_messages = FALSE) stanfit <- do.call(sampling, sampling_args) } check <- check_stanfit(stanfit) if (!isTRUE(check)) return(standata) if (K == 1) stanfit@sim$samples <- lapply(stanfit@sim$samples, FUN = function(x) { x$`R2[1]` <- (x$`R2[1]`)^2 return(x) }) new_names <- c(if (has_intercept) "(Intercept)", cn, "sigma", if (prior_PD == 0) "log-fit_ratio", "R2", "mean_PPD", "log-posterior") stanfit@sim$fnames_oi <- new_names prior_info <- summarize_lm_prior(prior, prior_intercept) structure(stanfit, prior.info = prior_info) } # internal ---------------------------------------------------------------- # Create "prior.info" attribute needed for prior_summary() # # @param prior, prior_intercept User's prior and prior_intercept specifications # @return A named list with elements 'prior' and 'prior_intercept' containing # the values needed for prior_summary summarize_lm_prior <- function(prior, prior_intercept) { flat <- !length(prior) flat_int <- !length(prior_intercept) list( prior = list( dist = ifelse(flat, NA, "R2"), location = ifelse(flat, NA, prior$location), what = ifelse(flat, NA, prior$what) ), prior_intercept = list( dist = ifelse(flat_int, NA, "normal"), location = ifelse(flat_int, NA, prior_intercept$location), scale = ifelse(flat_int, NA, prior_intercept$scale) ) ) } rstanarm/R/stan_mvmer.R0000644000176200001440000002367714370470372014626 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # Copyright (C) 2016, 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian multivariate generalized linear models with correlated #' group-specific terms via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Bayesian inference for multivariate GLMs with group-specific coefficients #' that are assumed to be correlated across the GLM submodels. #' #' @export #' @template args-dots #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' @template args-max_treedepth #' @template args-QR #' @template args-sparse #' #' @param formula A two-sided linear formula object describing both the #' fixed-effects and random-effects parts of the longitudinal submodel #' similar in vein to formula specification in the \strong{lme4} package #' (see \code{\link[lme4]{glmer}} or the \strong{lme4} vignette for details). #' Note however that the double bar (\code{||}) notation is not allowed #' when specifying the random-effects parts of the formula, and neither #' are nested grouping factors (e.g. \code{(1 | g1/g2))} or #' \code{(1 | g1:g2)}, where \code{g1}, \code{g2} are grouping factors. #' For a multivariate GLM this should be a list of such formula objects, #' with each element of the list providing the formula for one of the #' GLM submodels. #' @param data A data frame containing the variables specified in #' \code{formula}. 
For a multivariate GLM, this can #' be either a single data frame which contains the data for all #' GLM submodels, or it can be a list of data frames where each #' element of the list provides the data for one of the GLM submodels. #' @param family The family (and possibly also the link function) for the #' GLM submodel(s). See \code{\link[lme4]{glmer}} for details. #' If fitting a multivariate GLM, then this can optionally be a #' list of families, in which case each element of the list specifies the #' family for one of the GLM submodels. In other words, a different family #' can be specified for each GLM submodel. #' @param weights Same as in \code{\link[stats]{glm}}, #' except that when fitting a multivariate GLM and a list of data frames #' is provided in \code{data} then a corresponding list of weights #' must be provided. If weights are #' provided for one of the GLM submodels, then they must be provided for #' all GLM submodels. #' @param prior,prior_intercept,prior_aux Same as in \code{\link{stan_glmer}} #' except that for a multivariate GLM a list of priors can be provided for #' any of \code{prior}, \code{prior_intercept} or \code{prior_aux} arguments. #' That is, different priors can optionally be specified for each of the GLM #' submodels. If a list is not provided, then the same prior distributions are #' used for each GLM submodel. Note that the \code{"product_normal"} prior is #' not allowed for \code{stan_mvmer}. #' @param prior_covariance Cannot be \code{NULL}; see \code{\link{priors}} for #' more information about the prior distributions on covariance matrices. #' Note however that the default prior for covariance matrices in #' \code{stan_mvmer} is slightly different to that in \code{\link{stan_glmer}} #' (the details of which are described on the \code{\link{priors}} page). #' @param init The method for generating initial values. See #' \code{\link[rstan]{stan}}. 
#' #' @details The \code{stan_mvmer} function can be used to fit a multivariate #' generalized linear model (GLM) with group-specific terms. The model consists #' of distinct GLM submodels, each which contains group-specific terms; within #' a grouping factor (for example, patient ID) the grouping-specific terms are #' assumed to be correlated across the different GLM submodels. It is #' possible to specify a different outcome type (for example a different #' family and/or link function) for each of the GLM submodels. \cr #' \cr #' Bayesian estimation of the model is performed via MCMC, in the same way as #' for \code{\link{stan_glmer}}. Also, similar to \code{\link{stan_glmer}}, #' an unstructured covariance matrix is used for the group-specific terms #' within a given grouping factor, with priors on the terms of a decomposition #' of the covariance matrix.See \code{\link{priors}} for more information about #' the priors distributions that are available for the covariance matrices, #' the regression coefficients and the intercept and auxiliary parameters. #' #' @return A \link[=stanreg-objects]{stanmvreg} object is returned. #' #' @seealso \code{\link{stan_glmer}}, \code{\link{stan_jm}}, #' \code{\link{stanreg-objects}}, \code{\link{stanmvreg-methods}}, #' \code{\link{print.stanmvreg}}, \code{\link{summary.stanmvreg}}, #' \code{\link{posterior_predict}}, \code{\link{posterior_interval}}. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { #' \donttest{ #' ##### #' # A multivariate GLM with two submodels. For the grouping factor 'id', the #' # group-specific intercept from the first submodel (logBili) is assumed to #' # be correlated with the group-specific intercept and linear slope in the #' # second submodel (albumin) #' f1 <- stan_mvmer( #' formula = list( #' logBili ~ year + (1 | id), #' albumin ~ sex + year + (year | id)), #' data = pbcLong, #' # this next line is only to keep the example small in size! 
#' chains = 1, cores = 1, seed = 12345, iter = 1000)
#' summary(f1)
#'
#' #####
#' # A multivariate GLM with one bernoulli outcome and one
#' # gaussian outcome. We will artificially create the bernoulli
#' # outcome by dichotomising log serum bilirubin
#' pbcLong$ybern <- as.integer(pbcLong$logBili >= mean(pbcLong$logBili))
#' f2 <- stan_mvmer(
#'   formula = list(
#'     ybern ~ year + (1 | id),
#'     albumin ~ sex + year + (year | id)),
#'   data = pbcLong,
#'   family = list(binomial, gaussian),
#'   chains = 1, cores = 1, seed = 12345, iter = 1000)
#' }
#' }
stan_mvmer <- function(formula, data, family = gaussian, weights,
                       prior = normal(autoscale=TRUE),
                       prior_intercept = normal(autoscale=TRUE),
                       prior_aux = cauchy(0, 5, autoscale=TRUE),
                       prior_covariance = lkj(autoscale=TRUE),
                       prior_PD = FALSE,
                       algorithm = c("sampling", "meanfield", "fullrank"),
                       adapt_delta = NULL, max_treedepth = 10L,
                       init = "random", QR = FALSE, sparse = FALSE, ...) {
  #-----------------------------
  # Pre-processing of arguments
  #-----------------------------
  algorithm <- match.arg(algorithm)
  # Features not yet supported by the underlying Stan model: error out
  # early rather than silently ignoring the user's request.
  if (missing(weights)) weights <- NULL
  if (!is.null(weights))
    stop("'weights' are not yet implemented.")
  if (QR)
    stop("'QR' decomposition is not yet implemented.")
  if (sparse)
    stop("'sparse' option is not yet implemented.")

  # Formula: coerced to a list of formulas, one per longitudinal submodel;
  # M is the number of submodels.
  formula <- validate_arg(formula, "formula"); M <- length(formula)
  if (M > 3L)
    stop("'stan_mvmer' is currently limited to a maximum of 3 outcomes.")

  # Data: one data frame per submodel, keeping only the variables that
  # appear in the corresponding formula.
  data <- validate_arg(data, "data.frame", validate_length = M)
  data <- xapply(formula, data, FUN = get_all_vars) # drop additional vars

  # Family: one family per submodel, validated against the supported set.
  ok_classes <- c("function", "family", "character")
  ok_families <- c("binomial", "gaussian", "Gamma", "inverse.gaussian",
                   "poisson", "neg_binomial_2")
  family <- validate_arg(family, ok_classes, validate_length = M)
  family <- lapply(family, validate_famlink, ok_families)

  # Observation weights
  # NOTE(review): currently unreachable -- any non-NULL 'weights' already
  # triggered an error above, so this validation only becomes live once
  # weights are actually implemented.
  if (!is.null(weights)) {
    if (!is(weights, "list"))
      weights <- rep(list(weights), M)
    weights <- lapply(weights, validate_weights)
  }

  # Is prior* already a list? Broadcast each prior argument to a list of
  # length M (one element per submodel) if a single prior was supplied.
  prior <- broadcast_prior(prior, M)
  prior_intercept <- broadcast_prior(prior_intercept, M)
  prior_aux <- broadcast_prior(prior_aux, M)

  #-----------
  # Fit model
  #-----------
  # Delegate to the shared stan_mvmer/stan_jm fitting routine; no event
  # submodel arguments are passed, so only the multivariate GLMM part of
  # the joint model machinery is exercised.
  stanfit <- stan_jm.fit(formulaLong = formula, dataLong = data,
                         family = family, weights = weights,
                         priorLong = prior,
                         priorLong_intercept = prior_intercept,
                         priorLong_aux = prior_aux,
                         prior_covariance = prior_covariance,
                         prior_PD = prior_PD, algorithm = algorithm,
                         adapt_delta = adapt_delta,
                         max_treedepth = max_treedepth, init = init,
                         QR = QR, sparse = sparse, ...)
  # If fitting did not yield a stanfit object (e.g. an early return of
  # diagnostic data), pass it through unchanged. NOTE(review): with the
  # algorithm choices above, algorithm != "optimizing" is always TRUE here.
  if (algorithm != "optimizing" && !is(stanfit, "stanfit"))
    return(stanfit)
  # stan_jm.fit attaches parts of the fitted model as attributes; pull
  # them off and then strip them from the stanfit object itself.
  y_mod <- attr(stanfit, "y_mod")
  cnms <- attr(stanfit, "cnms")
  flevels <- attr(stanfit, "flevels")
  prior_info <- attr(stanfit, "prior_info")
  stanfit <- drop_attributes(stanfit, "y_mod", "cnms", "flevels", "prior_info")
  # Summaries used by the stanmvreg methods: terms objects, number of
  # observations per submodel, number of levels per grouping factor.
  terms <- fetch(y_mod, "terms")
  n_yobs <- fetch_(y_mod, "x", "N")
  n_grps <- sapply(flevels, n_distinct)
  # Assemble the components of the returned stanmvreg object.
  fit <- nlist(stanfit, formula, family, weights, M, cnms, flevels,
               n_grps, n_yobs, algorithm, terms, glmod = y_mod, data,
               prior.info = prior_info, stan_function = "stan_mvmer",
               call = match.call(expand.dots = TRUE))
  out <- stanmvreg(fit)
  return(out)
}
rstanarm/R/jm_make_assoc_parts.R0000644000176200001440000002514514406606742016451 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Function to construct quantities, primarily design matrices (x, Zt), that
# will be used to evaluate the longitudinal submodel contributions to the
# association structure in the event submodel. For example, the design matrices
# evaluated at the quadpoints, quadpoints + eps, lagged quadpoints, auc quadpoints,
# and so on. Exactly what quantities are returned depends on what is specified
# in the use_function argument.
#
# @param use_function The function to call which will return the design
#   matrices for eta, eps, lag, auc, etc. Generally either
#   'make_assoc_parts_for_stan' or 'pp_data'.
# @param newdata A model frame used for constructing the design matrices
# @param assoc A list with information about the association structure for
#   the one longitudinal submodel.
# @param grp_stuff A list with information about any lower level grouping
#   factors that are clustered within patients and how to handle them in
#   the association structure.
# @param ids,times The subject IDs and times vectors that correspond to the
#   event/censoring and quadrature times at which the design matrices will
#   need to be evaluated for the association structure.
# @param id_var The name of the ID variable.
# @param time_var The name of the time variable.
# @param epsilon The half-width of the central difference used for
#   numerically calculating the derivative of the design matrix for slope
#   based association structures.
# @param auc_qnodes Integer specifying the number of GK quadrature nodes to
#   use in the integral/AUC based association structures.
# @param ...
Additional arguments passed to use_function
# @return A named list
make_assoc_parts <- function(use_function = make_assoc_parts_for_stan,
                             newdata, assoc, grp_stuff, ids, times,
                             id_var, time_var, epsilon = 1E-5,
                             auc_qnodes = 15L, ...) {
  # rolling_merge (below) relies on data.table's rolling joins
  if (!requireNamespace("data.table"))
    stop("the 'data.table' package must be installed to use this function")
  # If TRUE, slope association structures are handled by differentiating the
  # X and Z design matrices directly (central differences); if FALSE, Stan is
  # given time-shifted design matrices and differences the linear predictor.
  eps_uses_derivative_of_x <- TRUE # experimental

  # Apply lag: shift the evaluation times back by the requested lag
  lag <- assoc[["which_lag"]]
  if (!lag == 0)
    times <- set_lag(times, lag)

  # Broadcast ids and times if there is lower level clustering
  if (grp_stuff$has_grp) {
    # grps corresponding to each id
    grps <- as.vector(unlist(grp_stuff$grp_list[as.character(ids)]))
    # freq by which to expand each ids and times element
    freq_seq <- grp_stuff$grp_freq[as.character(ids)]
    # rep each patient id and prediction time the required num of times
    ids <- rep(ids, freq_seq)
    times <- rep(times, freq_seq)
    # indices for collapsing across clusters within patients
    grp_idx <- get_idx_array(freq_seq)
  } else grps <- grp_idx <- NULL

  # Identify row in longitudinal data closest to event time or quadrature point
  # NB if the quadrature point is earlier than the first observation time,
  # then covariate values are carried back to avoid missing values.
  # In any other case, the observed covariate values from the most recent
  # observation time preceding the quadrature point are carried forward to
  # represent the covariate value(s) at the quadrature point. (To avoid
  # missingness there is no limit on how far forwards or how far backwards
  # covariate values can be carried). If no time varying covariates are
  # present in the longitudinal submodel (other than the time variable)
  # then nothing is carried forward or backward.
  dataQ <- rolling_merge(data = newdata, ids = ids, times = times, grps = grps)

  # Design matrices evaluated at the quadrature points themselves
  mod_eta <- use_function(newdata = dataQ, ...)

  # If association structure is based on slope, then calculate design
  # matrices under a time shift of epsilon
  sel_slope <- grep("etaslope", names(assoc))
  if (any(unlist(assoc[sel_slope]))) {
    if (eps_uses_derivative_of_x) {
      # slope is evaluated by passing Stan the derivatives of the X and Z
      # design matrices directly, each evaluated using central differences
      # with a half-width equal to epsilon
      dataQ_pos <- dataQ_neg <- dataQ
      dataQ_neg[[time_var]] <- dataQ_neg[[time_var]] - epsilon
      dataQ_pos[[time_var]] <- dataQ_pos[[time_var]] + epsilon
      mod_neg <- use_function(newdata = dataQ_neg, ...)
      mod_pos <- use_function(newdata = dataQ_pos, ...)
      mod_eps <- mod_pos
      mod_eps$x <- (mod_pos$x - mod_neg$x ) / (2 * epsilon) # derivative of X
      mod_eps$xtemp <- (mod_pos$xtemp - mod_neg$xtemp) / (2 * epsilon)
      mod_eps$z <- xapply(mod_pos$z, mod_neg$z, # derivative of z
                          FUN = function(x, y) (x - y) / (2 * epsilon))
      if (!is.null(mod_eps$Zt))
        mod_eps$Zt <- (mod_pos$Zt - mod_neg$Zt) / (2 * epsilon)
    } else {
      # slope is evaluated by passing Stan the X and Z design matrices under
      # a time shift of epsilon and then evaluating the derivative of the
      # linear predictor in Stan using a one-sided difference
      dataQ_eps <- dataQ
      dataQ_eps[[time_var]] <- dataQ_eps[[time_var]] + epsilon
      mod_eps <- use_function(newdata = dataQ_eps, ...)
    }
  } else mod_eps <- NULL

  # If association structure is based on area under the marker trajectory, then
  # calculate design matrices at the subquadrature points
  sel_auc <- grep("etaauc|muauc", names(assoc))
  if (any(unlist(assoc[sel_auc]))) {
    if (grp_stuff$has_grp)
      stop2("'etaauc' and 'muauc' not yet implemented when there is a grouping ",
            "factor clustered within patients.")
    # Return a design matrix that is (qnodes * auc_qnodes * Npat) rows
    auc_qpts <- uapply(times, function(x)
      lapply(get_quadpoints(auc_qnodes)$points, unstandardise_qpts, 0, x))
    auc_qwts <- uapply(times, function(x)
      lapply(get_quadpoints(auc_qnodes)$weights, unstandardise_qwts, 0, x))
    ids2 <- rep(ids, each = auc_qnodes)
    dataQ_auc <- rolling_merge(data = newdata, ids = ids2, times = auc_qpts)
    mod_auc <- use_function(newdata = dataQ_auc, ...)
  } else mod_auc <- auc_qpts <- auc_qwts <- NULL

  # If association structure is based on interactions with data, then calculate
  # the design matrix which will be multiplied by etavalue, etaslope, muvalue or muslope
  sel_data <- grep("_data", names(assoc), value = TRUE)
  X_data <- xapply(sel_data, FUN = function(i) {
    form <- assoc[["which_formulas"]][[i]]
    if (length(form)) {
      form <- as.formula(form)
      # every variable in the interaction formula must be present in the
      # merged quadrature-point data
      vars <- rownames(attr(terms.formula(form), "factors"))
      if (is.null(vars))
        stop2("No variables found in the formula for the '", i, "' association structure.")
      sel <- which(!vars %in% colnames(dataQ))
      if (length(sel))
        stop2("The following variables were specified in the formula for the '",
              i, "' association structure, but they cannot be found in the data: ",
              paste0(vars[sel], collapse = ", "))
      mf <- stats::model.frame(form, data = dataQ)
      X <- stats::model.matrix(form, data = mf)
      X <- drop_intercept(X)
      if (!ncol(X))
        stop2("Bug found: A formula was specified for the '", i, "' association ",
              "structure, but the resulting design matrix has no columns.")
    } else {
      # no formula for this association term: zero-column placeholder
      X <- matrix(0, nrow(dataQ), 0)
    }
    X
  })
  K_data <- sapply(X_data, ncol)
  X_bind_data <- do.call(cbind, X_data)

  # Return the parts, with bookkeeping quantities stored as attributes
  ret <- nlist(times, mod_eta, mod_eps, mod_auc, K_data, X_data,
               X_bind_data, grp_stuff)
  structure(ret, times = times, lag = lag, epsilon = epsilon, grp_idx = grp_idx,
            auc_qnodes = auc_qnodes, auc_qpts = auc_qpts, auc_qwts = auc_qwts,
            eps_uses_derivative_of_x = eps_uses_derivative_of_x)
}

# Return design matrices for the longitudinal submodel. This is
# designed to generate the design matrices evaluated at the GK
# quadrature points, because it uses a 'terms' object to generate
# the model frame, and that terms object should have been generated
# from the longitudinal submodel's model frame when it was evaluated
# at the observation times; i.e. the predvars and X_bar would have
# come from the design matrices at the observation times, not the
# quadrature points.
#
# @param newdata A data frame; the data for the longitudinal submodel
#   at the event and quadrature points.
# @param y_mod The list returned by handle_y_mod, containing info about
#   the longitudinal submodel evaluated at the observation (not quadrature)
#   times, for example, the x_bar means used for centering, the predvars
#   attribute for the longitudinal submodel formula, and so on.
# @param include_Zt Whether to include the sparse Zt matrix in the
#   returned parts.
make_assoc_parts_for_stan <- function(newdata, y_mod, include_Zt = TRUE) {
  # Rebuild the model frame from the predvars-based formula (response
  # dropped) so that spline bases, factor contrasts, etc match the
  # design matrices constructed at the observation times.
  fm <- use_predvars(y_mod, keep_response = FALSE)
  mf <- stats::model.frame(lme4::subbars(fm), as.data.frame(newdata))

  # Fixed effects design matrix: strip the intercept column and centre
  # the remaining columns using the observation-time means (x_bar).
  x <- model.matrix(lme4::nobars(fm), mf)
  xtemp <- sweep(drop_intercept(x), 2, y_mod$x$x_bar, FUN = "-")

  # Random effects design matrices, one per bar term in the formula.
  bars <- lme4::findbars(fm)
  if (length(bars) > 2L)
    stop2("A maximum of 2 grouping factors are allowed.")
  parts <- lapply(bars, split_at_bars)
  z <- lapply(fetch(parts, "re_form"), model.matrix, mf)
  group_vars <- fetch(parts, "group_var")
  group_list <- lapply(group_vars, function(v) factor(mf[[v]]))
  names(z) <- names(group_list) <- group_vars

  # Assemble the returned parts (names are fixed by nlist).
  ret <- nlist(x, xtemp, z, group_list, group_vars)

  # Optionally include the sparse transposed random effects matrix.
  if (include_Zt)
    ret$Zt <- lme4::mkReTrms(bars, mf)$Zt

  # Carry any offset column through to the returned parts.
  ret$offset <- if ("offset" %in% colnames(newdata)) newdata$offset else NULL
  ret
}
rstanarm/R/stan_jm.fit.R0000644000176200001440000013123314370470372014653 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Internal model fitting function for models estimated using # \code{stan_mvmer} or \code{stan_jm}. # # See \code{stan_jm} for a description of the arguments to the # \code{stan_jm.fit} function call. # stan_jm.fit <- function(formulaLong = NULL, dataLong = NULL, formulaEvent = NULL, dataEvent = NULL, time_var, id_var, family = gaussian, assoc = "etavalue", lag_assoc = 0, grp_assoc, scale_assoc = NULL, epsilon = 1E-5, basehaz = c("bs", "weibull", "piecewise"), basehaz_ops, qnodes = 15, init = "prefit", weights, priorLong = normal(autoscale=TRUE), priorLong_intercept = normal(autoscale=TRUE), priorLong_aux = cauchy(0, 5, autoscale=TRUE), priorEvent = normal(autoscale=TRUE), priorEvent_intercept = normal(autoscale=TRUE), priorEvent_aux = cauchy(autoscale=TRUE), priorEvent_assoc = normal(autoscale=TRUE), prior_covariance = lkj(autoscale=TRUE), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, max_treedepth = 10L, QR = FALSE, sparse = FALSE, ...) { #----------------------------- # Pre-processing of arguments #----------------------------- if (!requireNamespace("survival")) stop("the 'survival' package must be installed to use this function.") # Set seed if specified dots <- list(...) 
if ("seed" %in% names(dots)) set.seed(dots$seed) algorithm <- match.arg(algorithm) basehaz <- match.arg(basehaz) if (missing(basehaz_ops)) basehaz_ops <- NULL if (missing(weights)) weights <- NULL if (missing(id_var)) id_var <- NULL if (missing(time_var)) time_var <- NULL if (missing(grp_assoc)) grp_assoc <- NULL if (!is.null(weights)) stop("'weights' are not yet implemented.") if (QR) stop("'QR' decomposition is not yet implemented.") if (sparse) stop("'sparse' option is not yet implemented.") # Error if args not supplied together supplied_together(formulaLong, dataLong, error = TRUE) supplied_together(formulaEvent, dataEvent, error = TRUE) # Determine whether a joint longitudinal-survival model was specified is_jm <- supplied_together(formulaLong, formulaEvent) stub <- if (is_jm) "Long" else "y" if (is_jm && is.null(time_var)) stop("'time_var' must be specified.") # Formula formulaLong <- validate_arg(formulaLong, "formula"); M <- length(formulaLong) # Data dataLong <- validate_arg(dataLong, "data.frame", validate_length = M) if (is_jm) dataEvent <- as.data.frame(dataEvent) # Family ok_classes <- c("function", "family", "character") ok_families <- c("binomial", "gaussian", "Gamma", "inverse.gaussian", "poisson", "neg_binomial_2") family <- validate_arg(family, ok_classes, validate_length = M) family <- lapply(family, validate_famlink, ok_families) family <- lapply(family, append_mvmer_famlink) # Observation weights has_weights <- !is.null(weights) # Priors priorLong <- broadcast_prior(priorLong, M) priorLong_intercept <- broadcast_prior(priorLong_intercept, M) priorLong_aux <- broadcast_prior(priorLong_aux, M) #-------------------------- # Longitudinal submodel(s) #-------------------------- # Info for separate longitudinal submodels y_mod <- xapply(formulaLong, dataLong, family, FUN = handle_y_mod) # Construct single cnms list for all longitudinal submodels y_cnms <- fetch(y_mod, "z", "group_cnms") cnms <- get_common_cnms(y_cnms, stub = stub) cnms_nms <- 
names(cnms) if (length(cnms_nms) > 2L) stop("A maximum of 2 grouping factors are allowed.") # Construct single list with unique levels for each grouping factor y_flist <- fetch(y_mod, "z", "group_list") flevels <- get_common_flevels(y_flist) # Ensure id_var is a valid grouping factor in all submodels if (is_jm) { id_var <- check_id_var(id_var, y_cnms, y_flist) id_list <- check_id_list(id_var, y_flist) if (!is.null(weights)) weights <- check_weights(weights, id_var) } # Observation weights y_weights <- lapply(y_mod, handle_weights, weights, id_var) #----------- Prior distributions -----------# # Valid prior distributions ok_dists <- nlist("normal", student_t = "t", "cauchy", "hs", "hs_plus", "laplace", "lasso") # disallow product normal ok_intercept_dists <- ok_dists[1:3] ok_aux_dists <- c(ok_dists[1:3], exponential = "exponential") ok_covariance_dists <- c("decov", "lkj") y_vecs <- fetch(y_mod, "y", "y") # used in autoscaling x_mats <- fetch(y_mod, "x", "xtemp") # used in autoscaling # Note: *_user_prior_*_stuff objects are stored unchanged for constructing # prior_summary, while *_prior_*_stuff objects are autoscaled # Priors for longitudinal submodels y_links <- fetch(y_mod, "family", "link") y_user_prior_stuff <- y_prior_stuff <- xapply(priorLong, nvars = fetch(y_mod, "x", "K"), link = y_links, FUN = handle_glm_prior, args = list(default_scale = 2.5, ok_dists = ok_dists)) y_user_prior_intercept_stuff <- y_prior_intercept_stuff <- xapply(priorLong_intercept, link = y_links, FUN = handle_glm_prior, args = list(nvars = 1, default_scale = 10, ok_dists = ok_intercept_dists)) y_user_prior_aux_stuff <- y_prior_aux_stuff <- xapply(priorLong_aux, FUN = handle_glm_prior, args = list(nvars = 1, default_scale = 5, link = NULL, ok_dists = ok_aux_dists)) b_user_prior_stuff <- b_prior_stuff <- handle_cov_prior( prior_covariance, cnms = cnms, ok_dists = ok_covariance_dists) # Autoscaling of priors y_prior_stuff <- xapply(y_prior_stuff, response = y_vecs, predictors = x_mats, 
family = family, FUN = autoscale_prior) y_prior_intercept_stuff <- xapply(y_prior_intercept_stuff, response = y_vecs, family = family, FUN = autoscale_prior) y_prior_aux_stuff <- xapply(y_prior_aux_stuff, response = y_vecs, family = family, FUN = autoscale_prior) if (b_prior_stuff$prior_dist_name == "lkj") { # autoscale priors for ranef sds b_prior_stuff <- split_cov_prior(b_prior_stuff, cnms = cnms, submodel_cnms = y_cnms) b_prior_stuff <- xapply( cnms_nms, FUN = function(nm) { z_mats <- fetch(y_mod, "z", "z", nm) xapply(b_prior_stuff[[nm]], response = y_vecs, predictors = z_mats, family = family, FUN = autoscale_prior) }) } #----------- Data for export to Stan -----------# standata <- list( M = as.integer(M), has_weights = as.integer(!all(lapply(weights, is.null))), family = fetch_array(y_mod, "family", "mvmer_family"), link = fetch_array(y_mod, "family", "mvmer_link"), weights = as.array(numeric(0)), # not yet implemented prior_PD = as.integer(prior_PD) ) # Offset Y_offset <- fetch(y_mod, "offset", pad_length = 3) standata$has_offset <- has_offset <- fetch_array(y_mod, "has_offset", pad_length = 3) standata$y1_offset <- if (has_offset[1]) Y_offset[[1]] else as.array(integer(0)) standata$y2_offset <- if (has_offset[2]) Y_offset[[2]] else as.array(integer(0)) standata$y3_offset <- if (has_offset[3]) Y_offset[[3]] else as.array(integer(0)) # Dimensions standata$has_aux <- fetch_array(y_mod, "has_aux", pad_length = 3) standata$resp_type <- fetch_array(y_mod, "y", "resp_type", pad_length = 3) standata$intercept_type <- fetch_array(y_mod, "intercept_type", "number", pad_length = 3) standata$yNobs <- fetch_array(y_mod, "x", "N", pad_length = 3) standata$yNeta <- fetch_array(y_mod, "x", "N", pad_length = 3) # same as Nobs for stan_mvmer standata$yK <- fetch_array(y_mod, "x", "K", pad_length = 3) # Response vectors Y_integer <- fetch(y_mod, "y", "integer") standata$yInt1 <- if (M > 0) Y_integer[[1]] else as.array(integer(0)) standata$yInt2 <- if (M > 1) Y_integer[[2]] 
else as.array(integer(0)) standata$yInt3 <- if (M > 2) Y_integer[[3]] else as.array(integer(0)) Y_real <- fetch(y_mod, "y", "real") standata$yReal1 <- if (M > 0) Y_real[[1]] else as.array(double(0)) standata$yReal2 <- if (M > 1) Y_real[[2]] else as.array(double(0)) standata$yReal3 <- if (M > 2) Y_real[[3]] else as.array(double(0)) # Population level design matrices X <- fetch(y_mod, "x", "xtemp") standata$yX1 <- if (M > 0) X[[1]] else matrix(0,0,0) standata$yX2 <- if (M > 1) X[[2]] else matrix(0,0,0) standata$yX3 <- if (M > 2) X[[3]] else matrix(0,0,0) X_bar <- fetch(y_mod, "x", "x_bar") standata$yXbar1 <- if (M > 0) as.array(X_bar[[1]]) else as.array(double(0)) standata$yXbar2 <- if (M > 1) as.array(X_bar[[2]]) else as.array(double(0)) standata$yXbar3 <- if (M > 2) as.array(X_bar[[3]]) else as.array(double(0)) # Data for group specific terms - group factor 1 b1_varname <- cnms_nms[[1L]] # name of group factor 1 b1_nvars <- fetch_(y_mod, "z", "nvars", b1_varname, null_to_zero = TRUE, pad_length = 3) b1_ngrps <- fetch_(y_mod, "z", "ngrps", b1_varname) if (!n_distinct(b1_ngrps) == 1L) stop("The number of groups for the grouping factor '", b1_varname, "' should be the same in all submodels.") standata$bN1 <- b1_ngrps[[1L]] + 1L # add padding for _NEW_ group standata$bK1 <- sum(b1_nvars) standata$bK1_len <- as.array(b1_nvars) standata$bK1_idx <- get_idx_array(b1_nvars) Z1 <- fetch(y_mod, "z", "z", b1_varname) Z1 <- lapply(Z1, transpose) Z1 <- lapply(Z1, convert_null, "matrix") standata$y1_Z1 <- if (M > 0) Z1[[1L]] else matrix(0,0,0) standata$y2_Z1 <- if (M > 1) Z1[[2L]] else matrix(0,0,0) standata$y3_Z1 <- if (M > 2) Z1[[3L]] else matrix(0,0,0) Z1_id <- fetch(y_mod, "z", "group_list", b1_varname) Z1_id <- lapply(Z1_id, groups) Z1_id <- lapply(Z1_id, convert_null, "arrayinteger") standata$y1_Z1_id <- if (M > 0) Z1_id[[1L]] else as.array(integer(0)) standata$y2_Z1_id <- if (M > 1) Z1_id[[2L]] else as.array(integer(0)) standata$y3_Z1_id <- if (M > 2) Z1_id[[3L]] else 
as.array(integer(0)) # Data for group specific terms - group factor 2 if (length(cnms) > 1L) { # model has a second grouping factor b2_varname <- cnms_nms[[2L]] # name of group factor 2 b2_nvars <- fetch_(y_mod, "z", "nvars", b2_varname, null_to_zero = TRUE, pad_length = 3) b2_ngrps <- fetch_(y_mod, "z", "ngrps", b2_varname) if (!n_distinct(b2_ngrps) == 1L) stop("The number of groups for the grouping factor '", b2_varname, "' should be the same in all submodels.") standata$bN2 <- b2_ngrps[[1L]] + 1L # add padding for _NEW_ group standata$bK2 <- sum(b2_nvars) standata$bK2_len <- as.array(b2_nvars) standata$bK2_idx <- get_idx_array(b2_nvars) Z2 <- fetch(y_mod, "z", "z", b2_varname) Z2 <- lapply(Z2, transpose) Z2 <- lapply(Z2, convert_null, "matrix") standata$y1_Z2 <- if (M > 0) Z2[[1L]] else matrix(0,0,0) standata$y2_Z2 <- if (M > 1) Z2[[2L]] else matrix(0,0,0) standata$y3_Z2 <- if (M > 2) Z2[[3L]] else matrix(0,0,0) Z2_id <- fetch(y_mod, "z", "group_list", b2_varname) Z2_id <- lapply(Z2_id, groups) Z2_id <- lapply(Z2_id, convert_null, "arrayinteger") standata$y1_Z2_id <- if (M > 0) Z2_id[[1L]] else as.array(integer(0)) standata$y2_Z2_id <- if (M > 1) Z2_id[[2L]] else as.array(integer(0)) standata$y3_Z2_id <- if (M > 2) Z2_id[[3L]] else as.array(integer(0)) } else { # no second grouping factor standata$bN2 <- 0L standata$bK2 <- 0L standata$bK2_len <- as.array(rep(0,3L)) standata$bK2_idx <- get_idx_array(rep(0,3L)) standata$y1_Z2 <- matrix(0,0,0) standata$y2_Z2 <- matrix(0,0,0) standata$y3_Z2 <- matrix(0,0,0) standata$y1_Z2_id <- as.array(integer(0)) standata$y2_Z2_id <- as.array(integer(0)) standata$y3_Z2_id <- as.array(integer(0)) } # Priors standata$y_prior_dist_for_intercept <- fetch_array(y_prior_intercept_stuff, "prior_dist") standata$y_prior_mean_for_intercept <- fetch_array(y_prior_intercept_stuff, "prior_mean") standata$y_prior_scale_for_intercept <- fetch_array(y_prior_intercept_stuff, "prior_scale") standata$y_prior_df_for_intercept <- 
fetch_array(y_prior_intercept_stuff, "prior_df") standata$y_prior_dist_for_aux <- fetch_array(y_prior_aux_stuff, "prior_dist") standata$y_prior_mean_for_aux <- fetch_array(y_prior_aux_stuff, "prior_mean") standata$y_prior_scale_for_aux <- fetch_array(y_prior_aux_stuff, "prior_scale") standata$y_prior_df_for_aux <- fetch_array(y_prior_aux_stuff, "prior_df") standata$y_prior_dist <- fetch_array(y_prior_stuff, "prior_dist", pad_length = 3) prior_mean <- fetch(y_prior_stuff, "prior_mean") standata$y_prior_mean1 <- if (M > 0) prior_mean[[1]] else as.array(double(0)) standata$y_prior_mean2 <- if (M > 1) prior_mean[[2]] else as.array(double(0)) standata$y_prior_mean3 <- if (M > 2) prior_mean[[3]] else as.array(double(0)) prior_scale <- fetch(y_prior_stuff, "prior_scale") standata$y_prior_scale1 <- if (M > 0) as.array(prior_scale[[1]]) else as.array(double(0)) standata$y_prior_scale2 <- if (M > 1) as.array(prior_scale[[2]]) else as.array(double(0)) standata$y_prior_scale3 <- if (M > 2) as.array(prior_scale[[3]]) else as.array(double(0)) prior_df <- fetch(y_prior_stuff, "prior_df") standata$y_prior_df1 <- if (M > 0) prior_df[[1]] else as.array(double(0)) standata$y_prior_df2 <- if (M > 1) prior_df[[2]] else as.array(double(0)) standata$y_prior_df3 <- if (M > 2) prior_df[[3]] else as.array(double(0)) # hs priors only standata$y_global_prior_scale <- fetch_array(y_prior_stuff, "global_prior_scale") standata$y_global_prior_df <- fetch_array(y_prior_stuff, "global_prior_df") standata$y_slab_df <- fetch_array(y_prior_stuff, "slab_df") standata$y_slab_scale <- fetch_array(y_prior_stuff, "slab_scale") # Priors for group specific terms standata$t <- length(cnms) standata$p <- as.array(sapply(cnms, length)) standata$l <- as.array( sapply(cnms_nms, FUN = function(nm) { ngrps <- unique(fetch_(y_mod, "z", "ngrps", nm)) ngrps + 1L # add padding for _NEW_ group })) standata$q <- sum(standata$p * standata$l) if (prior_covariance$dist == "decov") { # data for decov prior 
standata$prior_dist_for_cov <- b_prior_stuff$prior_dist standata$b_prior_shape <- b_prior_stuff$prior_shape standata$b_prior_scale <- b_prior_stuff$prior_scale standata$b_prior_concentration <- b_prior_stuff$prior_concentration standata$b_prior_regularization <- b_prior_stuff$prior_regularization standata$len_concentration <- length(standata$b_prior_concentration) standata$len_regularization <- length(standata$b_prior_regularization) standata$len_theta_L <- sum(choose(standata$p, 2), standata$p) # pass empty lkj data standata$b1_prior_scale <- as.array(rep(0L, standata$bK1)) standata$b2_prior_scale <- as.array(rep(0L, standata$bK2)) standata$b1_prior_df <- as.array(rep(0L, standata$bK1)) standata$b2_prior_df <- as.array(rep(0L, standata$bK2)) standata$b1_prior_regularization <- 1.0 standata$b2_prior_regularization <- 1.0 } else if (prior_covariance$dist == "lkj") { # data for lkj prior b1_prior_stuff <- b_prior_stuff[[b1_varname]] b1_prior_dist <- fetch_(b1_prior_stuff, "prior_dist") b1_prior_scale <- fetch_array(b1_prior_stuff, "prior_scale") b1_prior_df <- fetch_array(b1_prior_stuff, "prior_df") b1_prior_regularization <- fetch_(b1_prior_stuff, "prior_regularization") if (n_distinct(b1_prior_dist) > 1L) stop2("Bug found: covariance prior should be the same for all submodels.") if (n_distinct(b1_prior_regularization) > 1L) { stop2("Bug found: prior_regularization should be the same for all submodels.") } standata$prior_dist_for_cov <- unique(b1_prior_dist) standata$b1_prior_scale <- b1_prior_scale standata$b1_prior_df <- b1_prior_df standata$b1_prior_regularization <- if (length(b1_prior_regularization)) unique(b1_prior_regularization) else 1.0 if (standata$bK2 > 0) { # model has a second grouping factor b2_prior_stuff <- b_prior_stuff[[b2_varname]] b2_prior_scale <- fetch_array(b2_prior_stuff, "prior_scale") b2_prior_df <- fetch_array(b2_prior_stuff, "prior_df") b2_prior_regularization <- fetch_(b2_prior_stuff, "prior_regularization") standata$b2_prior_scale <- 
b2_prior_scale standata$b2_prior_df <- b2_prior_df standata$b2_prior_regularization <- unique(b2_prior_regularization) } else { # model does not have a second grouping factor standata$b2_prior_scale <- as.array(double(0)) standata$b2_prior_df <- as.array(double(0)) standata$b2_prior_regularization <- 1.0 } # pass empty decov data standata$len_theta_L <- 0L standata$b_prior_shape <- as.array(rep(0L, standata$t)) standata$b_prior_scale <- as.array(rep(0L, standata$t)) standata$len_concentration <- 0L standata$len_regularization <- 0L standata$b_prior_concentration <- as.array(rep(0L, standata$len_concentration)) standata$b_prior_regularization <- as.array(rep(0L, standata$len_regularization)) } # Names for longitudinal submodel parameters y_intercept_nms <- uapply(1:M, function(m) { if (y_mod[[m]]$intercept_type$number > 0) paste0(stub, m, "|(Intercept)") else NULL }) y_beta_nms <- uapply(1:M, function(m) { if (!is.null(colnames(X[[m]]))) paste0(stub, m, "|", colnames(X[[m]])) else NULL }) y_aux_nms <- uapply(1:M, function(m) { famname_m <- family[[m]]$family if (is.gaussian(famname_m)) paste0(stub, m,"|sigma") else if (is.gamma(famname_m)) paste0(stub, m,"|shape") else if (is.ig(famname_m)) paste0(stub, m,"|lambda") else if (is.nb(famname_m)) paste0(stub, m,"|reciprocal_dispersion") else NULL }) # Names for group specific coefficients ("b pars") b_nms <- uapply(seq_along(cnms), FUN = function(i) { nm <- cnms_nms[i] nms_i <- paste(cnms[[i]], nm) flevels[[nm]] <- c(gsub(" ", "_", flevels[[nm]]), paste0("_NEW_", nm)) if (length(nms_i) == 1) { paste0(nms_i, ":", flevels[[nm]]) } else { c(t(sapply(nms_i, paste0, ":", flevels[[nm]]))) } }) # Names for Sigma matrix Sigma_nms <- get_Sigma_nms(cnms) #---------------- # Event submodel #---------------- if (is_jm) { # begin jm block # Fit separate event submodel e_mod <- handle_e_mod(formula = formulaEvent, data = dataEvent, qnodes = qnodes, id_var = id_var, y_id_list = id_list) # Baseline hazard ok_basehaz <- nlist("weibull", 
"bs", "piecewise") basehaz <- handle_basehaz(basehaz, basehaz_ops, ok_basehaz = ok_basehaz, eventtime = e_mod$eventtime, status = e_mod$status) # Observation weights e_weights <- handle_weights(e_mod, weights, id_var) # Check longitudinal observation times are not later than the event time lapply(dataLong, FUN = validate_observation_times, eventtime = e_mod$eventtime, id_var = id_var, time_var = time_var) #----------- Prior distributions -----------# # Valid prior distributions ok_e_aux_dists <- ok_dists[1:3] # Note: *_user_prior_*_stuff objects are stored unchanged for constructing # prior_summary, while *_prior_*_stuff objects are autoscaled # Priors for event submodel e_user_prior_stuff <- e_prior_stuff <- handle_glm_prior(priorEvent, nvars = e_mod$K, default_scale = 2.5, link = NULL, ok_dists = ok_dists) e_user_prior_intercept_stuff <- e_prior_intercept_stuff <- handle_glm_prior(priorEvent_intercept, nvars = 1, default_scale = 20, link = NULL, ok_dists = ok_intercept_dists) e_user_prior_aux_stuff <- e_prior_aux_stuff <- handle_glm_prior(priorEvent_aux, nvars = basehaz$df, default_scale = if (basehaz$type_name == "weibull") 2 else 20, link = NULL, ok_dists = ok_e_aux_dists) # Autoscaling of priors e_prior_stuff <- autoscale_prior(e_prior_stuff, predictors = e_mod$x$x) e_prior_intercept_stuff <- autoscale_prior(e_prior_intercept_stuff) e_prior_aux_stuff <- autoscale_prior(e_prior_aux_stuff) #----------- Data for export to Stan -----------# # Data and dimensions standata$e_K <- as.integer(e_mod$K) standata$Npat <- as.integer(e_mod$Npat) standata$Nevents <- as.integer(e_mod$Nevents) standata$qnodes <- as.integer(qnodes) standata$qwts <- as.array(e_mod$qwts) standata$Npat_times_qnodes <- as.integer(e_mod$Npat * qnodes) standata$e_times <- as.array(e_mod$cpts) standata$nrow_e_Xq <- length(standata$e_times) standata$e_has_intercept <- as.integer(basehaz$type_name == "weibull") standata$e_Xq <- e_mod$Xq standata$e_xbar <- as.array(e_mod$Xbar) standata$e_weights <- 
as.array(e_weights) standata$e_weights_rep <- as.array(rep(e_weights, times = qnodes)) # Baseline hazard standata$basehaz_type <- as.integer(basehaz$type) standata$basehaz_df <- as.integer(basehaz$df) standata$basehaz_X <- make_basehaz_X(e_mod$cpts, basehaz) standata$norm_const <- e_mod$norm_const # Priors standata$e_prior_dist <- e_prior_stuff$prior_dist standata$e_prior_dist_for_intercept<- e_prior_intercept_stuff$prior_dist standata$e_prior_dist_for_aux <- e_prior_aux_stuff$prior_dist # hyperparameters for event submodel priors standata$e_prior_mean <- e_prior_stuff$prior_mean standata$e_prior_scale <- e_prior_stuff$prior_scale standata$e_prior_df <- e_prior_stuff$prior_df standata$e_prior_mean_for_intercept <- c(e_prior_intercept_stuff$prior_mean) standata$e_prior_scale_for_intercept<- c(e_prior_intercept_stuff$prior_scale) standata$e_prior_df_for_intercept <- c(e_prior_intercept_stuff$prior_df) standata$e_prior_mean_for_aux <- if (basehaz$type == 1L) as.array(0) else as.array(e_prior_aux_stuff$prior_mean) standata$e_prior_scale_for_aux <- e_prior_aux_stuff$prior_scale standata$e_prior_df_for_aux <- e_prior_aux_stuff$prior_df standata$e_global_prior_scale <- e_prior_stuff$global_prior_scale standata$e_global_prior_df <- e_prior_stuff$global_prior_df standata$e_slab_df <- e_prior_stuff$slab_df standata$e_slab_scale <- e_prior_stuff$slab_scale #----------------------- # Association structure #----------------------- # Handle association structure # !! If order is changed here, then must also change standata$has_assoc !! 
ok_assoc <- c("null", "etavalue","etaslope", "etaauc", "muvalue", "muslope", "muauc", "shared_b", "shared_coef") ok_assoc_data <- ok_assoc[c(2:3,5:6)] ok_assoc_interactions <- ok_assoc[c(2,5)] lag_assoc <- validate_lag_assoc(lag_assoc, M) assoc <- mapply(assoc, y_mod = y_mod, lag = lag_assoc, FUN = validate_assoc, MoreArgs = list(ok_assoc = ok_assoc, ok_assoc_data = ok_assoc_data, ok_assoc_interactions = ok_assoc_interactions, id_var = id_var, M = M)) assoc <- check_order_of_assoc_interactions(assoc, ok_assoc_interactions) colnames(assoc) <- paste0("Long", 1:M) # For each submodel, identify any grouping factors that are # clustered within id_var (i.e. lower level clustering) ok_grp_assocs <- c("sum", "mean", "min", "max") grp_basic <- xapply(FUN = get_basic_grp_info, cnms = y_cnms, flist = y_flist, args = list(id_var = id_var)) grp_stuff <- xapply(FUN = get_extra_grp_info, basic_info = grp_basic, flist = y_flist, args = list(id_var = id_var, grp_assoc = grp_assoc, ok_grp_assocs = ok_grp_assocs)) has_grp <- fetch_(grp_stuff, "has_grp") if (any(has_grp)) { grp_structure <- fetch(grp_stuff, "grp_list")[has_grp] if (n_distinct(grp_structure) > 1L) stop2("Any longitudinal submodels with a grouping factor clustered within ", "patients must use the same clustering structure; that is, the same ", "clustering variable and the same number of units clustered within a ", "given patient.") ok_assocs_with_grp <- c("etavalue", "etavalue_data", "etaslope", "etaslope_data", "muvalue", "muvalue_data") validate_assoc_with_grp(has_grp = has_grp, assoc = assoc, ok_assocs_with_grp = ok_assocs_with_grp) } else if (!is.null(grp_assoc)) { stop2("'grp_assoc' can only be specified when there is a grouping factor ", "clustered within patients.") } # Return design matrices for evaluating longitudinal submodel quantities # at the quadrature points auc_qnodes <- 15L assoc_as_list <- apply(assoc, 2L, c) a_mod <- xapply(data = dataLong, assoc = assoc_as_list, y_mod = y_mod, grp_stuff = grp_stuff, 
FUN = handle_assocmod, args = list(ids = e_mod$cids, times = e_mod$cpts, id_var = id_var, time_var = time_var, epsilon = epsilon, auc_qnodes = auc_qnodes)) # Number of association parameters a_K <- get_num_assoc_pars(assoc, a_mod) # Association scaling parameter a_scale <- validate_scale_assoc(scale_assoc, assoc_as_list) # Use a stan_mvmer variational bayes model fit for: # - obtaining initial values for joint model parameters # - obtaining appropriate scaling for priors on association parameters vbdots <- list(...) dropargs <- c("chains", "cores", "iter", "refresh", "thin", "test_grad", "control") for (i in dropargs) vbdots[[i]] <- NULL vbpars <- pars_to_monitor(standata, is_jm = FALSE) vbargs <- c(list(stanmodels$mvmer, pars = vbpars, data = standata, algorithm = "meanfield"), vbdots) utils::capture.output(init_fit <- suppressWarnings(do.call(rstan::vb, vbargs))) init_new_nms <- c(y_intercept_nms, y_beta_nms, if (length(standata$q)) c(paste0("b[", b_nms, "]")), y_aux_nms, paste0("Sigma[", Sigma_nms, "]"), paste0(stub, 1:M, "|mean_PPD"), "log-posterior") init_fit@sim$fnames_oi <- init_new_nms init_mat <- t(colMeans(as.matrix(init_fit))) # posterior means init_nms <- collect_nms(colnames(init_mat), M, stub = "Long") init_beta <- lapply(1:M, function(m) init_mat[, init_nms$y[[m]]]) init_b <- lapply(1:M, function(m) { # can drop _NEW_ groups since they are not required for generating # the assoc_terms that are used in scaling the priors for # the association parameters (ie. the Zt matrix returned by the # function 'make_assoc_parts_for_stan' will not be padded). 
b <- init_mat[, init_nms$y_b[[m]]] b[!grepl("_NEW_", names(b), fixed = TRUE)] }) if (is.character(init) && (init =="prefit")) { init_means2 <- rstan::get_posterior_mean(init_fit) init_nms2 <- rownames(init_means2) inits <- generate_init_function(e_mod, standata)() sel_b1 <- grep(paste0("^z_bMat1\\."), init_nms2) if (length(sel_b1)) inits[["z_bMat1"]] <- matrix(init_means2[sel_b1,], nrow = standata$bK1) sel_b2 <- grep(paste0("^z_bMat2\\."), init_nms2) if (length(sel_b2)) inits[["z_bMat2"]] <- matrix(init_means2[sel_b2,], nrow = standata$bK2) sel_bC1 <- grep(paste0("^bCholesky1\\."), init_nms2) if (length(sel_bC1) > 1) { inits[["bCholesky1"]] <- matrix(init_means2[sel_bC1,], nrow = standata$bK1) } else if (length(sel_bC1) == 1) { inits[["bCholesky1"]] <- as.array(init_means2[sel_bC1,]) } sel_bC2 <- grep(paste0("^bCholesky2\\."), init_nms2) if (length(sel_bC2) > 1) { inits[["bCholesky2"]] <- matrix(init_means2[sel_bC2,], nrow = standata$bK2) } else if (length(sel_bC1) == 1) { inits[["bCholesky2"]] <- as.array(init_means2[sel_bC2,]) } sel <- c("yGamma1", "yGamma2", "yGamma3", "z_yBeta1", "z_yBeta2", "z_yBeta3", "yAux1_unscaled", "yAux2_unscaled", "yAux3_unscaled", "bSd1", "bSd2", "z_b", "z_T", "rho", "zeta", "tau", "yGlobal1", "yGlobal2", "yGlobal3", "yLocal1", "yLocal2", "yLocal3", "yMix1", "yMix2", "yMix3", "yOol1", "yOol2", "yOol3") for (i in sel) { sel_i <- grep(paste0("^", i, "\\."), init_nms2) if (length(sel_i)) inits[[i]] <- as.array(init_means2[sel_i,]) } init <- function() inits } #----------- Prior distributions -----------# # Priors for association parameters e_user_prior_assoc_stuff <- e_prior_assoc_stuff <- handle_glm_prior(priorEvent_assoc, nvars = a_K, default_scale = 2.5, link = NULL, ok_dists = ok_dists) # Autoscaling of priors if (a_K) { e_prior_assoc_stuff <- autoscale_prior(e_prior_assoc_stuff, family = family, assoc = assoc, parts = a_mod, beta = init_beta, b = init_b, scale_assoc = a_scale) } #----------- Data for export to Stan -----------# # 
Dimensions standata$assoc <- as.integer(a_K > 0L) # any association structure, 1 = yes standata$a_K <- as.integer(a_K) # num association parameters # Indicator for which components are required to build the association terms assoc_uses <- sapply( c("etavalue", "etaslope", "etaauc", "muvalue", "muslope", "muauc"), function(x, assoc) { nm_check <- switch(x, etavalue = "^eta|^mu", etaslope = "etaslope|muslope", etaauc = "etaauc|muauc", muvalue = "muvalue|muslope", muslope = "muslope", muauc = "muauc") sel <- grep(nm_check, rownames(assoc)) tmp <- assoc[sel, , drop = FALSE] tmp <- pad_matrix(tmp, cols = 3L, value = FALSE) as.integer(as.logical(colSums(tmp > 0))) }, assoc = assoc) standata$assoc_uses <- t(assoc_uses) # Indexing for desired association types # !! Must be careful with corresponding use of indexing in Stan code !! # 1 = ev; 2 = es; 3 = ea; 4 = mv; 5 = ms; 6 = ma; # 7 = shared_b; 8 = shared_coef; # 9 = ev_data; 10 = es_data; 11 = mv_data; 12 = ms_data; # 13 = evev; 14 = evmv; 15 = mvev; 16 = mvmv; sel <- grep("which|null", rownames(assoc), invert = TRUE) standata$has_assoc <- matrix(as.integer(assoc[sel,]), ncol = M) # Data for association structure when there is # clustering below the patient-level standata$has_grp <- as.array(as.integer(has_grp)) if (any(has_grp)) { # has lower level clustering sel <- which(has_grp)[[1L]] standata$grp_idx <- attr(a_mod[[sel]], "grp_idx") standata$grp_assoc <- switch(grp_assoc, sum = 1L, mean = 2L, min = 3L, max = 4L, 0L) } else { # no lower level clustering standata$grp_idx <- matrix(0L, standata$nrow_e_Xq, 2L) standata$grp_assoc <- 0L } # Data for calculating eta, slope, auc in GK quadrature N_tmp <- sapply(a_mod, function(x) NROW(x$mod_eta$xtemp)) N_tmp <- c(N_tmp, rep(0, 3 - length(N_tmp))) standata$nrow_y_Xq <- as.array(as.integer(N_tmp)) for (m in 1:3) { for (i in c("eta", "eps", "auc")) { nm_check <- switch(i, eta = "^eta|^mu", eps = "slope", auc = "auc") sel <- grep(nm_check, rownames(assoc)) if (m <= M && 
any(unlist(assoc[sel,m]))) { tmp_stuff <- a_mod[[m]][[paste0("mod_", i)]] # fe design matrix at quadpoints X_tmp <- tmp_stuff$xtemp # re design matrix at quadpoints, group factor 1 Z1_tmp <- tmp_stuff$z[[cnms_nms[1L]]] Z1_tmp <- transpose(Z1_tmp) Z1_tmp <- convert_null(Z1_tmp, "matrix") Z1_tmp_id <- tmp_stuff$group_list[[cnms_nms[1L]]] Z1_tmp_id <- groups(Z1_tmp_id) Z1_tmp_id <- convert_null(Z1_tmp_id, "arrayinteger") # re design matrix at quadpoints, group factor 1 if (length(cnms_nms) > 1L) { Z2_tmp <- tmp_stuff$z[[cnms_nms[2L]]] Z2_tmp <- transpose(Z2_tmp) Z2_tmp <- convert_null(Z2_tmp, "matrix") Z2_tmp_id <- tmp_stuff$group_list[[cnms_nms[2L]]] Z2_tmp_id <- groups(Z2_tmp_id) Z2_tmp_id <- convert_null(Z2_tmp_id, "arrayinteger") } else { Z2_tmp <- matrix(0,standata$bK2_len[m],0) Z2_tmp_id <- as.array(integer(0)) } y_offset_tmp <- if (has_offset[m]) tmp_stuff$offset else as.array(integer(0)) } else { X_tmp <- matrix(0,0,standata$yK[m]) Z1_tmp <- matrix(0,standata$bK1_len[m],0) Z2_tmp <- matrix(0,standata$bK2_len[m],0) Z1_tmp_id <- as.array(integer(0)) Z2_tmp_id <- as.array(integer(0)) y_offset_tmp <- as.array(integer(0)) } standata[[paste0("y", m, "_xq_", i)]] <- X_tmp standata[[paste0("y", m, "_z1q_", i)]] <- Z1_tmp standata[[paste0("y", m, "_z2q_", i)]] <- Z2_tmp standata[[paste0("y", m, "_z1q_id_", i)]] <- Z1_tmp_id standata[[paste0("y", m, "_z2q_id_", i)]] <- Z2_tmp_id standata[[paste0("y", m, "_offset_", i)]] <- y_offset_tmp } } # Data for auc association structure standata$auc_qnodes <- as.integer(auc_qnodes) standata$Npat_times_auc_qnodes <- as.integer(e_mod$Npat * auc_qnodes) nrow_y_Xq_auc <- unique(uapply(a_mod, function(x) { nr <- NROW(x$mod_auc$x) if (nr > 0) nr else NULL })) if (length(nrow_y_Xq_auc) > 1L) stop2("Bug found: nrows for auc should be the same for all submodels.") standata$nrow_y_Xq_auc <- if (!is.null(nrow_y_Xq_auc)) nrow_y_Xq_auc else 0L auc_qwts <- uapply(e_mod$cpts, function(x) lapply(get_quadpoints(auc_qnodes)$weights, 
unstandardise_qwts, 0, x)) standata$auc_qwts <- if (any(standata$assoc_uses[3,] > 0)) as.array(auc_qwts) else double(0) # Interactions between association terms and data, with the following objects: # a_K_data: number of columns in y_Xq_data corresponding to each interaction # type (ie, etavalue, etaslope, muvalue, muslope) for each submodel # idx_q: indexing for the rows of Xq_data that correspond to each submodel, # since it is formed as a block diagonal matrix Xq_data <- fetch(a_mod, "X_bind_data") # design mat for the interactions standata$y_Xq_data <- as.array(as.matrix(Matrix::bdiag(Xq_data))) standata$a_K_data <- fetch_array(a_mod, "K_data") standata$idx_q <- get_idx_array(standata$nrow_y_Xq) # Interactions between association terms standata$which_interactions <- as.array(unlist(assoc["which_interactions",])) standata$size_which_interactions <- c(sapply(assoc["which_interactions",], sapply, length)) # Shared random effects standata$which_b_zindex <- as.array(unlist(assoc["which_b_zindex",])) standata$which_coef_zindex <- as.array(unlist(assoc["which_coef_zindex",])) standata$which_coef_xindex <- as.array(unlist(assoc["which_coef_xindex",])) standata$size_which_b <- as.array(sapply(assoc["which_b_zindex", ], length)) standata$size_which_coef <- as.array(sapply(assoc["which_coef_zindex", ], length)) # Sum dimensions for (i in c("a_K_data", paste0("size_which_", c("b", "coef", "interactions")))) { standata[[paste0("sum_", i)]] <- as.integer(sum(standata[[i]])) } # Hyperparameters for assoc parameter priors standata$a_prior_dist <- e_prior_assoc_stuff$prior_dist standata$a_prior_mean <- e_prior_assoc_stuff$prior_mean standata$a_prior_scale <- as.array(e_prior_assoc_stuff$prior_scale) standata$a_prior_df <- e_prior_assoc_stuff$prior_df standata$a_global_prior_scale <- e_prior_assoc_stuff$global_prior_scale standata$a_global_prior_df <- e_prior_assoc_stuff$global_prior_df standata$a_slab_df <- e_prior_assoc_stuff$slab_df standata$a_slab_scale <- 
e_prior_assoc_stuff$slab_scale # Centering for association terms standata$a_xbar <- if (a_K) e_prior_assoc_stuff$a_xbar else numeric(0) # Scaling for association terms standata$a_scale <- if (a_K) as.array(a_scale) else numeric(0) } # end jm block #--------------- # Prior summary #--------------- prior_info <- summarize_jm_prior( user_priorLong = y_user_prior_stuff, user_priorLong_intercept = y_user_prior_intercept_stuff, user_priorLong_aux = y_user_prior_aux_stuff, if (is_jm) user_priorEvent = e_user_prior_stuff, if (is_jm) user_priorEvent_intercept = e_user_prior_intercept_stuff, if (is_jm) user_priorEvent_aux = e_user_prior_aux_stuff, if (is_jm) user_priorEvent_assoc = e_user_prior_assoc_stuff, user_prior_covariance = prior_covariance, b_user_prior_stuff = b_user_prior_stuff, b_prior_stuff = b_prior_stuff, y_has_intercept = fetch_(y_mod, "x", "has_intercept"), y_has_predictors = fetch_(y_mod, "x", "K") > 0, if (is_jm) e_has_intercept = standata$e_has_intercept, if (is_jm) e_has_predictors = standata$e_K > 0, if (is_jm) has_assoc = a_K > 0, adjusted_priorLong_scale = fetch(y_prior_stuff, "prior_scale"), adjusted_priorLong_intercept_scale = fetch(y_prior_intercept_stuff, "prior_scale"), adjusted_priorLong_aux_scale = fetch(y_prior_aux_stuff, "prior_scale"), if (is_jm) adjusted_priorEvent_scale = e_prior_stuff$prior_scale, if (is_jm) adjusted_priorEvent_intercept_scale = e_prior_intercept_stuff$prior_scale, if (is_jm) adjusted_priorEvent_aux_scale = e_prior_aux_stuff$prior_scale, if (is_jm) adjusted_priorEvent_assoc_scale = e_prior_assoc_stuff$prior_scale, family = family, if (is_jm) basehaz = basehaz, stub_for_names = if (is_jm) "Long" else "y" ) #----------- # Fit model #----------- # call stan() to draw from posterior distribution stanfit <- if (is_jm) stanmodels$jm else stanmodels$mvmer pars <- pars_to_monitor(standata, is_jm = is_jm) if (M == 1L) cat("Fitting a univariate", if (is_jm) "joint" else "glmer", "model.\n\n") if (M > 1L) cat("Fitting a 
multivariate", if (is_jm) "joint" else "glmer", "model.\n\n") if (algorithm == "sampling") { cat("Please note the warmup may be much slower than later iterations!\n") sampling_args <- set_jm_sampling_args( object = stanfit, cnms = cnms, user_dots = list(...), user_adapt_delta = adapt_delta, user_max_treedepth = max_treedepth, data = standata, pars = pars, init = init, show_messages = FALSE) stanfit <- do.call(sampling, sampling_args) } else { # meanfield or fullrank vb stanfit <- rstan::vb(stanfit, pars = pars, data = standata, algorithm = algorithm, ...) } check <- check_stanfit(stanfit) if (!isTRUE(check)) return(standata) # Sigma values in stanmat if (prior_covariance$dist == "decov" && standata$len_theta_L) stanfit <- evaluate_Sigma(stanfit, cnms) if (is_jm) { # begin jm block e_intercept_nms <- "Event|(Intercept)" e_beta_nms <- if (e_mod$K) paste0("Event|", colnames(e_mod$Xq)) else NULL e_aux_nms <- if (basehaz$type_name == "weibull") "Event|weibull-shape" else if (basehaz$type_name == "bs") paste0("Event|b-splines-coef", seq(basehaz$df)) else if (basehaz$type_name == "piecewise") paste0("Event|piecewise-coef", seq(basehaz$df)) e_assoc_nms <- character() for (m in 1:M) { if (assoc["etavalue", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|etavalue")) if (assoc["etavalue_data", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|etavalue:", colnames(a_mod[[m]][["X_data"]][["etavalue_data"]]))) if (assoc["etavalue_etavalue",][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|etavalue:Long", assoc["which_interactions",][[m]][["etavalue_etavalue"]], "|etavalue")) if (assoc["etavalue_muvalue", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|etavalue:Long", assoc["which_interactions",][[m]][["etavalue_muvalue"]], "|muvalue")) if (assoc["etaslope", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|etaslope")) if (assoc["etaslope_data", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", 
m,"|etaslope:", colnames(a_mod[[m]][["X_data"]][["etaslope_data"]]))) if (assoc["etaauc", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|etaauc")) if (assoc["muvalue", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muvalue")) if (assoc["muvalue_data", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muvalue:", colnames(a_mod[[m]][["X_data"]][["muvalue_data"]]))) if (assoc["muvalue_etavalue", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muvalue:Long", assoc["which_interactions",][[m]][["muvalue_etavalue"]], "|etavalue")) if (assoc["muvalue_muvalue", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muvalue:Long", assoc["which_interactions",][[m]][["muvalue_muvalue"]], "|muvalue")) if (assoc["muslope", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muslope")) if (assoc["muslope_data", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muslope:", colnames(a_mod[[m]][["X_data"]][["muslope_data"]]))) if (assoc["muauc", ][[m]]) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|Long", m,"|muauc")) } if (sum(standata$size_which_b)) { temp_g_nms <- lapply(1:M, FUN = function(m) { all_nms <- paste0(paste0("Long", m, "|b["), y_mod[[m]]$z$group_cnms[[id_var]], "]") all_nms[assoc["which_b_zindex",][[m]]]}) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|", unlist(temp_g_nms))) } if (sum(standata$size_which_coef)) { temp_g_nms <- lapply(1:M, FUN = function(m) { all_nms <- paste0(paste0("Long", m, "|coef["), y_mod[[m]]$z$group_cnms[[id_var]], "]") all_nms[assoc["which_coef_zindex",][[m]]]}) e_assoc_nms <- c(e_assoc_nms, paste0("Assoc|", unlist(temp_g_nms))) } } # end jm block new_names <- c(y_intercept_nms, y_beta_nms, if (is_jm) e_intercept_nms, if (is_jm) e_beta_nms, if (is_jm) e_assoc_nms, if (length(standata$q)) c(paste0("b[", b_nms, "]")), y_aux_nms, if (is_jm) e_aux_nms, paste0("Sigma[", Sigma_nms, "]"), paste0(stub, 1:M, "|mean_PPD"), "log-posterior") stanfit@sim$fnames_oi <- 
new_names stanfit_str <- nlist(.Data = stanfit, prior_info, y_mod, cnms, flevels) if (is_jm) stanfit_str <- c(stanfit_str, nlist(e_mod, a_mod, assoc, basehaz, id_var, grp_stuff, scale_assoc)) do.call("structure", stanfit_str) } rstanarm/R/stan_lm.fit.R0000644000176200001440000000654213722762571014666 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' @rdname stan_lm #' @export stan_lm.wfit <- function(x, y, w, offset = NULL, singular.ok = TRUE, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL) { algorithm <- match.arg(algorithm) if (NCOL(y) > 1) { stop("Multivariate responses not supported yet.") } if (colnames(x)[1L] == "(Intercept)") { has_intercept <- 1L x <- x[, -1L, drop = FALSE] if (NCOL(x) == 0L) { stop("'stan_lm' is not suitable for estimating a mean.", "\nUse 'stan_glm' with 'family = gaussian()' instead.", call. = FALSE) } } else { has_intercept <- 0L } if (nrow(x) < ncol(x)) { stop("stan_lm with more predictors than data points is not yet enabled.", call. 
= FALSE) } # allow prior_PD even if no y variable if (is.null(y)) { if (!prior_PD) { stop("Outcome variable must be specified if 'prior_PD' is not TRUE.") } else { y <- fake_y_for_prior_PD(N = NROW(x), family = gaussian()) } } xbar <- colMeans(x) x <- sweep(x, 2L, xbar, FUN = "-") ybar <- mean(y) y <- y - ybar ols <- if (length(w) == 0) lm.fit(x, y) else lm.wfit(x, y, w) b <- coef(ols) NAs <- is.na(b) if (any(NAs) && singular.ok) { x <- x[,!NAs, drop = FALSE] xbar <- xbar[!NAs] ols <- lsfit(x, y, w, intercept = FALSE) b <- coef(ols) } else { b[NAs] <- 0.0 } if (!is.null(w)) { x <- sqrt(w) * x } return(stan_biglm.fit(b, R = qr.R(ols$qr), SSR = crossprod(residuals(ols))[1], N = nrow(x), xbar = xbar, ybar = ybar, s_y = sd(y), has_intercept = has_intercept, ..., prior = prior, prior_intercept = prior_intercept, prior_PD = prior_PD, algorithm = algorithm, adapt_delta = adapt_delta)) } #' @rdname stan_lm #' @export stan_lm.fit <- function(x, y, offset = NULL, singular.ok = TRUE, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL) { # nocov start mf <- match.call(expand.dots = FALSE) mf[[1L]] <- as.name("stan_lm.wfit") mf$w <- as.name("NULL") eval(mf, parent.frame()) } # nocov end rstanarm/R/jm_data_block.R0000644000176200001440000025631214406606742015220 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # Copyright (C) 2016, 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. .datatable.aware <- TRUE # necessary for some reason when data.table is in Suggests #--------------- Miscellaneous and helper functions #' @importFrom survival Surv #' @export survival::Surv # Check input argument is a valid type, and return as a list # # @param arg The user input to the argument # @param type A character vector of valid classes # @param validate_length The required length of the returned list # @return A list validate_arg <- function(arg, type, validate_length = NULL) { nm <- deparse(substitute(arg)) if (inherits(arg, type)) { # input type is valid, so return as a list arg <- list(arg) } else if (is(arg, "list")) { # input type is a list, check each element check <- sapply(arg, function(x) inherits(x, type)) if (!all(check)) STOP_arg(nm, type) } else { # input type is not valid STOP_arg(nm, type) } if (!is.null(validate_length)) { # return list of the specified length if (length(arg) == 1L) arg <- rep(arg, times = validate_length) if (!length(arg) == validate_length) stop2(nm, " is a list of the incorrect length.") } if ("data.frame" %in% type) arg <- lapply(arg, as.data.frame) if ("family" %in% type) arg <- lapply(arg, validate_family) arg } # Check if the user input a list of priors for the longitudinal # submodel, and if not, then return the appropriate list # # @param prior The user input to the prior argument in the stan_mvmer # or stan_jm call # @param M An integer specifying the number of longitudinal submodels broadcast_prior <- function(prior, M) { if (is.null(prior)) { 
return(rep(list(NULL), M)) } else if ("dist" %in% names(prior)) { return(rep(list(prior), M)) } else if (is.list(prior) && length(prior) == M) { return(prior) } else { nm <- deparse(substitute(priorarg)) stop2(nm, " appears to provide prior information separately for the ", "different submodels, but the list is of the incorrect length.") } } # From a vector of length M giving the number of elements (for example number # of parameters or observations) for each submodel, create an indexing array # of dimension M * 2, where column 1 is the beginning index and 2 is the end index # # @param x A numeric vector # @return A length(x) * 2 array get_idx_array <- function(x) { as.array(do.call("rbind", lapply(1:length(x), function(i) { idx_beg <- ifelse(x[i] > 0L, sum(x[0:(i-1)]) + 1, 0L) idx_end <- ifelse(x[i] > 0L, sum(x[0:i]), 0L) c(idx_beg, idx_end) }))) } # Function to return the range or SD of the predictors, used for scaling the priors # This is taken from an anonymous function in stan_glm.fit # # @param x A vector get_scale_value <- function(x) { num.categories <- n_distinct(x) x.scale <- 1 if (num.categories == 2) { x.scale <- diff(range(x)) } else if (num.categories > 2) { x.scale <- sd(x) } return(x.scale) } # Apply a lag to a vector of times # # @param x A numeric vector (e.g. 
observation times) # @param lag A scalar (the lag time) # @return A numeric vector set_lag <- function(x, lag) { x <- x - lag x[x < 0] <- 0.0 # use baseline for lag times prior to baseline x } # Get the required number of (local) horseshoe parameters for a specified prior type # # @param prior_dist An integer indicating the type of prior distribution: # where 1L == normal, 2L == t, 3L == hs, 4L == hs_plus get_nvars_for_hs <- function(prior_dist) { if (prior_dist <= 2L) return(0L) else if (prior_dist == 3L) return(2L) else if (prior_dist == 4L) return(4L) else return(0L) } # Reformulate an expression as the LHS of a model formula # # @param x The expression to reformulate # @return A model formula reformulate_lhs <- function(x) { formula(substitute(LHS ~ 1, list(LHS = x))) } # Reformulate an expression as the RHS of a model formula # # @param x The expression to reformulate # @param subbars A logical specifying whether to call lme4::subbars # on the result # @return A model formula reformulate_rhs <- function(x, subbars = FALSE) { fm <- formula(substitute(~ RHS, list(RHS = x))) if (subbars) { lme4::subbars(fm) } else { fm } } #--------------- Functions related to priors # Deal with covariance prior # # @param prior A list # @param cnms A list of lists, with names of the group specific # terms for each grouping factor # @param ok_dists A list of admissible distributions handle_cov_prior <- function(prior, cnms, ok_dists = nlist("decov", "lkj")) { if (!is.list(prior)) stop(sQuote(deparse(substitute(prior))), " should be a named list") t <- length(unique(cnms)) # num grouping factors p <- sapply(cnms, length) # num terms for each grouping factor prior_dist_name <- prior$dist if (!prior_dist_name %in% unlist(ok_dists)) { stop("The prior distribution should be one of ", paste(names(ok_dists), collapse = ", ")) } else if (prior_dist_name == "decov") { prior_shape <- as.array(maybe_broadcast(prior$shape, t)) prior_scale <- as.array(maybe_broadcast(prior$scale, t)) 
prior_concentration <- as.array(maybe_broadcast(prior$concentration, sum(p[p > 1]))) prior_regularization <- as.array(maybe_broadcast(prior$regularization, sum(p > 1))) prior_df <- NULL } else if (prior_dist_name == "lkj") { prior_shape <- NULL prior_scale <- as.array(maybe_broadcast(prior$scale, sum(p))) prior_concentration <- NULL prior_regularization <- as.array(maybe_broadcast(prior$regularization, sum(p > 1))) prior_df <- as.array(maybe_broadcast(prior$df, sum(p))) } prior_dist <- switch(prior_dist_name, decov = 1L, lkj = 2L) nlist(prior_dist_name, prior_dist, prior_shape, prior_scale, prior_concentration, prior_regularization, prior_df, t, p, prior_autoscale = isTRUE(prior$autoscale)) } # Seperate the information about the covariance prior into a list # of lists. At the top level of the returned list the elements # correpond to each of the grouping factors, and on the second level # of the returned list the elements correpsond to the separate glmer # submodels. This separation is required for autoscaling the priors # on the sds of group level effects, since these are autoscaled based # on the separate Z matrices (design matrices for the random effects). 
# # @param prior_stuff The named list returned by handle_cov_prior # @param cnms The component names for group level terms, combined across # all glmer submodels # @param submodel_cnms The component names for the group level terms, # separately for each glmer submodel (stored as a list of length M) # @return A list with each element containing the covariance prior # information for one grouping factor split_cov_prior <- function(prior_stuff, cnms, submodel_cnms) { if (!prior_stuff$prior_dist_name == "lkj") { return(prior_stuff) # nothing to be done for decov prior } else { M <- length(submodel_cnms) # number of submodels cnms_nms <- names(cnms) # names of grouping factors mark <- 0 new_prior_stuff <- list() for (nm in cnms_nms) { for (m in 1:M) { len <- length(submodel_cnms[[m]][[nm]]) new_prior_stuff[[nm]][[m]] <- prior_stuff if (len) { # submodel 'm' has group level terms for group factor 'nm' beg <- mark + 1; end <- mark + len new_prior_stuff[[nm]][[m]]$prior_scale <- prior_stuff$prior_scale[beg:end] new_prior_stuff[[nm]][[m]]$prior_df <- prior_stuff$prior_df[beg:end] mark <- mark + len } else { new_prior_stuff[[nm]][[m]]$prior_scale <- NULL new_prior_stuff[[nm]][[m]]$prior_df <- NULL new_prior_stuff[[nm]][[m]]$prior_regularization <- NULL } } } } new_prior_stuff } # Autoscaling of priors # # @param prior_stuff A named list returned by a call to handle_glm_prior # @param response A vector containing the response variable, only required if # the priors are to be scaled by the standard deviation of the response (for # gaussian reponse variables only) # @param predictors The predictor matrix, only required if the priors are to be # scaled by the range/sd of the predictors # @param family A family object # @param QR A logical specifying whether QR decomposition is used for the # predictor matrix # @param min_prior_scale The minimum allowed for prior scales # @param assoc A two dimensional array with information about desired association # structure for the joint 
model (returned by a call to validate_assoc). Cannot # be NULL if autoscaling priors for the association parameters. # @param ... Other arguments passed to make_assoc_terms. If autoscaling priors # for the association parameters then this should include 'parts' which # is a list containing the design matrices for the longitudinal submodel # evaluated at the quadrature points, as well as 'beta' and 'b' which are # the parameter values to use when constructing the linear predictor(s) in # make_assoc_terms. # @return A named list with the same structure as returned by handle_glm_prior autoscale_prior <- function(prior_stuff, response = NULL, predictors = NULL, family = NULL, QR = FALSE, min_prior_scale = 1e-12, assoc = NULL, scale_assoc = NULL, ...) { ps <- prior_stuff if (!identical(NULL, response) && is.gaussian(family$family)) { # use response variable for scaling priors if (ps$prior_dist > 0L && ps$prior_autoscale) { ss <- sd(response) ps$prior_scale <- ss * ps$prior_scale } } if (!identical(NULL, predictors) && !QR) { # use predictors for scaling priors if (ps$prior_dist > 0L && ps$prior_autoscale) { ps$prior_scale <- pmax(min_prior_scale, ps$prior_scale / apply(predictors, 2L, get_scale_value)) } } if (!identical(NULL, assoc)) { # Evaluate mean and SD of each of the association terms that will go into # the linear predictor for the event submodel (as implicit "covariates"). # (NB the approximate association terms are calculated using coefs # from the separate longitudinal submodels estimated using glmer). # The mean will be used for centering each association term. # The SD will be used for autoscaling the prior for each association parameter. if (identical(NULL, family)) stop("'family' cannot be NULL when autoscaling association parameters.") assoc_terms <- make_assoc_terms(family = family, assoc = assoc, ...) 
ps$a_xbar <- as.array(apply(assoc_terms, 2L, mean)) if (ps$prior_dist > 0L && ps$prior_autoscale) { if (!identical(NULL, scale_assoc)) assoc_terms <- assoc_terms * scale_assoc a_beta_scale <- apply(assoc_terms, 2L, get_scale_value) ps$prior_scale <- pmax(min_prior_scale, ps$prior_scale / a_beta_scale) } } ps$prior_scale <- as.array(pmin(.Machine$double.xmax, ps$prior_scale)) ps } # Create "prior.info" attribute for stan_{mvmer,jm}; needed for prior_summary() # # @param user_* The user's priors. These should be passed in after broadcasting # the df/location/scale arguments if necessary. # @param y_has_intercept Vector of T/F, does each long submodel have an intercept? # @param y_has_predictors Vector of T/F, does each long submodel have predictors? # @param e_has_intercept T/F, does event submodel have an intercept? # @param e_has_predictors T/F, does event submodel have predictors? # @param has_assoc Logical specifying whether the model has an association # structure. Can be NULL if the prior summary is not for a joint model. # @param adjusted_prior_*_scale Adjusted scales computed if using autoscaled priors # @param family A list of family objects. # @param basehaz A list with information about the baseline hazard. # @param stub_for_names Character string with the text stub to use in the # names identifying the glmer or longitudinal submodels. # @return A named list with components 'prior*', 'prior*_intercept', # 'prior_covariance' and 'prior*_aux' each of which itself is a list # containing the needed values for prior_summary. 
summarize_jm_prior <-
  function(user_priorLong = NULL,
           user_priorLong_intercept = NULL,
           user_priorLong_aux = NULL,
           user_priorEvent = NULL,
           user_priorEvent_intercept = NULL,
           user_priorEvent_aux = NULL,
           user_priorEvent_assoc = NULL,
           user_prior_covariance = NULL,
           b_user_prior_stuff = NULL,
           b_prior_stuff = NULL,
           y_has_intercept = NULL,
           e_has_intercept = NULL,
           y_has_predictors = NULL,
           e_has_predictors = NULL,
           has_assoc = NULL,
           adjusted_priorLong_scale = NULL,
           adjusted_priorLong_intercept_scale = NULL,
           adjusted_priorLong_aux_scale = NULL,
           adjusted_priorEvent_scale = NULL,
           adjusted_priorEvent_intercept_scale = NULL,
           adjusted_priorEvent_aux_scale = NULL,
           adjusted_priorEvent_assoc_scale = NULL,
           family = NULL,
           basehaz = NULL,
           stub_for_names = "Long") {
  if (!is.null(family) && !is(family, "list"))
    stop("'family' should be a list of family objects, one for each submodel.")
  # Bug fix: the old condition (!is.logical(x) && length(x) == 1L) only
  # rejected a non-logical scalar; it never rejected wrong-length input.
  if (!is.null(has_assoc) && !(is.logical(has_assoc) && length(has_assoc) == 1L))
    stop("'has_assoc' should be a logical vector of length 1.")
  M <- length(family)
  prior_list <- list()

  # Priors for the longitudinal (glmer) submodels
  if (!is.null(user_priorLong)) {
    rescaled_coefLong <- mapply(check_if_rescaled, user_priorLong,
                                y_has_predictors, adjusted_priorLong_scale)
    rescaled_intLong <- mapply(check_if_rescaled, user_priorLong_intercept,
                               y_has_intercept, adjusted_priorLong_intercept_scale)
    rescaled_auxLong <- mapply(check_if_rescaled, user_priorLong_aux,
                               TRUE, adjusted_priorLong_aux_scale)
    for (m in seq_len(M)) {
      user_priorLong[[m]] <-
        rename_t_and_cauchy(user_priorLong[[m]], y_has_predictors[m])
      user_priorLong_intercept[[m]] <-
        rename_t_and_cauchy(user_priorLong_intercept[[m]], y_has_intercept[m])
      user_priorLong_aux[[m]] <-
        rename_t_and_cauchy(user_priorLong_aux[[m]], TRUE)
    }
    prior_list$priorLong <- list_nms(lapply(seq_len(M), function(m) {
      if (!y_has_predictors[m])
        NULL
      else
        with(user_priorLong[[m]], list(
          dist = prior_dist_name,
          location = prior_mean,
          scale = prior_scale,
          adjusted_scale = if (rescaled_coefLong[m])
            adjusted_priorLong_scale[[m]] else NULL,
          df = if (prior_dist_name %in%
                   c("student_t", "hs", "hs_plus", "lasso", "product_normal"))
            prior_df else NULL
        ))
    }), M, stub = stub_for_names)
    prior_list$priorLong_intercept <- list_nms(lapply(seq_len(M), function(m) {
      if (!y_has_intercept[m])
        NULL
      else
        with(user_priorLong_intercept[[m]], list(
          dist = prior_dist_name,
          location = prior_mean,
          scale = prior_scale,
          adjusted_scale = if (rescaled_intLong[m])
            adjusted_priorLong_intercept_scale[[m]] else NULL,
          df = if (prior_dist_name %in% "student_t") prior_df else NULL
        ))
    }), M, stub = stub_for_names)
    aux_name <- lapply(family, .rename_aux)
    prior_list$priorLong_aux <- list_nms(lapply(seq_len(M), function(m) {
      if (is.na(aux_name[[m]]))
        NULL  # no auxiliary parameter for this submodel's family
      else
        with(user_priorLong_aux[[m]], list(
          dist = prior_dist_name,
          location = if (!is.na(prior_dist_name) &&
                         prior_dist_name != "exponential") prior_mean else NULL,
          scale = if (!is.na(prior_dist_name) &&
                      prior_dist_name != "exponential") prior_scale else NULL,
          adjusted_scale = if (rescaled_auxLong[m])
            adjusted_priorLong_aux_scale[[m]] else NULL,
          df = if (!is.na(prior_dist_name) &&
                   prior_dist_name %in% "student_t") prior_df else NULL,
          rate = if (!is.na(prior_dist_name) &&
                     prior_dist_name %in% "exponential") 1 / prior_scale else NULL,
          aux_name = aux_name[[m]]
        ))
    }), M, stub = stub_for_names)
  }

  # Priors for the event submodel
  if (!is.null(user_priorEvent)) {
    rescaled_coefEvent <- check_if_rescaled(user_priorEvent, e_has_predictors,
                                            adjusted_priorEvent_scale)
    rescaled_intEvent <- check_if_rescaled(user_priorEvent_intercept,
                                           e_has_intercept,
                                           adjusted_priorEvent_intercept_scale)
    rescaled_auxEvent <- check_if_rescaled(user_priorEvent_aux, TRUE,
                                           adjusted_priorEvent_aux_scale)
    user_priorEvent <- rename_t_and_cauchy(user_priorEvent, e_has_predictors)
    user_priorEvent_intercept <-
      rename_t_and_cauchy(user_priorEvent_intercept, e_has_intercept)
    user_priorEvent_aux <- rename_t_and_cauchy(user_priorEvent_aux, TRUE)
    prior_list$priorEvent <- if (!e_has_predictors)
      NULL
    else
      with(user_priorEvent, list(
        dist = prior_dist_name,
        location = prior_mean,
        scale = prior_scale,
        adjusted_scale = if (rescaled_coefEvent)
          adjusted_priorEvent_scale else NULL,
        df = if (prior_dist_name %in%
                 c("student_t", "hs", "hs_plus", "lasso", "product_normal"))
          prior_df else NULL
      ))
    prior_list$priorEvent_intercept <- if (!e_has_intercept)
      NULL
    else
      with(user_priorEvent_intercept, list(
        dist = prior_dist_name,
        location = prior_mean,
        scale = prior_scale,
        adjusted_scale = if (rescaled_intEvent)
          adjusted_priorEvent_intercept_scale else NULL,
        df = if (prior_dist_name %in% "student_t") prior_df else NULL
      ))
    e_aux_name <- .rename_e_aux(basehaz)
    prior_list$priorEvent_aux <- with(user_priorEvent_aux, list(
      dist = prior_dist_name,
      location = prior_mean,
      scale = prior_scale,
      adjusted_scale = if (rescaled_auxEvent)
        adjusted_priorEvent_aux_scale else NULL,
      df = if (!is.na(prior_dist_name) &&
               prior_dist_name %in% "student_t") prior_df else NULL,
      aux_name = e_aux_name
    ))
  }

  # Priors for the association parameters
  if (!is.null(user_priorEvent_assoc)) {
    rescaled_coefAssoc <- check_if_rescaled(user_priorEvent_assoc, has_assoc,
                                            adjusted_priorEvent_assoc_scale)
    user_priorEvent_assoc <- rename_t_and_cauchy(user_priorEvent_assoc, has_assoc)
    prior_list$priorEvent_assoc <- if (!has_assoc)
      NULL
    else
      with(user_priorEvent_assoc, list(
        dist = prior_dist_name,
        location = prior_mean,
        scale = prior_scale,
        adjusted_scale = if (rescaled_coefAssoc)
          adjusted_priorEvent_assoc_scale else NULL,
        df = if (prior_dist_name %in%
                 c("student_t", "hs", "hs_plus", "lasso", "product_normal"))
          prior_df else NULL
      ))
  }

  # Prior for the covariance matrix of the group level terms
  if (length(user_prior_covariance)) {
    if (user_prior_covariance$dist == "decov") {
      prior_list$prior_covariance <- user_prior_covariance
    } else if (user_prior_covariance$dist == "lkj") {
      # lkj prior for correlation matrix
      prior_list$prior_covariance <- user_prior_covariance
      # half-student_t prior on SD for each ranef (possibly autoscaled)
      prior_list$prior_covariance$df <- b_user_prior_stuff$prior_df
      prior_list$prior_covariance$scale <- b_user_prior_stuff$prior_scale
      adj_scales <- uapply(b_prior_stuff, FUN = uapply, '[[', "prior_scale")
      if (!all(b_user_prior_stuff$prior_scale == adj_scales)) {
        prior_list$prior_covariance$adjusted_scale <- adj_scales
      } else {
        prior_list$prior_covariance$adjusted_scale <- NULL
      }
    } else {
      prior_list$prior_covariance <- NULL
    }
  }

  # Drop the "Long" stub from the names if summarising a stan_mvmer model
  if (stub_for_names != "Long") {
    names(prior_list) <- gsub("Long", "", names(prior_list))
  }
  return(prior_list)
}

# Get name of auxiliary parameters for event submodel
#
# @param basehaz A list with information about the baseline hazard
.rename_e_aux <- function(basehaz) {
  nm <- basehaz$type_name
  if (nm == "weibull") "weibull-shape" else
    if (nm == "bs") "spline-coefficients" else
      if (nm == "piecewise") "piecewise-coefficients" else NA
}

# Check if priors were autoscaled
#
# @param prior_stuff A list with prior info returned by handle_glm_prior
# @param has A logical checking, for example, whether the model has_predictors,
#   has_intercept, has_assoc, etc
# @param adjusted_prior_scale The prior scale after any autoscaling
check_if_rescaled <- function(prior_stuff, has, adjusted_prior_scale) {
  prior_stuff$prior_autoscale && has &&
    !is.na(prior_stuff$prior_dist_name) &&
    !all(prior_stuff$prior_scale == adjusted_prior_scale)
}

# Rename the t prior as being student-t or cauchy
#
# @param prior_stuff A list with prior info returned by handle_glm_prior
# @param has A logical checking, for example, whether the model has_predictors,
#   has_intercept, has_assoc, etc
rename_t_and_cauchy <- function(prior_stuff, has) {
  if (has && prior_stuff$prior_dist_name %in% "t") {
    if (all(prior_stuff$prior_df == 1)) {
      prior_stuff$prior_dist_name <- "cauchy"
    } else {
      prior_stuff$prior_dist_name <- "student_t"
    }
  }
  return(prior_stuff)
}

#--------------- Functions related to longitudinal submodel

# Construct a list with information on the glmer submodel
#
# @param formula The model formula for the glmer submodel.
# @param data The data for the glmer submodel.
# @param family The family object for the glmer submodel.
# @return A named list with the following elements:
#   y: named list with the response vector and related info.
#   x: named list with the fe design matrix and related info.
#   z: named list with the re design matrices and related info.
#   terms: the model.frame terms object with bars "|" replaced by "+".
#   model_frame: The model frame with all variables used in the
#     model formula.
#   formula: The model formula.
#   reTrms: returned by lme4::glFormula$reTrms.
#   family: the (modified) family object for the glmer submodel.
#   intercept_type: named list with info about the type of
#     intercept required for the glmer submodel.
#   has_aux: logical specifying whether the glmer submodel
#     requires an auxiliary parameter.
handle_y_mod <- function(formula, data, family) {
  mf <- stats::model.frame(lme4::subbars(formula), data)
  if (!length(formula) == 3L)
    stop2("An outcome variable must be specified.")

  # Random effects structure, as parsed by lme4
  reTrms <- lme4::glFormula(formula, data)$reTrms

  # Response vector and fixed/random effects design matrices
  y <- make_y_for_stan(formula, mf, family)
  x <- make_x_for_stan(formula, mf)
  z <- make_z_for_stan(formula, mf)

  # Terms object, carrying predvars info for the fe and re parts
  terms <- append_predvars_attribute(attr(mf, "terms"), formula, data)

  # Binomial with >1 trials is not allowed by stan_{mvmer,jm}
  is_binomial <- is.binomial(family$family)
  is_bernoulli <- is_binomial && NCOL(y$y) == 1L && all(y$y %in% 0:1)
  if (is_binomial && !is_bernoulli)
    STOP_binomial()

  # Flags for the intercept type and auxiliary parameter, plus the
  # numeric family/link codes that Stan expects
  intercept_type <- check_intercept_type(x, family)
  has_aux <- check_for_aux(family)
  family <- append_mvmer_famlink(family, is_bernoulli)

  # Offset term, if one was specified
  offset <- model.offset(mf)
  has_offset <- as.numeric(!is.null(offset))

  nlist(y, x, z, reTrms, model_frame = mf, formula, terms, family,
        intercept_type, has_aux, offset, has_offset)
}

# Return the response vector for passing to Stan
#
# @param formula The model formula
# @param model_frame The model frame
# @param family A family object
# @return A named list with the following elements:
#   y: the response vector
#   real: the response vector if real, else numeric(0)
#   integer: the response vector if integer, else integer(0)
#   resp_type: 1L if response is real, 2L if response is integer
make_y_for_stan <- function(formula, model_frame, family) {
  y <- as.vector(model.response(model_frame))
  y <- validate_glm_outcome_support(y, family)
  # Stan receives the outcome in either the 'real' or the 'integer' slot,
  # with the unused slot left empty
  if (check_response_real(family)) {
    nlist(y, real = y, integer = integer(0), resp_type = 1L)
  } else {
    nlist(y, real = numeric(0), integer = y, resp_type = 2L)
  }
}

# Return the design matrix for passing to Stan
#
# @param formula The model formula.
# @param model_frame The model frame.
# @return A named list with the following elements:
#   x: the fe model matrix, not centred and may have intercept.
#   xtemp: fe model matrix, centred and no intercept.
#   x_form: the formula for the fe model matrix.
#   x_bar: the column means of the model matrix.
#   has_intercept: logical for whether the submodel has an intercept
#   N,K: number of rows (observations) and columns (predictors) in the
#     fixed effects model matrix
make_x_for_stan <- function(formula, model_frame) {
  x_form <- lme4::nobars(formula)
  x <- model.matrix(x_form, model_frame)
  has_intercept <- check_for_intercept(x, logical = TRUE)

  # Centre the design matrix (without the intercept column)
  xtemp <- drop_intercept(x)
  x_bar <- colMeans(xtemp)
  xtemp <- sweep(xtemp, 2, x_bar, FUN = "-")

  # Any column with < 2 unique values indicates an empty interaction level
  num_unique <- apply(xtemp, 2L, function(col) length(unique(col)))
  degenerate <- num_unique < 2
  if (any(degenerate))
    stop2("Cannot deal with empty interaction levels found in columns: ",
          paste(colnames(xtemp)[degenerate], collapse = ", "))

  nlist(x, xtemp, x_form, x_bar, has_intercept,
        N = NROW(xtemp), K = NCOL(xtemp))
}

# Return design matrices for the group level terms for passing to Stan
#
# @param formula The model formula
# @param model_frame The model frame
# @return A named list with the following elements:
#   z: a list with each element containing the random effects model
#     matrix for one grouping factor.
#   z_forms: a list with each element containing the model formula for
#     one grouping factor.
#   group_vars: a character vector with the name of each of the
#     grouping factors
#   group_cnms: a list with each element containing the names of the
#     group level parameters for one grouping factor
#   group_list: a list with each element containing the vector of group
#     IDs for the rows of z
#   nvars: a vector with the number of group level parameters for each
#     grouping factor
#   ngrps: a vector with the number of groups for each grouping factor
make_z_for_stan <- function(formula, model_frame) {
  bars <- lme4::findbars(formula)
  if (length(bars) > 2L)
    stop2("A maximum of 2 grouping factors are allowed.")

  # Split each "(terms | group)" bar into its formula and grouping variable
  z_parts <- lapply(bars, split_at_bars)
  z_forms <- fetch(z_parts, "re_form")
  group_vars <- fetch(z_parts, "group_var")

  # One re design matrix per grouping factor
  z <- lapply(z_forms, model.matrix, model_frame)
  group_cnms <- lapply(z, colnames)
  group_list <- lapply(group_vars, function(v) factor(model_frame[[v]]))
  nvars <- lapply(group_cnms, length)
  ngrps <- lapply(group_list, n_distinct)

  # Name every returned component after its grouping factor
  names(z) <- names(z_forms) <- names(group_cnms) <-
    names(group_list) <- names(nvars) <- names(ngrps) <- group_vars

  nlist(z, z_forms, group_vars, group_cnms, group_list, nvars, ngrps)
}

# Return info on the required type of intercept
#
# @param X The model matrix
# @param family A family object
# @return A named list with the following elements:
#   type: character string specifying the type of bounds to use
#     for the intercept.
#   number: an integer specifying the type of bounds to use
#     for the intercept where 0L = no intercept, 1L = no bounds
#     on intercept, 2L = lower bound, 3L = upper bound.
check_intercept_type <- function(X, family) {
  fam <- family$family
  link <- family$link
  if (!X$has_intercept) {
    type <- "none"
    # Some family/link combinations cannot be estimated without an intercept
    needs_intercept <- (!is.gaussian(fam) && link == "identity") ||
      (is.gamma(fam) && link == "inverse") ||
      (is.binomial(fam) && link == "log")
    if (needs_intercept)
      stop2("To use the specified combination of family and link (", fam,
            ", ", link, ") the model must have an intercept.")
  } else if (fam == "binomial" && link == "log") {
    # binomial with log link: intercept must be bounded above
    type <- "upper_bound"
  } else if (fam == "binomial" || link == "log" || fam == "gaussian") {
    # binomial (non-log link), any log link, or gaussian: unbounded
    type <- "no_bound"
  } else {
    # gamma/inv-gaussian/poisson/nb with a non-log link: bounded below
    type <- "lower_bound"
  }
  number <- switch(type,
                   none = 0L,
                   no_bound = 1L,
                   lower_bound = 2L,
                   upper_bound = 3L)
  nlist(type, number)
}

# Check the id_var argument is valid and is included appropriately in the
# formulas for each of the longitudinal submodels
#
# @param id_var The character string that the user specified for the id_var
#   argument -- will have been set to NULL if the argument was missing.
# @param y_cnms A list of length M with the cnms for each longitudinal submodel
# @param y_flist A list of length M with the flist for each longitudinal submodel
# @return Returns the character string corresponding to the appropriate id_var.
#   This will either be the user specified id_var argument or the only grouping
#   factor.
check_id_var <- function(id_var, y_cnms, y_flist) {
  n_factors <- sapply(y_cnms, length)
  if (any(n_factors > 1L)) {
    # More than one grouping factor: the user must identify the subject ID
    if (is.null(id_var)) {
      stop("'id_var' must be specified when using more than one grouping factor",
           call. = FALSE)
    } else {
      lapply(y_cnms, function(x) {
        if (!(id_var %in% names(x)))
          stop("'id_var' must be included as a grouping factor in each ",
               "of the longitudinal submodels", call. = FALSE)
      })
    }
    return(id_var)
  }
  # Only one grouping factor (assumed to be the subject ID)
  only_cnm <- unique(sapply(y_cnms, names))
  if (length(only_cnm) > 1L)
    stop("The grouping factor (ie, subject ID variable) is not the ",
         "same in all longitudinal submodels", call. = FALSE)
  if ((!is.null(id_var)) && (!identical(id_var, only_cnm)))
    warning("The user specified 'id_var' (", paste(id_var),
            ") and the assumed ID variable based on the single ",
            "grouping factor (", paste(only_cnm), ") are not the same; ",
            "'id_var' will be ignored", call. = FALSE, immediate. = TRUE)
  return(only_cnm)
}

# Check the family and link function are supported by stan_{mvmer,jm}
#
# @param family A family object
# @param supported_families A character vector of supported family names
# @return A family object
validate_famlink <- function(family, supported_families) {
  famname <- family$family
  fam_idx <- which(supported_families == famname)
  if (length(fam_idx) == 0L)
    stop2("'family' must be one of ", paste(supported_families, collapse = ", "))
  supported_links <- supported_glm_links(famname)
  link_idx <- which(supported_links == family$link)
  if (length(link_idx) == 0L)
    stop("'link' must be one of ", paste(supported_links, collapse = ", "))
  return(family)
}

# Append a family object with numeric family and link information used by Stan
#
# @param family The existing family object
# @param is_bernoulli Logical specifying whether the family should be bernoulli
# @return A family object with two appended elements:
#   mvmer_family: an integer telling Stan which family
#   mvmer_link: an integer telling Stan which link function (varies by family!)
append_mvmer_famlink <- function(family, is_bernoulli = FALSE) {
  famname <- family$family
  # Integer code for the family, as expected by the Stan model code
  family$mvmer_family <- switch(
    famname,
    gaussian = 1L,
    Gamma = 2L,
    inverse.gaussian = 3L,
    binomial = 5L, # bernoulli = 4L changed later
    poisson = 6L,
    "neg_binomial_2" = 7L)
  if (is_bernoulli)
    family$mvmer_family <- 4L
  # Integer code for the link function; NB the coding varies by family
  supported_links <- supported_glm_links(famname)
  link <- which(supported_links == family$link)
  family$mvmer_link <- link
  return(family)
}

# Split the random effects part of a model formula into
# - the formula part (ie. the formula on the LHS of "|"), and
# - the name of the grouping factor (ie. the variable on the RHS of "|")
#
# @param x Random effects part of a model formula, as returned by lme4::findbars
# @return A named list with the following elements:
#   re_form: a formula specifying the random effects structure
#   group_var: the name of the grouping factor
split_at_bars <- function(x) {
  terms <- strsplit(deparse(x, 500), "\\s\\|\\s")[[1L]]
  if (!length(terms) == 2L)
    stop2("Could not parse the random effects formula.")
  re_form <- formula(paste("~", terms[[1L]]))
  group_var <- terms[[2L]]
  nlist(re_form, group_var)
}

# Function to check if the response vector is real or integer
#
# @param family A family object
# @return A logical specifying whether the response is real (TRUE) or integer (FALSE)
check_response_real <- function(family) {
  !(family$family %in% c("binomial", "poisson", "neg_binomial_2"))
}

# Function to check if the submodel should include an auxiliary term
#
# @param family A family object
# @return A logical specifying whether the submodel includes an auxiliary term
check_for_aux <- function(family) {
  !(family$family %in% c("binomial", "poisson"))
}

# Function to return a single cnms object for all longitudinal submodels
#
# @param x A list, with each element being a cnms object returned by (g)lmer
get_common_cnms <- function(x, stub = "Long") {
  nms <- lapply(x, names)
  unique_nms <- unique(unlist(nms))
  cnms <- lapply(seq_along(unique_nms), function(i) {
    nm <- unique_nms[i]
    # Prefix each coefficient name with the submodel it came from,
    # e.g. "Long1|(Intercept)"  (seq_along replaces the 1:length(x)
    # anti-pattern, which misbehaves for empty input)
    unlist(lapply(seq_along(x), function(m)
      if (nm %in% nms[[m]]) paste0(stub, m, "|", x[[m]][[nm]])))
  })
  names(cnms) <- unique_nms
  cnms
}

# Function to return a single list with the factor levels for each
# grouping factor, but collapsed across all longitudinal submodels
#
# @param x A list containing the flist object for each of the submodels
get_common_flevels <- function(x) {
  nms <- lapply(x, names)
  unique_nms <- unique(unlist(nms))
  flevels <- lapply(seq_along(unique_nms), function(i) {
    nm <- unique_nms[i]
    flevels_nm <- lapply(seq_along(x), function(m)
      if (nm %in% nms[[m]]) levels(x[[m]][[nm]]))
    flevels_nm <- rm_null(unique(flevels_nm))
    if (length(flevels_nm) > 1L)
      stop2("The group factor levels must be the same for all submodels.")
    flevels_nm[[1L]]
  })
  names(flevels) <- unique_nms
  flevels
}

# Take a list of cnms objects (each element containing the cnms for one
# submodel) and assess whether the specified variable is included as a
# grouping factor in all of the submodels
#
# @param y_cnms A list with each element containing the cnms object for
#   one submodel.
# @param group_var The name of the grouping factor variable.
# @return The name of the grouping factor, or an error if it doesn't
#   appear in every submodel.
validate_grouping_factor <- function(y_cnms, group_var) {
  # vapply (rather than sapply) guarantees a logical vector even when
  # y_cnms is empty or malformed
  check <- vapply(y_cnms, function(x) group_var %in% names(x), logical(1L))
  if (!all(check)) {
    nm <- deparse(substitute(group_var))
    stop2(nm, " must be a grouping factor in all longitudinal submodels.")
  }
  group_var
}

# Check the factor list corresponding to subject ID is the same in each
# of the longitudinal submodels
#
# @param id_var The name of the ID variable
# @param y_flist A list containing the flist objects returned for each
#   separate longitudinal submodel
# @return A vector of factor levels corresponding to the IDs appearing
#   in the longitudinal submodels
check_id_list <- function(id_var, y_flist) {
  id_list <- unique(lapply(y_flist, function(x) levels(x[[id_var]])))
  if (length(id_list) > 1L)
    stop2("The subject IDs are not the same in all longitudinal submodels.")
  unlist(id_list)
}

# Take the model frame terms object and append with attributes
# that provide the predvars for the fixed and random effects
# parts, based on the model formula and data
#
# @param terms The existing model frame terms object
# @param formula The formula that was used to build the model frame
#   (but prior to having called lme4::subbars on it!)
# @param data The data frame that was used to build the model frame
# @return A terms object with predvars.fixed and predvars.random as
#   additional attributes
append_predvars_attribute <- function(terms, formula, data) {
  fe_form <- lme4::nobars(formula)
  re_form <- lme4::subbars(justRE(formula, response = TRUE))
  fe_frame <- stats::model.frame(fe_form, data)
  re_frame <- stats::model.frame(re_form, data)
  fe_terms <- attr(fe_frame, "terms")
  re_terms <- attr(re_frame, "terms")
  # NB previously the predvars were extracted twice; use the locals instead
  fe_predvars <- attr(fe_terms, "predvars")
  re_predvars <- attr(re_terms, "predvars")
  attr(terms, "predvars.fixed") <- fe_predvars
  attr(terms, "predvars.random") <- re_predvars
  terms
}

# Function to substitute variables in the formula of a fitted model
# with the corresponding predvars based on the terms object for the model.
# (This is useful since lme4::glFormula doesn't allow a terms object to be
# passed as the first argument instead of a model formula).
#
# @param mod A (g)lmer model object from which to extract the formula and terms
# @return A reformulated model formula with variables replaced by predvars
use_predvars <- function(mod, keep_response = TRUE) {
  fm <- formula(mod)
  # Variables and their predvars for the fixed and random effects parts;
  # the leading "list" element is dropped with [-1]
  ff <- lapply(attr(terms(mod, fixed.only = TRUE), "variables"), deparse, 500)[-1]
  fr <- lapply(attr(terms(mod, random.only = TRUE), "variables"), deparse, 500)[-1]
  pf <- lapply(attr(terms(mod, fixed.only = TRUE), "predvars"), deparse, 500)[-1]
  pr <- lapply(attr(terms(mod, random.only = TRUE), "predvars"), deparse, 500)[-1]
  if (!identical(c(ff, fr), c(pf, pr))) {
    for (j in seq_along(ff)) fm <- gsub(ff[[j]], pf[[j]], fm, fixed = TRUE)
    for (j in seq_along(fr)) fm <- gsub(fr[[j]], pr[[j]], fm, fixed = TRUE)
  }
  rhs <- fm[[length(fm)]]
  if (is(rhs, "call")) rhs <- deparse(rhs, 500L)
  if (keep_response && length(fm) == 3L) {
    fm <- reformulate(rhs, response = formula(mod)[[2L]])
  } else if (keep_response && length(fm) == 2L) {
    warning("No response variable found, reformulating RHS only.",
            call. = FALSE)
    fm <- reformulate(rhs, response = NULL)
  } else {
    fm <- reformulate(rhs, response = NULL)
  }
  fm
}

# Check that the observation times for the longitudinal submodel are all
# positive and not observed after the individual's event time
#
# @param data A data frame (data for one longitudinal submodel)
# @param eventtimes A named numeric vector with the event time for each
#   individual. The vector names should be the individual ids.
# @param id_var,time_var The ID and time variable in the longitudinal data.
# @return Nothing.
validate_observation_times <- function(data, eventtimes, id_var, time_var) {
  if (!time_var %in% colnames(data)) STOP_no_var(time_var)
  if (!id_var %in% colnames(data)) STOP_no_var(id_var)
  if (any(data[[time_var]] < 0))
    stop2("Values for the time variable (", time_var,
          ") should not be negative.")
  mt <- tapply(data[[time_var]], factor(data[[id_var]]), max) # max observation time
  nms <- names(eventtimes) # patient IDs
  if (is.null(nms))
    stop2("Bug found: cannot find names in the vector of event times.")
  # vapply (not sapply) so that an empty ID vector still yields logical(0)
  sel <- which(vapply(nms, FUN = function(i) mt[i] > eventtimes[i],
                      FUN.VALUE = logical(1L)))
  if (length(sel))
    # NB message grammar fixed ("...data that are later than...")
    stop2("The following individuals have observation times in the longitudinal data ",
          "that are later than their event time: ", paste(nms[sel], collapse = ", "))
}

#--------------- Functions related to event submodel

# Construct a list with information on the event submodel
#
# @param formula The model formula for the event submodel
# @param data The data for the event submodel
# @param qnodes An integer specifying the number of GK quadrature nodes
# @param id_var The name of the ID variable
# @param y_id_list A character vector with a unique list of subject IDs
#   (factor levels) that appeared in the longitudinal submodels
# @return A named list with the following elements:
#   mod: The fitted Cox model.
#   entrytime: Named vector of numeric entry times.
#   eventtime: Named vector of numeric event times.
#   status: Named vector of event/failure indicators.
# Npat: Number of individuals. # Nevents: Total number of events/failures. # id_list: A vector of unique subject IDs, as a factor. # qnodes: The number of GK quadrature nodes. # qwts,qpts: Vector of unstandardised quadrature weights and points. # The vector is ordered such that the first Npat items are the # weights/locations of the first quadrature point, then the second # Npat items are the weights/locations for the second quadrature # point, and so on. # qids: The subject IDs corresponding to each element of qwts/qpts. # epts: The event times, but only for individuals who were NOT censored # (i.e. those individual who had an event). # eids: The subject IDs corresponding to each element of epts. # cpts: Combined vector of failure and quadrature times: c(epts, qpts). # cids: Combined vector subject IDs: c(eids, qids). # Xq: The model matrix for the event submodel, centred and no intercept. # Xbar: Vector of column means for the event submodel model matrix. # K: Number of predictors for the event submodel. # norm_const: Scalar, the constant used to shift the event submodel # linear predictor (equal to the log of the mean incidence rate). # model_frame: The model frame for the fitted Cox model, but with the # subject ID variable also included. # tvc: Logical, if TRUE then a counting type Surv() object was used # in the fitted Cox model (ie. time varying covariates). 
handle_e_mod <- function(formula, data, qnodes, id_var, y_id_list) {
  if (!requireNamespace("survival"))
    stop("the 'survival' package must be installed to use this function")
  if (!requireNamespace("data.table"))
    stop("the 'data.table' package must be installed to use this function")

  # Fit the Cox model, retaining the design matrix
  mod <- survival::coxph(formula, data = data, x = TRUE)

  # Rebuild the model frame with the subject ID variable included
  RHS_with_id <- paste(deparse(formula[[3L]]), "+", id_var)
  formula_with_id <- reformulate(RHS_with_id, response = formula[[2L]])
  mf1 <- model.frame(formula_with_id, data = data)
  mf1[[id_var]] <- promote_to_factor(mf1[[id_var]]) # same as lme4
  mf2 <- unclass_Surv_column(mf1)

  # Work out the start/stop column names based on the Surv() object type
  if (attr(mod$y, "type") == "counting") {
    tvc <- TRUE; t0_var <- "start"; t1_var <- "stop"
  } else if (attr(mod$y, "type") == "right") {
    tvc <- FALSE; t0_var <- "time"; t1_var <- "time"
  } else {
    stop2("Only 'right' or 'counting' type Surv objects are allowed ",
          "on the LHS of 'formulaEvent'.")
  }

  # Split model frame and find event time and status
  mf_by_id <- split(mf2, mf2[, id_var])
  mf_entry <- do.call(rbind, lapply(
    mf_by_id, FUN = function(x) x[which.min(x[, t0_var]), ]))
  mf_event <- do.call(rbind, lapply(
    mf_by_id, FUN = function(x) x[which.max(x[, t1_var]), ]))
  entrytime <- mf_entry[[t0_var]]
  # Bug fix: was 'any(entrytime) > 0', which coerced the numeric vector to
  # logical (with a warning) instead of testing for positive entry times
  if (tvc && any(entrytime > 0))
    warning("Note that delayed entry is not yet implemented. It will ",
            "be assumed that all individuals were at risk from time 0.")
  entrytime <- rep(0, length(entrytime)) # no delayed entry
  eventtime <- mf_event[[t1_var]]
  status <- mf_event[["status"]]
  id_list <- factor(mf_event[[id_var]])
  names(entrytime) <- names(eventtime) <- names(status) <- id_list

  # Mean log incidence rate - used for shifting log baseline hazard
  norm_const <- log(sum(status) / sum(eventtime))

  # Error checks for the ID variable
  if (!identical(y_id_list, levels(factor(id_list))))
    stop2("The patient IDs (levels of the grouping factor) included ",
          "in the longitudinal and event submodels do not match")
  if (is.unsorted(factor(id_list)))
    stop2("'dataEvent' needs to be sorted by the subject ",
          "ID/grouping variable")
  if (!identical(length(y_id_list), length(id_list)))
    stop2("The number of patients differs between the longitudinal and ",
          "event submodels. Perhaps you intended to use 'start/stop' notation ",
          "for the Surv() object.")

  # Quadrature weights/times/ids
  qq <- get_quadpoints(qnodes)
  qwts <- uapply(qq$weights, unstandardise_qwts, entrytime, eventtime)
  qpts <- uapply(qq$points, unstandardise_qpts, entrytime, eventtime)
  qids <- rep(id_list, qnodes)

  # Event times/ids (for failures only)
  epts <- eventtime[status == 1] # event times (for failures only)
  eids <- id_list[status == 1]   # subject ids (for failures only)

  # Both event times/ids and quadrature times/ids
  cpts <- c(epts, qpts)
  cids <- unlist(list(eids, qids)) # NB using c(.) demotes factors to integers

  # Evaluate design matrix at event and quadrature times
  if (ncol(mod$x)) {
    # Convert model frame from Cox model into a data.table
    dt <- prepare_data_table(mf2, id_var = id_var, time_var = t0_var)
    # Obtain rows of the model frame that are as close as possible to
    # the event times (failures only) and quadrature times
    mf2 <- rolling_merge(dt, ids = cids, times = cpts)
    # Construct design matrix evaluated at event and quadrature times
    fm_RHS <- reformulate(attr(terms(mod), "term.labels"))
    Xq <- model.matrix(fm_RHS, data = mf2)
    Xq <- Xq[, -1L, drop = FALSE] # drop intercept
    # Centre the design matrix
    Xbar <- colMeans(Xq)
    Xq <- sweep(Xq, 2, Xbar, FUN = "-")
    sel <- (2 > apply(Xq, 2L, function(x) length(unique(x))))
    if (any(sel)) {
      # drop any column of x with < 2 unique values (empty interaction levels)
      warning("Dropped empty interaction levels: ",
              paste(colnames(Xq)[sel], collapse = ", "))
      Xq <- Xq[, !sel, drop = FALSE]
      Xbar <- Xbar[!sel]
    }
  } else {
    Xq <- matrix(0, 0L, 0L)
    Xbar <- rep(0, 0L)
  }

  nlist(mod, entrytime, eventtime, status, Npat = length(eventtime),
        Nevents = sum(status), id_list, qnodes, qwts, qpts, qids, epts,
        eids, cpts, cids, Xq, Xbar, K = ncol(Xq), norm_const,
        model_frame = mf1, tvc)
}

# Deal with the baseline hazard
#
# @param basehaz A string specifying the type of baseline hazard
# @param basehaz_ops A named list with elements df, knots
# @param ok_basehaz A list of admissible baseline hazards
# @param eventtime A numeric vector with eventtimes for each individual
# @param status A numeric vector with event indicators for each individual
# @return A named list with the following elements:
#   type: integer specifying the type of baseline hazard, 1L = weibull,
#     2L = b-splines, 3L = piecewise.
#   type_name: character string specifying the type of baseline hazard.
#   user_df: integer specifying the input to the df argument
#   df: integer specifying the number of parameters to use for the
#     baseline hazard.
#   knots: the knot locations for the baseline hazard.
#   bs_basis: The basis terms for the B-splines. This is passed to Stan
#     as the "model matrix" for the baseline hazard. It is also used in
#     post-estimation when evaluating the baseline hazard for posterior
#     predictions since it contains information about the knot locations
#     for the baseline hazard (this is implemented via splines::predict.bs).
handle_basehaz <- function(basehaz, basehaz_ops,
                           ok_basehaz = nlist("weibull", "bs", "piecewise"),
                           ok_basehaz_ops = nlist("df", "knots"),
                           eventtime, status) {
  # Validate the requested hazard type and the names of any options supplied
  if (!basehaz %in% unlist(ok_basehaz))
    stop("The baseline hazard should be one of ", paste(names(ok_basehaz), collapse = ", "))
  if (!all(names(basehaz_ops) %in% unlist(ok_basehaz_ops)))
    stop("The baseline hazard options list can only include ", paste(names(ok_basehaz_ops), collapse = ", "))

  # Integer code for the hazard type (1 = weibull, 2 = b-splines, 3 = piecewise)
  type <- switch(basehaz, weibull = 1L, bs = 2L, piecewise = 3L)
  type_name <- basehaz

  # user_df records the user's requested df; df is the number of baseline
  # hazard parameters actually used (they may differ, see below)
  user_df <- basehaz_ops$df
  df <- basehaz_ops$df
  knots <- basehaz_ops$knots
  bs_basis <- NULL
  if (type_name == "weibull") { # handle df and knots
    # Weibull hazard has a single shape parameter; df/knots are meaningless here
    if (!is.null(df))
      warning("'df' will be ignored since baseline hazard was set to weibull.",
              immediate. = TRUE, call. = FALSE)
    if (!is.null(knots))
      warning("'knots' will be ignored since baseline hazard was set to weibull.",
              immediate. = TRUE, call. = FALSE)
    user_df <- NULL
    df <- 1L
    knots <- NULL
  } else if (type_name %in% c("bs", "piecewise")) { # handle df and knots
    if (!any(is.null(df), is.null(knots))) { # both specified
      stop("Cannot specify both 'df' and 'knots' for the baseline hazard.",
           call. = FALSE)
    } else if (all(is.null(df), is.null(knots))) { # both null -- use default df
      user_df <- df <- 6L
      knots <- NULL
    } else if (!is.null(df)) { # only df specified
      if (type == 2L) {
        if (df < 3) stop("'df' must be at least 3 for B-splines baseline hazard.")
        # +1 because the B-spline basis below is constructed with intercept = TRUE
        user_df <- df <- df + 1
      }
    } else if (!is.null(knots)) { # only knots specified
      if (!is.numeric(knots))
        stop("'knots' vector must be numeric", call. = FALSE)
      if (any(knots < 0))
        stop("'knots' must be non-negative", call. = FALSE)
      # df implied by the number of interior knots:
      # B-splines: #knots + degree (3) + intercept; piecewise: #knots + 1 intervals
      if (type == 2L)
        df <- length(knots) + 4
      else if (type == 3L)
        df <- length(knots) + 1
    } else {
      stop("Bug found: unable to reconcile 'df' and 'knots' arguments.",
           call. = FALSE)
    }
  }

  # Evaluate spline basis (knots, df, etc) based on distribution of observed
  # event times, or evaluate cut points for piecewise constant baseline hazard.
  # NB only uncensored times (status > 0) are used to place interior knots.
  if (type == 2L) {
    bs_basis <- splines::bs(eventtime[(status > 0)], df = user_df,
                            knots = knots,
                            Boundary.knots = c(0, max(eventtime)),
                            intercept = TRUE)
  } else if (type == 3L) {
    if (is.null(knots)) {
      # Default cut points: equal-probability quantiles of observed event times,
      # with the boundaries pinned at 0 and the largest (possibly censored) time
      knots <- quantile(eventtime[(status > 0)], probs = seq(0, 1, 1 / df))
      knots[[1]] <- 0
      knots[[length(knots)]] <- max(eventtime)
    } else {
      if (any(knots > max(eventtime)))
        stop("'knots' for the baseline hazard cannot be greater than the ",
             "largest event time.", call. = FALSE)
      knots <- c(0, knots, max(eventtime))
    }
  }

  nlist(type, type_name, user_df, df, knots, bs_basis)
}

# Return the design matrix for the baseline hazard
#
# @param times A vector of times at which to evaluate the baseline hazard
# @param basehaz A named list with info about the baseline hazard,
#   returned by a call to handle_basehaz
# @return A matrix
make_basehaz_X <- function(times, basehaz) {
  if (basehaz$type_name == "weibull") {
    # Weibull: single column, log(time), multiplied by the shape parameter in Stan
    X <- matrix(log(times), nrow = length(times), ncol = 1)
  } else if (basehaz$type_name == "bs") {
    # B-splines: re-evaluate the stored basis (knots preserved) at new times
    basis <- basehaz$bs_basis
    if (is.null(basis))
      stop2("Bug found: could not find info on B-splines basis terms.")
    X <- as.array(predict(basis, times))
  } else if (basehaz$type_name == "piecewise") {
    # Piecewise constant: one dummy column per time interval between knots
    knots <- basehaz$knots
    df <- basehaz$df
    if (is.null(knots) || is.null(df))
      stop2("Bug found: could not find info on basehaz df and knot locations.")
    times_quantiles <- cut(times, knots, include.lowest = TRUE, labels = FALSE)
    X <- matrix(NA, length(times_quantiles), df)
    for (i in 1:df)
      X[, i] <- ifelse(times_quantiles == i, 1, 0)
    X <- as.array(X)
  } else {
    stop2("Bug found: type of baseline hazard unknown.")
  }
  X
}

# Function to return standardised GK quadrature points and weights
#
# @param nodes The required number of quadrature nodes
# @return A list with two named elements (points and weights) each
#   of which is a numeric vector with length equal to the number of
#   quadrature nodes. The points are on the standard (-1, 1) interval.
get_quadpoints <- function(nodes = 15) {
  if (!is.numeric(nodes) || (length(nodes) > 1L)) {
    stop("'qnodes' should be a numeric vector of length 1.")
  } else if (nodes == 15) {
    list(
      points = c(
        -0.991455371120812639207,
        -0.949107912342758524526,
        -0.86486442335976907279,
        -0.7415311855993944398639,
        -0.5860872354676911302941,
        -0.4058451513773971669066,
        -0.2077849550078984676007,
        0,
        0.2077849550078984676007,
        0.405845151377397166907,
        0.5860872354676911302941,
        0.741531185599394439864,
        0.86486442335976907279,
        0.9491079123427585245262,
        0.991455371120812639207),
      weights = c(
        0.0229353220105292249637,
        0.063092092629978553291,
        0.10479001032225018384,
        0.140653259715525918745,
        0.1690047266392679028266,
        0.1903505780647854099133,
        0.204432940075298892414,
        0.209482141084727828013,
        0.204432940075298892414,
        0.1903505780647854099133,
        0.169004726639267902827,
        0.140653259715525918745,
        0.1047900103222501838399,
        0.063092092629978553291,
        0.0229353220105292249637))
  } else if (nodes == 11) {
    list(
      points = c(
        -0.984085360094842464496,
        -0.906179845938663992798,
        -0.754166726570849220441,
        -0.5384693101056830910363,
        -0.2796304131617831934135,
        0,
        0.2796304131617831934135,
        0.5384693101056830910363,
        0.754166726570849220441,
        0.906179845938663992798,
        0.984085360094842464496),
      weights = c(
        0.042582036751081832865,
        0.1152333166224733940246,
        0.186800796556492657468,
        0.2410403392286475866999,
        0.272849801912558922341,
        0.2829874178574912132043,
        0.272849801912558922341,
        0.241040339228647586701,
        0.186800796556492657467,
        0.115233316622473394025,
        0.042582036751081832865))
  } else if (nodes == 7) {
    list(
      points = c(
        -0.9604912687080202834235,
        -0.7745966692414833770359,
        -0.4342437493468025580021,
        0,
        0.4342437493468025580021,
        0.7745966692414833770359,
        0.9604912687080202834235),
      weights = c(
        0.1046562260264672651938,
        0.268488089868333440729,
        0.401397414775962222905,
        0.450916538658474142345,
        0.401397414775962222905,
        0.268488089868333440729,
        0.104656226026467265194))
  } else stop("'qnodes' must be either 7, 11 or 15.")
}

# Remove the "Surv" class attribute from the first column
# of the model frame after a survival::coxph call
#
# @param data A model frame with the first column being the Surv() response
# @return A data frame in which the Surv response has been expanded into
#   its plain numeric columns
unclass_Surv_column <- function(data) {
  cbind(unclass(data[,1]), data[, -1, drop = FALSE], stringsAsFactors = FALSE)
}

#--------------- Functions related to association structure

# Return a named list with information about the specified association structure
#
# @param user_x A character vector or NULL, being the user input to the
#   assoc argument (for one submodel) in the stan_jm call
# @param y_mod_stuff A list returned by a call to handle_glmod
# @param id_var The name of the ID variable
# @param M Integer specifying the total number of longitudinal submodels
# @return A list with information about the desired association structure
validate_assoc <- function(user_x, y_mod_stuff, ok_assoc, ok_assoc_data,
                           ok_assoc_interactions, lag, id_var, M) {

  # Full set of admissible inputs: plain terms, "*_data" terms, and all
  # pairwise "term1_term2" interaction names
  ok_inputs <- c(ok_assoc,
                 paste0(ok_assoc_data, "_data"),
                 unlist(lapply(ok_assoc_interactions, paste0,
                               "_", ok_assoc_interactions)))

  # Check user input to assoc argument (suffixes like "(...)" stripped first)
  trimmed_x <- trim_assoc(user_x, ok_assoc_data, ok_assoc_interactions)
  if (is.null(user_x) || all(trimmed_x %in% ok_inputs)) {
    temporarily_disallowed <- c("muslope", "shared_b", "shared_coef")
    if (any(trimmed_x %in% temporarily_disallowed))
      stop2("The following association structures have been temporarily disallowed ",
            "and will be reinstated in a future release: ",
            paste(temporarily_disallowed, collapse = ", "))
    # One TRUE/FALSE flag per admissible association type
    assoc <- sapply(ok_inputs, `%in%`, trimmed_x, simplify = FALSE)
    if (is.null(user_x)) {
      assoc$null <- TRUE
    } else if (is.vector(user_x) && is.character(user_x)) {
      if ((assoc$null) && (length(user_x) > 1L))
        stop("In assoc, 'null' cannot be specified in conjuction ",
             "with another association type", call. = FALSE)
      # eta-scale and mu-scale versions of the same term cannot be combined
      STOP_combination_not_allowed(assoc, "etavalue", "muvalue")
      STOP_combination_not_allowed(assoc, "etaslope", "muslope")
      STOP_combination_not_allowed(assoc, "etaauc",   "muauc")
    } else {
      stop("'assoc' argument should be a character vector or, for a multivariate ",
           "joint model, possibly a list of character vectors.", call. = FALSE)
    }
  } else {
    stop("An unsupported association type has been specified. The ",
         "'assoc' argument can only include the following association ",
         "types: ", paste(ok_assoc, collapse = ", "), ", as well as ",
         "possible interactions either between association terms or ",
         "with observed data.", call. = FALSE)
  }

  # Parse suffix specifying indices for shared random effects
  cnms <- y_mod_stuff$z$group_cnms
  cnms_id <- cnms[[id_var]] # names of random effect terms
  assoc$which_b_zindex <- parse_assoc_sharedRE("shared_b", user_x,
                                               max_index = length(cnms_id), cnms_id)
  assoc$which_coef_zindex <- parse_assoc_sharedRE("shared_coef", user_x,
                                                  max_index = length(cnms_id), cnms_id)
  if (length(intersect(assoc$which_b_zindex, assoc$which_coef_zindex)))
    stop("The same random effects indices should not be specified in both ",
         "'shared_b' and 'shared_coef'. Specifying indices in 'shared_coef' ",
         "will include both the fixed and random components.", call. = FALSE)
  if (length(assoc$which_coef_zindex)) {
    if (length(cnms) > 1L)
      stop("'shared_coef' association structure cannot be used when there is ",
           "clustering at levels other than the individual-level.", call. = FALSE)
    # For each shared random effect, find the index of the matching fixed effect
    b_nms <- names(assoc$which_coef_zindex)
    assoc$which_coef_xindex <- sapply(b_nms, function(y, beta_nms) {
      beta_match <- grep(y, beta_nms, fixed = TRUE)
      if (!length(beta_match)) {
        stop("In association structure 'shared_coef', no matching fixed effect ",
             "component could be found for the following random effect: ", y,
             ". Perhaps consider using 'shared_b' association structure instead.")
      } else if (length(beta_match) > 1L) {
        stop("Bug found: In association structure 'shared_coef', multiple ",
             "fixed effect components have been found to match the following ",
             "random effect: ", y)
      }
      beta_match
    }, beta_nms = colnames(y_mod_stuff$X$X))
  } else assoc$which_coef_xindex <- numeric(0)
  if (!identical(length(assoc$which_coef_zindex), length(assoc$which_coef_xindex)))
    stop("Bug found: the lengths of the fixed and random components of the ",
         "'shared_coef' association structure are not the same.")

  # Parse suffix specifying formula for interactions with data
  ok_inputs_data <- paste0(ok_assoc_data, "_data")
  assoc$which_formulas <- sapply(ok_inputs_data, parse_assoc_data,
                                 user_x, simplify = FALSE)

  # Parse suffix specifying indices for interactions between association terms
  ok_inputs_interactions <- unlist(lapply(ok_assoc_interactions, paste0,
                                          "_", ok_assoc_interactions))
  assoc$which_interactions <- sapply(ok_inputs_interactions,
                                     parse_assoc_interactions,
                                     user_x, max_index = M, simplify = FALSE)

  # Lag for association structure
  assoc$which_lag <- lag
  assoc
}

# Check whether an association structure was specified that is not allowed
# when there is an additional grouping factor clustered within patients
#
# @param has_grp Logical vector specifying where each of the 1:M submodels
#   has a grp factor clustered within patients or not.
# @param assoc A two dimensional array with information about desired association
#   structure for the joint model (returned by a call to validate_assoc).
# @param ok_assocs_with_grp A character vector with the rownames in assoc
#   that are allowed association structures when there is a grp factor
#   clustered within patients.
validate_assoc_with_grp <- function(has_grp, assoc, ok_assocs_with_grp) {
  # Association-type rows only (drop the "which_*" bookkeeping and "null" rows)
  all_rownames <- grep("which|null", rownames(assoc), invert = TRUE, value = TRUE)
  disallowed_rows <- setdiff(all_rownames, ok_assocs_with_grp)
  sel <- which(has_grp) # submodels with a lower-level grouping factor
  check <- unlist(assoc[disallowed_rows, sel])
  if (any(check))
    stop2("Only the following association structures are allowed when ",
          "there is a grouping factor clustered within individuals: ",
          paste(ok_assocs_with_grp, collapse = ", "))
}

# Validate the user input to the lag_assoc argument of stan_jm
#
# @param lag_assoc The user input to the lag_assoc argument. A scalar is
#   recycled across all M submodels.
# @param M Integer specifying the number of longitudinal submodels
# @return A numeric vector of length M with non-negative lags
validate_lag_assoc <- function(lag_assoc, M) {
  if (length(lag_assoc) == 1L)
    lag_assoc <- rep(lag_assoc, M)
  if (length(lag_assoc) != M)
    stop2("'lag_assoc' should be length 1 or length equal to the ",
          "number of markers (", M, ").")
  if (!is.numeric(lag_assoc))
    stop2("'lag_assoc' must be numeric.")
  if (any(lag_assoc < 0))
    stop2("'lag_assoc' must be non-negative.")
  lag_assoc
}

# Validate the user input to the scale_assoc argument of stan_jm
#
# @param scale_assoc The user input to the scale_assoc argument (NULL means
#   no scaling, i.e. a scale of 1 for every submodel)
# @param assoc_as_list A list with information about the association structure for
#   the longitudinal submodels
# @return A numeric vector of scaling parameters for all assoc terms
validate_scale_assoc <- function(scale_assoc, assoc_as_list) {
  M <- length(assoc_as_list)
  if (is.null(scale_assoc))
    scale_assoc <- rep(1, M) # default: no scaling
  if (length(scale_assoc) < M)
    stop2("'scale_assoc' must be specified for each longitudinal submodel.")
  if (length(scale_assoc) > M)
    stop2("'scale_assoc' can only be specified once for each longitudinal submodel.")
  if (!is.numeric(scale_assoc))
    stop2("'scale_assoc' must be numeric.")
  sel_shared <- c("shared_b", "shared_coef")
  sel_terms <- c("etavalue", "etaslope", "etaauc", "muvalue", "muslope", "muauc")
  sel_data <- c("which_formulas")
  sel_itx <- c("which_interactions")
  scale_list <- list()
  # seq_len() (rather than 1:M) so an empty assoc_as_list yields no iterations
  for (m in seq_len(M)) {
    a <- assoc_as_list[[m]]
    if (a[["null"]]) {
      scale_list[[m]] <- as.array(integer(0)) # no assoc terms for this submodel
    } else {
      if (scale_assoc[m] == 0)
        stop2("'scale_assoc' must be non-zero.")
      if (any(unlist(a[sel_shared])))
        stop2("'scale_assoc' is not yet implemented for the following association structures: ",
              paste(sel_shared, collapse = ", "))
      # calculate scale for each assoc term:
      # one scale per active term, per data-interaction column, and the product
      # of the two submodels' scales for each term-by-term interaction
      scale_terms <- rep(scale_assoc[m], length(which(unlist(a[sel_terms]))))
      scale_data <- rep(scale_assoc[m], length(unlist(a[[sel_data]])))
      scale_itx <- scale_assoc[m] * scale_assoc[unlist(a[[sel_itx]])]
      scale_list[[m]] <- c(scale_terms, scale_data, scale_itx)
    }
  }
  # return vector of scaling parameters
  return(unlist(scale_list))
}

# Remove suffixes from the user inputted assoc argument
#
# @param x A character vector, being the user input to the
#   assoc argument in the stan_jm call
# @param ok_assoc_data A character vector specifying which types
#   of association terms are allowed to be interacted with data
# @param ok_assoc_interactions A character vector specifying which types
#   of association terms are allowed to be interacted with other
#   association terms
# @return x with any "(...)"-style suffixes stripped, e.g.
#   "etavalue_data(~ x)" becomes "etavalue_data"
trim_assoc <- function(x, ok_assoc_data, ok_assoc_interactions) {
  x <- gsub("^shared_b\\(.*", "shared_b", x)
  x <- gsub("^shared_coef\\(.*", "shared_coef", x)
  for (i in ok_assoc_data)
    x <- gsub(paste0("^", i, "_data\\(.*"), paste0(i, "_data"), x)
  for (i in ok_assoc_interactions)
    for (j in ok_assoc_interactions)
      x <- gsub(paste0("^", i, "_", j, "\\(.*"), paste0(i, "_", j), x)
  x
}

# Parse the formula for specifying a data interaction with an association term
#
# @param x A character string corresponding to one of the allowed
#   association structures for interactions with data, for example,
#   "etavalue_data" or "etaslope_data"
# @param user_x A character vector, being the user input to the assoc
#   argument in the stan_jm call
# @return The parsed formula (which can be used for constructing a
#   design matrix for interacting data with association type x) or NULL
parse_assoc_data <- function(x, user_x) {
  # Find the user's entry that starts with this association type, if any
  val <- grep(paste0("^", x, ".*"), user_x, value = TRUE)
  if (length(val)) {
    # Everything after the "<x>" prefix should be a parenthesised formula,
    # e.g. "etavalue_data(~ trt)" -> "(~ trt)"
    val2 <- unlist(strsplit(val, x))[-1]
    fm <- tryCatch(eval(parse(text = val2)),
                   error = function(e)
                     stop(paste0("Incorrect specification of the formula in the '", x,
                                 "' association structure. See Examples in the help file."),
                          call. = FALSE))
    if (!is(fm, "formula"))
      stop(paste0("Suffix to '", x, "' association structure should include ",
                  "a formula within parentheses."), call. = FALSE)
    # A two-sided formula has length 3L; only one-sided formulas are allowed
    if (identical(length(fm), 3L))
      stop(paste0("Formula specified for '", x, "' association structure should not ",
                  "include a response."), call. = FALSE)
    if (length(lme4::findbars(fm)))
      stop(paste0("Formula specified for '", x, "' association structure should only ",
                  "include fixed effects."), call. = FALSE)
    if (fm[[2L]] == 1)
      stop(paste0("Formula specified for '", x, "' association structure cannot ",
                  "be an intercept only."), call. = FALSE)
    return(fm)
  } else numeric(0) # association type not requested by the user
}

# Parse the indices specified for shared random effects
#
# @param x A character string corresponding to one of the allowed
#   association structures for shared random effects
# @param user_x A character vector, being the user input to the assoc
#   argument in the stan_jm call
# @param max_index An integer specifying the total number of random effects
#   in the longitudinal submodel, and therefore the maximum allowed index for
#   the shared random effects
# @param cnms The names of the random effects corresponding to the
#   individual-level (id_var) of clustering
# @return A numeric vector specifying indices for the shared random effects
parse_assoc_sharedRE <- function(x, user_x, max_index, cnms) {
  val <- grep(paste0("^", x, ".*"), user_x, value = TRUE)
  if (length(val)) {
    val2 <- unlist(strsplit(val, x))[-1]
    if (length(val2)) {
      # A "(...)" suffix was given: evaluate it as c(...) to get the indices
      index <- tryCatch(eval(parse(text = paste0("c", val2))),
                        error = function(e)
                          stop("Incorrect specification of the '", x,
                               "' association structure. ",
                               "See Examples in help file.", call. = FALSE))
      if (any(index > max_index))
        stop(paste0("The indices specified for the '", x,
                    "' association structure are ",
                    "greater than the number of subject-specific random effects."),
             call. = FALSE)
    } else index <- seq_len(max_index) # no suffix: share all random effects
    names(index) <- cnms[index]
    return(index)
  } else numeric(0) # association type not requested by the user
}

# Parse the indices specified for interactions between association terms
#
# @param x A character string corresponding to one of the allowed
#   association structures
# @param user_x A character vector, being the user input to the assoc
#   argument in the stan_jm call
# @param max_index An integer specifying the maximum allowed index
# @return A numeric vector specifying indices
parse_assoc_interactions <- function(x, user_x, max_index) {
  val <- grep(paste0("^", x, ".*"), user_x, value = TRUE)
  if (length(val)) {
    val2 <- unlist(strsplit(val, x))[-1]
    if (length(val2)) {
      # Evaluate the "(...)" suffix as c(...) to get the submodel indices
      index <- tryCatch(eval(parse(text = paste0("c", val2))),
                        error = function(e)
                          stop("Incorrect specification of the '", x,
                               "' association structure. It should ",
                               "include a suffix with parentheses specifying the indices of the association ",
                               "terms you want to include in the interaction. See Examples in the help file.",
                               call. = FALSE))
      if (any(index > max_index))
        stop("The indices specified for the '", x, "' association structure ",
             "cannot be greater than the number of longitudinal submodels.",
             call. = FALSE)
      return(index)
    } else
      # Unlike shared REs, an explicit suffix is mandatory here
      stop("Incorrect specification of the '", x, "' association structure. It should ",
           "include a suffix with parentheses specifying the indices of the association ",
           "terms you want to include in the interaction. See Examples in the help file.",
           call. = FALSE)
  } else numeric(0)
}

# Make sure that interactions between association terms (for example
# etavalue_etaslope or mu_value_muvalue etc) are always ordered so that
# the first listed association term is for the submodel with the smallest
# index. For example, etavalue1_etavalue2 NOT etavalue2_etavalue1. This
# is to ensure there is no replication such as including both
# etavalue1_etavalue2 AND etavalue2_etavalue1 when passing to Stan.
#
# @param assoc A two dimensional array with information about desired association
#   structure for the joint model (returned by a call to validate_assoc).
# @param ok_assoc_interactions A character vector, specifying which association
#   structures are allowed to be used in interactions
check_order_of_assoc_interactions <- function(assoc, ok_assoc_interactions) {
  M <- ncol(assoc)
  for (i in ok_assoc_interactions) {
    for (j in ok_assoc_interactions) {
      header <- paste0(i, "_", j)
      header_reversed <- paste0(j, "_", i)
      for (m in 1:M) {
        if (assoc[header,][[m]]) {
          indices <- assoc["which_interactions",][[m]][[header]]
          sel <- which(indices < m)
          if (length(sel)) {
            # Remove indices for submodels before the current submodel m
            new_indices <- indices[-sel]
            assoc["which_interactions", ][[m]][[header]] <- new_indices
            assoc[header,][[m]] <- (length(new_indices) > 0L)
            # Replace those indices by reversing the order of association terms
            # (i.e. record them under header_reversed on the earlier submodel k)
            for (k in indices[sel]) {
              assoc["which_interactions",][[k]][[header_reversed]] <-
                unique(c(assoc["which_interactions",][[k]][[header_reversed]], m))
              assoc[header_reversed,][[k]] <-
                (length(assoc["which_interactions",][[k]][[header_reversed]]) > 0L)
            }
          }
        }
      }
    }
  }
  assoc
}

# Return design matrices for evaluating longitudinal submodel quantities
# at specified quadrature points/times
#
# @param data A data frame, the data for the longitudinal submodel.
# @param assoc A list with information about the association structure for
#   the one longitudinal submodel.
# @param y_mod A named list returned by a call to handle_y_mod (the
#   fit for a single longitudinal submodel)
# @param grp_stuff A list with information about any lower level grouping
#   factors that are clustered within patients and how to handle them in
#   the association structure.
# @param ids,times The subject IDs and times vectors that correspond to the
#   event and quadrature times at which the design matrices will
#   need to be evaluated for the association structure.
# @param id_var The name of the ID variable.
# @param time_var The name of the time variable.
# @param epsilon The half-width of the central difference used for
#   numerically calculating the derivative of the design matrix for slope
#   based association structures.
# @param auc_qnodes Integer specifying the number of GK quadrature nodes to
#   use in the integral/AUC based association structures.
# @return The list returned by make_assoc_parts.
handle_assocmod <- function(data, assoc, y_mod, grp_stuff, ids, times,
                            id_var, time_var, epsilon, auc_qnodes) {

  if (!requireNamespace("data.table"))
    stop2("the 'data.table' package must be installed to use this function.")

  # Before turning data into a data.table (for a rolling merge
  # against the quadrature points) we want to make sure that the
  # data does not include any NAs for the predictors or assoc formula variables
  tt <- y_mod$terms
  assoc_interaction_forms <- assoc[["which_formulas"]]
  extra_vars <- uapply(assoc_interaction_forms, function(i) {
    # loop over the four possible assoc interaction formulas and
    # collect any variables used
    if (length(i)) {
      rownames(attr(terms.formula(i), "factors"))
    } else NULL
  })
  # RHS of the longitudinal submodel formula, plus any assoc formula variables
  rhs <- deparse(tt[[3L]], 500L)
  if (!is.null(extra_vars))
    rhs <- c(rhs, extra_vars)
  form_new <- reformulate(rhs, response = NULL)
  df <- get_all_vars(form_new, data)
  df <- df[complete.cases(df), , drop = FALSE]
  df$offset <- 0 # force offset to zero for assoc term

  # Declare df as a data.table for merging with quadrature points
  dt <- prepare_data_table(df, id_var = id_var, time_var = time_var,
                           grp_var = grp_stuff$grp_var) # NB grp_var may be NULL

  # Design matrices for calculating association structure based on
  # (possibly lagged) eta, slope, auc and any interactions with data
  parts <- make_assoc_parts(use_function = make_assoc_parts_for_stan,
                            newdata = dt, assoc = assoc,
                            id_var = id_var, time_var = time_var,
                            grp_stuff = grp_stuff, ids = ids, times = times,
                            epsilon = epsilon, auc_qnodes = auc_qnodes,
                            y_mod = y_mod)

  # If association structure is based on shared random effects or shared
  # coefficients then construct a matrix with the estimated b parameters
  # from the separate glmod (for the id_var grouping factor only). Note this
  # matrix is not passed to standata, but just used for autoscaling the
  # priors for association parameters.
  sel_shared <- grep("^shared", rownames(assoc))
  if (any(unlist(assoc[sel_shared]))) {
    # flist for long submodel
    flist_tmp <- lme4::getME(y_mod$mod, "flist")
    # which grouping factor is id_var
    Gp_sel <- which(names(flist_tmp) == id_var)
    # grouping factor indices
    Gp <- lme4::getME(y_mod$mod, "Gp")
    b_beg <- Gp[[Gp_sel]] + 1
    b_end <- Gp[[Gp_sel + 1]]
    # b vector for grouping factor = id_var
    b_vec <- lme4::getME(y_mod$mod, "b")[b_beg:b_end]
    # convert to Npat * n_re matrix
    b_mat <- matrix(b_vec, nrow = length(levels(flist_tmp[[Gp_sel]])),
                    byrow = TRUE)
  } else b_mat <- NULL
  parts$b_mat <- b_mat
  return(parts)
}

# Get the information needed for combining the information in lower-level units
# clustered within an individual, when the patient-level is not the only
# clustering level in the longitudinal submodel
#
# @param cnms The component names for a single longitudinal submodel
# @param flist The flist for a single longitudinal submodel
# @param id_var The name of the ID variable
# @param qnodes Integer specifying the number of qnodes being used for
#   the GK quadrature in the stan_jm call
# @param grp_assoc Character string specifying the association structure used
#   for combining information in the lower level units clustered within an
#   individual
# @return A named list with the following elements:
#   has_grp: logical specifying whether the submodel has a grouping factor
#     that is clustered with patients.
#   grp_var: the name of any grouping factor that is clustered with patients.
#   grp_assoc: the user input to the grp_assoc argument in the stan_jm call.
#   grp_freq: a named vector with the number of lower level units clustered
#     within each individual.
#   grp_list: a named list containing the unique names for the lower level
#     units clustered within each individual.
get_basic_grp_info <- function(cnms, flist, id_var) {
  cnms_nms <- names(cnms)
  tally <- xapply(cnms_nms, FUN = function(x)
    # within each ID, count the number of levels for the grouping factor x
    tapply(flist[[x]], flist[[id_var]], FUN = n_distinct))
  # a grouping factor is "clustered within patients" if any patient has > 1 level
  sel <- which(sapply(tally, function(x) !all(x == 1L)) == TRUE)
  has_grp <- as.logical(length(sel))
  if (!has_grp) {
    return(nlist(has_grp))
  } else {
    if (length(sel) > 1L)
      stop("There can only be one grouping factor clustered within 'id_var'.")
    grp_var <- cnms_nms[sel]
    return(nlist(has_grp, grp_var))
  }
}

# Extend the basic grouping info with the frequency and identity of the
# lower-level units clustered within each patient, validating 'grp_assoc'.
#
# @param basic_info The list returned by get_basic_grp_info
# @param flist The flist for a single longitudinal submodel
# @param id_var The name of the ID variable
# @param grp_assoc Character string specifying how information in the lower
#   level units is combined (must be one of ok_grp_assocs)
# @param ok_grp_assocs The admissible values for grp_assoc
# @return basic_info, possibly extended with grp_assoc, grp_freq, grp_list
get_extra_grp_info <- function(basic_info, flist, id_var, grp_assoc,
                               ok_grp_assocs = c("sum", "mean", "min", "max")) {
  has_grp <- basic_info$has_grp
  grp_var <- basic_info$grp_var
  if (!has_grp) { # no grouping factor clustered within patients
    return(basic_info)
  } else { # submodel has a grouping factor clustered within patients
    if (is.null(grp_var))
      stop2("Bug found: could not find 'grp_var' in basic_info.")
    if (is.null(grp_assoc))
      stop2("'grp_assoc' cannot be NULL when there is a grouping factor ",
            "clustered within patients.")
    if (!grp_assoc %in% ok_grp_assocs)
      stop2("'grp_assoc' must be one of: ", paste(ok_grp_assocs, collapse = ", "))

    # cluster and patient ids for each row of the z matrix
    factor_grp <- factor(flist[[grp_var]])
    factor_ids <- factor(flist[[id_var]])

    # num clusters within each patient
    grp_freq <- tapply(factor_grp, factor_ids, FUN = n_distinct,
                       simplify = FALSE)
    grp_freq <- unlist(grp_freq)

    # unique cluster ids for each patient id
    grp_list <- tapply(factor_grp, factor_ids, FUN = unique,
                       simplify = FALSE)
    basic_info <- nlist(has_grp, grp_var)
    extra_info <- nlist(grp_assoc, grp_freq, grp_list)
    return(c(basic_info, extra_info))
  }
}

# Function to calculate the number of association parameters in the model
#
# @param assoc A list of length M with information about the association structure
#   type for each submodel, returned by an mapply call to validate_assoc
# @param a_mod_stuff A list of length M with the design matrices related to
#   the longitudinal submodels in the GK quadrature, returned by an mapply
#   call to handle_assocmod
# @return Integer indicating the number of association parameters in the model
get_num_assoc_pars <- function(assoc, a_mod_stuff) {
  # K1: one parameter per active eta/mu value/slope/auc term
  # K2: one per shared random effect or shared coefficient index
  # K3: one per term-by-term interaction index
  # K4: one per column of the data-interaction design matrices
  sel1 <- c("etavalue", "etaslope", "etaauc", "muvalue", "muslope", "muauc")
  sel2 <- c("which_b_zindex", "which_coef_zindex")
  sel3 <- c("which_interactions")
  K1 <- sum(as.integer(assoc[sel1,]))
  K2 <- length(unlist(assoc[sel2,]))
  K3 <- length(unlist(assoc[sel3,]))
  K4 <- sum(fetch_(a_mod_stuff, "K_data"))
  K1 + K2 + K3 + K4
}

#--------------- Functions related to generating initial values

# Create a function that can be used to generate the model-based initial values for Stan
#
# @param e_mod_stuff A list object returned by a call to the handle_coxmod function
# @param standata The data list that will be passed to Stan
# @return A zero-argument function returning the (fixed) list of initial values
generate_init_function <- function(e_mod_stuff, standata) {
  # Initial values for intercepts, coefficients and aux parameters
  e_beta <- e_mod_stuff$mod$coef
  # Weibull (type 1L): random positive shape; otherwise zero spline/piecewise coefs
  e_aux <- if (standata$basehaz_type == 1L)
    runif(1, 0.5, 3) else rep(0, standata$basehaz_df)
  # Express initial values on the centred/scaled parameterisation used in Stan
  e_z_beta <- standardise_coef(e_beta, standata$e_prior_mean,
                               standata$e_prior_scale)
  e_aux_unscaled <- standardise_coef(e_aux, standata$e_prior_mean_for_aux,
                                     standata$e_prior_scale_for_aux)
  # Function to generate model based initial values
  model_based_inits <- rm_null(list(
    e_z_beta = array_else_double(e_z_beta),
    e_aux_unscaled = array_else_double(e_aux_unscaled),
    e_gamma = array_else_double(rep(0, standata$e_has_intercept))))
  return(function() model_based_inits)
}

#--------------- Functions related
to standata and sampling # Set arguments for sampling for stan_jm # # Prepare a list of arguments to use with \code{rstan::sampling} via # \code{do.call}. # # *Note that this differs from the set_sampling_args function in that # it uses a different default adapt_delta and max_treedepth. Using a # shorter treedepth seems to stop the sampler trailing off during early # iterations and can drastically reduce the model estimation time, and # in most examples using a shorter treedepth hasn't compromised the sampler # at later interations (ie, at later iterations the sampler doesn't # hit the maximum treedepth). The default adapt_delta depends on the # largest number of group-specific parameters for any single grouping # factor in the model. # # @param object The stanfit object to use for sampling. # @param cnms The component names for the group level parameters combined # across all glmer submodels. This is used to determine the maximum number # of parameters for any one grouping factor in the model, which in turn is # used to determine the default adapt_delta. # @param user_dots The contents of \code{...} from the user's call to # the \code{stan_jm} modeling function. # @param user_adapt_delta The value for \code{adapt_delta} specified by the # user. # @param user_max_treedepth The value for \code{max_treedepth} specified by the # user. # @param ... Other arguments to \code{\link[rstan]{sampling}} not coming from # \code{user_dots} (e.g. \code{pars}, \code{init}, etc.) # @return A list of arguments to use for the \code{args} argument for # \code{do.call(sampling, args)}. set_jm_sampling_args <- function(object, cnms, user_dots = list(), user_adapt_delta = NULL, user_max_treedepth = NULL, ...) { args <- list(object = object, ...) 
unms <- names(user_dots) for (j in seq_along(user_dots)) { args[[unms[j]]] <- user_dots[[j]] } max_p <- max(sapply(cnms, length)) default_adapt_delta <- if (max_p > 2) 0.85 else 0.80 default_max_treedepth <- 10L if (!is.null(user_adapt_delta)) args$control$adapt_delta <- user_adapt_delta else if (is.null(args$control$adapt_delta)) args$control$adapt_delta <- default_adapt_delta if (!is.null(user_max_treedepth)) args$control$max_treedepth <- user_max_treedepth else if (is.null(args$control$max_treedepth)) args$control$max_treedepth <- default_max_treedepth if (!"save_warmup" %in% unms) args$save_warmup <- FALSE return(args) } # Return the list of pars for Stan to monitor # # @param standata The list of data to pass to Stan # @param is_jm A logical # @return A character vector pars_to_monitor <- function(standata, is_jm = FALSE) { c(if (standata$M > 0 && standata$intercept_type[1]) "yAlpha1", if (standata$M > 1 && standata$intercept_type[2]) "yAlpha2", if (standata$M > 2 && standata$intercept_type[3]) "yAlpha3", if (standata$M > 0 && standata$yK[1]) "yBeta1", if (standata$M > 1 && standata$yK[2]) "yBeta2", if (standata$M > 2 && standata$yK[3]) "yBeta3", if (is_jm) "e_alpha", if (is_jm && standata$e_K) "e_beta", if (is_jm && standata$a_K) "a_beta", if (standata$bK1 > 0) "b1", if (standata$bK2 > 0) "b2", if (standata$M > 0 && standata$has_aux[1]) "yAux1", if (standata$M > 1 && standata$has_aux[2]) "yAux2", if (standata$M > 2 && standata$has_aux[3]) "yAux3", if (is_jm && length(standata$basehaz_X)) "e_aux", if (standata$prior_dist_for_cov == 2 && standata$bK1 > 0) "bCov1", if (standata$prior_dist_for_cov == 2 && standata$bK2 > 0) "bCov2", if (standata$prior_dist_for_cov == 1 && standata$len_theta_L) "theta_L", "mean_PPD") } # Change the MCMC samples for theta_L to Sigma # # @param stanfit The stanfit object from the fitted model # @param cnms The component names for the group level terms, combined # across all glmer submodels # @return A stanfit object evaluate_Sigma <- 
function(stanfit, cnms) { nc <- sapply(cnms, FUN = length) nms <- names(cnms) thetas <- extract(stanfit, pars = "theta_L", inc_warmup = TRUE, permuted = FALSE) Sigma <- apply(thetas, 1:2, FUN = function(theta) { Sigma <- mkVarCorr(sc = 1, cnms, nc, theta, nms) unlist(sapply(Sigma, simplify = FALSE, FUN = function(x) x[lower.tri(x, TRUE)])) }) l <- length(dim(Sigma)) end <- tail(dim(Sigma), 1L) shift <- grep("^theta_L", names(stanfit@sim$samples[[1]]))[1] - 1L if (l == 3) for (chain in 1:end) for (param in 1:nrow(Sigma)) { stanfit@sim$samples[[chain]][[shift + param]] <- Sigma[param, , chain] } else for (chain in 1:end) { stanfit@sim$samples[[chain]][[shift + 1]] <- Sigma[, chain] } stanfit } # Get the names for the Sigma var-cov matrix # # @param cnms The component names for the group level terms, combined # across all glmer submodels # @return A character vector get_Sigma_nms <- function(cnms) { nms <- names(cnms) Sigma_nms <- lapply(cnms, FUN = function(grp) { nm <- outer(grp, grp, FUN = paste, sep = ",") nm[lower.tri(nm, diag = TRUE)] }) for (j in seq_along(Sigma_nms)) { Sigma_nms[[j]] <- paste0(nms[j], ":", Sigma_nms[[j]]) } unlist(Sigma_nms) } #--------------- Functions related to observation weights # Check the weights argument for stan_jm # # @param weights The data frame passed via the weights argument # @param id_var The name of the ID variable check_weights <- function(weights, id_var) { # Check weights are an appropriate data frame if ((!is.data.frame(weights)) || (!ncol(weights) == 2)) stop("'weights' argument should be a data frame with two columns: the first ", "containing patient IDs, the second containing their corresponding ", "weights.", call. = FALSE) if (!id_var %in% colnames(weights)) stop("The data frame supplied in the 'weights' argument should have a ", "column named ", id_var, call. 
= FALSE)
  weight_var <- setdiff(colnames(weights), id_var)
  # Check weights are positive and numeric
  wts <- weights[[weight_var]]
  if (!is.numeric(wts))
    stop("The weights supplied must be numeric.", call. = FALSE)
  if (any(wts < 0))
    stop("Negative weights are not allowed.", call. = FALSE)
  # Check only one weight per ID
  n_weights_per_id <- tapply(weights[[weight_var]], weights[[id_var]], length)
  if (!all(n_weights_per_id == 1L))
    stop("The data frame supplied in the 'weights' argument should only have ",
         "one row (ie, one weight) per patient ID.", call. = FALSE)
}

# Return the vector of prior weights for one of the submodels
#
# @param mod_stuff A named list with elements: y, flist, ord
# @param weights The data frame passed via the weights argument
# @param id_var The name of the ID variable
handle_weights <- function(mod_stuff, weights, id_var) {
  # An event submodel carries an 'eventtime' element; a glmod does not.
  is_glmod <- (is.null(mod_stuff$eventtime))
  # No weights provided by user
  if (is.null(weights)) {
    len <- if (is_glmod) length(mod_stuff$Y$Y) else length(mod_stuff$eventtime)
    return(rep(0.0, len))
  }
  # Check for IDs with no weight supplied
  weights[[id_var]] <- factor(weights[[id_var]])
  ids <- if (is_glmod) mod_stuff$Z$group_list[[id_var]] else factor(mod_stuff$id_list)
  sel <- which(!ids %in% weights[[id_var]])
  if (length(sel) > 0) {  # explicit length check (was a truthiness test)
    if (length(sel) > 30L) sel <- sel[1:30]  # cap the error message at 30 IDs
    stop(paste0("The following patient IDs are used in fitting the model, but ",
                "do not have weights supplied via the 'weights' argument: ",
                paste(ids[sel], collapse = ", ")), call.
= FALSE)
  }
  # Obtain length and ordering of weights vector using flist
  # NOTE(review): relies on merge(sort = FALSE) keeping the row order of
  # ids rather than sorting on the key -- confirm against merge() docs.
  wts_df <- merge(data.frame(id = ids), weights, by.x = "id", by.y = id_var,
                  sort = FALSE)
  wts_var <- setdiff(colnames(weights), id_var)
  wts <- wts_df[[wts_var]]
  wts
}
rstanarm/R/neg_binomial_2.R0000644000176200001440000000520713722762571015311 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Family function for negative binomial GLMs
#'
#' Specifies the information required to fit a Negative Binomial GLM in a
#' similar way to \code{\link[MASS]{negative.binomial}}. However, here the
#' overdispersion parameter \code{theta} is not specified by the user and always
#' estimated (really the \emph{reciprocal} of the dispersion parameter is
#' estimated). A call to this function can be passed to the \code{family}
#' argument of \code{\link{stan_glm}} or \code{\link{stan_glmer}} to estimate a
#' Negative Binomial model. Alternatively, the \code{\link{stan_glm.nb}} and
#' \code{\link{stan_glmer.nb}} wrapper functions may be used, which call
#' \code{neg_binomial_2} internally.
#'
#' @export
#' @param link The same as for \code{\link[stats:family]{poisson}}, typically a character
#' vector of length one among \code{"log"}, \code{"identity"}, and
#' \code{"sqrt"}.
#' @return An object of class \code{\link[stats]{family}} very similar to
#' that of \code{\link[stats:family]{poisson}} but with a different family name.
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386")
#' stan_glm(Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine, seed = 123,
#' family = neg_binomial_2, QR = TRUE, algorithm = "optimizing")
#'
#' # or, equivalently, call stan_glm.nb() without specifying the family
#'
neg_binomial_2 <- function(link = "log") {
  # Start from the poisson family object and override the pieces that
  # differ for the negative binomial parameterization.
  fam <- poisson(link)
  fam$family <- "neg_binomial_2"
  # With the default theta = Inf the mu^2 / theta term vanishes, so the
  # variance reduces to the Poisson variance mu.
  fam$variance <- function(mu, theta = Inf) mu + mu^2 / theta
  # The remaining components are not meant to be called; fail loudly if
  # anything tries to use them.
  fam$dev.resids <- function(y, mu, wt) {
    stop("'dev.resids' function should not be called")
  }
  fam$aic <- function(y, n, mu, wt, dev) {
    stop("'aic' function should not have been called")
  }
  fam$simulate <- function(object, nsim) {
    stop("'simulate' function should not have been called")
  }
  fam
}
rstanarm/R/plots.R0000644000176200001440000004113214406606742013600 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # #' Plot method for stanreg objects #' #' The \code{plot} method for \link{stanreg-objects} provides a convenient #' interface to the \link[bayesplot:MCMC-overview]{MCMC} module in the #' \pkg{\link{bayesplot}} package for plotting MCMC draws and diagnostics. It is also #' straightforward to use the functions from the \pkg{bayesplot} package directly rather than #' via the \code{plot} method. Examples of both methods of plotting are given #' below. #' #' @method plot stanreg #' @export #' @templateVar stanregArg x #' @template args-stanreg-object #' @template args-pars #' @template args-regex-pars #' @param plotfun A character string naming the \pkg{bayesplot} #' \link[bayesplot:MCMC-overview]{MCMC} function to use. The default is to call #' \code{\link[bayesplot:MCMC-intervals]{mcmc_intervals}}. \code{plotfun} can be specified #' either as the full name of a \pkg{bayesplot} plotting function (e.g. #' \code{"mcmc_hist"}) or can be abbreviated to the part of the name following #' the \code{"mcmc_"} prefix (e.g. \code{"hist"}). To get the names of all #' available MCMC functions see \code{\link[bayesplot:available_ppc]{available_mcmc}}. #' #' @param ... Additional arguments to pass to \code{plotfun} for customizing the #' plot. These are described on the help pages for the individual plotting #' functions. For example, the arguments accepted for the default #' \code{plotfun="intervals"} can be found at #' \code{\link[bayesplot:MCMC-intervals]{mcmc_intervals}}. #' #' @return Either a ggplot object that can be further customized using the #' \pkg{ggplot2} package, or an object created from multiple ggplot objects #' (e.g. a gtable object created by \code{\link[gridExtra]{arrangeGrob}}). 
#' #' @seealso #' \itemize{ #' \item The vignettes in the \pkg{bayesplot} package for many examples. #' \item \code{\link[bayesplot]{MCMC-overview}} (\pkg{bayesplot}) for links to #' the documentation for all the available plotting functions. #' \item \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}} (\pkg{bayesplot}) to change #' the color scheme used for plotting. #' \item \code{\link{pp_check}} for graphical posterior predictive checks. #' \item \code{\link{plot_nonlinear}} for models with nonlinear smooth #' functions fit using \code{\link{stan_gamm4}}. #' } #' #' @template reference-bayesvis #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' # Use rstanarm example model #' if (!exists("example_model")) example(example_model) #' fit <- example_model #' #' ##################################### #' ### Intervals and point estimates ### #' ##################################### #' plot(fit) # same as plot(fit, "intervals"), plot(fit, "mcmc_intervals") #' #' p <- plot(fit, pars = "size", regex_pars = "period", #' prob = 0.5, prob_outer = 0.9) #' p + ggplot2::ggtitle("Posterior medians \n with 50% and 90% intervals") #' #' # Shaded areas under densities #' bayesplot::color_scheme_set("brightblue") #' plot(fit, "areas", regex_pars = "period", #' prob = 0.5, prob_outer = 0.9) #' #' # Make the same plot by extracting posterior draws and calling #' # bayesplot::mcmc_areas directly #' x <- as.array(fit, regex_pars = "period") #' bayesplot::mcmc_areas(x, prob = 0.5, prob_outer = 0.9) #' #' # Ridgelines version of the areas plot #' bayesplot::mcmc_areas_ridges(x, regex_pars = "period", prob = 0.9) #' #' #' ################################## #' ### Histograms & density plots ### #' ################################## #' plot_title <- ggplot2::ggtitle("Posterior Distributions") #' plot(fit, "hist", regex_pars = "period") + plot_title #' plot(fit, "dens_overlay", pars = "(Intercept)", #' regex_pars = "period") + 
plot_title #' #' #################### #' ### Scatterplots ### #' #################### #' bayesplot::color_scheme_set("teal") #' plot(fit, "scatter", pars = paste0("period", 2:3)) #' plot(fit, "scatter", pars = c("(Intercept)", "size"), #' size = 3, alpha = 0.5) + #' ggplot2::stat_ellipse(level = 0.9) #' #' #' #################################################### #' ### Rhat, effective sample size, autocorrelation ### #' #################################################### #' bayesplot::color_scheme_set("red") #' #' # rhat #' plot(fit, "rhat") #' plot(fit, "rhat_hist") #' #' # ratio of effective sample size to total posterior sample size #' plot(fit, "neff") #' plot(fit, "neff_hist") #' #' # autocorrelation by chain #' plot(fit, "acf", pars = "(Intercept)", regex_pars = "period") #' plot(fit, "acf_bar", pars = "(Intercept)", regex_pars = "period") #' #' #' ################## #' ### Traceplots ### #' ################## #' # NOTE: rstanarm doesn't store the warmup draws (to save space because they #' # are not so essential for diagnosing the particular models implemented in #' # rstanarm) so the iterations in the traceplot are post-warmup iterations #' #' bayesplot::color_scheme_set("pink") #' (trace <- plot(fit, "trace", pars = "(Intercept)")) #' #' # change traceplot colors to ggplot defaults or custom values #' trace + ggplot2::scale_color_discrete() #' trace + ggplot2::scale_color_manual(values = c("maroon", "skyblue2")) #' #' # changing facet layout #' plot(fit, "trace", pars = c("(Intercept)", "period2"), #' facet_args = list(nrow = 2)) #' # same plot by calling bayesplot::mcmc_trace directly #' x <- as.array(fit, pars = c("(Intercept)", "period2")) #' bayesplot::mcmc_trace(x, facet_args = list(nrow = 2)) #' #' #' ############ #' ### More ### #' ############ #' #' # regex_pars examples #' plot(fit, regex_pars = "herd:1\\]") #' plot(fit, regex_pars = "herd:[279]") #' plot(fit, regex_pars = "herd:[279]|period2") #' plot(fit, regex_pars = c("herd:[279]", "period2")) 
#' }
#'
#' # For graphical posterior predictive checks see
#' # help("pp_check.stanreg")
#' }
#' @importFrom ggplot2 ggplot aes_string xlab %+replace% theme
#'
plot.stanreg <- function(x, plotfun = "intervals", pars = NULL,
                         regex_pars = NULL, ...) {
  # Pairs plots are handled by their own method with different arguments.
  if (plotfun %in% c("pairs", "mcmc_pairs"))
    return(pairs.stanreg(x, pars = pars, regex_pars = regex_pars, ...))
  fun <- set_plotting_fun(plotfun)
  args <- set_plotting_args(x, pars, regex_pars, ..., plotfun = plotfun)
  do.call(fun, args)
}

# internal for plot.stanreg ----------------------------------------------

# Prepare argument list to pass to plotting function
#
# @param x stanreg object
# @param pars,regex_pars user specified pars and regex_pars arguments (can be
#   missing)
# @param ... additional arguments to pass to the plotting function
# @param plotfun User's 'plotfun' argument
set_plotting_args <- function(x, pars = NULL, regex_pars = NULL, ...,
                              plotfun = character()) {
  plotfun <- mcmc_function_name(plotfun)
  if (!used.sampling(x))
    validate_plotfun_for_opt_or_vb(plotfun)

  # Does the normalized plotfun name contain "_<patt>"?
  .plotfun_is_type <- function(patt) {
    grepl(pattern = paste0("_", patt), x = plotfun, fixed = TRUE)
  }

  # NUTS diagnostic plots take the sampler parameters rather than draws;
  # all of them except the energy plot also receive the log-posterior.
  if (.plotfun_is_type("nuts")) {
    nuts_stuff <- list(x = bayesplot::nuts_params(x), ...)
    if (!.plotfun_is_type("energy"))
      nuts_stuff[["lp"]] <- bayesplot::log_posterior(x)
    return(nuts_stuff)
  }
  # rhat/neff plots take the diagnostic vectors, not the draws.
  if (.plotfun_is_type("rhat")) {
    rhat <- bayesplot::rhat(x, pars = pars, regex_pars = regex_pars)
    return(list(rhat = rhat, ...))
  }
  if (.plotfun_is_type("neff")) {
    ratio <- bayesplot::neff_ratio(x, pars = pars, regex_pars = regex_pars)
    return(list(ratio = ratio, ...))
  }
  if (!is.null(pars) || !is.null(regex_pars)) {
    pars <- collect_pars(x, pars, regex_pars)
    pars <- allow_special_parnames(x, pars)
  }
  # Optimization/VB fits have no chains, so pass a matrix of draws;
  # MCMC fits pass the iterations x chains x parameters array.
  if (!used.sampling(x)) {
    if (!length(pars)) pars <- NULL
    return(list(x = as.matrix(x, pars = pars), ...))
  }
  list(x = as.array(x, pars = pars, regex_pars = regex_pars), ...)
}

# Convert a user-supplied plotting function name to the canonical
# "mcmc_*" name used by bayesplot, translating deprecated aliases and
# rejecting names that cannot be used with the plot() method.
#
# @param fun User's 'plotfun' argument (character scalar)
# @return The validated "mcmc_*" function name (character scalar)
mcmc_function_name <- function(fun) {
  # to keep backwards compatibility convert old function names
  if (fun == "scat") {
    fun <- "scatter"
  } else if (fun == "ess") {
    fun <- "neff"
  } else if (fun == "ac") {
    fun <- "acf"
  } else if (fun %in% c("diag", "stan_diag")) {
    stop(
      "For NUTS diagnostics, instead of 'stan_diag', ",
      "please specify the name of one of the functions listed at ",
      "help('NUTS', 'bayesplot')",
      call. = FALSE
    )
  }

  if (identical(substr(fun, 1, 4), "ppc_"))
    stop(
      "For 'ppc_' functions use the 'pp_check' ",
      "method instead of 'plot'.",
      call. = FALSE
    )

  # Allow the "mcmc_" prefix to be omitted by the user.
  if (!identical(substr(fun, 1, 5), "mcmc_"))
    fun <- paste0("mcmc_", fun)

  if (!fun %in% bayesplot::available_mcmc())
    stop(
      fun,
      " is not a valid MCMC function name.",
      " Use bayesplot::available_mcmc() for a list of available MCMC functions.",
      call. = FALSE  # added for consistency with the other errors here
    )

  return(fun)
}

# check if a plotting function requires multiple chains
needs_chains <- function(x) {
  nms <- c(
    "trace", "trace_highlight", "rank", "rank_overlay", "acf", "acf_bar",
    "hist_by_chain", "dens_overlay", "violin", "combo"
  )
  mcmc_function_name(x) %in% paste0("mcmc_", nms)
}

# Select the correct plotting function
#
# @param plotfun user specified plotfun argument (can be missing)
# @return A bayesplot function, or the character string "mcmc_intervals"
#   when plotfun is NULL (do.call() accepts either form).
set_plotting_fun <- function(plotfun = NULL) {
  if (is.null(plotfun))
    return("mcmc_intervals")
  if (!is.character(plotfun))
    stop("'plotfun' should be a string.", call. = FALSE)

  plotfun <- mcmc_function_name(plotfun)
  fun <- try(get(plotfun, pos = asNamespace("bayesplot"), mode = "function"),
             silent = TRUE)
  if (!inherits(fun, "try-error"))
    return(fun)

  stop(
    "Plotting function ", plotfun, " not found. ",
    "A valid plotting function is any function from the ",
    "'bayesplot' package beginning with the prefix 'mcmc_'.",
    call.
= FALSE
  )
}

# check if plotfun is ok to use with vb or optimization
validate_plotfun_for_opt_or_vb <- function(plotfun) {
  plotfun <- mcmc_function_name(plotfun)
  # Chain-based plots and sampler-diagnostic plots both need MCMC draws,
  # so neither is available for optimization or variational fits.
  requires_chains <- needs_chains(plotfun)
  requires_diagnostics <- grepl("_rhat|_neff|_nuts_", plotfun)
  if (requires_chains || requires_diagnostics) {
    STOP_sampling_only(plotfun)
  }
}

# pairs method ------------------------------------------------------------

#' Pairs method for stanreg objects
#'
#' Interface to \pkg{bayesplot}'s
#' \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}} function for use with
#' \pkg{rstanarm} models. Be careful not to specify too many parameters to
#' include or the plot will be both hard to read and slow to render.
#'
#' @method pairs stanreg
#' @export
#' @importFrom bayesplot pairs_style_np pairs_condition
#' @export pairs_style_np pairs_condition
#' @aliases pairs_style_np pairs_condition
#'
#' @templateVar stanregArg x
#' @template args-stanreg-object
#' @template args-regex-pars
#' @param pars An optional character vector of parameter names. All parameters
#' are included by default, but for models with more than just a few
#' parameters it may be far too many to visualize on a small computer screen
#' and also may require substantial computing time.
#' @param condition Same as the \code{condition} argument to
#' \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}} except the \emph{default is different}
#' for \pkg{rstanarm} models. By default, the \code{mcmc_pairs} function in
#' the \pkg{bayesplot} package plots some of the Markov chains (half, in the
#' case of an even number of chains) in the panels above the diagonal and the
#' other half in the panels below the diagonal. However since we know that
#' \pkg{rstanarm} models were fit using Stan (which \pkg{bayesplot} doesn't
#' assume) we can make the default more useful by splitting the draws
#' according to the \code{accept_stat__} diagnostic.
The plots below the #' diagonal will contain realizations that are below the median #' \code{accept_stat__} and the plots above the diagonal will contain #' realizations that are above the median \code{accept_stat__}. To change this #' behavior see the documentation of the \code{condition} argument at #' \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}}. #' @param ... Optional arguments passed to #' \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}}. #' The \code{np}, \code{lp}, and \code{max_treedepth} arguments to #' \code{mcmc_pairs} are handled automatically by \pkg{rstanarm} and do not #' need to be specified by the user in \code{...}. The arguments that can be #' specified in \code{...} include \code{transformations}, \code{diag_fun}, #' \code{off_diag_fun}, \code{diag_args}, \code{off_diag_args}, #' and \code{np_style}. These arguments are #' documented thoroughly on the help page for #' \code{\link[bayesplot:MCMC-scatterplots]{mcmc_pairs}}. #' #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' if (!exists("example_model")) example(example_model) #' #' bayesplot::color_scheme_set("purple") #' #' # see 'condition' argument above for details on the plots below and #' # above the diagonal. default is to split by accept_stat__. 
#' pairs(example_model, pars = c("(Intercept)", "log-posterior"))
#'
#' # for demonstration purposes, intentionally fit a model that
#' # will (almost certainly) have some divergences
#' fit <- stan_glm(
#'   mpg ~ ., data = mtcars,
#'   iter = 1000,
#'   # this combo of prior and adapt_delta should lead to some divergences
#'   prior = hs(),
#'   adapt_delta = 0.9,
#'   refresh = 0
#' )
#'
#' pairs(fit, pars = c("wt", "sigma", "log-posterior"))
#'
#' # requires hexbin package
#' # pairs(
#' #   fit,
#' #   pars = c("wt", "sigma", "log-posterior"),
#' #   transformations = list(sigma = "log"), # show log(sigma) instead of sigma
#' #   off_diag_fun = "hex" # use hexagonal heatmaps instead of scatterplots
#' # )
#'
#' bayesplot::color_scheme_set("brightblue")
#' pairs(
#'   fit,
#'   pars = c("(Intercept)", "wt", "sigma", "log-posterior"),
#'   transformations = list(sigma = "log"),
#'   off_diag_args = list(size = 3/4, alpha = 1/3), # size and transparency of scatterplot points
#'   np_style = pairs_style_np(div_color = "black", div_shape = 2) # color and shape of the divergences
#' )
#'
#' # Using the condition argument to show divergences above the diagonal
#' pairs(
#'   fit,
#'   pars = c("(Intercept)", "wt", "log-posterior"),
#'   condition = pairs_condition(nuts = "divergent__")
#' )
#'
#' }
#' }
pairs.stanreg <- function(x,
                          pars = NULL,
                          regex_pars = NULL,
                          condition = pairs_condition(nuts = "accept_stat__"),
                          ...) {
  if (!used.sampling(x))
    STOP_sampling_only("pairs")
  dots <- list(...)
  # These mcmc_pairs() arguments are supplied automatically below, so warn
  # if the user also tried to pass them via ...
  ignored_args <- c("np", "lp", "max_treedepth")
  specified <- ignored_args %in% names(dots)
  if (any(specified)) {
    warning(
      "The following arguments were ignored because they are ",
      "specified automatically by rstanarm: ",
      paste(sQuote(ignored_args[specified]), collapse = ", ")
    )
  }
  posterior <- as.array.stanreg(x, pars = pars, regex_pars = regex_pars)
  if (is.null(pars) && is.null(regex_pars)) {
    # include log-posterior by default
    # Grow the parameter dimension of the array by one and append the
    # log-posterior draws as the last slice.
    lp_arr <- as.array.stanreg(x, pars = "log-posterior")
    dd <- dim(posterior)
    dn <- dimnames(posterior)
    dd[3] <- dd[3] + 1
    dn$parameters <- c(dn$parameters, "log-posterior")
    tmp <- array(NA, dim = dd, dimnames = dn)
    tmp[,, 1:(dd[3] - 1)] <- posterior
    tmp[,, dd[3]] <- lp_arr
    posterior <- tmp
  }
  # NOTE(review): rounding to 12 digits -- presumably to avoid spurious
  # floating-point differences in the plotted draws; confirm intent.
  posterior <- round(posterior, digits = 12)
  bayesplot::mcmc_pairs(
    x = posterior,
    np = bayesplot::nuts_params(x),
    lp = bayesplot::log_posterior(x),
    max_treedepth = .max_treedepth(x),
    condition = condition,
    ...
  )
}

# internal for pairs.stanreg ----------------------------------------------

# Return the max_treedepth setting stored with the fitted model, falling
# back to 10 when no control list (or no max_treedepth entry) was stored.
#
# @param x stanreg object
.max_treedepth <- function(x) {
  control <- x$stanfit@stan_args[[1]]$control
  if (is.null(control)) {
    max_td <- 10
  } else {
    max_td <- control$max_treedepth
    if (is.null(max_td)) max_td <- 10
  }
  return(max_td)
}
rstanarm/R/posterior_traj.R0000644000176200001440000011564514406606742015510 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Estimate the subject-specific or marginal longitudinal trajectory #' #' This function allows us to generate an estimated longitudinal trajectory #' (either subject-specific, or by marginalising over the distribution of the #' group-specific parameters) based on draws from the posterior predictive #' distribution. #' #' @export #' #' @templateVar stanjmArg object #' @templateVar mArg m #' @template args-stanjm-object #' @template args-m #' @param newdata \strong{Deprecated}: please use \code{newdataLong} instead. #' Optionally, a data frame in which to look for variables with #' which to predict. If omitted, the model matrix is used. If \code{newdata} #' is provided and any variables were transformed (e.g. rescaled) in the data #' used to fit the model, then these variables must also be transformed in #' \code{newdata}. This only applies if variables were transformed before #' passing the data to one of the modeling functions and \emph{not} if #' transformations were specified inside the model formula. #' @param newdataLong,newdataEvent Optionally, a data frame (or in the case of #' \code{newdataLong} this can be a list of data frames) in which to look #' for variables with which to predict. If omitted, the model matrices are used. #' If new data is provided, then two options are available. Either one can #' provide observed covariate and outcome data, collected up to some time #' \emph{t}, and use this data to draw new individual-specific coefficients #' (i.e. individual-level random effects). This is the default behaviour when #' new data is provided, determined by the argument \code{dynamic = TRUE}, and #' requiring both \code{newdataLong} and \code{newdataEvent} to be specified. 
#' Alternatively, one can specify \code{dynamic = FALSE}, and then predict #' using just covariate data, by marginalising over the distribution #' of the group-specific coefficients; in this case, only \code{newdataLong} #' needs to be specified and it only needs to be a single data frame with #' the covariate data for the predictions for the one longitudinal submodel. #' @param interpolate A logical specifying whether to interpolate the estimated #' longitudinal trajectory in between the observation times. This can be used #' to achieve a smooth estimate of the longitudinal trajectory across the #' entire follow up time. If \code{TRUE} then the interpolation can be further #' controlled using the \code{control} argument. #' @param extrapolate A logical specifying whether to extrapolate the estimated #' longitudinal trajectory beyond the time of the last known observation time. #' If \code{TRUE} then the extrapolation can be further controlled using #' the \code{control} argument. #' @param control A named list with parameters controlling the interpolation or #' extrapolation of the estimated longitudinal trajectory when either #' \code{interpolate = TRUE} or \code{extrapolate = TRUE}. The #' list can contain one or more of the following named elements: \cr #' \describe{ #' \item{\code{ipoints}}{a positive integer specifying the number of discrete #' time points at which to calculate the estimated longitudinal response for #' \code{interpolate = TRUE}. These time points are evenly spaced starting at #' 0 and ending at the last known observation time for each individual. The #' last observation time for each individual is taken to be either: the #' event or censoring time if no new data is provided; the time specified #' in the "last_time" column if provided in the new data (see \strong{Details} #' section below); or the time of the last longitudinal measurement if new #' data is provided but no "last_time" column is included. 
The default is 15.} #' \item{\code{epoints}}{a positive integer specifying the number of discrete #' time points at which to calculate the estimated longitudinal response for #' \code{extrapolate = TRUE}. These time points are evenly spaced between the #' last known observation time for each individual and the extrapolation #' distance specified using either \code{edist} or \code{eprop}. #' The default is 15.} #' \item{\code{eprop}}{a positive scalar between 0 and 1 specifying the #' amount of time across which to extrapolate the longitudinal trajectory, #' represented as a proportion of the total observed follow up time for each #' individual. For example specifying \code{eprop = 0.2} means that for an #' individual for whom the latest of their measurement, event or censoring times #' was 10 years, their estimated longitudinal trajectory will be extrapolated #' out to 12 years (i.e. 10 + (0.2 * 10)). The default value is 0.2.} #' \item{\code{edist}}{a positive scalar specifying the amount of time #' across which to extrapolate the longitudinal trajectory for each individual, #' represented in units of the time variable \code{time_var} (from fitting the #' model). This cannot be specified if \code{eprop} is specified.} #' } #' @param last_time A scalar, character string, or \code{NULL}. This argument #' specifies the last known survival time for each individual when #' conditional predictions are being obtained. If #' \code{newdataEvent} is provided and conditional survival predictions are being #' obtained, then the \code{last_time} argument can be one of the following: #' (i) a scalar, this will use the same last time for each individual in #' \code{newdataEvent}; (ii) a character string, naming a column in #' \code{newdataEvent} in which to look for the last time for each individual; #' (iii) \code{NULL}, in which case the default is to use the time of the latest #' longitudinal observation in \code{newdataLong}.
If \code{newdataEvent} is #' \code{NULL} then the \code{last_time} argument cannot be specified #' directly; instead it will be set equal to the event or censoring time for #' each individual in the dataset that was used to estimate the model. #' If standardised survival probabilities are requested (i.e. #' \code{standardise = TRUE}) then conditional survival probabilities are #' not allowed and therefore the \code{last_time} argument is ignored. #' @param ids An optional vector specifying a subset of subject IDs for whom the #' predictions should be obtained. The default is to predict for all individuals #' who were used in estimating the model or, if \code{newdata} is specified, #' then all individuals contained in \code{newdata}. #' @param prob A scalar between 0 and 1 specifying the width to use for the #' uncertainty interval (sometimes called credible interval) for the predicted #' mean response and the prediction interval for the predicted (raw) response. #' For example \code{prob = 0.95} (the default) means that the 2.5th and 97.5th #' percentiles will be provided. Only relevant when \code{return_matrix} is #' \code{FALSE}. #' @param dynamic A logical that is only relevant if new data is provided #' via the \code{newdata} argument. If #' \code{dynamic = TRUE}, then new group-specific parameters are drawn for #' the individuals in the new data, conditional on their longitudinal #' biomarker data contained in \code{newdata}. These group-specific #' parameters are then used to generate individual-specific survival probabilities #' for these individuals. These are often referred to as "dynamic predictions" #' in the joint modelling context, because the predictions can be updated #' each time additional longitudinal biomarker data is collected on the individual. 
#' On the other hand, if \code{dynamic = FALSE} then the survival probabilities #' will just be marginalised over the distribution of the group-specific #' coefficients; this will mean that the predictions will incorporate all #' uncertainty due to between-individual variation so there will likely be #' very wide credible intervals on the predicted survival probabilities. #' @param scale A scalar, specifying how much to multiply the asymptotic #' variance-covariance matrix for the random effects by, which is then #' used as the "width" (ie. variance-covariance matrix) of the multivariate #' Student-t proposal distribution in the Metropolis-Hastings algorithm. This #' is only relevant when \code{newdataEvent} is supplied and #' \code{dynamic = TRUE}, in which case new random effects are simulated #' for the individuals in the new data using the Metropolis-Hastings algorithm. #' @param draws An integer indicating the number of MCMC draws to return. #' The default is to set the number of draws equal to 200, or equal to the #' size of the posterior sample if that is less than 200. #' @param seed An optional \code{\link[=set.seed]{seed}} to use. #' @param return_matrix A logical. If \code{TRUE} then a \code{draws} by #' \code{nrow(newdata)} matrix is returned which contains all the actual #' simulations or draws from the posterior predictive distribution. Otherwise #' if \code{return_matrix} is set to \code{FALSE} (the default) then a #' data frame is returned, as described in the \strong{Value} section below. #' @param ... Other arguments passed to \code{\link{posterior_predict}}, for #' example \code{draws}, \code{re.form}, \code{seed}, etc. 
#' #' @details The \code{posterior_traj} function acts as a wrapper to the #' \code{\link{posterior_predict}} function, but allows predictions to be #' easily generated at time points that are interpolated and/or extrapolated #' between time zero (baseline) and the last known survival time for the #' individual, thereby providing predictions that correspond to a smooth estimate #' of the longitudinal trajectory (useful for the plotting via the associated #' \code{\link{plot.predict.stanjm}} method). In addition it returns a data #' frame by default, whereas the \code{\link{posterior_predict}} function #' returns a matrix; see the \strong{Value} section below for details. Also, #' \code{posterior_traj} allows predictions to only be generated for a subset #' of individuals, via the \code{ids} argument. #' #' @return When \code{return_matrix = FALSE}, a data frame #' of class \code{predict.stanjm}. The data frame includes a column for the median #' of the posterior predictions of the mean longitudinal response (\code{yfit}), #' a column for each of the lower and upper limits of the uncertainty interval #' corresponding to the posterior predictions of the mean longitudinal response #' (\code{ci_lb} and \code{ci_ub}), and a column for each of the lower and upper #' limits of the prediction interval corresponding to the posterior predictions #' of the (raw) longitudinal response. The data frame also includes columns for #' the subject ID variable, and each of the predictor variables. The returned #' object also includes a number of attributes. #' #' When \code{return_matrix = TRUE}, the returned object is the same as that #' described for \code{\link{posterior_predict}}. 
#' #' @seealso \code{\link{plot.predict.stanjm}}, \code{\link{posterior_predict}}, #' \code{\link{posterior_survfit}} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' # Run example model if not already loaded #' if (!exists("example_jm")) example(example_jm) #' #' # Obtain subject-specific predictions for all individuals #' # in the estimation dataset #' pt1 <- posterior_traj(example_jm, interpolate = FALSE, extrapolate = FALSE) #' head(pt1) #' #' # Obtain subject-specific predictions only for a few selected individuals #' pt2 <- posterior_traj(example_jm, ids = c(1,3,8)) #' #' # If we wanted to obtain subject-specific predictions in order to plot the #' # longitudinal trajectories, then we might want to ensure a full trajectory #' # is obtained by interpolating and extrapolating time. We can then use the #' # generic plot function to plot the subject-specific predicted trajectories #' # for the first three individuals. Interpolation and extrapolation is #' # carried out by default. #' pt3 <- posterior_traj(example_jm) #' head(pt3) # predictions at additional time points compared with pt1 #' plot(pt3, ids = 1:3) #' #' # If we wanted to extrapolate further in time, but decrease the number of #' # discrete time points at which we obtain predictions for each individual, #' # then we could specify a named list in the 'control' argument #' pt4 <- posterior_traj(example_jm, control = list(ipoints = 10, epoints = 10, eprop = 0.5)) #' #' # If we have prediction data for a new individual, and we want to #' # estimate the longitudinal trajectory for that individual conditional #' # on this new data (perhaps extrapolating forward from our last #' # longitudinal measurement) then we can do that. It requires drawing #' # new individual-specific parameters, based on the full likelihood, #' # so we must supply new data for both the longitudinal and event #' # submodels. These are sometimes known as dynamic predictions. 
#' ndL <- pbcLong[pbcLong$id == 8, , drop = FALSE] #' ndE <- pbcSurv[pbcSurv$id == 8, , drop = FALSE] #' ndL$id <- "new_subject" # new id can't match one used in training data #' ndE$id <- "new_subject" #' pt5 <- posterior_traj(example_jm, #' newdataLong = ndL, #' newdataEvent = ndE) #' #' # By default it is assumed that the last known survival time for #' # the individual is the time of their last biomarker measurement, #' # but if we know they survived to some later time then we can #' # condition on that information using the last_time argument #' pt6 <- posterior_traj(example_jm, #' newdataLong = ndL, #' newdataEvent = ndE, #' last_time = "futimeYears") #' #' # Alternatively we may want to estimate the marginal longitudinal #' # trajectory for a given set of covariates. To do this, we can pass #' # the desired covariate values in a new data frame (however the only #' # covariate in our fitted model was the time variable, year). To make sure #' # that we marginalise over the random effects, we need to specify an ID value #' # which does not correspond to any of the individuals who were used in the #' # model estimation and specify the argument dynamic=FALSE. #' # The marginal prediction is obtained by generating subject-specific #' # predictions using a series of random draws from the random #' # effects distribution, and then integrating (ie, averaging) over these. #' # Our marginal prediction will therefore capture the between-individual #' # variation associated with the random effects. #' #' nd <- data.frame(id = rep("new1", 11), year = (0:10 / 2)) #' pt7 <- posterior_traj(example_jm, newdataLong = nd, dynamic = FALSE) #' head(pt7) # note the greater width of the uncertainty interval compared #' # with the subject-specific predictions in pt1, pt2, etc #' #' # Alternatively, we could have estimated the "marginal" trajectory by #' # ignoring the random effects (ie, assuming the random effects were set #' # to zero). 
This will generate a predicted longitudinal trajectory only
#' # based on the fixed effect component of the model. In essence, for a
#' # linear mixed effects model (ie, a model that uses an identity link
#' # function), we should obtain a similar point estimate ("yfit") to the
#' # estimates obtained in pt7 (since the mean of the estimated random effects
#' # distribution will be approximately 0). However, it is important to note that
#' # the uncertainty interval will be much more narrow, since it completely
#' # ignores the between-individual variability captured by the random effects.
#' # Further, if the model uses a non-identity link function, then the point
#' # estimate ("yfit") obtained only using the fixed effect component of the
#' # model will actually provide a biased estimate of the marginal prediction.
#' # Nonetheless, to demonstrate how we can obtain the predictions only using
#' # the fixed effect component of the model, we simply specify 're.form = NA'.
#' # (We will use the same covariate values as used in the prediction for
#' # example for pt7).
#'
#' pt8 <- posterior_traj(example_jm, newdataLong = nd, dynamic = FALSE,
#'                       re.form = NA)
#' head(pt8)  # note the much narrower ci, compared with pt7
#' }
#' }
posterior_traj <- function(object, m = 1, newdata = NULL, newdataLong = NULL,
                           newdataEvent = NULL, interpolate = TRUE,
                           extrapolate = FALSE, control = list(),
                           last_time = NULL, prob = 0.95, ids, dynamic = TRUE,
                           scale = 1.5, draws = NULL, seed = NULL,
                           return_matrix = FALSE, ...) {
  # data.table is only suggested (not imported), so check it is available
  # before attempting the rolling merges used for interpolation/extrapolation.
  if (!requireNamespace("data.table"))
    stop("the 'data.table' package must be installed to use this function")
  validate_stanjm_object(object)
  # m indexes the longitudinal submodel; must be a positive scalar <= M
  M <- object$n_markers; validate_positive_scalar(m, M)
  id_var <- object$id_var
  time_var <- object$time_var
  grp_stuff <- object$grp_stuff[[m]]
  glmod <- object$glmod[[m]]
  if (!is.null(seed))
    set.seed(seed)
  if (missing(ids))
    ids <- NULL
  dots <- list(...)

  # Deal with deprecated 'newdata' argument (superseded by 'newdataLong')
  if (!is.null(newdata)) {
    warning("The 'newdata' argument is deprecated. Use 'newdataLong' instead.")
    if (!is.null(newdataLong))
      stop2("'newdata' and 'newdataLong' cannot both be specified.")
    newdataLong <- newdata
  }

  # Construct prediction data, NB dats == observed data to return to user
  if (is.null(newdataLong)) { # user did not specify newdata
    if (!is.null(newdataEvent))
      stop2("'newdataEvent' can only be specified when 'newdataLong' is provided.")
    dats <- get_model_data(object)
    ndL <- dats[1:M]
    ndE <- dats[["Event"]]
  } else { # user specified newdataLong
    if (dynamic && is.null(newdataEvent))
      stop2("Dynamic predictions require both 'newdataLong' and 'newdataEvent' ",
            "to be specified. Either specify data for both the longitudinal and ",
            "event submodels or, alternatively, specify argument 'dynamic = FALSE' ",
            "to marginalise over the distribution of group-specific parameters.")
    dats <- validate_newdatas(object, newdataLong, newdataEvent,
                              response = isTRUE(dynamic))
    ndL <- dats[1:M]
    ndE <- dats[["Event"]]
  }
  if (!is.null(ids)) { # user specified a subset of ids
    ndL <- subset_ids(object, ndL, ids)
    ndE <- subset_ids(object, ndE, ids)
  }
  id_list <- factor(unique(ndL[[m]][[id_var]])) # order of ids from data, not ids arg

  # Last known survival time for each individual
  if (is.null(newdataLong)) { # user did not provide newdata
    if (!is.null(last_time))
      stop("'last_time' cannot be provided when newdata is NULL, since times ",
           "are taken to be the event or censoring time for each individual.")
    last_time <- object$eventtime[as.character(id_list)]
  } else { # user specified newdata
    if (is.null(last_time)) { # use latest longitudinal observation
      max_ytimes <- do.call("cbind", lapply(
        ndL, function(x) tapply(x[[time_var]], x[[id_var]], FUN = max)))
      last_time <- apply(max_ytimes, 1L, max)
      # re-order last-time according to id_list
      last_time <- last_time[as.character(id_list)]
    } else if (is.character(last_time) && (length(last_time) == 1L)) {
      # NOTE(review): if ndE were NULL here (dynamic = FALSE with no
      # newdataEvent), last_time would silently remain a character string;
      # presumably upstream validation prevents that — TODO confirm.
      if (!is.null(ndE)) { # user provided newdataEvent for dynamic predictions
        if (!last_time %in% colnames(ndE))
          stop("Cannot find 'last_time' column named in newdataEvent.")
        last_time <- ndE[[last_time]]
      }
    } else if (is.numeric(last_time) && (length(last_time) == 1L)) {
      last_time <- rep(last_time, length(id_list))
    } else if (is.numeric(last_time) && (length(last_time) > 1L)) {
      last_time <- last_time[as.character(id_list)]
    } else {
      stop("Bug found: could not reconcile 'last_time' argument.")
    }
    names(last_time) <- as.character(id_list)
  }

  # Get stanmat parameter matrix for specified number of draws
  S <- posterior_sample_size(object)
  if (is.null(draws))
    draws <- if (S > 200L) 200L else S
  if (draws > S)
    stop("'draws' should be <= posterior sample size (", S, ").")
  stanmat <- as.matrix(object$stanfit)
  some_draws <- isTRUE(draws < S)
  if (some_draws) {
    samp <- sample(S, draws)
    stanmat <- stanmat[samp, , drop = FALSE]
  }

  # Draw b pars for new individuals via Metropolis-Hastings (dynamic preds)
  if (dynamic && !is.null(newdataEvent)) {
    stanmat <- simulate_b_pars(object, stanmat = stanmat, ndL = ndL,
                               ndE = ndE, ids = id_list,
                               times = last_time, scale = scale)
    b_new <- attr(stanmat, "b_new")
    acceptance_rate <- attr(stanmat, "acceptance_rate")
  }

  newX <- ndL[[m]] # design matrix used for predictions
  if (interpolate || extrapolate) { # user specified interpolation or extrapolation
    if (return_matrix)
      stop("'return_matrix' cannot be TRUE if 'interpolate' or 'extrapolate' is TRUE.")
    ok_control_args <- c("ipoints", "epoints", "edist", "eprop")
    control <- get_extrapolation_control(control,
                                         ok_control_args = ok_control_args)
    # extrapolation distance: either a proportion of the last known time
    # (eprop) or an absolute distance (edist)
    dist <- if (!is.null(control$eprop))
      control$eprop * (last_time - 0) else control$edist
    iseq <- if (interpolate)
      get_time_seq(control$ipoints, 0, last_time) else NULL
    eseq <- if (extrapolate)
      get_time_seq(control$epoints, last_time, last_time + dist) else NULL
    # reshape the per-id time grid to long format (one row per id x time)
    time_seq <- as.data.frame(cbind(iseq, eseq))
    colnames(time_seq) <- paste0("V", 1:NCOL(time_seq))
    time_seq <- reshape(time_seq, direction = "long",
                        varying = colnames(time_seq), v.names = time_var,
                        timevar = "obs", ids = id_list, idvar = id_var)
    newX[[time_var]] <- as.numeric(newX[[time_var]]) # ensures no rounding during data.table merge
    if (grp_stuff$has_grp) {
      grp_var <- grp_stuff$grp_var
      time_seq <- merge(time_seq, unique(newX[, c(id_var, grp_var)]),
                        by = id_var)
      time_seq <- time_seq[order(time_seq[["obs"]], time_seq[[id_var]],
                                 time_seq[[grp_var]]), ]
      newX <- prepare_data_table(newX, id_var = id_var, time_var = time_var,
                                 grp_var = grp_var)
      newX <- rolling_merge(newX, time_seq[[id_var]], time_seq[[time_var]],
                            time_seq[[grp_var]])
    } else {
      newX <- prepare_data_table(newX, id_var = id_var, time_var = time_var)
      newX <- rolling_merge(newX, time_seq[[id_var]], time_seq[[time_var]])
    }
  }

  if (isTRUE(as.logical(glmod$has_offset))) {
    # create a temporary data frame with a fake outcome to avoid error
    response_name <- as.character(formula(object)[[m]])[2]
    newX_temp <- cbind(0, newX)
    colnames(newX_temp) <- c(response_name, colnames(newX))
    newOffset <- model.offset(model.frame(terms(glmod), newX_temp))
  } else {
    newOffset <- NULL
  }

  # Delegate the actual simulation to posterior_predict, passing the
  # (possibly subsampled / MH-augmented) stanmat directly
  ytilde <- posterior_predict(object, newdata = newX, m = m,
                              stanmat = stanmat, offset = newOffset, ...)
  if (return_matrix) {
    attr(ytilde, "mu") <- NULL # remove attribute mu
    return(ytilde) # return S * N matrix, instead of data frame
  }
  mutilde <- attr(ytilde, "mu")
  if (!is.null(newX) && nrow(newX) == 1L)
    mutilde <- t(mutilde)
  ytilde_bounds <- median_and_bounds(ytilde, prob)  # median and prob% CrI limits
  mutilde_bounds <- median_and_bounds(mutilde, prob) # median and prob% CrI limits
  # yfit/ci_* summarise the mean response; pi_* summarise the raw response
  out <- data.frame(IDVAR = newX[[id_var]],
                    TIMEVAR = newX[[time_var]],
                    yfit = mutilde_bounds$med,
                    ci_lb = mutilde_bounds$lb,
                    ci_ub = mutilde_bounds$ub,
                    pi_lb = ytilde_bounds$lb,
                    pi_ub = ytilde_bounds$ub)
  if (grp_stuff$has_grp) {
    out$GRPVAR = newX[[grp_var]] # add grp_var and reorder cols
    out <- out[, c("IDVAR", "GRPVAR", "TIMEVAR", "yfit",
                   "ci_lb", "ci_ub", "pi_lb", "pi_ub")]
  }
  colnames(out) <- c(id_var, if (grp_stuff$has_grp) grp_var, time_var,
                     "yfit", "ci_lb", "ci_ub", "pi_lb", "pi_ub")
  class(out) <- c("predict.stanjm", "data.frame")
  Terms <- terms(formula(object, m = m))
  vars <- rownames(attr(Terms, "factors"))
  y_var <- vars[[attr(Terms, "response")]]
  # Attributes consumed by plot.predict.stanjm and related methods
  out <- structure(out,
                   observed_data = ndL[[m]],
                   last_time = last_time,
                   y_var = y_var,
                   id_var = id_var,
                   time_var = time_var,
                   grp_var = if (grp_stuff$has_grp) grp_var else NULL,
                   interpolate = interpolate,
                   extrapolate = extrapolate,
                   control = control,
                   call = match.call())
  if (dynamic && !is.null(newdataEvent)) {
    out <- structure(out, b_new = b_new, acceptance_rate = acceptance_rate)
  }
  out
}


#' Plot the estimated subject-specific or marginal longitudinal trajectory
#'
#' This generic \code{plot} method for \code{predict.stanjm} objects will
#' plot the estimated subject-specific or marginal longitudinal trajectory
#' using the data frame returned by a call to \code{\link{posterior_traj}}.
#' To ensure that enough data points are available to plot the longitudinal #' trajectory, it is assumed that the call to \code{\link{posterior_traj}} #' would have used the default \code{interpolate = TRUE}, and perhaps also #' \code{extrapolate = TRUE} (the latter being optional, depending on #' whether or not the user wants to see extrapolation of the longitudinal #' trajectory beyond the last observation time). #' #' @method plot predict.stanjm #' @export #' @importFrom ggplot2 ggplot aes aes_string geom_line geom_smooth geom_ribbon #' geom_point facet_wrap geom_vline labs ggplot_build theme_bw #' #' @templateVar labsArg xlab,ylab #' @templateVar scalesArg facet_scales #' @templateVar cigeomArg ci_geom_args #' @template args-ids #' @template args-labs #' @template args-scales #' @template args-ci-geom-args #' #' @param x A data frame and object of class \code{predict.stanjm} #' returned by a call to the function \code{\link{posterior_traj}}. #' The object contains point estimates and uncertainty interval limits #' for the fitted values of the longitudinal response. #' @param limits A quoted character string specifying the type of limits to #' include in the plot. Can be one of: \code{"ci"} for the Bayesian #' posterior uncertainty interval for the estimated mean longitudinal #' response (often known as a credible interval); #' \code{"pi"} for the prediction interval for the estimated (raw) #' longitudinal response; or \code{"none"} for no interval limits. #' @param vline A logical. If \code{TRUE} then a vertical dashed line #' is added to the plot indicating the event or censoring time for #' the individual. Can only be used if each plot within the figure #' is for a single individual. #' @param plot_observed A logical. If \code{TRUE} then the observed #' longitudinal measurements are overlaid on the plot. #' @param grp_overlay Only relevant if the model had lower level units #' clustered within an individual. 
If \code{TRUE}, then the fitted trajectories #' for the lower level units will be overlaid in the same plot region (that #' is, all lower level units for a single individual will be shown within a #' single facet). If \code{FALSE}, then the fitted trajectories for each lower #' level unit will be shown in a separate facet. #' @param ... Optional arguments passed to #' \code{\link[ggplot2]{geom_smooth}} and used to control features #' of the plotted longitudinal trajectory. #' #' @return A \code{ggplot} object, also of class \code{plot.predict.stanjm}. #' This object can be further customised using the \pkg{ggplot2} package. #' It can also be passed to the function \code{\link{plot_stack_jm}}. #' #' @seealso \code{\link{posterior_traj}}, \code{\link{plot_stack_jm}}, #' \code{\link{posterior_survfit}}, \code{\link{plot.survfit.stanjm}} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' # Run example model if not already loaded #' if (!exists("example_jm")) example(example_jm) #' #' # For a subset of individuals in the estimation dataset we will #' # obtain subject-specific predictions for the longitudinal submodel #' # at evenly spaced times between 0 and their event or censoring time. #' pt1 <- posterior_traj(example_jm, ids = c(7,13,15), interpolate = TRUE) #' plot(pt1) # credible interval for mean response #' plot(pt1, limits = "pi") # prediction interval for raw response #' plot(pt1, limits = "none") # no uncertainty interval #' #' # We can also extrapolate the longitudinal trajectories. 
#' pt2 <- posterior_traj(example_jm, ids = c(7,13,15), interpolate = TRUE,
#'                       extrapolate = TRUE)
#' plot(pt2)
#' plot(pt2, vline = TRUE) # add line indicating event or censoring time
#' plot(pt2, vline = TRUE, plot_observed = TRUE) # overlay observed longitudinal data
#'
#' # We can change or add attributes to the plot
#' plot1 <- plot(pt2, ids = c(7,13,15), xlab = "Follow up time",
#'               vline = TRUE, plot_observed = TRUE,
#'               facet_scales = "fixed", color = "blue", linetype = 2,
#'               ci_geom_args = list(fill = "red"))
#' plot1
#'
#' # Since the returned plot is also a ggplot object, we can
#' # modify some of its attributes after it has been returned
#' plot1 +
#'   ggplot2::theme(strip.background = ggplot2::element_blank()) +
#'   ggplot2::labs(title = "Some plotted longitudinal trajectories")
#' }
#' }
plot.predict.stanjm <- function(x, ids = NULL,
                                limits = c("ci", "pi", "none"),
                                xlab = NULL, ylab = NULL, vline = FALSE,
                                plot_observed = FALSE,
                                facet_scales = "free_x",
                                ci_geom_args = NULL, grp_overlay = FALSE,
                                ...) {

  limits <- match.arg(limits)
  if (!(limits == "none")) ci <- (limits == "ci")
  # These attributes are set by posterior_traj on the predict.stanjm object
  y_var <- attr(x, "y_var")
  id_var <- attr(x, "id_var")
  time_var <- attr(x, "time_var")
  grp_var <- attr(x, "grp_var")
  obs_dat <- attr(x, "observed_data")
  if (is.null(ylab)) ylab <- paste0("Long. response (", y_var, ")")
  if (is.null(xlab)) xlab <- paste0("Time (", time_var, ")")
  if (!id_var %in% colnames(x))
    stop("Bug found: could not find 'id_var' column in the data frame.")
  if (!is.null(grp_var) && (!grp_var %in% colnames(x)))
    stop("Bug found: could not find 'grp_var' column in the data frame.")

  # Optionally restrict both the predictions and observed data to a
  # user-specified subset of individuals
  if (!is.null(ids)) {
    ids_missing <- which(!ids %in% x[[id_var]])
    if (length(ids_missing))
      stop("The following 'ids' are not present in the predict.stanjm object: ",
           paste(ids[ids_missing], collapse = ", "), call. = FALSE)
    plot_dat <- x[x[[id_var]] %in% ids, , drop = FALSE]
    obs_dat <- obs_dat[obs_dat[[id_var]] %in% ids, , drop = FALSE]
  } else {
    plot_dat <- x
  }

  # 'id_list' provides unique IDs sorted in the same order as plotting data
  id_list <- unique(plot_dat[[id_var]])
  if (!is.null(grp_var))
    grp_list <- unique(plot_dat[[grp_var]])
  plot_dat$id <- factor(plot_dat[[id_var]])
  plot_dat$time <- plot_dat[[time_var]]
  if (!is.null(grp_var))
    plot_dat$grp <- plot_dat[[grp_var]]

  # Merge user-supplied geom arguments (via ...) into the defaults
  geom_defaults <- list(color = "black", method = "loess", se = FALSE)
  geom_args <- set_geom_args(geom_defaults, ...)

  lim_defaults <- list(alpha = 0.3)
  lim_args <- do.call("set_geom_args",
                      c(defaults = list(lim_defaults), ci_geom_args))

  obs_defaults <- list()
  obs_args <- set_geom_args(obs_defaults)

  # Decide the grouping/faceting structure depending on whether the model
  # had lower-level units clustered within individuals
  if (is.null(grp_var)) {           # no lower level clusters
    group_var <- NULL
    facet_var <- "id"
  } else if (grp_overlay) {         # overlay lower level clusters
    group_var <- "grp"
    facet_var <- "id"
  } else {                          # separate facets for lower level clusters
    group_var <- NULL
    facet_var <- "grp"
  }

  n_facets <- if (facet_var == "id") length(id_list) else length(grp_list)

  if (n_facets > 60L) {
    stop("Too many facets (ie. individuals) to plot. Perhaps limit the ",
         "number of individuals by specifying the 'ids' argument.")
  } else if (n_facets > 1L) {
    geom_mapp <- list(
      mapping = aes_string(x = "time", y = "yfit", group = group_var),
      data = plot_dat)
    graph <- ggplot() + theme_bw() +
      do.call("geom_smooth", c(geom_mapp, geom_args)) +
      facet_wrap(facet_var, scales = facet_scales)
    if (!limits == "none") {
      # Build a hidden ggplot whose two loess smooths give the lower and
      # upper interval limits; ggplot_build is then used to extract the
      # smoothed coordinates so they can be drawn as a ribbon
      graph_smoothlim <- ggplot(plot_dat) +
        geom_smooth(
          aes_string(x = "time", y = if (ci) "ci_lb" else "pi_lb",
                     group = group_var),
          method = "loess", se = FALSE) +
        geom_smooth(
          aes_string(x = "time", y = if (ci) "ci_ub" else "pi_ub",
                     group = group_var),
          method = "loess", se = FALSE) +
        facet_wrap(facet_var, scales = facet_scales)
      build_smoothlim <- ggplot_build(graph_smoothlim)
      df_smoothlim <- data.frame(PANEL = build_smoothlim$data[[1]]$PANEL,
                                 time = build_smoothlim$data[[1]]$x,
                                 lb = build_smoothlim$data[[1]]$y,
                                 ub = build_smoothlim$data[[2]]$y,
                                 group = build_smoothlim$data[[1]]$group)
      # Map ggplot's internal PANEL index back onto the facet variable
      panel_id_map <- build_smoothlim$layout$layout[, c("PANEL", facet_var),
                                                    drop = FALSE]
      df_smoothlim <- merge(df_smoothlim, panel_id_map)
      lim_mapp <- list(
        mapping = aes_string(x = "time", ymin = "lb", ymax = "ub",
                             group = "group"),
        data = df_smoothlim)
      graph_limits <- do.call("geom_ribbon", c(lim_mapp, lim_args))
    } else graph_limits <- NULL
  } else {
    # Single facet: same approach as above but without faceting
    geom_mapp <- list(mapping = aes_string(x = "time", y = "yfit",
                                           group = group_var),
                      data = plot_dat)
    graph <- ggplot() + theme_bw() +
      do.call("geom_smooth", c(geom_mapp, geom_args))
    if (!(limits == "none")) {
      graph_smoothlim <- ggplot(plot_dat) +
        geom_smooth(aes_string(x = "time",
                               y = if (ci) "ci_lb" else "pi_lb"),
                    method = "loess", se = FALSE) +
        geom_smooth(aes_string(x = "time",
                               y = if (ci) "ci_ub" else "pi_ub"),
                    method = "loess", se = FALSE)
      build_smoothlim <- ggplot_build(graph_smoothlim)
      df_smoothlim <- data.frame(time = build_smoothlim$data[[1]]$x,
                                 lb = build_smoothlim$data[[1]]$y,
                                 ub = build_smoothlim$data[[2]]$y,
                                 group = build_smoothlim$data[[1]]$group)
      lim_mapp <- list(
        mapping = aes_string(x = "time", ymin = "lb", ymax = "ub",
                             group = "group"),
        data = df_smoothlim)
      graph_limits <- do.call("geom_ribbon", c(lim_mapp, lim_args))
    } else graph_limits <- NULL
  }

  if (plot_observed) {
    if (y_var %in% colnames(obs_dat)) {
      obs_dat$y <- obs_dat[[y_var]]
    } else {
      # y_var may be an expression (e.g. log(bili)) rather than a column name
      obs_dat$y <- try(eval(parse(text = y_var), obs_dat))
      if (inherits(obs_dat$y, "try-error"))
        # BUGFIX: message previously lacked spaces around y_var, producing
        # run-together text such as "Could not find biliin observed data"
        stop("Could not find ", y_var, " in observed data, nor able to parse ",
             y_var, " as an expression.")
    }
    obs_dat$id <- factor(obs_dat[[id_var]])
    obs_dat$time <- obs_dat[[time_var]]
    if (!is.null(grp_var))
      obs_dat$grp <- obs_dat[[grp_var]]
    if (is.null(obs_dat[["y"]]))
      stop("Cannot find observed outcome data to add to plot.")
    obs_mapp <- list(
      mapping = aes_string(x = "time", y = "y", group = group_var),
      data = obs_dat)
    graph_obs <- do.call("geom_point", c(obs_mapp, obs_args))
  } else graph_obs <- NULL

  if (vline) {
    if (facet_var == "id") {
      facet_list <- unique(plot_dat[, id_var])
      # potentially reorder last_time to match plot_dat
      last_time <- attr(x, "last_time")[as.character(facet_list)]
    } else {
      facet_list <- unique(plot_dat[, c(id_var, grp_var)])
      # potentially reorder last_time to match plot_dat
      last_time <- attr(x, "last_time")[as.character(facet_list[[id_var]])]
      facet_list <- facet_list[[grp_var]]
    }
    vline_dat <- data.frame(FACETVAR = facet_list, last_time = last_time)
    colnames(vline_dat) <- c(facet_var, "last_time")
    graph_vline <- geom_vline(
      mapping = aes_string(xintercept = "last_time"),
      data = vline_dat, linetype = 2)
  } else graph_vline <- NULL

  ret <- graph + graph_limits + graph_obs + graph_vline +
    labs(x = xlab, y = ylab)
  class_ret <- class(ret)
  class(ret) <- c("plot.predict.stanjm", class_ret)
  ret
}


# internal ----------------------------------------------------------------

# Return a list with the control arguments for interpolation and/or
# extrapolation in posterior_predict.stanmvreg and posterior_survfit.stanjm
#
# @param control A named list, being the user input to the control argument
#   in the posterior_predict.stanmvreg or posterior_survfit.stanjm call
# @param ok_control_args A character vector of allowed control arguments
# @return A named list
get_extrapolation_control <- function(control = list(),
                                      ok_control_args = c("epoints", "edist", "eprop")) {
  defaults <- list(ipoints = 15, epoints = 15, edist = NULL, eprop = 0.2,
                   last_time = NULL)
  if (!is.list(control)) {
    stop("'control' should be a named list.")
  } else if (!length(control)) {
    # no user input: return the defaults for the allowed arguments only
    control <- defaults[ok_control_args]
  } else { # user specified control list
    nms <- names(control)
    if (!length(nms))
      stop("'control' should be a named list.")
    if (any(!nms %in% ok_control_args))
      stop(paste0("'control' list can only contain the following named arguments: ",
                  paste(ok_control_args, collapse = ", ")))
    # edist (absolute distance) and eprop (proportion) are mutually exclusive
    if (all(c("edist", "eprop") %in% nms))
      stop("'control' list cannot include both 'edist' and 'eprop'.")
    # fill in defaults for anything the user left unspecified
    if (("ipoints" %in% ok_control_args) && is.null(control$ipoints))
      control$ipoints <- defaults$ipoints
    if (("epoints" %in% ok_control_args) && is.null(control$epoints))
      control$epoints <- defaults$epoints
    if (is.null(control$edist) && is.null(control$eprop))
      control$eprop <- defaults$eprop
  }
  return(control)
}

# Set plotting defaults
set_geom_args <- function(defaults, ...) {
  dots <- list(...)
if (!length(dots)) return(defaults) dot_names <- names(dots) def_names <- names(defaults) for (j in seq_along(def_names)) { if (def_names[j] %in% dot_names) defaults[[j]] <- dots[[def_names[j]]] } extras <- setdiff(dot_names, def_names) if (length(extras)) { for (j in seq_along(extras)) defaults[[extras[j]]] <- dots[[extras[j]]] } return(defaults) } rstanarm/R/stan_betareg.fit.R0000644000176200001440000005410514370470372015660 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' @rdname stan_betareg #' @export #' @param z For \code{stan_betareg.fit}, a regressor matrix for \code{phi}. #' Defaults to an intercept only. 
#' stan_betareg.fit <- function(x, y, z = NULL, weights = rep(1, NROW(x)), offset = rep(0, NROW(x)), link = c("logit", "probit", "cloglog", "cauchit", "log", "loglog"), link.phi = NULL, ..., prior = normal(autoscale=TRUE), prior_intercept = normal(autoscale=TRUE), prior_z = normal(autoscale=TRUE), prior_intercept_z = normal(autoscale=TRUE), prior_phi = exponential(autoscale=TRUE), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE) { algorithm <- match.arg(algorithm) # determine whether the user has passed a matrix for the percision model (z) if (is.null(link.phi) && is.null(z)) { Z_true <- 0 z <- model.matrix(y ~ 1) } else if (is.null(link.phi) && !(is.null(z))) { Z_true <- 1 link.phi <- "log" } else { Z_true <- 1 } # link for X variables link <- match.arg(link) supported_links <- c("logit", "probit", "cloglog", "cauchit", "log", "loglog") link_num <- which(supported_links == link) if (!length(link)) stop("'link' must be one of ", paste(supported_links, collapse = ", ")) # link for Z variables link.phi <- match.arg(link.phi, c(NULL, "log", "identity", "sqrt")) supported_phi_links <- c("log", "identity", "sqrt") link_num_phi <- which(supported_phi_links == link.phi) if (!length(link_num_phi)) stop("'link' must be one of ", paste(supported_phi_links, collapse = ", ")) if (Z_true == 0) link_num_phi <- 0 # useless assignments to pass R CMD check has_intercept <- min_prior_scale <- prior_df <- prior_df_for_intercept <- prior_df_for_intercept_z <- prior_df_z <- prior_dist <- prior_dist_for_intercept <- prior_dist_for_intercept_z <- prior_dist_z <- prior_mean <- prior_mean_for_intercept <- prior_mean_for_intercept_z <- prior_mean_z <- prior_scale <- prior_scale_for_intercept <- prior_scale_for_intercept_z <- prior_df_for_aux <- prior_dist_for_aux <- prior_mean_for_aux <- prior_scale_for_aux <- xbar <- xtemp <- prior_autoscale <- prior_autoscale_z <- global_prior_scale_z <- global_prior_df_z <- slab_df <- 
slab_scale <- slab_df_z <- slab_scale_z <- NULL sparse <- FALSE x_stuff <- center_x(x, sparse) for (i in names(x_stuff)) # xtemp, xbar, has_intercept assign(i, x_stuff[[i]]) nvars <- ncol(xtemp) z_stuff <- center_x(z, sparse) ztemp <- z_stuff$xtemp zbar <- z_stuff$xbar has_intercept_z <- z_stuff$has_intercept nvars_z <- ncol(ztemp) if (Z_true == 0) has_intercept_z <- FALSE ok_dists <- nlist("normal", student_t = "t", "cauchy", "hs", "hs_plus", "laplace", "lasso", "product_normal") ok_intercept_dists <- ok_dists[1:3] ok_aux_dists <- c(ok_dists[1:3], exponential = "exponential") # prior distributions (handle_glm_prior() from data_block.R) prior_stuff <- handle_glm_prior(prior, nvars, link, default_scale = 2.5, ok_dists = ok_dists) for (i in names(prior_stuff)) # prior_{dist, mean, scale, df, autoscale} assign(i, prior_stuff[[i]]) prior_intercept_stuff <- handle_glm_prior(prior_intercept, nvars = 1, default_scale = 2.5, link = link, ok_dists = ok_intercept_dists) names(prior_intercept_stuff) <- paste0(names(prior_intercept_stuff), "_for_intercept") for (i in names(prior_intercept_stuff)) # prior_{dist, mean, scale, df, autoscale}_for_intercept assign(i, prior_intercept_stuff[[i]]) # prior distributions for parameters on z variables prior_stuff_z <- handle_glm_prior(prior_z, nvars_z, link = link.phi, default_scale = 2.5, ok_dists = ok_dists) for (i in names(prior_stuff_z)) assign(paste0(i,"_z"), prior_stuff_z[[i]]) prior_intercept_stuff_z <- handle_glm_prior(prior_intercept_z, nvars = 1, link = link.phi, default_scale = 2.5, ok_dists = ok_intercept_dists) names(prior_intercept_stuff_z) <- paste0(names(prior_intercept_stuff_z), "_for_intercept") for (i in names(prior_intercept_stuff_z)) assign(paste0(i, "_z"), prior_intercept_stuff_z[[i]]) prior_aux <- prior_phi prior_aux_stuff <- handle_glm_prior( prior_aux, nvars = 1, default_scale = 5, link = NULL, # don't need to adjust scale based on logit vs probit ok_dists = ok_aux_dists ) # prior_{dist, mean, scale, df, 
dist_name, autoscale}_for_aux names(prior_aux_stuff) <- paste0(names(prior_aux_stuff), "_for_aux") if (is.null(prior_aux)) { if (prior_PD) stop("'prior_aux' can't be NULL if 'prior_PD' is TRUE.") prior_aux_stuff$prior_scale_for_aux <- Inf } for (i in names(prior_aux_stuff)) assign(i, prior_aux_stuff[[i]]) if (nvars_z == 0) { prior_mean_z <- double() prior_scale_z <- double() prior_df_z <- integer() } # prior scaling (using sd of predictors) min_prior_scale <- 1e-12 if (prior_dist > 0L && !QR && nvars != 0 && prior_autoscale) { prior_scale <- pmax(min_prior_scale, prior_scale / apply(xtemp, 2L, FUN = function(x) { num.categories <- length(unique(x)) x.scale <- 1 if (num.categories == 2) { x.scale <- diff(range(x)) } else if (num.categories > 2) { x.scale <- sd(x) } return(x.scale) })) } if (prior_dist_z > 0L && !QR && nvars_z != 0 && prior_autoscale_z) { prior_scale_z <- pmax(min_prior_scale, prior_scale_z / apply(ztemp, 2L, FUN = function(z) { num.categories <- length(unique(z)) z.scale <- 1 if (num.categories == 2) { z.scale <- diff(range(z)) } else if (num.categories > 2) { z.scale <- sd(z) } return(z.scale) })) } prior_scale <- as.array(pmin(.Machine$double.xmax, prior_scale)) prior_scale_for_intercept <- min(.Machine$double.xmax, prior_scale_for_intercept) if(nvars_z != 0) { prior_scale_z <- as.array(pmin(.Machine$double.xmax, prior_scale_z)) prior_scale_for_intercept_z <- min(.Machine$double.xmax, prior_scale_for_intercept_z) } # QR decomposition for both x and z if (QR) { if ((nvars <= 1 && nvars_z <= 1 && Z_true == 1) || (nvars <= 1 && Z_true == 0)) stop("'QR' can only be specified when there are multiple predictors.") if (nvars > 1) { cn <- colnames(xtemp) decomposition <- qr(xtemp) sqrt_nm1 <- sqrt(nrow(xtemp) - 1L) Q <- qr.Q(decomposition) if (prior_autoscale) scale_factor <- sqrt(nrow(xtemp) - 1L) else scale_factor <- diag(qr.R(decomposition))[ncol(xtemp)] R_inv <- qr.solve(decomposition, Q) * scale_factor xtemp <- Q * scale_factor colnames(xtemp) <- cn 
xbar <- c(xbar %*% R_inv) } if (Z_true == 1 && nvars_z > 1) { cn_z <- colnames(ztemp) decomposition_z <- qr(ztemp) Q_z <- qr.Q(decomposition_z) if (nvars <= 1) scale_factor <- sqrt(nrow(ztemp) - 1L) R_inv_z <- qr.solve(decomposition_z, Q_z) * scale_factor ztemp <- Q_z * scale_factor colnames(ztemp) <- cn_z zbar <- c(zbar %*% R_inv_z) } } # create entries in the data block of the .stan file standata <- nlist( N = nrow(xtemp), K = ncol(xtemp), xbar = as.array(xbar), dense_X = !sparse, X = array(xtemp, dim = c(1L, dim(xtemp))), nnz_X = 0L, w_X = double(), v_X = integer(), u_X = integer(), y = y, lb_y = 0, ub_y = 1, prior_PD, has_intercept, family = 4L, link = link_num, prior_dist, prior_mean, prior_scale = as.array(pmin(.Machine$double.xmax, prior_scale)), prior_df, prior_dist_for_intercept, prior_mean_for_intercept = c(prior_mean_for_intercept), prior_scale_for_intercept = min(.Machine$double.xmax, prior_scale_for_intercept), prior_df_for_intercept = c(prior_df_for_intercept), prior_dist_for_aux = prior_dist_for_aux, prior_scale_for_aux = prior_scale_for_aux %ORifINF% 0, prior_df_for_aux = c(prior_df_for_aux), prior_mean_for_aux = c(prior_mean_for_aux), prior_dist_for_smooth = 0L, prior_mean_for_smooth = array(NA_real_, dim = 0), prior_scale_for_smooth = array(NA_real_, dim = 0), prior_df_for_smooth = array(NA_real_, dim = 0), K_smooth = 0L, S = matrix(NA_real_, nrow(xtemp), ncol = 0L), smooth_map = integer(), has_weights = length(weights) > 0, weights = weights, has_offset = length(offset) > 0, offset_ = offset, t = 0L, p = integer(), l = integer(), q = 0L, len_theta_L = 0L, shape = double(), scale = double(), len_concentration = 0L, concentration = double(), len_regularization = 0L, regularization = double(), num_non_zero = 0L, w = double(), v = integer(), u = integer(), special_case = 0L, z_dim = nvars_z, link_phi = link_num_phi, betareg_z = array(ztemp, dim = c(dim(ztemp))), has_intercept_z, zbar = array(zbar), prior_dist_z, prior_mean_z, prior_df_z, 
prior_scale_z = as.array(pmin(.Machine$double.xmax, prior_scale_z)), prior_dist_for_intercept_z, prior_mean_for_intercept_z = c(prior_mean_for_intercept_z), prior_df_for_intercept_z = c(prior_df_for_intercept_z), prior_scale_for_intercept_z = min(.Machine$double.xmax, prior_scale_for_intercept_z), # for hs family priors global_prior_scale_z, global_prior_df_z, slab_df_z, slab_scale_z, # for product normal prior num_normals = if (prior_dist == 7) as.array(as.integer(prior_df)) else integer(0), num_normals_z = if (prior_dist_z == 7) as.array(as.integer(prior_df_z)) else integer(0), len_y = nrow(xtemp), SSfun = 0L, input = double(), Dose = double(), compute_mean_PPD = TRUE ) # call stan() to draw from posterior distribution stanfit <- stanmodels$continuous if (Z_true == 1) { pars <- c(if (has_intercept) "alpha", "beta", "omega_int", "omega", "mean_PPD") } else { pars <- c(if (has_intercept) "alpha", "beta", "aux", "mean_PPD") } prior_info <- summarize_betareg_prior( user_prior = prior_stuff, user_prior_intercept = prior_intercept_stuff, user_prior_z = prior_stuff_z, user_prior_intercept_z = prior_intercept_stuff_z, user_prior_aux = prior_aux_stuff, has_phi = !Z_true, has_intercept = has_intercept, has_intercept_z = has_intercept_z, has_predictors = nvars > 0, has_predictors_z = nvars_z > 0, adjusted_prior_scale = prior_scale, adjusted_prior_intercept_scale = prior_scale_for_intercept, adjusted_prior_scale_z = prior_scale_z, adjusted_prior_intercept_scale_z = prior_scale_for_intercept_z ) if (algorithm == "optimizing") { optimizing_args <- list(...) 
if (is.null(optimizing_args$draws)) optimizing_args$draws <- 1000L optimizing_args$object <- stanfit optimizing_args$data <- standata optimizing_args$constrained <- TRUE out <- do.call(optimizing, args = optimizing_args) check_stanfit(out) out$par <- out$par[!grepl("eta_z", names(out$par))] out$theta_tilde <- out$theta_tilde[, !grepl("eta_z", colnames(out$theta_tilde))] new_names <- names(out$par) mark <- grepl("^beta\\[[[:digit:]]+\\]$", new_names) if (QR && ncol(xtemp) > 1) { out$par[mark] <- R_inv %*% out$par[mark] out$theta_tilde[,mark] <- out$theta_tilde[, mark] %*% t(R_inv) } new_names[mark] <- colnames(xtemp) new_names[new_names == "alpha[1]"] <- "(Intercept)" if (Z_true == 1) { new_names[new_names == "omega_int[1]"] <- "(phi)_(Intercept)" mark_z <- grepl("^omega\\[[[:digit:]]+\\]$", new_names) if (QR && ncol(ztemp) > 1) { out$par[mark_z] <- R_inv_z %*% out$par[mark_z] out$theta_tilde[,mark_z] <- out$theta_tilde[, mark_z] %*% t(R_inv_z) } new_names[mark_z] <- paste0("(phi)_", colnames(ztemp)) } else { new_names[new_names == "aux"] <- "(phi)" } names(out$par) <- new_names colnames(out$theta_tilde) <- new_names out$stanfit <- suppressMessages(sampling(stanfit, data = standata, chains = 0)) return(structure(out, prior.info = prior_info, dropped_cols = x_stuff$dropped_cols)) } else { if (algorithm == "sampling") { sampling_args <- set_sampling_args( object = stanfit, prior = prior, user_dots = list(...), user_adapt_delta = adapt_delta, data = standata, pars = pars, show_messages = FALSE) stanfit <- do.call(sampling, sampling_args) } else { # algorithm either "meanfield" or "fullrank" stanfit <- rstan::vb(stanfit, pars = pars, data = standata, algorithm = algorithm, init = 0.001, ...) 
if (!QR && standata$K > 1) { recommend_QR_for_vb() } } check <- check_stanfit(stanfit) if (!isTRUE(check)) return(standata) if (QR) { if (ncol(xtemp) > 1) { thetas <- extract(stanfit, pars = "beta", inc_warmup = TRUE, permuted = FALSE) betas <- apply(thetas, 1:2, FUN = function(theta) R_inv %*% theta) end <- tail(dim(betas), 1L) for (chain in 1:end) for (param in 1:nrow(betas)) { stanfit@sim$samples[[chain]][[has_intercept + param]] <- if (ncol(xtemp) > 1) betas[param, , chain] else betas[param, chain] } } if (Z_true == 1 & ncol(ztemp) > 1) { thetas_z <- extract(stanfit, pars = "omega", inc_warmup = TRUE, permuted = FALSE) omegas <- apply(thetas_z, 1:2, FUN = function(theta) R_inv_z %*% theta) end_z <- tail(dim(omegas), 1L) for (chain_z in 1:end_z) for (param_z in 1:nrow(omegas)) { sel <- has_intercept + ncol(xtemp) + has_intercept_z + param_z stanfit@sim$samples[[chain_z]][[sel]] <- if (ncol(ztemp) > 1) omegas[param_z, , chain_z] else omegas[param_z, chain_z] } } } if (Z_true == 1) { new_names <- c(if (has_intercept) "(Intercept)", colnames(xtemp), if (has_intercept_z) "(phi)_(Intercept)", paste0("(phi)_", colnames(ztemp)), "mean_PPD", "log-posterior") } else { new_names <- c(if (has_intercept) "(Intercept)", colnames(xtemp), "(phi)", "mean_PPD", "log-posterior") } stanfit@sim$fnames_oi <- new_names return(structure(stanfit, prior.info = prior_info, dropped_cols = x_stuff$dropped_cols)) } } # Create "prior.info" attribute needed for prior_summary() # # @param user_* The user's prior, prior_intercept, prior_covariance, and # prior_options specifications. For prior and prior_intercept these should be # passed in after broadcasting the df/location/scale arguments if necessary. # @param has_intercept T/F, does model have an intercept? # @param has_predictors T/F, does model have predictors? 
# @param adjusted_prior_* adjusted scales computed if prior_ops$scaled is TRUE # @return A named list with components 'prior', 'prior_intercept', and possibly # 'prior_covariance', each of which itself is a list containing the needed # values for prior_summary. summarize_betareg_prior <- function(user_prior, user_prior_intercept, user_prior_z, user_prior_intercept_z, user_prior_aux, has_phi, has_intercept, has_intercept_z, has_predictors, has_predictors_z, adjusted_prior_scale, adjusted_prior_intercept_scale, adjusted_prior_scale_z, adjusted_prior_intercept_scale_z) { rescaled_coef <- user_prior$prior_autoscale && has_predictors && !is.na(user_prior$prior_dist_name) && !all(user_prior$prior_scale == adjusted_prior_scale) rescaled_coef_z <- user_prior_z$prior_autoscale && has_predictors_z && !is.na(user_prior_z$prior_dist_name) && !all(user_prior_z$prior_scale == adjusted_prior_scale_z) rescaled_int <- user_prior_intercept$prior_autoscale_for_intercept && has_intercept && !is.na(user_prior_intercept$prior_dist_name_for_intercept) && (user_prior_intercept$prior_scale != adjusted_prior_intercept_scale) rescaled_int_z <- user_prior_intercept_z$prior_autoscale_for_intercept && has_intercept_z && !is.na(user_prior_intercept_z$prior_dist_name_for_intercept) && (user_prior_intercept_z$prior_scale != adjusted_prior_intercept_scale_z) if (has_predictors && user_prior$prior_dist_name %in% "t") { if (all(user_prior$prior_df == 1)) { user_prior$prior_dist_name <- "cauchy" } else { user_prior$prior_dist_name <- "student_t" } } if (has_predictors_z && user_prior_z$prior_dist_name %in% "t") { if (all(user_prior_z$prior_df == 1)) { user_prior_z$prior_dist_name <- "cauchy" } else { user_prior_z$prior_dist_name <- "student_t" } } if (has_intercept && user_prior_intercept$prior_dist_name_for_intercept %in% "t") { if (all(user_prior_intercept$prior_df_for_intercept == 1)) { user_prior_intercept$prior_dist_name_for_intercept <- "cauchy" } else { 
user_prior_intercept$prior_dist_name_for_intercept <- "student_t" } } if (has_intercept_z && user_prior_intercept_z$prior_dist_name_for_intercept %in% "t") { if (all(user_prior_intercept_z$prior_df_for_intercept == 1)) { user_prior_intercept_z$prior_dist_name_for_intercept <- "cauchy" } else { user_prior_intercept_z$prior_dist_name_for_intercept <- "student_t" } } if (has_phi && user_prior_aux$prior_dist_name_for_aux %in% "t") { if (all(user_prior_aux$prior_df_for_aux == 1)) { user_prior_aux$prior_dist_name_for_aux <- "cauchy" } else { user_prior_aux$prior_dist_name_for_aux <- "student_t" } } prior_list <- list( prior = if (!has_predictors) NULL else with(user_prior, list( dist = prior_dist_name, location = prior_mean, scale = prior_scale, adjusted_scale = if (rescaled_coef) adjusted_prior_scale else NULL, df = if (prior_dist_name %in% c("student_t", "hs", "hs_plus", "lasso", "product_normal")) prior_df else NULL )), prior_z = if (!has_predictors_z) NULL else with(user_prior_z, list( dist = prior_dist_name, location = prior_mean, scale = prior_scale, adjusted_scale = if (rescaled_coef_z) adjusted_prior_scale_z else NULL, df = if (prior_dist_name %in% c("student_t", "hs", "hs_plus", "lasso", "product_normal")) prior_df else NULL )), prior_intercept = if (!has_intercept) NULL else with(user_prior_intercept, list( dist = prior_dist_name_for_intercept, location = prior_mean_for_intercept, scale = prior_scale_for_intercept, adjusted_scale = if (rescaled_int) adjusted_prior_intercept_scale else NULL, df = if (prior_dist_name_for_intercept %in% "student_t") prior_df_for_intercept else NULL )), prior_intercept_z = if (!has_intercept_z) NULL else with(user_prior_intercept_z, list( dist = prior_dist_name_for_intercept, location = prior_mean_for_intercept, scale = prior_scale_for_intercept, adjusted_scale = if (rescaled_int_z) adjusted_prior_intercept_scale_z else NULL, df = if (prior_dist_name_for_intercept %in% "student_t") prior_df_for_intercept else NULL )), prior_aux = 
if (!has_phi) NULL else with(user_prior_aux, list( dist = prior_dist_name_for_aux, location = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux != "exponential") prior_mean_for_aux else NULL, scale = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux != "exponential") prior_scale_for_aux else NULL, df = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux %in% "student_t") prior_df_for_aux else NULL, rate = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux %in% "exponential") 1 / prior_scale_for_aux else NULL, aux_name = "phi" )) ) return(prior_list) } rstanarm/R/stan_polr.fit.R0000644000176200001440000001555213722762571015233 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' @rdname stan_polr #' @export #' @param x A design matrix. #' @param y A response variable, which must be a (preferably ordered) factor. #' @param wt A numeric vector (possibly \code{NULL}) of observation weights. #' @param offset A numeric vector (possibly \code{NULL}) of offsets. 
#' #' @importFrom utils head tail stan_polr.fit <- function(x, y, wt = NULL, offset = NULL, method = c("logistic", "probit", "loglog", "cloglog", "cauchit"), ..., prior = R2(stop("'location' must be specified")), prior_counts = dirichlet(1), shape = NULL, rate = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, do_residuals = algorithm == "sampling") { algorithm <- match.arg(algorithm) method <- match.arg(method) all_methods <- c("logistic", "probit", "loglog", "cloglog", "cauchit") link <- which(all_methods == method) if (!is.factor(y)) stop("'y' must be a factor.") y_lev <- levels(y) J <- length(y_lev) y <- as.integer(y) if (colnames(x)[1] == "(Intercept)") x <- x[, -1, drop=FALSE] xbar <- as.array(colMeans(x)) X <- sweep(x, 2, xbar, FUN = "-") cn <- colnames(X) decomposition <- qr(X) Q <- qr.Q(decomposition) R_inv <- qr.solve(decomposition, Q) X <- Q colnames(X) <- cn xbar <- c(xbar %*% R_inv) if (length(xbar) == 1) dim(xbar) <- 1L has_weights <- isTRUE(length(wt) > 0 && !all(wt == 1)) if (!has_weights) weights <- double(0) has_offset <- isTRUE(length(offset) > 0 && !all(offset == 0)) if (!has_offset) offset <- double(0) if (length(prior)) { regularization <- make_eta(prior$location, prior$what, K = ncol(x)) prior_dist <- 1L } else { regularization <- 0 prior_dist <- 0L } if (!length(prior_counts)) { prior_counts <- rep(1, J) } else { prior_counts <- maybe_broadcast(prior_counts$concentration, J) } if (is.null(shape)) { shape <- 0L } else { if (J > 2) stop("'shape' must be NULL when there are more than 2 outcome categories.") if (!is.numeric(shape) || shape <= 0) stop("'shape' must be positive") } if (is.null(rate)) { rate <- 0L } else { if (J > 2) stop("'rate' must be NULL when there are more than 2 outcome categories.") if (!is.numeric(rate) || rate <= 0) stop("'rate' must be positive") } is_skewed <- as.integer(shape > 0 & rate > 0) if (is_skewed && method != "logistic") stop("Skewed models are only supported when 
method = 'logistic'.") N <- nrow(X) K <- ncol(X) X <- array(X, dim = c(1L, N, K)) standata <- nlist(J, N, K, X, xbar, y, prior_PD, link, has_weights, wt, has_offset, offset_ = offset, prior_dist, regularization, prior_counts, is_skewed, shape, rate, # the rest of these are not actually used has_intercept = 0L, prior_dist_for_intercept = 0L, prior_dist_for_aux = 0L, dense_X = TRUE, # sparse is not a viable option nnz_X = 0L, w_X = double(0), v_X = integer(0), u_X = integer(0), prior_dist_for_smooth = 0L, K_smooth = 0L, S = matrix(NA_real_, N, 0L), smooth_map = integer(0), compute_mean_PPD = FALSE) stanfit <- stanmodels$polr if (J > 2) { pars <- c("beta", "zeta", "mean_PPD") } else { pars <- c("zeta", "beta", if (is_skewed) "alpha", "mean_PPD") } if (do_residuals) { standata$do_residuals <- isTRUE(J > 2) && !prior_PD } else { standata$do_residuals <- FALSE } if (algorithm == "sampling") { sampling_args <- set_sampling_args( object = stanfit, prior = prior, user_dots = list(...), user_adapt_delta = adapt_delta, data = standata, pars = pars, show_messages = FALSE) stanfit <- do.call(sampling, sampling_args) } else { stanfit <- rstan::vb(stanfit, pars = pars, data = standata, algorithm = algorithm, ...) 
} check_stanfit(stanfit) thetas <- extract(stanfit, pars = "beta", inc_warmup = TRUE, permuted = FALSE) betas <- apply(thetas, 1:2, FUN = function(theta) R_inv %*% theta) if (K == 1) for (chain in 1:tail(dim(betas), 1)) { stanfit@sim$samples[[chain]][[(J == 2) + 1L]] <- betas[,chain] } else for (chain in 1:tail(dim(betas), 1)) for (param in 1:nrow(betas)) { stanfit@sim$samples[[chain]][[(J == 2) + param]] <- betas[param, , chain] } if (J > 2) { new_names <- c(colnames(x), paste(head(y_lev, -1), tail(y_lev, -1), sep = "|"), paste("mean_PPD", y_lev, sep = ":"), "log-posterior") } else { new_names <- c("(Intercept)", colnames(x), if (is_skewed) "alpha", "mean_PPD", "log-posterior") } stanfit@sim$fnames_oi <- new_names prior_info <- summarize_polr_prior(prior, prior_counts, shape, rate) structure(stanfit, prior.info = prior_info) } # internal ---------------------------------------------------------------- # Create "prior.info" attribute needed for prior_summary() # # @param prior, prior_counts User's prior and prior_counts specifications # @return A named list with elements 'prior' and 'prior_counts' containing # the values needed for prior_summary summarize_polr_prior <- function(prior, prior_counts, shape=NULL, rate=NULL) { flat <- !length(prior) prior_list <- list( prior = list( dist = ifelse(flat, NA, "R2"), location = ifelse(flat, NA, prior$location), what = ifelse(flat, NA, prior$what) ), prior_counts = list( dist = "dirichlet", concentration = prior_counts ) ) if ((!is.null(shape) && shape > 0) && (!is.null(rate) && rate > 0)) prior_list$scobit_exponent <- list(dist = "gamma", shape = shape, rate = rate) return(prior_list) } rstanarm/R/stan_jm.R0000644000176200001440000011042314370470372014070 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # Copyright (C) 2016, 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # 
modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian joint longitudinal and time-to-event models via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Fits a shared parameter joint model for longitudinal and time-to-event #' (e.g. survival) data under a Bayesian framework using Stan. #' #' @export #' @template args-dots #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' @template args-max_treedepth #' @template args-QR #' @template args-sparse #' #' @param formulaLong A two-sided linear formula object describing both the #' fixed-effects and random-effects parts of the longitudinal submodel, #' similar in vein to formula specification in the \strong{lme4} package #' (see \code{\link[lme4]{glmer}} or the \strong{lme4} vignette for details). #' Note however that the double bar (\code{||}) notation is not allowed #' when specifying the random-effects parts of the formula, and neither #' are nested grouping factors (e.g. \code{(1 | g1/g2))} or #' \code{(1 | g1:g2)}, where \code{g1}, \code{g2} are grouping factors. #' Offset terms can also be included in the model formula. #' For a multivariate joint model (i.e. more than one longitudinal marker) #' this should be a list of such formula objects, with each element #' of the list providing the formula for one of the longitudinal submodels. 
#' @param dataLong A data frame containing the variables specified in #' \code{formulaLong}. If fitting a multivariate joint model, then this can #' be either a single data frame which contains the data for all #' longitudinal submodels, or it can be a list of data frames where each #' element of the list provides the data for one of the longitudinal #' submodels. #' @param formulaEvent A two-sided formula object describing the event #' submodel. The left hand side of the formula should be a \code{Surv()} #' object. See \code{\link[survival]{Surv}}. #' @param dataEvent A data frame containing the variables specified in #' \code{formulaEvent}. #' @param time_var A character string specifying the name of the variable #' in \code{dataLong} which represents time. #' @param id_var A character string specifying the name of the variable in #' \code{dataLong} which distinguishes between individuals. This can be #' left unspecified if there is only one grouping factor (which is assumed #' to be the individual). If there is more than one grouping factor (i.e. #' clustering beyond the level of the individual) then the \code{id_var} #' argument must be specified. #' @param family The family (and possibly also the link function) for the #' longitudinal submodel(s). See \code{\link[lme4]{glmer}} for details. #' If fitting a multivariate joint model, then this can optionally be a #' list of families, in which case each element of the list specifies the #' family for one of the longitudinal submodels. #' @param assoc A character string or character vector specifying the joint #' model association structure. Possible association structures that can #' be used include: "etavalue" (the default); "etaslope"; "etaauc"; #' "muvalue"; "muslope"; "muauc"; "shared_b"; "shared_coef"; or "null". #' These are described in the \strong{Details} section below. 
For a multivariate #' joint model, different association structures can optionally be used for #' each longitudinal submodel by specifying a list of character #' vectors, with each element of the list specifying the desired association #' structure for one of the longitudinal submodels. Specifying \code{assoc = NULL} #' will fit a joint model with no association structure (equivalent #' to fitting separate longitudinal and time-to-event models). It is also #' possible to include interaction terms between the association term #' ("etavalue", "etaslope", "muvalue", "muslope") and observed data/covariates. #' It is also possible, when fitting a multivariate joint model, to include #' interaction terms between the association terms ("etavalue" or "muvalue") #' corresponding to the different longitudinal outcomes. See the #' \strong{Details} section as well as the \strong{Examples} below. #' @param lag_assoc A non-negative scalar specifying the time lag that should be #' used for the association structure. That is, the hazard of the event at #' time \emph{t} will be assumed to be associated with the value/slope/auc of #' the longitudinal marker at time \emph{t-u}, where \emph{u} is the time lag. #' If fitting a multivariate joint model, then a different time lag can be used #' for each longitudinal marker by providing a numeric vector of lags, otherwise #' if a scalar is provided then the specified time lag will be used for all #' longitudinal markers. Note however that only one time lag can be specified #' for linking each longitudinal marker to the #' event, and that that time lag will be used for all association structure #' types (e.g. \code{"etavalue"}, \code{"etaslope"}, \code{"etaauc"}, #' \code{"muvalue"}, etc) that are specified for that longitudinal marker in #' the \code{assoc} argument. 
#' @param grp_assoc Character string specifying the method for combining information #' across lower level units clustered within an individual when forming the #' association structure. This is only relevant when a grouping factor is #' specified in \code{formulaLong} that corresponds to clustering within #' individuals. This can be specified as either \code{"sum"}, \code{mean}, #' \code{"min"} or \code{"max"}. For example, specifying \code{grp_assoc = "sum"} #' indicates that the association structure should be based on a summation across #' the lower level units clustered within an individual, or specifying #' \code{grp_assoc = "mean"} indicates that the association structure #' should be based on the mean (i.e. average) taken across the lower level #' units clustered within an individual. #' So, for example, specifying \code{assoc = "muvalue"} #' and \code{grp_assoc = "sum"} would mean that the log hazard at time #' \emph{t} for individual \emph{i} would be linearly related to the sum of #' the expected values at time \emph{t} for each of the lower level #' units (which may be for example tumor lesions) clustered within that #' individual. #' @param scale_assoc A non-zero numeric value specifying an optional scaling #' parameter for the association structure. This multiplicatively scales the #' value/slope/auc of the longitudinal marker by \code{scale_assoc} within the #' event submodel. When fitting a multivariate joint model, a scaling parameter #' must be specified for each longitudinal submodel using a vector of numeric #' values. Note that only one scaling parameter can be specified for each #' longitudinal submodel, and it will be used for all association structure #' types (e.g. \code{"etavalue"}, \code{"etaslope"}, \code{"etaauc"}, #' \code{"muvalue"}, etc) that are specified for that longitudinal marker in #' the \code{assoc} argument. #' @param basehaz A character string indicating which baseline hazard to use #' for the event submodel. 
Options are a B-splines approximation estimated #' for the log baseline hazard (\code{"bs"}, the default), a Weibull #' baseline hazard (\code{"weibull"}), or a piecewise #' constant baseline hazard (\code{"piecewise"}). (Note however that there #' is currently limited post-estimation functionality available for #' models estimated using a piecewise constant baseline hazard). #' @param basehaz_ops A named list specifying options related to the baseline #' hazard. Currently this can include: \cr #' \describe{ #' \item{\code{df}}{A positive integer specifying the degrees of freedom #' for the B-splines if \code{basehaz = "bs"}, or the number of #' intervals used for the piecewise constant baseline hazard if #' \code{basehaz = "piecewise"}. The default is 6.} #' \item{\code{knots}}{An optional numeric vector specifying the internal knot #' locations for the B-splines if \code{basehaz = "bs"}, or the #' internal cut-points for defining intervals of the piecewise constant #' baseline hazard if \code{basehaz = "piecewise"}. Knots cannot be #' specified if \code{df} is specified. If not specified, then the #' default is to use \code{df - 4} knots if \code{basehaz = "bs"}, #' or \code{df - 1} knots if \code{basehaz = "piecewise"}, which are #' placed at equally spaced percentiles of the distribution of #' observed event times.} #' } #' @param epsilon The half-width of the central difference used to numerically #' calculate the derivate when the \code{"etaslope"} association structure #' is used. #' @param qnodes The number of nodes to use for the Gauss-Kronrod quadrature #' that is used to evaluate the cumulative hazard in the likelihood function. #' Options are 15 (the default), 11 or 7. #' @param weights Experimental and should be used with caution. The #' user can optionally supply a 2-column data frame containing a set of #' 'prior weights' to be used in the estimation process. 
The data frame should #' contain two columns: the first containing the IDs for each individual, and #' the second containing the corresponding weights. The data frame should only #' have one row for each individual; that is, weights should be constant #' within individuals. #' @param init The method for generating the initial values for the MCMC. #' The default is \code{"prefit"}, which uses those obtained from #' fitting separate longitudinal and time-to-event models prior to #' fitting the joint model. The separate longitudinal model is a #' (possibly multivariate) generalised linear mixed #' model estimated using variational bayes. This is achieved via the #' \code{\link{stan_mvmer}} function with \code{algorithm = "meanfield"}. #' The separate Cox model is estimated using \code{\link[survival]{coxph}}. #' This is achieved #' using the and time-to-event models prior #' to fitting the joint model. The separate models are estimated using the #' \code{\link[lme4]{glmer}} and \code{\link[survival]{coxph}} functions. #' This should provide reasonable initial values which should aid the #' MCMC sampler. Parameters that cannot be obtained from #' fitting separate longitudinal and time-to-event models are initialised #' using the "random" method for \code{\link[rstan]{stan}}. #' However it is recommended that any final analysis should ideally #' be performed with several MCMC chains each initiated from a different #' set of initial values; this can be obtained by setting #' \code{init = "random"}. In addition, other possibilities for specifying #' \code{init} are the same as those described for \code{\link[rstan]{stan}}. #' @param priorLong,priorEvent,priorEvent_assoc The prior distributions for the #' regression coefficients in the longitudinal submodel(s), event submodel, #' and the association parameter(s). Can be a call to one of the various functions #' provided by \pkg{rstanarm} for specifying priors. 
The subset of these functions #' that can be used for the prior on the coefficients can be grouped into several #' "families": #' #' \tabular{ll}{ #' \strong{Family} \tab \strong{Functions} \cr #' \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr #' \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr #' \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr #' } #' #' See the \link[=priors]{priors help page} for details on the families and #' how to specify the arguments for all of the functions in the table above. #' To omit a prior ---i.e., to use a flat (improper) uniform prior--- #' \code{prior} can be set to \code{NULL}, although this is rarely a good #' idea. #' #' \strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t #' family or Laplace family, and if the \code{autoscale} argument to the #' function used to specify the prior (e.g. \code{\link{normal}}) is left at #' its default and recommended value of \code{TRUE}, then the default or #' user-specified prior scale(s) may be adjusted internally based on the scales #' of the predictors. See the \link[=priors]{priors help page} for details on #' the rescaling and the \code{\link{prior_summary}} function for a summary of #' the priors used for a particular model. #' @param priorLong_intercept,priorEvent_intercept The prior distributions #' for the intercepts in the longitudinal submodel(s) and event submodel. #' Can be a call to \code{normal}, \code{student_t} or #' \code{cauchy}. See the \link[=priors]{priors help page} for details on #' these functions. To omit a prior on the intercept ---i.e., to use a flat #' (improper) uniform prior--- \code{prior_intercept} can be set to #' \code{NULL}. #' #' \strong{Note:} The prior distribution for the intercept is set so it #' applies to the value when all predictors are centered. 
Moreover, #' note that a prior is only placed on the intercept for the event submodel #' when a Weibull baseline hazard has been specified. For the B-splines and #' piecewise constant baseline hazards there is not intercept parameter that #' is given a prior distribution; an intercept parameter will be shown in #' the output for the fitted model, but this just corresponds to the #' necessary post-estimation adjustment in the linear predictor due to the #' centering of the predictiors in the event submodel. #' #' @param priorLong_aux The prior distribution for the "auxiliary" parameters #' in the longitudinal submodels (if applicable). #' The "auxiliary" parameter refers to a different parameter #' depending on the \code{family}. For Gaussian models \code{priorLong_aux} #' controls \code{"sigma"}, the error #' standard deviation. For negative binomial models \code{priorLong_aux} controls #' \code{"reciprocal_dispersion"}, which is similar to the #' \code{"size"} parameter of \code{\link[stats:NegBinomial]{rnbinom}}: #' smaller values of \code{"reciprocal_dispersion"} correspond to #' greater dispersion. For gamma models \code{priorLong_aux} sets the prior on #' to the \code{"shape"} parameter (see e.g., #' \code{\link[stats:GammaDist]{rgamma}}), and for inverse-Gaussian models it is the #' so-called \code{"lambda"} parameter (which is essentially the reciprocal of #' a scale parameter). Binomial and Poisson models do not have auxiliary #' parameters. #' #' \code{priorLong_aux} can be a call to \code{exponential} to #' use an exponential distribution, or \code{normal}, \code{student_t} or #' \code{cauchy}, which results in a half-normal, half-t, or half-Cauchy #' prior. See \code{\link{priors}} for details on these functions. To omit a #' prior ---i.e., to use a flat (improper) uniform prior--- set #' \code{priorLong_aux} to \code{NULL}. 
#' #' If fitting a multivariate joint model, you have the option to #' specify a list of prior distributions, however the elements of the list #' that correspond to any longitudinal submodel which does not have an #' auxiliary parameter will be ignored. #' @param priorEvent_aux The prior distribution for the "auxiliary" parameters #' in the event submodel. The "auxiliary" parameters refers to different #' parameters depending on the baseline hazard. For \code{basehaz = "weibull"} #' the auxiliary parameter is the Weibull shape parameter. For #' \code{basehaz = "bs"} the auxiliary parameters are the coefficients for the #' B-spline approximation to the log baseline hazard. #' For \code{basehaz = "piecewise"} the auxiliary parameters are the piecewise #' estimates of the log baseline hazard. #' @param prior_covariance Cannot be \code{NULL}; see \code{\link{priors}} for #' more information about the prior distributions on covariance matrices. #' Note however that the default prior for covariance matrices in #' \code{stan_jm} is slightly different to that in \code{\link{stan_glmer}} #' (the details of which are described on the \code{\link{priors}} page). #' #' @details The \code{stan_jm} function can be used to fit a joint model (also #' known as a shared parameter model) for longitudinal and time-to-event data #' under a Bayesian framework. The underlying #' estimation is carried out using the Bayesian C++ package Stan #' (\url{https://mc-stan.org/}). \cr #' \cr #' The joint model may be univariate (with only one longitudinal submodel) or #' multivariate (with more than one longitudinal submodel). #' For the longitudinal submodel a (possibly multivariate) generalised linear #' mixed model is assumed with any of the \code{\link[stats]{family}} choices #' allowed by \code{\link[lme4]{glmer}}. 
If a multivariate joint model is specified #' (by providing a list of formulas in the \code{formulaLong} argument), then #' the multivariate longitudinal submodel consists of a multivariate generalized #' linear model (GLM) with group-specific terms that are assumed to be correlated #' across the different GLM submodels. That is, within #' a grouping factor (for example, patient ID) the group-specific terms are #' assumed to be correlated across the different GLM submodels. It is #' possible to specify a different outcome type (for example a different #' family and/or link function) for each of the GLM submodels, by providing #' a list of \code{\link[stats]{family}} objects in the \code{family} #' argument. Multi-level #' clustered data are allowed, and that additional clustering can occur at a #' level higher than the individual-level (e.g. patients clustered within #' clinics), or at a level lower than the individual-level (e.g. tumor lesions #' clustered within patients). If the clustering occurs at a level lower than #' the individual, then the user needs to indicate how the lower level #' clusters should be handled when forming the association structure between #' the longitudinal and event submodels (see the \code{grp_assoc} argument #' described above). \cr #' \cr #' For the event submodel a parametric #' proportional hazards model is assumed. The baseline hazard can be estimated #' using either a cubic B-splines approximation (\code{basehaz = "bs"}, the #' default), a Weibull distribution (\code{basehaz = "weibull"}), or a #' piecewise constant baseline hazard (\code{basehaz = "piecewise"}). #' If the B-spline or piecewise constant baseline hazards are used, #' then the degrees of freedom or the internal knot locations can be #' (optionally) specified. 
If #' the degrees of freedom are specified (through the \code{df} argument) then #' the knot locations are automatically generated based on the #' distribution of the observed event times (not including censoring times). #' Otherwise internal knot locations can be specified #' directly through the \code{knots} argument. If neither \code{df} or #' \code{knots} is specified, then the default is to set \code{df} equal to 6. #' It is not possible to specify both \code{df} and \code{knots}. \cr #' \cr #' Time-varying covariates are allowed in both the #' longitudinal and event submodels. These should be specified in the data #' in the same way as they normally would when fitting a separate #' longitudinal model using \code{\link[lme4]{lmer}} or a separate #' time-to-event model using \code{\link[survival]{coxph}}. These time-varying #' covariates should be exogenous in nature, otherwise they would perhaps #' be better specified as an additional outcome (i.e. by including them as an #' additional longitudinal outcome in the joint model). \cr #' \cr #' Bayesian estimation of the joint model is performed via MCMC. The Bayesian #' model includes independent priors on the #' regression coefficients for both the longitudinal and event submodels, #' including the association parameter(s) (in much the same way as the #' regression parameters in \code{\link{stan_glm}}) and #' priors on the terms of a decomposition of the covariance matrices of the #' group-specific parameters. #' See \code{\link{priors}} for more information about the priors distributions #' that are available. \cr #' \cr #' Gauss-Kronrod quadrature is used to numerically evaluate the integral #' over the cumulative hazard in the likelihood function for the event submodel. #' The accuracy of the numerical approximation can be controlled using the #' number of quadrature nodes, specified through the \code{qnodes} #' argument. 
Using a higher number of quadrature nodes will result in a more #' accurate approximation. #' #' \subsection{Association structures}{ #' The association structure for the joint model can be based on any of the #' following parameterisations: #' \itemize{ #' \item current value of the linear predictor in the #' longitudinal submodel (\code{"etavalue"}) #' \item first derivative (slope) of the linear predictor in the #' longitudinal submodel (\code{"etaslope"}) #' \item the area under the curve of the linear predictor in the #' longitudinal submodel (\code{"etaauc"}) #' \item current expected value of the longitudinal submodel #' (\code{"muvalue"}) #' \item the area under the curve of the expected value from the #' longitudinal submodel (\code{"muauc"}) #' \item shared individual-level random effects (\code{"shared_b"}) #' \item shared individual-level random effects which also incorporate #' the corresponding fixed effect as well as any corresponding #' random effects for clustering levels higher than the individual) #' (\code{"shared_coef"}) #' \item interactions between association terms and observed data/covariates #' (\code{"etavalue_data"}, \code{"etaslope_data"}, \code{"muvalue_data"}, #' \code{"muslope_data"}). These are described further below. #' \item interactions between association terms corresponding to different #' longitudinal outcomes in a multivariate joint model #' (\code{"etavalue_etavalue(#)"}, \code{"etavalue_muvalue(#)"}, #' \code{"muvalue_etavalue(#)"}, \code{"muvalue_muvalue(#)"}). These #' are described further below. #' \item no association structure (equivalent to fitting separate #' longitudinal and event models) (\code{"null"} or \code{NULL}) #' } #' More than one association structure can be specified, however, #' not all possible combinations are allowed. #' Note that for the lagged association structures baseline values (time = 0) #' are used for the instances #' where the time lag results in a time prior to baseline. 
When using the #' \code{"etaauc"} or \code{"muauc"} association structures, the area under #' the curve is evaluated using Gauss-Kronrod quadrature with 15 quadrature #' nodes. By default, \code{"shared_b"} and \code{"shared_coef"} contribute #' all random effects to the association structure; however, a subset of the #' random effects can be chosen by specifying their indices between parentheses #' as a suffix, for example, \code{"shared_b(1)"} or \code{"shared_b(1:3)"} or #' \code{"shared_b(1,2,4)"}, and so on. \cr #' \cr #' In addition, several association terms (\code{"etavalue"}, \code{"etaslope"}, #' \code{"muvalue"}, \code{"muslope"}) can be interacted with observed #' data/covariates. To do this, use the association term's main handle plus a #' suffix of \code{"_data"} then followed by the model matrix formula in #' parentheses. For example if we had a variable in our dataset for gender #' named \code{sex} then we might want to obtain different estimates for the #' association between the current slope of the marker and the risk of the #' event for each gender. To do this we would specify #' \code{assoc = c("etaslope", "etaslope_data(~ sex)")}. \cr #' \cr #' It is also possible, when fitting a multivariate joint model, to include #' interaction terms between the association terms themselves (this only #' applies for interacting \code{"etavalue"} or \code{"muvalue"}). For example, #' if we had a joint model with two longitudinal markers, we could specify #' \code{assoc = list(c("etavalue", "etavalue_etavalue(2)"), "etavalue")}. #' The first element of list says we want to use the value of the linear #' predictor for the first marker, as well as it's interaction with the #' value of the linear predictor for the second marker. The second element of #' the list says we want to also include the expected value of the second marker #' (i.e. as a "main effect"). 
Therefore, the linear predictor for the event #' submodel would include the "main effects" for each marker as well as their #' interaction. \cr #' \cr #' There are additional examples in the \strong{Examples} section below. #' } #' #' @return A \link[=stanreg-objects]{stanjm} object is returned. #' #' @seealso \code{\link{stanreg-objects}}, \code{\link{stanmvreg-methods}}, #' \code{\link{print.stanmvreg}}, \code{\link{summary.stanmvreg}}, #' \code{\link{posterior_traj}}, \code{\link{posterior_survfit}}, #' \code{\link{posterior_predict}}, \code{\link{posterior_interval}}, #' \code{\link{pp_check}}, \code{\link{ps_check}}, \code{\link{stan_mvmer}}. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { #' \donttest{ #' #' ##### #' # Univariate joint model, with association structure based on the #' # current value of the linear predictor #' f1 <- stan_jm(formulaLong = logBili ~ year + (1 | id), #' dataLong = pbcLong, #' formulaEvent = Surv(futimeYears, death) ~ sex + trt, #' dataEvent = pbcSurv, #' time_var = "year", #' # this next line is only to keep the example small in size! #' chains = 1, cores = 1, seed = 12345, iter = 1000) #' print(f1) #' summary(f1) #' #' ##### #' # Univariate joint model, with association structure based on the #' # current value and slope of the linear predictor #' f2 <- stan_jm(formulaLong = logBili ~ year + (year | id), #' dataLong = pbcLong, #' formulaEvent = Surv(futimeYears, death) ~ sex + trt, #' dataEvent = pbcSurv, #' assoc = c("etavalue", "etaslope"), #' time_var = "year", #' chains = 1, cores = 1, seed = 12345, iter = 1000) #' print(f2) #' #' ##### #' # Univariate joint model, with association structure based on the #' # lagged value of the linear predictor, where the lag is 2 time #' # units (i.e. 
2 years in this example) #' f3 <- stan_jm(formulaLong = logBili ~ year + (1 | id), #' dataLong = pbcLong, #' formulaEvent = Surv(futimeYears, death) ~ sex + trt, #' dataEvent = pbcSurv, #' time_var = "year", #' assoc = "etavalue", lag_assoc = 2, #' chains = 1, cores = 1, seed = 12345, iter = 1000) #' print(f3) #' #' ##### #' # Univariate joint model, where the association structure includes #' # interactions with observed data. Here we specify that we want to use #' # an association structure based on the current value of the linear #' # predictor from the longitudinal submodel (i.e. "etavalue"), but we #' # also want to interact this with the treatment covariate (trt) from #' # pbcLong data frame, so that we can estimate a different association #' # parameter (i.e. estimated effect of log serum bilirubin on the log #' # hazard of death) for each treatment group #' f4 <- stan_jm(formulaLong = logBili ~ year + (1 | id), #' dataLong = pbcLong, #' formulaEvent = Surv(futimeYears, death) ~ sex + trt, #' dataEvent = pbcSurv, #' time_var = "year", #' assoc = c("etavalue", "etavalue_data(~ trt)"), #' chains = 1, cores = 1, seed = 12345, iter = 1000) #' print(f4) #' #' ###### #' # Multivariate joint model, with association structure based #' # on the current value and slope of the linear predictor in the #' # first longitudinal submodel and the area under the marker #' # trajectory for the second longitudinal submodel #' mv1 <- stan_jm( #' formulaLong = list( #' logBili ~ year + (1 | id), #' albumin ~ sex + year + (year | id)), #' dataLong = pbcLong, #' formulaEvent = Surv(futimeYears, death) ~ sex + trt, #' dataEvent = pbcSurv, #' assoc = list(c("etavalue", "etaslope"), "etaauc"), #' time_var = "year", #' chains = 1, cores = 1, seed = 12345, iter = 100) #' print(mv1) #' #' ##### #' # Multivariate joint model, where the association structure is formed by #' # including the expected value of each longitudinal marker (logBili and #' # albumin) in the linear predictor of the 
#' event submodel, as well as their
#' # interaction effect (i.e. the interaction between the two "etavalue" terms).
#' # Note that whether such an association structure based on a marker by
#' # marker interaction term makes sense will depend on the context of your
#' # application -- here we just show it for demonstration purposes).
#' mv2 <- stan_jm(
#'   formulaLong = list(
#'     logBili ~ year + (1 | id),
#'     albumin ~ sex + year + (year | id)),
#'   dataLong = pbcLong,
#'   formulaEvent = Surv(futimeYears, death) ~ sex + trt,
#'   dataEvent = pbcSurv,
#'   assoc = list(c("etavalue", "etavalue_etavalue(2)"), "etavalue"),
#'   time_var = "year",
#'   chains = 1, cores = 1, seed = 12345, iter = 100)
#'
#' #####
#' # Multivariate joint model, with one bernoulli marker and one
#' # Gaussian marker. We will artificially create the bernoulli
#' # marker by dichotomising log serum bilirubin
#' pbcLong$ybern <- as.integer(pbcLong$logBili >= mean(pbcLong$logBili))
#' mv3 <- stan_jm(
#'   formulaLong = list(
#'     ybern ~ year + (1 | id),
#'     albumin ~ sex + year + (year | id)),
#'   dataLong = pbcLong,
#'   formulaEvent = Surv(futimeYears, death) ~ sex + trt,
#'   dataEvent = pbcSurv,
#'   family = list(binomial, gaussian),
#'   time_var = "year",
#'   chains = 1, cores = 1, seed = 12345, iter = 1000)
#' }
#' }
#'
stan_jm <- function(formulaLong, dataLong, formulaEvent, dataEvent, time_var,
                    id_var, family = gaussian, assoc = "etavalue",
                    lag_assoc = 0, grp_assoc, scale_assoc = NULL,
                    epsilon = 1E-5,
                    basehaz = c("bs", "weibull", "piecewise"),
                    basehaz_ops, qnodes = 15, init = "prefit", weights,
                    priorLong = normal(autoscale=TRUE),
                    priorLong_intercept = normal(autoscale=TRUE),
                    priorLong_aux = cauchy(0, 5, autoscale=TRUE),
                    priorEvent = normal(autoscale=TRUE),
                    priorEvent_intercept = normal(autoscale=TRUE),
                    priorEvent_aux = cauchy(autoscale=TRUE),
                    priorEvent_assoc = normal(autoscale=TRUE),
                    prior_covariance = lkj(autoscale=TRUE),
                    prior_PD = FALSE,
                    algorithm = c("sampling", "meanfield", "fullrank"),
                    adapt_delta = NULL, max_treedepth = 10L, QR = FALSE,
                    sparse = FALSE, ...) {

  #-----------------------------
  # Pre-processing of arguments
  #-----------------------------

  # Set seed if specified
  dots <- list(...)
  if ("seed" %in% names(dots))
    set.seed(dots$seed)

  algorithm <- match.arg(algorithm)
  basehaz <- match.arg(basehaz)

  # Optional arguments default to NULL when the user did not supply them
  if (missing(basehaz_ops)) basehaz_ops <- NULL
  if (missing(weights)) weights <- NULL
  if (missing(id_var)) id_var <- NULL
  if (missing(time_var)) time_var <- NULL
  if (missing(grp_assoc)) grp_assoc <- NULL

  # Arguments accepted in the signature but not (yet) supported
  if (!is.null(weights))
    stop("'weights' are not yet implemented.")
  if (QR)
    stop("'QR' decomposition is not yet implemented.")
  if (sparse)
    stop("'sparse' option is not yet implemented.")
  if (is.null(time_var))
    stop("'time_var' must be specified.")

  # Formula: M is the number of longitudinal submodels
  formulaLong <- validate_arg(formulaLong, "formula"); M <- length(formulaLong)
  if (M > 3L)
    stop("'stan_jm' is currently limited to a maximum of 3 longitudinal outcomes.")

  # Data
  dataLong <- validate_arg(dataLong, "data.frame", validate_length = M)
  dataEvent <- as.data.frame(dataEvent)

  # Family: validated separately for each of the M longitudinal submodels
  ok_family_classes <- c("function", "family", "character")
  ok_families <- c("binomial", "gaussian", "Gamma",
                   "inverse.gaussian", "poisson", "neg_binomial_2")
  family <- validate_arg(family, ok_family_classes, validate_length = M)
  family <- lapply(family, validate_famlink, ok_families)

  # Assoc
  ok_assoc_classes <- c("NULL", "character")
  assoc <- validate_arg(assoc, ok_assoc_classes, validate_length = M)

  # Is priorLong* already a list?
  # (replicate each priorLong* argument across the M longitudinal submodels
  # when a single prior was supplied rather than a list of priors)
  priorLong <- broadcast_prior(priorLong, M)
  priorLong_intercept <- broadcast_prior(priorLong_intercept, M)
  priorLong_aux <- broadcast_prior(priorLong_aux, M)

  #-----------
  # Fit model
  #-----------

  stanfit <- stan_jm.fit(formulaLong = formulaLong, dataLong = dataLong,
                         formulaEvent = formulaEvent, dataEvent = dataEvent,
                         time_var = time_var, id_var = id_var,
                         family = family, assoc = assoc,
                         lag_assoc = lag_assoc, grp_assoc = grp_assoc,
                         epsilon = epsilon, basehaz = basehaz,
                         basehaz_ops = basehaz_ops, qnodes = qnodes,
                         init = init, weights = weights,
                         scale_assoc = scale_assoc,
                         priorLong = priorLong,
                         priorLong_intercept = priorLong_intercept,
                         priorLong_aux = priorLong_aux,
                         priorEvent = priorEvent,
                         priorEvent_intercept = priorEvent_intercept,
                         priorEvent_aux = priorEvent_aux,
                         priorEvent_assoc = priorEvent_assoc,
                         prior_covariance = prior_covariance,
                         prior_PD = prior_PD, algorithm = algorithm,
                         adapt_delta = adapt_delta,
                         max_treedepth = max_treedepth,
                         QR = QR, sparse = sparse, ...)

  # A non-stanfit return value means estimation produced something that
  # should be passed straight back to the caller
  if (algorithm != "optimizing" && !is(stanfit, "stanfit"))
    return(stanfit)

  # Unpack the metadata that stan_jm.fit attached to the stanfit object
  y_mod <- attr(stanfit, "y_mod")
  e_mod <- attr(stanfit, "e_mod")
  a_mod <- attr(stanfit, "a_mod")
  cnms <- attr(stanfit, "cnms")
  flevels <- attr(stanfit, "flevels")
  assoc <- attr(stanfit, "assoc")
  scale_assoc <- attr(stanfit, "scale_assoc")
  id_var <- attr(stanfit, "id_var")
  basehaz <- attr(stanfit, "basehaz")
  grp_stuff <- attr(stanfit, "grp_stuff")
  prior_info <- attr(stanfit, "prior_info")
  stanfit <- drop_attributes(stanfit, "y_mod", "e_mod", "a_mod", "cnms",
                             "flevels", "assoc", "id_var", "basehaz",
                             "grp_stuff", "prior_info","scale_assoc")

  # Summaries used by downstream methods (print, summary, etc.)
  terms <- c(fetch(y_mod, "terms"), list(terms(e_mod$mod)))
  n_yobs <- fetch_(y_mod, "x", "N")
  n_grps <- sapply(flevels, n_distinct)
  n_subjects <- e_mod$Npat

  # Assemble everything into the structure expected by the stanmvreg
  # constructor
  fit <- nlist(stanfit, formula = c(formulaLong, formulaEvent), family,
               id_var, time_var, weights, scale_assoc, qnodes, basehaz,
               assoc, M, cnms, flevels, n_grps, n_subjects, n_yobs,
               epsilon, algorithm, terms, glmod = y_mod, survmod = e_mod,
               assocmod = a_mod, grp_stuff, dataLong, dataEvent,
               prior.info = prior_info, stan_function = "stan_jm",
               call = match.call(expand.dots = TRUE))
  out <- stanmvreg(fit)
  return(out)
}
rstanarm/R/stanreg_list.R0000644000176200001440000002412514370470372015136 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Create lists of fitted model objects, combine them, or append new models to
#' existing lists of models.
#'
#' @export
#' @param ... Objects to combine into a \code{"stanreg_list"},
#'   \code{"stanmvreg_list"}, or \code{"stanjm_list"}. Can be fitted model
#'   objects, existing \code{"stan*_list"} objects to combine, or one existing
#'   \code{"stan*_list"} object followed by fitted model objects to append to
#'   the list.
#' @param model_names Optionally, a character vector of model names. If not
#'   specified then the names are inferred from the name of the objects passed
#'   in via \code{...}. These model names are used, for example, when printing
#'   the results of the \code{loo_compare.stanreg_list} and
#'   \code{loo_model_weights.stanreg_list} methods.
#' @return A list of class \code{"stanreg_list"}, \code{"stanmvreg_list"}, or
#'   \code{"stanjm_list"}, containing the fitted model objects and some metadata
#'   stored as attributes.
#'
#' @seealso \code{\link{loo_model_weights}} for usage of \code{stanreg_list}.
#'
stanreg_list <- function(..., model_names = NULL) {
  mods <- list(...)
  # Name the models: either the user-supplied 'model_names', or names
  # deparsed from the expressions the caller passed via '...'
  names(mods) <- stanreg_list_names(
    model_names,
    n_models = length(mods),
    call_dots = match.call(expand.dots = FALSE)$...
  )
  .stanreg_list(mods, model_class = "stanreg")
}

#' @rdname stanreg_list
#' @export
stanmvreg_list <- function(..., model_names = NULL) {
  mods <- list(...)
  # Same naming scheme as stanreg_list(), but restricted to stanmvreg objects
  names(mods) <- stanreg_list_names(
    model_names,
    n_models = length(mods),
    call_dots = match.call(expand.dots = FALSE)$...
  )
  .stanreg_list(mods, model_class = "stanmvreg")
}

#' @rdname stanreg_list
#' @export
stanjm_list <- function(..., model_names = NULL) {
  mods <- list(...)
  # Same naming scheme as stanreg_list(), but restricted to stanjm objects
  names(mods) <- stanreg_list_names(
    model_names,
    n_models = length(mods),
    call_dots = match.call(expand.dots = FALSE)$...
  )
  .stanreg_list(mods, model_class = "stanjm")
}

#' @export
names.stanreg_list <- function(x) {
  attr(x, "names")
}

#' @rdname stanreg_list
#' @export
#' @method print stanreg_list
#' @param x The object to print.
print.stanreg_list <- function(x, ...) {
  # Report the most specific list class in the header line
  cl <- class(x)
  if (length(cl) > 1) {
    cl <- cl[1]
  }
  cat(cl, " with ", length(x), " models: \n\n")
  # One row per model: its name, family, and (deparsed) formula
  df <- data.frame(
    name = attr(x, "names"),
    family = unname(attr(x, "families")),
    formula = sapply(x, function(y) formula_string(formula(y))),
    row.names = seq_along(x)
  )
  print(df, right = FALSE, ...)
  invisible(x)
}

# internal ----------------------------------------------------------------

#' Create, combine, or append new models to a stanreg_list, stanmvreg_list, or
#' stanjm_list object.
#'
#' @noRd
#' @param mods List of objects to combine. Can be fitted model objects (stanreg,
#'   stanmvreg, stanjm) or stan*_list objects.
#' @param model_class The type of objects to allow.
#' @return A stanreg_list, stanmvreg_list, or stanjm_list with one component per
#'   model and attributes containing various metadata about the models.
#'
.stanreg_list <- function(mods, model_class = c("stanreg", "stanmvreg", "stanjm")) {
  stopifnot(length(mods) >= 1, is.list(mods))
  model_class <- match.arg(model_class)

  # Dispatch on what was passed in: all fitted models -> create a new list;
  # all stan*_list objects -> combine them; one stan*_list followed by
  # fitted models -> append the models to that list.
  is_stanreg_list <- sapply(mods, is.stanreg_list)
  if (!any(is_stanreg_list)) {
    .stopifnot_valid_objects(mods, valid_for = "create", model_class = model_class)
    out <- stanreg_list_create(mods, model_class = model_class)
  } else if (all(is_stanreg_list)) {
    .stopifnot_valid_objects(mods, valid_for = "combine", model_class = model_class)
    out <- stanreg_list_combine(mods, model_class = model_class)
  } else {
    .stopifnot_valid_objects(mods, valid_for = "append", model_class = model_class)
    out <- stanreg_list_append(base_list = mods[[1]],
                               mods = mods[-1],
                               model_class = model_class)
  }

  # set model_name attributes of loo/waic/kfold objects to stanreg_list names
  out <- rename_loos.stanreg_list(out)
  return(out)
}

#' Create a stanreg_list from list of fitted model objects
#'
#' @noRd
#' @param mods List of fitted model objects.
#' @param model_class What type of list is it? ('stanreg', 'stanmvreg', 'stanjm')
#' @return A stanreg_list object
stanreg_list_create <- function(mods, model_class) {
  # e.g. model_class "stanjm" gives class c("stanjm_list", "stanreg_list")
  list_class <- unique(c(paste0(model_class, "_list"), "stanreg_list"))
  structure(mods,
            class = list_class,
            names = names(mods),
            families = stanreg_list_families(mods)
  )
}

#' Combine existing stanreg_list objects
#'
#' @noRd
#' @param lists List of stanreg_list objects.
#' @param model_class What type of list is it? ('stanreg', 'stanmvreg', 'stanjm')
#' @return A stanreg_list object
#'
stanreg_list_combine <- function(lists, model_class) {
  N_models_per_list <- sapply(lists, length)
  N_models <- sum(N_models_per_list)

  # All input lists must share the same (most specific) class
  classes <- lapply(lists, class)
  classes <- sapply(classes, function(x) x[1])
  if (!all(classes == classes[1])) {
    stop("Can't combine ", classes[1], " with ",
         paste(unique(classes[-1]), collapse = ", "))
  }

  new_names <- unlist(lapply(lists, attr, "names", exact = TRUE),
                      use.names = FALSE)
  new_families <- unlist(lapply(lists, attr, "families", exact = TRUE),
                         use.names = FALSE)

  # Flatten the models from all lists into one list, preserving order
  new_list <- vector(mode = "list", length = N_models)
  pos <- 1
  for (j in seq_along(lists)) {
    for (m in seq_len(N_models_per_list[j])) {
      new_list[[pos]] <- lists[[j]][[m]]
      pos <- pos + 1
    }
  }

  structure(
    new_list,
    class = unique(c(paste0(model_class, "_list"), "stanreg_list")),
    names = new_names,
    families = new_families
  )
}

#' Append new models to an existing stanreg_list object
#'
#' @noRd
#' @param base_list The existing stanreg_list to append the new models to.
#' @param mods List of fitted model objects to append to the existing list.
#' @param model_class What type of list is it? ('stanreg', 'stanmvreg', 'stanjm')
#' @return A stanreg_list object
#'
stanreg_list_append <- function(base_list, mods, model_class) {
  new_list <- stanreg_list_create(mods, model_class = model_class)
  stanreg_list_combine(list(base_list, new_list), model_class = model_class)
}

# Class predicates; stanmvreg_list and stanjm_list are subclasses of
# stanreg_list
is.stanreg_list <- function(x) inherits(x, "stanreg_list")
is.stanmvreg_list <- function(x) is.stanreg_list(x) && inherits(x, "stanmvreg_list")
is.stanjm_list <- function(x) is.stanreg_list(x) && inherits(x, "stanjm_list")

# Validate the mix of objects passed to .stanreg_list() for the chosen
# operation, stopping with an informative message otherwise
.stopifnot_valid_objects <- function(mods,
                                     valid_for = c("create", "combine", "append"),
                                     model_class = c("stanreg", "stanmvreg", "stanjm")) {
  valid_for <- match.arg(valid_for)
  model_class <- match.arg(model_class)
  list_class <- paste0(model_class, "_list")
  error_msg <- paste0(
    "For ", list_class,"() objects in '...' must: ",
    "\n(1) all be ", model_class, " objects, or",
    "\n(2) all be ", list_class, " objects, or",
    "\n(3) be one ", list_class, " object followed by all ", model_class, " objects"
  )
  # Look up the matching predicate (e.g. is.stanreg / is.stanreg_list) by name
  is_model_class <- sapply(mods, FUN = match.fun(paste0("is.", model_class)))
  is_list_class <- sapply(mods, FUN = match.fun(paste0("is.", list_class)))
  throw_error <-
    (valid_for == "create" && !all(is_model_class)) ||
    (valid_for == "combine" && !all(is_list_class)) ||
    (valid_for == "append" && !(is_list_class[1] && all(is_model_class[-1])))
  if (throw_error) {
    stop(error_msg, call. = FALSE)
  }
}

#' Determine names of the models in a stanreg_list
#' @noRd
#' @param user_model_names Either NULL or user-specified model_names argument
#' @param n_models The number of models in the stanreg_list
#' @param call_dots The result of match.call(expand.dots = FALSE)$...
#' @return Either the user-specified model names or names inferred from the
#'   names of the fitted model objects passed to '...'.
#'
stanreg_list_names <- function(user_model_names, n_models, call_dots) {
  if (!is.null(user_model_names)) {
    stopifnot(is.character(user_model_names))
    if (length(user_model_names) != n_models) {
      stop("Length of 'model_names' must be the same as the number of models.")
    }
    nms <- user_model_names
  } else {
    # No names supplied: deparse the calling expressions (typically the
    # variable names of the fitted model objects)
    nms <- sapply(call_dots, FUN = deparse)
  }
  return(nms)
}

#' Determine the families of the models in a stanreg_list
#' @noRd
#' @param mods List of fitted model objects
#' @return Character vector of family names
#'
stanreg_list_families <- function(mods) {
  fams <- sapply(mods, FUN = function(x) {
    # family() may return either a character string or a family object
    fam <- family(x)
    if (!is.character(fam)) fam <- fam$family
    return(fam)
  })
  unname(fams)
}

# loo/waic/kfold objects created by rstanarm have a model_name attribute.
# when a stanreg_list is created those attributes should be changed to match
# the names of the models used for the stanreg_list in case user has specified
# the model_names argument
rename_loos <- function(x,...)
UseMethod("rename_loos")

# Change model_name attributes of a loo/waic/kfold object stored in a
# stanreg object
rename_loos.stanreg <- function(x, new_model_name,...) {
  # Overwrite the model_name attribute of each cached criterion (if present)
  for (criterion in c("loo", "waic", "kfold")) {
    if (!is.null(x[[criterion]])) {
      attr(x[[criterion]], "model_name") <- new_model_name
    }
  }
  return(x)
}

# Change model_name attributes of loo/waic/kfold objects to correspond to
# model names used for stanreg_list
rename_loos.stanreg_list <- function(x, ...) {
  for (j in seq_along(x)) {
    x[[j]] <- rename_loos.stanreg(x[[j]], new_model_name = names(x)[j])
  }
  return(x)
}
rstanarm/R/posterior_survfit.R0000644000176200001440000011755614414044166016250 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Estimate subject-specific or standardised survival probabilities
#'
#' This function allows us to generate estimated survival probabilities
#' based on draws from the posterior predictive distribution. By default
#' the survival probabilities are conditional on an individual's
#' group-specific coefficients (i.e. their individual-level random
#' effects).
If prediction data is provided via the \code{newdataLong} #' and \code{newdataEvent} arguments, then the default behaviour is to #' sample new group-specific coefficients for the individuals in the #' new data using a Monte Carlo scheme that conditions on their #' longitudinal outcome data provided in \code{newdataLong} #' (sometimes referred to as "dynamic predictions", see Rizopoulos #' (2011)). This default behaviour can be stopped by specifying #' \code{dynamic = FALSE}, in which case the predicted survival #' probabilities will be marginalised over the distribution of the #' group-specific coefficients. This has the benefit that the user does #' not need to provide longitudinal outcome measurements for the new #' individuals, however, it does mean that the predictions will incorporate #' all the uncertainty associated with between-individual variation, since #' the predictions aren't conditional on any observed data for the individual. #' In addition, by default, the predicted subject-specific survival #' probabilities are conditional on observed values of the fixed effect #' covariates (ie, the predictions will be obtained using either the design #' matrices used in the original \code{\link{stan_jm}} model call, or using the #' covariate values provided in the \code{newdataLong} and \code{newdataEvent} #' arguments). However, if you wish to average over the observed distribution #' of the fixed effect covariates then this is possible -- such predictions #' are sometimes referred to as standardised survival probabilties -- see the #' \code{standardise} argument below. #' #' @export #' @templateVar stanjmArg object #' @template args-stanjm-object #' #' @param newdataLong,newdataEvent Optionally, a data frame (or in the case of #' \code{newdataLong} this can be a list of data frames) in which to look #' for variables with which to predict. If omitted, the model matrices are used. 
#' If new data is provided, then it should also contain the longitudinal #' outcome data on which to condition when drawing the new group-specific #' coefficients for individuals in the new data. Note that there is only #' allowed to be one row of data for each individual in \code{newdataEvent}, #' that is, time-varying covariates are not allowed in the prediction data for #' the event submodel. Also, \code{newdataEvent} can optionally include a #' variable with information about the last known survival time for the new #' individuals -- see the description for the \code{last_time} argument below #' -- however also note that when generating the survival probabilities it #' is of course assumed that all individuals in \code{newdataEvent} have not #' yet experienced the event (that is, any variable in \code{newdataEvent} that #' corresponds to the event indicator will be ignored). #' @param extrapolate A logical specifying whether to extrapolate the estimated #' survival probabilities beyond the times specified in the \code{times} argument. #' If \code{TRUE} then the extrapolation can be further controlled using #' the \code{control} argument. #' @param control A named list with parameters controlling extrapolation #' of the estimated survival function when \code{extrapolate = TRUE}. The list #' can contain one or more of the following named elements: \cr #' \describe{ #' \item{\code{epoints}}{a positive integer specifying the number of #' discrete time points at which to calculate the forecasted survival #' probabilities. The default is 10.} #' \item{\code{edist}}{a positive scalar specifying the amount of time #' across which to forecast the estimated survival function, represented #' in units of the time variable \code{time_var} (from fitting the model). #' The default is to extrapolate between the times specified in the #' \code{times} argument and the maximum event or censoring time in the #' original data. 
If \code{edist} leads to times that are beyond #' the maximum event or censoring time in the original data then the #' estimated survival probabilities will be truncated at that point, since #' the estimate for the baseline hazard is not available beyond that time.} #' } #' @param condition A logical specifying whether the estimated #' subject-specific survival probabilities at time \code{t} should be #' conditioned on survival up to a fixed time point \code{u}. The default #' is for \code{condition} to be set to \code{TRUE}, unless standardised survival #' probabilities have been requested (by specifying \code{standardise = TRUE}), #' in which case \code{condition} must (and will) be set to \code{FALSE}. #' When conditional survival probabilities are requested, the fixed #' time point \code{u} will be either: (i) the value specified via the #' \code{last_time} argument; or if the \code{last_time} argument is #' \code{NULL} then the latest observation time for each individual #' (taken to be the value in the \code{times} argument if \code{newdataEvent} #' is specified, or the observed event or censoring time if \code{newdataEvent} #' is \code{NULL}. #' @param last_time A scalar, character string, or \code{NULL}. This argument #' specifies the last known survival time for each individual when #' conditional predictions are being obtained. If #' \code{newdataEvent} is provided and conditional survival predictions are being #' obtained, then the \code{last_time} argument can be one of the following: #' (i) a scalar, this will use the same last time for each individual in #' \code{newdataEvent}; (ii) a character string, naming a column in #' \code{newdataEvent} in which to look for the last time for each individual; #' (iii) \code{NULL}, in which case the default is to use the time of the latest #' longitudinal observation in \code{newdataLong}. 
If \code{newdataEvent} is #' \code{NULL} then the \code{last_time} argument cannot be specified #' directly; instead it will be set equal to the event or censoring time for #' each individual in the dataset that was used to estimate the model. #' If standardised survival probabilities are requested (i.e. #' \code{standardise = TRUE}) then conditional survival probabilities are #' not allowed and therefore the \code{last_time} argument is ignored. #' @param ids An optional vector specifying a subset of IDs for whom the #' predictions should be obtained. The default is to predict for all individuals #' who were used in estimating the model or, if \code{newdataLong} and #' \code{newdataEvent} are specified, then all individuals contained in #' the new data. #' @param prob A scalar between 0 and 1 specifying the width to use for the #' uncertainty interval (sometimes called credible interval) for the predictions. #' For example \code{prob = 0.95} (the default) means that the 2.5th and 97.5th #' percentiles will be provided. #' @param times A scalar, a character string, or \code{NULL}. Specifies the #' times at which the estimated survival probabilities should be calculated. #' It can be either: (i) \code{NULL}, in which case it will default to the last known #' survival time for each individual, as determined by the \code{last_time} #' argument; (ii) a scalar, specifying a time to estimate the survival probability #' for each of the individuals; or (iii) if \code{newdataEvent} is #' provided, it can be the name of a variable in \code{newdataEvent} that #' indicates the time at which the survival probabilities should be calculated #' for each individual. #' @param standardise A logical specifying whether the estimated #' subject-specific survival probabilities should be averaged #' across all individuals for whom the subject-specific predictions are #' being obtained. 
This can be used to average over the covariate and random effects #' distributions of the individuals used in estimating the model, or the individuals #' included in the \code{newdata} arguments. This approach of #' averaging across the observed distribution of the covariates is sometimes #' referred to as a "standardised" survival curve. If \code{standardise = TRUE}, #' then the \code{times} argument must be specified and it must be constant across #' individuals, that is, the survival probabilities must be calculated at the #' same time for all individuals. #' @param dynamic A logical that is only relevant if new data is provided #' via the \code{newdataLong} and \code{newdataEvent} arguments. If #' \code{dynamic = TRUE}, then new group-specific parameters are drawn for #' the individuals in the new data, conditional on their longitudinal #' biomarker data contained in \code{newdataLong}. These group-specific #' parameters are then used to generate individual-specific survival probabilities #' for these individuals. These are often referred to as "dynamic predictions" #' in the joint modelling context, because the predictions can be updated #' each time additional longitudinal biomarker data is collected on the individual. #' On the other hand, if \code{dynamic = FALSE} then the survival probabilities #' will just be marginalised over the distribution of the group-specific #' coefficients; this will mean that the predictions will incorporate all #' uncertainty due to between-individual variation so there will likely be #' very wide credible intervals on the predicted survival probabilities. #' @param scale A scalar, specifying how much to multiply the asymptotic #' variance-covariance matrix for the random effects by, which is then #' used as the "width" (ie. variance-covariance matrix) of the multivariate #' Student-t proposal distribution in the Metropolis-Hastings algorithm. 
This #' is only relevant when \code{newdataEvent} is supplied and #' \code{dynamic = TRUE}, in which case new random effects are simulated #' for the individuals in the new data using the Metropolis-Hastings algorithm. #' @param draws An integer indicating the number of MCMC draws to return. #' The default is to set the number of draws equal to 200, or equal to the #' size of the posterior sample if that is less than 200. #' @param seed An optional \code{\link[=set.seed]{seed}} to use. #' @param ... Currently unused. #' #' @note #' Note that if any variables were transformed (e.g. rescaled) in the data #' used to fit the model, then these variables must also be transformed in #' \code{newdataLong} and \code{newdataEvent}. This only applies if variables #' were transformed before passing the data to one of the modeling functions and #' \emph{not} if transformations were specified inside the model formula. #' #' @return A data frame of class \code{survfit.stanjm}. The data frame includes #' columns for each of the following: #' (i) the median of the posterior predictions of the estimated survival #' probabilities (\code{survpred}); #' (ii) each of the lower and upper limits of the corresponding uncertainty #' interval for the estimated survival probabilities (\code{ci_lb} and #' \code{ci_ub}); #' (iii) a subject identifier (\code{id_var}), unless standardised survival #' probabilities were estimated; #' (iv) the time that the estimated survival probability is calculated for #' (\code{time_var}). #' The returned object also includes a number of additional attributes. 
#' #' @seealso \code{\link{plot.survfit.stanjm}} for plotting the estimated survival #' probabilities, \code{\link{ps_check}} for for graphical checks of the estimated #' survival function, and \code{\link{posterior_traj}} for estimating the #' marginal or subject-specific longitudinal trajectories, and #' \code{\link{plot_stack_jm}} for combining plots of the estimated subject-specific #' longitudinal trajectory and survival function. #' #' @references #' Rizopoulos, D. (2011). Dynamic predictions and prospective accuracy in #' joint models for longitudinal and time-to-event data. \emph{Biometrics} #' \strong{67}, 819. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' # Run example model if not already loaded #' if (!exists("example_jm")) example(example_jm) #' #' # Obtain subject-specific survival probabilities for a few #' # selected individuals in the estimation dataset who were #' # known to survive up until their censoring time. By default #' # the posterior_survfit function will estimate the conditional #' # survival probabilities, that is, conditional on having survived #' # until the event or censoring time, and then by default will #' # extrapolate the survival predictions forward from there. #' ps1 <- posterior_survfit(example_jm, ids = c(7,13,15)) #' # We can plot the estimated survival probabilities using the #' # associated plot function #' plot(ps1) #' #' # If we wanted to estimate the survival probabilities for the #' # same three individuals as the previous example, but this time #' # we won't condition on them having survived up until their #' # censoring time. Instead, we will estimate their probability #' # of having survived between 0 and 5 years given their covariates #' # and their estimated random effects. 
#' # The easiest way to achieve the time scale we want (ie, 0 to 5 years) #' # is to specify that we want the survival time estimated at time 0 #' # and then extrapolated forward 5 years. We also specify that we #' # do not want to condition on their last known survival time. #' ps2 <- posterior_survfit(example_jm, ids = c(7,13,15), times = 0, #' extrapolate = TRUE, condition = FALSE, control = list(edist = 5)) #' #' # Instead we may want to estimate subject-specific survival probabilities #' # for a set of new individuals. To demonstrate this, we will simply take #' # the first two individuals in the estimation dataset, but pass their data #' # via the newdata arguments so that posterior_survfit will assume we are #' # predicting survival for new individuals and draw new random effects #' # under a Monte Carlo scheme (see Rizopoulos (2011)). #' ndL <- pbcLong[pbcLong$id %in% c(1,2),] #' ndE <- pbcSurv[pbcSurv$id %in% c(1,2),] #' ps3 <- posterior_survfit(example_jm, #' newdataLong = ndL, newdataEvent = ndE, #' last_time = "futimeYears", seed = 12345) #' head(ps3) #' # We can then compare the estimated random effects for these #' # individuals based on the fitted model and the Monte Carlo scheme #' ranef(example_jm)$Long1$id[1:2,,drop=FALSE] # from fitted model #' colMeans(attr(ps3, "b_new")) # from Monte Carlo scheme #' #' # Lastly, if we wanted to obtain "standardised" survival probabilities, #' # (by averaging over the observed distribution of the fixed effect #' # covariates, as well as averaging over the estimated random effects #' # for individuals in our estimation sample or new data) then we can #' # specify 'standardise = TRUE'. We can then plot the resulting #' # standardised survival curve. 
#' ps4 <- posterior_survfit(example_jm, standardise = TRUE,
#'                          times = 0, extrapolate = TRUE)
#' plot(ps4)
#' }
#' }
posterior_survfit <- function(object, newdataLong = NULL, newdataEvent = NULL,
                              extrapolate = TRUE, control = list(),
                              condition = NULL, last_time = NULL, prob = 0.95,
                              ids, times = NULL, standardise = FALSE,
                              dynamic = TRUE, scale = 1.5,
                              draws = NULL, seed = NULL, ...) {
  validate_stanjm_object(object)
  M        <- object$n_markers
  id_var   <- object$id_var
  time_var <- object$time_var
  # NB: the previous version also extracted object$basehaz, object$assoc and
  # family(object) into locals, but none of them were used; they are dropped.
  if (!is.null(seed))
    set.seed(seed)
  if (missing(ids))
    ids <- NULL
  dots <- list(...)

  # Temporary stop, until make_assoc_terms can handle it
  sel_stop <- grep("^shared", rownames(object$assoc))
  if (any(unlist(object$assoc[sel_stop, ])))
    stop("'posterior_survfit' cannot yet be used with shared_b or shared_coef ",
         "association structures.")

  # Construct prediction data
  #   ndL: dataLong to be used in predictions
  #   ndE: dataEvent to be used in predictions
  if (!identical(is.null(newdataLong), is.null(newdataEvent)))
    stop("Both newdataLong and newdataEvent must be supplied together.")
  if (is.null(newdataLong)) { # user did not specify newdata
    dats <- get_model_data(object)
    ndL <- dats[1:M]
    ndE <- dats[["Event"]]
  } else { # user specified newdata
    if (!dynamic)
      stop2("Marginalised predictions for the event outcome are ",
            "not currently implemented.")
    newdatas <- validate_newdatas(object, newdataLong, newdataEvent)
    ndL <- newdatas[1:M]
    ndE <- newdatas[["Event"]]
  }
  if (!is.null(ids)) { # user specified a subset of ids
    ndL <- subset_ids(object, ndL, ids)
    ndE <- subset_ids(object, ndE, ids)
  }
  id_list <- factor(unique(ndE[[id_var]])) # order of ids from data, not ids arg

  # Last known survival time for each individual
  if (is.null(newdataLong)) { # user did not specify newdata
    if (!is.null(last_time))
      stop("'last_time' cannot be provided when newdata is NULL, since times ",
           "are taken to be the event or censoring time for each individual.")
    last_time <- object$eventtime[as.character(id_list)]
  } else { # user specified newdata
    if (is.null(last_time)) { # use latest longitudinal observation
      max_ytimes <- do.call("cbind", lapply(
        ndL, function(x) tapply(x[[time_var]], x[[id_var]], FUN = max)))
      last_time <- apply(max_ytimes, 1L, max)
      # re-order last-time according to id_list
      last_time <- last_time[as.character(id_list)]
    } else if (is.character(last_time) && (length(last_time) == 1L)) {
      if (!last_time %in% colnames(ndE))
        stop("Cannot find 'last_time' column named in newdataEvent.")
      last_time <- ndE[[last_time]]
    } else if (is.numeric(last_time) && (length(last_time) == 1L)) {
      last_time <- rep(last_time, length(id_list))
    } else if (is.numeric(last_time) && (length(last_time) > 1L)) {
      last_time <- last_time[as.character(id_list)]
    } else {
      stop("Bug found: could not reconcile 'last_time' argument.")
    }
    names(last_time) <- as.character(id_list)
  }

  # Prediction times: resolve 'times' into one time point per individual
  if (standardise) { # standardised survival probs
    times <- if (is.null(times)) {
      stop("'times' cannot be NULL for obtaining standardised survival probabilities.")
    } else if (is.numeric(times) && (length(times) == 1L)) {
      rep(times, length(id_list))
    } else {
      stop("'times' should be a numeric vector of length 1 in order to obtain ",
           "standardised survival probabilities (the subject-specific survival ",
           "probabilities will be calculated at the specified time point, and ",
           "then averaged).")
    }
  } else if (is.null(newdataLong)) { # subject-specific survival probs without newdata
    times <- if (is.null(times)) {
      object$eventtime[as.character(id_list)]
    } else if (is.numeric(times) && (length(times) == 1L)) {
      rep(times, length(id_list))
    } else {
      stop("If newdata is NULL then 'times' must be NULL or a single number.")
    }
  } else { # subject-specific survival probs with newdata
    times <- if (is.null(times)) {
      last_time
    } else if (is.character(times) && (length(times) == 1L)) {
      if (!times %in% colnames(ndE))
        stop("Variable specified in 'times' argument could not be found in newdata.")
      tapply(ndE[[times]], ndE[[id_var]], FUN = max)
    } else if (is.numeric(times) && (length(times) == 1L)) {
      rep(times, length(id_list))
    } else {
      stop("If newdata is specified then 'times' can only be the name of a ",
           "variable in newdata, or a single number.")
    }
  }
  if (!identical(length(times), length(id_list)))
    stop(paste0("length of the 'times' vector should be equal to the number of individuals ",
                "for whom predictions are being obtained (", length(id_list), ")."))
  maxtime <- max(object$eventtime)
  if (any(times > maxtime))
    stop("'times' are not allowed to be greater than the last event or censoring ",
         "time (since unable to extrapolate the baseline hazard).")

  # User specified extrapolation
  if (extrapolate) {
    ok_control_args <- c("epoints", "edist")
    control <- get_extrapolation_control(control, ok_control_args = ok_control_args)
    endtime <- if (!is.null(control$edist)) times + control$edist else maxtime
    endtime[endtime > maxtime] <- maxtime # nothing beyond end of baseline hazard
    time_seq <- get_time_seq(control$epoints, times, endtime, simplify = FALSE)
  } else time_seq <- list(times) # no extrapolation

  # Conditional survival times
  if (is.null(condition)) {
    condition <- !standardise
  } else if (condition && standardise) {
    stop("'condition' cannot be set to TRUE if standardised survival ",
         "probabilities are requested.")
  }

  # Get stanmat parameter matrix for specified number of draws
  S <- posterior_sample_size(object)
  if (is.null(draws))
    draws <- if (S > 200L) 200L else S
  if (draws > S)
    stop("'draws' should be <= posterior sample size (", S, ").")
  stanmat <- as.matrix(object$stanfit)
  some_draws <- isTRUE(draws < S)
  if (some_draws) {
    samp <- sample(S, draws)
    stanmat <- stanmat[samp, , drop = FALSE]
  }

  # Draw b pars for new individuals (Metropolis-Hastings, see 'scale' arg)
  if (dynamic && !is.null(newdataEvent)) {
    stanmat <- simulate_b_pars(object, stanmat = stanmat, ndL = ndL,
                               ndE = ndE, ids = id_list,
                               times = last_time, scale = scale)
    b_new <- attr(stanmat, "b_new")
    acceptance_rate <- attr(stanmat, "acceptance_rate")
  }
  pars <- extract_pars(object, stanmat) # list of stanmat arrays

  # Matrix of surv probs at each increment of the extrapolation sequence
  # NB If no extrapolation then length(time_seq) == 1L
  surv_t <- lapply(time_seq, function(t) {
    if (!identical(length(t), length(id_list)))
      stop("Bug found: the vector of prediction times is not the same length ",
           "as the number of individuals.")
    dat <- .pp_data_jm(object, newdataLong = ndL, newdataEvent = ndE,
                       ids = id_list, etimes = t, long_parts = FALSE)
    surv_t <- .ll_survival(object, data = dat, pars = pars, survprob = TRUE)
    # was 'is.vector(surv_t) == 1L'; is.vector already returns a logical
    if (is.vector(surv_t))
      surv_t <- t(surv_t) # transform if only one individual
    surv_t[, (t == 0)] <- 1 # avoids possible NaN due to numerical inaccuracies
    if (standardise) { # standardised survival probs
      surv_t <- matrix(rowMeans(surv_t), ncol = 1)
      dimnames(surv_t) <- list(iterations = NULL, "standardised_survprob")
    } else {
      dimnames(surv_t) <- list(iterations = NULL, ids = id_list)
    }
    surv_t
  })

  # If conditioning, need to obtain matrix of surv probs at last known surv time
  if (condition) {
    cond_dat <- .pp_data_jm(object, newdataLong = ndL, newdataEvent = ndE,
                            ids = id_list, etimes = last_time,
                            long_parts = FALSE)
    # matrix of survival probs at last_time
    cond_surv <- .ll_survival(object, data = cond_dat, pars = pars,
                              survprob = TRUE)
    if (is.vector(cond_surv))
      cond_surv <- t(cond_surv) # transform if only one individual
    cond_surv[, (last_time == 0)] <- 1 # avoids possible NaN due to numerical inaccuracies
    surv <- lapply(surv_t, function(x) { # conditional survival probs
      vec <- x / cond_surv
      vec[vec > 1] <- 1 # if t was before last_time then surv prob may be > 1
      vec
    })
  } else surv <- surv_t

  # Summarise posterior draws to get median and ci
  out <- do.call("rbind", lapply(
    seq_along(surv), function(x, standardise, id_list, time_seq, prob) {
      val <- median_and_bounds(surv[[x]], prob, na.rm = TRUE)
      if (standardise) {
        data.frame(TIMEVAR = unique(time_seq[[x]]), val$med, val$lb, val$ub)
      } else
        data.frame(IDVAR = id_list, TIMEVAR = time_seq[[x]],
                   val$med, val$lb, val$ub)
    }, standardise, id_list, time_seq, prob))
  out <- data.frame(out)
  colnames(out) <- c(if ("IDVAR" %in% colnames(out)) id_var,
                     time_var, "survpred", "ci_lb", "ci_ub")
  if (id_var %in% colnames(out)) {
    # data has id column -- sort by id and time
    out <- out[order(out[, id_var], out[, time_var]), , drop = FALSE]
  } else {
    # data does not have id column -- sort by time only
    out <- out[order(out[, time_var]), , drop = FALSE]
  }
  rownames(out) <- NULL

  # temporary hack so that predictive_error can call posterior_survfit
  # with two separate conditioning times...
  fn <- tryCatch(sys.call(-1)[[1]], error = function(e) NULL)
  if (!is.null(fn) &&
      grepl("predictive_error", deparse(fn), fixed = TRUE) &&
      "last_time2" %in% names(dots)) {
    last_time2 <- ndE[[dots$last_time2]]
    cond_dat2 <- .pp_data_jm(object, newdataLong = ndL, newdataEvent = ndE,
                             ids = id_list, etimes = last_time2,
                             long_parts = FALSE)
    cond_surv2 <- .ll_survival(object, data = cond_dat2, pars = pars,
                               survprob = TRUE)
    if (is.vector(cond_surv2))
      cond_surv2 <- t(cond_surv2) # transform if only one individual
    cond_surv2[, (last_time2 == 0)] <- 1 # avoids possible NaN due to numerical inaccuracies
    surv2 <- lapply(surv_t, function(x) { # conditional survival probs
      vec <- x / cond_surv2
      vec[vec > 1] <- 1 # if t was before last_time then surv prob may be > 1
      vec
    })
    out2 <- do.call("rbind", lapply(
      seq_along(surv2), function(x, standardise, id_list, time_seq, prob) {
        val <- median_and_bounds(surv2[[x]], prob, na.rm = TRUE)
        data.frame(IDVAR = id_list, TIMEVAR = time_seq[[x]], val$med)
      }, standardise, id_list, time_seq, prob))
    out2 <- data.frame(out2)
    colnames(out2) <- c(id_var, time_var, "survpred_eventtime")
    # sort on plain vectors (was order() on one-column data frames via
    # 'drop = F', which order() does not support)
    out2 <- out2[order(out2[[id_var]], out2[[time_var]]), , drop = FALSE]
    rownames(out2) <- NULL
    out <- merge(out, out2)
  }
  class(out) <- c("survfit.stanjm", "data.frame")
  # NB: a previous version also attached 'offset = offset' here, but no
  # 'offset' object exists in this scope, so the attribute silently captured
  # the stats::offset() function; the meaningless attribute is dropped.
  out <- structure(out, id_var = id_var, time_var = time_var,
                   extrapolate = extrapolate, control = control,
                   standardise = standardise, condition = condition,
                   last_time = last_time, ids = id_list,
                   draws = draws, seed = seed)
  if (dynamic && !is.null(newdataEvent)) {
    out <- structure(out, b_new = b_new, acceptance_rate = acceptance_rate)
  }
  out
}


#' Plot the estimated subject-specific or marginal survival function
#'
#' This generic \code{plot} method for \code{survfit.stanjm} objects will
#' plot the estimated subject-specific or marginal survival function
#' using the data frame returned by a call to \code{\link{posterior_survfit}}.
#' The call to \code{posterior_survfit} should ideally have included an
#' "extrapolation" of the survival function, obtained by setting the
#' \code{extrapolate} argument to \code{TRUE}.
#'
#' @method plot survfit.stanjm
#' @export
#' @importFrom ggplot2 ggplot aes_string geom_line geom_ribbon
#'   facet_wrap labs coord_cartesian
#'
#' @templateVar idsArg ids
#' @templateVar labsArg xlab,ylab
#' @templateVar scalesArg facet_scales
#' @templateVar cigeomArg ci_geom_args
#' @template args-ids
#' @template args-labs
#' @template args-scales
#' @template args-ci-geom-args
#'
#' @param x A data frame and object of class \code{survfit.stanjm}
#'   returned by a call to the function \code{\link{posterior_survfit}}.
#'   The object contains point estimates and uncertainty interval limits
#'   for estimated values of the survival function.
#' @param limits A quoted character string specifying the type of limits to
#'   include in the plot. Can be one of: \code{"ci"} for the Bayesian
#'   posterior uncertainty interval for the estimated survival probability
#'   (often known as a credible interval); or \code{"none"} for no interval
#'   limits.
#' @param ... Optional arguments passed to
#'   \code{\link[ggplot2:geom_path]{geom_line}} and used to control features
#'   of the plotted survival function.
#'
#' @return The plot method returns a \code{ggplot} object, also of class
#'   \code{plot.survfit.stanjm}. This object can be further customised using the
#'   \pkg{ggplot2} package. It can also be passed to the function
#'   \code{plot_stack_jm}.
#'
#' @seealso \code{\link{posterior_survfit}}, \code{\link{plot_stack_jm}},
#'   \code{\link{posterior_traj}}, \code{\link{plot.predict.stanjm}}
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \donttest{
#' # Run example model if not already loaded
#' if (!exists("example_jm")) example(example_jm)
#'
#' # Obtain subject-specific conditional survival probabilities
#' # for all individuals in the estimation dataset.
#' ps1 <- posterior_survfit(example_jm, extrapolate = TRUE)
#'
#' # We then plot the conditional survival probabilities for
#' # a subset of individuals
#' plot(ps1, ids = c(7,13,15))
#' # We can change or add attributes to the plot
#' plot(ps1, ids = c(7,13,15), limits = "none")
#' plot(ps1, ids = c(7,13,15), xlab = "Follow up time")
#' plot(ps1, ids = c(7,13,15), ci_geom_args = list(fill = "red"),
#'      color = "blue", linetype = 2)
#' plot(ps1, ids = c(7,13,15), facet_scales = "fixed")
#'
#' # Since the returned plot is also a ggplot object, we can
#' # modify some of its attributes after it has been returned
#' plot1 <- plot(ps1, ids = c(7,13,15))
#' plot1 +
#'   ggplot2::theme(strip.background = ggplot2::element_blank()) +
#'   ggplot2::coord_cartesian(xlim = c(0, 15)) +
#'   ggplot2::labs(title = "Some plotted survival functions")
#'
#' # We can also combine the plot(s) of the estimated
#' # subject-specific survival functions, with plot(s)
#' # of the estimated longitudinal trajectories for the
#' # same individuals
#' ps1 <- posterior_survfit(example_jm, ids = c(7,13,15))
#' pt1 <- posterior_traj(example_jm, ids = c(7,13,15))
#' plot_surv <- plot(ps1)
#' plot_traj <- plot(pt1, vline = TRUE, plot_observed = TRUE)
#' plot_stack_jm(plot_traj, plot_surv)
#'
#' # Lastly, let us plot the standardised survival function
#' # based on all individuals in our estimation dataset
#' ps2 <- posterior_survfit(example_jm, standardise = TRUE, times = 0,
#'                          control = list(epoints = 20))
#' plot(ps2)
#' }
#' }
plot.survfit.stanjm <- function(x, ids = NULL,
                                limits = c("ci", "none"),
                                xlab = NULL, ylab = NULL,
                                facet_scales = "free",
                                ci_geom_args = NULL, ...) {
  limits <- match.arg(limits)
  ci <- (limits == "ci")
  standardise <- attr(x, "standardise")
  id_var <- attr(x, "id_var")
  time_var <- attr(x, "time_var")
  if (is.null(xlab)) xlab <- paste0("Time (", time_var, ")")
  if (is.null(ylab)) ylab <- "Event free probability"
  if (!is.null(ids)) {
    if (standardise)
      stop("'ids' argument cannot be specified when plotting standardised ",
           "survival probabilities.")
    if (!id_var %in% colnames(x))
      stop("Bug found: could not find 'id_var' column in the data frame.")
    ids_missing <- which(!ids %in% x[[id_var]])
    if (length(ids_missing))
      # was 'ids[[ids_missing]]': '[[' with a vector index performs recursive
      # indexing and errors when more than one id is missing; '[' is required
      stop("The following 'ids' are not present in the survfit.stanjm object: ",
           paste(ids[ids_missing], collapse = ", "), call. = FALSE)
    x <- x[(x[[id_var]] %in% ids), , drop = FALSE]
  } else {
    ids <- if (!standardise) attr(x, "ids") else NULL
  }
  if (!standardise) x$id <- factor(x[[id_var]])
  x$time <- x[[time_var]]

  geom_defaults <- list(color = "black")
  geom_args <- set_geom_args(geom_defaults, ...)
  lim_defaults <- list(alpha = 0.3)
  lim_args <- do.call("set_geom_args",
                      c(defaults = list(lim_defaults), ci_geom_args))

  if ((!standardise) && (length(ids) > 60L)) {
    stop("Too many individuals to plot for. Perhaps consider limiting ",
         "the number of individuals by specifying the 'ids' argument.")
  }
  # Build the base graph once; the original duplicated this block in two
  # branches that differed only by the presence of facet_wrap()
  graph <- ggplot(x, aes_string(x = "time", y = "survpred")) +
    theme_bw() +
    do.call("geom_line", geom_args) +
    coord_cartesian(ylim = c(0, 1))
  if ((!standardise) && (length(ids) > 1L))
    graph <- graph + facet_wrap(~ id, scales = facet_scales)
  if (ci) {
    lim_mapp <- list(mapping = aes_string(ymin = "ci_lb", ymax = "ci_ub"))
    graph_limits <- do.call("geom_ribbon", c(lim_mapp, lim_args))
  } else graph_limits <- NULL

  ret <- graph + graph_limits + labs(x = xlab, y = ylab)
  class_ret <- class(ret)
  class(ret) <- c("plot.survfit.stanjm", class_ret)
  ret
}

#' @rdname plot.survfit.stanjm
#' @export
#' @importFrom ggplot2 ggplot_build facet_wrap aes_string expand_limits
#'
#' @description The \code{plot_stack_jm} function takes arguments containing
#'   the plots of the estimated
#'   subject-specific longitudinal trajectory (or trajectories if a multivariate
#'   joint model was estimated) and the plot of the estimated subject-specific
#'   survival function and combines them into a single figure. This is most
#'   easily understood by running the \strong{Examples} below.
#'
#' @param yplot An object of class \code{plot.predict.stanjm}, returned by a
#'   call to the generic \code{\link[=plot.predict.stanjm]{plot}} method for
#'   objects of class \code{predict.stanjm}. If there is more than one
#'   longitudinal outcome, then a list of such objects can be provided.
#' @param survplot An object of class \code{plot.survfit.stanjm}, returned by a
#'   call to the generic \code{\link[=plot.survfit.stanjm]{plot}} method for
#'   objects of class \code{survfit.stanjm}.
#'
#' @return \code{plot_stack_jm} returns an object of class
#'   \code{\link[bayesplot]{bayesplot_grid}} that includes plots of the
#'   estimated subject-specific longitudinal trajectories stacked on top of the
#'   associated subject-specific survival curve.
#'
#' @seealso \code{\link{plot.predict.stanjm}}, \code{\link{plot.survfit.stanjm}},
#'   \code{\link{posterior_predict}}, \code{\link{posterior_survfit}}
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \donttest{
#' if (!exists("example_jm")) example(example_jm)
#' ps1 <- posterior_survfit(example_jm, ids = c(7,13,15))
#' pt1 <- posterior_traj(example_jm, ids = c(7,13,15), extrapolate = TRUE)
#' plot_surv <- plot(ps1)
#' plot_traj <- plot(pt1, vline = TRUE, plot_observed = TRUE)
#' plot_stack_jm(plot_traj, plot_surv)
#' }
#' }
plot_stack_jm <- function(yplot, survplot) {

  if (!is(yplot, "list")) yplot <- list(yplot)

  # Validate the classes of the supplied plot objects
  lapply(yplot, function(x) {
    if (!is(x, "plot.predict.stanjm"))
      stop("'yplot' should be an object of class 'plot.predict.stanjm', ",
           "or a list of such objects.", call. = FALSE)
  })
  if (!is(survplot, "plot.survfit.stanjm"))
    stop("'survplot' should be an object of class 'plot.survfit.stanjm'.",
         call. = FALSE)

  # Built ggplot objects expose panel layout info (facet -> id mapping)
  y_build <- lapply(yplot, ggplot_build)
  y_layout <- lapply(y_build, function(x) x$layout$panel_layout)
  y_ids <- lapply(y_layout, function(x)
    if (!"id" %in% colnames(x)) NULL else x[["id"]])

  e_build <- ggplot_build(survplot)
  e_layout <- e_build$layout$panel_layout
  e_ids <- if (!"id" %in% colnames(e_layout)) NULL else e_layout[["id"]]

  # The longitudinal and survival plots must cover the same individuals
  if (!is.null(e_ids)) {
    lapply(y_ids, function(x, e_ids) {
      if (!all(sort(x) == sort(e_ids))) {
        stop("The individuals in the 'yplot' and 'survplot' appear to differ. Please ",
             "reestimate the plots using a common 'ids' argument.", call. = FALSE)
      }
    }, e_ids = e_ids)
  }

  # Recover any vertical line (last observation time) from each yplot
  vline <- lapply(seq_along(y_build), function(m) {
    L <- length(y_build[[m]]$data)
    dat <- y_build[[m]]$data[[L]]
    if (!"xintercept" %in% colnames(dat)) {
      found <- FALSE
    } else {
      found <- TRUE
      dat <- dat[, c("PANEL", "xintercept"), drop = FALSE]
      if (NROW(y_layout[[m]]) > 1) {
        panel_id_map <- y_layout[[m]][, c("PANEL", "id"), drop = FALSE]
        dat <- merge(dat, panel_id_map, by = "PANEL")
      }
      dat <- dat[, grep("PANEL", colnames(dat), invert = TRUE), drop = FALSE]
      colnames(dat) <- gsub("xintercept", paste0("xintercept", m),
                            colnames(dat), fixed = TRUE)
    }
    list(dat = dat, found = found)
  })
  # vapply (not sapply) guarantees a logical result even for empty input
  vline_found <- any(vapply(vline, function(x) x$found, logical(1)))
  if (!vline_found)
    # message() (was cat()) so the notice goes to stderr and is suppressible
    message("Could not find vertical line indicating last observation time in the ",
            "plot of the longitudinal trajectory; you may wish to plot the longitudinal ",
            "trajectories again with 'vline = TRUE' to aid interpretation.")
  vline_dat <- lapply(vline, function(x) x$dat)
  vline_alldat <- Reduce(function(...) merge(..., all = TRUE), vline_dat)
  vline_alldat$xintercept_max <-
    apply(vline_alldat[, grep("id", colnames(vline_alldat), invert = TRUE),
                       drop = FALSE], 1, max)

  # Common x-axis upper limit across all stacked panels
  xmax <- max(vapply(c(y_build, list(e_build)),
                     function(i) max(i$data[[1]]$x), numeric(1)))

  if ((!is.null(e_ids)) && (length(e_ids) > 20L)) {
    stop("Unable to generate 'plot_stack_jm' for this many individuals.",
         call. = FALSE)
  } else if ((!is.null(e_ids)) && (length(e_ids) > 3L)) {
    warning("'plot_stack_jm' is unlikely to be legible with more than a few individuals.",
            immediate. = TRUE, call. = FALSE)
  }

  if (!is.null(e_ids)) {
    graph_facet <- facet_wrap(~ id, scales = "free", nrow = 1)
  } else {
    graph_facet <- NULL
  }
  if (vline_found) {
    graph_vline <- geom_vline(aes_string(xintercept = "xintercept_max"),
                              vline_alldat, linetype = 2)
  } else {
    graph_vline <- NULL
  }
  graph_xlims <- expand_limits(x = c(0, xmax))

  survplot_updated <- survplot + graph_xlims + graph_facet + graph_vline
  yplot_updated <- lapply(yplot, function(x) x + graph_xlims + graph_facet)

  bayesplot::bayesplot_grid(
    plots = c(yplot_updated, list(survplot_updated)),
    grid_args = list(ncol = 1)
  )
}


# ------------------ exported but doc kept internal

#' Generic print method for \code{survfit.stanjm} objects
#'
#' @rdname print.survfit.stanjm
#' @method print survfit.stanjm
#' @keywords internal
#' @export
#' @param x An object of class \code{survfit.stanjm}, returned by a call to
#'   \code{\link{posterior_survfit}}.
#' @param digits Number of digits to use for formatting the time variable and
#'   the survival probabilities.
#' @param ... Ignored.
#'
print.survfit.stanjm <- function(x, digits = 4, ...) {
  time_var <- attr(x, "time_var")
  x <- as.data.frame(x)
  # Round and pad the time and survival probability columns for display
  sel <- c(time_var, "survpred", "ci_lb", "ci_ub")
  for (i in sel)
    x[[i]] <- format(round(x[[i]], digits), nsmall = digits)
  print(x, quote = FALSE)
  invisible(x)
}


# ------------------ internal

# default plotting attributes
.PP_FILL <- "skyblue"
.PP_DARK <- "skyblue4"
.PP_VLINE_CLR <- "#222222"
.PP_YREP_CLR <- "#487575"
.PP_YREP_FILL <- "#222222"
rstanarm/R/posterior_interval.R0000644000176200001440000001231113722762571016371 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Posterior uncertainty intervals #' #' For models fit using MCMC (\code{algorithm="sampling"}) or one of the #' variational approximations (\code{"meanfield"} or \code{"fullrank"}), the #' \code{posterior_interval} function computes Bayesian posterior uncertainty #' intervals. These intervals are often referred to as \emph{credible} #' intervals, but we use the term \emph{uncertainty} intervals to highlight the #' fact that wider intervals correspond to greater uncertainty. #' #' @aliases posterior_interval #' @export #' #' @templateVar stanregArg object #' @template args-stanreg-object #' @template args-dots-ignored #' @template args-pars #' @template args-regex-pars #' @param prob A number \eqn{p \in (0,1)}{p (0 < p < 1)} indicating the desired #' probability mass to include in the intervals. The default is to report #' \eqn{90}\% intervals (\code{prob=0.9}) rather than the traditionally used #' \eqn{95}\% (see Details). #' @param type The type of interval to compute. Currently the only option is #' \code{"central"} (see Details). A central \eqn{100p}\% #' interval is defined by the \eqn{\alpha/2} and \eqn{1 - \alpha/2} quantiles, #' where \eqn{\alpha = 1 - p}. #' #' @return A matrix with two columns and as many rows as model parameters (or #' the subset of parameters specified by \code{pars} and/or #' \code{regex_pars}). 
For a given value of \code{prob}, \eqn{p}, the columns #' correspond to the lower and upper \eqn{100p}\% interval limits and have the #' names \eqn{100\alpha/2}\% and \eqn{100(1 - \alpha/2)}\%, where \eqn{\alpha #' = 1-p}. For example, if \code{prob=0.9} is specified (a \eqn{90}\% #' interval), then the column names will be \code{"5\%"} and \code{"95\%"}, #' respectively. #' #' @details #' \subsection{Interpretation}{ #' Unlike for a frenquentist confidence interval, it is valid to say that, #' conditional on the data and model, we believe that with probability \eqn{p} #' the value of a parameter is in its \eqn{100p}\% posterior interval. This #' intuitive interpretation of Bayesian intervals is often erroneously applied #' to frequentist confidence intervals. See Morey et al. (2015) for more details #' on this issue and the advantages of using Bayesian posterior uncertainty #' intervals (also known as credible intervals). #' } #' \subsection{Default 90\% intervals}{ #' We default to reporting \eqn{90}\% intervals rather than \eqn{95}\% intervals #' for several reasons: #' \itemize{ #' \item Computational stability: \eqn{90}\% intervals are more stable than #' \eqn{95}\% intervals (for which each end relies on only \eqn{2.5}\% of the #' posterior draws). \item Relation to Type-S errors (Gelman and Carlin, 2014): #' \eqn{95}\% of the mass in a \eqn{90}\% central interval is above the lower #' value (and \eqn{95}\% is below the upper value). For a parameter #' \eqn{\theta}, it is therefore easy to see if the posterior probability that #' \eqn{\theta > 0} (or \eqn{\theta < 0}) is larger or smaller than \eqn{95}\%. #' } #' Of course, if \eqn{95}\% intervals are desired they can be computed by #' specifying \code{prob=0.95}. #' } #' \subsection{Types of intervals}{ #' Currently \code{posterior_interval} only computes central intervals because #' other types of intervals are rarely useful for the models that \pkg{rstanarm} #' can estimate. 
Additional possibilities may be provided in future releases as #' more models become available. #' } #' #' @seealso #' \code{\link{confint.stanreg}}, which, for models fit using optimization, can #' be used to compute traditional confidence intervals. #' #' \code{\link{predictive_interval}} for predictive intervals. #' #' @template reference-gelman-carlin #' @template reference-morey #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' if (!exists("example_model")) example(example_model) #' posterior_interval(example_model) #' posterior_interval(example_model, regex_pars = "herd") #' posterior_interval(example_model, pars = "period2", prob = 0.5) #' } posterior_interval.stanreg <- function(object, prob = 0.9, type = "central", pars = NULL, regex_pars = NULL, ...) { if (!identical(type, "central")) stop("Currently the only option for 'type' is 'central'.", call. = FALSE) mat <- as.matrix.stanreg(object, pars = pars, regex_pars = regex_pars) rstantools::posterior_interval(mat, prob = prob) } rstanarm/R/stan_gamm4.R0000644000176200001440000004367714370470372014507 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2016 Simon N. Wood # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian generalized linear additive models with optional group-specific #' terms via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Bayesian inference for GAMMs with flexible priors. #' #' @export #' @templateVar fun stan_gamm4 #' @templateVar pkg gamm4 #' @templateVar pkgfun gamm4 #' @template return-stanreg-object #' @template see-also #' @template args-prior_intercept #' @template args-priors #' @template args-prior_aux #' @template args-prior_smooth #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' @template args-QR #' @template args-sparse #' #' @param formula,random,family,data,knots,drop.unused.levels Same as for #' \code{\link[gamm4]{gamm4}}. \emph{We strongly advise against #' omitting the \code{data} argument}. Unless \code{data} is specified (and is #' a data frame) many post-estimation functions (including \code{update}, #' \code{loo}, \code{kfold}) are not guaranteed to work properly. #' @param subset,weights,na.action Same as \code{\link[stats]{glm}}, #' but rarely specified. #' @param ... Further arguments passed to \code{\link[rstan:stanmodel-method-sampling]{sampling}} (e.g. #' \code{iter}, \code{chains}, \code{cores}, etc.) or to #' \code{\link[rstan:stanmodel-method-vb]{vb}} (if \code{algorithm} is \code{"meanfield"} or #' \code{"fullrank"}). #' @param prior_covariance Cannot be \code{NULL}; see \code{\link{decov}} for #' more information about the default arguments. #' #' @details The \code{stan_gamm4} function is similar in syntax to #' \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package. 
But rather than performing #' (restricted) maximum likelihood estimation with the \pkg{lme4} package, #' the \code{stan_gamm4} function utilizes MCMC to perform Bayesian #' estimation. The Bayesian model adds priors on the common regression #' coefficients (in the same way as \code{\link{stan_glm}}), priors on the #' standard deviations of the smooth terms, and a prior on the decomposition #' of the covariance matrices of any group-specific parameters (as in #' \code{\link{stan_glmer}}). Estimating these models via MCMC avoids #' the optimization issues that often crop up with GAMMs and provides better #' estimates for the uncertainty in the parameter estimates. #' #' See \code{\link[gamm4]{gamm4}} for more information about the model #' specicification and \code{\link{priors}} for more information about the #' priors on the main coefficients. The \code{formula} should include at least #' one smooth term, which can be specified in any way that is supported by the #' \code{\link[mgcv]{jagam}} function in the \pkg{mgcv} package. The #' \code{prior_smooth} argument should be used to specify a prior on the unknown #' standard deviations that govern how smooth the smooth function is. The #' \code{prior_covariance} argument can be used to specify the prior on the #' components of the covariance matrix for any (optional) group-specific terms. #' The \code{\link[gamm4]{gamm4}} function in the \pkg{gamm4} package uses #' group-specific terms to implement the departure from linearity in the smooth #' terms, but that is not the case for \code{stan_gamm4} where the group-specific #' terms are exactly the same as in \code{\link{stan_glmer}}. #' #' The \code{plot_nonlinear} function creates a ggplot object with one facet for #' each smooth function specified in the call to \code{stan_gamm4} in the case #' where all smooths are univariate. 
A subset of the smooth functions can be #' specified using the \code{smooths} argument, which is necessary to plot a #' bivariate smooth or to exclude the bivariate smooth and plot the univariate #' ones. In the bivariate case, a plot is produced using #' \code{\link[ggplot2]{geom_contour}}. In the univariate case, the resulting #' plot is conceptually similar to \code{\link[mgcv]{plot.gam}} except the #' outer lines here demark the edges of posterior uncertainty intervals #' (credible intervals) rather than confidence intervals and the inner line #' is the posterior median of the function rather than the function implied #' by a point estimate. To change the colors used in the plot see #' \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}}. #' #' @references #' Crainiceanu, C., Ruppert D., and Wand, M. (2005). Bayesian analysis for #' penalized spline regression using WinBUGS. \emph{Journal of Statistical #' Software}. \strong{14}(14), 1--22. #' \url{https://www.jstatsoft.org/article/view/v014i14} #' #' @seealso The vignette for \code{stan_glmer}, which also discusses #' \code{stan_gamm4}. \url{https://mc-stan.org/rstanarm/articles/} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' # from example(gamm4, package = "gamm4"), prefixing gamm4() call with stan_ #' \donttest{ #' dat <- mgcv::gamSim(1, n = 400, scale = 2) ## simulate 4 term additive truth #' ## Now add 20 level random effect `fac'... 
#' dat$fac <- fac <- as.factor(sample(1:20, 400, replace = TRUE))
#' dat$y <- dat$y + model.matrix(~ fac - 1) %*% rnorm(20) * .5
#'
#' br <- stan_gamm4(y ~ s(x0) + x1 + s(x2), data = dat, random = ~ (1 | fac),
#'                  chains = 1, iter = 500) # for example speed
#' print(br)
#' plot_nonlinear(br)
#' plot_nonlinear(br, smooths = "s(x0)", alpha = 2/3)
#' }
#' }
stan_gamm4 <- function(formula, random = NULL, family = gaussian(), data,
                       weights = NULL, subset = NULL, na.action, knots = NULL,
                       drop.unused.levels = TRUE, ...,
                       prior = default_prior_coef(family),
                       prior_intercept = default_prior_intercept(family),
                       prior_smooth = exponential(autoscale = FALSE),
                       prior_aux = exponential(autoscale=TRUE),
                       prior_covariance = decov(), prior_PD = FALSE,
                       algorithm = c("sampling", "meanfield", "fullrank"),
                       adapt_delta = NULL, QR = FALSE, sparse = FALSE) {
  # --- validate inputs -----------------------------------------------------
  data <- validate_data(data, if_missing = list())
  family <- validate_family(family)
  if (length(mgcv::interpret.gam(formula)$smooth.spec) == 0) {
    stop("Formula must have at least one smooth term to use stan_gamm4.", call. = FALSE)
  }
  # --- optional group-specific terms: build the lme4 model frame -----------
  if (!is.null(random)) {
    # splice the bars term from 'random' onto the fixed-effects part of the
    # gam formula so lme4::glFormula can construct the random-effects terms
    fake.formula <- as.character(mgcv::interpret.gam(formula)$fake.formula)
    form <- paste(fake.formula[2], fake.formula[1], fake.formula[3],
                  "+", random[2], collapse = " ")
    glmod <- lme4::glFormula(as.formula(form), data, family = gaussian,
                             subset, weights, na.action,
                             control = make_glmerControl(
                               ignore_x_scale = prior$autoscale %ORifNULL% FALSE
                             ))
    data <- glmod$fr
    weights <- validate_weights(glmod$fr$weights)
  } else {
    weights <- validate_weights(weights)
    glmod <- NULL
  }
  # --- set up the smooth-term design via mgcv::jagam -----------------------
  if (family$family == "binomial") {
    data$temp_y <- rep(1, NROW(data)) # work around jagam bug
    temp_formula <- update(formula, temp_y ~ .)
    jd <- mgcv::jagam(formula = temp_formula, family = gaussian(), data = data,
                      file = tempfile(fileext = ".jags"), weights = NULL,
                      na.action = na.action, offset = NULL, knots = knots,
                      drop.unused.levels = drop.unused.levels,
                      diagonalize = TRUE)
    if (!is.null(random)) {
      y <- data[, as.character(formula[2L])]
    } else {
      y <- eval(formula[[2L]], data)
    }
    # convert a proportion-of-trials response into a (successes, failures) matrix
    if (binom_y_prop(y, family, weights)) {
      y1 <- as.integer(as.vector(y) * weights)
      y <- cbind(y1, y0 = weights - y1)
      weights <- double(0)
    }
  } else {
    jd <- mgcv::jagam(formula = formula, family = gaussian(), data = data,
                      file = tempfile(fileext = ".jags"), weights = NULL,
                      na.action = na.action, offset = NULL, knots = knots,
                      drop.unused.levels = drop.unused.levels,
                      diagonalize = TRUE)
    y <- jd$jags.data$y
  }
  # there is no offset allowed by gamm4::gamm4
  offset <- validate_offset(as.vector(model.offset(jd$pregam$model)), y = y)
  # --- split the jagam design into parametric columns and per-smooth blocks
  X <- jd$jags.data$X
  mark <- which(colnames(X) != "")
  colnames(X) <- colnames(jd$pregam$X) <- jd$pregam$term.names
  S <- lapply(jd$pregam$smooth, FUN = function(s) {
    ranks <- s$rank
    start <- s$first.para
    out <- list()
    for (r in seq_along(ranks)) {
      end <- start + ranks[r] - 1L
      out[[r]] <- X[,start:end, drop = FALSE]
      start <- end + 1L
    }
    return(out)
  })
  if (any(sapply(S, length) > 1)) S <- unlist(S, recursive = FALSE)
  names(S) <- names(jd$pregam$sp)
  X <- X[,mark, drop = FALSE]
  for (s in seq_along(S)) {
    # sometimes elements of S are lists themselves that need to be unpacked
    # before passing to stan_glm.fit (https://github.com/stan-dev/rstanarm/issues/362)
    if (is.list(S[[s]])) S[[s]] <- do.call(cbind, S[[s]])
  }
  X <- c(list(X), S)
  # stan_glm.fit expects empty lists (not NULL) for "flat" priors
  if (is.null(prior)) prior <- list()
  if (is.null(prior_intercept)) prior_intercept <- list()
  if (is.null(prior_aux)) prior_aux <- list()
  if (is.null(prior_smooth)) prior_smooth <- list()
  if (is.null(random)) {
    group <- list()
    prior_covariance <- list()
  } else {
    group <- glmod$reTrms
    group$decov <- prior_covariance
  }
  # --- fit -----------------------------------------------------------------
  algorithm <- match.arg(algorithm)
  stanfit <- stan_glm.fit(x = X, y = y, weights = weights,
                          offset = offset, family = family,
                          prior = prior, prior_intercept = prior_intercept,
                          prior_aux = prior_aux, prior_smooth = prior_smooth,
                          prior_PD = prior_PD, algorithm = algorithm,
                          adapt_delta = adapt_delta, group = group, QR = QR,
                          ...)
  if (algorithm != "optimizing" && !is(stanfit, "stanfit")) return(stanfit)
  if (family$family == "Beta regression") family$family <- "beta"
  X <- do.call(cbind, args = X)
  if (is.null(random))
    Z <- Matrix::Matrix(nrow = NROW(y), ncol = 0, sparse = TRUE)
  else {
    Z <- pad_reTrms(Ztlist = group$Ztlist, cnms = group$cnms,
                    flist = group$flist)$Z
    colnames(Z) <- b_names(names(stanfit), value = TRUE)
  }
  XZ <- cbind(X, Z)
  # make jam object with point estimates, see ?mgcv::sim2jam
  mat <- as.matrix(stanfit)
  mark <- 1:ncol(X)
  jd$pregam$Vp <- cov(mat[,mark, drop = FALSE])
  jd$pregam$coefficients <- colMeans(mat[,mark, drop = FALSE])
  jd$pregam$sig2 <- if ("sigma" %in% colnames(mat)) mean(mat[,"sigma"]) else 1
  # effective degrees of freedom computed from the posterior covariance
  eta <- X %*% t(mat[,mark,drop = FALSE])
  mu <- rowMeans(family$linkinv(eta))
  eta <- rowMeans(eta)
  w <- as.numeric(jd$pregam$w * family$mu.eta(eta) ^ 2 / family$variance(mu))
  XWX <- t(X) %*% (w * X)
  jd$pregam$edf <- rowSums(jd$pregam$Vp * t(XWX)) / jd$pregam$sig2
  class(jd$pregam) <- c("jam", "gam")
  # --- assemble the stanreg object ----------------------------------------
  fit <- nlist(stanfit, family, formula, offset, weights, x = XZ, y = y, data,
               terms = jd$pregam$terms,
               model = if (is.null(random)) jd$pregam$model else glmod$fr,
               call = match.call(expand.dots = TRUE), algorithm,
               glmod = glmod, stan_function = "stan_gamm4")
  out <- stanreg(fit)
  out$jam <- jd$pregam
  class(out) <- c(class(out), "gamm4", if (!is.null(glmod)) "lmerMod")
  return(out)
}

#' @rdname stan_gamm4
#' @export
#' @param x An object produced by \code{stan_gamm4}.
#' @param smooths An optional character vector specifying a subset of the smooth
#'   functions specified in the call to \code{stan_gamm4}. The default is to
#'   include all smooth terms.
#' @param prob For univariate smooths, a scalar between 0 and 1 governing the
#'   width of the uncertainty interval.
#' @param facet_args An optional named list of arguments passed to
#'   \code{\link[ggplot2]{facet_wrap}} (other than the \code{facets} argument).
#' @param alpha,size For univariate smooths, passed to
#'   \code{\link[ggplot2]{geom_ribbon}}. For bivariate smooths, \code{size/2} is
#'   passed to \code{\link[ggplot2]{geom_contour}}.
#'
#' @return \code{plot_nonlinear} returns a ggplot object.
#'
#' @importFrom ggplot2 aes_ aes_string facet_wrap ggplot geom_contour geom_line geom_ribbon labs scale_color_gradient2
#'
plot_nonlinear <- function(x, smooths, ..., prob = 0.9, facet_args = list(),
                           alpha = 1, size = 0.75) {
  validate_stanreg_object(x)
  if (!is(x, "gamm4"))
    stop("Plot only available for models fit using the stan_gamm4 function.")
  # if anything below fails, point the user at the base-graphics fallback
  on.exit(message("try plot(x$jam) instead"))
  scheme <- bayesplot::color_scheme_get()
  XZ <- x$x
  XZ <- XZ[,!grepl("_NEW_", colnames(XZ), fixed = TRUE)]
  labels <- sapply(x$jam$smooth, "[[", "label")
  xnames <- sapply(x$jam$smooth, "[[", "vn")
  names(x$jam$smooth) <- labels
  names(xnames) <- labels
  fs <- sapply(x$jam$smooth, FUN = "inherits", what = "fs.interaction")
  # --- resolve which smooths to plot ---------------------------------------
  if (!missing(smooths)) {
    found <- smooths %in% labels
    if (all(!found)) {
      stop("All specified terms are invalid. Valid terms are: ",
           paste(grep(",", labels, fixed = TRUE, value = TRUE, invert = TRUE),
                 collapse = ", "))
    } else if (any(!found)) {
      warning("The following specified terms were not found and ignored: ",
              paste(smooths[!found], collapse = ", "))
    }
    labels <- smooths[found]
    fs <- fs[found]
    if (!is.matrix(xnames)) xnames <- xnames[found]
  } else smooths <- 1:length(labels)
  B <- as.matrix(x)[, colnames(XZ), drop = FALSE]
  original <- x$jam$model
  bivariate <- any(grepl(",", labels, fixed = TRUE))
  # --- bivariate smooth: contour plot over a 100x100 grid ------------------
  if (bivariate && !any(fs)) {
    if (length(labels) > 1) {
      on.exit(NULL)
      stop("Multivariate functions can only be plotted one at a time; specify 'smooths'.")
    }
    if (length(xnames) > 2)
      stop("Only univariate and bivariate functions can be plotted currently.")
    xrange <- range(original[, xnames[1]])
    yrange <- range(original[, xnames[2]])
    xz <- expand.grid(seq(from = xrange[1], to = xrange[2], length.out = 100),
                      seq(from = yrange[1], to = yrange[2], length.out = 100))
    colnames(xz) <- xnames[1:2]
    plot_data <- data.frame(x = xz[, 1], y = xz[, 2])
    # resample rows of the original data, then overwrite the two plotted
    # covariates with the grid values so predict() gets a full model frame
    nd <- original
    nd <- nd[sample(nrow(xz), size = nrow(xz), replace = TRUE), ]
    nd[[xnames[1]]] <- xz[[xnames[1]]]
    nd[[xnames[2]]] <- xz[[xnames[2]]]
    requireNamespace("mgcv", quietly = TRUE)
    XZ <- predict(x$jam, newdata = nd, type = "lpmatrix")
    incl <- grepl(labels, colnames(B), fixed = TRUE)
    b <- B[, incl, drop = FALSE]
    xz <- XZ[, grepl(labels, colnames(XZ), fixed = TRUE), drop = FALSE]
    plot_data$z <- apply(linear_predictor.matrix(b, xz), 2, FUN = median)
    return(
      ggplot(plot_data, aes_(x = ~x, y = ~y, z = ~z)) +
        geom_contour(aes_string(color = "..level.."), size = size/2) +
        labs(x = xnames[1], y = xnames[2]) +
        scale_color_gradient2(low = scheme[[1]], mid = scheme[[3]],
                              high = scheme[[6]]) +
        bayesplot::theme_default()
    )
  }
  # --- univariate smooths: ribbon + median line per term -------------------
  df_list <- lapply(x$jam$smooth[smooths], FUN = function(s) {
    incl <- s$first.para:s$last.para
    b <- B[, incl, drop = FALSE]
    if (inherits(s, "fs.interaction")) {
      # see mgcv:::plot.fs.interaction
      xx <- original[,s$base$term]
      fac <- original[,s$fterm]
      # one curve per factor level
      out <- by(data.frame(fac, xx), list(fac), FUN = function(df) {
        df <- df[order(df[,2]),]
        names(df) <- c(s$fterm, s$base$term)
        xz <- mgcv::PredictMat(s, df)
        f <- linear_predictor.matrix(b, xz)
        data.frame(
          predictor = df[,2],
          lower = apply(f, 2, quantile, probs = (1 - prob) / 2),
          upper = apply(f, 2, quantile, probs = prob + (1 - prob) / 2),
          middle = apply(f, 2, median),
          term = paste(s$label, df[,1], sep = ".")
        )
      })
      do.call(rbind, args = out)
    } else {
      xz <- XZ[, incl, drop = FALSE]
      x <- original[, s$term]
      ord <- order(x)
      x <- x[ord]
      xz <- xz[ord, , drop=FALSE]
      # for by-variable smooths, keep only rows at the relevant factor level
      if (!is.null(s$by.level)) {
        fac <- original[,s$by][ord]
        mark <- fac == s$by.level
        x <- x[mark]
        xz <- xz[mark, , drop = FALSE]
      }
      f <- linear_predictor.matrix(b, xz)
      data.frame(
        predictor = x,
        lower = apply(f, 2, quantile, probs = (1 - prob) / 2),
        upper = apply(f, 2, quantile, probs = prob + (1 - prob) / 2),
        middle = apply(f, 2, median),
        term = s$label
      )
    }
  })
  plot_data <- do.call(rbind, df_list)
  facet_args[["facets"]] <- ~ term
  if (is.null(facet_args[["scales"]])) facet_args[["scales"]] <- "free"
  if (is.null(facet_args[["strip.position"]])) facet_args[["strip.position"]] <- "left"
  # success: cancel the fallback message registered above
  on.exit(NULL)
  ggplot(plot_data, aes_(x = ~ predictor)) +
    geom_ribbon(aes_(ymin = ~ lower, ymax = ~ upper), fill = scheme[[1]],
                color = scheme[[2]], alpha = alpha, size = size) +
    geom_line(aes_(y = ~ middle), color = scheme[[5]], size = 0.75 * size,
              lineend = "round") +
    labs(y = NULL) +
    do.call(facet_wrap, facet_args) +
    bayesplot::theme_default()
}
rstanarm/R/pp_data.R0000644000176200001440000004457714406606742014057 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright 2015 Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version
3 # of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Assemble the design matrices (and offset) needed to evaluate the linear
# predictor for a fitted stanreg model, optionally for new data. Dispatches
# to the mer/nlmer helpers for models with lme4-style group structure.
pp_data <-
  function(object,
           newdata = NULL,
           re.form = NULL,
           offset = NULL,
           m = NULL,
           ...) {
    validate_stanreg_object(object)
    if (is.mer(object)) {
      if (is.nlmer(object))
        out <- .pp_data_nlmer(object, newdata = newdata, re.form = re.form, m = m, ...)
      else
        out <- .pp_data_mer(object, newdata = newdata, re.form = re.form, m = m, ...)
      # a user-supplied offset overrides whatever the helpers found
      if (!is.null(offset)) out$offset <- offset
      return(out)
    }
    .pp_data(object, newdata = newdata, offset = offset, ...)
  }

# for models without lme4 structure
.pp_data <- function(object, newdata = NULL, offset = NULL, ...) {
  if (is(object, "gamm4")) {
    requireNamespace("mgcv", quietly = TRUE)
    # for GAMMs the design matrix comes from the mgcv "jam" object
    if (is.null(newdata)) x <- predict(object$jam, type = "lpmatrix")
    else x <- predict(object$jam, newdata = newdata, type = "lpmatrix")
    if (is.null(offset)) offset <- object$offset %ORifNULL% rep(0, nrow(x))
    return(nlist(x, offset))
  }
  if (is.null(newdata)) {
    # in-sample: reuse the stored design matrix
    x <- get_x(object)
    if (is.null(offset)) {
      offset <- object$offset %ORifNULL% rep(0, nrow(x))
    }
    if (inherits(object, "betareg")) {
      return(nlist(x, offset, z_betareg = object$z))
    }
    return(nlist(x, offset))
  }
  # out-of-sample: rebuild the model frame/matrix from the terms object
  offset <- .pp_data_offset(object, newdata, offset)
  Terms <- delete.response(terms(object))
  m <- model.frame(Terms, newdata, xlev = object$xlevels)
  if (!is.null(cl <- attr(Terms, "dataClasses")))
    .checkMFClasses(cl, m)
  x <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
  # ordinal models (except scobit-style) have no intercept column
  if (is(object, "polr") && !is_scobit(object))
    x <- x[,colnames(x) != "(Intercept)", drop = FALSE]
  if (inherits(object, "betareg")) {
    # also rebuild the precision-submodel design matrix
    mf <- model.frame(delete.response(object$terms$precision), data = newdata,
                      na.action = object$na.action,
                      xlev = object$levels$precision)
    z_betareg <- model.matrix(object$terms$precision, mf,
                              contrasts = object$contrasts$precision)
    return(nlist(x, offset, z_betareg))
  }
  return(nlist(x, offset))
}

# for models fit using stan_(g)lmer or stan_gamm4
.pp_data_mer <- function(object, newdata, re.form, m = NULL, ...) {
  if (is(object, "gamm4")) {
    requireNamespace("mgcv", quietly = TRUE)
    if (is.null(newdata)) x <- predict(object$jam, type = "lpmatrix")
    else x <- predict(object$jam, newdata = newdata, type = "lpmatrix")
    if (is.null(re.form)) {
      # default to the random part recorded in the original call
      re.form <- as.formula(object$call$random)
      if (length(re.form) == 0) re.form <- NA
      z <- .pp_data_mer_z(object, newdata, re.form, ...)
    } else z <- .pp_data_mer_z(object, newdata, re.form, ...)
  } else {
    x <- .pp_data_mer_x(object, newdata, m = m, ...)
    z <- .pp_data_mer_z(object, newdata, re.form, m = m, ...)
  }
  offset <- model.offset(model.frame(object, m = m))
  if (!is.null(newdata) && (!is.null(offset) || !is.null(object$call$offset))) {
    if (is.jm(object)) {
      form <- lme4::subbars(object$formula[[m]])
      form[2] <- NULL # get rid of response to avoid error that it isn't found in newdata
      mf <- stats::model.frame(form, data = newdata)
      offset <- model.offset(mf)
    } else {
      # re-evaluate the offset expression from the call in the new data
      offset <- offset %ORifNULL% object$call$offset
      offset <- try(eval(offset, newdata), silent = TRUE)
    }
    if (!is.numeric(offset)) offset <- NULL
  }
  return(nlist(x, offset = offset, Zt = z$Zt, Z_names = z$Z_names))
}

# for models fit using stan_nlmer
.pp_data_nlmer <- function(object, newdata, re.form, offset = NULL, m = NULL, ...) {
  inputs <- parse_nlf_inputs(object$glmod$respMod)
  # extra covariate arguments consumed by the self-starting nonlinear function
  if (is.null(newdata)) {
    arg1 <- arg2 <- NULL
  } else if (object$family$link == "inv_SSfol") {
    arg1 <- newdata[[inputs[2]]]
    arg2 <- newdata[[inputs[3]]]
  } else {
    arg1 <- newdata[[inputs[2]]]
    arg2 <- NULL
  }
  f <- formula(object, m = m)
  if (!is.null(re.form) && !is.na(re.form)) {
    # substitute the user's random-effects part into the model formula
    f <- as.character(f)
    f[3] <- as.character(re.form)
    f <- as.formula(f[-1])
  }
  if (is.null(newdata)) newdata <- model.frame(object)
  else {
    # nlformula needs a response column; its value is irrelevant here
    yname <- names(model.frame(object))[1]
    newdata[[yname]] <- 0
  }
  # rebuild an nlformula call from this function's own arguments
  mc <- match.call(expand.dots = FALSE)
  mc$re.form <- mc$offset <- mc$object <- mc$newdata <- NULL
  mc$data <- newdata
  mc$formula <- f
  mc$start <- fixef(object)
  nlf <- nlformula(mc)
  offset <- .pp_data_offset(object, newdata, offset)
  group <- with(nlf$reTrms, pad_reTrms(Ztlist, cnms, flist))
  # re.form = NA means "marginal" predictions: zero out the RE design matrix
  if (!is.null(re.form) && !is(re.form, "formula") && is.na(re.form))
    group$Z@x <- 0
  return(nlist(x = nlf$X, offset = offset, Z = group$Z,
               Z_names = make_b_nms(group), arg1, arg2))
}

# the functions below are heavily based on a combination of
# lme4:::predict.merMod and lme4:::mkNewReTrms, although they do also have
# substantial modifications

# Fixed-effects design matrix (X), possibly rebuilt for new data.
.pp_data_mer_x <- function(object, newdata, m = NULL, ...) {
  x <- get_x(object, m = m)
  if (is.null(newdata)) return(x)
  form <- if (is.null(m)) attr(object$glmod$fr, "formula") else formula(object, m = m)
  L <- length(form)
  # strip the random-effects bars so only the fixed part remains
  form[[L]] <- lme4::nobars(form[[L]])
  RHS <- formula(substitute(~R, list(R = form[[L]])))
  Terms <- terms(object, m = m)
  mf <- model.frame(object, m = m)
  ff <- formula(form)
  vars <- rownames(attr(terms.formula(ff), "factors"))
  mf <- mf[vars]
  # record original factor levels so newdata is coded consistently
  isFac <- vapply(mf, is.factor, FUN.VALUE = TRUE)
  isFac[attr(Terms, "response")] <- FALSE
  orig_levs <- if (length(isFac) == 0) NULL else lapply(mf[isFac], levels)
  mfnew <- model.frame(delete.response(Terms), newdata, xlev = orig_levs)
  x <- model.matrix(RHS, data = mfnew, contrasts.arg = attr(x, "contrasts"))
  return(x)
}

# Random-effects design matrix (transposed, Zt) and the names mapping the
# model's b parameters onto its rows; returns list() when re.form says to
# drop the group-specific terms entirely.
.pp_data_mer_z <- function(object, newdata, re.form = NULL,
                           allow.new.levels = TRUE, na.action = na.pass,
                           m = NULL, ...) {
  # re.form = NA or ~0 both mean "no random effects"
  NAcheck <- !is.null(re.form) && !is(re.form, "formula") && is.na(re.form)
  fmla0check <- (is(re.form, "formula") &&
                   length(re.form) == 2 &&
                   identical(re.form[[2]], 0))
  if (NAcheck || fmla0check) return(list())
  if (is.null(newdata) && is.null(re.form)) {
    Z <- get_z(object, m = m)
    if (!is.stanmvreg(object)) {
      # Z_names not needed for stanreg with no newdata
      return(list(Zt = t(Z)))
    } else {
      # must supply Z_names for stanmvreg since b pars
      # might be for multiple submodels and Zt will only
      # be for one submodel, so their elements may not
      # correspond exactly
      ReTrms <- object$glmod[[m]]$reTrms
      Z_names <- make_b_nms(ReTrms, m = m, stub = get_stub(object))
      return(nlist(Zt = ReTrms$Zt, Z_names))
    }
  } else if (is.null(newdata)) {
    rfd <- mfnew <- model.frame(object, m = m)
  } else if (inherits(object, "gamm4")) {
    requireNamespace("mgcv", quietly = TRUE)
    if (is.null(newdata)) x <- predict(object$jam, type = "lpmatrix")
    else x <- predict(object$jam, newdata = newdata, type = "lpmatrix")
    # drop rows the smooth-term prediction could not evaluate
    NAs <- apply(is.na(x), 1, any)
    rfd <- mfnew <- newdata[!NAs,, drop=FALSE]
    attr(rfd,"na.action") <- "na.omit"
  } else {
    # build separate model frames for the fixed and random parts so that
    # NA handling in the fixed part is propagated to the random part
    terms_fixed <- delete.response(terms(object, fixed.only = TRUE, m = m))
    mfnew <- model.frame(terms_fixed, newdata, na.action = na.action)
    newdata.NA <- newdata
    if (!is.null(fixed.na.action <- attr(mfnew,"na.action"))) {
      newdata.NA <- newdata.NA[-fixed.na.action,]
    }
    tt <- delete.response(terms(object, random.only = TRUE, m = m))
    rfd <- model.frame(tt, newdata.NA, na.action = na.pass)
    if (!is.null(fixed.na.action))
      attr(rfd,"na.action") <- fixed.na.action
  }
  if (is.null(re.form))
    re.form <- justRE(formula(object, m = m))
  if (!inherits(re.form, "formula"))
    stop("'re.form' must be NULL, NA, or a formula.")
  if (length(fit.na.action <- attr(mfnew,"na.action")) > 0) {
    newdata <- newdata[-fit.na.action,]
  }
  ReTrms <- lme4::mkReTrms(lme4::findbars(re.form[[2]]), rfd)
  if (!allow.new.levels && any(vapply(ReTrms$flist, anyNA, NA)))
    stop("NAs are not allowed in prediction data",
         " for grouping variables unless 'allow.new.levels' is TRUE.")
  ns.re <- names(re <- ranef(object, m = m))
  nRnms <- names(Rcnms <- ReTrms$cnms)
  if (!all(nRnms %in% ns.re))
    stop("Grouping factors specified in re.form that were not present in original model.")
  new_levels <- lapply(ReTrms$flist, function(x) levels(factor(x)))
  Zt <- ReTrms$Zt
  Z_names <- make_b_nms(ReTrms, m = m, stub = get_stub(object))
  z <- nlist(Zt = ReTrms$Zt, Z_names)
  return(z)
}

# handle offsets ----------------------------------------------------------

# TRUE when x is NULL or every element is zero (i.e. effectively no offset)
null_or_zero <- function(x) {
  isTRUE(is.null(x) || all(x == 0))
}

# Resolve the offset vector for predictions: from the fitted object when
# newdata is NULL, otherwise from the user (validated) or a zero vector
# (with a warning if the model was originally fit with an offset).
.pp_data_offset <- function(object, newdata = NULL, offset = NULL) {
  if (is.null(newdata)) {
    # get offset from model object (should be null if no offset)
    if (is.null(offset))
      offset <- object$offset %ORifNULL% model.offset(model.frame(object))
  } else {
    if (!is.null(offset))
      stopifnot(length(offset) == nrow(newdata))
    else {
      # if newdata specified but not offset then confirm that model wasn't fit
      # with an offset (warning, not error)
      if (!is.null(object$call$offset) ||
          !null_or_zero(object$offset) ||
          !null_or_zero(model.offset(model.frame(object)))) {
        warning(
          "'offset' argument is NULL but it looks like you estimated ",
          "the model using an offset term.",
          call. = FALSE
        )
      }
      offset <- rep(0, nrow(newdata))
    }
  }
  return(offset)
}

#----------------------- pp_data for joint models --------------------------

# Return the design matrices required for evaluating the linear predictor or
# log-likelihood in post-estimation functions for a \code{stan_jm} model
#
# @param object A stanmvreg object
# @param newdataLong A data frame or list of data frames with the new
#   covariate data for the longitudinal submodel
# @param newdataEvent A data frame with the new covariate data for the
#   event submodel
# @param ids An optional vector of subject IDs specifying which individuals
#   should be included in the returned design matrices.
# @param etimes An optional vector of times at which the event submodel
#   design matrices should be evaluated (also used to determine the
#   quadrature times). If NULL then times are taken to be the eventimes in
#   the fitted object (if newdataEvent is NULL) or in newdataEvent.
# @param long_parts,event_parts A logical specifying whether to return the
#   design matrices for the longitudinal and/or event submodels.
# @return A named list (with components M, Npat, ndL, ndE, yX, tZt,
#   yZnames, eXq, assoc_parts)
.pp_data_jm <- function(object, newdataLong = NULL, newdataEvent = NULL,
                        ids = NULL, etimes = NULL, long_parts = TRUE,
                        event_parts = TRUE) {
  M <- get_M(object)           # number of longitudinal submodels
  id_var <- object$id_var      # name of the subject ID variable
  time_var <- object$time_var  # name of the time variable
  # validate any user-supplied new data before it is used below
  if (!is.null(newdataLong) || !is.null(newdataEvent))
    newdatas <- validate_newdatas(object, newdataLong, newdataEvent)
  # prediction data for longitudinal submodels
  ndL <- if (is.null(newdataLong))
    get_model_data(object)[1:M] else newdatas[1:M]
  # prediction data for event submodel
  ndE <- if (is.null(newdataEvent))
    get_model_data(object)[["Event"]] else newdatas[["Event"]]
  # possibly subset to the requested individuals
  if (!is.null(ids)) {
    ndL <- subset_ids(object, ndL, ids)
    ndE <- subset_ids(object, ndE, ids)
  }
  id_list <- unique(ndE[[id_var]]) # unique subject id list
  # evaluate the last known survival time and status
  if (!is.null(newdataEvent) && is.null(etimes)) {
    # prediction data for the event submodel was provided but
    # no event times were explicitly specified by the user, so
    # they must be evaluated using the data frame (the LHS of the
    # event formula is a Surv() call, evaluated in ndE)
    surv <- eval(formula(object, m = "Event")[[2L]], ndE)
    etimes <- unclass(surv)[,"time"]
    estatus <- unclass(surv)[,"status"]
  } else if (is.null(etimes)) {
    # if no prediction data was provided then event times are
    # taken from the fitted model
    etimes <- object$eventtime[as.character(id_list)]
    estatus <- object$status[as.character(id_list)]
  } else {
    # otherwise, event times ('etimes') are only directly specified for dynamic
    # predictions via posterior_survfit in which case the 'etimes' correspond
    # to the last known survival time and therefore we assume everyone has survived
    # up to that point (ie, set estatus = 0 for all individuals), this is true
    # even if there is an event indicated in the data supplied by the user.
    estatus <- rep(0, length(etimes))
  }
  res <- nlist(M, Npat = length(id_list), ndL, ndE)
  # sanity checks on the longitudinal data: required variables present,
  # no negative times, no observations later than the event time
  if (long_parts && event_parts)
    lapply(ndL, function(x) {
      if (!time_var %in% colnames(x)) STOP_no_var(time_var)
      if (!id_var %in% colnames(x)) STOP_no_var(id_var)
      if (any(x[[time_var]] < 0))
        stop2("Values for the time variable (", time_var,
              ") should not be negative.")
      mt <- tapply(x[[time_var]], factor(x[[id_var]]), max)
      if (any(mt > etimes))
        stop2("There appears to be observation times in the longitudinal data that ",
              "are later than the event time specified in the 'etimes' argument.")
    })
  # response and design matrices for longitudinal submodels
  if (long_parts) {
    # evaluate the response for each submodel in its prediction frame
    y <- lapply(1:M, function(m) eval(formula(object, m = m)[[2L]], ndL[[m]]))
    ydat <- lapply(1:M, function(m) pp_data(object, ndL[[m]], m = m))
    yX <- fetch(ydat, "x")
    yZt <- fetch(ydat, "Zt")
    yZ_names <- fetch(ydat, "Z_names")
    yOffset <- fetch(ydat, "offset")
    flist <- lapply(ndL, function(x) factor(x[[id_var]]))
    res <- c(res, nlist(y, yX, yZt, yZ_names, yOffset, flist))
  }
  # design matrices for event submodel and association structure
  if (event_parts) {
    qnodes <- object$qnodes
    qq <- get_quadpoints(qnodes)
    # map standardised quadrature points/weights onto (0, etimes] per subject
    qtimes <- uapply(qq$points, unstandardise_qpts, 0, etimes)
    qwts <- uapply(qq$weights, unstandardise_qwts, 0, etimes)
    starttime <- deparse(formula(object, m = "Event")[[2L]][[2L]])
    edat <- prepare_data_table(ndE, id_var, time_var = starttime)
    id_rep <- rep(id_list, qnodes + 1)
    times <- c(etimes, qtimes) # times used to design event submodel matrices
    edat <- rolling_merge(edat, ids = id_rep, times = times)
    eXq <- .pp_data_mer_x(object, newdata = edat, m = "Event")
    # association-structure design parts, one element per longitudinal submodel
    assoc_parts <- lapply(1:M, function(m) {
      ymf <- ndL[[m]]
      grp_stuff <- object$grp_stuff[[m]]
      if (grp_stuff$has_grp) {
        grp_stuff <- get_extra_grp_info( # update grp_info with new data
          grp_stuff, flist = ymf, id_var = id_var,
          grp_assoc = grp_stuff$grp_assoc)
      }
      ymf <- prepare_data_table(ymf, id_var = id_var, time_var = time_var,
                                grp_var = grp_stuff$grp_var)
      make_assoc_parts(
        ymf, assoc = object$assoc[,m], id_var = id_var,
        time_var = time_var, ids = id_rep, times = times,
        grp_stuff = grp_stuff, use_function = pp_data,
        object = object, m = m)
    })
    # attach quadrature information as attributes of the assoc_parts list
    assoc_attr <- nlist(.Data = assoc_parts, qnodes, qtimes, qwts, etimes,
                        estatus)
    assoc_parts <- do.call("structure", assoc_attr)
    res <- c(res, nlist(eXq, assoc_parts))
  }
  return(res)
}

# Return a data frame for each submodel that:
#   (1) only includes variables used in the model formula
#   (2) only includes rows contained in the glmod/coxmod model frames
#   (3) ensures that additional variables that are required
#       such as the ID variable or variables used in the
#       interaction-type association structures, are included.
#
# It is necessary to drop unneeded variables though so that
# errors are not encountered if the original data contained
# NA values for variables unrelated to the model formula.
# We generate a data frame here for in-sample predictions
# rather than using a model frame, since some quantities will
# need to be recalculated at quadrature points etc, for example
# in posterior_survfit.
#
# @param object A stanmvreg object.
# @param m Integer specifying which submodel to get the
#   prediction data frame for.
# @return A data frame or list of data frames with all the
#   (unevaluated) variables required for predictions.
get_model_data <- function(object, m = NULL) { validate_stanmvreg_object(object) M <- get_M(object) terms <- terms(object, fixed.only = FALSE) # identify variables to add to the terms objects if (is.jm(object)) { extra_vars <- lapply(1:M, function(m) { # for each submodel loop over the four possible assoc # interaction formulas and collect any variables used forms_m <- object$assoc["which_formulas",][[m]] uapply(forms_m, function(x) { if (length(x)) { rownames(attr(terms.formula(x), "factors")) } else NULL }) }) # also ensure that id_var is in the event data extra_vars$Event <- object$id_var if (!identical(length(terms), length(extra_vars))) stop2("Bug found: terms and extra_vars should be same length.") # add the extra variables to the terms formula for each submodel terms <- xapply(terms, extra_vars, FUN = function(x, y) { lhs <- x[[2L]] rhs <- deparse(x[[3L]], 500L) if (!is.null(y)) rhs <- c(rhs, y) reformulate(rhs, response = lhs) }) datas <- c(object$dataLong, list(object$dataEvent)) } else { datas <- object$data } # identify rows that were in the model frame row_nms <- lapply(model.frame(object), rownames) # drop rows and variables not required for predictions mfs <- xapply(w = terms, x = datas, y = row_nms, FUN = function(w, x, y) get_all_vars(w, x)[y, , drop = FALSE]) mfs <- list_nms(mfs, M, stub = get_stub(object)) if (is.null(m)) mfs else mfs[[m]] } rstanarm/R/doc-rstanarm-package.R0000644000176200001440000001140214414044166016412 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Applied Regression Modeling via RStan #' #' @docType package #' @name rstanarm-package #' @aliases rstanarm #' @useDynLib rstanarm, .registration = TRUE #' #' @import methods #' @importFrom rstan optimizing sampling vb constrain_pars extract #' extract_sparse_parts get_posterior_mean stanc #' @importFrom utils capture.output #' @importFrom RcppParallel RcppParallelLibs #' @import stats #' @import Rcpp #' @import bayesplot #' @import shinystan #' @import rstantools #' @export log_lik posterior_linpred posterior_epred posterior_predict posterior_interval #' @export predictive_interval predictive_error prior_summary bayes_R2 #' @export loo_linpred loo_predict loo_predictive_interval loo_R2 #' @export loo waic kfold loo_compare #' @export launch_shinystan #' #' @description #' \if{html}{ #' \figure{stanlogo.png}{options: width="50" alt="https://mc-stan.org/about/logo/"} #' \emph{Stan Development Team} #' } #' #' The \pkg{rstanarm} package is an appendage to the \pkg{rstan} package that #' enables many of the most common applied regression models to be estimated #' using Markov Chain Monte Carlo, variational approximations to the posterior #' distribution, or optimization. The \pkg{rstanarm} package allows these models #' to be specified using the customary R modeling syntax (e.g., like that of #' \code{\link[stats]{glm}} with a \code{formula} and a \code{data.frame}). #' #' The sections below provide an overview of the modeling functions and #' estimation algorithms used by \pkg{rstanarm}. 
#' #' @details #' The set of models supported by \pkg{rstanarm} is large (and will continue to #' grow), but also limited enough so that it is possible to integrate them #' tightly with the \code{\link{pp_check}} function for graphical posterior #' predictive checks with \pkg{\link[bayesplot:bayesplot-package]{bayesplot}} and the #' \code{\link{posterior_predict}} function to easily estimate the effect of #' specific manipulations of predictor variables or to predict the outcome in a #' training set. #' #' The objects returned by the \pkg{rstanarm} modeling functions are called #' \code{\link[=stanreg-objects]{stanreg}} objects. In addition to all of the #' typical \code{\link[=stanreg-methods]{methods}} defined for fitted model #' objects, stanreg objects can be passed to the \code{\link[loo]{loo}} function #' in the \pkg{loo} package for model comparison or to the #' \code{\link[shinystan]{launch_shinystan}} function in the \pkg{shinystan} #' package in order to visualize the posterior distribution using the ShinyStan #' graphical user interface. See the \pkg{rstanarm} vignettes for more details #' about the entire process. #' #' @inheritSection available-models Modeling functions #' @inheritSection available-algorithms Estimation algorithms #' #' @section Prior distributions: #' See \link[=priors]{priors help page} and the vignette #' \href{https://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior Distributions for rstanarm Models}} #' for an overview of the various choices the user can make for prior #' distributions. The package vignettes for the modeling functions also provide #' examples of using many of the available priors as well as more detailed #' descriptions of some of the novel priors used by \pkg{rstanarm}. #' #' @seealso #' \itemize{ #' \item \url{https://mc-stan.org/} for more information on the Stan C++ #' package used by \pkg{rstanarm} for model fitting. 
#' \item \url{https://github.com/stan-dev/rstanarm/issues/} to submit a bug #' report or feature request. #' \item \url{https://discourse.mc-stan.org} to ask a #' question about \pkg{rstanarm} on the Stan-users forum. #' } #' #' @templateVar armRef \url{https://stat.columbia.edu/~gelman/arm/} #' @templateVar bdaRef \url{https://stat.columbia.edu/~gelman/book/} #' @template reference-lme4 #' @template reference-bda #' @template reference-gelman-hill #' @template reference-stan-manual #' @template reference-loo #' @template reference-bayesvis #' @template reference-muth #' NULL rstanarm/R/misc.R0000644000176200001440000016034314406606742013400 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Logit and inverse logit #' #' @export #' @param x Numeric vector. #' @return A numeric vector the same length as \code{x}. logit <- function(x) stats::qlogis(x) #' @rdname logit #' @export invlogit <- function(x) stats::plogis(x) # Set arguments for sampling # # Prepare a list of arguments to use with \code{rstan::sampling} via # \code{do.call}. # # @param object The stanfit object to use for sampling. 
# @param user_dots The contents of \code{...} from the user's call to # the \code{stan_*} modeling function. # @param user_adapt_delta The value for \code{adapt_delta} specified by the # user. # @param prior Prior distribution list (can be NULL). # @param ... Other arguments to \code{\link[rstan]{sampling}} not coming from # \code{user_dots} (e.g. \code{data}, \code{pars}, \code{init}, etc.) # @return A list of arguments to use for the \code{args} argument for # \code{do.call(sampling, args)}. set_sampling_args <- function(object, prior, user_dots = list(), user_adapt_delta = NULL, ...) { args <- list(object = object, ...) unms <- names(user_dots) for (j in seq_along(user_dots)) { args[[unms[j]]] <- user_dots[[j]] } defaults <- default_stan_control(prior = prior, adapt_delta = user_adapt_delta) if (!"control" %in% unms) { # no user-specified 'control' argument args$control <- defaults } else { # user specifies a 'control' argument if (!is.null(user_adapt_delta)) { # if user specified adapt_delta argument to stan_* then # set control$adapt_delta to user-specified value args$control$adapt_delta <- user_adapt_delta } else { # use default adapt_delta for the user's chosen prior args$control$adapt_delta <- defaults$adapt_delta } if (is.null(args$control$max_treedepth)) { # if user's 'control' has no max_treedepth set it to rstanarm default args$control$max_treedepth <- defaults$max_treedepth } } args$save_warmup <- FALSE return(args) } # Default control arguments for sampling # # Called by set_sampling_args to set the default 'control' argument for # \code{rstan::sampling} if none specified by user. This allows the value of # \code{adapt_delta} to depend on the prior. # # @param prior Prior distribution list (can be NULL). # @param adapt_delta User's \code{adapt_delta} argument. # @param max_treedepth Default for \code{max_treedepth}. # @return A list with \code{adapt_delta} and \code{max_treedepth}. 
default_stan_control <- function(prior, adapt_delta = NULL,
                                 max_treedepth = 15L) {
  if (!length(prior)) {
    # flat/no prior: use the generic default unless the user set one
    if (is.null(adapt_delta)) adapt_delta <- 0.95
  } else if (is.null(adapt_delta)) {
    # shrinkage-type priors get a higher adapt_delta to reduce divergences
    adapt_delta <- switch(prior$dist,
                          "R2" = 0.99,
                          "hs" = 0.99,
                          "hs_plus" = 0.99,
                          "lasso" = 0.99,
                          "product_normal" = 0.99,
                          0.95) # default
  }
  nlist(adapt_delta, max_treedepth)
}

# Test if an object is a stanreg object
#
# @param x The object to test.
is.stanreg <- function(x) inherits(x, "stanreg")

# Throw error if object isn't a stanreg object
#
# @param x The object to test.
validate_stanreg_object <- function(x, call. = FALSE) {
  if (!is.stanreg(x))
    stop("Object is not a stanreg object.", call. = call.)
}

# Test for a given family
#
# @param x A character vector (probably x = family(fit)$family)
is.binomial <- function(x) x == "binomial"
is.gaussian <- function(x) x == "gaussian"
is.gamma <- function(x) x == "Gamma"
is.ig <- function(x) x == "inverse.gaussian"
is.nb <- function(x) x == "neg_binomial_2"
is.poisson <- function(x) x == "poisson"
is.beta <- function(x) x == "beta" || x == "Beta regression"

# test if a stanreg object has class clogit
is_clogit <- function(object) {
  is(object, "clogit")
}
# test if a stanreg object has class polr
is_polr <- function(object) {
  is(object, "polr")
}
# test if a stanreg object is a scobit model
is_scobit <- function(object) {
  validate_stanreg_object(object)
  if (!is_polr(object)) return(FALSE)
  # scobit models carry an extra 'alpha' (exponent) parameter
  return("alpha" %in% rownames(object$stan_summary))
}

# Test for a given estimation method
#
# @param x A stanreg object.
used.optimizing <- function(x) {
  x$algorithm == "optimizing"
}
used.sampling <- function(x) {
  x$algorithm == "sampling"
}
used.variational <- function(x) {
  x$algorithm %in% c("meanfield", "fullrank")
}

# Test if stanreg object used stan_[gn]lmer
#
# class 'lmerMod' and a 'glmod' component must be present together;
# anything else indicates an internal inconsistency.
#
# @param x A stanreg object.
is.mer <- function(x) {
  stopifnot(is.stanreg(x))
  check1 <- inherits(x, "lmerMod")
  check2 <- !is.null(x$glmod)
  if (check1 && !check2) {
    stop("Bug found. 'x' has class 'lmerMod' but no 'glmod' component.")
  } else if (!check1 && check2) {
    stop("Bug found. 'x' has 'glmod' component but not class 'lmerMod'.")
  }
  isTRUE(check1 && check2)
}

# Test if stanreg object used stan_nlmer
#
# @param x A stanreg object.
is.nlmer <- function(x) {
  is.mer(x) && inherits(x, "nlmerMod")
}

# Consistent error message to use when something is only available for
# models fit using MCMC
#
# @param what An optional message to prepend to the default message.
STOP_sampling_only <- function(what) {
  msg <- "only available for models fit using MCMC (algorithm='sampling')."
  if (!missing(what))
    msg <- paste(what, msg)
  stop(msg, call. = FALSE)
}

# Consistent error message to use when something is only available for models
# fit using MCMC or VB but not optimization
#
# @param what An optional message to prepend to the default message.
STOP_not_optimizing <- function(what) {
  msg <- "not available for models fit using algorithm='optimizing'."
  if (!missing(what))
    msg <- paste(what, msg)
  stop(msg, call. = FALSE)
}

# Consistent error message to use when something is only available for models
# fit using MCMC or optimization but not VB
#
# @param what An optional message to prepend to the default message.
STOP_not_VB <- function(what) {
  msg <- "not available for models fit using algorithm='meanfield|fullrank'."
  if (!missing(what))
    msg <- paste(what, msg)
  stop(msg, call. = FALSE)
}

# Message to issue when fitting model with ADVI but 'QR=FALSE'.
recommend_QR_for_vb <- function() {
  message(
    "Setting 'QR' to TRUE can often be helpful when using ",
    "one of the variational inference algorithms. ",
    "See the documentation for the 'QR' argument."
  )
}

# Issue warning if high rhat values
#
# @param rhats Vector of rhat values.
# @param threshold Threshold value. If any rhat values are above threshold a
#   warning is issued.
check_rhats <- function(rhats, threshold = 1.1, check_lp = FALSE) { if (!check_lp) rhats <- rhats[!names(rhats) %in% c("lp__", "log-posterior")] if (any(rhats > threshold, na.rm = TRUE)) warning("Markov chains did not converge! Do not analyze results!", call. = FALSE, noBreaks. = TRUE) } # If y is a 1D array keep any names but convert to vector (used in stan_glm) # # @param y Result of calling model.response array1D_check <- function(y) { if (length(dim(y)) == 1L) { nms <- rownames(y) dim(y) <- NULL if (!is.null(nms)) names(y) <- nms } return(y) } # Check for a binomial model with Y given as proportion of successes and weights # given as total number of trials # binom_y_prop <- function(y, family, weights) { if (!is.binomial(family$family)) return(FALSE) yprop <- NCOL(y) == 1L && is.numeric(y) && any(y > 0 & y < 1) && !any(y < 0 | y > 1) if (!yprop) return(FALSE) wtrials <- !identical(weights, double(0)) && all(weights > 0) && all(abs(weights - round(weights)) < .Machine$double.eps^0.5) isTRUE(wtrials) } # Convert 2-level factor to 0/1 fac2bin <- function(y) { if (!is.factor(y)) stop("Bug found: non-factor as input to fac2bin.", call. = FALSE) if (!identical(nlevels(y), 2L)) stop("Bug found: factor with nlevels != 2 as input to fac2bin.", call. = FALSE) as.integer(y != levels(y)[1L]) } # Check weights argument # # @param w The \code{weights} argument specified by user or the result of # calling \code{model.weights} on a model frame. # @return If no error is thrown then \code{w} is returned. validate_weights <- function(w) { if (missing(w) || is.null(w)) { w <- double(0) } else { if (!is.numeric(w)) stop("'weights' must be a numeric vector.", call. = FALSE) if (any(w < 0)) stop("Negative weights are not allowed.", call. = FALSE) } return(w) } # Check offset argument # # @param o The \code{offset} argument specified by user or the result of calling # \code{model.offset} on a model frame. # @param y The result of calling \code{model.response} on a model frame. 
# @return If no error is thrown then \code{o} is returned. validate_offset <- function(o, y) { if (is.null(o)) { o <- double(0) } else { if (length(o) != NROW(y)) stop(gettextf("Number of offsets is %d but should be %d (number of observations)", length(o), NROW(y)), domain = NA, call. = FALSE) } return(o) } # Check family argument # # @param f The \code{family} argument specified by user (or the default). # @return If no error is thrown, then either \code{f} itself is returned (if # already a family) or the family object created from \code{f} is returned (if # \code{f} is a string or function). validate_family <- function(f) { if (is.character(f)) f <- get(f, mode = "function", envir = parent.frame(2)) if (is.function(f)) f <- f() if (!is(f, "family")) stop("'family' must be a family.", call. = FALSE) return(f) } # Check for glmer syntax in formulas for non-glmer models # # @param f The model \code{formula}. # @return Nothing is returned but an error might be thrown validate_glm_formula <- function(f) { if (any(grepl("\\|", f))) stop("Using '|' in model formula not allowed. ", "Maybe you meant to use 'stan_(g)lmer'?", call. = FALSE) } # Check if model formula has something on the LHS of ~ # @param f Model formula # @return FALSE if there is no outcome on the LHS of the formula has_outcome_variable <- function(f) { tt <- terms(as.formula(f)) if (attr(tt, "response") == 0) { return(FALSE) } else { return(TRUE) } } # Check if any variables in a model frame are constants # # exceptions: constant variable of all 1's is allowed and outcomes with all 0s # or 1s are allowed (e.g., for binomial models) # # @param mf A model frame or model matrix # @return If no constant variables are found mf is returned, otherwise an error # is thrown. 
check_constant_vars <- function(mf) {
  mf1 <- mf
  # drop the first (response) column from the check when it is a 2-column
  # (successes/failures) response or a 0/1 outcome
  if (NCOL(mf[, 1]) == 2 || all(mf[, 1] %in% c(0, 1))) {
    mf1 <- mf[, -1, drop=FALSE]
  }
  # constant means: a single unique value that is not 1 (all-1s is the
  # intercept exception)
  lu1 <- function(x) !all(x == 1) && length(unique(x)) == 1
  nocheck <- c("(weights)", "(offset)", "(Intercept)")
  sel <- !colnames(mf1) %in% nocheck
  # NOTE(review): apply() coerces a mixed-type model frame to a character
  # matrix via as.matrix(); in practice constants are still detected, but
  # the coercion is worth keeping in mind if column types ever matter here.
  is_constant <- apply(mf1[, sel, drop=FALSE], 2, lu1)
  if (any(is_constant)) {
    stop("Constant variable(s) found: ",
         paste(names(is_constant)[is_constant], collapse = ", "),
         call. = FALSE)
  }
  return(mf)
}

# Grep for "b" parameters (ranef)
#
# @param x Character vector (often rownames(fit$stan_summary))
# @param ... Passed to grep
b_names <- function(x, ...) {
  grep("^b\\[", x, ...)
}

# Return names of the last dimension in a matrix/array (e.g. colnames if matrix)
#
# @param x A matrix or array
last_dimnames <- function(x) {
  ndim <- length(dim(x))
  dimnames(x)[[ndim]]
}

# Get the correct column name to use for selecting the median
#
# @param algorithm String naming the estimation algorithm (probably
#   \code{fit$algorithm}).
# @return Either \code{"50%"} or \code{"Median"} depending on \code{algorithm}.
select_median <- function(algorithm) {
  switch(algorithm,
         sampling = "50%",
         meanfield = "50%",
         fullrank = "50%",
         optimizing = "Median",
         stop("Bug found (incorrect algorithm name passed to select_median)",
              call. = FALSE))
}

# Regex parameter selection
#
# @param x stanreg object
# @param regex_pars Character vector of patterns
# @return Character vector of matching parameter names (errors if none match).
grep_for_pars <- function(x, regex_pars) {
  validate_stanreg_object(x)
  if (used.optimizing(x)) {
    warning("'regex_pars' ignored for models fit using algorithm='optimizing'.",
            call. = FALSE)
    return(NULL)
  }
  stopifnot(is.character(regex_pars))
  # match each pattern against the parameter names, preserving pattern order
  out <- unlist(lapply(seq_along(regex_pars), function(j) {
    grep(regex_pars[j], rownames(x$stan_summary), value = TRUE)
  }))
  if (!length(out))
    stop("No matches for 'regex_pars'.", call. = FALSE)
  return(out)
}

# Combine pars and regex_pars
#
# @param x stanreg object
# @param pars Character vector of parameter names
# @param regex_pars Character vector of patterns
collect_pars <- function(x, pars = NULL, regex_pars = NULL) {
  if (is.null(pars) && is.null(regex_pars))
    return(NULL)
  # "varying" is accepted as an alias for the "b" (group-level) parameters
  if (!is.null(pars))
    pars[pars == "varying"] <- "b"
  if (!is.null(regex_pars))
    pars <- c(pars, grep_for_pars(x, regex_pars))
  unique(pars)
}

# Get the posterior sample size
#
# @param x A stanreg object
# @return the posterior sample size (or size of sample from approximate posterior)
posterior_sample_size <- function(x) {
  validate_stanreg_object(x)
  if (used.optimizing(x)) {
    return(NROW(x$asymptotic_sampling_dist))
  }
  pss <- x$stanfit@sim$n_save
  if (used.variational(x))
    return(pss)
  # MCMC: per-chain saved draws minus warmup, summed over chains
  sum(pss - x$stanfit@sim$warmup2)
}

# If a is NULL (and Inf, respectively) return b, otherwise just return a
# @param a,b Objects
`%ORifNULL%` <- function(a, b) {
  if (is.null(a)) b else a
}
# NOTE(review): assumes 'a' is a non-NA scalar; a vector or NA on the LHS
# would break the if() condition.
`%ORifINF%` <- function(a, b) {
  if (a == Inf) b else a
}

# Maybe broadcast
#
# @param x A vector or scalar.
# @param n Number of replications to possibly make.
# @return If \code{x} has no length the \code{0} replicated \code{n} times is
#   returned. If \code{x} has length 1, the \code{x} replicated \code{n} times
#   is returned. Otherwise \code{x} itself is returned.
maybe_broadcast <- function(x, n) {
  if (!length(x)) {
    rep(0, times = n)
  } else if (length(x) == 1L) {
    rep(x, times = n)
  } else {
    x
  }
}

# Create a named list using specified names or, if names are omitted, using the
# names of the objects in the list
#
# Uses match.call() so that unnamed arguments are named after the
# expressions supplied by the caller (non-standard evaluation).
#
# @param ... Objects to include in the list.
# @return A named list.
nlist <- function(...) {
  m <- match.call()
  out <- list(...)
  no_names <- is.null(names(out))
  has_name <- if (no_names) FALSE else nzchar(names(out))
  if (all(has_name))
    return(out)
  # deparse the call to recover names for the unnamed arguments
  nms <- as.character(m)[-1L]
  if (no_names) {
    names(out) <- nms
  } else {
    names(out)[!has_name] <- nms[!has_name]
  }
  return(out)
}

# Check and set scale parameters for priors
#
# @param scale Value of scale parameter (can be NULL).
# @param default Default value to use if \code{scale} is NULL.
# @param link String naming the link function or NULL.
# @return If a probit link is being used, \code{scale} (or \code{default} if
#   \code{scale} is NULL) is scaled by \code{dnorm(0) / dlogis(0)}. Otherwise
#   either \code{scale} or \code{default} is returned.
set_prior_scale <- function(scale, default, link) {
  stopifnot(is.numeric(default), is.character(link) || is.null(link))
  if (is.null(scale))
    scale <- default
  if (isTRUE(link == "probit"))
    scale <- scale * dnorm(0) / dlogis(0)
  return(scale)
}

# Methods for creating linear predictor
#
# Make linear predictor vector from x and point estimates for beta, or linear
# predictor matrix from x and full posterior sample of beta.
#
# @param beta A vector or matrix or parameter estimates.
# @param x Predictor matrix.
# @param offset Optional offset vector.
# @return A vector or matrix.
linear_predictor <- function(beta, x, offset = NULL) {
  UseMethod("linear_predictor")
}
linear_predictor.default <- function(beta, x, offset = NULL) {
  eta <- as.vector(if (NCOL(x) == 1L) x * beta else x %*% beta)
  if (length(offset))
    eta <- eta + offset
  return(eta)
}
linear_predictor.matrix <- function(beta, x, offset = NULL) {
  if (NCOL(beta) == 1L)
    beta <- as.matrix(beta)
  # draws x observations: add the offset across columns
  eta <- beta %*% t(x)
  if (length(offset))
    eta <- sweep(eta, 2L, offset, `+`)
  return(eta)
}

#' Extract X, Y or Z from a stanreg object
#'
#' @keywords internal
#' @export
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @param ... Other arguments passed to methods. For a \code{stanmvreg} object
#'   this can be an integer \code{m} specifying the submodel.
#' @return For \code{get_x} and \code{get_z}, a matrix. For \code{get_y}, either
#'   a vector or a matrix, depending on how the response variable was specified.
get_y <- function(object, ...) UseMethod("get_y")

#' @rdname get_y
#' @export
get_x <- function(object, ...) UseMethod("get_x")

#' @rdname get_y
#' @export
get_z <- function(object, ...) UseMethod("get_z")

#' @export
get_y.default <- function(object, ...) {
  # prefer the stored response; fall back to rebuilding the model frame
  object[["y"]] %ORifNULL% model.response(model.frame(object))
}
#' @export
get_x.default <- function(object, ...) {
  object[["x"]] %ORifNULL% model.matrix(object)
}
#' @export
get_x.gamm4 <- function(object, ...) {
  as.matrix(object[["x"]])
}
#' @export
get_x.lmerMod <- function(object, ...) {
  object$glmod$X %ORifNULL% stop("X not found")
}
#' @export
get_z.lmerMod <- function(object, ...) {
  # Zt is stored transposed (lme4 convention)
  Zt <- object$glmod$reTrms$Zt %ORifNULL% stop("Z not found")
  t(Zt)
}
#' @export
get_y.stanmvreg <- function(object, m = NULL, ...) {
  ret <- fetch(object$glmod, "y", "y") %ORifNULL% stop("y not found")
  stub <- get_stub(object)
  if (!is.null(m)) ret[[m]] else list_nms(ret, stub = stub)
}
#' @export
get_x.stanmvreg <- function(object, m = NULL, ...) {
  ret <- fetch(object$glmod, "x", "x") %ORifNULL% stop("X not found")
  stub <- get_stub(object)
  if (!is.null(m)) ret[[m]] else list_nms(ret, stub = stub)
}
#' @export
get_z.stanmvreg <- function(object, m = NULL, ...) {
  Zt <- fetch(object$glmod, "reTrms", "Zt") %ORifNULL% stop("Z not found")
  ret <- lapply(Zt, t)
  stub <- get_stub(object)
  if (!is.null(m)) ret[[m]] else list_nms(ret, stub = stub)
}

# Get inverse link function
#
# @param x A stanreg object, family object, or string.
# @param ... Other arguments passed to methods. For a \code{stanmvreg} object
#   this can be an integer \code{m} specifying the submodel.
# @return The inverse link function associated with x.
linkinv <- function(x, ...) UseMethod("linkinv")
linkinv.stanreg <- function(x, ...) {
  if (is(x, "polr")) polr_linkinv(x) else family(x)$linkinv
}
linkinv.stanmvreg <- function(x, m = NULL, ...) {
  ret <- lapply(family(x), `[[`, "linkinv")
  stub <- get_stub(x)
  if (!is.null(m)) ret[[m]] else list_nms(ret, stub = stub)
}
linkinv.family <- function(x, ...) {
  x$linkinv
}
linkinv.character <- function(x, ...) {
  stopifnot(length(x) == 1)
  polr_linkinv(x)
}

# Make inverse link function for stan_polr models, neglecting any
# exponent in the scobit case
#
# @param x A stanreg object or character scalar giving the "method".
# @return The inverse link function associated with x.
polr_linkinv <- function(x) {
  if (is.stanreg(x) && is(x, "polr")) {
    method <- x$method
  } else if (is.character(x) && length(x) == 1L) {
    method <- x
  } else {
    stop("'x' should be a stanreg object created by stan_polr ",
         "or a single string.")
  }
  # "logistic" is stan_polr's name for the logit link
  if (is.null(method) || method == "logistic")
    method <- "logit"
  # loglog has no make.link() equivalent; use the Gumbel CDF
  if (method == "loglog")
    return(pgumbel)
  make.link(method)$linkinv
}

# Wrapper for rstan::summary
# @param stanfit A stanfit object created using rstan::sampling or rstan::vb
# @return A matrix of summary stats
make_stan_summary <- function(stanfit) {
  # central intervals at 50%, 80% and 95% plus the median
  levs <- c(0.5, 0.8, 0.95)
  qq <- (1 - levs) / 2
  probs <- sort(c(0.5, qq, 1 - qq))
  rstan::summary(stanfit, probs = probs, digits = 10)$summary
}

# Error if the same grouping factor appears multiple times with overlapping
# group-specific terms (e.g. (x | g) + (x | g)).
#
# @param reTrms The reTrms list from lme4's (g)lmer parsing.
check_reTrms <- function(reTrms) {
  stopifnot(is.list(reTrms))
  nms <- names(reTrms$cnms)
  dupes <- duplicated(nms)
  for (i in which(dupes)) {
    original <- reTrms$cnms[[nms[i]]]
    dupe <- reTrms$cnms[[i]]
    overlap <- dupe %in% original
    if (any(overlap))
      stop("rstanarm does not permit formulas with duplicate group-specific terms.\n",
           "In this case ", nms[i], " is used as a grouping factor multiple times and\n",
           dupe[overlap], " is included multiple times.\n",
           "Consider using || or -1 in your formulas to prevent this from happening.")
  }
  return(invisible(NULL))
}

#' @importFrom lme4 glmerControl
# @param ignore_lhs ignore or throw error if LHS of formula is missing? (relevant if prior_PD is TRUE)
make_glmerControl <- function(..., ignore_lhs = FALSE, ignore_x_scale = FALSE) {
  # relax lme4's data-size checks: Stan handles small groups fine
  glmerControl(check.nlev.gtreq.5 = "ignore",
               check.nlev.gtr.1 = "stop",
               check.nobs.vs.rankZ = "ignore",
               check.nobs.vs.nlev = "ignore",
               check.nobs.vs.nRE = "ignore",
               check.formula.LHS = if (ignore_lhs) "ignore" else "stop",
               check.scaleX = if (ignore_x_scale) "ignore" else "warning",
               ...)
}

# Check if a fitted model (stanreg object) has weights
#
# @param x stanreg object
# @return Logical. Only TRUE if x$weights has positive length and the elements
#   of x$weights are not all the same.
#
model_has_weights <- function(x) {
  wts <- x[["weights"]]
  if (!length(wts)) {
    FALSE
  } else if (all(wts == wts[1])) {
    FALSE
  } else {
    TRUE
  }
}

# Check that a stanfit object (or list returned by rstan::optimizing) is valid
#
check_stanfit <- function(x) {
  if (is.list(x)) {
    # rstan::optimizing returns a plain list with 'par' and 'value'
    if (!all(c("par", "value") %in% names(x)))
      stop("Invalid object produced please report bug")
  } else {
    stopifnot(is(x, "stanfit"))
    # mode 0 means the stanfit contains actual draws
    if (x@mode != 0)
      stop("Invalid stanfit object produced please report bug")
  }
  return(TRUE)
}

# Validate data argument
#
# Make sure that, if specified, data is a data frame. If data is not missing
# then dimension reduction is also performed on variables (i.e., a one column
# matrix inside a data frame is converted to a vector).
#
# @param data User's data argument
# @param if_missing Object to return if data is missing/null
# @return If no error is thrown, data itself is returned if not missing/null,
#   otherwise if_missing is returned.
#
drop_redundant_dims <- function(data) {
  # NOTE(review): sapply() here relies on data having at least one column;
  # verify behavior for 0-column inputs if this is ever called on them.
  drop_dim <- sapply(data, function(v) is.matrix(v) && NCOL(v) == 1)
  data[, drop_dim] <- lapply(data[, drop_dim, drop=FALSE], drop)
  return(data)
}
validate_data <- function(data, if_missing = NULL) {
  if (missing(data) || is.null(data)) {
    warn_data_arg_missing()
    return(if_missing)
  }
  if (!is.data.frame(data)) {
    stop("'data' must be a data frame.", call. = FALSE)
  }
  # drop other classes (e.g. 'tbl_df', 'tbl', 'data.table')
  data <- as.data.frame(data)
  drop_redundant_dims(data)
}

# Throw a warning if 'data' argument to modeling function is missing
warn_data_arg_missing <- function() {
  warning(
    "Omitting the 'data' argument is not recommended ",
    "and may not be allowed in future versions of rstanarm. ",
    "Some post-estimation functions (in particular 'update', 'loo', 'kfold') ",
    "are not guaranteed to work properly unless 'data' is specified as a data frame.",
    call. = FALSE
  )
}

# Validate newdata argument for posterior_predict, log_lik, etc.
#
# Checks for NAs in used variables only (but returns all variables),
# and also drops any unused dimensions in variables (e.g. a one column
# matrix inside a data frame is converted to a vector).
#
# @param object stanreg object
# @param newdata NULL or a data frame
# @param m For stanmvreg objects, the submodel (passed to formula())
# @return NULL or a data frame
#
validate_newdata <- function(object, newdata = NULL, m = NULL) {
  if (is.null(newdata)) {
    return(newdata)
  }
  if (!is.data.frame(newdata)) {
    stop("If 'newdata' is specified it must be a data frame.", call. = FALSE)
  }
  # drop other classes (e.g. 'tbl_df', 'tbl')
  newdata <- as.data.frame(newdata)
  if (nrow(newdata) == 0) {
    stop("If 'newdata' is specified it must have more than 0 rows.", call. = FALSE)
  }
  # only check for NAs in used variables
  vars <- all.vars(formula(object, m = m))
  newdata_check <- newdata[, colnames(newdata) %in% vars, drop=FALSE]
  if (any(is.na(newdata_check))) {
    stop("NAs are not allowed in 'newdata'.", call. = FALSE)
  }
  if (ncol(newdata) > 0) {
    newdata <- drop_redundant_dims(newdata)
  }
  return(newdata)
}

#---------------------- for stan_{mvmer,jm} only -----------------------------

# Return a list (or vector if unlist = TRUE) which
# contains the embedded elements in list x named y
#
# @param x A list; y (and optionally z, zz) name successively nested elements.
# @param null_to_zero Replace NULL elements with 0L.
# @param pad_length Pad the result with 0L elements up to this length.
fetch <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE,
                  pad_length = NULL, unlist = FALSE) {
  ret <- lapply(x, `[[`, y)
  if (!is.null(z))
    ret <- lapply(ret, `[[`, z)
  if (!is.null(zz))
    ret <- lapply(ret, `[[`, zz)
  if (null_to_zero)
    # NOTE(review): ifelse() returns a length-1 result here, so non-NULL
    # vector elements would be truncated to their first value — confirm
    # callers only use null_to_zero with scalar elements.
    ret <- lapply(ret, function(i) ifelse(is.null(i), 0L, i))
  if (!is.null(pad_length)) {
    padding <- rep(list(0L), pad_length - length(ret))
    ret <- c(ret, padding)
  }
  if (unlist) unlist(ret) else ret
}

# Wrapper for using fetch with unlist = TRUE
fetch_ <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE,
                   pad_length = NULL) {
  fetch(x = x, y = y, z = z, zz = zz, null_to_zero = null_to_zero,
        pad_length = pad_length, unlist = TRUE)
}

# Wrapper for using fetch with unlist = TRUE and
# returning array. Also converts logical to integer.
fetch_array <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE,
                        pad_length = NULL) {
  val <- fetch(x = x, y = y, z = z, zz = zz, null_to_zero = null_to_zero,
               pad_length = pad_length, unlist = TRUE)
  if (is.logical(val))
    val <- as.integer(val)
  as.array(val)
}

# Unlist the result from an lapply call
#
# @param X,FUN,... Same as lapply
uapply <- function(X, FUN, ...) {
  unlist(lapply(X, FUN, ...))
}

# A refactored version of mapply with SIMPLIFY = FALSE
#
# @param FUN,... Same as mapply
# @param args Passed to MoreArgs
xapply <- function(..., FUN, args = NULL) {
  mapply(FUN, ..., MoreArgs = args, SIMPLIFY = FALSE)
}

# Test if family object corresponds to a linear mixed model
#
# @param x A family object
is.lmer <- function(x) {
  if (!is(x, "family"))
    stop("x should be a family object.", call.
# Split a 2D array into nsplits subarrays, returned as a list
#
# @param x A 2D array or matrix
# @param nsplits An integer, the number of subarrays or submatrices
# @param bycol A logical, if TRUE then the subarrays are generated by
#   splitting the columns of x
# @return A list of nsplits arrays or matrices
array2list <- function(x, nsplits, bycol = TRUE) {
  len <- if (bycol) ncol(x) else nrow(x)
  len_k <- len %/% nsplits
  if (!len == (len_k * nsplits))
    stop("Dividing x by nsplits does not result in an integer.")
  lapply(1:nsplits, function(k) {
    if (bycol) x[, (k - 1) * len_k + 1:len_k, drop = FALSE]
    else x[(k - 1) * len_k + 1:len_k, , drop = FALSE]
  })
}

# Convert a standardised quadrature node to an unstandardised value based on
# the specified integral limits
#
# @param x A standardised quadrature node (single numeric value)
# @param a The lower limit(s) of the integral, possibly a vector
# @param b The upper limit(s) of the integral, possibly a vector
unstandardise_qpts <- function(x, a, b) {
  if (!identical(length(x), 1L) || !is.numeric(x))
    stop("'x' should be a single numeric value.", call. = FALSE)
  if (!all(is.numeric(a), is.numeric(b)))
    stop("'a' and 'b' should be numeric.", call. = FALSE)
  if (!length(a) %in% c(1L, length(b)))
    stop("'a' and 'b' should be vectors of length 1, or, be the same length.",
         call. = FALSE)
  if (any((b - a) < 0))
    stop("The upper limits for the integral ('b' values) should be greater than ",
         "the corresponding lower limits for the integral ('a' values).",
         call. = FALSE)
  # affine map from [-1, 1] to [a, b]
  ((b - a) / 2) * x + ((b + a) / 2)
}

# Convert a standardised quadrature weight to an unstandardised value based on
# the specified integral limits
#
# @param x A standardised quadrature weight (single numeric value)
# @param a The lower limit(s) of the integral, possibly a vector
# @param b The upper limit(s) of the integral, possibly a vector
unstandardise_qwts <- function(x, a, b) {
  if (!identical(length(x), 1L) || !is.numeric(x))
    stop("'x' should be a single numeric value.", call. = FALSE)
  if (!all(is.numeric(a), is.numeric(b)))
    stop("'a' and 'b' should be numeric.", call. = FALSE)
  if (!length(a) %in% c(1L, length(b)))
    stop("'a' and 'b' should be vectors of length 1, or, be the same length.",
         call. = FALSE)
  if (any((b - a) < 0))
    stop("The upper limits for the integral ('b' values) should be greater than ",
         "the corresponding lower limits for the integral ('a' values).",
         call. = FALSE)
  # Jacobian of the affine map from [-1, 1] to [a, b]
  ((b - a) / 2) * x
}

# Test if object is stanmvreg class
#
# @param x An object to be tested.
is.stanmvreg <- function(x) {
  inherits(x, "stanmvreg")
}

# Test if object is stanjm class
#
# @param x An object to be tested.
is.stanjm <- function(x) {
  inherits(x, "stanjm")
}

# Test if object is a joint longitudinal and survival model
#
# @param x An object to be tested.
is.jm <- function(x) {
  isTRUE(x$stan_function == "stan_jm")
}

# Test if object contains a multivariate GLM
#
# @param x An object to be tested.
is.mvmer <- function(x) {
  isTRUE(x$stan_function %in% c("stan_mvmer", "stan_jm"))
}

# Test if object contains a survival model
#
# @param x An object to be tested.
is.surv <- function(x) {
  isTRUE(x$stan_function %in% c("stan_jm"))
}

# Throw error if object isn't a stanmvreg object
#
# @param x The object to test.
validate_stanmvreg_object <- function(x, call. = FALSE) {
  if (!is.stanmvreg(x))
    stop("Object is not a stanmvreg object.", call. = call.)
}

# Throw error if object isn't a stanjm object
#
# @param x The object to test.
validate_stanjm_object <- function(x, call. = FALSE) {
  if (!is.stanjm(x))
    stop("Object is not a stanjm object.", call. = call.)
}

# Throw error if parameter isn't a positive scalar
#
# @param x The object to test.
# @param not_greater_than Optional upper bound that all elements of x must
#   not exceed.
validate_positive_scalar <- function(x, not_greater_than = NULL) {
  nm <- deparse(substitute(x))
  if (is.null(x))
    stop(nm, " cannot be NULL", call. = FALSE)
  if (!is.numeric(x))
    stop(nm, " should be numeric", call. = FALSE)
  # typo fixes: "postive" -> "positive", "should less" -> "should be less"
  if (any(x <= 0))
    stop(nm, " should be positive", call. = FALSE)
  if (!is.null(not_greater_than)) {
    if (!is.numeric(not_greater_than) || (not_greater_than <= 0))
      stop("'not_greater_than' should be numeric and positive")
    if (!all(x <= not_greater_than))
      stop(nm, " should be less than or equal to ", not_greater_than,
           call. = FALSE)
  }
}

# Return a list with the median and prob% CrI bounds for each column of a
# matrix or 2D array
#
# @param x A matrix or 2D array
# @param prob Value between 0 and 1 indicating the desired width of the CrI
# @param na.rm Passed to median() and quantile().
median_and_bounds <- function(x, prob, na.rm = FALSE) {
  if (!any(is.matrix(x), is.array(x)))
    stop("x should be a matrix or 2D array.")
  med <- apply(x, 2, median, na.rm = na.rm)
  lb <- apply(x, 2, quantile, (1 - prob) / 2, na.rm = na.rm)
  ub <- apply(x, 2, quantile, (1 + prob) / 2, na.rm = na.rm)
  nlist(med, lb, ub)
}
# Return the stub for variable names from one submodel of a stan_jm model
#
# @param m An integer specifying the number of the longitudinal submodel or
#   a character string specifying the submodel (e.g. "Long1", "Event", etc)
# @param stub A character string to prefix to m, if m is supplied as an integer
get_m_stub <- function(m, stub = "Long") {
  if (is.null(m))
    return(NULL)
  if (is.numeric(m))
    return(paste0(stub, m, "|"))
  if (is.character(m))
    return(paste0(m, "|"))
}

# Return the appropriate stub for variable names
#
# @param object A stanmvreg object
get_stub <- function(object) {
  if (is.jm(object)) {
    "Long"
  } else if (is.mvmer(object)) {
    "y"
  } else {
    NULL
  }
}

# Separates a names object into separate parts based on the longitudinal,
# event, or association parameters.
#
# @param x Character vector (often rownames(fit$stan_summary))
# @param M An integer specifying the number of longitudinal submodels.
# @param stub The character string used at the start of the names of variables
#   in the longitudinal/GLM submodels
# @param ... Arguments passed to grep
# @return A list with x separated out into those names corresponding
#   to parameters from the M longitudinal submodels, the event submodel
#   or association parameters.
collect_nms <- function(x, M, stub = "Long", ...) {
  # small closure so each regex search against x reads as one call
  find <- function(rx) grep(rx, x, ...)
  ppd <- find(paste0("^", stub, ".{1}\\|mean_PPD"))
  y <- lapply(seq_len(M), function(m) find(mod2rx(m, stub = stub)))
  y_extra <- lapply(seq_len(M), function(m) {
    c(find(paste0("^", stub, m, "\\|sigma")),
      find(paste0("^", stub, m, "\\|shape")),
      find(paste0("^", stub, m, "\\|lambda")),
      find(paste0("^", stub, m, "\\|reciprocal_dispersion")))
  })
  # remove auxiliary parameters and the mean_PPD from the coefficient indices
  y <- lapply(seq_len(M), function(m) setdiff(y[[m]], c(y_extra[[m]], ppd[m])))
  e <- find(mod2rx("^Event"))
  e_extra <- find("^Event\\|weibull-shape|^Event\\|b-splines-coef|^Event\\|piecewise-coef")
  e <- setdiff(e, e_extra)
  a <- find(mod2rx("^Assoc"))
  b <- b_names(x, ...)
  y_b <- lapply(seq_len(M), function(m) b_names_M(x, m, stub = stub, ...))
  alpha <- c(find("^.{5}\\|\\(Intercept\\)"),
             find(paste0("^", stub, ".{1}\\|\\(Intercept\\)")))
  beta <- setdiff(c(unlist(y), e, a), alpha)
  nlist(y, y_extra, y_b, e, e_extra, a, b, alpha, beta, ppd)
}

# Grep for "b" parameters (ranef), can optionally be specified
# for a specific longitudinal submodel
#
# @param x Character vector (often rownames(fit$stan_summary))
# @param submodel Optional integer specifying which long submodel
# @param stub Prefix used for the longitudinal submodels
# @param ... Passed to grep
b_names_M <- function(x, submodel = NULL, stub = "Long", ...) {
  pattern <- if (is.null(submodel)) {
    "^b\\["
  } else {
    paste0("^b\\[", stub, submodel, "\\|")
  }
  grep(pattern, x, ...)
}

# Grep for regression coefs (fixef), can optionally be specified
# for a specific submodel
#
# @param x Character vector (often rownames(fit$stan_summary))
# @param submodel Character vector specifying which submodels
#   to obtain the coef names for. Can be "Long", "Event", "Assoc", or
#   an integer specifying a specific longitudinal submodel. Specifying
#   NULL selects all submodels.
# @param ... Passed to grep
beta_names <- function(x, submodel = NULL, ...) {
  if (is.null(submodel)) {
    rxlist <- c(mod2rx("^Long"), mod2rx("^Event"), mod2rx("^Assoc"))
  } else {
    rxlist <- c()
    for (part in c("Long", "Event", "Assoc")) {
      if (part %in% submodel)
        rxlist <- c(rxlist, mod2rx(paste0("^", part)))
    }
    miss <- setdiff(submodel, c("Long", "Event", "Assoc"))
    if (length(miss))
      rxlist <- c(rxlist, sapply(miss, mod2rx))
  }
  unlist(lapply(rxlist, function(rx) grep(rx, x, ...)))
}

# Converts "Long", "Event" or "Assoc" to the regular expression
# used at the start of variable names for the fitted joint model
#
# @param x The submodel for which the regular expression should be
#   obtained. Can be "Long", "Event", "Assoc", or an integer specifying
#   a specific longitudinal submodel.
# @param stub Prefix used when x is an integer submodel index.
mod2rx <- function(x, stub = "Long") {
  # fixed lookups for the known submodel labels; anything else is treated
  # as a submodel index and prefixed with the stub
  known <- c("^Long"  = "^Long[1-9]\\|",
             "^Event" = "^Event\\|",
             "^Assoc" = "^Assoc\\|",
             "Long"   = "Long[1-9]\\|",
             "Event"  = "Event\\|",
             "Assoc"  = "Assoc\\|",
             "^y"     = "^y[1-9]\\|",
             "y"      = "y[1-9]\\|")
  if (is.character(x) && length(x) == 1L && x %in% names(known)) {
    known[[x]]
  } else {
    paste0("^", stub, x, "\\|")
  }
}

# Return the number of longitudinal submodels
#
# @param object A stanmvreg object
get_M <- function(object) {
  validate_stanmvreg_object(object)
  return(object$n_markers)
}

# Supplies names for the output list returned by most stanmvreg methods
#
# @param object The list object to which the names are to be applied
# @param M The number of longitudinal/GLM submodels. If NULL then the number of
#   longitudinal/GLM submodels is assumed to be equal to the length of object.
# @param stub The character string to use at the start of the names for
#   list items related to the longitudinal/GLM submodels
list_nms <- function(object, M = NULL, stub = "Long") {
  if (!(is.null(object) || is.list(object) || is.vector(object)))
    stop("'object' argument should be a list or vector.")
  if (is.null(object))
    return(object)
  if (is.null(M))
    M <- length(object)
  labels <- paste0(stub, seq_len(M))
  # any trailing element beyond the M submodels is the event submodel
  if (length(object) > M)
    labels <- c(labels, "Event")
  names(object) <- labels
  object
}
# Removes the submodel identifying text (e.g. "Long1|", "Event|", etc)
# from variable names
#
# @param x Character vector (often rownames(fit$stan_summary)) from which
#   the stub should be removed
# @return The character vector with the submodel stubs removed.
rm_stub <- function(x) {
  x <- gsub(mod2rx("^y"), "", x)
  x <- gsub(mod2rx("^Long"), "", x)
  x <- gsub(mod2rx("^Event"), "", x)
  # explicit return: previously the function ended on an assignment,
  # which returns its value invisibly
  x
}

# Removes a specified character string from the names of an
# object (for example, a matched call)
#
# @param x The matched call
# @param string The character string (regex) to be removed
strip_nms <- function(x, string) {
  names(x) <- gsub(string, "", names(x))
  x
}

# Check argument contains one of the allowed options
check_submodelopt2 <- function(x) {
  if (!x %in% c("long", "event"))
    stop("submodel option must be 'long' or 'event'")
}

check_submodelopt3 <- function(x) {
  if (!x %in% c("long", "event", "both"))
    stop("submodel option must be 'long', 'event' or 'both'")
}

# Error message when the argument contains an object of the incorrect type
#
# @param arg_name Name of the offending argument.
# @param type Character vector of acceptable types.
STOP_arg <- function(arg_name, type) {
  stop(paste0("'", arg_name, "' should be a ", paste0(type, collapse = " or "),
              " object or a list of those objects."), call. = FALSE)
}

# Return error msg if both elements of the object are TRUE
#
# @param object A list with logical elements.
# @param x,y Names of the two elements that must not both be TRUE.
STOP_combination_not_allowed <- function(object, x, y) {
  if (object[[x]] && object[[y]])
    stop("In ", deparse(substitute(object)), ", '", x, "' and '", y,
         "' cannot be specified together", call. = FALSE)
}

# Error message when not specifying an argument required for stanmvreg objects
#
# @param arg The argument
STOP_arg_required_for_stanmvreg <- function(arg) {
  nm <- deparse(substitute(arg))
  msg <- paste0("Argument '", nm, "' required for stanmvreg objects.")
  stop2(msg)
}

# Error message when a function is not yet implemented for stanmvreg objects
#
# @param what A character string naming the function not yet implemented
STOP_if_stanmvreg <- function(what) {
  msg <- "not yet implemented for stanmvreg objects."
  if (!missing(what))
    msg <- paste(what, msg)
  stop2(msg)
}

# Error message when a function is not yet implemented for stan_mvmer models
#
# @param what An optional message to prepend to the default message.
STOP_stan_mvmer <- function(what) {
  msg <- "is not yet implemented for models fit using stan_mvmer."
  if (!missing(what))
    msg <- paste(what, msg)
  stop2(msg)
}

# Consistent error message to use when something that is only available for
# models fit using stan_jm
#
# @param what An optional message to prepend to the default message.
STOP_jm_only <- function(what) {
  msg <- "can only be used with stan_jm models."
  if (!missing(what))
    msg <- paste(what, msg)
  stop2(msg)
}

# Consistent error message when binomial models with greater than
# one trial are not allowed
STOP_binomial <- function() {
  stop2("Binomial models with number of trials greater than one ",
        "are not allowed (i.e. only bernoulli models are allowed).")
}

# Error message when a required variable is missing from the data frame
#
# @param var The name of the variable that could not be found
STOP_no_var <- function(var) {
  stop2("Variable '", var, "' cannot be found in the data frame.")
}

# Error message for dynamic predictions
#
# @param what A reason why the dynamic predictions are not allowed
STOP_dynpred <- function(what) {
  stop2(paste("Dynamic predictions are not yet implemented for", what))
}
# Check if individuals in ids argument were also used in model estimation
#
# @param object A stanmvreg object
# @param ids A vector of ids appearing in the pp data
# @param m Integer specifying which submodel to get the estimation IDs from
# @return A logical. TRUE indicates there are new ids in the prediction data,
#   while FALSE indicates all ids in the prediction data were used in fitting
#   the model. This return is used to determine whether to draw new b pars.
check_pp_ids <- function(object, ids, m = 1) {
  ids2 <- unique(model.frame(object, m = m)[[object$id_var]])
  if (any(ids %in% ids2))
    warning("Some of the IDs in the 'newdata' correspond to individuals in the ",
            "estimation dataset. Please be sure you want to obtain subject-",
            "specific predictions using the estimated random effects for those ",
            "individuals. If you instead meant to marginalise over the distribution ",
            "of the random effects (for posterior_predict or posterior_traj), or ",
            "to draw new random effects conditional on outcome data provided in ",
            "the 'newdata' arguments (for posterior_survfit), then please make ",
            "sure the ID values do not correspond to individuals in the ",
            "estimation dataset.", immediate. = TRUE)
  if (!all(ids %in% ids2)) TRUE else FALSE
}

# Validate newdataLong and newdataEvent arguments
#
# @param object A stanmvreg object
# @param newdataLong A data frame, or a list of data frames
# @param newdataEvent A data frame
# @param duplicate_ok A logical. If FALSE then only one row per individual is
#   allowed in the newdataEvent data frame
# @param response A logical specifying whether the longitudinal response
#   variable must be included in the new data frame
# @return A list of validated data frames
validate_newdatas <- function(object, newdataLong = NULL, newdataEvent = NULL,
                              duplicate_ok = FALSE, response = TRUE) {
  validate_stanmvreg_object(object)
  id_var <- object$id_var
  newdatas <- list()
  if (!is.null(newdataLong)) {
    # recycle a single data frame across all M longitudinal submodels
    if (!is(newdataLong, "list"))
      newdataLong <- rep(list(newdataLong), get_M(object))
    dfcheck <- sapply(newdataLong, is.data.frame)
    if (!all(dfcheck))
      stop("'newdataLong' must be a data frame or list of data frames.",
           call. = FALSE)
    nacheck <- sapply(seq_along(newdataLong), function(m) {
      if (response) {
        # newdataLong needs the response variable
        fmL <- formula(object, m = m)
      } else {
        # newdataLong only needs the covariates (drop the LHS)
        fmL <- formula(object, m = m)[c(1, 3)]
      }
      all(!is.na(get_all_vars(fmL, newdataLong[[m]])))
    })
    if (!all(nacheck))
      stop("'newdataLong' cannot contain NAs.", call. = FALSE)
    newdatas <- c(newdatas, newdataLong)
  }
  if (!is.null(newdataEvent)) {
    if (!is.data.frame(newdataEvent))
      stop("'newdataEvent' must be a data frame.", call. = FALSE)
    if (response) {
      # newdataEvent needs the response variable
      fmE <- formula(object, m = "Event")
    } else {
      # newdataEvent only needs the covariates (drop the LHS)
      fmE <- formula(object, m = "Event")[c(1, 3)]
    }
    dat <- get_all_vars(fmE, newdataEvent)
    dat[[id_var]] <- newdataEvent[[id_var]] # include ID variable in event data
    if (any(is.na(dat)))
      stop("'newdataEvent' cannot contain NAs.", call. = FALSE)
    if (!duplicate_ok && any(duplicated(newdataEvent[[id_var]])))
      stop("'newdataEvent' should only contain one row per individual, since ",
           "time varying covariates are not allowed in the prediction data.")
    newdatas <- c(newdatas, list(Event = newdataEvent))
  }
  if (length(newdatas)) {
    idvar_check <- sapply(newdatas, function(x) id_var %in% colnames(x))
    if (!all(idvar_check))
      STOP_no_var(id_var)
    ids <- lapply(newdatas, function(x) unique(x[[id_var]]))
    sorted_ids <- lapply(ids, sort)
    if (!length(unique(sorted_ids)) == 1L)
      stop("The same subject ids should appear in each new data frame.")
    if (!length(unique(ids)) == 1L)
      stop("The subject ids should be ordered the same in each new data frame.")
    return(newdatas)
  } else return(NULL)
}

# Return data frames only including the specified subset of individuals
#
# @param object A stanmvreg object
# @param data A data frame, or a list of data frames
# @param ids A vector of ids indicating which individuals to keep
# @return A data frame, or a list of data frames, depending on the input
subset_ids <- function(object, data, ids) {
  if (is.null(data))
    return(NULL)
  validate_stanmvreg_object(object)
  id_var <- object$id_var
  is_list <- is(data, "list")
  if (!is_list)
    data <- list(data)
  is_df <- sapply(data, is.data.frame)
  if (!all(is_df))
    stop("'data' should be a data frame, or list of data frames.")
  data <- lapply(data, function(x) {
    if (!id_var %in% colnames(x))
      STOP_no_var(id_var)
    sel <- which(!ids %in% x[[id_var]])
    if (length(sel))
      # bug fix: was ids[[sel]], which errors when more than one id is missing
      stop("The following 'ids' do not appear in the data: ",
           paste(ids[sel], collapse = ", "))
    x[x[[id_var]] %in% ids, , drop = FALSE]
  })
  if (is_list) return(data) else return(data[[1]])
}
# Return a data.table with a key set using the appropriate id/time/grp variables
#
# @param data A data frame.
# @param id_var The name of the ID variable.
# @param time_var The name of the time variable.
# @param grp_var The name of the variable identifying groups clustered within
#   individuals.
# @return A data.table (which will be used in a rolling merge against the
#   event times and/or quadrature times).
prepare_data_table <- function(data, id_var, time_var, grp_var = NULL) {
  if (!requireNamespace("data.table"))
    stop("the 'data.table' package must be installed to use this function")
  if (!is.data.frame(data))
    stop("'data' should be a data frame.")
  # check required vars are in the data (a NULL grp_var drops out of c())
  for (v in c(id_var, time_var, grp_var)) {
    if (!v %in% colnames(data))
      STOP_no_var(v)
  }
  # define and set the key for the data.table
  key_cols <- if (is.null(grp_var)) {
    c(id_var, time_var)
  } else {
    c(id_var, grp_var, time_var)
  }
  out <- data.table::data.table(data, key = key_cols)
  out[[time_var]] <- as.numeric(out[[time_var]]) # ensures no rounding on merge
  out[[id_var]] <- factor(out[[id_var]])         # ensures matching of ids
  if (!is.null(grp_var))
    out[[grp_var]] <- factor(out[[grp_var]])     # ensures matching of grps
  out
}

# Carry out a rolling merge
#
# @param data A data.table with a set key corresponding to ids, times (and
#   possibly also grps).
# @param ids A vector of patient ids to merge against.
# @param times A vector of times to (rolling) merge against.
# @param grps An optional vector of groups clustered within patients to
#   merge against. Only relevant when there is clustering within patient ids.
# @return A data.table formed by a merge of ids, (grps), times, and the closest
#   preceding (in terms of times) rows in data.
rolling_merge <- function(data, ids, times, grps = NULL) {
  if (!requireNamespace("data.table"))
    stop("the 'data.table' package must be installed to use this function")
  # the supplied data.table must already be keyed, and the key length must
  # match the number of lookup columns we are about to merge on
  expected <- if (is.null(grps)) 2L else 3L
  key_length <- length(data.table::key(data))
  if (key_length == 0L)
    stop2("Bug found: data.table should have a key.")
  if (!key_length == expected)
    stop2("Bug found: data.table key is not the same length as supplied keylist.")
  # ensure data types are same as returned by the prepare_data_table function
  ids <- factor(ids)          # ensures matching of ids
  times <- as.numeric(times)  # ensures no rounding on merge
  # carry out the rolling merge against the specified times
  lookup <- if (is.null(grps)) {
    data.table::data.table(ids, times)
  } else {
    grps <- factor(grps)
    data.table::data.table(ids, grps, times)
  }
  data[lookup, roll = TRUE, rollends = c(TRUE, TRUE)]
}

# Return an array or list with the time sequence used for posterior predictions
#
# @param increments An integer with the number of increments (time points) at
#   which to predict the outcome for each individual
# @param t0,t1 Numeric vectors giving the start and end times across which to
#   generate prediction times
# @param simplify Logical specifying whether to return each increment as a
#   column of an array (TRUE) or as an element of a list (FALSE)
get_time_seq <- function(increments, t0, t1, simplify = TRUE) {
  steps <- 0:(increments - 1)
  val <- sapply(steps, function(j) {
    t0 + (t1 - t0) * (j / (increments - 1))
  }, simplify = simplify)
  if (simplify && is.vector(val)) {
    # need to transform if there is only one individual
    val <- t(val)
    rownames(val) <- if (!is.null(names(t0))) {
      names(t0)
    } else if (!is.null(names(t1))) {
      names(t1)
    } else {
      NULL
    }
  }
  return(val)
}
# Extract parameters from stanmat and return as a list
#
# @param object A stanmvreg object
# @param stanmat A matrix of posterior draws, may be provided if the desired
#   stanmat is only a subset of the draws from as.matrix(object$stanfit)
# @param means If TRUE, use the posterior means (as a 1-row matrix).
# @return A named list
extract_pars <- function(object, stanmat = NULL, means = FALSE) {
  validate_stanmvreg_object(object)
  M <- get_M(object)
  if (is.null(stanmat))
    stanmat <- as.matrix(object$stanfit)
  if (means)
    stanmat <- t(colMeans(stanmat)) # return posterior means
  nms <- collect_nms(colnames(stanmat), M, stub = get_stub(object))
  beta <- lapply(1:M, function(m) stanmat[, nms$y[[m]], drop = FALSE])
  ebeta <- stanmat[, nms$e, drop = FALSE]
  abeta <- stanmat[, nms$a, drop = FALSE]
  bhcoef <- stanmat[, nms$e_extra, drop = FALSE]
  b <- lapply(1:M, function(m) stanmat[, nms$y_b[[m]], drop = FALSE])
  nlist(beta, ebeta, abeta, bhcoef, b, stanmat)
}

# Promote a character variable to a factor
#
# @param x The variable to potentially promote
promote_to_factor <- function(x) {
  if (is.character(x)) as.factor(x) else x
}

# Draw from a multivariate t distribution
#
# @param mu A mean vector
# @param Sigma A variance-covariance matrix
# @param df A degrees of freedom
rmt <- function(mu, Sigma, df) {
  y <- c(t(chol(Sigma)) %*% rnorm(length(mu)))
  u <- rchisq(1, df = df)
  return(mu + y / sqrt(u / df))
}

# Evaluate the multivariate t log-density
#
# @param x A realization
# @param mu A mean vector
# @param Sigma A variance-covariance matrix
# @param df A degrees of freedom
dmt <- function(x, mu, Sigma, df) {
  x_mu <- x - mu
  p <- length(x)
  lgamma(0.5 * (df + p)) - lgamma(0.5 * df) - 0.5 * p * log(df) -
    0.5 * p * log(pi) - 0.5 * c(determinant(Sigma, logarithm = TRUE)$modulus) -
    0.5 * (df + p) * log1p((x_mu %*% chol2inv(chol(Sigma)) %*% x_mu)[1] / df)
}

# Count the number of unique values
#
# @param x A vector or list
n_distinct <- function(x) {
  length(unique(x))
}

# Transpose function that can handle NULL objects
#
# @param x A matrix, a vector, or otherwise (e.g. NULL, returned unchanged)
transpose <- function(x) {
  if (is.matrix(x) || is.vector(x)) {
    t(x)
  } else {
    x
  }
}

# Translate group/factor IDs into integer values
#
# @param x A vector of group/factor IDs
groups <- function(x) {
  if (!is.null(x)) {
    as.integer(as.factor(x))
  } else {
    x
  }
}

# Drop named attributes listed in ... from the object x
#
# @param x Any object with attributes
# @param ... The named attributes to drop
drop_attributes <- function(x, ...) {
  dots <- list(...)
  if (length(dots)) {
    for (i in dots) {
      attr(x, i) <- NULL
    }
  }
  x
}

# Check if x and any objects in ... were all NULL or not
#
# @param x The first object to use in the comparison
# @param ... Any additional objects to include in the comparison
# @param error If TRUE then return an error if all objects aren't
#   equal with regard to the 'is.null' test.
# @return If error = TRUE, then an error if all objects aren't
#   equal with regard to the 'is.null' test. Otherwise, a logical
#   specifying whether all objects were equal with regard to the
#   'is.null' test.
supplied_together <- function(x, ..., error = FALSE) {
  dots <- list(...)
  # bug fix: deparse(substitute(i)) on the loop variable deparsed the
  # *value* of the offending object, not the caller's argument expression;
  # capture the unevaluated dot expressions instead
  dot_exprs <- as.list(substitute(list(...)))[-1]
  for (j in seq_along(dots)) {
    if (!identical(is.null(x), is.null(dots[[j]]))) {
      if (error) {
        nm_x <- deparse(substitute(x))
        nm_i <- deparse(dot_exprs[[j]])
        stop2(nm_x, " and ", nm_i, " must be supplied together.")
      } else {
        return(FALSE) # not supplied together, ie. one NULL and one not NULL
      }
    }
  }
  return(TRUE) # supplied together, ie. all NULL or all not NULL
}

# Check variables specified in ... are in the data frame
#
# @param data A data frame
# @param ... The names of the variables
check_vars_are_included <- function(data, ...) {
  nms <- names(data)
  vars <- list(...)
  for (i in vars) {
    if (!i %in% nms) {
      arg_nm <- deparse(substitute(data))
      stop2("Variable '", i, "' is not present in ", arg_nm, ".")
    }
  }
  data
}
# Check whether a vector/matrix/array contains an "(Intercept)"
#
# @param x A named vector, or a matrix with column names.
# @param logical If TRUE return a logical, otherwise the integer position(s).
check_for_intercept <- function(x, logical = FALSE) {
  nms <- if (is.matrix(x)) colnames(x) else names(x)
  # bug fix: was which("(Intercept)" %in% nms), which tests membership in the
  # wrong direction and always yields 1L when an intercept exists anywhere,
  # rather than its actual position
  sel <- which(nms == "(Intercept)")
  if (logical) as.logical(length(sel)) else sel
}

# Drop intercept from a vector/matrix/array of named coefficients
drop_intercept <- function(x) {
  sel <- check_for_intercept(x)
  if (length(sel) && is.matrix(x)) {
    x[, -sel, drop = FALSE]
  } else if (length(sel)) {
    x[-sel]
  } else {
    x
  }
}

# Return intercept from a vector/matrix/array of named coefficients
return_intercept <- function(x) {
  # bug fix: same reversed %in% as in check_for_intercept; find the
  # position of the intercept element, not whether it exists
  sel <- which(names(x) == "(Intercept)")
  if (length(sel)) x[sel] else NULL
}

# Standardise a coefficient
#
# @param x The coefficient value.
# @param location,scale Centering and scaling constants.
standardise_coef <- function(x, location = 0, scale = 1)
  (x - location) / scale

# Return a one-dimensional array or an empty numeric
array_else_double <- function(x)
  if (!length(x)) double(0) else as.array(unlist(x))

# Return a matrix of uniform random variables or an empty matrix
#
# @param nrow,ncol Dimensions of the requested matrix.
matrix_of_uniforms <- function(nrow = 0, ncol = 0) {
  if (nrow == 0 || ncol == 0) {
    matrix(0, 0, 0)
  } else {
    matrix(runif(nrow * ncol), nrow, ncol)
  }
}
# If x is NULL then return an empty object of the specified 'type'
#
# @param x An object to test whether it is null.
# @param type The type of empty object to return if x is null.
convert_null <- function(x, type = c("double", "integer", "matrix",
                                     "arraydouble", "arrayinteger")) {
  if (!is.null(x))
    return(x)
  # bug fix: match.arg() was missing, so calling with x = NULL and the
  # default 'type' put a length-5 logical into if(); it also validates
  # invalid type strings
  type <- match.arg(type)
  switch(type,
         double = double(0),
         integer = integer(0),
         matrix = matrix(0, 0, 0),
         arraydouble = as.array(double(0)),
         arrayinteger = as.array(integer(0)))
}

# Expand/pad a matrix to the specified number of cols/rows
#
# @param x A matrix or 2D array
# @param cols,rows Integer specifying the desired number of columns/rows
# @param value The value to use for the padded cells
# @return A matrix
pad_matrix <- function(x, cols = NULL, rows = NULL, value = 0L) {
  nc <- ncol(x)
  nr <- nrow(x)
  if (!is.null(cols) && nc < cols) {
    pad_mat <- matrix(value, nr, cols - nc)
    x <- cbind(x, pad_mat)
    nc <- ncol(x) # update nc to reflect new num cols
  }
  if (!is.null(rows) && nr < rows) {
    pad_mat <- matrix(value, rows - nr, nc)
    x <- rbind(x, pad_mat)
  }
  x
}

#------- helpers from brms package

# stop() without printing the call
stop2 <- function(...) {
  stop(..., call. = FALSE)
}

# Check if an object is NULL, or a vector/list whose elements are all NULL
is_null <- function(x) {
  # scalar if/else rather than ifelse() (ifelse is for vectors)
  is.null(x) || (is.vector(x) && all(vapply(x, is.null, logical(1))))
}

# Recursively removes NULL entries from an object
#
# @param x A list (or other object) to clean.
# @param recursive If TRUE, also clean nested lists.
rm_null <- function(x, recursive = TRUE) {
  x <- Filter(Negate(is_null), x)
  if (recursive) {
    x <- lapply(x, function(x) if (is.list(x)) rm_null(x) else x)
  }
  x
}
rstanarm/R/loo-kfold.R0000644000176200001440000002566414370470372014337 0ustar liggesusers#' K-fold cross-validation
#'
#' The \code{kfold} method performs exact \eqn{K}-fold cross-validation. First
#' the data are randomly partitioned into \eqn{K} subsets of equal size (or as close
#' to equal as possible), or the user can specify the \code{folds} argument
#' to determine the partitioning. Then the model is refit \eqn{K} times, each time
#' leaving out one of the \eqn{K} subsets.
If \eqn{K} is equal to the total #' number of observations in the data then \eqn{K}-fold cross-validation is #' equivalent to exact leave-one-out cross-validation (to which #' \code{\link[=loo.stanreg]{loo}} is an efficient approximation). #' #' @aliases kfold #' @importFrom loo kfold is.kfold #' @export #' @template reference-loo #' #' @param x A fitted model object returned by one of the rstanarm modeling #' functions. See \link{stanreg-objects}. #' @param K For \code{kfold}, the number of subsets (folds) into which the data #' will be partitioned for performing \eqn{K}-fold cross-validation. The model #' is refit \code{K} times, each time leaving out one of the \code{K} folds. #' If the \code{folds} argument is specified then \code{K} will automatically #' be set to \code{length(unique(folds))}, otherwise the specified value of #' \code{K} is passed to \code{loo::\link[loo:kfold-helpers]{kfold_split_random}} to #' randomly partition the data into \code{K} subsets of equal (or as close to #' equal as possible) size. #' @param save_fits For \code{kfold}, if \code{TRUE}, a component \code{'fits'} #' is added to the returned object to store the cross-validated #' \link[=stanreg-objects]{stanreg} objects and the indices of the omitted #' observations for each fold. Defaults to \code{FALSE}. #' @param folds For \code{kfold}, an optional integer vector with one element #' per observation in the data used to fit the model. Each element of the #' vector is an integer in \code{1:K} indicating to which of the \code{K} #' folds the corresponding observation belongs. There are some convenience #' functions available in the \pkg{loo} package that create integer vectors to #' use for this purpose (see the \strong{Examples} section below and also the #' \link[loo]{kfold-helpers} page). #' #' @param cores The number of cores to use for parallelization. 
Instead of fitting
#' separate Markov chains for the same model on different cores, by default
#' \code{kfold} will distribute the \code{K} models to be fit across the cores
#' (using \code{\link[parallel:clusterApply]{parLapply}} on Windows and
#' \code{\link[parallel]{mclapply}} otherwise). The Markov chains for each
#' model will be run sequentially. This will often be the most efficient
#' option, especially if many cores are available, but in some cases it may be
#' preferable to fit the \code{K} models sequentially and instead use the
#' cores for the Markov chains. This can be accomplished by setting
#' \code{options(mc.cores)} to be the desired number of cores to use
#' for the Markov chains \emph{and} also manually specifying \code{cores=1}
#' when calling the \code{kfold} function. See the end of the
#' \strong{Examples} section for a demonstration.
#'
#' @param ... Currently ignored.
#'
#' @return An object with classes 'kfold' and 'loo' that has a similar structure
#' as the objects returned by the \code{\link{loo}} and \code{\link{waic}}
#' methods and is compatible with the \code{\link{loo_compare}} function for
#' comparing models.
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \donttest{
#' fit1 <- stan_glm(mpg ~ wt, data = mtcars, refresh = 0)
#' fit2 <- stan_glm(mpg ~ wt + cyl, data = mtcars, refresh = 0)
#' fit3 <- stan_glm(mpg ~ disp * as.factor(cyl), data = mtcars, refresh = 0)
#'
#' # 10-fold cross-validation
#' # (if possible also specify the 'cores' argument to use multiple cores)
#' (kfold1 <- kfold(fit1, K = 10))
#' kfold2 <- kfold(fit2, K = 10)
#' kfold3 <- kfold(fit3, K = 10)
#' loo_compare(kfold1, kfold2, kfold3)
#'
#' # stratifying by a grouping variable
#' # (note: might get some divergences warnings with this model but
#' # this is just intended as a quick example of how to code this)
#' fit4 <- stan_lmer(mpg ~ disp + (1|cyl), data = mtcars, refresh = 0)
#' table(mtcars$cyl)
#' folds_cyl <- loo::kfold_split_stratified(K = 3, x = mtcars$cyl)
#' table(cyl = mtcars$cyl, fold = folds_cyl)
#' kfold4 <- kfold(fit4, folds = folds_cyl, cores = 2)
#' print(kfold4)
#' }
#' }
#' # Example code demonstrating the different ways to specify the number
#' # of cores and how the cores are used
#' #
#' # options(mc.cores = NULL)
#' #
#' # # spread the K models over N_CORES cores (method 1)
#' # kfold(fit, K, cores = N_CORES)
#' #
#' # # spread the K models over N_CORES cores (method 2)
#' # options(mc.cores = N_CORES)
#' # kfold(fit, K)
#' #
#' # # fit K models sequentially using N_CORES cores for the Markov chains each time
#' # options(mc.cores = N_CORES)
#' # kfold(fit, K, cores = 1)
kfold.stanreg <- function(x, K = 10, ..., folds = NULL, save_fits = FALSE,
                          cores = getOption("mc.cores", 1)) {
  # kfold is only defined for single-outcome stanreg models; fail early on
  # stanmvreg objects and on models fit with observation weights.
  if (is.stanmvreg(x)) {
    STOP_if_stanmvreg("kfold")
  }
  if (model_has_weights(x)) {
    stop("kfold is not currently available for models fit using weights.")
  }
  stopifnot(length(cores) == 1, cores == as.integer(cores), cores >= 1)
  # Decide how to spend the cores: when the K refits run sequentially
  # (kfold_cores == 1) each refit may use getOption("mc.cores") for its
  # Markov chains; otherwise the cores are reserved for fitting folds in
  # parallel and each model's chains run sequentially (stan_cores = 1).
  stan_cores <- 1
  kfold_cores <- cores
  if (kfold_cores == 1) {
    stan_cores <- getOption("mc.cores", 1)
  }
  d <- kfold_and_reloo_data(x) # defined in loo.R
  N <- nrow(d)
  # Either draw a random partition into K folds, or validate the
  # user-supplied fold-membership vector (in which case K is derived from
  # the number of distinct folds).
  if (is.null(folds)) {
    stopifnot(K > 1, K <= nobs(x))
    K <- as.integer(K)
    folds <- loo::kfold_split_random(K = K, N = N)
  } else {
    K <- length(unique(folds))
    stopifnot(
      length(folds) == N,
      all(folds == as.integer(folds)),
      all(folds %in% 1L:K),
      all(1:K %in% folds)
    )
    folds <- as.integer(folds)
  }
  # Build (but do not yet evaluate) one update() call per fold, each
  # dropping that fold's rows from the data.
  calls <- list()
  omitteds <- list()
  for (k in 1:K) {
    omitted_k <- which(folds == k)
    if (used.sampling(x)) {
      fit_k_call <-
        update.stanreg(
          object = x,
          data = d[-omitted_k,, drop=FALSE],
          subset = rep(TRUE, nrow(d) - length(omitted_k)),
          weights = NULL,
          cores = stan_cores,
          refresh = 0,
          open_progress = FALSE,
          evaluate = FALSE # just store unevaluated calls for now
        )
    } else {
      # Non-MCMC algorithms take no 'cores'/'open_progress' arguments.
      fit_k_call <-
        update.stanreg(
          object = x,
          data = d[-omitted_k,, drop=FALSE],
          subset = rep(TRUE, nrow(d) - length(omitted_k)),
          weights = NULL,
          refresh = 0,
          evaluate = FALSE # just store unevaluated calls for now
        )
    }
    if (!is.null(getCall(x)$offset)) {
      fit_k_call$offset <- x$offset[-omitted_k]
    }
    # Force the lazily stored arguments now so each stored call is
    # self-contained when evaluated later (possibly on a worker process).
    fit_k_call$cores <- eval(fit_k_call$cores)
    fit_k_call$subset <- eval(fit_k_call$subset)
    fit_k_call$data <- eval(fit_k_call$data)
    fit_k_call$offset <- eval(fit_k_call$offset)
    omitteds[[k]] <- omitted_k
    calls[[k]] <- fit_k_call
  }
  # K x 2 array holding each refit and the indices it omitted; only filled
  # in when save_fits = TRUE.
  fits <- array(list(), c(K, 2), list(NULL, c("fit", "omitted")))
  if (kfold_cores == 1) {
    # Sequential path: fit each fold in turn and evaluate the log-likelihood
    # on the held-out rows.
    lppds <- list()
    for (k in 1:K) {
      message("Fitting model ", k, " out of ", K)
      capture.output(
        fit_k <- eval(calls[[k]])
      )
      omitted_k <- omitteds[[k]]
      lppds[[k]] <- log_lik.stanreg(
        fit_k,
        newdata = d[omitted_k, , drop = FALSE],
        offset = x$offset[omitted_k],
        newx = get_x(x)[omitted_k, , drop = FALSE],
        newz = x$z[omitted_k, , drop = FALSE], # NULL other than for some stan_betareg models
        stanmat = as.matrix.stanreg(fit_k)
      )
      if (save_fits) {
        fits[k, ] <- list(fit = fit_k, omitted = omitted_k)
      }
    }
  } else { # parallelize by fold
    message("Fitting K = ", K, " models distributed over ", cores, " cores")
    if (.Platform$OS.type != "windows") {
      # Fork-based parallelism on Unix-alikes.
      out <- parallel::mclapply(
        mc.cores = kfold_cores,
        mc.preschedule = FALSE,
        X = 1:K,
        FUN = function(k) {
          fit_k <- eval(calls[[k]])
          omitted_k <- omitteds[[k]]
          lppds_k <- log_lik.stanreg(
            fit_k,
            newdata = d[omitted_k, , drop = FALSE],
            offset = x$offset[omitted_k],
            newx = get_x(x)[omitted_k, , drop = FALSE],
            newz = x$z[omitted_k, , drop = FALSE],
            stanmat = as.matrix.stanreg(fit_k)
          )
          return(list(lppds = lppds_k, fit = if (save_fits) fit_k else NULL))
        }
      )
    } else { # windows
      # No fork() on Windows: use a PSOCK cluster, torn down on exit.
      cl <- parallel::makePSOCKcluster(kfold_cores)
      on.exit(parallel::stopCluster(cl))
      out <- parallel::parLapply(
        cl = cl,
        X = 1:K,
        ...,
        fun = function(k) {
          fit_k <- eval(calls[[k]])
          omitted_k <- omitteds[[k]]
          lppds_k <- log_lik.stanreg(
            fit_k,
            newdata = d[omitted_k, , drop = FALSE],
            offset = x$offset[omitted_k],
            newx = get_x(x)[omitted_k, , drop = FALSE],
            newz = x$z[omitted_k, , drop = FALSE],
            stanmat = as.matrix.stanreg(fit_k)
          )
          return(list(lppds = lppds_k, fit = if (save_fits) fit_k else NULL))
        }
      )
    }
    lppds <- lapply(out, "[[", "lppds")
    if (save_fits) {
      for (k in 1:K) {
        fits[k, ] <- list(fit = out[[k]][["fit"]], omitted = omitteds[[k]])
      }
    }
  }
  # Per-observation elpd within each fold (log mean over posterior draws).
  elpds_unord <- unlist(lapply(lppds, function(x) {
    apply(x, 2, log_mean_exp)
  }))
  # make sure elpds are put back in the right order
  obs_order <- unlist(lapply(1:K, function(k) which(folds == k)))
  elpds <- rep(NA, length(elpds_unord))
  elpds[obs_order] <- elpds_unord
  # for computing effective number of parameters
  ll_full <- log_lik(x)
  lpds <- apply(ll_full, 2, log_mean_exp)
  ps <- lpds - elpds
  # Package the pointwise results and their totals/SEs like a loo object so
  # that loo_compare() can consume the result.
  pointwise <- cbind(elpd_kfold = elpds, p_kfold = ps, kfoldic = -2 * elpds)
  est <- colSums(pointwise)
  se_est <- sqrt(N * apply(pointwise, 2, var))
  out <- list(
    estimates = cbind(Estimate = est, SE = se_est),
    pointwise = pointwise,
    elpd_kfold = est[["elpd_kfold"]],
    se_elpd_kfold = se_est[["elpd_kfold"]],
    p_kfold = est[["p_kfold"]],
    se_p_kfold = se_est[["p_kfold"]]
  )
  rownames(out$estimates) <- colnames(pointwise)
  if (save_fits) {
    out$fits <- fits
  }
  structure(out,
            class = c("kfold", "loo"),
            K = K,
            dims = dim(lppds[[1]]),
            model_name = deparse(substitute(x)),
            discrete = is_discrete(x),
            yhash = hash_y(x),
            formula = loo_model_formula(x))
}
rstanarm/R/bayes_R2.R0000644000176200001440000001111014370470372014074 0ustar liggesusers#' Compute a Bayesian version of R-squared or LOO-adjusted R-squared for
#' regression models.
#'
#' @aliases bayes_R2
#' @export
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @param re.form For models with group-level terms, \code{re.form} is
#' passed to \code{\link{posterior_epred}} if specified.
#' @param ... Currently ignored.
#'
#' @return A vector of R-squared values with length equal to the posterior
#' sample size (the posterior distribution of R-squared).
#'
#' @references
#' Andrew Gelman, Ben Goodrich, Jonah Gabry, and Aki Vehtari (2018). R-squared
#' for Bayesian regression models. \emph{The American Statistician}, to appear.
#' \doi{10.1080/00031305.2018.1549100}
#' (\href{http://www.stat.columbia.edu/~gelman/research/published/bayes_R2_v3.pdf}{Preprint},
#' \href{https://avehtari.github.io/bayes_R2/bayes_R2.html}{Notebook})
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' fit <- stan_glm(
#'   mpg ~ wt + cyl,
#'   data = mtcars,
#'   QR = TRUE,
#'   chains = 2,
#'   refresh = 0
#' )
#' rsq <- bayes_R2(fit)
#' print(median(rsq))
#' hist(rsq)
#'
#' loo_rsq <- loo_R2(fit)
#' print(median(loo_rsq))
#'
#' # multilevel binomial model
#' if (!exists("example_model")) example(example_model)
#' print(example_model)
#' median(bayes_R2(example_model))
#' median(bayes_R2(example_model, re.form = NA)) # exclude group-level
#' }
bayes_R2.stanreg <- function(object, ..., re.form = NULL) {
  # Only available for MCMC fits of Gaussian/binomial (non-polr) models.
  if (!used.sampling(object))
    STOP_sampling_only("bayes_R2")
  if (is_polr(object))
    stop("bayes_R2 is not available for stan_polr models.")
  fam <- family(object)$family
  if (!fam %in% c("gaussian", "binomial")) {
    stop("bayes_R2 is only available for Gaussian and binomial models.")
  }
  mu_pred <- posterior_epred(object, re.form = re.form)
  if
(is.binomial(fam)) {
    # Binomial family: the residual variance follows from the mean function.
    y <- get_y(object)
    if (NCOL(y) == 2) {
      # cbind(successes, failures) outcome: work on the count scale.
      trials <- rowSums(y)
      trials_mat <- matrix(trials, nrow = nrow(mu_pred), ncol = ncol(mu_pred),
                           byrow = TRUE)
      tmp <- mu_pred * trials_mat
      sigma2 <- rowMeans(tmp * (1 - mu_pred))
      mu_pred <- tmp
    } else {
      sigma2 <- rowMeans(mu_pred * (1 - mu_pred))
    }
  } else {
    # Gaussian family: use the posterior draws of sigma.
    sigma2 <- drop(as.matrix(object, pars = "sigma"))^2
  }
  # Bayesian R^2, computed draw-by-draw (one value per posterior draw):
  # explained variance over explained-plus-residual variance.
  var_mu_pred <- apply(mu_pred, 1, var)
  r_squared <- var_mu_pred / (var_mu_pred + sigma2)
  return(r_squared)
}

#' @rdname bayes_R2.stanreg
#' @aliases loo_R2
#' @importFrom rstantools loo_R2
#' @export
#'
loo_R2.stanreg <- function(object, ...) {
  # Only available for MCMC fits of Gaussian/binomial (non-polr) models.
  if (!used.sampling(object))
    STOP_sampling_only("loo_R2")
  if (is_polr(object))
    stop("loo_R2 is not available for stan_polr models.")
  fam <- family(object)$family
  if (!fam %in% c("gaussian", "binomial")) {
    stop("loo_R2 is only available for Gaussian and binomial models.")
  }
  y <- get_y(object)
  log_ratios <- -log_lik(object)
  # Reuse the PSIS object cached on the fit (if loo() was already run);
  # otherwise compute one from the pointwise log-likelihood.
  psis_object <- object[["loo"]][["psis_object"]]
  if (is.null(psis_object)) {
    psis_object <- loo::psis(log_ratios, r_eff = NA)
  }
  mu_pred <- posterior_epred(object)
  if (is.binomial(fam)) {
    if (is.factor(y)) {
      y <- fac2bin(y)
    } else if (NCOL(y) == 2) {
      # cbind(successes, failures): compare expected counts to observed
      # successes (first column).
      trials <- rowSums(y)
      y <- y[, 1]
      trials_mat <- matrix(trials, nrow = nrow(mu_pred), ncol = ncol(mu_pred),
                           byrow = TRUE)
      mu_pred <- mu_pred * trials_mat
    }
  }
  # LOO-adjusted predictions via importance-weighted expectations.
  mu_pred_loo <- loo::E_loo(mu_pred, psis_object, log_ratios = log_ratios)$value
  err_loo <- mu_pred_loo - y
  S <- nrow(mu_pred)
  N <- ncol(mu_pred)
  # set the random seed as the seed used in the first chain and ensure
  # the old RNG state is restored on exit
  rng_state_old <- .Random.seed
  on.exit(assign(".Random.seed", rng_state_old, envir = .GlobalEnv))
  set.seed(object$stanfit@stan_args[[1]]$seed)
  # dirichlet weights (Bayesian bootstrap over the N observations)
  exp_draws <- matrix(rexp(S * N, rate = 1), nrow = S, ncol = N)
  wts <- exp_draws / rowSums(exp_draws)
  # Weighted variances via the identity Var = E[x^2] - (E[x])^2, rescaled by
  # N/(N-1).
  var_y <- (rowSums(sweep(wts, 2, y^2, FUN = "*")) -
              rowSums(sweep(wts, 2, y, FUN = "*"))^2) * (N/(N-1))
  # BUG FIX: square the weighted *sum* here. Previously the '^2' sat inside
  # rowSums() (a sum of squared terms), which is inconsistent with the var_y
  # computation above and with the weighted-variance identity.
  var_err_loo <- (rowSums(sweep(wts, 2, err_loo^2, FUN = "*")) -
                    rowSums(sweep(wts, 2, err_loo, FUN = "*"))^2) * (N/(N-1))
  loo_r_squared <- 1 - var_err_loo / var_y
  # Clamp to [-1, 1] to guard against extreme bootstrap draws.
  loo_r_squared[loo_r_squared < -1] <- -1
  loo_r_squared[loo_r_squared > 1] <- 1
  return(loo_r_squared)
}

# internal ----------------------------------------------------------------

# Extract the response: from the fitted object when newdata is NULL,
# otherwise by evaluating the LHS of the model formula in newdata.
get_y_new <- function(object, newdata = NULL) {
  if (is.null(newdata)) {
    get_y(object)
  } else {
    eval(formula(object)[[2]], newdata)
  }
}
rstanarm/R/stan_betareg.R0000644000176200001440000002414014370470372015073 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Bayesian beta regression models via Stan
#'
#' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}}
#' Beta regression modeling with optional prior distributions for the
#' coefficients, intercept, and auxiliary parameter \code{phi} (if applicable).
#'
#' @export
#' @templateVar armRef (Ch.
3-6) #' @templateVar pkg betareg #' @templateVar pkgfun betareg #' @templateVar sameargs model,offset,weights #' @templateVar rareargs na.action #' @templateVar fun stan_betareg #' @templateVar fitfun stan_betareg.fit #' @template return-stanreg-object #' @template return-stanfit-object #' @template see-also #' @template args-formula-data-subset #' @template args-same-as #' @template args-same-as-rarely #' @template args-x-y #' @template args-dots #' @template args-prior_intercept #' @template args-priors #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' @template args-QR #' #' @param link Character specification of the link function used in the model #' for mu (specified through \code{x}). Currently, "logit", "probit", #' "cloglog", "cauchit", "log", and "loglog" are supported. #' @param link.phi If applicable, character specification of the link function #' used in the model for \code{phi} (specified through \code{z}). Currently, #' "identity", "log" (default), and "sqrt" are supported. Since the "sqrt" #' link function is known to be unstable, it is advisable to specify a #' different link function (or to model \code{phi} as a scalar parameter #' instead of via a linear predictor by excluding \code{z} from the #' \code{formula} and excluding \code{link.phi}). #' @param prior_z Prior distribution for the coefficients in the model for #' \code{phi} (if applicable). Same options as for \code{prior}. #' @param prior_intercept_z Prior distribution for the intercept in the model #' for \code{phi} (if applicable). Same options as for \code{prior_intercept}. #' @param prior_phi The prior distribution for \code{phi} if it is \emph{not} #' modeled as a function of predictors. If \code{z} variables are specified #' then \code{prior_phi} is ignored and \code{prior_intercept_z} and #' \code{prior_z} are used to specify the priors on the intercept and #' coefficients in the model for \code{phi}. 
When applicable, \code{prior_phi} #' can be a call to \code{exponential} to use an exponential distribution, or #' one of \code{normal}, \code{student_t} or \code{cauchy} to use half-normal, #' half-t, or half-Cauchy prior. See \code{\link{priors}} for details on these #' functions. To omit a prior ---i.e., to use a flat (improper) uniform #' prior--- set \code{prior_phi} to \code{NULL}. #' #' @details The \code{stan_betareg} function is similar in syntax to #' \code{\link[betareg]{betareg}} but rather than performing maximum #' likelihood estimation, full Bayesian estimation is performed (if #' \code{algorithm} is \code{"sampling"}) via MCMC. The Bayesian model adds #' priors (independent by default) on the coefficients of the beta regression #' model. The \code{stan_betareg} function calls the workhorse #' \code{stan_betareg.fit} function, but it is also possible to call the #' latter directly. #' #' @seealso The vignette for \code{stan_betareg}. #' \url{https://mc-stan.org/rstanarm/articles/} #' #' @references Ferrari, SLP and Cribari-Neto, F (2004). Beta regression for #' modeling rates and proportions. \emph{Journal of Applied Statistics}. #' 31(7), 799--815. 
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' ### Simulated data
#' N <- 200
#' x <- rnorm(N, 2, 1)
#' z <- rnorm(N, 2, 1)
#' mu <- binomial(link = "logit")$linkinv(1 + 0.2*x)
#' phi <- exp(1.5 + 0.4*z)
#' y <- rbeta(N, mu * phi, (1 - mu) * phi)
#' hist(y, col = "dark grey", border = FALSE, xlim = c(0,1))
#' fake_dat <- data.frame(y, x, z)
#'
#' fit <- stan_betareg(
#'   y ~ x | z, data = fake_dat,
#'   link = "logit",
#'   link.phi = "log",
#'   algorithm = "optimizing" # just for speed of example
#' )
#' print(fit, digits = 2)
#' }
stan_betareg <- function(formula, data, subset, na.action, weights, offset,
                         link = c("logit", "probit", "cloglog", "cauchit", "log", "loglog"),
                         link.phi = NULL, model = TRUE, y = TRUE, x = FALSE,
                         ...,
                         prior = normal(autoscale=TRUE),
                         prior_intercept = normal(autoscale=TRUE),
                         prior_z = normal(autoscale=TRUE),
                         prior_intercept_z = normal(autoscale=TRUE),
                         prior_phi = exponential(autoscale=TRUE),
                         prior_PD = FALSE,
                         algorithm = c("sampling", "optimizing", "meanfield", "fullrank"),
                         adapt_delta = NULL, QR = FALSE) {
  if (!requireNamespace("betareg", quietly = TRUE)) {
    stop("Please install the betareg package before using 'stan_betareg'.")
  }
  if (!has_outcome_variable(formula)) {
    stop("LHS of formula must be specified.")
  }
  # Rewrite this call into a betareg::betareg() call with zero iterations
  # (maxit = 0, fsmaxit = 0) purely to obtain the model frame, terms, and
  # design matrices -- no maximum-likelihood estimation actually runs.
  mc <- match.call(expand.dots = FALSE)
  data <- validate_data(data, if_missing = environment(formula))
  mc$data <- data
  mc$model <- mc$y <- mc$x <- TRUE
  # NULLify any Stan specific arguments in mc
  mc$prior <- mc$prior_intercept <- mc$prior_PD <- mc$algorithm <-
    mc$adapt_delta <- mc$QR <- mc$sparse <- mc$prior_dispersion <- NULL
  mc$drop.unused.levels <- TRUE
  mc[[1L]] <- quote(betareg::betareg)
  mc$control <- betareg::betareg.control(maxit = 0, fsmaxit = 0)
  br <- suppressWarnings(eval(mc, parent.frame()))
  mf <- check_constant_vars(br$model)
  mt <- br$terms
  Y <- array1D_check(model.response(mf, type = "any"))
  X <- model.matrix(br)
  # Z is the design matrix for the precision (phi) submodel.
  Z <- model.matrix(br, model = "precision")
  weights <- validate_weights(as.vector(model.weights(mf)))
  offset <- validate_offset(as.vector(model.offset(mf)), y = Y)
  # check if user specified matrix for precision model
  # (no "|" part in the formula and no link.phi means phi is a scalar).
  if (length(grep("\\|", all.names(formula))) == 0 && is.null(link.phi))
    Z <- NULL
  algorithm <- match.arg(algorithm)
  link <- match.arg(link)
  # match.arg(NULL, choices) returns the first choice, i.e. "log".
  link_phi <- match.arg(link.phi, c(NULL, "log", "identity", "sqrt"))
  stanfit <- stan_betareg.fit(x = X, y = Y, z = Z,
                              weights = weights, offset = offset,
                              link = link, link.phi = link.phi, ...,
                              prior = prior, prior_z = prior_z,
                              prior_intercept = prior_intercept,
                              prior_intercept_z = prior_intercept_z,
                              prior_phi = prior_phi,
                              prior_PD = prior_PD, algorithm = algorithm,
                              adapt_delta = adapt_delta, QR = QR)
  # Non-stanfit return values (e.g. diagnostics) are passed straight through.
  if (algorithm != "optimizing" && !is(stanfit, "stanfit"))
    return(stanfit)
  if (is.null(link.phi) && is.null(Z))
    link_phi <- "identity"
  # Drop constant non-intercept columns from the design matrices; Stan never
  # saw them.
  sel <- apply(X, 2L, function(x) !all(x == 1) && length(unique(x)) < 2)
  X <- X[ , !sel, drop = FALSE]
  if (!is.null(Z)) {
    sel <- apply(Z, 2L, function(x) !all(x == 1) && length(unique(x)) < 2)
    Z <- Z[ , !sel, drop = FALSE]
  }
  # Assemble the stanreg object components.
  fit <- nlist(stanfit, algorithm, data, offset, weights,
               x = X, y = Y, z = Z %ORifNULL% model.matrix(y ~ 1),
               family = beta_fam(link), family_phi = beta_phi_fam(link_phi),
               formula, model = mf, terms = mt, call = match.call(),
               na.action = attr(mf, "na.action"),
               contrasts = attr(X, "contrasts"),
               stan_function = "stan_betareg")
  out <- stanreg(fit)
  if (algorithm == "optimizing") {
    out$log_p <- stanfit$log_p
    out$log_g <- stanfit$log_g
  }
  # Record factor/character levels for use in predictions on new data.
  out$xlevels <- lapply(mf[,-1], FUN = function(x) {
    xlev <- if (is.factor(x) || is.character(x)) levels(x) else NULL
    xlev[!vapply(xlev, is.null, NA)]
  })
  out$levels <- br$levels
  # Honor the model/x/y flags by stripping unrequested components.
  if (!x) out$x <- NULL
  if (!y) out$y <- NULL
  if (!model) out$model <- NULL
  structure(out, class = c("stanreg", "betareg"))
}

# internal ----------------------------------------------------------------

# Family object for the mean submodel of a beta regression. Reuses the
# binomial() link machinery, with a hand-built "loglog" link, and relabels
# the family as "beta".
beta_fam <- function(link = "logit") {
  stopifnot(is.character(link))
  if (link == "loglog") {
    out <- binomial("cloglog")
    # loglog inverse link, clamped away from 0 and 1 for numerical safety.
    out$linkinv <- function(eta) {
      1 - pmax(pmin(-expm1(-exp(eta)), 1 - .Machine$double.eps),
               .Machine$double.eps)
    }
    out$linkfun <- function(mu) log(-log(mu))
  } else {
    out <- binomial(link)
  }
  out$family <- "beta"
  out$variance <- function(mu, phi) mu * (1 - mu) / (phi + 1)
  # These GLM hooks are never used for a Bayesian fit; fail loudly if called.
  out$dev.resids <- function(y, mu, wt)
    stop("'dev.resids' function should not be called")
  out$aic <- function(y, n, mu, wt, dev)
    stop("'aic' function should not have been called")
  out$simulate <- function(object, nsim)
    stop("'simulate' function should not have been called")
  return(out)
}

# Family object for the precision (phi) submodel; reuses the poisson() link
# machinery and relabels the family as "beta_phi".
beta_phi_fam <- function(link = "log") {
  stopifnot(is.character(link))
  out <- poisson(link)
  out$family <- "beta_phi"
  out$variance <- function(mu, phi) mu * (1 - mu) / (phi + 1)
  # These GLM hooks are never used for a Bayesian fit; fail loudly if called.
  out$dev.resids <- function(y, mu, wt)
    stop("'dev.resids' function should not be called")
  out$aic <- function(y, n, mu, wt, dev)
    stop("'aic' function should not have been called")
  out$simulate <- function(object, nsim)
    stop("'simulate' function should not have been called")
  return(out)
}
rstanarm/R/ps_check.R0000644000176200001440000001205514406606742014220 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# #' Graphical checks of the estimated survival function #' #' This function plots the estimated marginal survival function based on draws #' from the posterior predictive distribution of the fitted joint model, and then #' overlays the Kaplan-Meier curve based on the observed data. #' #' @export #' @templateVar stanjmArg object #' @templateVar labsArg xlab,ylab #' @templateVar cigeomArg ci_geom_args #' @template args-stanjm-object #' @template args-labs #' @template args-ci-geom-args #' #' @param check The type of plot to show. Currently only "survival" is #' allowed, which compares the estimated marginal survival function under #' the joint model to the estimated Kaplan-Meier curve based on the #' observed data. #' @param limits A quoted character string specifying the type of limits to #' include in the plot. Can be one of: \code{"ci"} for the Bayesian #' posterior uncertainty interval (often known as a credible interval); #' or \code{"none"} for no interval limits. #' @param draws An integer indicating the number of MCMC draws to use to #' to estimate the survival function. The default and maximum number of #' draws is the size of the posterior sample. #' @param seed An optional \code{\link[=set.seed]{seed}} to use. #' @param ... Optional arguments passed to #' \code{\link[ggplot2:geom_path]{geom_line}} and used to control features #' of the plotted trajectory. #' #' @return A ggplot object that can be further customized using the #' \pkg{ggplot2} package. #' #' @seealso \code{\link{posterior_survfit}} for the estimated marginal or #' subject-specific survival function based on draws of the model parameters #' from the posterior distribution, #' \code{\link{posterior_predict}} for drawing from the posterior #' predictive distribution for the longitudinal submodel, and #' \code{\link{pp_check}} for graphical checks of the longitudinal submodel. 
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \donttest{
#' if (!exists("example_jm")) example(example_jm)
#' # Compare estimated survival function to Kaplan-Meier curve
#' ps <- ps_check(example_jm)
#' ps +
#'  ggplot2::scale_color_manual(values = c("red", "black")) + # change colors
#'  ggplot2::scale_size_manual(values = c(0.5, 3)) + # change line sizes
#'  ggplot2::scale_fill_manual(values = c(NA, NA)) # remove fill
#' }
#' }
#' @importFrom ggplot2 ggplot aes_string geom_step
#'
ps_check <- function(object, check = "survival",
                     limits = c("ci", "none"),
                     draws = NULL, seed = NULL,
                     xlab = NULL, ylab = NULL,
                     ci_geom_args = NULL, ...) {
  if (!requireNamespace("survival"))
    stop("the 'survival' package must be installed to use this function")
  validate_stanjm_object(object)
  limits <- match.arg(limits)

  # Predictions for plotting the estimated survival function
  dat <- posterior_survfit(object, standardise = TRUE, condition = FALSE,
                           times = 0, extrapolate = TRUE, draws = draws,
                           seed = seed)

  # Estimate KM curve based on response from the event submodel
  # (this initial 'form' is unconditionally replaced below once the
  # response type is known).
  form <- reformulate("1", response = formula(object)$Event[[2]])
  coxdat <- object$survmod$mod$y
  if (is.null(coxdat))
    stop("Bug found: no response y found in the 'survmod' component of the ",
         "fitted joint model.")
  resp <- attr(coxdat, "type")
  if (resp == "right") {
    form <- formula(survival::Surv(time, status) ~ 1)
  } else if (resp == "counting") {
    # NOTE(review): the third argument 'time' looks like it should be the
    # status indicator for a counting-type Surv object -- confirm against
    # the column names of the stored response.
    form <- formula(survival::Surv(start, stop, time) ~ 1)
  } else {
    stop("Bug found: only 'right' or 'counting' survival outcomes should ",
         "have been allowed as the response type in the fitted joint model.")
  }
  km <- survival::survfit(form, data = as.data.frame(unclass(coxdat)))
  # Kaplan-Meier estimate with its confidence band, one row per event time.
  kmdat <- data.frame(times = km$time, surv = km$surv,
                      lb = km$lower, ub = km$upper)

  # Plot estimated survival function with KM curve overlaid
  graph <- plot.survfit.stanjm(dat, ids = NULL, limits = limits, ...)
  kmgraph <- geom_step(data = kmdat,
                       mapping = aes_string(x = "times", y = "surv"))
  graph + kmgraph
}
rstanarm/R/stanreg-objects.R0000644000176200001440000001553113722762571015540 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Fitted model objects
#'
#' The \pkg{rstanarm} model-fitting functions return an object of class
#' \code{'stanreg'}, which is a list containing at a minimum the components listed
#' below. Each \code{stanreg} object will also have additional classes (e.g. 'aov',
#' 'betareg', 'glm', 'polr', etc.) and several additional components depending
#' on the model and estimation algorithm. \cr
#' \cr
#' Some additional details apply to models estimated using the \code{\link{stan_mvmer}}
#' or \code{\link{stan_jm}} modelling functions. The \code{\link{stan_mvmer}} modelling
#' function returns an object of class \code{'stanmvreg'}, which inherits the
#' \code{'stanreg'} class, but has a number of additional elements described in the
#' subsection below.
The \code{\link{stan_jm}} modelling function returns an object of class
#' \code{'stanjm'}, which inherits both the \code{'stanmvreg'} and \code{'stanreg'}
#' classes, but has a number of additional elements described in the subsection below.
#' Both the \code{'stanjm'} and \code{'stanmvreg'} classes have several of their own
#' methods for situations in which the default \code{'stanreg'} methods are not
#' suitable; see the \strong{See Also} section below.
#'
#' @name stanreg-objects
#'
#' @section Elements for \code{stanreg} objects:
#' \describe{
#'   \item{\code{coefficients}}{
#'   Point estimates, as described in \code{\link{print.stanreg}}.
#'   }
#'   \item{\code{ses}}{
#'   Standard errors based on \code{\link[stats]{mad}}, as described in
#'   \code{\link{print.stanreg}}.
#'   }
#'   \item{\code{residuals}}{
#'   Residuals of type \code{'response'}.
#'   }
#'   \item{\code{fitted.values}}{
#'   Fitted mean values. For GLMs the linear predictors are transformed by the
#'   inverse link function.
#'   }
#'   \item{\code{linear.predictors}}{
#'   Linear fit on the link scale. For linear models this is the same as
#'   \code{fitted.values}.
#'   }
#'   \item{\code{covmat}}{
#'   Variance-covariance matrix for the coefficients based on draws from the
#'   posterior distribution, the variational approximation, or the asymptotic
#'   sampling distribution, depending on the estimation algorithm.
#'   }
#'   \item{\code{model,x,y}}{
#'   If requested, the model frame, model matrix and response variable used,
#'   respectively.
#'   }
#'   \item{\code{family}}{
#'   The \code{\link[stats]{family}} object used.
#'   }
#'   \item{\code{call}}{
#'   The matched call.
#'   }
#'   \item{\code{formula}}{
#'   The model \code{\link[stats]{formula}}.
#'   }
#'   \item{\code{data,offset,weights}}{
#'   The \code{data}, \code{offset}, and \code{weights} arguments.
#'   }
#'   \item{\code{algorithm}}{
#'   The estimation method used.
#'   }
#'   \item{\code{prior.info}}{
#'   A list with information about the prior distributions used.
#' } #' \item{\code{stanfit,stan_summary}}{ #' The object of \code{\link[rstan]{stanfit-class}} returned by RStan and a #' matrix of various summary statistics from the stanfit object. #' } #' \item{\code{rstan_version}}{ #' The version of the \pkg{rstan} package that was used to fit the model. #' } #' } #' #' @section Elements for \code{stanmvreg} objects: #' \describe{ #' The \code{stanmvreg} objects contain the majority of the elements described #' above for \code{stanreg} objects, but in most cases these will be a list with each #' elements of the list correponding to one of the submodels (for example, #' the \code{family} element of a \code{stanmvreg} object will be a list with each #' element of the list containing the \code{\link[stats]{family}} object for one #' submodel). In addition, \code{stanmvreg} objects contain the following additional #' elements: #' \item{\code{cnms}}{ #' The names of the grouping factors and group specific parameters, collapsed #' across the longitudinal or glmer submodels. #' } #' \item{\code{flevels}}{ #' The unique factor levels for each grouping factor, collapsed across the #' longitudinal or glmer submodels. #' } #' \item{\code{n_markers}}{ #' The number of longitudinal or glmer submodels. #' } #' \item{\code{n_yobs}}{ #' The number of observations for each longitudinal or glmer submodel. #' } #' \item{\code{n_grps}}{ #' The number of levels for each grouping factor (for models estimated using #' \code{\link{stan_jm}}, this will be equal to \code{n_subjects} if the #' individual is the only grouping factor). #' } #' \item{\code{runtime}}{ #' The time taken to fit the model (in minutes). 
#' } #' } #' #' @section Additional elements for \code{stanjm} objects: #' \describe{ #' The \code{stanjm} objects contain the elements described above for #' \code{stanmvreg} objects, but also contain the following additional #' elements: #' \item{\code{id_var,time_var}}{ #' The names of the variables distinguishing between individuals, and #' representing time in the longitudinal submodel. #' } #' \item{\code{n_subjects}}{ #' The number of individuals. #' } #' \item{\code{n_events}}{ #' The number of non-censored events. #' } #' \item{\code{eventtime,status}}{ #' The event (or censoring) time and status indicator for each individual. #' } #' \item{\code{basehaz}}{ #' A list containing information about the baseline hazard. #' } #' \item{\code{assoc}}{ #' An array containing information about the association structure. #' } #' \item{\code{epsilon}}{ #' The width of the one-sided difference used to numerically evaluate the #' slope of the longitudinal trajectory; only relevant if a slope-based #' association structure was specified (e.g. etaslope, muslope, etc). #' } #' \item{\code{qnodes}}{ #' The number of Gauss-Kronrod quadrature nodes used to evaluate the #' cumulative hazard in the joint likelihood function. #' } #' } #' #' @note The \code{\link{stan_biglm}} function is an exception. It returns a #' \link[rstan:stanfit-class]{stanfit} object rather than a stanreg object. #' #' @seealso \code{\link{stanreg-methods}}, \code{\link{stanmvreg-methods}} #' NULL rstanarm/R/stan_polr.R0000644000176200001440000003073714370470372014447 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # Copyright 1994-2013 William N. Venables and Brian D. 
Ripley # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian ordinal regression models via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Bayesian inference for ordinal (or binary) regression models under a #' proportional odds assumption. #' #' @export #' @templateVar fun stan_polr #' @templateVar fitfun stan_polr.fit #' @templateVar pkg MASS #' @templateVar pkgfun polr #' @templateVar rareargs weights,na.action,contrasts,model #' @template return-stanreg-object #' @template return-stanfit-object #' @template see-also #' @template args-formula-data-subset #' @template args-same-as-rarely #' @template args-prior_PD #' @template args-algorithm #' @template args-dots #' @template args-adapt_delta #' #' @param method One of 'logistic', 'probit', 'loglog', 'cloglog' or 'cauchit', #' but can be abbreviated. See \code{\link[MASS]{polr}} for more details. #' @param prior Prior for coefficients. Should be a call to \code{\link{R2}} #' to specify the prior location of the \eqn{R^2} but can be \code{NULL} #' to indicate a standard uniform prior. See \code{\link{priors}}. #' @param prior_counts A call to \code{\link{dirichlet}} to specify the #' prior counts of the outcome when the predictors are at their sample #' means. 
#' @param shape Either \code{NULL} or a positive scalar that is interpreted #' as the shape parameter for a \code{\link[stats]{GammaDist}}ribution on #' the exponent applied to the probability of success when there are only #' two outcome categories. If \code{NULL}, which is the default, then the #' exponent is taken to be fixed at \eqn{1}. #' @param rate Either \code{NULL} or a positive scalar that is interpreted #' as the rate parameter for a \code{\link[stats]{GammaDist}}ribution on #' the exponent applied to the probability of success when there are only #' two outcome categories. If \code{NULL}, which is the default, then the #' exponent is taken to be fixed at \eqn{1}. #' @param do_residuals A logical scalar indicating whether or not to #' automatically calculate fit residuals after sampling completes. Defaults to #' \code{TRUE} if and only if \code{algorithm="sampling"}. Setting #' \code{do_residuals=FALSE} is only useful in the somewhat rare case that #' \code{stan_polr} appears to finish sampling but hangs instead of returning #' the fitted model object. #' #' @details The \code{stan_polr} function is similar in syntax to #' \code{\link[MASS]{polr}} but rather than performing maximum likelihood #' estimation of a proportional odds model, Bayesian estimation is performed #' (if \code{algorithm = "sampling"}) via MCMC. The \code{stan_polr} #' function calls the workhorse \code{stan_polr.fit} function, but it is #' possible to call the latter directly. #' #' As for \code{\link{stan_lm}}, it is necessary to specify the prior #' location of \eqn{R^2}. In this case, the \eqn{R^2} pertains to the #' proportion of variance in the latent variable (which is discretized #' by the cutpoints) attributable to the predictors in the model. #' #' Prior beliefs about the cutpoints are governed by prior beliefs about the #' outcome when the predictors are at their sample means. 
Both of these #' are explained in the help page on \code{\link{priors}} and in the #' \pkg{rstanarm} vignettes. #' #' Unlike \code{\link[MASS]{polr}}, \code{stan_polr} also allows the "ordinal" #' outcome to contain only two levels, in which case the likelihood is the #' same by default as for \code{\link{stan_glm}} with \code{family = binomial} #' but the prior on the coefficients is different. However, \code{stan_polr} #' allows the user to specify the \code{shape} and \code{rate} hyperparameters, #' in which case the probability of success is defined as the logistic CDF of #' the linear predictor, raised to the power of \code{alpha} where \code{alpha} #' has a gamma prior with the specified \code{shape} and \code{rate}. This #' likelihood is called \dQuote{scobit} by Nagler (1994) because if \code{alpha} #' is not equal to \eqn{1}, then the relationship between the linear predictor #' and the probability of success is skewed. If \code{shape} or \code{rate} is #' \code{NULL}, then \code{alpha} is assumed to be fixed to \eqn{1}. #' #' Otherwise, it is usually advisible to set \code{shape} and \code{rate} to #' the same number so that the expected value of \code{alpha} is \eqn{1} while #' leaving open the possibility that \code{alpha} may depart from \eqn{1} a #' little bit. It is often necessary to have a lot of data in order to estimate #' \code{alpha} with much precision and always necessary to inspect the #' Pareto shape parameters calculated by \code{\link{loo}} to see if the #' results are particularly sensitive to individual observations. #' #' Users should think carefully about how the outcome is coded when using #' a scobit-type model. When \code{alpha} is not \eqn{1}, the asymmetry #' implies that the probability of success is most sensitive to the predictors #' when the probability of success is less than \eqn{0.63}. 
#' Reversing the
#' coding of the successes and failures allows the predictors to have the
#' greatest impact when the probability of failure is less than \eqn{0.63}.
#' Also, the gamma prior on \code{alpha} is positively skewed, but you
#' can reverse the coding of the successes and failures to circumvent this
#' property.
#'
#' @references
#' Nagler, J., (1994). Scobit: An Alternative Estimator to Logit and Probit.
#' \emph{American Journal of Political Science}. 230 -- 255.
#'
#' @seealso The vignette for \code{stan_polr}.
#' \url{https://mc-stan.org/rstanarm/articles/}
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") {
#'  fit <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit",
#'                   prior = R2(0.2, "mean"), init_r = 0.1, seed = 12345,
#'                   algorithm = "fullrank") # for speed only
#'  print(fit)
#'  plot(fit)
#' }
#'
#' @importFrom utils packageVersion
stan_polr <- function(formula, data, weights, ..., subset,
                      na.action = getOption("na.action", "na.omit"),
                      contrasts = NULL, model = TRUE,
                      method = c("logistic", "probit", "loglog", "cloglog", "cauchit"),
                      prior = R2(stop("'location' must be specified")),
                      prior_counts = dirichlet(1), shape = NULL, rate = NULL,
                      prior_PD = FALSE,
                      algorithm = c("sampling", "meanfield", "fullrank"),
                      adapt_delta = NULL, do_residuals = NULL) {
  # Validate 'data' and coerce character columns to factors so that
  # model.matrix treats them consistently.
  data <- validate_data(data, if_missing = environment(formula))
  is_char <- which(sapply(data, is.character))
  for (j in is_char) {
    data[[j]] <- as.factor(data[[j]])
  }
  algorithm <- match.arg(algorithm)
  # Residuals are computed by default only when full MCMC was used
  # (see the 'do_residuals' documentation above).
  if (is.null(do_residuals)) {
    do_residuals <- algorithm == "sampling"
  }
  call <- match.call(expand.dots = TRUE)
  call$formula <- try(eval(call$formula), silent = TRUE)
  # https://discourse.mc-stan.org/t/loo-with-k-threshold-error-for-stan-polr/17052/19
  m <- match.call(expand.dots = FALSE)
  method <- match.arg(method)
  if (is.matrix(eval.parent(m$data))) {
    m$data <- as.data.frame(data)
  } else {
    m$data <- data
  }
  # Strip the arguments that stats::model.frame does not understand before
  # re-evaluating the call as a model.frame call.
  m$method <- m$model <- m$... <- m$prior <- m$prior_counts <- m$prior_PD <-
    m$algorithm <- m$adapt_delta <- m$shape <- m$rate <- m$do_residuals <- NULL
  m[[1L]] <- quote(stats::model.frame)
  m$drop.unused.levels <- FALSE
  m <- eval.parent(m)
  m <- check_constant_vars(m)
  Terms <- attr(m, "terms")
  x <- model.matrix(Terms, m, contrasts)
  xint <- match("(Intercept)", colnames(x), nomatch = 0L)
  n <- nrow(x)
  pc <- ncol(x)
  cons <- attr(x, "contrasts")
  # The intercept column is dropped from x; the cutpoints play its role.
  if (xint > 0L) {
    x <- x[, -xint, drop = FALSE]
    pc <- pc - 1L
  } else stop("an intercept is needed and assumed")
  K <- ncol(x)
  wt <- model.weights(m)
  if (!length(wt))
    wt <- rep(1, n)
  offset <- model.offset(m)
  if (length(offset) <= 1L)
    offset <- rep(0, n)
  y <- model.response(m)
  if (!is.factor(y))
    stop("Response variable must be a factor.", call. = FALSE)
  lev <- levels(y)
  llev <- length(lev)
  if (llev < 2L)
    stop("Response variable must have 2 or more levels.", call. = FALSE)
  # y <- unclass(y)
  q <- llev - 1L
  # Delegate estimation to the workhorse fitting function.
  stanfit <- stan_polr.fit(
    x = x, y = y, wt = wt, offset = offset, method = method,
    prior = prior, prior_counts = prior_counts, shape = shape,
    rate = rate, prior_PD = prior_PD, algorithm = algorithm,
    adapt_delta = adapt_delta, do_residuals = do_residuals, ...
  )
  if (algorithm != "optimizing" && !is(stanfit, "stanfit"))
    return(stanfit)
  inverse_link <- linkinv(method)
  if (llev == 2L) {
    # actually a Bernoulli model
    family <- switch(method,
                     logistic = binomial(link = "logit"),
                     loglog = binomial(loglog),
                     binomial(link = method))
    fit <- nlist(stanfit, family, formula, offset, weights = wt,
                 x = cbind("(Intercept)" = 1, x),
                 y = as.integer(y == lev[2]), data, call, terms = Terms,
                 model = m, algorithm, na.action = attr(m, "na.action"),
                 contrasts = attr(x, "contrasts"),
                 stan_function = "stan_polr")
    out <- stanreg(fit)
    if (!model)
      out$model <- NULL
    if (algorithm == "sampling")
      check_rhats(out$stan_summary[, "Rhat"])
    if (is.null(shape) && is.null(rate)) # not a scobit model
      return(out)
    out$method <- method
    return(structure(out, class = c("stanreg", "polr")))
  }
  # more than 2 outcome levels
  K2 <- K + llev - 1 # number of coefficients + number of cutpoints
  stanmat <- as.matrix(stanfit)[, 1:K2, drop = FALSE]
  covmat <- cov(stanmat)
  # Posterior medians / MADs for the coefficients and cutpoints (zeta).
  coefs <- apply(stanmat[, 1:K, drop = FALSE], 2L, median)
  ses <- apply(stanmat[, 1:K, drop = FALSE], 2L, mad)
  zeta <- apply(stanmat[, (K+1):K2, drop = FALSE], 2L, median)
  eta <- linear_predictor(coefs, x, offset)
  mu <- inverse_link(eta)
  means <- rstan::get_posterior_mean(stanfit)
  residuals <- means[grep("^residuals", rownames(means)), ncol(means)]
  names(eta) <- names(mu) <- rownames(x)
  if (!prior_PD) {
    if (!do_residuals) {
      residuals <- rep(NA, times = n)
    }
    names(residuals) <- rownames(x)
  }
  stan_summary <- make_stan_summary(stanfit)
  if (algorithm == "sampling")
    check_rhats(stan_summary[, "Rhat"])
  out <- nlist(coefficients = coefs, ses, zeta, residuals,
               fitted.values = mu, linear.predictors = eta,
               covmat, y, x, model = if (model) m, data, offset,
               weights = wt, prior.weights = wt, family = method, method,
               contrasts, na.action, call, formula, terms = Terms,
               prior.info = attr(stanfit, "prior.info"),
               algorithm, stan_summary, stanfit,
               rstan_version = packageVersion("rstan"),
               stan_function = "stan_polr")
  structure(out, class = c("stanreg", "polr"))
}

# internal ----------------------------------------------------------------

# CDF, inverse-CDF and PDF for Gumbel distribution
pgumbel <- function (q, loc = 0, scale = 1, lower.tail = TRUE) {
  q <- (q - loc)/scale
  p <- exp(-exp(-q))
  if (!lower.tail) 1 - p else p
}
qgumbel <- function(p, loc = 0, scale = 1) {
  loc - scale * log(-log(p))
}
dgumbel <- function(x, loc = 0, scale = 1, log = FALSE) {
  z <- (x - loc) / scale
  log_f <- -(z + exp(-z))
  if (!log) exp(log_f) else log_f
}

# 'loglog' link object in the shape expected by stats::binomial(link = ...)
loglog <- list(linkfun = qgumbel, linkinv = pgumbel, mu.eta = dgumbel,
               valideta = function(eta) TRUE, name = "loglog")
class(loglog) <- "link-glm"
rstanarm/R/stanreg.R0000644000176200001440000001313714370470372014104 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Create a stanreg object
#
# @param object A list provided by one of the \code{stan_*} modeling functions.
# @return A stanreg object.
#
stanreg <- function(object) {
  opt <- object$algorithm == "optimizing"
  mer <- !is.null(object$glmod) # used stan_(g)lmer
  stanfit <- object$stanfit
  family <- object$family
  y <- object$y
  x <- object$x
  nvars <- ncol(x)
  nobs <- NROW(y)
  ynames <- if (is.matrix(y)) rownames(y) else names(y)
  is_betareg <- is.beta(family$family)
  if (is_betareg) {
    family_phi <- object$family_phi # pull out phi family/link
    if (is.null(family_phi)) {
      family_phi <- beta_fam("log")
      # intercept-only phi model when no phi regression was specified
      z <- matrix(1, nrow = nobs, ncol = 1,
                  dimnames = list(NULL, "(Intercept)"))
    } else
      z <- object$z # pull out betareg z vars so that they can be used in posterior_predict/loo
    nvars_z <- NCOL(z) # used so that all coefficients are printed with coef()
  }
  if (opt) {
    # Optimization: summaries come from the asymptotic sampling
    # distribution stored in theta_tilde, not from MCMC draws.
    stanmat <- stanfit$theta_tilde
    probs <- c(0.025, .975)
    stan_summary <- cbind(Median = apply(stanmat, 2L, median),
                          MAD_SD = apply(stanmat, 2L, mad),
                          t(apply(stanmat, 2L, quantile, probs)))
    xnms <- colnames(x)
    covmat <- cov(stanmat)[xnms, xnms]
    coefs <- apply(stanmat[, xnms, drop = FALSE], 2L, median)
    ses <- apply(stanmat[, xnms, drop = FALSE], 2L, mad)
    rank <- qr(x, tol = .Machine$double.eps, LAPACK = TRUE)$rank
    df.residual <- nobs - sum(object$weights == 0) - rank
    if (is_betareg) {
      if (length(colnames(z)) == 1)
        coefs_z <- apply(stanmat[, grepl("(phi)", colnames(stanmat), fixed = TRUE),
                                 drop = FALSE], 2L, median)
      else
        coefs_z <- apply(stanmat[, paste0("(phi)_",colnames(z)), drop = FALSE],
                         2L, median)
    }
  } else {
    stan_summary <- make_stan_summary(stanfit)
    coefs <- stan_summary[1:nvars, select_median(object$algorithm)]
    if (is_betareg) {
      coefs_z <- stan_summary[(nvars + 1):(nvars + nvars_z),
                              select_median(object$algorithm)]
      if (length(coefs_z) == 1L)
        names(coefs_z) <- rownames(stan_summary)[nvars + 1]
    }
    if (length(coefs) == 1L) # ensures that if only a single coef it still gets a name
      names(coefs) <- rownames(stan_summary)[1L]
    if (is_betareg) {
      stanmat <- as.matrix(stanfit)[,c(names(coefs),names(coefs_z)), drop = FALSE]
      colnames(stanmat) <- c(names(coefs),names(coefs_z))
    } else {
      stanmat <- as.matrix(stanfit)[, 1:nvars, drop = FALSE]
      colnames(stanmat) <- colnames(x)
    }
    ses <- apply(stanmat, 2L, mad)
    if (mer) {
      # keep only the fixed-effects (alpha, beta) columns for covmat
      mark <- sum(sapply(object$stanfit@par_dims[c("alpha", "beta")], prod))
      stanmat <- stanmat[,1:mark, drop = FALSE]
    }
    covmat <- cov(stanmat)
    # rownames(covmat) <- colnames(covmat) <- rownames(stan_summary)[1:nrow(covmat)]
    if (object$algorithm == "sampling")
      check_rhats(stan_summary[, "Rhat"])
  }

  # linear predictor, fitted values
  eta <- linear_predictor(coefs, x, object$offset)
  mu <- family$linkinv(eta)

  if (NCOL(y) == 2L) {
    # residuals of type 'response', (glm which does 'deviance' residuals by default)
    residuals <- y[, 1L] / rowSums(y) - mu
  } else {
    ytmp <- if (is.factor(y)) fac2bin(y) else y
    residuals <- ytmp - mu
  }
  names(eta) <- names(mu) <- names(residuals) <- ynames
  if (is_betareg) {
    eta_z <- linear_predictor(coefs_z, z, object$offset)
    phi <- family_phi$linkinv(eta_z)
  }

  out <- nlist(
    coefficients = unpad_reTrms(coefs),
    ses = unpad_reTrms(ses),
    fitted.values = mu,
    linear.predictors = eta,
    residuals,
    df.residual = if (opt) df.residual else NA_integer_,
    # covmat = unpad_reTrms(unpad_reTrms(covmat, col = TRUE), col = FALSE),
    covmat,
    y,
    x,
    model = object$model,
    data = object$data,
    family,
    offset = if (any(object$offset != 0)) object$offset else NULL,
    weights = object$weights,
    prior.weights = object$weights,
    contrasts = object$contrasts,
    na.action = object$na.action,
    formula = object$formula,
    terms = object$terms,
    prior.info = attr(stanfit, "prior.info"),
    dropped_cols = attr(stanfit, "dropped_cols"),
    algorithm = object$algorithm,
    stan_summary,
    stanfit = if (opt) stanfit$stanfit else stanfit,
    rstan_version = packageVersion("rstan"),
    call = object$call,
    # sometimes 'call' is no good (e.g. if using do.call(stan_glm, args)) so
    # also include the name of the modeling function (for use when printing,
    # etc.)
    stan_function = object$stan_function
  )
  if (opt) out$asymptotic_sampling_dist <- stanmat
  if (mer) out$glmod <- object$glmod
  if (is_betareg) {
    out$coefficients <- unpad_reTrms(c(coefs, coefs_z))
    out$z <- z
    out$family_phi <- family_phi
    out$eta_z <- eta_z
    out$phi <- phi
  }
  structure(out, class = c("stanreg", "glm", "lm"))
}
rstanarm/R/posterior_vs_prior.R0000644000176200001440000002056013722762571016415 0ustar liggesusers#' Juxtapose prior and posterior
#'
#' Plot medians and central intervals comparing parameter draws from the prior
#' and posterior distributions. If the plotted priors look different than the
#' priors you think you specified it is likely either because of internal
#' rescaling or the use of the \code{QR} argument (see the documentation for the
#' \code{\link[=prior_summary.stanreg]{prior_summary}} method for details on
#' these special cases).
#'
#' @export
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @inheritParams summary.stanreg
#' @param group_by_parameter Should estimates be grouped together by parameter
#'   (\code{TRUE}) or by posterior and prior (\code{FALSE}, the default)?
#' @param color_by How should the estimates be colored? Use \code{"parameter"}
#'   to color by parameter name, \code{"vs"} to color the prior one color and
#'   the posterior another, and \code{"none"} to use no color. Except when
#'   \code{color_by="none"}, a variable is mapped to the color
#'   \code{\link[ggplot2]{aes}}thetic and it is therefore also possible to
#'   change the default colors by adding one of the various discrete color
#'   scales available in \code{ggplot2}
#'   (\code{\link[ggplot2:scale_manual]{scale_color_manual}},
#'   \code{scale_colour_brewer}, etc.). See Examples.
#' @param prob A number \eqn{p \in (0,1)}{p (0 < p < 1)} indicating the desired
#'   posterior probability mass to include in the (central posterior) interval
#'   estimates displayed in the plot. The default is \eqn{0.9}.
#' @param facet_args A named list of arguments passed to
#'   \code{\link[ggplot2]{facet_wrap}} (other than the \code{facets} argument),
#'   e.g., \code{nrow} or \code{ncol} to change the layout, \code{scales} to
#'   allow axis scales to vary across facets, etc. See Examples.
#' @param ... The S3 generic uses \code{...} to pass arguments to any defined
#'   methods. For the method for stanreg objects, \code{...} is for arguments
#'   (other than \code{color}) passed to \code{geom_pointrange} in the \pkg{ggplot2}
#'   package to control the appearance of the plotted intervals.
#'
#' @return A ggplot object that can be further customized using the
#'   \pkg{ggplot2} package.
#'
#' @template reference-bayesvis
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \dontrun{
#' if (!exists("example_model")) example(example_model)
#' # display non-varying (i.e. not group-level) coefficients
#' posterior_vs_prior(example_model, pars = "beta")
#'
#' # show group-level (varying) parameters and group by parameter
#' posterior_vs_prior(example_model, pars = "varying",
#'                    group_by_parameter = TRUE, color_by = "vs")
#'
#' # group by parameter and allow axis scales to vary across facets
#' posterior_vs_prior(example_model, regex_pars = "period",
#'                    group_by_parameter = TRUE, color_by = "none",
#'                    facet_args = list(scales = "free"))
#'
#' # assign to object and customize with functions from ggplot2
#' (gg <- posterior_vs_prior(example_model, pars = c("beta", "varying"), prob = 0.8))
#'
#' gg +
#'  ggplot2::geom_hline(yintercept = 0, size = 0.3, linetype = 3) +
#'  ggplot2::coord_flip() +
#'  ggplot2::ggtitle("Comparing the prior and posterior")
#'
#' # compare very wide and very narrow priors using roaches example
#' # (see help(roaches, "rstanarm") for info on the dataset)
#' roaches$roach100 <- roaches$roach1 / 100
#' wide_prior <- normal(0, 10)
#' narrow_prior <- normal(0, 0.1)
#' fit_pois_wide_prior <- stan_glm(y ~ treatment + roach100 + senior,
#'                                 offset = log(exposure2),
#'                                 family = "poisson", data = roaches,
#'                                 prior = wide_prior)
#' posterior_vs_prior(fit_pois_wide_prior, pars = "beta", prob = 0.5,
#'                    group_by_parameter = TRUE, color_by = "vs",
#'                    facet_args = list(scales = "free"))
#'
#' fit_pois_narrow_prior <- update(fit_pois_wide_prior, prior = narrow_prior)
#' posterior_vs_prior(fit_pois_narrow_prior, pars = "beta", prob = 0.5,
#'                    group_by_parameter = TRUE, color_by = "vs",
#'                    facet_args = list(scales = "free"))
#'
#'
#' # look at cutpoints for ordinal model
#' fit_polr <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit",
#'                       prior = R2(0.2, "mean"), init_r = 0.1)
#' (gg_polr <- posterior_vs_prior(fit_polr, regex_pars = "\\|", color_by = "vs",
#'                                group_by_parameter = TRUE))
#' # flip the x and y axes
#' gg_polr + ggplot2::coord_flip()
#' }
#' }
#' @importFrom ggplot2 geom_pointrange facet_wrap aes_string labs
#'   scale_x_discrete element_line element_text
#'
posterior_vs_prior <- function(object, ...) {
  UseMethod("posterior_vs_prior")
}

#' @rdname posterior_vs_prior
#' @export
posterior_vs_prior.stanreg <-
  function(object,
           pars = NULL,
           regex_pars = NULL,
           prob = 0.9,
           color_by = c("parameter", "vs", "none"),
           group_by_parameter = FALSE,
           facet_args = list(),
           ...) {
    if (!used.sampling(object))
      STOP_sampling_only("posterior_vs_prior")
    stopifnot(isTRUE(prob > 0 && prob < 1))

    # stuff needed for ggplot
    color_by <- switch(
      match.arg(color_by),
      parameter = "parameter",
      vs = "model",
      none = NA
    )
    if (group_by_parameter) {
      group_by <- "parameter"
      xvar <- "model"
    } else {
      group_by <- "model"
      xvar <- "parameter"
    }
    aes_args <- list(x = xvar, y = "estimate", ymin = "lb", ymax = "ub")
    if (!is.na(color_by))
      aes_args$color <- color_by
    if (!length(facet_args)) {
      facet_args <- list(facets = group_by)
    } else {
      facet_args$facets <- group_by
    }

    # draw from prior distribution and prepare plot data;
    # update() refits with prior_PD = TRUE so the draws come from the
    # prior predictive distribution only
    message("\nDrawing from prior...")
    capture.output(
      Prior <- suppressWarnings(update(
        object,
        prior_PD = TRUE,
        refresh = -1,
        chains = 2
      ))
    )
    objects <- nlist(Prior, Posterior = object)
    plot_data <- stack_estimates(objects, prob = prob,
                                 pars = pars, regex_pars = regex_pars)

    graph <- ggplot(plot_data, mapping = do.call("aes_string", aes_args)) +
      geom_pointrange(...) +
      do.call("facet_wrap", facet_args) +
      theme_default() +
      xaxis_title(FALSE) +
      yaxis_title(FALSE) +
      xaxis_ticks() +
      xaxis_text(angle = -30, hjust = 0) +
      grid_lines(color = "gray", size = 0.1)

    if (group_by == "parameter")
      return(graph)

    # clean up x-axis labels a bit if tick labels are parameter names
    # (user can override this after plot is created if need be,
    # but this makes the default a bit nicer if many parameters)
    abbrevs <- abbreviate(plot_data$parameter, 12,
                          method = "both.sides", dot = TRUE)
    graph + scale_x_discrete(name = "Parameter", labels = abbrevs)
  }


# internal ----------------------------------------------------------------

# Stack posterior-summary rows from several fitted models into one long
# data frame with columns model/parameter/estimate/lb/ub, as consumed by
# posterior_vs_prior.stanreg above.
stack_estimates <-
  function(models = list(),
           pars = NULL,
           regex_pars = NULL,
           prob = NULL) {
    mnames <- names(models)
    if (is.null(mnames)) {
      mnames <- paste0("model_", seq_along(models))
    } else {
      has_name <- nzchar(mnames)
      if (!all(has_name))
        stop("Either all or none of the elements in 'models' should be named.")
    }
    # median plus the lower/upper central-interval probabilities implied by 'prob'
    alpha <- (1 - prob) / 2
    probs <- sort(c(0.5, alpha, 1 - alpha))
    labs <- c(paste0(100 * probs, "%"))
    ests <- lapply(models, function(x) {
      s <- summary(x, pars = pars, regex_pars = regex_pars, probs = probs)
      if (is.null(pars))
        s <- s[!rownames(s) %in% c("log-posterior", "mean_PPD"),]
      s[, labs, drop = FALSE]
    })
    est_column <- function(list_of_matrices, col) {
      x <- sapply(list_of_matrices, function(x) x[, col])
      if (is.list(x)) unlist(x) else as.vector(x)
    }
    data.frame(
      model = rep(mnames, times = sapply(ests, nrow)),
      parameter = unlist(lapply(ests, rownames)),
      estimate = est_column(ests, labs[2]),
      lb = est_column(ests, labs[1]),
      ub = est_column(ests, labs[3])
    )
  }
rstanarm/R/jm_make_assoc_terms.R0000644000176200001440000003374413340675562016450 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
# Copyright (C) 2016, 2017 Sam Brilleman
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Function to construct a design matrix for the association structure in
# the event submodel, to be multiplied by a vector of association parameters
#
# @param assoc An array with information about the desired association
#   structure, returned by a call to validate_assoc.
# @param parts A list equal in length to the number of markers.
# Each element
#   parts[[m]] should contain a named list with components $mod_eta, $mod_eps,
#   $mod_auc, etc, which each contain either the linear predictor at quadtimes,
#   quadtimes + eps, and auc quadtimes, or the design matrices
#   used for constructing the linear predictor. Each element parts[[m]] should
#   also contain $X_data and $K_data.
# @param family A list of family objects, equal in length to the number of
#   longitudinal submodels.
# @param ... If parts does not contain the linear predictors, then this should
#   include elements beta and b, each being a length M list of parameters for the
#   longitudinal submodels.
# @return A design matrix containing the association terms to be multiplied by
#   the association parameters.
make_assoc_terms <- function(parts, assoc, family, ...) {
  M <- length(parts)
  a_X <- list()
  # 'mark' indexes the next free slot in a_X; it runs across all submodels
  # and association terms, so the order below must match the order the
  # association parameters are declared elsewhere.
  mark <- 1
  for (m in 1:M) {
    times <- attr(parts[[m]], "times")
    epsilon <- attr(parts[[m]], "epsilon")
    qnodes <- attr(parts[[m]], "auc_qnodes")
    qwts <- attr(parts[[m]], "auc_qwts")
    eps_uses_derivative_of_x <- attr(parts[[m]], "eps_uses_derivative_of_x") # experimental
    has_assoc <- !assoc["null",][[m]]
    if (has_assoc) {
      assoc_m <- assoc[,m]
      invlink_m <- family[[m]]$linkinv
      eta_m <- get_element(parts, m = m, "eta", ...)
      eps_m <- get_element(parts, m = m, "eps", ...)
      auc_m <- get_element(parts, m = m, "auc", ...)
      X_data_m <- get_element(parts, m = m, "X_data", ...)
      K_data_m <- get_element(parts, m = m, "K_data", ...)
      grp_m <- get_element(parts, m = m, "grp_stuff", ...)
      has_grp <- grp_m$has_grp # TRUE/FALSE
      if (has_grp) {
        # method for collapsing information across clusters within patients
        grp_assoc <- grp_m$grp_assoc
        # indexing for collapsing across grps (based on the ids and times
        # used to generate the design matrices in make_assoc_parts)
        grp_idx <- attr(parts[[m]], "grp_idx")
      }

      #--- etavalue and any interactions ---#

      # etavalue
      if (assoc_m[["etavalue"]]) {
        if (has_grp) {
          a_X[[mark]] <- collapse_within_groups(eta_m, grp_idx, grp_assoc)
        } else {
          a_X[[mark]] <- eta_m
        }
        mark <- mark + 1
      }
      # etavalue * data interactions
      if (assoc_m[["etavalue_data"]]) {
        X_temp <- X_data_m[["etavalue_data"]]
        K_temp <- K_data_m[["etavalue_data"]]
        for (i in 1:K_temp) {
          if (is.matrix(eta_m)) {
            val <- sweep(eta_m, 2L, X_temp[, i], `*`)
          } else {
            val <- as.vector(eta_m) * X_temp[, i]
          }
          if (has_grp) {
            a_X[[mark]] <- collapse_within_groups(val, grp_idx, grp_assoc)
          } else {
            a_X[[mark]] <- val
          }
          mark <- mark + 1
        }
      }
      # etavalue * etavalue interactions
      if (assoc_m[["etavalue_etavalue"]]) {
        sel <- assoc_m[["which_interactions"]][["etavalue_etavalue"]]
        for (j in sel) {
          eta_j <- get_element(parts, m = j, "eta", ...)
          val <- eta_m * eta_j
          a_X[[mark]] <- val
          mark <- mark + 1
        }
      }
      # etavalue * muvalue interactions
      if (assoc_m[["etavalue_muvalue"]]) {
        sel <- assoc_m[["which_interactions"]][["etavalue_muvalue"]]
        for (j in sel) {
          eta_j <- get_element(parts, m = j, "eta", ...)
          invlink_j <- family[[j]]$linkinv
          val <- eta_m * invlink_j(eta_j)
          a_X[[mark]] <- val
          mark <- mark + 1
        }
      }

      #--- etaslope and any interactions ---#

      if (assoc_m[["etaslope"]] || assoc_m[["etaslope_data"]]) {
        if (eps_uses_derivative_of_x) {
          deta_m <- eps_m
        } else {
          # one-sided finite difference approximation to the slope
          deta_m <- (eps_m - eta_m) / epsilon
        }
      }
      # etaslope
      if (assoc_m[["etaslope"]]) {
        if (has_grp) {
          a_X[[mark]] <- collapse_within_groups(deta_m, grp_idx, grp_assoc)
        } else {
          a_X[[mark]] <- deta_m
        }
        mark <- mark + 1
      }
      # etaslope * data interactions
      if (assoc_m[["etaslope_data"]]) {
        X_temp <- X_data_m[["etaslope_data"]]
        K_temp <- K_data_m[["etaslope_data"]]
        for (i in 1:K_temp) {
          if (is.matrix(deta_m)) {
            val <- sweep(deta_m, 2L, X_temp[, i], `*`)
          } else {
            val <- as.vector(deta_m) * X_temp[, i]
          }
          if (has_grp) {
            a_X[[mark]] <- collapse_within_groups(val, grp_idx, grp_assoc)
          } else {
            a_X[[mark]] <- val
          }
          mark <- mark + 1
        }
      }

      #--- etaauc ---#

      if (assoc_m[["etaauc"]]) {
        # Gauss-Kronrod quadrature: weighted sum of eta over the qnodes
        # belonging to each observation j.
        if (is.matrix(eta_m)) {
          nr <- nrow(eta_m)
          nc <- ncol(eta_m)
          val <- matrix(NA, nrow = nr, ncol = nc)
          for (j in 1:nc) {
            wgt_j <- qwts[((j-1) * qnodes + 1):(j * qnodes)]
            auc_j <- auc_m[, ((j-1) * qnodes + 1):(j * qnodes), drop = FALSE]
            tmp_j <- sweep(auc_j, 2L, wgt_j, `*`)
            val[,j] <- rowSums(tmp_j)
          }
        } else {
          val <- c()
          for (j in 1:length(eta_m)) {
            wgt_j <- qwts[((j-1) * qnodes + 1):(j * qnodes)]
            auc_j <- auc_m[((j-1) * qnodes + 1):(j * qnodes)]
            val[j] <- sum(wgt_j * auc_j)
          }
        }
        a_X[[mark]] <- val
        mark <- mark + 1
      }

      #--- muvalue and any interactions ---#

      # muvalue
      if (assoc_m[["muvalue"]]) {
        mu_m <- invlink_m(eta_m)
        a_X[[mark]] <- mu_m
        mark <- mark + 1
      }
      # muvalue * data interactions
      if (assoc_m[["muvalue_data"]]) {
        mu_m <- invlink_m(eta_m)
        X_temp <- X_data_m[["muvalue_data"]]
        K_temp <- K_data_m[["muvalue_data"]]
        for (i in 1:K_temp) {
          if (is.matrix(mu_m)) {
            val <- sweep(mu_m, 2L, X_temp[, i], `*`)
          } else {
            val <- as.vector(mu_m) * X_temp[, i]
          }
          if (has_grp) {
            a_X[[mark]] <- collapse_within_groups(val, grp_idx, grp_assoc)
          } else {
            a_X[[mark]] <- val
          }
          mark <- mark + 1
        }
      }
      # muvalue * etavalue interactions
      if (assoc_m[["muvalue_etavalue"]]) {
        sel <- assoc_m[["which_interactions"]][["muvalue_etavalue"]]
        for (j in sel) {
          eta_j <- get_element(parts, m = j, "eta", ...)
          val <- invlink_m(eta_m) * eta_j
          a_X[[mark]] <- val
          mark <- mark + 1
        }
      }
      # muvalue * muvalue interactions
      if (assoc_m[["muvalue_muvalue"]]) {
        sel <- assoc_m[["which_interactions"]][["muvalue_muvalue"]]
        for (j in sel) {
          eta_j <- get_element(parts, m = j, "eta", ...)
          invlink_j <- family[[j]]$linkinv
          val <- invlink_m(eta_m) * invlink_j(eta_j)
          a_X[[mark]] <- val
          mark <- mark + 1
        }
      }

      #--- muslope and any interactions ---#

      if (assoc_m[["muslope"]] || assoc_m[["muslope_data"]]) {
        if (eps_uses_derivative_of_x) {
          stop2("Cannot currently use muslope interaction structure.")
        } else {
          dmu_m <- (invlink_m(eps_m) - invlink_m(eta_m)) / epsilon
        }
      }
      # muslope
      if (assoc_m[["muslope"]]) {
        a_X[[mark]] <- dmu_m
        mark <- mark + 1
      }
      # muslope * data interactions
      if (assoc_m[["muslope_data"]]) {
        X_temp <- X_data_m[["muslope_data"]]
        K_temp <- K_data_m[["muslope_data"]]
        for (i in 1:K_temp) {
          if (is.matrix(dmu_m)) {
            val <- sweep(dmu_m, 2L, X_temp[, i], `*`)
          } else {
            val <- as.vector(dmu_m) * X_temp[, i]
          }
          if (has_grp) {
            a_X[[mark]] <- collapse_within_groups(val, grp_idx, grp_assoc)
          } else {
            a_X[[mark]] <- val
          }
          mark <- mark + 1
        }
      }

      #--- muauc ---#

      if (assoc_m[["muauc"]]) {
        if (is.matrix(eta_m)) {
          nr <- nrow(eta_m)
          nc <- ncol(eta_m)
          val <- matrix(NA, nrow = nr, ncol = nc)
          for (j in 1:nc) {
            wgt_j <- qwts[((j-1) * qnodes + 1):(j * qnodes)]
            auc_j <- invlink_m(auc_m[, ((j-1) * qnodes + 1):(j * qnodes), drop = FALSE])
            tmp_j <- sweep(auc_j, 2L, wgt_j, `*`)
            val[,j] <- rowSums(tmp_j)
          }
        } else {
          val <- c()
          for (j in 1:length(eta_m)) {
            wgt_j <- qwts[((j-1) * qnodes + 1):(j * qnodes)]
            auc_j <- invlink_m(auc_m[((j-1) * qnodes + 1):(j * qnodes)])
            val[j] <- sum(wgt_j * auc_j)
          }
        }
        a_X[[mark]] <- val
        mark <- mark + 1
      }
    }
  }
  for (m in 1:M) {
    # shared_b
    if (assoc["shared_b",][[m]]) {
      sel <- assoc["which_b_zindex",][[m]]
      val <- get_element(parts, m = m, "b_mat", ...)[,sel]
      a_X[[mark]] <- val
      mark <- mark + 1
    }
  }
  for (m in 1:M) {
    # shared_coef
    if (assoc["shared_coef",][[m]]) {
      sel <- assoc["which_coef_zindex",][[m]]
      val <- get_element(parts, m = m, "b_mat", ...)[,sel]
      a_X[[mark]] <- val
      mark <- mark + 1
    }
  }
  if (is.matrix(a_X[[1L]])) a_X else do.call("cbind", a_X)
}

# Function to get an "element" (e.g. a linear predictor, a linear predictor
# evaluated at epsilon shift, linear predictor evaluated at auc quadpoints,
# etc) constructed from the "parts" (e.g. mod_eta, mod_eps, mod_auc, etc)
# returned by a call to the function 'make_assoc_parts'.
#
# @param parts A named list containing the parts for constructing the association
#   structure. It may contain elements $mod_eta, $mod_eps, $mod_auc, etc. as
#   well as $X_data, $K_data, $grp_stuff. It is returned by a call to the
#   function 'make_assoc_parts'.
# @param m An integer specifying which submodel to get the element for.
# @param which A character string specifying which element to get.
get_element <- function(parts, m = 1, which = "eta", ...) {
  ok_which_args <- c("eta", "eps", "auc", "X_data", "K_data", "b_mat", "grp_stuff")
  if (!which %in% ok_which_args)
    stop("'which' must be one of: ", paste(ok_which_args, collapse = ", "))
  if (which %in% c("eta", "eps", "auc")) {
    part <- parts[[m]][[paste0("mod_", which)]]
    if (is.null(part)) {
      # model doesn't include an assoc related to 'which'
      return(NULL)
    } else {
      # construct linear predictor for the 'which' part
      x <- part$x
      Zt <- part$Zt
      Znames <- part$Z_names
      if (is.null(x) || is.null(Zt))
        stop2("Bug found: cannot find x and Zt in 'parts'. They are ",
              "required to build the linear predictor for '", which, "'.")
      dots <- list(...)
      beta <- dots$beta[[m]]
      b <- dots$b[[m]]
      if (is.null(beta) || is.null(b))
        stop2("Bug found: beta and b must be provided to build the ",
              "linear predictor for '", which, "'.")
      # fixed-effects part plus group-specific part (b %*% Zt)
      eta <- linear_predictor(beta, x)
      if (NCOL(b) == 1) {
        eta <- eta + as.vector(b %*% Zt)
      } else {
        eta <- eta + as.matrix(b %*% Zt)
      }
      return(eta)
    }
  } else if (which %in% c("X_data", "K_data", "b_mat", "grp_stuff")) {
    return(parts[[m]][[which]])
  } else {
    stop("'which' argument doesn't include a valid entry.")
  }
}

# Collapse the linear predictor across the lower level units
# clustered within an individual, using the function specified in the
# 'grp_assoc' argument
#
# @param eta The linear predictor evaluated for all lower level groups
#   at the quadrature points.
# @param grp_idx An N*2 array providing the indices of the first (col 1)
#   and last (col 2) observations in eta that correspond to individuals
#   i = 1,...,N.
# @param grp_assoc Character string, the function to use to collapse
#   across the lower level units clustered within individuals.
# @return A vector or matrix, depending on the method called.
# S3 generic: collapse 'eta' across the lower-level units clustered within
# each individual, using the summary function named by 'grp_assoc'.
# Dispatches on the class of 'eta' (vector vs. matrix of posterior draws).
collapse_within_groups <- function(eta, grp_idx, grp_assoc = "sum") {
  UseMethod("collapse_within_groups")
}
# Default method: 'eta' is a vector. For individual n, the observations in
# positions grp_idx[n,1]:grp_idx[n,2] (a contiguous run; see the comment
# block above) are reduced to a scalar via do.call on 'grp_assoc'
# (e.g. "sum", "mean", "min", "max"). Returns a length-N vector.
collapse_within_groups.default <- function(eta, grp_idx, grp_assoc) {
  N <- nrow(grp_idx)
  val <- rep(NA, N)
  for (n in 1:N) {
    tmp <- eta[grp_idx[n,1]:grp_idx[n,2]]
    val[n] <- do.call(grp_assoc, list(tmp))
  }
  val
}
# Matrix method: 'eta' has one row per posterior draw and one column per
# lower-level observation. Each individual's block of columns is collapsed
# row-wise (i.e. separately for each draw), giving an nrow(eta) x N matrix.
collapse_within_groups.matrix <- function(eta, grp_idx, grp_assoc) {
  N <- nrow(grp_idx)
  val <- matrix(NA, nrow = nrow(eta), ncol = N)
  for (n in 1:N) {
    tmp <- eta[, grp_idx[n,1]:grp_idx[n,2], drop = FALSE]
    # NOTE(review): `=` used for assignment here while the surrounding code
    # uses `<-`; behavior is identical but `<-` would match house style.
    val[,n] = apply(tmp, 1L, grp_assoc)
  }
  val
}
rstanarm/R/stan_nlmer.R0000644000176200001440000002205114370470372014576 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2016 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Bayesian nonlinear models with group-specific terms via Stan
#'
#' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}}
#' Bayesian inference for NLMMs with group-specific coefficients that have
#' unknown covariance matrices with flexible priors.
#'
#' @export
#' @templateVar fun stan_nlmer
#' @templateVar pkg lme4
#' @templateVar pkgfun nlmer
#' @template return-stanreg-object
#' @template see-also
#' @template args-dots
#' @template args-prior_aux
#' @template args-priors
#' @template args-prior_PD
#' @template args-algorithm
#' @template args-adapt_delta
#' @template args-sparse
#' @template args-QR
#'
#' @param formula,data Same as for \code{\link[lme4]{nlmer}}. \emph{We strongly
#' advise against omitting the \code{data} argument}. Unless \code{data} is
#' specified (and is a data frame) many post-estimation functions (including
#' \code{update}, \code{loo}, \code{kfold}) are not guaranteed to work
#' properly.
#' @param subset,weights,offset Same as \code{\link[stats]{glm}}.
#' @param na.action,contrasts Same as \code{\link[stats]{glm}}, but rarely
#' specified.
#' @param prior_covariance Cannot be \code{NULL}; see \code{\link{decov}} for
#' more information about the default arguments.
#'
#' @details The \code{stan_nlmer} function is similar in syntax to
#' \code{\link[lme4]{nlmer}} but rather than performing (approximate) maximum
#' marginal likelihood estimation, Bayesian estimation is by default performed
#' via MCMC. The Bayesian model adds independent priors on the "coefficients"
#' --- which are really intercepts --- in the same way as
#' \code{\link{stan_lmer}} and priors on the terms of a decomposition of the
#' covariance matrices of the group-specific parameters. See
#' \code{\link{priors}} for more information about the priors.
#'
#' The supported transformation functions are limited to the named
#' "self-starting" functions in the \pkg{stats} library:
#' \code{\link[stats]{SSasymp}}, \code{\link[stats]{SSasympOff}},
#' \code{\link[stats]{SSasympOrig}}, \code{\link[stats]{SSbiexp}},
#' \code{\link[stats]{SSfol}}, \code{\link[stats]{SSfpl}},
#' \code{\link[stats]{SSgompertz}}, \code{\link[stats]{SSlogis}},
#' \code{\link[stats]{SSmicmen}}, and \code{\link[stats]{SSweibull}}.
#' #' #' @seealso The vignette for \code{stan_glmer}, which also discusses #' \code{stan_nlmer} models. \url{https://mc-stan.org/rstanarm/articles/} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { #' \donttest{ #' data("Orange", package = "datasets") #' Orange$circumference <- Orange$circumference / 100 #' Orange$age <- Orange$age / 100 #' fit <- stan_nlmer( #' circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, #' data = Orange, #' # for speed only #' chains = 1, #' iter = 1000 #' ) #' print(fit) #' posterior_interval(fit) #' plot(fit, regex_pars = "b\\[") #' } #' } #' @importFrom lme4 nlformula #' @importFrom stats getInitial stan_nlmer <- function(formula, data = NULL, subset, weights, na.action, offset, contrasts = NULL, ..., prior = normal(autoscale=TRUE), prior_aux = exponential(autoscale=TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE) { if (!has_outcome_variable(formula[[2]])) { stop("LHS of formula must be specified.") } f <- as.character(formula[-3]) SSfunctions <- grep("^SS[[:lower:]]+", ls("package:stats"), value = TRUE) SSfun <- sapply(SSfunctions, function(ss) grepl(paste0(ss, "("), x = f[2], fixed = TRUE)) if (!any(SSfun)) { stop("'stan_nlmer' requires a named self-starting nonlinear function.") } SSfun <- which(SSfun) SSfun_char <- names(SSfun) mc <- match.call(expand.dots = FALSE) mc$prior <- mc$prior_aux <- mc$prior_covariance <- mc$prior_PD <- mc$algorithm <- mc$adapt_delta <- mc$QR <- mc$sparse <- NULL mc$start <- unlist(getInitial( object = as.formula(f[-1]), data = data, control = list(maxiter = 0, warnOnly = TRUE) )) nlf <- nlformula(mc) X <- nlf$X y <- nlf$respMod$y weights <- nlf$respMod$weights offset <- nlf$respMod$offset nlf$reTrms$SSfun <- SSfun nlf$reTrms$decov <- prior_covariance nlf_inputs <- parse_nlf_inputs(nlf$respMod) if (SSfun_char == "SSfol") { nlf$reTrms$Dose <- 
nlf$frame[[nlf_inputs[2]]] nlf$reTrms$input <- nlf$frame[[nlf_inputs[3]]] } else { nlf$reTrms$input <- nlf$frame[[nlf_inputs[2]]] } algorithm <- match.arg(algorithm) stanfit <- stan_glm.fit(x = X, y = y, family = gaussian(link = "identity"), weights = weights, offset = offset, prior = prior, prior_intercept = NULL, prior_aux = prior_aux, prior_PD = prior_PD, algorithm = algorithm, adapt_delta = adapt_delta, group = nlf$reTrms, QR = QR, sparse = sparse, ...) if (algorithm != "optimizing" && !is(stanfit, "stanfit")) { return(stanfit) } if (SSfun_char == "SSfpl") { # SSfun = 6 stanfit@sim$samples <- lapply(stanfit@sim$samples, FUN = function(x) { x[[4L]] <- exp(x[[4L]]) return(x) }) } else if (SSfun_char == "SSlogis") { # SSfun = 8 stanfit@sim$samples <- lapply(stanfit@sim$samples, FUN = function(x) { x[[3L]] <- exp(x[[3L]]) return(x) }) } Z <- pad_reTrms(Ztlist = nlf$reTrms$Ztlist, cnms = nlf$reTrms$cnms, flist = nlf$reTrms$flist)$Z colnames(Z) <- b_names(names(stanfit), value = TRUE) fit <- nlist(stanfit, family = make_nlf_family(SSfun_char, nlf), formula, offset, weights, x = cbind(X, Z), y = y, data, call = match.call(), terms = NULL, model = NULL, na.action = na.omit, contrasts, algorithm, glmod = nlf, stan_function = "stan_nlmer") out <- stanreg(fit) class(out) <- c(class(out), "nlmerMod", "lmerMod") return(out) } # internal ---------------------------------------------------------------- # @param respMod The respMod slot of the object returned by nlformula # @return A character vector, the first element of which is the name of the SS # function and the rest of the elements are the names of the arguments to the # SS function parse_nlf_inputs <- function(respMod) { inputs <- as.character(respMod$nlmod[2]) inputs <- sub("(", ",", inputs, fixed = TRUE) inputs <- sub(")", "", inputs, fixed = TRUE) scan( text = inputs, what = character(), sep = ",", strip.white = TRUE, quiet = TRUE ) } # Make family object # # @param SSfun_char SS function name as a string # @param 
nlf Object returned by nlformula # @return A family object make_nlf_family <- function(SSfun_char, nlf) { g <- gaussian(link = "identity") g$link <- paste("inv", SSfun_char, sep = "_") g$linkinv <- function(eta, arg1, arg2 = NULL, FUN = SSfun_char) { if (is.matrix(eta)) { len <- length(arg1) nargs <- ncol(eta) / len SSargs <- lapply(1:nargs, FUN = function(i) { start <- 1 + (i - 1) * len end <- i * len t(eta[, start:end, drop = FALSE]) }) if (is.null(arg2)) SSargs <- c(list(arg1), SSargs) else SSargs <- c(list(arg1, arg2), SSargs) } else { SSargs <- as.data.frame(matrix(eta, nrow = length(arg1))) if (is.null(arg2)) SSargs <- cbind(arg1, SSargs) else SSargs <- cbind(arg1, arg2, SSargs) } names(SSargs) <- names(formals(FUN)) if (FUN == "SSbiexp") SSargs$A1 <- SSargs$A1 + exp(SSargs$A2) do.call(FUN, args = SSargs) } nlf_inputs <- parse_nlf_inputs(nlf$respMod) if (SSfun_char == "SSfol") { formals(g$linkinv)$arg1 <- nlf$frame[[nlf_inputs[2]]] formals(g$linkinv)$arg2 <- nlf$frame[[nlf_inputs[3]]] } else { formals(g$linkinv)$arg1 <- nlf$frame[[nlf_inputs[2]]] } g$linkfun <- function(mu) stop("'linkfun' should not have been called") g$variance <- function(mu) stop("'variance' should not have been called") g$mu.eta <- function(mu) stop("'mu.eta' should not have been called") return(g) } rstanarm/R/doc-algorithms.R0000644000176200001440000000700514370470372015352 0ustar liggesusers#' Estimation algorithms available for \pkg{rstanarm} models #' #' @name available-algorithms #' #' @section Estimation algorithms: #' The modeling functions in the \pkg{rstanarm} package take an \code{algorithm} #' argument that can be one of the following: #' \describe{ #' \item{\strong{Sampling} (\code{algorithm="sampling"})}{ #' Uses Markov Chain Monte Carlo (MCMC) --- in particular, Hamiltonian Monte #' Carlo (HMC) with a tuned but diagonal mass matrix --- to draw from the #' posterior distribution of the parameters. 
See \code{\link[rstan:stanmodel-method-sampling]{sampling}} #' (\pkg{rstan}) for more details. This is the slowest but most reliable of the #' available estimation algorithms and it is \strong{the default and #' recommended algorithm for statistical inference.} #' } #' \item{\strong{Mean-field} (\code{algorithm="meanfield"})}{ #' Uses mean-field variational inference to draw from an approximation to the #' posterior distribution. In particular, this algorithm finds the set of #' independent normal distributions in the unconstrained space that --- when #' transformed into the constrained space --- most closely approximate the #' posterior distribution. Then it draws repeatedly from these independent #' normal distributions and transforms them into the constrained space. The #' entire process is much faster than HMC and yields independent draws but #' \strong{is not recommended for final statistical inference}. It can be #' useful to narrow the set of candidate models in large problems, particularly #' when specifying \code{QR=TRUE} in \code{\link{stan_glm}}, #' \code{\link{stan_glmer}}, and \code{\link{stan_gamm4}}, but is \strong{only #' an approximation to the posterior distribution}. #' } #' \item{\strong{Full-rank} (\code{algorithm="fullrank"})}{ #' Uses full-rank variational inference to draw from an approximation to the #' posterior distribution by finding the multivariate normal distribution in #' the unconstrained space that --- when transformed into the constrained space #' --- most closely approximates the posterior distribution. Then it draws #' repeatedly from this multivariate normal distribution and transforms the #' draws into the constrained space. This process is slower than meanfield #' variational inference but is faster than HMC. 
Although still an #' approximation to the posterior distribution and thus \strong{not recommended #' for final statistical inference}, the approximation is more realistic than #' that of mean-field variational inference because the parameters are not #' assumed to be independent in the unconstrained space. Nevertheless, fullrank #' variational inference is a more difficult optimization problem and the #' algorithm is more prone to non-convergence or convergence to a local #' optimum. #' } #' \item{\strong{Optimizing} (\code{algorithm="optimizing"})}{ #' Finds the posterior mode using a C++ implementation of the LBGFS algorithm. #' See \code{\link[rstan:stanmodel-method-optimizing]{optimizing}} for more details. If there is no prior #' information, then this is equivalent to maximum likelihood, in which case #' there is no great reason to use the functions in the \pkg{rstanarm} package #' over the emulated functions in other packages. However, if priors are #' specified, then the estimates are penalized maximum likelihood estimates, #' which may have some redeeming value. Currently, optimization is only #' supported for \code{\link{stan_glm}}. #' } #' } #' #' @seealso \url{https://mc-stan.org/rstanarm/} #' NULL rstanarm/R/posterior_linpred.R0000644000176200001440000001425514406606742016210 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Posterior distribution of the (possibly transformed) linear predictor #' #' Extract the posterior draws of the linear predictor, possibly transformed by #' the inverse-link function. This function is occasionally useful, but it #' should be used sparingly: inference and model checking should generally be #' carried out using the posterior predictive distribution (i.e., using #' \code{\link{posterior_predict}}). #' #' @aliases posterior_linpred posterior_epred #' @export #' #' @templateVar stanregArg object #' @template args-stanreg-object #' @param transform Should the linear predictor be transformed using the #' inverse-link function? The default is \code{FALSE}. This argument is still #' allowed but not recommended because the \code{posterior_epred} function now #' provides the equivalent of \code{posterior_linpred(..., transform=TRUE)}. #' See \strong{Examples}. #' @param newdata,draws,re.form,offset Same as for \code{\link{posterior_predict}}. #' @param XZ If \code{TRUE} then instead of computing the linear predictor the #' design matrix \code{X} (or \code{cbind(X,Z)} for models with group-specific #' terms) constructed from \code{newdata} is returned. The default is #' \code{FALSE}. #' @param ... Currently ignored. #' #' @return The default is to return a \code{draws} by \code{nrow(newdata)} #' matrix of simulations from the posterior distribution of the (possibly #' transformed) linear predictor. The exception is if the argument \code{XZ} #' is set to \code{TRUE} (see the \code{XZ} argument description above). #' #' @details The \code{posterior_linpred} function returns the posterior #' distribution of the linear predictor, while the \code{posterior_epred} #' function returns the posterior distribution of the conditional expectation. 
#' In the special case of a Gaussian likelihood with an identity link #' function, these two concepts are the same. The \code{posterior_epred} #' function is a less noisy way to obtain expectations over the output of #' \code{\link{posterior_predict}}. #' #' @note For models estimated with \code{\link{stan_clogit}}, the number of #' successes per stratum is ostensibly fixed by the research design. Thus, #' when calling \code{posterior_linpred} with new data and \code{transform = #' TRUE}, the \code{data.frame} passed to the \code{newdata} argument must #' contain an outcome variable and a stratifying factor, both with the same #' name as in the original \code{data.frame}. Then, the probabilities will #' condition on this outcome in the new data. #' #' @seealso \code{\link{posterior_predict}} to draw from the posterior #' predictive distribution of the outcome, which is typically preferable. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' if (!exists("example_model")) example(example_model) #' print(family(example_model)) #' #' # linear predictor on log-odds scale #' linpred <- posterior_linpred(example_model) #' colMeans(linpred) #' #' # probabilities #' # same as posterior_linpred(example_model, transform = TRUE) #' probs <- posterior_epred(example_model) #' colMeans(probs) #' #' # not conditioning on any group-level parameters #' probs2 <- posterior_epred(example_model, re.form = NA) #' apply(probs2, 2, median) #' } posterior_linpred.stanreg <- function(object, transform = FALSE, newdata = NULL, draws = NULL, re.form = NULL, offset = NULL, XZ = FALSE, ...) 
{ if (is.stanmvreg(object)) { STOP_if_stanmvreg("'posterior_linpred'") } newdata <- validate_newdata(object, newdata = newdata, m = NULL) dat <- pp_data(object, newdata = newdata, re.form = re.form, offset = offset) if (XZ) { XZ <- dat[["x"]] if (is.mer(object)) XZ <- cbind(XZ, t(dat[["Zt"]])) return(XZ) } eta <- pp_eta(object, data = dat, draws = draws)[["eta"]] if (is.null(newdata)) { colnames(eta) <- rownames(model.frame(object)) } else { colnames(eta) <- rownames(newdata) } if (isTRUE(transform)) { message( "Instead of posterior_linpred(..., transform=TRUE) please call posterior_epred(), ", "which provides equivalent functionality." ) } if (!transform || is.nlmer(object)) { return(eta) } if (is_clogit(object)) { return(clogit_linpred_transform(object, newdata = newdata, eta = eta)) } g <- linkinv(object) return(g(eta)) } #' @rdname posterior_linpred.stanreg #' @export posterior_epred.stanreg <- function(object, newdata = NULL, draws = NULL, re.form = NULL, offset = NULL, XZ = FALSE, ...) 
{ return(suppressMessages(posterior_linpred(object, transform = TRUE, newdata, draws, re.form, offset, XZ, ...))) } # internal ---------------------------------------------------------------- clogit_linpred_transform <- function(object, newdata = NULL, eta = NULL) { g <- linkinv(object) if (!is.null(newdata)) { y <- eval(formula(object)[[2L]], newdata) strata <- as.factor(eval(object$call$strata, newdata)) formals(g)$g <- strata formals(g)$successes <- aggregate(y, by = list(strata), FUN = sum)$x } return(t(apply(eta, 1, FUN = g))) } rstanarm/R/print-and-summary.R0000644000176200001440000007355214406606742016041 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Print method for stanreg objects #' #' The \code{print} method for stanreg objects displays a compact summary of the #' fitted model. See the \strong{Details} section below for descriptions of the #' different components of the printed output. For additional summary statistics #' and diagnostics use the \code{\link[=summary.stanreg]{summary}} method. #' #' @export #' @method print stanreg #' @templateVar stanregArg x #' @template args-stanreg-object #' @param detail Logical, defaulting to \code{TRUE}. 
If \code{FALSE} a more #' minimal summary is printed consisting only of the parameter estimates. #' @param digits Number of digits to use for formatting numbers. #' @param ... Ignored. #' @return Returns \code{x}, invisibly. #' @details #' \subsection{Point estimates}{ #' Regardless of the estimation algorithm, point estimates are medians computed #' from simulations. For models fit using MCMC (\code{"sampling"}) the posterior #' sample is used. For optimization (\code{"optimizing"}), the simulations are #' generated from the asymptotic Gaussian sampling distribution of the #' parameters. For the \code{"meanfield"} and \code{"fullrank"} variational #' approximations, draws from the variational approximation to the posterior are #' used. In all cases, the point estimates reported are the same as the values #' returned by \code{\link[=coef.stanreg]{coef}}. #' } #' \subsection{Uncertainty estimates (MAD_SD)}{ #' The standard deviations reported (labeled \code{MAD_SD} in the print output) #' are computed from the same set of draws described above and are proportional #' to the median absolute deviation (\code{\link[stats]{mad}}) from the median. #' Compared to the raw posterior standard deviation, the MAD_SD will be #' more robust for long-tailed distributions. These are the same as the values #' returned by \code{\link[=se.stanreg]{se}}. #' } #' \subsection{Additional output}{ #' \itemize{ #' \item For GLMs with group-specific terms (see \code{\link{stan_glmer}}) the printed #' output also shows point estimates of the standard deviations of the group #' effects (and correlations if there are both intercept and slopes that vary by #' group). #' #' \item For analysis of variance models (see \code{\link{stan_aov}}) models, an #' ANOVA-like table is also displayed. #' #' \item For joint longitudinal and time-to-event (see \code{\link{stan_jm}}) models #' the estimates are presented separately for each of the distinct submodels. 
#' }
#' }
#'
#' @seealso \code{\link{summary.stanreg}}, \code{\link{stanreg-methods}}
#'
print.stanreg <- function(x, digits = 1, detail = TRUE, ...) {
  # Optional header: model type, family, formula, number of observations
  if (detail) {
    cat(x$stan_function)
    cat("\n family: ", family_plus_link(x))
    cat("\n formula: ", formula_string(formula(x)))
    cat("\n observations:", nobs(x))
    if (isTRUE(x$stan_function %in% c("stan_glm", "stan_glm.nb", "stan_lm", "stan_aov"))) {
      cat("\n predictors: ", length(coef(x)))
    }
    if (!is.null(x$call$subset)) {
      cat("\n subset: ", deparse(x$call$subset))
    }
    cat("\n------\n")
  }
  # Flags for model features that change which parameter groups are printed
  mer <- is.mer(x)
  gamm <- isTRUE(x$stan_function == "stan_gamm4")
  ord <- is_polr(x) && !("(Intercept)" %in% rownames(x$stan_summary))
  aux_nms <- .aux_name(x)
  if (!used.optimizing(x)) {
    # MCMC / variational case: compute medians and MAD_SDs from the draws
    if (isTRUE(x$stan_function %in% c("stan_lm", "stan_aov"))) {
      aux_nms <- c("R2", "log-fit_ratio", aux_nms)
    }
    mat <- as.matrix(x$stanfit) # don't use as.matrix.stanreg method b/c want access to mean_PPD
    nms <- setdiff(rownames(x$stan_summary), c("log-posterior", aux_nms))
    if (gamm) {
      # Pull out smoothing-term SDs so they can be printed in their own table
      smooth_sd_nms <- grep("^smooth_sd\\[", nms, value = TRUE)
      nms <- setdiff(nms, smooth_sd_nms)
      smooth_sd_mat <- mat[, smooth_sd_nms, drop = FALSE]
      smooth_sd_estimates <- .median_and_madsd(smooth_sd_mat)
    }
    if (mer) {
      # Group-specific coefficients b[...] are summarized via VarCorr instead
      nms <- setdiff(nms, grep("^b\\[", nms, value = TRUE))
    }
    if (ord) {
      # Ordinal models: cutpoints (names contain "|") get their own table
      cut_nms <- grep("|", nms, fixed = TRUE, value = TRUE)
      nms <- setdiff(nms, cut_nms)
      cut_mat <- mat[, cut_nms, drop = FALSE]
      cut_estimates <- .median_and_madsd(cut_mat)
    }
    # mean_PPD is excluded from the coefficient table
    ppd_nms <- grep("^mean_PPD", nms, value = TRUE)
    nms <- setdiff(nms, ppd_nms)
    coef_mat <- mat[, nms, drop = FALSE]
    estimates <- .median_and_madsd(coef_mat)
    if (mer) {
      estimates <- estimates[!grepl("^Sigma\\[", rownames(estimates)),, drop=FALSE]
    }
    .printfr(estimates, digits, ...)
    if (length(aux_nms)) {
      aux_estimates <- .median_and_madsd(mat[, aux_nms, drop=FALSE])
      cat("\nAuxiliary parameter(s):\n")
      .printfr(aux_estimates, digits, ...)
    }
    if (ord) {
      cat("\nCutpoints:\n")
      .printfr(cut_estimates, digits, ...)
    }
    if (gamm) {
      cat("\nSmoothing terms:\n")
      .printfr(smooth_sd_estimates, digits, ...)
    }
    if (mer) {
      cat("\nError terms:\n")
      print(VarCorr(x), digits = digits + 1, ...)
      cat("Num. levels:", paste(names(ngrps(x)), unname(ngrps(x)), collapse = ", "), "\n")
    }
    if (is(x, "aov")) {
      print_anova_table(x, digits, ...)
    }
  } else { # used optimization
    # Point estimates and SEs come directly from the stored stan_summary
    nms <- names(x$coefficients)
    ppd_nms <- grep("^mean_PPD", rownames(x$stan_summary), value = TRUE)
    estimates <- x$stan_summary[nms, 1:2, drop=FALSE]
    .printfr(estimates, digits, ...)
    if (length(aux_nms)) {
      cat("\nAuxiliary parameter(s):\n")
      .printfr(x$stan_summary[aux_nms, 1:2, drop=FALSE], digits, ...)
    }
  }
  if (detail) {
    cat("\n------\n")
    cat("* For help interpreting the printed output see ?print.stanreg\n")
    cat("* For info on the priors used see ?prior_summary.stanreg\n")
  }
  # Return the object invisibly so the method pipes/assigns cleanly
  invisible(x)
}
#' @rdname print.stanreg
#' @export
#' @method print stanmvreg
# Compact print method for multivariate / joint models: one estimates table
# per longitudinal submodel, one for the event submodel (if any), then the
# group-level error terms and the sample-average PPD.
print.stanmvreg <- function(x, digits = 3, ...) {
  M <- x$n_markers
  mvmer <- is.mvmer(x)
  surv <- is.surv(x)
  jm <- is.jm(x)
  stubs <- paste0("(", get_stub(x), 1:M, "):")
  cat(x$stan_function)
  if (mvmer) {
    for (m in 1:M) {
      cat("\n formula", stubs[m], formula_string(formula(x, m = m)))
      cat("\n family ", stubs[m], family_plus_link(x, m = m))
    }
  }
  if (surv) {
    cat("\n formula (Event):", formula_string(formula(x, m = "Event")))
    cat("\n baseline hazard:", x$basehaz$type_name)
  }
  if (jm) {
    # List the association structure(s) linking each longitudinal submodel
    # to the event submodel
    sel <- grep("^which", rownames(x$assoc), invert = TRUE, value = TRUE)
    assoc <- lapply(1:M, function(m) {
      vals <- sel[which(x$assoc[sel,m] == TRUE)]
      paste0(vals, " (Long", m, ")")
    })
    cat("\n assoc: ", paste(unlist(assoc), collapse = ", "))
  }
  cat("\n------\n")
  mat <- as.matrix(x$stanfit)
  nms <- collect_nms(rownames(x$stan_summary), M, stub = get_stub(x), value = TRUE)
  # Estimates table for longitudinal submodel(s)
  if (mvmer) {
    link <- sapply(1:M, function(m) x$family[[m]]$link)
    for (m in 1:M) {
      terms_m <- terms(x)[[m]]
      sel <- attr(terms_m, "response")
      yvar <- rownames(attr(terms_m, "factors"))[sel]
      if (is.jm(x)) {
        cat(paste0("\nLongitudinal submodel", if (M > 1) paste0(" ", m), ": ", yvar,"\n"))
      } else {
        cat(paste0("\nSubmodel for y", m, ": ", yvar,"\n"))
      }
      coef_mat <- mat[, c(nms$y[[m]], nms$y_extra[[m]]), drop = FALSE]
      # Calculate median and MAD
      estimates <- .median_and_madsd(coef_mat)
      # Add column with eform (only meaningful for log/logit links)
      if (link[m] %in% c("log", "logit"))
        estimates <- cbind(estimates,
          "exp(Median)" = c(exp(estimates[nms$y[[m]], "Median"]),
                            rep(NA, length(nms$y_extra[[m]]))))
      # Print estimates
      rownames(estimates) <- gsub(paste0("^", get_stub(x), m, "\\|"), "", rownames(estimates))
      .printfr(estimates, digits, ...)
    }
  }
  # Estimates table for event submodel
  if (surv) {
    cat("\nEvent submodel:\n")
    coef_mat <- mat[, c(nms$e, nms$a, nms$e_extra), drop = FALSE]
    # Calculate median and MAD
    estimates <- .median_and_madsd(coef_mat)
    # Add column with eform (hazard ratios for coefficients and assoc params)
    estimates <- cbind(estimates,
      "exp(Median)" = c(exp(estimates[c(nms$e, nms$a), "Median"]),
                        rep(NA, length(nms$e_extra))))
    rownames(estimates) <- gsub("^Event\\|", "", rownames(estimates))
    rownames(estimates) <- gsub("^Assoc\\|", "", rownames(estimates))
    .printfr(estimates, digits, ...)
  }
  # Estimates table for group-level random effects
  if (mvmer) {
    cat("\nGroup-level error terms:\n")
    print(VarCorr(x), digits = digits + 1, ...)
    cat("Num. levels:", paste(names(ngrps(x)), unname(ngrps(x)), collapse = ", "), "\n")
    # Sample average of the PPD
    ppd_mat <- mat[, nms$ppd, drop = FALSE]
    ppd_estimates <- .median_and_madsd(ppd_mat)
    cat("\nSample avg. posterior predictive distribution \nof",
        if (is.jm(x)) "longitudinal outcomes:\n" else "y:\n")
    .printfr(ppd_estimates, digits, ...)
  }
  cat("\n------\n")
  cat("For info on the priors used see help('prior_summary.stanreg').")
  invisible(x)
}
#' Summary method for stanreg objects
#'
#' Summaries of parameter estimates and MCMC convergence diagnostics
#' (Monte Carlo error, effective sample size, Rhat).
#'
#' @export
#' @method summary stanreg
#'
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @template args-regex-pars
#'
#' @param ... Currently ignored.
#' @param pars An optional character vector specifying a subset of parameters to
#' display. Parameters can be specified by name or several shortcuts can be
#' used. Using \code{pars="beta"} will restrict the displayed parameters to
#' only the regression coefficients (without the intercept). \code{"alpha"}
#' can also be used as a shortcut for \code{"(Intercept)"}. If the model has
#' varying intercepts and/or slopes they can be selected using \code{pars =
#' "varying"}.
#'
#' In addition, for \code{stanmvreg} objects there are some additional shortcuts
#' available. Using \code{pars = "long"} will display the
#' parameter estimates for the longitudinal submodels only (excluding group-specific
#' parameters, but including auxiliary parameters).
#' Using \code{pars = "event"} will display the
#' parameter estimates for the event submodel only, including any association
#' parameters.
#' Using \code{pars = "assoc"} will display only the
#' association parameters.
#' Using \code{pars = "fixef"} will display all fixed effects, but not
#' the random effects or the auxiliary parameters.
#' If \code{pars} and \code{regex_pars} are set to \code{NULL} then all
#' fixed effect regression coefficients are selected, as well as any
#' auxiliary parameters and the log posterior.
#'
#' If \code{pars} is \code{NULL} all parameters are selected for a \code{stanreg}
#' object, while for a \code{stanmvreg} object all
#' fixed effect regression coefficients are selected as well as any
#' auxiliary parameters and the log posterior. See
#' \strong{Examples}.
#' @param probs For models fit using MCMC or one of the variational algorithms,
#' an optional numeric vector of probabilities passed to
#' \code{\link[stats]{quantile}}.
#' @param digits Number of digits to use for formatting numbers when printing.
#' When calling \code{summary}, the value of digits is stored as the #' \code{"print.digits"} attribute of the returned object. #' #' @return The \code{summary} method returns an object of class #' \code{"summary.stanreg"} (or \code{"summary.stanmvreg"}, inheriting #' \code{"summary.stanreg"}), which is a matrix of #' summary statistics and #' diagnostics, with attributes storing information for use by the #' \code{print} method. The \code{print} method for \code{summary.stanreg} or #' \code{summary.stanmvreg} objects is called for its side effect and just returns #' its input. The \code{as.data.frame} method for \code{summary.stanreg} #' objects converts the matrix to a data.frame, preserving row and column #' names but dropping the \code{print}-related attributes. #' #' @details #' \subsection{mean_PPD diagnostic}{ #' Summary statistics are also reported for \code{mean_PPD}, the sample #' average posterior predictive distribution of the outcome. This is useful as a #' quick diagnostic. A useful heuristic is to check if \code{mean_PPD} is #' plausible when compared to \code{mean(y)}. If it is plausible then this does #' \emph{not} mean that the model is good in general (only that it can reproduce #' the sample mean), however if \code{mean_PPD} is implausible then it is a sign #' that something is wrong (severe model misspecification, problems with the #' data, computational issues, etc.). #' } #' #' @seealso \code{\link{prior_summary}} to extract or print a summary of the #' priors used for a particular model. 
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' if (!exists("example_model")) example(example_model)
#' summary(example_model, probs = c(0.1, 0.9))
#'
#' # These produce the same output for this example,
#' # but the second method can be used for any model
#' summary(example_model, pars = c("(Intercept)", "size",
#'                                 paste0("period", 2:4)))
#' summary(example_model, pars = c("alpha", "beta"))
#'
#' # Only show parameters varying by group
#' summary(example_model, pars = "varying")
#' as.data.frame(summary(example_model, pars = "varying"))
#' }
#' @importMethodsFrom rstan summary
summary.stanreg <- function(object, pars = NULL, regex_pars = NULL,
                            probs = c(0.1, 0.5, 0.9), ..., digits = 1) {
  mer <- is.mer(object)
  pars <- collect_pars(object, pars, regex_pars)

  if (!used.optimizing(object)) {
    # MCMC or variational: delegate to rstan's summary method on the stanfit
    args <- list(object = object$stanfit, probs = probs)
    out <- do.call("summary", args)$summary
    if (is.null(pars) && used.variational(object)) {
      out <- out[!rownames(out) %in% "log-posterior", , drop = FALSE]
    }
    if (!is.null(pars)) {
      pars <- allow_special_parnames(object, pars)
      out <- out[rownames(out) %in% pars, , drop = FALSE]
    }
    # drop parameters for "new" levels created internally for prediction
    out <- out[!grepl(":_NEW_", rownames(out), fixed = TRUE), , drop = FALSE]
    stats <- colnames(out)
    if ("n_eff" %in% stats) {
      out[, "n_eff"] <- round(out[, "n_eff"])
    }
    if ("se_mean" %in% stats) { # So people don't confuse se_mean and sd
      colnames(out)[stats %in% "se_mean"] <- "mcse"
    }
  } else { # used optimization
    if (!is.null(probs)) {
      # summarize the normal-approximation (or importance-resampled) draws
      stanmat <- object$asymptotic_sampling_dist
      object$stan_summary <- cbind(Median = apply(stanmat, 2L, median),
                                   MAD_SD = apply(stanmat, 2L, mad),
                                   t(apply(stanmat, 2L, quantile, probs)))
    }
    object$stan_summary <- cbind(object$stan_summary, object$diagnostics)
    if (is.null(pars)) {
      famname <- family(object)$family
      mark <- names(object$coefficients)
      if (is.gaussian(famname)) mark <- c(mark, "sigma")
      if (is.nb(famname)) mark <- c(mark, "reciprocal_dispersion")
    } else {
      mark <- NA
      if ("alpha" %in% pars) mark <- c(mark, "(Intercept)")
      if ("beta" %in% pars)
        mark <- c(mark, setdiff(names(object$coefficients), "(Intercept)"))
      mark <- c(mark, setdiff(pars, c("alpha", "beta")))
      mark <- mark[!is.na(mark)]
    }
    out <- object$stan_summary[mark, , drop = FALSE]
  }

  # attributes drive the print method; print.digits is documented behavior
  structure(
    out,
    call = object$call,
    algorithm = object$algorithm,
    stan_function = object$stan_function,
    family = family_plus_link(object),
    formula = formula(object),
    posterior_sample_size = posterior_sample_size(object),
    nobs = nobs(object),
    npreds = if (isTRUE(object$stan_function %in%
                          c("stan_glm", "stan_glm.nb", "stan_lm")))
      length(coef(object)) else NULL,
    ngrps = if (mer) ngrps(object) else NULL,
    print.digits = digits,
    priors = object$prior.info,
    no_ppd_diagnostic = no_mean_PPD(object),
    class = "summary.stanreg"
  )
}

#' @rdname summary.stanreg
#' @export
#' @method print summary.stanreg
#'
#' @param x An object of class \code{"summary.stanreg"}.
print.summary.stanreg <- function(x, digits = max(1, attr(x, "print.digits")),
                                  ...) {
  atts <- attributes(x)
  cat("\nModel Info:")
  cat("\n function: ", atts$stan_function)
  cat("\n family: ", atts$family)
  cat("\n formula: ", formula_string(atts$formula))
  cat("\n algorithm: ", atts$algorithm)
  if (!is.null(atts$posterior_sample_size) && atts$algorithm == "sampling") {
    cat("\n sample: ", atts$posterior_sample_size, "(posterior sample size)")
  }
  cat("\n priors: ", "see help('prior_summary')")
  cat("\n observations:", atts$nobs)
  if (!is.null(atts$npreds)) {
    cat("\n predictors: ", atts$npreds)
  }
  if (!is.null(atts$call$subset)) {
    cat("\n subset: ", deparse(atts$call$subset))
  }
  if (!is.null(atts$ngrps)) {
    cat("\n groups: ", paste0(names(atts$ngrps), " (",
                              unname(atts$ngrps), ")",
                              collapse = ", "))
  }
  cat("\n\nEstimates:\n")

  # optimization/VB report khat (Pareto k); MCMC reports Rhat
  if (used.optimizing(atts) || used.variational(atts)) {
    hat <- "khat"
    str_diag <- "Monte Carlo diagnostics"
    str1 <- "and khat is the Pareto k diagnostic for importance sampling"
    # fixed typo: "perfomance" -> "performance"
    str2 <- " (performance is usually good when khat < 0.7).\n"
  } else {
    hat <- "Rhat"
    str_diag <- "MCMC diagnostics"
    str1 <- "and Rhat is the potential scale reduction factor on split chains"
    str2 <- " (at convergence Rhat=1).\n"
  }

  sel <- which(colnames(x) %in% c("mcse", "n_eff", hat))
  has_mc_diagnostic <- length(sel) > 0
  if (has_mc_diagnostic) {
    xtemp <- x[, -sel, drop = FALSE]
    colnames(xtemp) <- paste(" ", colnames(xtemp))
  } else {
    xtemp <- x
  }

  # mean_PPD rows are printed separately as a fit diagnostic
  ppd_nms <- grep("^mean_PPD", rownames(x), value = TRUE)
  has_ppd_diagnostic <- !atts$no_ppd_diagnostic && length(ppd_nms) > 0
  if (has_ppd_diagnostic) {
    ppd_estimates <- xtemp[rownames(xtemp) %in% ppd_nms, , drop = FALSE]
  } else {
    ppd_estimates <- NULL
  }
  xtemp <- xtemp[!rownames(xtemp) %in% c(ppd_nms, "log-posterior"), ,
                 drop = FALSE]

  # print table of parameter stats
  .printfr(xtemp, digits)

  if (has_ppd_diagnostic) {
    cat("\nFit Diagnostics:\n")
    .printfr(ppd_estimates, digits)
    cat("\nThe mean_ppd is the sample average posterior predictive ",
        "distribution of the outcome variable ",
        "(for details see help('summary.stanreg')).\n",
        sep = '')
  }

  if (has_mc_diagnostic) {
    cat("\n", str_diag, "\n", sep = '')
    mcse_hat <- format(round(x[, c("mcse", hat), drop = FALSE], digits),
                       nsmall = digits)
    n_eff <- format(x[, "n_eff", drop = FALSE], drop0trailing = TRUE)
    print(cbind(mcse_hat, n_eff), quote = FALSE)
    cat("\nFor each parameter, mcse is Monte Carlo standard error, ",
        "n_eff is a crude measure of effective sample size, ",
        str1,
        str2,
        sep = '')
  }
  invisible(x)
}

#' @rdname summary.stanreg
#' @method as.data.frame summary.stanreg
#' @export
as.data.frame.summary.stanreg <- function(x, ...) {
  # drop the class (and thereby the print-related dispatch) before converting
  as.data.frame(unclass(x), ...)
}

#' @rdname summary.stanreg
#' @export
#' @method summary stanmvreg
summary.stanmvreg <- function(object, pars = NULL, regex_pars = NULL,
                              probs = NULL, ..., digits = 3) {
  pars <- collect_pars(object, pars, regex_pars)
  M <- object$n_markers
  mvmer <- is.mvmer(object)
  surv <- is.surv(object)
  jm <- is.jm(object)
  if (mvmer) {
    # Outcome variable for each longitudinal submodel
    y_vars <- sapply(1:M, function(m, object) {
      terms_m <- terms(object)[[m]]
      sel <- attr(terms_m, "response")
      ret <- rownames(attr(terms_m, "factors"))[sel]
    }, object = object)
    # Family and link for each longitudinal submodel
    fam <- lapply(1:M, function(m) family_plus_link(object, m = m))
  }
  if (jm) {
    # Association structure
    sel <- grep("^which", rownames(object$assoc), invert = TRUE, value = TRUE)
    assoc <- list_nms(lapply(1:M, function(m)
      sel[which(object$assoc[sel, m] == TRUE)]), M)
  }

  # Construct summary table
  args <- list(object = object$stanfit)
  if (!is.null(probs))
    args$probs <- probs
  out <- do.call("summary", args)$summary
  nms <- collect_nms(rownames(object$stan_summary), M,
                     stub = get_stub(object), value = TRUE)
  if (!is.null(pars)) {
    # expand shortcut names ("alpha", "beta", "long", "event", ...) to
    # the actual parameter names before subsetting
    pars2 <- NA
    if ("alpha" %in% pars) pars2 <- c(pars2, nms$alpha)
    if ("beta" %in% pars) pars2 <- c(pars2, nms$beta)
    if ("long" %in% pars) pars2 <- c(pars2, unlist(nms$y), unlist(nms$y_extra))
    if ("event" %in% pars) pars2 <- c(pars2, nms$e, nms$a, nms$e_extra)
    if ("assoc" %in% pars) pars2 <- c(pars2, nms$a)
    if ("fixef" %in% pars) pars2 <- c(pars2, unlist(nms$y), nms$e, nms$a)
    if ("b" %in% pars) pars2 <- c(pars2, nms$b)
    pars2 <- c(pars2, setdiff(pars, c("alpha", "beta", "varying", "b",
                                      "long", "event", "assoc", "fixef")))
    pars <- pars2[!is.na(pars2)]
  } else {
    pars <- rownames(object$stan_summary)
    pars <- setdiff(pars, b_names(pars, value = TRUE))
    if (used.variational(object))
      pars <- setdiff(pars, "log-posterior")
  }
  out <- out[rownames(out) %in% pars, , drop = FALSE]
  out <- out[!grepl(":_NEW_", rownames(out), fixed = TRUE), , drop = FALSE]
  stats <- colnames(out)
  if ("n_eff" %in% stats)
    out[, "n_eff"] <- round(out[, "n_eff"])
  if ("se_mean" %in% stats) # So people don't confuse se_mean and sd
    colnames(out)[stats %in% "se_mean"] <- "mcse"

  # Reorder rows of output table
  nms_tmp <- rownames(out)
  nms_tmp_y <- lapply(1:M, function(m)
    grep(paste0("^", get_stub(object), m, "\\|"), nms_tmp, value = TRUE))
  nms_tmp_e <- grep("^Event\\|", nms_tmp, value = TRUE)
  nms_tmp_a <- grep("^Assoc\\|", nms_tmp, value = TRUE)
  nms_tmp_b <- b_names(nms_tmp, value = TRUE)
  nms_tmp_Sigma <- grep("^Sigma", nms_tmp, value = TRUE)
  nms_tmp_lp <- grep("^log-posterior$", nms_tmp, value = TRUE)
  out <- out[c(unlist(nms_tmp_y), nms_tmp_e, nms_tmp_a, nms_tmp_b,
               nms_tmp_Sigma, nms_tmp_lp), , drop = FALSE]

  # Output object (attributes drive the print method)
  if (mvmer)
    out <- structure(
      out,
      y_vars = y_vars,
      family = fam,
      n_markers = object$n_markers,
      n_yobs = object$n_yobs,
      n_grps = object$n_grps)
  if (surv)
    out <- structure(
      out,
      n_subjects = object$n_subjects,
      n_events = object$n_events,
      basehaz = object$basehaz)
  if (jm)
    out <- structure(
      out,
      id_var = object$id_var,
      time_var = object$time_var,
      assoc = assoc)
  structure(
    out,
    formula = object$formula,
    algorithm = object$algorithm,
    stan_function = object$stan_function,
    posterior_sample_size = posterior_sample_size(object),
    runtime = object$runtime,
    print.digits = digits,
    class = c("summary.stanmvreg", "summary.stanreg"))
}

#' @rdname summary.stanreg
#' @export
#' @method print summary.stanmvreg
print.summary.stanmvreg <- function(x, digits = max(1, attr(x, "print.digits")),
                                    ...) {
  atts <- attributes(x)
  mvmer <- atts$stan_function %in% c("stan_mvmer", "stan_jm")
  jm <- atts$stan_function == "stan_jm"
  tab <- if (jm) " " else "" # extra padding for the wider stan_jm labels
  cat("\nModel Info:\n")
  cat("\n function: ", tab, atts$stan_function)
  if (mvmer) {
    M <- atts$n_markers
    stubs <- paste0("(", if (jm) "Long" else "y", 1:M, "):")
    for (m in 1:M) {
      cat("\n formula", stubs[m], formula_string(atts$formula[[m]]))
      cat("\n family ", stubs[m], atts$family[[m]])
    }
  }
  if (jm) {
    cat("\n formula (Event):", formula_string(atts$formula[["Event"]]))
    cat("\n baseline hazard:", atts$basehaz$type_name)
    assoc_fmt <- unlist(lapply(1:M, function(m)
      paste0(atts$assoc[[m]], " (Long", m, ")")))
    cat("\n assoc: ", paste(assoc_fmt, collapse = ", "))
  }
  cat("\n algorithm: ", tab, atts$algorithm)
  cat("\n priors: ", tab, "see help('prior_summary')")
  if (!is.null(atts$posterior_sample_size) && atts$algorithm == "sampling")
    cat("\n sample: ", tab, atts$posterior_sample_size,
        "(posterior sample size)")
  if (mvmer) {
    obs_vals <- paste0(atts$n_yobs, " (", if (jm) "Long" else "y", 1:M, ")")
    cat("\n num obs: ", tab, paste(obs_vals, collapse = ", "))
  }
  if (jm) {
    cat("\n num subjects: ", atts$n_subjects)
    cat(paste0("\n num events: ", atts$n_events, " (",
               round(100 * atts$n_events/atts$n_subjects, 1), "%)"))
  }
  if (!is.null(atts$n_grps))
    cat("\n groups: ", tab, paste0(names(atts$n_grps), " (",
                                   unname(atts$n_grps), ")",
                                   collapse = ", "))
  if (atts$algorithm == "sampling") {
    maxtime <- max(atts$runtime[, "total"])
    if (maxtime == 0)
      maxtime <- "<0.1"
    cat("\n runtime: ", tab, maxtime, "mins")
  }
  cat("\n\nEstimates:\n")
  sel <- which(colnames(x) %in% c("mcse", "n_eff", "Rhat"))
  if (!length(sel)) {
    .printfr(x, digits)
  } else {
    # print estimates and MCMC diagnostics as separate tables
    xtemp <- x[, -sel, drop = FALSE]
    colnames(xtemp) <- paste(" ", colnames(xtemp))
    .printfr(xtemp, digits)
    cat("\nDiagnostics:\n")
    mcse_rhat <- format(round(x[, c("mcse", "Rhat"), drop = FALSE], digits),
                        nsmall = digits)
    n_eff <- format(x[, "n_eff", drop = FALSE], drop0trailing = TRUE)
    print(cbind(mcse_rhat, n_eff),
          quote = FALSE)
    cat("\nFor each parameter, mcse is Monte Carlo standard error, ",
        "n_eff is a crude measure of effective sample size, ",
        "and Rhat is the potential scale reduction factor on split chains",
        " (at convergence Rhat=1).\n", sep = '')
  }
  invisible(x)
}

# internal ----------------------------------------------------------------

# Print a numeric table rounded to 'digits' decimal places, unquoted.
.printfr <- function(x, digits, ...) {
  print(format(round(x, digits), nsmall = digits), quote = FALSE, ...)
}

# Columnwise posterior median and MAD-SD of a draws matrix.
.median_and_madsd <- function(x) {
  cbind(Median = apply(x, 2, median), MAD_SD = apply(x, 2, mad))
}

# equivalent to isFALSE(object$compute_mean_PPD)
no_mean_PPD <- function(object) {
  x <- object$compute_mean_PPD
  is.logical(x) && length(x) == 1L && !is.na(x) && !x
}

# Allow "alpha", "beta", "varying" as shortcuts
#
# @param object stanreg object
# @param pars result of calling collect_pars(object, pars, regex_pars)
allow_special_parnames <- function(object, pars) {
  pars[pars == "varying"] <- "b"
  pars2 <- NA
  if ("alpha" %in% pars)
    pars2 <- c(pars2, "(Intercept)")
  if ("beta" %in% pars) {
    beta_nms <- if (is.mer(object))
      names(fixef(object)) else names(object$coefficients)
    pars2 <- c(pars2, setdiff(beta_nms, "(Intercept)"))
  }
  if ("b" %in% pars) {
    if (is.mer(object)) {
      pars2 <- c(pars2, b_names(rownames(object$stan_summary), value = TRUE))
      pars[pars == "b"] <- NA
    } else {
      warning("No group-specific parameters. 'varying' ignored.",
              call. = FALSE)
    }
  }
  pars2 <- c(pars2, setdiff(pars, c("alpha", "beta", "varying")))
  pars2[!is.na(pars2)]
}

# Family name with link in parenthesis
# @param x stanreg object
# @param ... Optionally include m to specify which submodel for stanmvreg models
family_plus_link <- function(x, ...) {
  fam <- family(x, ...)
  if (is.character(fam)) {
    # stan_polr stores the method name rather than a family object
    stopifnot(identical(fam, x$method))
    fam <- paste0("ordered [", fam, "]")
  } else if (inherits(x, "betareg")) {
    fam <- paste0("beta [",
                  x$family$link,
                  ", link.phi=",
                  x$family_phi$link,
                  "]")
  } else {
    fam <- paste0(fam$family, " [", fam$link, "]")
  }
  return(fam)
}

# Deparse a formula to a single string, optionally breaking and indenting
# long formulas for display.
# @param formula formula object
formula_string <- function(formula, break_and_indent = TRUE) {
  coll <- if (break_and_indent) "--MARK--" else " "
  char <- gsub("\\s+", " ", paste(deparse(formula), collapse = coll))
  if (!break_and_indent)
    return(char)
  gsub("--MARK--", "\n\t ", char, fixed = TRUE)
}

# get name of aux parameter based on family
.aux_name <- function(object) {
  aux <- character()
  if (!is_polr(object)) {
    aux <- .rename_aux(family(object))
    if (is.na(aux)) {
      aux <- character()
    }
  }
  return(aux)
}

# print anova table for stan_aov models
# @param x stanreg object created by stan_aov()
print_anova_table <- function(x, digits, ...) {
  labels <- attributes(x$terms)$term.labels
  patterns <- gsub(":", ".*:", labels)
  dnms <- dimnames(extract(x$stanfit, pars = "beta",
                           permuted = FALSE))$parameters
  groups <- sapply(patterns, simplify = FALSE, FUN = grep, x = dnms)
  names(groups) <- gsub(".*", "", names(groups), fixed = TRUE)
  groups <- groups[sapply(groups, length) > 0]
  effects_dim <- dim(x$effects)
  effects <- x$effects^2
  effects <- sapply(groups, FUN = function(i) {
    apply(effects[, , i, drop = FALSE], 1:2, mean)
  })
  dim(effects) <- c(effects_dim[-3], ncol(effects))
  dim(effects) <- c(nrow(effects) * ncol(effects), dim(effects)[3])
  colnames(effects) <- paste("Mean Sq", names(groups))
  anova_table <- .median_and_madsd(effects)
  cat("\nANOVA-like table:\n")
  .printfr(anova_table, digits, ...)
} rstanarm/R/stan_glm.fit.R0000644000176200001440000011720414370470372015026 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' @rdname stan_glm #' @export #' @template args-prior_smooth #' @param prior_ops Deprecated. See \link{rstanarm-deprecated} for details. #' @param group A list, possibly of length zero (the default), but otherwise #' having the structure of that produced by \code{\link[lme4]{mkReTrms}} to #' indicate the group-specific part of the model. In addition, this list must #' have elements for the \code{regularization}, \code{concentration} #' \code{shape}, and \code{scale} components of a \code{\link{decov}} #' prior for the covariance matrices among the group-specific coefficients. #' @param importance_resampling Logical scalar indicating whether to use #' importance resampling when approximating the posterior distribution with #' a multivariate normal around the posterior mode, which only applies #' when \code{algorithm} is \code{"optimizing"} but defaults to \code{TRUE} #' in that case #' @param keep_every Positive integer, which defaults to 1, but can be higher #' in order to "thin" the importance sampling realizations. 
Applies only #' when \code{importance_resampling=TRUE}. #' @importFrom lme4 mkVarCorr #' @importFrom loo psis stan_glm.fit <- function(x, y, weights = rep(1, NROW(y)), offset = rep(0, NROW(y)), family = gaussian(), ..., prior = default_prior_coef(family), prior_intercept = default_prior_intercept(family), prior_aux = exponential(autoscale = TRUE), prior_smooth = exponential(autoscale = FALSE), prior_ops = NULL, group = list(), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), mean_PPD = algorithm != "optimizing" && !prior_PD, adapt_delta = NULL, QR = FALSE, sparse = FALSE, importance_resampling = algorithm != "sampling", keep_every = algorithm != "sampling") { # prior_ops deprecated but make sure it still works until # removed in future release if (!is.null(prior_ops)) { tmp <- .support_deprecated_prior_options(prior, prior_intercept, prior_aux, prior_ops) prior <- tmp[["prior"]] prior_intercept <- tmp[["prior_intercept"]] prior_aux <- tmp[["prior_aux"]] prior_ops <- NULL } algorithm <- match.arg(algorithm) family <- validate_family(family) supported_families <- c("binomial", "gaussian", "Gamma", "inverse.gaussian", "poisson", "neg_binomial_2", "Beta regression") fam <- which(pmatch(supported_families, family$family, nomatch = 0L) == 1L) if (!length(fam)) { supported_families_err <- supported_families supported_families_err[supported_families_err == "Beta regression"] <- "mgcv::betar" stop("'family' must be one of ", paste(supported_families_err, collapse = ", ")) } supported_links <- supported_glm_links(supported_families[fam]) link <- which(supported_links == family$link) if (!length(link)) stop("'link' must be one of ", paste(supported_links, collapse = ", ")) if (binom_y_prop(y, family, weights)) { stop("To specify 'y' as proportion of successes and 'weights' as ", "number of trials please use stan_glm rather than calling ", "stan_glm.fit directly.", call. 
= FALSE) } y <- validate_glm_outcome_support(y, family) trials <- NULL if (is.binomial(family$family) && NCOL(y) == 2L) { trials <- as.integer(y[, 1L] + y[, 2L]) y <- as.integer(y[, 1L]) if (length(y == 1)) { y <- array(y) trials <- array(trials) } } # useless assignments to pass R CMD check has_intercept <- prior_df <- prior_df_for_intercept <- prior_df_for_aux <- prior_df_for_smooth <- prior_dist <- prior_dist_for_intercept <- prior_dist_for_aux <- prior_dist_for_smooth <- prior_mean <- prior_mean_for_intercept <- prior_mean_for_aux <- prior_mean_for_smooth <- prior_scale <- prior_scale_for_intercept <- prior_scale_for_aux <- prior_scale_for_smooth <- prior_autoscale <- prior_autoscale_for_intercept <- prior_autoscale_for_aux <- prior_autoscale_for_smooth <- global_prior_scale <- global_prior_df <- slab_df <- slab_scale <- NULL if (is.list(x)) { x_stuff <- center_x(x[[1]], sparse) smooth_map <- unlist(lapply(1:(length(x) - 1L), FUN = function(j) { rep(j, NCOL(x[[j + 1L]])) })) S <- do.call(cbind, x[-1L]) } else { x_stuff <- center_x(x, sparse) S <- matrix(NA_real_, nrow = nrow(x), ncol = 0L) smooth_map <- integer() } for (i in names(x_stuff)) # xtemp, xbar, has_intercept assign(i, x_stuff[[i]]) nvars <- ncol(xtemp) ok_dists <- nlist("normal", student_t = "t", "cauchy", "hs", "hs_plus", "laplace", "lasso", "product_normal") ok_intercept_dists <- ok_dists[1:3] ok_aux_dists <- c(ok_dists[1:3], exponential = "exponential") # prior distributions prior_stuff <- handle_glm_prior( prior, nvars, link = family$link, default_scale = 2.5, ok_dists = ok_dists ) # prior_{dist, mean, scale, df, dist_name, autoscale}, # global_prior_df, global_prior_scale, slab_df, slab_scale for (i in names(prior_stuff)) assign(i, prior_stuff[[i]]) if (isTRUE(is.list(prior_intercept)) && isTRUE(prior_intercept$default)) { m_y <- 0 if (family$family == "gaussian" && family$link == "identity") { if (!is.null(y)) m_y <- mean(y) # y can be NULL if prior_PD=TRUE } prior_intercept$location <- m_y } 
prior_intercept_stuff <- handle_glm_prior( prior_intercept, nvars = 1, default_scale = 2.5, link = family$link, ok_dists = ok_intercept_dists ) # prior_{dist, mean, scale, df, dist_name, autoscale}_for_intercept names(prior_intercept_stuff) <- paste0(names(prior_intercept_stuff), "_for_intercept") for (i in names(prior_intercept_stuff)) assign(i, prior_intercept_stuff[[i]]) prior_aux_stuff <- handle_glm_prior( prior_aux, nvars = 1, default_scale = 1, link = NULL, # don't need to adjust scale based on logit vs probit ok_dists = ok_aux_dists ) # prior_{dist, mean, scale, df, dist_name, autoscale}_for_aux names(prior_aux_stuff) <- paste0(names(prior_aux_stuff), "_for_aux") if (is.null(prior_aux)) { if (prior_PD) stop("'prior_aux' cannot be NULL if 'prior_PD' is TRUE.") prior_aux_stuff$prior_scale_for_aux <- Inf } for (i in names(prior_aux_stuff)) assign(i, prior_aux_stuff[[i]]) if (ncol(S) > 0) { # prior_{dist, mean, scale, df, dist_name, autoscale}_for_smooth prior_smooth_stuff <- handle_glm_prior( prior_smooth, nvars = max(smooth_map), default_scale = 1, link = NULL, ok_dists = ok_aux_dists ) names(prior_smooth_stuff) <- paste0(names(prior_smooth_stuff), "_for_smooth") if (is.null(prior_smooth)) { if (prior_PD) stop("'prior_smooth' cannot be NULL if 'prior_PD' is TRUE") prior_smooth_stuff$prior_scale_for_smooth <- Inf } for (i in names(prior_smooth_stuff)) assign(i, prior_smooth_stuff[[i]]) prior_scale_for_smooth <- array(prior_scale_for_smooth) } else { prior_dist_for_smooth <- 0L prior_mean_for_smooth <- array(NA_real_, dim = 0) prior_scale_for_smooth <- array(NA_real_, dim = 0) prior_df_for_smooth <- array(NA_real_, dim = 0) } famname <- supported_families[fam] is_bernoulli <- is.binomial(famname) && all(y %in% 0:1) && is.null(trials) is_nb <- is.nb(famname) is_gaussian <- is.gaussian(famname) is_gamma <- is.gamma(famname) is_ig <- is.ig(famname) is_beta <- is.beta(famname) is_continuous <- is_gaussian || is_gamma || is_ig || is_beta # require intercept for 
certain family and link combinations if (!has_intercept) { linkname <- supported_links[link] needs_intercept <- !is_gaussian && linkname == "identity" || is_gamma && linkname == "inverse" || is.binomial(famname) && linkname == "log" if (needs_intercept) stop("To use this combination of family and link ", "the model must have an intercept.") } # allow prior_PD even if no y variable if (is.null(y)) { if (!prior_PD) { stop("Outcome variable must be specified if 'prior_PD' is not TRUE.") } else { y <- fake_y_for_prior_PD(N = NROW(x), family = family) if (is_gaussian && (prior_autoscale || prior_autoscale_for_intercept || prior_autoscale_for_aux)) { message("'y' not specified, will assume sd(y)=1 when calculating scaled prior(s). ") } } } if (is_gaussian) { ss <- sd(y) if (prior_dist > 0L && prior_autoscale) prior_scale <- ss * prior_scale if (prior_dist_for_intercept > 0L && prior_autoscale_for_intercept) prior_scale_for_intercept <- ss * prior_scale_for_intercept if (prior_dist_for_aux > 0L && prior_autoscale_for_aux) prior_scale_for_aux <- ss * prior_scale_for_aux } if (!QR && prior_dist > 0L && prior_autoscale) { min_prior_scale <- 1e-12 prior_scale <- pmax(min_prior_scale, prior_scale / apply(xtemp, 2L, FUN = function(x) { num.categories <- length(unique(x)) x.scale <- 1 if (num.categories == 1) { x.scale <- 1 } else { x.scale <- sd(x) } return(x.scale) })) } prior_scale <- as.array(pmin(.Machine$double.xmax, prior_scale)) prior_scale_for_intercept <- min(.Machine$double.xmax, prior_scale_for_intercept) if (QR) { if (ncol(xtemp) <= 1) stop("'QR' can only be specified when there are multiple predictors.") if (sparse) stop("'QR' and 'sparse' cannot both be TRUE.") cn <- colnames(xtemp) decomposition <- qr(xtemp) Q <- qr.Q(decomposition) if (prior_autoscale) scale_factor <- sqrt(nrow(xtemp) - 1L) else scale_factor <- diag(qr.R(decomposition))[ncol(xtemp)] R_inv <- qr.solve(decomposition, Q) * scale_factor xtemp <- Q * scale_factor colnames(xtemp) <- cn xbar <- c(xbar 
%*% R_inv) } if (length(weights) > 0 && all(weights == 1)) weights <- double() if (length(offset) > 0 && all(offset == 0)) offset <- double() # create entries in the data block of the .stan file standata <- nlist( N = nrow(xtemp), K = ncol(xtemp), xbar = as.array(xbar), dense_X = !sparse, family = stan_family_number(famname), link, has_weights = length(weights) > 0, has_offset = length(offset) > 0, has_intercept, prior_PD, compute_mean_PPD = mean_PPD, prior_dist, prior_mean, prior_scale, prior_df, prior_dist_for_intercept, prior_scale_for_intercept = c(prior_scale_for_intercept), prior_mean_for_intercept = c(prior_mean_for_intercept), prior_df_for_intercept = c(prior_df_for_intercept), global_prior_df, global_prior_scale, slab_df, slab_scale, # for hs priors z_dim = 0, # betareg data link_phi = 0, betareg_z = array(0, dim = c(nrow(xtemp), 0)), has_intercept_z = 0, zbar = array(0, dim = c(0)), prior_dist_z = 0, prior_mean_z = integer(), prior_scale_z = integer(), prior_df_z = integer(), global_prior_scale_z = 0, global_prior_df_z = 0, prior_dist_for_intercept_z = 0, prior_mean_for_intercept_z = 0, prior_scale_for_intercept_z = 0, prior_df_for_intercept_z = 0, prior_df_for_intercept = c(prior_df_for_intercept), prior_dist_for_aux = prior_dist_for_aux, prior_dist_for_smooth, prior_mean_for_smooth, prior_scale_for_smooth, prior_df_for_smooth, slab_df_z = 0, slab_scale_z = 0, num_normals = if(prior_dist == 7) as.integer(prior_df) else integer(0), num_normals_z = integer(0), clogit = 0L, J = 0L, strata = integer() # mean,df,scale for aux added below depending on family ) # make a copy of user specification before modifying 'group' (used for keeping # track of priors) user_covariance <- if (!length(group)) NULL else group[["decov"]] if (length(group) && length(group$flist)) { if (length(group$strata)) { standata$clogit <- TRUE standata$J <- nlevels(group$strata) standata$strata <- c(as.integer(group$strata)[y == 1], as.integer(group$strata)[y == 0]) } check_reTrms(group) 
decov <- group$decov if (is.null(group$SSfun)) { standata$SSfun <- 0L standata$input <- double() standata$Dose <- double() } else { standata$SSfun <- group$SSfun standata$input <- group$input if (group$SSfun == 5) standata$Dose <- group$Dose else standata$Dose <- double() } Z <- t(group$Zt) group <- pad_reTrms(Ztlist = group$Ztlist, cnms = group$cnms, flist = group$flist) Z <- group$Z p <- sapply(group$cnms, FUN = length) l <- sapply(attr(group$flist, "assign"), function(i) nlevels(group$flist[[i]])) t <- length(l) b_nms <- make_b_nms(group) g_nms <- unlist(lapply(1:t, FUN = function(i) { paste(group$cnms[[i]], names(group$cnms)[i], sep = "|") })) standata$t <- t standata$p <- as.array(p) standata$l <- as.array(l) standata$q <- ncol(Z) standata$len_theta_L <- sum(choose(p, 2), p) if (is_bernoulli) { parts0 <- extract_sparse_parts(Z[y == 0, , drop = FALSE]) parts1 <- extract_sparse_parts(Z[y == 1, , drop = FALSE]) standata$num_non_zero <- c(length(parts0$w), length(parts1$w)) standata$w0 <- as.array(parts0$w) standata$w1 <- as.array(parts1$w) standata$v0 <- as.array(parts0$v) standata$v1 <- as.array(parts1$v) standata$u0 <- as.array(parts0$u) standata$u1 <- as.array(parts1$u) } else { parts <- extract_sparse_parts(Z) standata$num_non_zero <- length(parts$w) standata$w <- parts$w standata$v <- parts$v standata$u <- parts$u } standata$shape <- as.array(maybe_broadcast(decov$shape, t)) standata$scale <- as.array(maybe_broadcast(decov$scale, t)) standata$len_concentration <- sum(p[p > 1]) standata$concentration <- as.array(maybe_broadcast(decov$concentration, sum(p[p > 1]))) standata$len_regularization <- sum(p > 1) standata$regularization <- as.array(maybe_broadcast(decov$regularization, sum(p > 1))) standata$special_case <- all(sapply(group$cnms, FUN = function(x) { length(x) == 1 && x == "(Intercept)" })) } else { # not multilevel if (length(group)) { standata$clogit <- TRUE standata$J <- nlevels(group$strata) standata$strata <- c(as.integer(group$strata)[y == 1], 
as.integer(group$strata)[y == 0]) } standata$t <- 0L standata$p <- integer(0) standata$l <- integer(0) standata$q <- 0L standata$len_theta_L <- 0L if (is_bernoulli) { standata$num_non_zero <- rep(0L, 2) standata$w0 <- standata$w1 <- double(0) standata$v0 <- standata$v1 <- integer(0) standata$u0 <- standata$u1 <- integer(0) } else { standata$num_non_zero <- 0L standata$w <- double(0) standata$v <- integer(0) standata$u <- integer(0) } standata$special_case <- 0L standata$shape <- standata$scale <- standata$concentration <- standata$regularization <- rep(0, 0) standata$len_concentration <- 0L standata$len_regularization <- 0L standata$SSfun <- 0L standata$input <- double() standata$Dose <- double() } if (!is_bernoulli) { if (sparse) { parts <- extract_sparse_parts(xtemp) standata$nnz_X <- length(parts$w) standata$w_X <- parts$w standata$v_X <- parts$v standata$u_X <- parts$u standata$X <- array(0, dim = c(0L, dim(xtemp))) } else { standata$X <- array(xtemp, dim = c(1L, dim(xtemp))) standata$nnz_X <- 0L standata$w_X <- double(0) standata$v_X <- integer(0) standata$u_X <- integer(0) } standata$y <- y standata$weights <- weights standata$offset_ <- offset standata$K_smooth <- ncol(S) standata$S <- S standata$smooth_map <- smooth_map } # call stan() to draw from posterior distribution if (is_continuous) { standata$ub_y <- Inf standata$lb_y <- if (is_gaussian) -Inf else 0 standata$prior_scale_for_aux <- prior_scale_for_aux %ORifINF% 0 standata$prior_df_for_aux <- c(prior_df_for_aux) standata$prior_mean_for_aux <- c(prior_mean_for_aux) standata$len_y <- length(y) stanfit <- stanmodels$continuous } else if (is.binomial(famname)) { standata$prior_scale_for_aux <- if (!length(group) || prior_scale_for_aux == Inf) 0 else prior_scale_for_aux standata$prior_mean_for_aux <- 0 standata$prior_df_for_aux <- 0 if (is_bernoulli) { y0 <- y == 0 y1 <- y == 1 standata$N <- c(sum(y0), sum(y1)) if (sparse) { standata$X0 <- array(0, dim = c(0L, sum(y0), ncol(xtemp))) standata$X1 <- array(0, 
dim = c(0L, sum(y1), ncol(xtemp))) parts0 <- extract_sparse_parts(xtemp[y0, , drop = FALSE]) standata$nnz_X0 <- length(parts0$w) standata$w_X0 = parts0$w standata$v_X0 = parts0$v standata$u_X0 = parts0$u parts1 <- extract_sparse_parts(xtemp[y1, , drop = FALSE]) standata$nnz_X1 <- length(parts1$w) standata$w_X1 = parts1$w standata$v_X1 = parts1$v standata$u_X1 = parts1$u } else { standata$X0 <- array(xtemp[y0, , drop = FALSE], dim = c(1, sum(y0), ncol(xtemp))) standata$X1 <- array(xtemp[y1, , drop = FALSE], dim = c(1, sum(y1), ncol(xtemp))) standata$nnz_X0 = 0L standata$w_X0 = double(0) standata$v_X0 = integer(0) standata$u_X0 = integer(0) standata$nnz_X1 = 0L standata$w_X1 = double(0) standata$v_X1 = integer(0) standata$u_X1 = integer(0) } if (length(weights)) { # nocov start # this code is unused because weights are interpreted as number of # trials for binomial glms standata$weights0 <- weights[y0] standata$weights1 <- weights[y1] # nocov end } else { standata$weights0 <- double(0) standata$weights1 <- double(0) } if (length(offset)) { standata$offset0 <- offset[y0] standata$offset1 <- offset[y1] } else { standata$offset0 <- double(0) standata$offset1 <- double(0) } standata$K_smooth <- ncol(S) standata$S0 <- S[y0, , drop = FALSE] standata$S1 <- S[y1, , drop = FALSE] standata$smooth_map <- smooth_map stanfit <- stanmodels$bernoulli } else { standata$trials <- trials stanfit <- stanmodels$binomial } } else if (is.poisson(famname)) { standata$prior_scale_for_aux <- prior_scale_for_aux %ORifINF% 0 standata$prior_mean_for_aux <- 0 standata$prior_df_for_aux <- 0 stanfit <- stanmodels$count } else if (is_nb) { standata$prior_scale_for_aux <- prior_scale_for_aux %ORifINF% 0 standata$prior_df_for_aux <- c(prior_df_for_aux) standata$prior_mean_for_aux <- c(prior_mean_for_aux) stanfit <- stanmodels$count } else if (is_gamma) { # nothing } else { stop(paste(famname, "is not supported.")) } prior_info <- summarize_glm_prior( user_prior = prior_stuff, user_prior_intercept = 
prior_intercept_stuff, user_prior_aux = prior_aux_stuff, user_prior_covariance = user_covariance, has_intercept = has_intercept, has_predictors = nvars > 0, adjusted_prior_scale = prior_scale, adjusted_prior_intercept_scale = prior_scale_for_intercept, adjusted_prior_aux_scale = prior_scale_for_aux, family = family ) pars <- c(if (has_intercept) "alpha", "beta", if (ncol(S)) "beta_smooth", if (length(group)) "b", if (is_continuous | is_nb) "aux", if (ncol(S)) "smooth_sd", if (standata$len_theta_L) "theta_L", if (mean_PPD && !standata$clogit) "mean_PPD") if (algorithm == "optimizing") { optimizing_args <- list(...) if (is.null(optimizing_args$draws)) optimizing_args$draws <- 1000L optimizing_args$object <- stanfit optimizing_args$data <- standata optimizing_args$constrained <- TRUE optimizing_args$importance_resampling <- importance_resampling if (is.null(optimizing_args$tol_rel_grad)) optimizing_args$tol_rel_grad <- 10000L out <- do.call(optimizing, args = optimizing_args) check_stanfit(out) if (optimizing_args$draws == 0) { out$theta_tilde <- out$par dim(out$theta_tilde) <- c(1,length(out$par)) } new_names <- names(out$par) mark <- grepl("^beta\\[[[:digit:]]+\\]$", new_names) if (QR) { out$par[mark] <- R_inv %*% out$par[mark] out$theta_tilde[,mark] <- out$theta_tilde[, mark] %*% t(R_inv) } new_names[mark] <- colnames(xtemp) if (ncol(S)) { mark <- grepl("^beta_smooth\\[[[:digit:]]+\\]$", new_names) new_names[mark] <- colnames(S) } new_names[new_names == "alpha[1]"] <- "(Intercept)" new_names[grepl("aux(\\[1\\])?$", new_names)] <- if (is_gaussian) "sigma" else if (is_gamma) "shape" else if (is_ig) "lambda" else if (is_nb) "reciprocal_dispersion" else if (is_beta) "(phi)" else NA names(out$par) <- new_names colnames(out$theta_tilde) <- new_names if (optimizing_args$draws > 0 && importance_resampling) { ## begin: psis diagnostics and importance resampling lr <- out$log_p-out$log_g lr[lr==-Inf] <- -800 p <- suppressWarnings(psis(lr, r_eff = 1)) p$log_weights <- 
p$log_weights-log_sum_exp(p$log_weights) theta_pareto_k <- suppressWarnings(apply(out$theta_tilde, 2L, function(col) { if (all(is.finite(col))) psis(log1p(col ^ 2) / 2 + lr, r_eff = 1)$diagnostics$pareto_k else NaN })) ## todo: change fixed threshold to an option if (p$diagnostics$pareto_k > 1) { warning("Pareto k diagnostic value is ", round(p$diagnostics$pareto_k, digits = 2), ". Resampling is disabled. ", "Decreasing tol_rel_grad may help if optimization has terminated prematurely. ", "Otherwise consider using sampling.", call. = FALSE, immediate. = TRUE) importance_resampling <- FALSE } else if (p$diagnostics$pareto_k > 0.7) { warning("Pareto k diagnostic value is ", round(p$diagnostics$pareto_k, digits = 2), ". Resampling is unreliable. ", "Increasing the number of draws or decreasing tol_rel_grad may help.", call. = FALSE, immediate. = TRUE) } out$psis <- nlist(pareto_k = p$diagnostics$pareto_k, n_eff = p$diagnostics$n_eff / keep_every) } else { theta_pareto_k <- rep(NaN,length(new_names)) importance_resampling <- FALSE } ## importance_resampling if (importance_resampling) { ir_idx <- .sample_indices(exp(p$log_weights), n_draws = ceiling(optimizing_args$draws / keep_every)) out$theta_tilde <- out$theta_tilde[ir_idx,] out$ir_idx <- ir_idx ## SIR mcse and n_eff w_sir <- as.numeric(table(ir_idx)) / length(ir_idx) mcse <- apply(out$theta_tilde[!duplicated(ir_idx),], 2L, function(col) { if (all(is.finite(col))) sqrt(sum(w_sir^2*(col-mean(col))^2)) else NaN }) n_eff <- round(apply(out$theta_tilde[!duplicated(ir_idx),], 2L, var)/ (mcse ^ 2), digits = 0) } else { out$ir_idx <- NULL mcse <- rep(NaN, length(theta_pareto_k)) n_eff <- rep(NaN, length(theta_pareto_k)) } out$diagnostics <- cbind(mcse, theta_pareto_k, n_eff) colnames(out$diagnostics) <- c("mcse", "khat", "n_eff") ## end: psis diagnostics and SIR out$stanfit <- suppressMessages(sampling(stanfit, data = standata, chains = 0)) return(structure(out, prior.info = prior_info, dropped_cols = x_stuff$dropped_cols)) 
} else { if (algorithm == "sampling") { sampling_args <- set_sampling_args( object = stanfit, prior = prior, user_dots = list(...), user_adapt_delta = adapt_delta, data = standata, pars = pars, show_messages = FALSE) stanfit <- do.call(rstan::sampling, sampling_args) } else { # meanfield or fullrank vb vb_args <- list(...) if (is.null(vb_args$output_samples)) vb_args$output_samples <- 1000L if (is.null(vb_args$tol_rel_obj)) vb_args$tol_rel_obj <- 1e-4 if (is.null(vb_args$keep_every)) vb_args$keep_every <- keep_every vb_args$object <- stanfit vb_args$data <- standata vb_args$pars <- pars vb_args$algorithm <- algorithm vb_args$importance_resampling <- importance_resampling stanfit <- do.call(vb, args = vb_args) if (!QR && standata$K > 1) { recommend_QR_for_vb() } } check <- try(check_stanfit(stanfit)) if (!isTRUE(check)) return(standata) if (QR) { thetas <- extract(stanfit, pars = "beta", inc_warmup = TRUE, permuted = FALSE) betas <- apply(thetas, 1:2, FUN = function(theta) R_inv %*% theta) end <- tail(dim(betas), 1L) for (chain in 1:end) for (param in 1:nrow(betas)) { stanfit@sim$samples[[chain]][[has_intercept + param]] <- if (ncol(xtemp) > 1) betas[param, , chain] else betas[param, chain] } } if (standata$len_theta_L) { thetas <- extract(stanfit, pars = "theta_L", inc_warmup = TRUE, permuted = FALSE) cnms <- group$cnms nc <- sapply(cnms, FUN = length) nms <- names(cnms) Sigma <- apply(thetas, 1:2, FUN = function(theta) { Sigma <- mkVarCorr(sc = 1, cnms, nc, theta, nms) unlist(sapply(Sigma, simplify = FALSE, FUN = function(x) x[lower.tri(x, TRUE)])) }) l <- length(dim(Sigma)) end <- tail(dim(Sigma), 1L) shift <- grep("^theta_L", names(stanfit@sim$samples[[1]]))[1] - 1L if (l == 3) for (chain in 1:end) for (param in 1:nrow(Sigma)) { stanfit@sim$samples[[chain]][[shift + param]] <- Sigma[param, , chain] } else for (chain in 1:end) { stanfit@sim$samples[[chain]][[shift + 1]] <- Sigma[, chain] } Sigma_nms <- lapply(cnms, FUN = function(grp) { nm <- outer(grp, grp, FUN 
= paste, sep = ",") nm[lower.tri(nm, diag = TRUE)] }) for (j in seq_along(Sigma_nms)) { Sigma_nms[[j]] <- paste0(nms[j], ":", Sigma_nms[[j]]) } Sigma_nms <- unlist(Sigma_nms) } new_names <- c(if (has_intercept) "(Intercept)", colnames(xtemp), if (ncol(S)) colnames(S), if (length(group) && length(group$flist)) c(paste0("b[", b_nms, "]")), if (is_gaussian) "sigma", if (is_gamma) "shape", if (is_ig) "lambda", if (is_nb) "reciprocal_dispersion", if (is_beta) "(phi)", if (ncol(S)) paste0("smooth_sd[", names(x)[-1], "]"), if (standata$len_theta_L) paste0("Sigma[", Sigma_nms, "]"), if (mean_PPD && !standata$clogit) "mean_PPD", "log-posterior") stanfit@sim$fnames_oi <- new_names return(structure(stanfit, prior.info = prior_info, dropped_cols = x_stuff$dropped_cols)) } } # internal ---------------------------------------------------------------- # @param famname string naming the family # @return character vector of supported link functions for the family supported_glm_links <- function(famname) { switch( famname, binomial = c("logit", "probit", "cauchit", "log", "cloglog"), gaussian = c("identity", "log", "inverse"), Gamma = c("identity", "log", "inverse"), inverse.gaussian = c("identity", "log", "inverse", "1/mu^2"), "neg_binomial_2" = , # intentional poisson = c("log", "identity", "sqrt"), "Beta regression" = c("logit", "probit", "cloglog", "cauchit"), stop("unsupported family") ) } # Family number to pass to Stan # @param famname string naming the family # @return an integer family code stan_family_number <- function(famname) { switch( famname, "gaussian" = 1L, "Gamma" = 2L, "inverse.gaussian" = 3L, "beta" = 4L, "Beta regression" = 4L, "binomial" = 5L, "poisson" = 6L, "neg_binomial_2" = 7L, stop("Family not valid.") ) } # Verify that outcome values match support implied by family object # # @param y outcome variable # @param family family object # @return y (possibly slightly modified) unless an error is thrown # validate_glm_outcome_support <- function(y, family) { if 
(is.character(y)) { stop("Outcome variable can't be type 'character'.", call. = FALSE) } if (is.null(y)) { return(y) } .is_count <- function(x) { all(x >= 0) && all(abs(x - round(x)) < .Machine$double.eps^0.5) } fam <- family$family if (!is.binomial(fam)) { # make sure y has ok dimensions (matrix only allowed for binomial models) if (length(dim(y)) > 1) { if (NCOL(y) == 1) { y <- y[, 1] } else { stop("Except for binomial models the outcome variable ", "should not have multiple columns.", call. = FALSE) } } # check that values match support for non-binomial models if (is.gaussian(fam)) { return(y) } else if (is.gamma(fam) && any(y <= 0)) { stop("All outcome values must be positive for gamma models.", call. = FALSE) } else if (is.ig(fam) && any(y <= 0)) { stop("All outcome values must be positive for inverse-Gaussian models.", call. = FALSE) } else if (is.poisson(fam) && !.is_count(y)) { stop("All outcome values must be counts for Poisson models", call. = FALSE) } else if (is.nb(fam) && !.is_count(y)) { stop("All outcome values must be counts for negative binomial models", call. = FALSE) } } else { # binomial models if (NCOL(y) == 1L) { if (is.numeric(y) || is.logical(y)) y <- as.integer(y) if (is.factor(y)) y <- fac2bin(y) if (!all(y %in% c(0L, 1L))) stop("All outcome values must be 0 or 1 for Bernoulli models.", call. = FALSE) } else if (isTRUE(NCOL(y) == 2L)) { if (!.is_count(y)) stop("All outcome values must be counts for binomial models.", call. = FALSE) } else { stop("For binomial models the outcome should be a vector or ", "a matrix with 2 columns.", call. 
= FALSE) } } return(y) } # Generate fake y variable to use if prior_PD and no y is specified # @param N number of observations # @param family family object fake_y_for_prior_PD <- function(N, family) { fam <- family$family if (is.gaussian(fam)) { # if prior autoscaling is on then the value of sd(y) matters # generate a fake y so that sd(y) is 1 fake_y <- as.vector(scale(rnorm(N))) } else if (is.binomial(fam) || is.poisson(fam) || is.nb(fam)) { # valid for all discrete cases fake_y <- rep_len(c(0, 1), N) } else { # valid for gamma, inverse gaussian, beta fake_y <- runif(N) } return(fake_y) } # Add extra level _NEW_ to each group # # @param Ztlist ranef indicator matrices # @param cnms group$cnms # @param flist group$flist pad_reTrms <- function(Ztlist, cnms, flist) { stopifnot(is.list(Ztlist)) l <- sapply(attr(flist, "assign"), function(i) nlevels(flist[[i]])) p <- sapply(cnms, FUN = length) n <- ncol(Ztlist[[1]]) for (i in attr(flist, "assign")) { levels(flist[[i]]) <- c(gsub(" ", "_", levels(flist[[i]])), paste0("_NEW_", names(flist)[i])) } for (i in 1:length(p)) { Ztlist[[i]] <- rbind(Ztlist[[i]], Matrix(0, nrow = p[i], ncol = n, sparse = TRUE)) } Z <- t(do.call(rbind, args = Ztlist)) return(nlist(Z, cnms, flist)) } # Drop the extra reTrms from a matrix x # # @param x A matrix or array (e.g. the posterior sample or matrix of summary # stats) # @param columns Do the columns (TRUE) or rows (FALSE) correspond to the # variables? unpad_reTrms <- function(x, ...) UseMethod("unpad_reTrms") unpad_reTrms.default <- function(x, ...) { if (is.matrix(x) || is.array(x)) return(unpad_reTrms.array(x, ...)) keep <- !grepl("_NEW_", names(x), fixed = TRUE) x[keep] } unpad_reTrms.array <- function(x, columns = TRUE, ...) 
{ ndim <- length(dim(x)) if (ndim > 3) stop("'x' should be a matrix or 3-D array") nms <- if (columns) last_dimnames(x) else rownames(x) keep <- !grepl("_NEW_", nms, fixed = TRUE) if (length(dim(x)) == 2) { x_keep <- if (columns) x[, keep, drop = FALSE] else x[keep, , drop = FALSE] } else { x_keep <- if (columns) x[, , keep, drop = FALSE] else x[keep, , , drop = FALSE] } return(x_keep) } make_b_nms <- function(group, m = NULL, stub = "Long") { group_nms <- names(group$cnms) b_nms <- character() m_stub <- if (!is.null(m)) get_m_stub(m, stub = stub) else NULL for (i in seq_along(group$cnms)) { nm <- group_nms[i] nms_i <- paste(group$cnms[[i]], nm) levels(group$flist[[nm]]) <- gsub(" ", "_", levels(group$flist[[nm]])) if (length(nms_i) == 1) { b_nms <- c(b_nms, paste0(m_stub, nms_i, ":", levels(group$flist[[nm]]))) } else { b_nms <- c(b_nms, c(t(sapply(paste0(m_stub, nms_i), paste0, ":", levels(group$flist[[nm]]))))) } } return(b_nms) } # Create "prior.info" attribute needed for prior_summary() # # @param user_* The user's prior, prior_intercept, prior_covariance, and # prior_aux specifications. For prior and prior_intercept these should be # passed in after broadcasting the df/location/scale arguments if necessary. # @param has_intercept T/F, does model have an intercept? # @param has_predictors T/F, does model have predictors? # @param adjusted_prior_*_scale adjusted scales computed if using autoscaled priors # @param family Family object. # @return A named list with components 'prior', 'prior_intercept', and possibly # 'prior_covariance' and 'prior_aux' each of which itself is a list # containing the needed values for prior_summary. 
# Build the "prior.info" attribute consumed by prior_summary().
# Collects the user-specified priors (after broadcasting) together with any
# autoscaling adjustments into a named list with components 'prior',
# 'prior_intercept', and possibly 'prior_covariance' and 'prior_aux'.
summarize_glm_prior <-
  function(user_prior,
           user_prior_intercept,
           user_prior_aux,
           user_prior_covariance,
           has_intercept,
           has_predictors,
           adjusted_prior_scale,
           adjusted_prior_intercept_scale,
           adjusted_prior_aux_scale,
           family) {
    # A prior counts as "rescaled" only if autoscaling was enabled, the term
    # exists in the model, a prior distribution was actually specified (name
    # not NA), and the adjusted scale differs from what the user supplied.
    rescaled_coef <-
      user_prior$prior_autoscale &&
      has_predictors &&
      !is.na(user_prior$prior_dist_name) &&
      !all(user_prior$prior_scale == adjusted_prior_scale)
    rescaled_int <-
      user_prior_intercept$prior_autoscale_for_intercept &&
      has_intercept &&
      !is.na(user_prior_intercept$prior_dist_name_for_intercept) &&
      (user_prior_intercept$prior_scale_for_intercept !=
         adjusted_prior_intercept_scale)
    rescaled_aux <-
      user_prior_aux$prior_autoscale_for_aux &&
      !is.na(user_prior_aux$prior_dist_name_for_aux) &&
      (user_prior_aux$prior_scale_for_aux != adjusted_prior_aux_scale)

    # Relabel a generic "t" prior for display: "cauchy" when all df == 1,
    # otherwise "student_t". Done for coefficients, intercept, and aux.
    if (has_predictors && user_prior$prior_dist_name %in% "t") {
      if (all(user_prior$prior_df == 1)) {
        user_prior$prior_dist_name <- "cauchy"
      } else {
        user_prior$prior_dist_name <- "student_t"
      }
    }
    if (has_intercept &&
        user_prior_intercept$prior_dist_name_for_intercept %in% "t") {
      if (all(user_prior_intercept$prior_df_for_intercept == 1)) {
        user_prior_intercept$prior_dist_name_for_intercept <- "cauchy"
      } else {
        user_prior_intercept$prior_dist_name_for_intercept <- "student_t"
      }
    }
    if (user_prior_aux$prior_dist_name_for_aux %in% "t") {
      if (all(user_prior_aux$prior_df_for_aux == 1)) {
        user_prior_aux$prior_dist_name_for_aux <- "cauchy"
      } else {
        user_prior_aux$prior_dist_name_for_aux <- "student_t"
      }
    }
    # 'adjusted_scale' entries are NULL unless the prior was actually rescaled;
    # 'df' is only reported for distributions that have a df parameter.
    prior_list <- list(
      prior = if (!has_predictors) NULL else with(user_prior, list(
        dist = prior_dist_name,
        location = prior_mean,
        scale = prior_scale,
        adjusted_scale = if (rescaled_coef) adjusted_prior_scale else NULL,
        df = if (prior_dist_name %in%
                 c("student_t", "hs", "hs_plus", "lasso", "product_normal"))
          prior_df else NULL
      )),
      prior_intercept = if (!has_intercept) NULL
        else with(user_prior_intercept, list(
          dist = prior_dist_name_for_intercept,
          location = prior_mean_for_intercept,
          scale = prior_scale_for_intercept,
          adjusted_scale = if (rescaled_int)
            adjusted_prior_intercept_scale else NULL,
          df = if (prior_dist_name_for_intercept %in% "student_t")
            prior_df_for_intercept else NULL
        ))
    )
    if (length(user_prior_covariance))
      prior_list$prior_covariance <- user_prior_covariance
    # Aux parameter is only summarized when the family has one (see
    # .rename_aux below); exponential priors report a 'rate' instead of
    # location/scale.
    aux_name <- .rename_aux(family)
    prior_list$prior_aux <- if (is.na(aux_name)) NULL
      else with(user_prior_aux, list(
        dist = prior_dist_name_for_aux,
        location = if (!is.na(prior_dist_name_for_aux) &&
                       prior_dist_name_for_aux != "exponential")
          prior_mean_for_aux else NULL,
        scale = if (!is.na(prior_dist_name_for_aux) &&
                    prior_dist_name_for_aux != "exponential")
          prior_scale_for_aux else NULL,
        adjusted_scale = if (rescaled_aux)
          adjusted_prior_aux_scale else NULL,
        df = if (!is.na(prior_dist_name_for_aux) &&
                 prior_dist_name_for_aux %in% "student_t")
          prior_df_for_aux else NULL,
        rate = if (!is.na(prior_dist_name_for_aux) &&
                   prior_dist_name_for_aux %in% "exponential")
          1 / prior_scale_for_aux else NULL,
        aux_name = aux_name
      ))
    return(prior_list)
  }

# rename aux parameter based on family; NA means the family has no aux
# parameter to summarize
.rename_aux <- function(family) {
  fam <- family$family
  if (is.gaussian(fam)) "sigma"
  else if (is.gamma(fam)) "shape"
  else if (is.ig(fam)) "lambda"
  else if (is.nb(fam)) "reciprocal_dispersion"
  else NA
}

.sample_indices <- function(wts, n_draws) {
  ## Stratified resampling
  ##   Kitagawa, G., Monte Carlo Filter and Smoother for Non-Gaussian
  ##   Nonlinear State Space Models, Journal of Computational and
  ##   Graphical Statistics, 5(1):1-25, 1996.
K <- length(wts) w <- n_draws * wts # expected number of draws from each model idx <- rep(NA, n_draws) c <- 0 j <- 0 for (k in 1:K) { c <- c + w[k] if (c >= 1) { a <- floor(c) c <- c - a idx[j + 1:a] <- k j <- j + a } if (j < n_draws && c >= runif(1)) { c <- c - 1 j <- j + 1 idx[j] <- k } } return(idx) } rstanarm/R/log_lik.R0000644000176200001440000010231714406606742014062 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Pointwise log-likelihood matrix #' #' For models fit using MCMC only, the \code{log_lik} method returns the #' \eqn{S} by \eqn{N} pointwise log-likelihood matrix, where \eqn{S} is the size #' of the posterior sample and \eqn{N} is the number of data points, or in the #' case of the \code{stanmvreg} method (when called on \code{\link{stan_jm}} #' model objects) an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number #' of individuals. #' #' @aliases log_lik #' @export #' #' @templateVar stanregArg object #' @template args-stanreg-object #' @template args-dots-ignored #' @param newdata An optional data frame of new data (e.g. holdout data) to use #' when evaluating the log-likelihood. 
See the description of \code{newdata} #' for \code{\link{posterior_predict}}. #' @param offset A vector of offsets. Only required if \code{newdata} is #' specified and an \code{offset} was specified when fitting the model. #' #' @return For the \code{stanreg} and \code{stanmvreg} methods an \eqn{S} by #' \eqn{N} matrix, where \eqn{S} is the size of the posterior sample and #' \eqn{N} is the number of data points. For the \code{stanjm} method #' an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number of individuals. #' #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ #' roaches$roach100 <- roaches$roach1 / 100 #' fit <- stan_glm( #' y ~ roach100 + treatment + senior, #' offset = log(exposure2), #' data = roaches, #' family = poisson(link = "log"), #' prior = normal(0, 2.5), #' prior_intercept = normal(0, 10), #' iter = 500, # just to speed up example, #' refresh = 0 #' ) #' ll <- log_lik(fit) #' dim(ll) #' all.equal(ncol(ll), nobs(fit)) #' #' # using newdata argument #' nd <- roaches[1:2, ] #' nd$treatment[1:2] <- c(0, 1) #' ll2 <- log_lik(fit, newdata = nd, offset = c(0, 0)) #' head(ll2) #' dim(ll2) #' all.equal(ncol(ll2), nrow(nd)) #' } #' } log_lik.stanreg <- function(object, newdata = NULL, offset = NULL, ...) { newdata <- validate_newdata(object, newdata, m = NULL) calling_fun <- as.character(sys.call(-1))[1] dots <- list(...) if (is.stanmvreg(object)) { m <- dots[["m"]] if (is.null(m)) STOP_arg_required_for_stanmvreg(m) if (!is.null(offset)) stop2("'offset' cannot be specified for stanmvreg objects.") } else { m <- NULL } newdata <- validate_newdata(object, newdata = newdata, m = m) args <- ll_args.stanreg(object, newdata = newdata, offset = offset, reloo_or_kfold = calling_fun %in% c("kfold", "reloo"), ...) 
fun <- ll_fun(object, m = m) if (is_clogit(object)) { out <- vapply( seq_len(args$N), FUN.VALUE = numeric(length = args$S), FUN = function(i) { as.vector(fun( draws = args$draws, data_i = args$data[args$data$strata == levels(args$data$strata)[i], , drop = FALSE] )) } ) return(out) } else { out <- vapply( seq_len(args$N), FUN = function(i) { as.vector(fun( data_i = args$data[i, , drop = FALSE], draws = args$draws )) }, FUN.VALUE = numeric(length = args$S) ) } if (is.null(newdata)) colnames(out) <- rownames(model.frame(object, m = m)) else colnames(out) <- rownames(newdata) return(out) } #' @rdname log_lik.stanreg #' @export #' @templateVar mArg m #' @template args-m #' log_lik.stanmvreg <- function(object, m = 1, newdata = NULL, ...) { validate_stanmvreg_object(object) out <- log_lik.stanreg(object, newdata = newdata, m = m, ...) return(out) } #' @rdname log_lik.stanreg #' @export #' @param newdataLong,newdataEvent Optional data frames containing new data #' (e.g. holdout data) to use when evaluating the log-likelihood for a #' model estimated using \code{\link{stan_jm}}. If the fitted model #' was a multivariate joint model (i.e. more than one longitudinal outcome), #' then \code{newdataLong} is allowed to be a list of data frames. If supplying #' new data, then \code{newdataEvent} should also include variables corresponding #' to the event time and event indicator as these are required for evaluating the #' log likelihood for the event submodel. For more details, see the description #' of \code{newdataLong} and \code{newdataEvent} for \code{\link{posterior_survfit}}. #' log_lik.stanjm <- function(object, newdataLong = NULL, newdataEvent = NULL, ...) 
{ if (!used.sampling(object)) STOP_sampling_only("Pointwise log-likelihood matrix") validate_stanjm_object(object) M <- get_M(object) if ("m" %in% names(list(...))) stop("'m' should not be specified for stan_jm objects since the ", "log-likelihood is calculated for the full joint model.") if (!identical(is.null(newdataLong), is.null(newdataEvent))) stop("Both newdataLong and newdataEvent must be supplied together.") if (!is.null(newdataLong)) { newdatas <- validate_newdatas(object, newdataLong, newdataEvent) newdataLong <- newdatas[1:M] newdataEvent <- newdatas[["Event"]] } pars <- extract_pars(object) # full array of draws data <- .pp_data_jm(object, newdataLong, newdataEvent) calling_fun <- as.character(sys.call(-1))[1] reloo_or_kfold <- calling_fun %in% c("kfold", "reloo") val <- .ll_jm(object, data, pars, reloo_or_kfold = reloo_or_kfold, ...) return(val) } # internal ---------------------------------------------------------------- # get log likelihood function for a particular model # @param x stanreg object # @return a function ll_fun <- function(x, m = NULL) { validate_stanreg_object(x) f <- family(x, m = m) if (!is(f, "family") || is_scobit(x)) return(.ll_polr_i) else if (is_clogit(x)) return(.ll_clogit_i) else if (is.nlmer(x)) return(.ll_nlmer_i) fun <- paste0(".ll_", family(x, m = m)$family, "_i") get(fun, mode = "function") } # get arguments needed for ll_fun # @param object stanreg object # @param newdata same as posterior predict # @param offset vector of offsets (only required if model has offset term and # newdata is specified) # @param m Integer specifying which submodel for stanmvreg objects # @param reloo_or_kfold logical. TRUE if ll_args is for reloo or kfold # @param ... For models without group-specific terms (i.e., not stan_[g]lmer), # if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to # pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). 
This is a # workaround in case there are issues with newdata containing factors with # only a single level. Or for stanmvreg objects, then ... can be used to pass # 'stanmat', which may be a matrix with a reduced number of draws (potentially # just a single MCMC draw). # @return a named list with elements data, draws, S (posterior sample size) and # N = number of observations ll_args <- function(object, ...) UseMethod("ll_args") ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, reloo_or_kfold = FALSE, ...) { validate_stanreg_object(object) f <- family(object, m = m) draws <- nlist(f) has_newdata <- !is.null(newdata) dots <- list(...) z_betareg <- NULL if (has_newdata && reloo_or_kfold && !is.mer(object)) { x <- dots$newx z_betareg <- dots$newz # NULL except for some stan_betareg models if (!is.null(z_betareg)) { z_betareg <- as.matrix(z_betareg) } stanmat <- dots$stanmat form <- as.formula(formula(object)) # in case formula is string y <- eval(form[[2L]], newdata) } else if (has_newdata) { ppdat <- pp_data(object, as.data.frame(newdata), offset = offset, m = m) pp_eta_dat <- pp_eta(object, ppdat, m = m) eta <- pp_eta_dat$eta stanmat <- pp_eta_dat$stanmat z_betareg <- ppdat$z_betareg x <- ppdat$x form <- as.formula(formula(object, m = m)) y <- eval(form[[2L]], newdata) } else { stanmat <- as.matrix.stanreg(object) x <- get_x(object, m = m) y <- get_y(object, m = m) } if (is.stanmvreg(object) && !is.null(dots$stanmat)) { stanmat <- dots$stanmat # potentially use a stanmat with a single draw } if (!is.null(object$dropped_cols)) { x <- x[, !(colnames(x) %in% object$dropped_cols), drop = FALSE] } if (!is_polr(object)) { # not polr or scobit model fname <- f$family if (is.nlmer(object)) { draws <- list(mu = posterior_linpred(object, newdata = newdata), sigma = stanmat[,"sigma"]) data <- data.frame(y) data$offset <- if (has_newdata) offset else object$offset if (model_has_weights(object)) { data$weights <- object$weights } data$i_ <- 
seq_len(nrow(data)) # for nlmer need access to i inside .ll_nlmer_i return(nlist(data, draws, S = NROW(draws$mu), N = nrow(data))) } else if (!is.binomial(fname)) { data <- data.frame(y, x) if (!is.null(z_betareg)) { data <- cbind(data, z_betareg) } } else { if (NCOL(y) == 2L) { trials <- rowSums(y) y <- y[, 1L] } else if (is_clogit(object)) { if (has_newdata) strata <- eval(object$call$strata, newdata) else strata <- model.frame(object)[,"(weights)"] strata <- as.factor(strata) successes <- aggregate(y, by = list(strata), FUN = sum)$x formals(draws$f$linkinv)$g <- strata formals(draws$f$linkinv)$successes <- successes trials <- 1L } else { trials <- 1 if (is.factor(y)) y <- fac2bin(y) stopifnot(all(y %in% c(0, 1))) } data <- data.frame(y, trials, x) } nms <- if (is.stanmvreg(object)) collect_nms(colnames(stanmat), M = get_M(object), stub = get_stub(object)) else NULL beta_sel <- if (is.null(nms)) seq_len(ncol(x)) else nms$y[[m]] draws$beta <- stanmat[, beta_sel, drop = FALSE] m_stub <- get_m_stub(m, stub = get_stub(object)) if (is.gaussian(fname)) draws$sigma <- stanmat[, paste0(m_stub, "sigma")] if (is.gamma(fname)) draws$shape <- stanmat[, paste0(m_stub, "shape")] if (is.ig(fname)) draws$lambda <- stanmat[, paste0(m_stub, "lambda")] if (is.nb(fname)) draws$size <- stanmat[, paste0(m_stub, "reciprocal_dispersion")] if (is.beta(fname)) { draws$f_phi <- object$family_phi z_vars <- colnames(stanmat)[grepl("(phi)", colnames(stanmat))] if (length(z_vars) == 1 && z_vars == "(phi)") { draws$phi <- stanmat[, z_vars] } else { if (has_newdata) { if (!is.null(z_betareg)) { colnames(data) <- c("y", colnames(get_x(object)), paste0("(phi)_", colnames(z_betareg))) } } else { x_dat <- get_x(object) z_dat <- as.matrix(object$z) colnames(x_dat) <- colnames(x_dat) colnames(z_dat) <- paste0("(phi)_", colnames(z_dat)) data <- data.frame(y = get_y(object), cbind(x_dat, z_dat), check.names = FALSE) } draws$phi <- stanmat[,z_vars] } } } else { stopifnot(is_polr(object)) y <- 
as.integer(y) if (has_newdata) { x <- .validate_polr_x(object, x) } data <- data.frame(y, x) draws$beta <- stanmat[, colnames(x), drop = FALSE] zetas <- grep(pattern = if (length(unique(y)) == 2L) "(Intercept)" else "|", x = colnames(stanmat), fixed = TRUE, value = TRUE) draws$zeta <- stanmat[, zetas, drop = FALSE] draws$max_y <- max(y) if ("alpha" %in% colnames(stanmat)) { stopifnot(is_scobit(object)) # scobit draws$alpha <- stanmat[, "alpha"] draws$f <- object$method } } data$offset <- if (has_newdata) offset else object$offset if (model_has_weights(object)) { if (is.stanmvreg(object)) STOP_if_stanmvreg("posterior_survfit with weights") data$weights <- object$weights } if (is.mer(object)) { b_sel <- if (is.null(nms)) b_names(colnames(stanmat)) else nms$y_b[[m]] b <- stanmat[, b_sel, drop = FALSE] if (has_newdata) { Z_names <- ppdat$Z_names if (is.null(Z_names)) { b <- b[, !grepl("_NEW_", colnames(b), fixed = TRUE), drop = FALSE] } else { b <- pp_b_ord(b, Z_names) } if (is.null(ppdat$Zt)) z <- matrix(NA, nrow = nrow(x), ncol = 0) else z <- t(ppdat$Zt) } else { z <- get_z(object, m = m) } data <- cbind(data, as.matrix(z)[1:NROW(x),, drop = FALSE]) draws$beta <- cbind(draws$beta, b) } if (is_clogit(object)) { data$strata <- strata out <- nlist(data, draws, S = NROW(draws$beta), N = nlevels(strata)) } else { out <- nlist(data, draws, S = NROW(draws$beta), N = nrow(data)) } return(out) } # check intercept for polr models ----------------------------------------- # Check if a model fit with stan_polr has an intercept (i.e. if it's actually a # bernoulli model). If it doesn't have an intercept then the intercept column in # x is dropped. This is only necessary if newdata is specified because otherwise # the correct x is taken from the fitted model object. 
# Drop the "(Intercept)" column from a new model matrix if the fitted
# stan_polr model itself was estimated without an intercept.
#
# @param object A stanreg object (fit by stan_polr).
# @param x Model matrix constructed from newdata.
# @return x, possibly with its leading intercept column removed.
.validate_polr_x <- function(object, x) {
  x0 <- get_x(object)
  has_intercept <- colnames(x0)[1L] == "(Intercept)"
  if (!has_intercept && colnames(x)[1L] == "(Intercept)")
    x <- x[, -1L, drop = FALSE]
  x
}

# log-likelihood function helpers -----------------------------------------

# Multiply pointwise log-likelihood values by observation weights, if any.
# @param val Vector of log-likelihood values (length = number of draws).
# @param w Scalar weight for this observation, or NULL for no weighting.
.weighted <- function(val, w) {
  if (is.null(w)) {
    val
  } else {
    val * w
  }
}

# Predictor columns of data: everything except the reserved
# outcome/weights/offset/trials/strata columns.
# NOTE: assumes at least one reserved column ('y') is present, so the
# negative index is never empty.
.xdata <- function(data) {
  sel <- c("y", "weights", "offset", "trials", "strata")
  data[, -which(colnames(data) %in% sel)]
}

# Linear predictor for one observation pushed through the family's inverse
# link, giving the conditional mean for each posterior draw.
.mu <- function(data, draws) {
  eta <- as.vector(linear_predictor(draws$beta, .xdata(data), data$offset))
  draws$f$linkinv(eta)
}

# for stan_betareg only ---------------------------------------------------

# Mean-model predictors: reserved columns and the "(phi)_" precision-model
# columns are both excluded.
.xdata_beta <- function(data) {
  sel <- c("y", "weights", "offset", "trials")
  data[, -c(which(colnames(data) %in% sel),
            grep("(phi)_", colnames(data), fixed = TRUE))]
}

# Precision-model (phi) predictors: only the "(phi)_" columns.
# (Removed an unused local 'sel' that the original defined but never used.)
.zdata_beta <- function(data) {
  data[, grep("(phi)_", colnames(data), fixed = TRUE)]
}

# Conditional mean for the beta-regression mean submodel.
.mu_beta <- function(data, draws) {
  eta <- as.vector(linear_predictor(draws$beta, .xdata_beta(data), data$offset))
  draws$f$linkinv(eta)
}

# Precision parameter phi for the beta-regression precision submodel.
.phi_beta <- function(data, draws) {
  eta <- as.vector(linear_predictor(draws$phi, .zdata_beta(data), data$offset))
  draws$f_phi$linkinv(eta)
}

# log-likelihood functions ------------------------------------------------
# Each .ll_<family>_i takes a one-row data frame (data_i) and the posterior
# draws, returning the pointwise log-likelihood for observation i across
# all draws.

.ll_gaussian_i <- function(data_i, draws) {
  val <- dnorm(data_i$y, mean = .mu(data_i, draws), sd = draws$sigma,
               log = TRUE)
  .weighted(val, data_i$weights)
}

.ll_binomial_i <- function(data_i, draws) {
  val <- dbinom(data_i$y, size = data_i$trials, prob = .mu(data_i, draws),
                log = TRUE)
  .weighted(val, data_i$weights)
}

# Conditional logistic regression: here data_i holds ALL observations in one
# stratum (not a single row); returns the stratum's conditional log-likelihood.
.ll_clogit_i <- function(data_i, draws) {
  eta <- linear_predictor(draws$beta, .xdata(data_i), data_i$offset)
  denoms <- apply(eta, 1, log_clogit_denom,
                  N_j = NCOL(eta), D_j = sum(data_i$y))
  rowSums(eta[, data_i$y == 1, drop = FALSE] - denoms)
}

.ll_poisson_i <- function(data_i, draws) {
  val <- dpois(data_i$y, lambda = .mu(data_i, draws), log = TRUE)
  .weighted(val, data_i$weights)
}
# Pointwise log-likelihood functions (continued). Each takes a one-row data
# frame (data_i) and the posterior draws and returns the log-likelihood of
# observation i for every draw.

# Negative binomial, NB2 parameterization: mean from the linear predictor,
# 'size' is the reciprocal-dispersion draw.
.ll_neg_binomial_2_i <- function(data_i, draws) {
  val <- dnbinom(data_i$y, size = draws$size, mu = .mu(data_i, draws),
                 log = TRUE)
  .weighted(val, data_i$weights)
}

# Gamma with mean mu and shape parameter: rate = shape / mu.
.ll_Gamma_i <- function(data_i, draws) {
  val <- dgamma(data_i$y, shape = draws$shape,
                rate = draws$shape / .mu(data_i, draws), log = TRUE)
  .weighted(val, data_i$weights)
}

# Inverse Gaussian log-density written out directly (base R has no d*()
# function for it): 0.5*log(lambda/(2*pi)) - 1.5*log(y)
#                   - 0.5*lambda*(y - mu)^2 / (y * mu^2).
.ll_inverse.gaussian_i <- function(data_i, draws) {
  mu <- .mu(data_i, draws)
  val <- 0.5 * log(draws$lambda / (2 * pi)) -
    1.5 * log(data_i$y) -
    0.5 * draws$lambda * (data_i$y - mu)^2 /
    (data_i$y * mu^2)
  .weighted(val, data_i$weights)
}

# Ordinal regression (stan_polr). P(y = j) is the difference of cumulative
# probabilities at adjacent cutpoints (zeta has J-1 columns for J categories).
# When draws$alpha is present this is a scobit model: binary outcome only,
# with the cumulative probability raised to the power alpha.
.ll_polr_i <- function(data_i, draws) {
  eta <- linear_predictor(draws$beta, .xdata(data_i), data_i$offset)
  f <- draws$f
  y_i <- data_i$y
  J <- ncol(draws$zeta) + 1  # number of outcome categories
  linkinv <- polr_linkinv(f)
  if (is.null(draws$alpha)) {
    if (y_i == 1) {
      # lowest category: P(y = 1) = F(zeta_1 - eta)
      val <- log(linkinv(draws$zeta[, 1] - eta))
    } else if (y_i == J) {
      # highest category: P(y = J) = 1 - F(zeta_{J-1} - eta)
      val <- log1p(-linkinv(draws$zeta[, J - 1] - eta))
    } else {
      # interior category: difference of adjacent cumulative probabilities
      val <- log(linkinv(draws$zeta[, y_i] - eta) -
                   linkinv(draws$zeta[, y_i - 1L] - eta))
    }
  } else {
    # scobit branch: outcomes coded 0/1, cumulative probability to power alpha
    if (y_i == 0) {
      val <- draws$alpha * log(linkinv(draws$zeta[, 1] - eta))
    } else if (y_i == 1) {
      val <- log1p(-linkinv(draws$zeta[, 1] - eta) ^ draws$alpha)
    } else {
      stop("Exponentiation only possible when there are exactly 2 outcomes.")
    }
  }
  .weighted(val, data_i$weights)
}

# Beta regression: precision phi is either a scalar/vector draw or, when
# "(phi)_" predictor columns are present, modeled via its own linear predictor.
.ll_beta_i <- function(data_i, draws) {
  mu <- .mu_beta(data_i, draws)
  phi <- draws$phi
  if (length(grep("(phi)_", colnames(data_i), fixed = TRUE)) > 0) {
    phi <- .phi_beta(data_i, draws)
  }
  val <- dbeta(data_i$y, mu * phi, (1 - mu) * phi, log = TRUE)
  .weighted(val, data_i$weights)
}

# Nonlinear mixed models: draws$mu already holds the fitted mean for every
# observation, indexed by data_i$i_.
.ll_nlmer_i <- function(data_i, draws) {
  i_ <- data_i$i_
  val <- dnorm(data_i$y, mean = draws$mu[, i_], sd = draws$sigma, log = TRUE)
  .weighted(val, data_i$weights)
}

# log-likelihood functions for stanjm objects only ----------------------

# Alternative ll_args method for stanjm objects that allows data and pars to be
# passed directly, rather than constructed using pp_data within the ll_args
# method.
This can be much faster when used in the MH algorithm within # posterior_survfit, since it doesn't require repeated calls to pp_data. # # @param object A stanmvreg object # @param data Output from .pp_data_jm # @param pars Output from extract_pars # @param m Integer specifying which submodel # @param reloo_or_kfold logical. TRUE if ll_args is for reloo or kfold ll_args.stanjm <- function(object, data, pars, m = 1, reloo_or_kfold = FALSE, ...) { validate_stanjm_object(object) if (model_has_weights(object)) STOP_if_stanmvreg("posterior_survfit or log_lik with weights") f <- family(object, m = m) fname <- f$family draws <- nlist(f) stanmat <- pars$stanmat # potentially a stanmat with a single draw nms <- collect_nms(colnames(stanmat), get_M(object)) if (is.jm(object)) { # for stan_jm models, log_lik is evaluated for the full # joint model, so data contains info on all submodels y <- data$y[[m]] x <- data$yX[[m]] z <- t(data$yZt[[m]]) Z_names <- data$yZ_names[[m]] offset <- data$yOffset[[m]] } else { # for stan_mvmer models, log_lik is only ever called for # one submodel at a time, so data is for one submodel y <- data$y x <- data$X z <- t(data$Zt) Z_names <- data$Z_names offset <- data$yOffset } if (!is.binomial(fname)) { if (!is.null(offset)) { dat <- data.frame(y, x, offset) } else { dat <- data.frame(y, x) } } else { if (NCOL(y) == 2L) { trials <- rowSums(y) y <- y[, 1L] } else { trials <- 1 if (is.factor(y)) y <- fac2bin(y) stopifnot(all(y %in% c(0, 1))) } if (!is.null(offset)) { dat <- data.frame(y, trials, x, offset) } else { dat <- data.frame(y, trials, x) } } dat <- cbind(dat, as.matrix(z)) draws$beta <- stanmat[, nms$y[[m]], drop = FALSE] m_stub <- get_m_stub(m) if (is.gaussian(fname)) draws$sigma <- stanmat[, paste0(m_stub, "sigma")] if (is.gamma(fname)) draws$shape <- stanmat[, paste0(m_stub, "shape")] if (is.ig(fname)) draws$lambda <- stanmat[, paste0(m_stub, "lambda")] if (is.nb(fname)) draws$size <- stanmat[, paste0(m_stub, "reciprocal_dispersion")] b <- 
stanmat[, nms$y_b[[m]], drop = FALSE] b <- pp_b_ord(b, Z_names) draws$beta <- cbind(draws$beta, b) nlist(data = dat, draws, S = NROW(draws$beta), N = nrow(dat)) } # Return log likelihood for full joint model # # @param object A stanmvreg object, or (when used in stan_jm function) a named list # with elements $basehaz, $family, $assoc # @param data Output from .pp_data_jm # @param pars Output from extract_pars # @param include_long A logical, if TRUE then the log likelihood for the # longitudinal submodels are included in the log likelihood calculation. # @param include_b A logical, if TRUE then the log likelihood for the random # effects distribution is also included in the log likelihood calculation. # @param sum A logical. If TRUE then the log likelihood is summed across all # individuals. If FALSE then the log likelihood is returned for each # individual (either as an S * Npat matrix, or a length Npat vector, depending # on the type of inputs to the pars argument). # @param ... Arguments passed to .ll_mvmer. Can include 'reloo_or_kfold' which is # a logical specifying whether the function calling ll_jm was reloo or kfold. # @return Either a matrix, a vector or a scalar, depending on the input types # and whether sum is set to TRUE. .ll_jm <- function(object, data, pars, include_long = TRUE, include_b = FALSE, sum = FALSE, ...) { M <- get_M(object) # Log likelihood for event submodel ll_event <- .ll_survival(object, data, pars) # Log likelihoods for longitudinal submodels if (include_long) { ll_long <- lapply(1:M, function(m) .ll_long(object, data, pars, m = m, ...)) } # Log likelihood for random effects submodel # NB this is only used in the Metropolis algorithm in 'posterior_survfit' # when drawing random effects for new individuals. But it is not used # in generating the pointwise log likelihood matrix under log_lik or loo. 
if (include_b) { if (length(object$cnms) > 2L) stop("Bug found: 'include_b' cannot be TRUE when there is more than ", "2 grouping factors.") if (length(object$cnms) == 2L && M > 1) stop("Bug found: 'include_b' cannot be TRUE when there is more than ", "one longitudinal submodel and more than one grouping factor.") if ((data$Npat > 1) || (nrow(pars$stanmat) > 1L)) stop("Bug found: 'include_b' can only be TRUE when 'data' is for one ", "individual, and stanmat is for a single draw.") id_var <- object$id_var cnms <- object$cnms Z_names <- fetch_(data$assoc_parts, "mod_eta", "Z_names") b <- do.call("cbind", pars$b) b <- as.vector(pp_b_ord(b, Z_names)) Sigma_id <- VarCorr(object, stanmat = pars$stanmat)[[id_var]] if (length(cnms) > 1L) { b2_var <- grep(utils::glob2rx(id_var), names(cnms), value = TRUE, invert = TRUE) Sigma_b2 <- VarCorr(object, stanmat = pars$stanmat)[[b2_var]] Sigma_list <- rep(list(Sigma_b2), data$Ni) which_slot <- which(names(cnms) == b2_var) if (which_slot == 1L) { Sigma_bind <- c(Sigma_list, list(Sigma_id)) } else { Sigma_bind <- c(list(Sigma_id), Sigma_list) } Sigma <- as.matrix(Matrix::bdiag(Sigma_bind)) } else { Sigma <- Sigma_id } ll_b <- -0.5 * (c(determinant(Sigma, logarithm = TRUE)$modulus) + (b %*% chol2inv(chol(Sigma)) %*% b)[1] + length(b) * log(2 * pi)) } else { ll_b <- NULL } # Check the dimensions of the various components if (is.matrix(ll_event)) { # S * Npat matrices if (include_long) { mats <- unique(sapply(c(ll_long, list(ll_event)), is.matrix)) dims <- unique(lapply(c(ll_long, list(ll_event)), dim)) if ((length(dims) > 1L) || (length(mats) > 1L)) stop("Bug found: elements of 'll_long' should be same class and ", "dimension as 'll_event'.") } if (include_b && !identical(length(ll_b), ncol(ll_event))) stop("Bug found: length of 'll_b' should be equal to the number of ", "columns in 'll_event'.") } else { # length Npat vectors (ie, log-lik based on a single draw of pars) if (include_long) { lens <- unique(sapply(c(ll_long, 
list(ll_event)), length)) if (length(lens) > 1L) stop("Bug found: elements of 'll_long' should be same length as 'll_event'.") } if (include_b && !identical(length(ll_b), length(ll_event))) stop("Bug found: length of 'll_b' should be equal to length of 'll_event'.") } # Sum the various components (long + event + random effects) if (include_long) { val <- Reduce('+', c(ll_long, list(ll_event))) } else { val <- ll_event } if (include_b && is.matrix(val)) { val <- sweep(val, 2L, ll_b, `+`) } else if (include_b && is.vector(val)) { val <- val + ll_b } # Return log likelihood for joint model if (!sum) { return(val) # S * Npat matrix or length Npat vector } else if (is.matrix(val)) { return(rowSums(val)) # length S vector } else { return(sum(val)) # scalar } } # Return log-likelihood for longitudinal submodel m # # @param object A stanjm object. # @param data Output from .pp_data_jm. # @param pars Output from extract_pars. # @param m Integer specifying the longitudinal submodel. # @param reloo_or_kfold Logical specifying whether the call came from # reloo or kfold. # @return An S*Npat matrix. .ll_long <- function(object, data, pars, m = 1, reloo_or_kfold = FALSE) { args <- ll_args.stanjm(object, data, pars, m = m, reloo_or_kfold = reloo_or_kfold) fun <- ll_fun(object, m = m) ll <- lapply(seq_len(args$N), function(j) as.vector( fun(data_i = args$data[j, , drop = FALSE], draws = args$draws))) ll <- do.call("cbind", ll) # return S*Npat matrix by summing log-lik for y within each individual res <- apply(ll, 1L, function(row) tapply(row, data$flist[[m]], sum)) res <- if (is.vector(res) & (args$S > 1L)) cbind(res) else t(res) return(res) } # Return survival probability or log-likelihood for event submodel # # @param object A stanjm object. # @param data Output from .pp_data_jm. # @param pars Output from extract_pars. # @param one_draw A logical specifying whether the parameters provided in the # pars argument are vectors for a single realisation of the parameter (e.g. 
# a single MCMC draw, or a posterior mean) (TRUE) or a stanmat array (FALSE).
# @param survprob A logical specifying whether to return the survival probability
#   (TRUE) or the log likelihood for the event submodel (FALSE).
# @return An S by Npat matrix, or a length Npat vector, depending on the inputs
#   (where S is the size of the posterior sample and Npat is the number of
#   individuals).
.ll_survival <- function(object, data, pars, one_draw = FALSE, survprob = FALSE) {
  basehaz <- object$basehaz
  family <- object$family
  assoc <- object$assoc
  # event times/statuses and Gauss-Kronrod quadrature info attached to the
  # assoc_parts structure by .pp_data_jm
  etimes <- attr(data$assoc_parts, "etimes")
  estatus <- attr(data$assoc_parts, "estatus")
  qnodes <- attr(data$assoc_parts, "qnodes")
  qtimes <- attr(data$assoc_parts, "qtimes")
  qwts <- attr(data$assoc_parts, "qwts")
  times <- c(etimes, qtimes)
  # To avoid an error in log(times) replace times equal to zero with a small
  # non-zero value. Note that these times correspond to individuals where the
  # event time (etimes) was zero, and therefore the cumhaz (at baseline) will
  # be forced to zero for these individuals further down in the code anyhow.
  times[times == 0] <- 1E-10
  # Linear predictor for the survival submodel
  e_eta <- linear_predictor(pars$ebeta, data$eXq)
  # Scaling parameter for linear predictor
  assoc_as_list <- apply(assoc, 2L, c)
  scale_assoc <- validate_scale_assoc(object$scale_assoc, assoc_as_list)
  # Add on contribution from assoc structure
  if (length(pars$abeta)) {
    M <- get_M(object)
    # Temporary stop, until make_assoc_terms can handle it
    sel_stop <- grep("^shared", rownames(object$assoc))
    if (any(unlist(object$assoc[sel_stop,])))
      stop("'log_lik' cannot yet be used with shared_b or shared_coef ",
           "association structures.", call. = FALSE)
    # reorder group-specific coefficients to match each submodel's Z columns
    pars$b <- lapply(1:M, function(m) {
      b_m <- pars$b[[m]]
      Z_names_m <- data$assoc_parts[[m]][["mod_eta"]][["Z_names"]]
      pp_b_ord(if (is.matrix(b_m)) b_m else t(b_m), Z_names_m)
    })
    if (one_draw) {
      aXq <- make_assoc_terms(parts = data$assoc_parts, assoc = assoc,
                              family = family, beta = pars$beta, b = pars$b)
      e_eta <- e_eta + scale_assoc * linear_predictor.default(pars$abeta, aXq)
    } else {
      aXq <- make_assoc_terms(parts = data$assoc_parts, assoc = assoc,
                              family = family, beta = pars$beta, b = pars$b)
      # one association term per element of aXq, scaled and weighted by the
      # corresponding column of association coefficient draws
      for (k in 1:length(aXq)) {
        e_eta <- e_eta +
          scale_assoc[k] * sweep(aXq[[k]], 1L, pars$abeta[,k], `*`)
      }
    }
  }
  # Log baseline hazard at etimes (if not NULL) and qtimes
  log_basehaz <- evaluate_log_basehaz(times = times,
                                      basehaz = basehaz,
                                      coefs = pars$bhcoef)
  # Log hazard at etimes (if not NULL) and qtimes
  log_haz <- log_basehaz + e_eta
  # Extract log hazard at qtimes only
  if (is.vector(log_haz)) {
    q_log_haz <- tail(log_haz, length(qtimes))
  } else {
    sel_cols <- tail(1:ncol(log_haz), length(qtimes))
    q_log_haz <- log_haz[, sel_cols, drop = FALSE]
  }
  # Evaluate log survival
  log_surv <- evaluate_log_survival(log_haz = q_log_haz,
                                    qnodes = qnodes, qwts = qwts)
  # Force surv prob to 1 (ie. log surv prob to 0) if evaluating
  # at time t = 0; this avoids possible numerical errors
  log_surv[etimes == 0] <- 0
  # Possibly return surv prob at time t (upper limit of integral)
  if (survprob)
    return(exp(log_surv))
  # Otherwise return log likelihood at time t
  if (is.null(etimes) || is.null(estatus))
    stop("'etimes' and 'estatus' cannot be NULL if 'survprob = FALSE'.")
  times_length <- length(c(etimes, qtimes))
  if (one_draw) { # return vector of length npat
    if (!length(log_haz) == times_length)
      stop2("Bug found: length of log_haz vector is incorrect.")
    e_log_haz <- log_haz[1:length(etimes)]
    return(estatus * e_log_haz + log_surv)
  } else { # return S * npat matrix
    if (!ncol(log_haz) == times_length)
      stop2("Bug found: number of cols in log_haz matrix is incorrect.")
    e_log_haz <- log_haz[, 1:length(etimes), drop = FALSE]
    return(sweep(e_log_haz, 2L, estatus, `*`) + log_surv)
  }
}

# Evaluate the log baseline hazard at the specified times
# given the vector or matrix of MCMC draws for the baseline
# hazard coefficients / parameters
#
# @param times A vector of times.
# @param basehaz A list with info about the baseline hazard.
# @param coefs A vector or matrix of parameter estimates (MCMC draws).
# @return A vector or matrix, depending on the input type of coefs.
evaluate_log_basehaz <- function(times, basehaz, coefs) {
  type <- basehaz$type_name
  if (type == "weibull") {
    # Weibull: log h0(t) = log(shape) + (shape - 1) * log(t)
    X <- log(times) # log times
    B1 <- log(coefs) # log shape
    B2 <- coefs - 1 # shape - 1
    log_basehaz <- as.vector(B1) + linear_predictor(B2,X)
  } else if (type == "bs") {
    # B-splines: log h0(t) modeled directly on the spline basis
    X <- predict(basehaz$bs_basis, times) # b-spline basis
    B <- coefs # b-spline coefs
    log_basehaz <- linear_predictor(B,X)
  } else {
    stop2("Not yet implemented for basehaz = ", type)
  }
  log_basehaz
}

# Evaluate the log survival probability for each individual
# given the log hazard evaluated at the quadrature points and
# the unstandardised Gauss-Kronrod quadrature weights
#
# @param log_haz A vector containing the log hazard for each
#   individual, evaluated at each of the quadrature points. The
#   vector should be ordered such that the first N elements contain
#   the log_haz evaluated for each individual at quadrature point 1,
#   then the next N elements are the log_haz evaluated for each
#   individual at quadrature point 2, and so on.
# @param qnodes Integer specifying the number of quadrature nodes
#   at which the log hazard was evaluated for each individual.
# @param qwts A vector of unstandardised GK quadrature weights.
# @return A vector or matrix of log survival probabilities.
# S3 generic: dispatch on the class of log_haz (numeric vector vs matrix).
evaluate_log_survival <- function(log_haz, qnodes, qwts) {
  UseMethod("evaluate_log_survival")
}

# Vector method: log_haz holds one draw, stacked by quadrature node
# (first N elements are node 1 for each individual, etc. — see the
# documentation above).
evaluate_log_survival.default <- function(log_haz, qnodes, qwts) {
  # convert log hazard to hazard
  haz <- exp(log_haz)
  # apply GK quadrature weights
  weighted_haz <- qwts * haz
  # sum quadrature points for each individual to get cum_haz
  splitting_vec <- rep(1:qnodes, each = length(haz) / qnodes)
  cum_haz <- Reduce('+', split(weighted_haz, splitting_vec))
  # return: -cum_haz == log survival probability
  -cum_haz
}

# Matrix method: rows are posterior draws, columns are stacked
# (individual x quadrature node) evaluations.
evaluate_log_survival.matrix <- function(log_haz, qnodes, qwts) {
  # convert log hazard to hazard
  haz <- exp(log_haz)
  # apply GK quadrature weights
  weighted_haz <- sweep(haz, 2L, qwts, `*`)
  # sum quadrature points for each individual to get cum_haz
  cum_haz <- Reduce('+', array2list(weighted_haz, nsplits = qnodes))
  # return: -cum_haz == log survival probability
  -cum_haz
}
rstanarm/R/data_block.R0000644000176200001440000001222714406606742014525 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Drop design-matrix columns that have fewer than 2 unique values (these come
# from empty interaction levels). Two exceptions: a constant column of 1s
# (intercept-like) is never dropped, and nothing is dropped when the matrix
# has a single row.
#
# @param x A design matrix
# @param xbar Optionally a vector of column means, kept in sync with x for
#   compatibility with center_x().
# @param warn Should a warning be thrown if columns are dropped?
# @return A list with the (possibly reduced) x and xbar, plus the names of
#   the dropped columns (NULL if none).
drop_empty_levels <- function(x, xbar = NULL, warn = TRUE) {
  is_degenerate <- function(col) {
    length(col) > 1 && !all(col == 1) && length(unique(col)) < 2
  }
  degenerate <- apply(x, 2L, is_degenerate)
  if (!any(degenerate)) {
    return(nlist(x, xbar, dropped_cols = NULL))
  }
  dropped_cols <- colnames(x)[degenerate]
  if (warn) {
    warning("Dropped empty interaction levels: ",
            paste(dropped_cols, collapse = ", "), call. = FALSE)
  }
  x <- x[, !degenerate, drop = FALSE]
  xbar <- xbar[!degenerate]
  nlist(x, xbar, dropped_cols)
}

# Center a design matrix at its column means and return extra stuff
#
# @param x A design matrix
# @param sparse A flag indicating whether x is to be treated as sparse, in
#   which case the columns are left uncentered (xbar is all zeros).
# @return A list with the centered (and possibly column-reduced) matrix
#   xtemp, the column means xbar, a flag for whether an intercept column was
#   present, and the names of any dropped columns.
center_x <- function(x, sparse) {
  x <- as.matrix(x)
  if (ncol(x) == 0) {
    has_intercept <- FALSE
  } else {
    has_intercept <- grepl("(Intercept", colnames(x)[1L], fixed = TRUE)
  }
  # strip the intercept column before centering; it is reconstructed later
  xtemp <- if (has_intercept) x[, -1L, drop = FALSE] else x
  if (has_intercept && !sparse) {
    xbar <- colMeans(xtemp)
    xtemp <- sweep(xtemp, 2, xbar, FUN = "-")
  } else {
    xbar <- rep(0, ncol(xtemp))
  }
  dropped <- drop_empty_levels(xtemp, xbar)
  nlist(xtemp = dropped$x, xbar = dropped$xbar, has_intercept,
        dropped_cols = dropped$dropped_cols)
}

# Deal with priors
#
# @param prior A list
# @param nvars An integer indicating the number of variables
# @param default_scale Default value to use to scale if not specified by user
# @param link String naming the link function.
# @param ok_dists A list of admissible distributions.
# Normalize a user-supplied prior specification into the flat list of values
# expected by the Stan data block. An empty prior yields the "flat prior"
# encoding (prior_dist = 0L); named distributions are mapped to the integer
# codes used by the .stan files.
handle_glm_prior <- function(prior, nvars, default_scale, link,
                             ok_dists = nlist("normal", student_t = "t", "cauchy",
                                              "hs", "hs_plus", "laplace", "lasso",
                                              "product_normal")) {
  if (!length(prior))
    return(list(prior_dist = 0L, prior_mean = as.array(rep(0, nvars)),
                prior_scale = as.array(rep(1, nvars)),
                prior_df = as.array(rep(1, nvars)), prior_dist_name = NA,
                global_prior_scale = 0, global_prior_df = 0,
                slab_df = 0, slab_scale = 0,
                prior_autoscale = FALSE))
  if (!is.list(prior))
    stop(sQuote(deparse(substitute(prior))), " should be a named list")
  prior_dist_name <- prior$dist
  prior_scale <- prior$scale
  prior_mean <- prior$location
  prior_df <- prior$df
  # replace missing location/df with neutral defaults
  prior_mean[is.na(prior_mean)] <- 0
  prior_df[is.na(prior_df)] <- 1
  global_prior_scale <- 0
  global_prior_df <- 0
  slab_df <- 0
  slab_scale <- 0
  if (!prior_dist_name %in% unlist(ok_dists)) {
    stop("The prior distribution should be one of ",
         paste(names(ok_dists), collapse = ", "))
  } else if (prior_dist_name %in%
             c("normal", "t", "cauchy", "laplace", "lasso", "product_normal")) {
    # NB: "cauchy" deliberately falls through to prior_dist 2L via the t
    # family elsewhere? — no explicit branch here; it keeps whatever
    # prior_dist the Stan code infers from df. TODO(review): confirm "cauchy"
    # is always rewritten to a t prior with df = 1 before reaching this point.
    if (prior_dist_name == "normal") prior_dist <- 1L
    else if (prior_dist_name == "t") prior_dist <- 2L
    else if (prior_dist_name == "laplace") prior_dist <- 5L
    else if (prior_dist_name == "lasso") prior_dist <- 6L
    else if (prior_dist_name == "product_normal") prior_dist <- 7L
    prior_scale <- set_prior_scale(prior_scale, default = default_scale,
                                   link = link)
  } else if (prior_dist_name %in% c("hs", "hs_plus")) {
    prior_dist <- ifelse(prior_dist_name == "hs", 3L, 4L)
    global_prior_scale <- prior$global_scale
    global_prior_df <- prior$global_df
    slab_df <- prior$slab_df
    slab_scale <- prior$slab_scale
  } else if (prior_dist_name %in% "exponential") {
    prior_dist <- 3L # only used for scale parameters so 3 not a conflict with 3 for hs
  }
  # broadcast scalars to length nvars and cap df to avoid Inf in Stan data
  prior_df <- maybe_broadcast(prior_df, nvars)
  prior_df <- as.array(pmin(.Machine$double.xmax, prior_df))
  prior_mean <- maybe_broadcast(prior_mean, nvars)
  prior_mean <- as.array(prior_mean)
  prior_scale <- maybe_broadcast(prior_scale, nvars)
  nlist(prior_dist, prior_mean, prior_scale, prior_df, prior_dist_name,
        global_prior_scale, global_prior_df, slab_df, slab_scale,
        prior_autoscale = isTRUE(prior$autoscale))
}
rstanarm/R/stan_glm.R0000644000176200001440000002711114370470372014242 0ustar liggesusers# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Bayesian generalized linear models via Stan
#'
#' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}}
#' Generalized linear modeling with optional prior distributions for the
#' coefficients, intercept, and auxiliary parameters.
#'
#' @export
#' @templateVar armRef (Ch. 3-6)
#' @templateVar pkg stats
#' @templateVar pkgfun glm
#' @templateVar sameargs model,offset,weights
#' @templateVar rareargs na.action,contrasts
#' @templateVar fun stan_glm, stan_glm.nb
#' @templateVar fitfun stan_glm.fit
#' @template return-stanreg-object
#' @template return-stanfit-object
#' @template see-also
#' @template args-formula-data-subset
#' @template args-same-as
#' @template args-same-as-rarely
#' @template args-dots
#' @template args-prior_intercept
#' @template args-priors
#' @template args-prior_aux
#' @template args-prior_PD
#' @template args-algorithm
#' @template args-adapt_delta
#' @template args-QR
#' @template args-sparse
#' @template reference-gelman-hill
#' @template reference-muth
#'
#' @param family Same as \code{\link[stats]{glm}}, except negative binomial GLMs
#'   are also possible using the \code{\link{neg_binomial_2}} family object.
#' @param y In \code{stan_glm}, logical scalar indicating whether to
#'   return the response vector. In \code{stan_glm.fit}, a response vector.
#' @param x In \code{stan_glm}, logical scalar indicating whether to
#'   return the design matrix. In \code{stan_glm.fit}, usually a design matrix
#'   but can also be a list of design matrices with the same number of rows, in
#'   which case the first element of the list is interpreted as the primary design
#'   matrix and the remaining list elements collectively constitute a basis for a
#'   smooth nonlinear function of the predictors indicated by the \code{formula}
#'   argument to \code{\link{stan_gamm4}}.
#' @param mean_PPD A logical value indicating whether the sample mean of the
#'   posterior predictive distribution of the outcome should be calculated in
#'   the \code{generated quantities} block. If \code{TRUE} then \code{mean_PPD}
#'   is computed and displayed as a diagnostic in the
#'   \link[=print.stanreg]{printed output}. The default is \code{TRUE} except if
#'   \code{algorithm=="optimizing"}. A useful heuristic is to check if
#'   \code{mean_PPD} is plausible when compared to \code{mean(y)}. If it is
#'   plausible then this does \emph{not} mean that the model is good in general
#'   (only that it can reproduce the sample mean), but if \code{mean_PPD} is
#'   implausible then there may be something wrong, e.g., severe model
#'   misspecification, problems with the data and/or priors, computational
#'   issues, etc.
#'
#' @details The \code{stan_glm} function is similar in syntax to
#'   \code{\link[stats]{glm}} but rather than performing maximum likelihood
#'   estimation of generalized linear models, full Bayesian estimation is
#'   performed (if \code{algorithm} is \code{"sampling"}) via MCMC. The Bayesian
#'   model adds priors (independent by default) on the coefficients of the GLM.
#'   The \code{stan_glm} function calls the workhorse \code{stan_glm.fit}
#'   function, but it is also possible to call the latter directly.
#'
#'   The \code{stan_glm.nb} function, which takes the extra argument
#'   \code{link}, is a wrapper for \code{stan_glm} with \code{family =
#'   \link{neg_binomial_2}(link)}.
#'
#' @seealso The various vignettes for \code{stan_glm} at
#'   \url{https://mc-stan.org/rstanarm/articles/}.
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' ### Linear regression
#' mtcars$mpg10 <- mtcars$mpg / 10
#' fit <- stan_glm(
#'   mpg10 ~ wt + cyl + am,
#'   data = mtcars,
#'   QR = TRUE,
#'   # for speed of example only (default is "sampling")
#'   algorithm = "fullrank",
#'   refresh = 0
#' )
#'
#' plot(fit, prob = 0.5)
#' plot(fit, prob = 0.5, pars = "beta")
#' plot(fit, "hist", pars = "sigma")
#' \donttest{
#' ### Logistic regression
#' head(wells)
#' wells$dist100 <- wells$dist / 100
#' fit2 <- stan_glm(
#'   switch ~ dist100 + arsenic,
#'   data = wells,
#'   family = binomial(link = "logit"),
#'   prior_intercept = normal(0, 10),
#'   QR = TRUE,
#'   refresh = 0,
#'   # for speed of example only
#'   chains = 2, iter = 200
#' )
#' print(fit2)
#' prior_summary(fit2)
#'
#' # ?bayesplot::mcmc_areas
#' plot(fit2, plotfun = "areas", prob = 0.9,
#'      pars = c("(Intercept)", "arsenic"))
#'
#' # ?bayesplot::ppc_error_binned
#' pp_check(fit2, plotfun = "error_binned")
#'
#'
#' ### Poisson regression (example from help("glm"))
#' count_data <- data.frame(
#'   counts = c(18,17,15,20,10,20,25,13,12),
#'   outcome = gl(3,1,9),
#'   treatment = gl(3,3)
#' )
#' fit3 <- stan_glm(
#'   counts ~ outcome + treatment,
#'   data = count_data,
#'   family = poisson(link="log"),
#'   prior = normal(0, 2),
#'   refresh = 0,
#'   # for speed of example only
#'   chains = 2, iter = 250
#' )
#' print(fit3)
#'
#' bayesplot::color_scheme_set("viridis")
#' plot(fit3)
#' plot(fit3, regex_pars = c("outcome", "treatment"))
#' plot(fit3, plotfun = "combo", regex_pars = "treatment") # ?bayesplot::mcmc_combo
#' posterior_vs_prior(fit3, regex_pars = c("outcome", "treatment"))
#'
#' ### Gamma regression (example from help("glm"))
#' clotting <- data.frame(log_u = log(c(5,10,15,20,30,40,60,80,100)),
#'                        lot1 = c(118,58,42,35,27,25,21,19,18),
#'                        lot2 = c(69,35,26,21,18,16,13,12,12))
#' fit4 <- stan_glm(
#'   lot1 ~ log_u,
#'   data = clotting,
#'   family = Gamma(link="log"),
#'   iter = 500, # for speed of example only
#'   refresh = 0
#' )
#' print(fit4, digits = 2)
#'
#' fit5 <- update(fit4, formula = lot2 ~ log_u)
#'
#' # ?bayesplot::ppc_dens_overlay
#' bayesplot::bayesplot_grid(
#'   pp_check(fit4, seed = 123),
#'   pp_check(fit5, seed = 123),
#'   titles = c("lot1", "lot2")
#' )
#'
#'
#' ### Negative binomial regression
#' fit6 <- stan_glm.nb(
#'   Days ~ Sex/(Age + Eth*Lrn),
#'   data = MASS::quine,
#'   link = "log",
#'   prior_aux = exponential(1.5, autoscale=TRUE),
#'   chains = 2, iter = 200, # for speed of example only
#'   refresh = 0
#' )
#'
#' prior_summary(fit6)
#' bayesplot::color_scheme_set("brightblue")
#' plot(fit6)
#' pp_check(fit6, plotfun = "hist", nreps = 5) # ?bayesplot::ppc_hist
#'
#' # 80% interval of estimated reciprocal_dispersion parameter
#' posterior_interval(fit6, pars = "reciprocal_dispersion", prob = 0.8)
#' plot(fit6, "areas", pars = "reciprocal_dispersion", prob = 0.8)
#' }
#' }
stan_glm <- function(formula, family = gaussian(), data, weights, subset,
                     na.action = NULL, offset = NULL, model = TRUE,
                     x = FALSE, y = TRUE, contrasts = NULL, ...,
                     prior = default_prior_coef(family),
                     prior_intercept = default_prior_intercept(family),
                     prior_aux = exponential(autoscale=TRUE),
                     prior_PD = FALSE,
                     algorithm = c("sampling", "optimizing", "meanfield", "fullrank"),
                     mean_PPD = algorithm != "optimizing" && !prior_PD,
                     adapt_delta = NULL, QR = FALSE, sparse = FALSE) {
  algorithm <- match.arg(algorithm)
  family <- validate_family(family)
  validate_glm_formula(formula)
  data <- validate_data(data, if_missing = environment(formula))
  # build the model frame the same way stats::glm does: keep only the
  # model-frame-relevant arguments of this call and re-dispatch to model.frame
  call <- match.call(expand.dots = TRUE)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "subset", "weights", "na.action", "offset"),
             table = names(mf), nomatch = 0L)
  mf <- mf[c(1L, m)]
  mf$data <- data
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mf <- check_constant_vars(mf)
  mt <- attr(mf, "terms")
  Y <- array1D_check(model.response(mf, type = "any"))
  if (is.empty.model(mt))
    stop("No intercept or predictors specified.", call. = FALSE)
  X <- model.matrix(mt, mf, contrasts)
  contrasts <- attr(X, "contrasts")
  weights <- validate_weights(as.vector(model.weights(mf)))
  offset <- validate_offset(as.vector(model.offset(mf)), y = Y)
  # binomial responses given as proportions + weights are converted to a
  # two-column (successes, failures) response with no weights
  if (binom_y_prop(Y, family, weights)) {
    y1 <- as.integer(as.vector(Y) * weights)
    Y <- cbind(y1, y0 = weights - y1)
    weights <- double(0)
  }
  if (prior_PD) {
    # can result in errors (e.g. from poisson) if draws from prior are weird
    mean_PPD <- FALSE
  }
  stanfit <- stan_glm.fit(
    x = X, y = Y, weights = weights,
    offset = offset, family = family,
    prior = prior,
    prior_intercept = prior_intercept,
    prior_aux = prior_aux,
    prior_PD = prior_PD,
    algorithm = algorithm,
    mean_PPD = mean_PPD,
    adapt_delta = adapt_delta,
    QR = QR, sparse = sparse, ...
  )
  # stan_glm.fit may return early (e.g. prior summaries) for non-optimizing
  # algorithms; pass such objects straight through
  if (algorithm != "optimizing" && !is(stanfit, "stanfit"))
    return(stanfit)
  if (family$family == "Beta regression") {
    family$family <- "beta"
  }
  # drop columns with < 2 unique values (empty interaction levels), keeping
  # intercept-like columns of 1s
  sel <- apply(X, 2L, function(x) !all(x == 1) && length(unique(x)) < 2)
  X <- X[ , !sel, drop = FALSE]
  fit <- nlist(stanfit, algorithm, family, formula, data, offset, weights,
               x = X, y = Y, model = mf, terms = mt, call,
               na.action = attr(mf, "na.action"),
               contrasts = contrasts,
               stan_function = "stan_glm")
  out <- stanreg(fit)
  if (algorithm == "optimizing") {
    # carry over the extra diagnostics produced by the optimizing path
    out$log_p <- stanfit$log_p
    out$log_g <- stanfit$log_g
    out$psis <- stanfit$psis
    out$ir_idx <- stanfit$ir_idx
    out$diagnostics <- stanfit$diagnostics
  }
  out$compute_mean_PPD <- mean_PPD
  out$xlevels <- .getXlevels(mt, mf)
  # honor the model/x/y flags (mirrors stats::glm behavior)
  if (!x)
    out$x <- NULL
  if (!y)
    out$y <- NULL
  if (!model)
    out$model <- NULL
  return(out)
}

#' @rdname stan_glm
#' @export
#' @param link For \code{stan_glm.nb} only, the link function to use. See
#'   \code{\link{neg_binomial_2}}.
#'
# Wrapper around stan_glm that fixes family = neg_binomial_2(link); the call
# is rewritten and re-evaluated so the user-visible call is preserved.
stan_glm.nb <- function(formula, data, weights, subset, na.action = NULL,
                        offset = NULL, model = TRUE, x = FALSE, y = TRUE,
                        contrasts = NULL, link = "log", ...,
                        prior = default_prior_coef(family),
                        prior_intercept = default_prior_intercept(family),
                        prior_aux = exponential(autoscale=TRUE),
                        prior_PD = FALSE,
                        algorithm = c("sampling", "optimizing", "meanfield", "fullrank"),
                        mean_PPD = algorithm != "optimizing",
                        adapt_delta = NULL, QR = FALSE) {
  if ("family" %in% names(list(...)))
    stop("'family' should not be specified.")
  mc <- call <- match.call()
  if (!"formula" %in% names(call))
    names(call)[2L] <- "formula"
  # rewrite the call to stan_glm, swapping the link argument for a family
  mc[[1L]] <- quote(stan_glm)
  mc$link <- NULL
  mc$family <- neg_binomial_2(link = link)
  out <- eval(mc, parent.frame())
  out$call <- call
  out$stan_function <- "stan_glm.nb"
  return(out)
}
rstanarm/R/prior_summary.R0000644000176200001440000004345114406606742015355 0ustar liggesusers#' Summarize the priors used for an rstanarm model
#'
#' The \code{prior_summary} method provides a summary of the prior distributions
#' used for the parameters in a given model. In some cases the user-specified
#' prior does not correspond exactly to the prior used internally by
#' \pkg{rstanarm} (see the sections below). Especially in these cases, but also
#' in general, it can be much more useful to visualize the priors. Visualizing
#' the priors can be done using the \code{\link{posterior_vs_prior}} function,
#' or alternatively by fitting the model with the \code{prior_PD} argument set
#' to \code{TRUE} (to draw from the prior predictive distribution instead of
#' conditioning on the outcome) and then plotting the parameters.
#'
#' @aliases prior_summary
#' @export
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @param digits Number of digits to use for rounding.
#' @param ... Currently ignored by the method for stanreg objects.
#'
#' @section Intercept (after predictors centered):
#'   For \pkg{rstanarm} modeling functions that accept a \code{prior_intercept}
#'   argument, the specified prior for the intercept term applies to the
#'   intercept after \pkg{rstanarm} internally centers the predictors so they
#'   each have mean zero. The estimate of the intercept returned to the user
#'   corresponds to the intercept with the predictors as specified by the user
#'   (unmodified by \pkg{rstanarm}), but when \emph{specifying} the prior the
#'   intercept can be thought of as the expected outcome when the predictors are
#'   set to their means. The only exception to this is for models fit with the
#'   \code{sparse} argument set to \code{TRUE} (which is only possible with a
#'   subset of the modeling functions and never the default).
#'
#' @section Adjusted scales: For some models you may see "\code{adjusted scale}"
#'   in the printed output and adjusted scales included in the object returned
#'   by \code{prior_summary}. These adjusted scale values are the prior scales
#'   actually used by \pkg{rstanarm} and are computed by adjusting the prior
#'   scales specified by the user to account for the scales of the predictors
#'   (as described in the documentation for the \code{\link[=priors]{autoscale}}
#'   argument). To disable internal prior scale adjustments set the
#'   \code{autoscale} argument to \code{FALSE} when setting a prior using one of
#'   the distributions that accepts an \code{autoscale} argument. For example,
#'   \code{normal(0, 5, autoscale=FALSE)} instead of just \code{normal(0, 5)}.
#' #' @section Coefficients in Q-space: #' For the models fit with an \pkg{rstanarm} modeling function that supports #' the \code{QR} argument (see e.g, \code{\link{stan_glm}}), if \code{QR} is #' set to \code{TRUE} then the prior distributions for the regression #' coefficients specified using the \code{prior} argument are not relative to #' the original predictor variables \eqn{X} but rather to the variables in the #' matrix \eqn{Q} obtained from the \eqn{QR} decomposition of \eqn{X}. #' #' In particular, if \code{prior = normal(location,scale)}, then this prior on #' the coefficients in \eqn{Q}-space can be easily translated into a joint #' multivariate normal (MVN) prior on the coefficients on the original #' predictors in \eqn{X}. Letting \eqn{\theta} denote the coefficients on #' \eqn{Q} and \eqn{\beta} the coefficients on \eqn{X} then if \eqn{\theta #' \sim N(\mu, \sigma)}{\theta ~ N(\mu, \sigma)} the corresponding prior on #' \eqn{\beta} is \eqn{\beta \sim MVN(R\mu, R'R\sigma^2)}{\beta ~ MVN(R\mu, #' R'R\sigma)}, where \eqn{\mu} and \eqn{\sigma} are vectors of the #' appropriate length. Technically, \pkg{rstanarm} uses a scaled \eqn{QR} #' decomposition to ensure that the columns of the predictor matrix used to #' fit the model all have unit scale, when the \code{autoscale} argument #' to the function passed to the \code{prior} argument is \code{TRUE} (the #' default), in which case the matrices actually used are #' \eqn{Q^\ast = Q \sqrt{n-1}}{Q* = Q (n-1)^0.5} and \eqn{R^\ast = #' \frac{1}{\sqrt{n-1}} R}{R* = (n-1)^(-0.5) R}. If \code{autoscale = FALSE} #' we instead scale such that the lower-right element of \eqn{R^\ast}{R*} is #' \eqn{1}, which is useful if you want to specify a prior on the coefficient #' of the last predictor in its original units (see the documentation for the #' \code{\link[=stan_glm]{QR}} argument). 
#' #' If you are interested in the prior on \eqn{\beta} implied by the prior on #' \eqn{\theta}, we strongly recommend visualizing it as described above in #' the \strong{Description} section, which is simpler than working it out #' analytically. #' #' @return A list of class "prior_summary.stanreg", which has its own print #' method. #' #' @seealso The \link[=priors]{priors help page} and the \emph{Prior #' Distributions} vignette. #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' if (!exists("example_model")) example(example_model) #' prior_summary(example_model) #' #' priors <- prior_summary(example_model) #' names(priors) #' priors$prior$scale #' priors$prior$adjusted_scale #' #' # for a glm with adjusted scales (see Details, above), compare #' # the default (rstanarm adjusting the scales) to setting #' # autoscale=FALSE for prior on coefficients #' fit <- stan_glm(mpg ~ wt + am, data = mtcars, #' prior = normal(0, c(2.5, 4)), #' prior_intercept = normal(0, 5), #' iter = 10, chains = 1) # only for demonstration #' prior_summary(fit) #' #' fit2 <- update(fit, prior = normal(0, c(2.5, 4), autoscale=FALSE), #' prior_intercept = normal(0, 5, autoscale=FALSE)) #' prior_summary(fit2) #' } prior_summary.stanreg <- function(object, digits = 2,...) { x <- object[["prior.info"]] if (is.null(x)) { message("Priors not found in stanreg object.") return(invisible(NULL)) } if (is.stanmvreg(object)) { M <- get_M(object) x <- structure(x, M = M) } structure(x, class = "prior_summary.stanreg", QR = used.QR(object), sparse = used.sparse(object), model_name = deparse(substitute(object)), stan_function = object$stan_function, print_digits = digits) } #' @export #' @method print prior_summary.stanreg print.prior_summary.stanreg <- function(x, digits, ...) { if (missing(digits)) digits <- attr(x, "print_digits") %ORifNULL% 2 .dig <- digits .fr2 <- function(y, .digits = .dig, ...) format(y, digits = .digits, ...) 
.fr3 <- function(y, .nsmall = .dig) .fr2(y, nsmall = .nsmall) formatters <- list(.fr2, .fr3) QR <- attr(x, "QR") sparse <- attr(x, "sparse") model_name <- attr(x, "model_name") stan_function <- attr(x, "stan_function") msg <- paste0("Priors for model '", model_name, "'") cat(msg, "\n------") if (!stan_function == "stan_mvmer") { if (!is.null(x[["prior_intercept"]])) .print_scalar_prior( x[["prior_intercept"]], txt = paste0("Intercept", if (!sparse) " (after predictors centered)"), formatters ) if (!is.null(x[["prior"]])) .print_vector_prior( x[["prior"]], txt = paste0("\nCoefficients", if (QR) " (in Q-space)"), formatters = formatters ) if (!is.null(x[["prior_aux"]])) { aux_name <- x[["prior_aux"]][["aux_name"]] aux_dist <- x[["prior_aux"]][["dist"]] if (aux_dist %in% c("normal", "student_t", "cauchy")) x[["prior_aux"]][["dist"]] <- paste0("half-", aux_dist) .print_scalar_prior( x[["prior_aux"]], txt = paste0("\nAuxiliary (", aux_name, ")"), formatters ) } } else { # unique to stan_mvmer M <- attr(x, "M") for (m in 1:M) { if (!is.null(x[["prior_intercept"]][[m]])) .print_scalar_prior( x[["prior_intercept"]][[m]], txt = paste0(if (m > 1) "\n", "y", m, "|Intercept", if (!sparse) " (after predictors centered)"), formatters ) if (!is.null(x[["prior"]][[m]])) .print_vector_prior( x[["prior"]][[m]], txt = paste0("\ny", m, "|Coefficients", if (QR) " (in Q-space)"), formatters = formatters ) if (!is.null(x[["prior_aux"]][[m]])) { aux_name <- x[["prior_aux"]][[m]][["aux_name"]] aux_dist <- x[["prior_aux"]][[m]][["dist"]] if (aux_dist %in% c("normal", "student_t", "cauchy")) x[["prior_aux"]][[m]][["dist"]] <- paste0("half-", aux_dist) .print_scalar_prior( x[["prior_aux"]][[m]], txt = paste0("\ny", m, "|Auxiliary (", aux_name, ")"), formatters ) } } } # unique to stan_betareg if (!is.null(x[["prior_intercept_z"]])) .print_scalar_prior( x[["prior_intercept_z"]], txt = paste0("\nIntercept_z", if (!sparse) " (after predictors centered)"), formatters ) if 
(!is.null(x[["prior_z"]])) .print_vector_prior(x[["prior_z"]], txt = "\nCoefficients_z", formatters) # unique to stan_jm if (stan_function == "stan_jm") { M <- attr(x, "M") for (m in 1:M) { if (!is.null(x[["priorLong_intercept"]][[m]])) .print_scalar_prior( x[["priorLong_intercept"]][[m]], txt = paste0(if (m > 1) "\n", "Long", m, "|Intercept", if (!sparse) " (after predictors centered)"), formatters ) if (!is.null(x[["priorLong"]][[m]])) .print_vector_prior( x[["priorLong"]][[m]], txt = paste0("\nLong", m, "|Coefficients", if (QR) " (in Q-space)"), formatters = formatters ) if (!is.null(x[["priorLong_aux"]][[m]])) { aux_name <- x[["priorLong_aux"]][[m]][["aux_name"]] aux_dist <- x[["priorLong_aux"]][[m]][["dist"]] if (aux_dist %in% c("normal", "student_t", "cauchy")) x[["priorLong_aux"]][[m]][["dist"]] <- paste0("half-", aux_dist) .print_scalar_prior( x[["priorLong_aux"]][[m]], txt = paste0("\nLong", m, "|Auxiliary (", aux_name, ")"), formatters ) } } if (!is.null(x[["priorEvent_intercept"]])) .print_scalar_prior( x[["priorEvent_intercept"]], txt = paste0("\nEvent|Intercept", if (!sparse) " (after predictors centered)"), formatters ) if (!is.null(x[["priorEvent"]])) .print_vector_prior( x[["priorEvent"]], txt = "\nEvent|Coefficients", formatters = formatters ) if (!is.null(x[["priorEvent_aux"]])) { aux_name <- x[["priorEvent_aux"]][["aux_name"]] aux_dist <- x[["priorEvent_aux"]][["dist"]] if ((aux_name == "weibull-shape") && (aux_dist %in% c("normal", "student_t", "cauchy"))) { # weibull x[["priorEvent_aux"]][["dist"]] <- paste0("half-", aux_dist) .print_scalar_prior( x[["priorEvent_aux"]], txt = paste0("\nEvent|Auxiliary (", aux_name, ")"), formatters ) } else { # bs or piecewise .print_vector_prior( x[["priorEvent_aux"]], txt = paste0("\nEvent|Auxiliary (", aux_name, ")"), formatters ) } } if (!is.null(x[["priorEvent_assoc"]])) .print_vector_prior( x[["priorEvent_assoc"]], txt = "\nAssociation parameters", formatters = formatters ) } # unique to stan_(g)lmer, 
stan_gamm4, stan_mvmer, or stan_jm if (!is.null(x[["prior_covariance"]])) .print_covariance_prior(x[["prior_covariance"]], txt = "\nCovariance", formatters) # unique to stan_polr if (!is.null(x[["prior_counts"]])) { p <- x[["prior_counts"]] p$concentration <- .format_pars(p$concentration, .fr2) cat("\n\nCounts\n ~", paste0(p$dist, "(", "concentration = ", .fr2(p$concentration), ")")) } if (!is.null(x[["scobit_exponent"]])) { p <- x[["scobit_exponent"]] cat("\n\nScobit Exponent\n ~", paste0(p$dist, "(shape = ", .fr2(p$shape), ", rate = ", .fr2(p$rate), ")")) } cat("\n------\n") cat("See help('prior_summary.stanreg') for more details\n") invisible(x) } # internal ---------------------------------------------------------------- # check if model was fit using QR=TRUE used.QR <- function(x) { isTRUE(getCall(x)[["QR"]]) } # check if model was fit using sparse=TRUE used.sparse <- function(x) { isTRUE(getCall(x)[["sparse"]]) } # # @param x numeric vector # @param formatter a formatting function to apply (see .fr2, .fr3 above) # @param N the maximum number of values to include before replacing the rest # with '...' .format_pars <- function(x, formatter, N = 3) { K <- length(x) if (K < 2) return(x) paste0( "[", paste(c(formatter(x[1:min(N, K)]), if (N < K) "..."), collapse = ","), "]" ) } # Print priors for intercept/coefs (called internally by print.prior_summary.stanreg) # # @param p named list of prior stuff # @param txt header to be printed # @param formatters a list of two formatter functions like .fr2, .fr3 (defined # in prior_summary.stanreg). The first is used for format all numbers except # for adjusted scales, for which the second function is used. This is kind of # hacky and should be replaced at some point. 
#
# Print the prior for a scalar parameter (intercept or auxiliary). When an
# adjusted scale is present, both the user-specified prior and the
# internally adjusted prior are printed.
#
# @param p Named list describing one prior (elements such as dist, location,
#   scale, df, rate, adjusted_scale; dist may be NA for a flat prior).
# @param txt Header text printed above the prior.
# @param formatters List of two formatting functions: the first for most
#   numbers, the second for adjusted scales.
.print_scalar_prior <- function(p, txt = "Intercept", formatters = list()) {
  stopifnot(length(formatters) == 2)
  .f1 <- formatters[[1]]
  .f2 <- formatters[[2]]
  # Prints one "~ dist(...)" line. If adjusted, substitute the adjusted
  # scale (and the implied rate, since exponential uses rate = 1/scale).
  .cat_scalar_prior <- function(p, adjusted = FALSE, prepend_chars = "\n ~") {
    if (adjusted) {
      p$scale <- p$adjusted_scale
      p$rate <- 1/p$adjusted_scale
    }
    cat(prepend_chars,
        if (is.na(p$dist)) {
          "flat"
        } else if (p$dist == "exponential") {
          paste0(p$dist,"(rate = ", .f1(p$rate), ")")
        } else {
          # normal, student_t, cauchy
          if (is.null(p$df)) {
            paste0(p$dist,"(location = ", .f1(p$location),
                   ", scale = ", .f1(p$scale),")")
          } else {
            paste0(p$dist, "(df = ", .f1(p$df),
                   ", location = ", .f1(p$location),
                   ", scale = ", .f1(p$scale), ")")
          }
        })
  }
  cat(paste0("\n", txt))
  if (is.null(p$adjusted_scale)) {
    .cat_scalar_prior(p, adjusted = FALSE)
  } else {
    # Show both the prior as specified and the autoscaled prior actually used.
    cat("\n Specified prior:")
    .cat_scalar_prior(p, adjusted = FALSE, prepend_chars = "\n ~")
    cat("\n Adjusted prior:")
    .cat_scalar_prior(p, adjusted = TRUE, prepend_chars = "\n ~")
  }
}

# Print the prior on the covariance structure of group-specific terms.
# Only the "decov" and "lkj" distributions are handled; anything else
# prints nothing.
#
# @param p Named list describing the covariance prior.
# @param txt Header text printed above the prior.
# @param formatters List of two formatting functions (see .print_scalar_prior).
.print_covariance_prior <- function(p, txt = "Covariance", formatters = list()) {
  if (p$dist == "decov") {
    .f1 <- formatters[[1]]
    p$regularization <- .format_pars(p$regularization, .f1)
    p$concentration <- .format_pars(p$concentration, .f1)
    p$shape <- .format_pars(p$shape, .f1)
    p$scale <- .format_pars(p$scale, .f1)
    cat(paste0("\n", txt, "\n ~"),
        paste0(p$dist, "(",
               "reg. = ", .f1(p$regularization),
               ", conc. = ", .f1(p$concentration),
               ", shape = ", .f1(p$shape),
               ", scale = ", .f1(p$scale), ")"))
  } else if (p$dist == "lkj") {
    .f1 <- formatters[[1]]
    .f2 <- formatters[[2]]
    p$regularization <- .format_pars(p$regularization, .f1)
    p$df <- .format_pars(p$df, .f1)
    p$scale <- .format_pars(p$scale, .f1)
    if (!is.null(p$adjusted_scale))
      p$adjusted_scale <- .format_pars(p$adjusted_scale, .f2)
    cat(paste0("\n", txt, "\n ~"),
        paste0(p$dist, "(",
               "reg. = ", .f1(p$regularization),
               ", df = ", .f1(p$df),
               ", scale = ", .f1(p$scale), ")"))
    if (!is.null(p$adjusted_scale))
      cat("\n **adjusted scale =", .f2(p$adjusted_scale))
  }
}

# Print the prior for a vector of coefficients, showing the adjusted
# (autoscaled) prior alongside the specified one when present.
#
# @param p Named list describing one prior; vector-valued elements are
#   abbreviated via .format_pars before printing.
# @param txt Header text printed above the prior.
# @param formatters List of two formatting functions (see .print_scalar_prior).
.print_vector_prior <- function(p, txt = "Coefficients", formatters = list()) {
  stopifnot(length(formatters) == 2)
  .f1 <- formatters[[1]]
  .f2 <- formatters[[2]]
  if (!(p$dist %in% c("R2", NA))) {
    if (p$dist %in% c("normal", "student_t", "cauchy", "laplace", "lasso",
                      "product_normal")) {
      p$location <- .format_pars(p$location, .f1)
      p$scale <- .format_pars(p$scale, .f1)
      if (!is.null(p$df))
        p$df <- .format_pars(p$df, .f1)
      if (!is.null(p$adjusted_scale))
        p$adjusted_scale <- .format_pars(p$adjusted_scale, .f2)
    } else if (p$dist %in% c("hs_plus")) {
      # for hs_plus the summary stores df1/df2 in the 'df'/'scale' slots
      # (presumably by construction of the prior summary — TODO confirm)
      p$df1 <- .format_pars(p$df, .f1)
      p$df2 <- .format_pars(p$scale, .f1)
    } else if (p$dist %in% c("hs")) {
      p$df <- .format_pars(p$df, .f1)
    }
    # NOTE: a trailing 'else if (p$dist %in% c("product_normal"))' branch was
    # removed here — it was unreachable because "product_normal" is already
    # matched by the first condition above (which formats p$df when present).
  }
  .cat_vector_prior <- function(p, adjusted = FALSE, prepend_chars = "\n ~") {
    if (adjusted) {
      p$scale <- p$adjusted_scale
    }
    cat(prepend_chars,
        if (is.na(p$dist)) {
          "flat"
        } else if (p$dist %in% c("normal", "student_t", "cauchy", "laplace",
                                 "lasso", "product_normal")) {
          if (is.null(p$df)) {
            paste0(p$dist, "(location = ", .f1(p$location),
                   ", scale = ", .f1(p$scale), ")")
          } else {
            paste0(p$dist, "(df = ", .f1(p$df),
                   ", location = ", .f1(p$location),
                   ", scale = ", .f1(p$scale),")")
          }
        } else if (p$dist %in% c("hs_plus")) {
          paste0("hs_plus(df1 = ", .f1(p$df1), ", df2 = ", .f1(p$df2), ")")
        } else if (p$dist %in% c("hs")) {
          paste0("hs(df = ", .f1(p$df), ")")
        } else if (p$dist %in% c("R2")) {
          paste0("R2(location = ", .f1(p$location), ", what = '", p$what, "')")
        })
  }
  cat(paste0("\n", txt))
  if (is.null(p$adjusted_scale)) {
    .cat_vector_prior(p, adjusted = FALSE)
  } else {
    cat("\n Specified prior:")
    .cat_vector_prior(p, adjusted = FALSE, prepend_chars = "\n ~")
    cat("\n Adjusted prior:")
    .cat_vector_prior(p, adjusted = TRUE, prepend_chars = "\n ~")
  }
}
rstanarm/R/simulate_b_pars.R0000644000176200001440000002123513365374540015613 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # Copyright (C) 2016, 2017 Sam Brilleman # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Draw new group-specific parameters # # Run a Metropolis-Hastings algorithm to draw group-specific parameters for new # groups conditional on new outcome data provided by the user. These parameters # are required for the so-called "dynamic predictions" relevant to joint modelling # of longitudinal and time-to-event data, whereby we wish to draw new group-specific # parameters that condition on longitudinal data observed up to the current time t. # # @param object A stanjm object. # @param stanmat Matrix of draws that are being used to generate the predictions. # @param ndL A list of data frames with each element containing the prediction data # for one longitudinal submodel. # @param ndE A data frame with the prediction data for the event submodel. # @param ids A vector of unique IDs for the individuals in the prediction data. # @param times A vector of last known survival times for the individuals in the # prediction data. 
simulate_b_pars <- function(object, stanmat, ndL, ndE, ids, times, scale = 1.5) { # Preliminaries and dimensions p <- .p(object) # num of b pars for each grouping factor has_two_grp_factors <- (length(object$cnms) > 1L) if (!has_two_grp_factors) { # one grouping factor b1_var <- object$id_var b1_p <- p[[b1_var]] # num of b pars for ID grouping factor } else { # more than one grouping factor if (get_M(object) > 1) STOP_dynpred("multivariate joint models with more than one grouping factor.") if (length(p) > 2L) STOP_dynpred("models with more than two grouping factors.") b1_var <- object$id_var b2_var <- grep(utils::glob2rx(b1_var), names(p), value = TRUE, invert = TRUE) b1_p <- p[[b1_var]] # num of b pars for ID grouping factor b2_p <- p[[b2_var]] # num of b pars for second grouping factor b2_n <- tapply(ndL[[1]][[b2_var]], ndL[[1]][[b1_var]], n_distinct) # num of unique levels for b2 within each ID } # Obtain a list with the posterior means for each parameter pars_means <- extract_pars(object, means = TRUE) # Simulate new b pars cat("Drawing new random effects for", length(ids), "individuals. 
") cat("Monitoring progress:\n") pb <- utils::txtProgressBar(min = 0, max = length(ids), style = 3) acceptance_rate <- c() b_new <- list() for (i in 1:length(ids)) { if (!has_two_grp_factors) { # one grouping factor len <- b1_p } else { # more than one grouping factor len <- b1_p + b2_p * b2_n[ids[[i]]] } mat <- matrix(NA, nrow(stanmat), len) # Design matrices for individual i only dat_i <- .pp_data_jm(object, ndL, ndE, etimes = times[[i]], ids = ids[[i]]) if (has_two_grp_factors) { dat_i$Ni <- b2_n[ids[[i]]] } # Obtain mode and var-cov matrix of posterior distribution of new b pars # based on asymptotic assumptions, used as center and width of proposal # distribution in MH algorithm inits <- rep(0, len) val <- optim(inits, optim_fn, object = object, data = dat_i, pars = pars_means, method = "BFGS", hessian = TRUE) mu_i <- val$par # asymptotic mode of posterior sigma_i <- scale * solve(val$hessian) # (scaled) asymptotic vcov of posterior # Run MH algorithm for each individual b_current <- mu_i # asympotic mode used as init value for MH algorithm accept <- c() for (s in 1:nrow(stanmat)) { pars_s <- extract_pars(object, stanmat[s, , drop = FALSE]) b_step <- mh_step(b_old = b_current, mu = mu_i, sigma = sigma_i, df = 4, object = object, data = dat_i, pars = pars_s) accept[s] <- any(!b_step == b_current) mat[s,] <- b_current <- b_step } new_nms <- unlist(sapply(dat_i$assoc_parts, function(x) x$mod_eta$Z_names)) colnames(mat) <- paste0("b[", new_nms, "]") utils::setTxtProgressBar(pb, i) acceptance_rate[[paste0(object$id_var, ":", ids[i])]] <- mean(accept) b_new[[i]] <- mat } close(pb) # return stanmat with only the new b pars included b_new <- do.call("cbind", b_new) # cbind new b pars for all individuals sel <- b_names(colnames(stanmat)) # stanmat cols containing old b pars stanmat <- stanmat[, -sel, drop = F] # drop old b pars from stanmat stanmat <- cbind(stanmat, b_new) # add new b pars to stanmat structure(stanmat, b_new = b_new, acceptance_rate = acceptance_rate) 
} # The function to optimise, in order to obtain the asymptotic mode and var-cov # matrix of the posterior distribution for the new b pars # # @param b The vector of b parameters # @param object A stanjm object # @param data Output from .pp_data_jm # @param pars Output from extract_pars optim_fn <- function(b, object, data, pars) { nms <- lapply(data$assoc_parts, function(x) x$mod_eta$Z_names) pars <- substitute_b_pars(object, data, pars, new_b = b, new_Z_names = nms) ll <- .ll_jm(object, data, pars, include_b = TRUE) return(-ll) # optimise -ll for full joint model } # Perform one iteration of the Metropolis-Hastings algorithm # # @param b_old The current vector of b parameters # @param mu The mean vector for the proposal distribution # @param sigma The variance-covariance matrix for the proposal distribution # @param object A stanjm object # @param data Output from .pp_data_jm # @param pars Output from extract_pars mh_step <- function(b_old, mu, sigma, df, object, data, pars) { # New proposal for b vector b_new <- rmt(mu = mu, Sigma = sigma, df = df) # Calculate density for proposal distribution propdens_old <- dmt(x = b_old, mu = mu, Sigma = sigma, df = df) propdens_new <- dmt(x = b_new, mu = mu, Sigma = sigma, df = df) # Calculate density for target distribution nms <- lapply(data$assoc_parts, function(x) x$mod_eta$Z_names) pars_old <- substitute_b_pars(object, data, pars, new_b = b_old, new_Z_names = nms) pars_new <- substitute_b_pars(object, data, pars, new_b = b_new, new_Z_names = nms) targdens_old <- .ll_jm(object, data, pars_old, include_b = TRUE) targdens_new <- .ll_jm(object, data, pars_new, include_b = TRUE) # MH accept/reject step accept_ratio <- exp(targdens_new - targdens_old - propdens_new + propdens_old) if (accept_ratio >= runif(1)) return(b_new) else return(b_old) } # Function to add new b parameters to the stanmat # # @param object A stanjm object # @param data Output from .pp_data_jm # @param pars Output from extract_pars # @param new_b A vector 
of new b pars, or a list of vectors, with # each element being the new b pars for a single submodel. # @param new_Z_names A vector, or a list of vectors, with the names # for the new b pars. substitute_b_pars <- function(object, data, pars, new_b, new_Z_names) { M <- get_M(object) if (!is(new_b, "list")) { # split b into submodels if (M == 1) { new_b <- list(new_b) } else { y_cnms <- fetch(object$glmod, "z", "group_cnms") len_b <- sapply(y_cnms, function(x) length(unlist(x))) new_b <- split(new_b, rep(1:length(len_b), len_b)) } } if (!is(new_Z_names, "list")) { # split Z_names into submodels if (M == 1) { new_b <- list(new_b) } else { y_cnms <- fetch(object$glmod, "z", "group_cnms") len_b <- sapply(y_cnms, function(x) length(unlist(x))) new_Z_names <- split(new_Z_names, rep(1:length(len_b), len_b)) } } mapply(function(x, y) { if (!identical(is.vector(x), is.vector(y))) stop("Bug found: new_b and new_Z_names should both be vectors or lists of vectors.") if (!identical(length(x), length(y))) stop("Bug found: new_b and new_Z_names should be the same length.") }, new_b, new_Z_names) pars$b <- mapply(function(b, nms) { names(b) <- paste0("b[", nms, "]") t(b) }, new_b, new_Z_names, SIMPLIFY = FALSE) pars$stanmat <- pars$stanmat[, -b_names(colnames(pars$stanmat)), drop = FALSE] pars$stanmat <- do.call("cbind", c(list(pars$stanmat), pars$b)) return(pars) } rstanarm/R/doc-QR.R0000644000176200001440000000573414370470372013532 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' The \code{QR} argument #' #' Details about the \code{QR} argument to \pkg{rstanarm}'s modeling #' functions. #' #' @name QR-argument #' @template reference-stan-manual #' #' @details The \code{QR} argument is a logical scalar defaulting to #' \code{FALSE}, but if \code{TRUE} applies a scaled \code{\link{qr}} #' decomposition to the design matrix, \eqn{X = Q^\ast R^\ast}{X = Q* R*}. #' If \code{autoscale = TRUE} (the default) #' in the call to the function passed to the \code{prior} argument, then #' \eqn{Q^\ast = Q \sqrt{n-1}}{Q* = Q (n-1)^0.5} and #' \eqn{R^\ast = \frac{1}{\sqrt{n-1}} R}{R* = (n-1)^(-0.5) R}. When #' \code{autoscale = FALSE}, \eqn{R} is scaled such that the lower-right #' element of \eqn{R^\ast}{R*} is \eqn{1}. #' #' The coefficients relative to \eqn{Q^\ast}{Q*} are obtained and then #' premultiplied by the inverse of \eqn{R^{\ast}}{R*} to obtain coefficients #' relative to the original predictors, \eqn{X}. Thus, when #' \code{autoscale = FALSE}, the coefficient on the last column of \eqn{X} #' is the same as the coefficient on the last column of \eqn{Q^\ast}{Q*}. #' #' These transformations do not change the likelihood of the data but are #' recommended for computational reasons when there are multiple predictors. #' Importantly, while the columns of \eqn{X} are almost generally correlated, #' the columns of \eqn{Q^\ast}{Q*} are uncorrelated by design, which often makes #' sampling from the posterior easier. 
However, because when \code{QR} is #' \code{TRUE} the \code{prior} argument applies to the coefficients relative to #' \eqn{Q^\ast}{Q*} (and those are not very interpretable), setting \code{QR=TRUE} #' is only recommended if you do not have an informative prior for the regression #' coefficients or if the only informative prior is on the last regression #' coefficient (in which case you should set \code{autoscale = FALSE} when #' specifying such priors). #' #' For more details see the Stan case study #' \emph{The QR Decomposition For Regression Models} at #' \url{https://mc-stan.org/users/documentation/case-studies/qr_regression.html}. #' NULL rstanarm/R/loo-prediction.R0000644000176200001440000001235314500326605015361 0ustar liggesusers#' Compute weighted expectations using LOO #' #' These functions are wrappers around the \code{\link[loo]{E_loo}} function #' (\pkg{loo} package) that provide compatibility for \pkg{rstanarm} models. #' #' @export #' @aliases loo_predict loo_linpred loo_predictive_interval #' #' @template reference-loo #' @template reference-bayesvis #' @templateVar stanregArg object #' @template args-stanreg-object #' @param psis_object An object returned by \code{\link[loo]{psis}}. If missing #' then \code{psis} will be run internally, which may be time consuming #' for models fit to very large datasets. #' @param ... Currently unused. #' @inheritParams loo::E_loo #' #' @return A list with elements \code{value} and \code{pareto_k}. #' #' For \code{loo_predict} and \code{loo_linpred} the value component is a #' vector with one element per observation. #' #' For \code{loo_predictive_interval} the \code{value} component is a matrix #' with one row per observation and two columns (like #' \code{\link{predictive_interval}}). \code{loo_predictive_interval(..., prob #' = p)} is equivalent to \code{loo_predict(..., type = "quantile", probs = #' c(a, 1-a))} with \code{a = (1 - p)/2}, except it transposes the result and #' adds informative column names. 
#'
#' See \code{\link[loo]{E_loo}} and \code{\link[loo]{pareto-k-diagnostic}} for
#' details on the \code{pareto_k} diagnostic.
#'
#' @examples
#' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") {
#' \dontrun{
#' if (!exists("example_model")) example(example_model)
#'
#' # optionally, log-weights can be pre-computed and reused
#' psis_result <- loo::psis(log_ratios = -log_lik(example_model))
#'
#' loo_probs <- loo_linpred(example_model, type = "mean", transform = TRUE, psis_object = psis_result)
#' str(loo_probs)
#'
#' loo_pred_var <- loo_predict(example_model, type = "var", psis_object = psis_result)
#' str(loo_pred_var)
#'
#' loo_pred_ints <- loo_predictive_interval(example_model, prob = 0.8, psis_object = psis_result)
#' str(loo_pred_ints)
#' }
#' }
loo_predict.stanreg <-
  function(object,
           type = c("mean", "var", "quantile"),
           probs = 0.5,
           ...,
           psis_object = NULL) {
    if ("lw" %in% names(list(...))) {
      stop(
        "Due to changes in the 'loo' package, the 'lw' argument ",
        "is no longer supported. Use the 'psis_object' argument instead."
      )
    }
    type <- match.arg(type)
    # log importance ratios are the negative pointwise log-likelihood
    log_ratios <- -log_lik(object)
    if (is.null(psis_object)) {
      message("Running PSIS to compute weights...")
      r_eff <- loo::relative_eff(exp(-log_ratios), chain_id = chain_id_for_loo(object))
      psis_object <- loo::psis(log_ratios, r_eff = r_eff)
    }
    preds <- posterior_predict(object)
    if (is_polr(object) && !is_scobit(object)) {
      # E_loo needs numeric draws, not ordinal factor levels
      preds <- polr_yrep_to_numeric(preds)
    }
    loo::E_loo(
      x = preds,
      psis_object = psis_object,
      type = type,
      probs = probs,
      log_ratios = log_ratios
    )
  }

#' @rdname loo_predict.stanreg
#' @export
#' @param transform Passed to \code{\link{posterior_linpred}}.
#'
loo_linpred.stanreg <-
  function(object,
           type = c("mean", "var", "quantile"),
           probs = 0.5,
           transform = FALSE,
           ...,
           psis_object = NULL) {
    if ("lw" %in% names(list(...))) {
      stop(
        "Due to changes in the 'loo' package, the 'lw' argument ",
        "is no longer supported. Use the 'psis_object' argument instead."
      )
    }
    type <- match.arg(type)
    log_ratios <- -log_lik(object)
    if (is.null(psis_object)) {
      message("Running PSIS to compute weights...")
      r_eff <- loo::relative_eff(exp(-log_ratios), chain_id = chain_id_for_loo(object))
      psis_object <- loo::psis(log_ratios, r_eff = r_eff)
    }
    # (a second, redundant 'type <- match.arg(type)' call was removed here)
    linpreds <- posterior_linpred(object, transform = transform)
    loo::E_loo(
      x = linpreds,
      psis_object = psis_object,
      type = type,
      probs = probs,
      log_ratios = log_ratios
    )
  }

#' @rdname loo_predict.stanreg
#' @export
#' @param prob For \code{loo_predictive_interval}, a scalar in \eqn{(0,1)}
#'   indicating the desired probability mass to include in the intervals. The
#'   default is \code{prob=0.9} (\eqn{90}\% intervals).
loo_predictive_interval.stanreg <- function(object, prob = 0.9, ...,
                                            psis_object = NULL) {
  stopifnot(length(prob) == 1)
  alpha <- (1 - prob) / 2
  probs <- c(alpha, 1 - alpha)
  labs <- paste0(100 * probs, "%")
  E_loo_result <- loo_predict.stanreg(object, type = "quantile", probs = probs,
                                      psis_object = psis_object, ...)
  intervals <- E_loo_result$value
  # E_loo returns quantiles x observations; transpose so rows = observations
  # and columns carry the interval endpoint labels.
  rownames(intervals) <- labs
  intervals <- t(intervals)
  list(value = intervals, pareto_k = E_loo_result$pareto_k)
}

# internal ----------------------------------------------------------------

# psis method for stanreg objects: despite the argument name (kept for
# generic compatibility), 'log_ratios' here is a fitted stanreg object from
# which the log-likelihood, and hence the ratios, are derived.
psis.stanreg <- function(log_ratios, ...) {
  object <- log_ratios
  message("Running PSIS to compute weights...")
  ll <- log_lik(object)
  r_eff <- loo::relative_eff(exp(ll), chain_id = chain_id_for_loo(object))
  loo::psis(-ll, r_eff = r_eff, ...)
}
rstanarm/R/doc-rstanarm-deprecated.R0000644000176200001440000000725213365374540017135 0ustar liggesusers#' Deprecated functions
#'
#' These functions are deprecated and will be removed in a future release. The
#' \strong{Arguments} section below provides details on how the functionality
#' obtained via each of the arguments has been replaced.
#' #' @name rstanarm-deprecated #' NULL #' @rdname rstanarm-deprecated #' @export #' @param prior_scale_for_dispersion,min_prior_scale,scaled Arguments to #' deprecated \code{prior_options} function. The functionality provided #' by the now deprecated \code{prior_options} function has been replaced #' as follows: #' \describe{ #' \item{\code{prior_scale_for_dispersion}}{ #' Instead of using the \code{prior_scale_for_dispersion} argument to #' \code{prior_options}, priors for these parameters can now be #' specified directly when calling \code{\link{stan_glm}} (or #' \code{\link{stan_glmer}}, etc.) using the new \code{prior_aux} #' argument. #' } #' \item{\code{scaled}}{ #' Instead of setting \code{prior_options(scaled=FALSE)}, internal rescaling #' is now toggled using the new \code{autoscale} arguments to #' \code{\link{normal}}, \code{\link{student_t}}, and \code{\link{cauchy}} #' (the other prior distributions do not support 'autoscale'). #' } #' \item{\code{min_prior_scale}}{ #' No replacement. \code{min_prior_scale} (the minimum possible scale #' parameter value that be used for priors) is now fixed to \code{1e-12}. #' } #' } #' prior_options <- function(prior_scale_for_dispersion = 5, min_prior_scale = 1e-12, scaled = TRUE) { warning( "'prior_options' is deprecated and will be removed in a future release.", "\n* Priors for auxiliary parameters should now be set using", " the new 'prior_aux' argument when calling ", "'stan_glm', 'stan_glmer', etc.", "\n* Instead of setting 'prior_options(scaled=FALSE)',", " internal rescaling is now toggled using the", " new 'autoscale' argument to 'normal', 'student_t', or 'cauchy'", " (the other prior distributions do not support 'autoscale').", call. 
= FALSE ) validate_parameter_value(prior_scale_for_dispersion) validate_parameter_value(min_prior_scale) out <- nlist(scaled, min_prior_scale, prior_scale_for_dispersion) structure(out, from_prior_options = TRUE) } # function used in stan_glm.fit to preserve backwards compatibility. # should be removed when prior_options is officially removed .support_deprecated_prior_options <- function(prior, prior_intercept, prior_aux, prior_ops) { if (!isTRUE(attr(prior_ops, "from_prior_options"))) stop( "The 'prior_ops' argument must be a call to 'prior_options'. ", "But 'prior_options' is deprecated and will be removed in a future release. ", "See help('rstanarm-deprecated') for details on the functionality ", "that replaces 'prior_options'.", call. = FALSE ) po_disp_scale <- prior_ops[["prior_scale_for_dispersion"]] po_scaled <- prior_ops[["scaled"]] if (!is.null(prior_aux) && !is.null(po_disp_scale)) { if (po_disp_scale != prior_aux[["scale"]]) { warning( "Setting prior scale for aux to value specified in ", "'prior_options' rather than value specified in 'prior_aux'.", call. = FALSE ) prior_aux[["scale"]] <- po_disp_scale } } if (!is.null(po_scaled) && identical(po_scaled, FALSE)) { if (isTRUE(prior$dist %in% c("normal", "t"))) prior$autoscale <- FALSE if (!is.null(prior_intercept)) prior_intercept$autoscale <- FALSE } nlist(prior, prior_intercept, prior_aux) } rstanarm/R/stan_lm.R0000644000176200001440000002075714370470372014104 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian regularized linear models via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Bayesian inference for linear modeling with regularizing priors on the model #' parameters that are driven by prior beliefs about \eqn{R^2}, the proportion #' of variance in the outcome attributable to the predictors. See #' \code{\link{priors}} for an explanation of this critical point. #' \code{\link{stan_glm}} with \code{family="gaussian"} also estimates a linear #' model with normally-distributed errors and allows for various other priors on #' the coefficients. #' #' @export #' @templateVar fun stan_lm, stan_aov #' @templateVar fitfun stan_lm.fit or stan_lm.wfit #' @templateVar pkg stats #' @templateVar pkgfun lm #' @templateVar rareargs model,offset,weights #' @templateVar rareargs2 na.action,singular.ok,contrasts #' @template return-stanreg-object #' @template return-stanfit-object #' @template args-formula-data-subset #' @template args-same-as-rarely #' @template args-same-as-rarely-2 #' @template args-x-y #' @template args-dots #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' #' @param w Same as in \code{lm.wfit} but rarely specified. #' @param prior Must be a call to \code{\link{R2}} with its #' \code{location} argument specified or \code{NULL}, which would #' indicate a standard uniform prior for the \eqn{R^2}. #' @param prior_intercept Either \code{NULL} (the default) or a call to #' \code{\link{normal}}. 
If a \code{\link{normal}} prior is specified #' without a \code{scale}, then the standard deviation is taken to be #' the marginal standard deviation of the outcome divided by the square #' root of the sample size, which is legitimate because the marginal #' standard deviation of the outcome is a primitive parameter being #' estimated. #' #' \strong{Note:} If using a dense representation of the design matrix #' ---i.e., if the \code{sparse} argument is left at its default value of #' \code{FALSE}--- then the prior distribution for the intercept is set so it #' applies to the value \emph{when all predictors are centered}. If you prefer #' to specify a prior on the intercept without the predictors being #' auto-centered, then you have to omit the intercept from the #' \code{\link[stats]{formula}} and include a column of ones as a predictor, #' in which case some element of \code{prior} specifies the prior on it, #' rather than \code{prior_intercept}. Regardless of how #' \code{prior_intercept} is specified, the reported \emph{estimates} of the #' intercept always correspond to a parameterization without centered #' predictors (i.e., same as in \code{glm}). #' #' #' @details The \code{stan_lm} function is similar in syntax to the #' \code{\link[stats]{lm}} function but rather than choosing the parameters to #' minimize the sum of squared residuals, samples from the posterior #' distribution are drawn using MCMC (if \code{algorithm} is #' \code{"sampling"}). The \code{stan_lm} function has a formula-based #' interface and would usually be called by users but the \code{stan_lm.fit} #' and \code{stan_lm.wfit} functions might be called by other functions that #' parse the data themselves and are analogous to \code{lm.fit} #' and \code{lm.wfit} respectively. #' #' In addition to estimating \code{sigma} --- the standard deviation of the #' normally-distributed errors --- this model estimates a positive parameter #' called \code{log-fit_ratio}. 
If it is positive, the marginal posterior #' variance of the outcome will exceed the sample variance of the outcome #' by a multiplicative factor equal to the square of \code{fit_ratio}. #' Conversely if \code{log-fit_ratio} is negative, then the model underfits. #' Given the regularizing nature of the priors, a slight underfit is good. #' #' Finally, the posterior predictive distribution is generated with the #' predictors fixed at their sample means. This quantity is useful for #' checking convergence because it is reasonably normally distributed #' and a function of all the parameters in the model. #' #' The \code{stan_aov} function is similar to \code{\link[stats]{aov}}, but #' does a Bayesian analysis of variance that is basically equivalent to #' \code{stan_lm} with dummy variables. \code{stan_aov} has a somewhat #' customized \code{\link{print}} method that prints an ANOVA-like table in #' addition to the output printed for \code{stan_lm} models. #' #' #' @references #' Lewandowski, D., Kurowicka D., and Joe, H. (2009). Generating random #' correlation matrices based on vines and extended onion method. #' \emph{Journal of Multivariate Analysis}. \strong{100}(9), 1989--2001. #' #' @seealso #' The vignettes for \code{stan_lm} and \code{stan_aov}, which have more #' thorough descriptions and examples. #' \url{https://mc-stan.org/rstanarm/articles/} #' #' Also see \code{\link{stan_glm}}, which --- if \code{family = #' gaussian(link="identity")} --- also estimates a linear model with #' normally-distributed errors but specifies different priors. 
#' #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch !="i386") { #' (fit <- stan_lm(mpg ~ wt + qsec + am, data = mtcars, prior = R2(0.75), #' # the next line is only to make the example go fast enough #' chains = 1, iter = 300, seed = 12345, refresh = 0)) #' plot(fit, "hist", pars = c("wt", "am", "qsec", "sigma"), #' transformations = list(sigma = "log")) #' } stan_lm <- function(formula, data, subset, weights, na.action, model = TRUE, x = FALSE, y = FALSE, singular.ok = TRUE, contrasts = NULL, offset, ..., prior = R2(stop("'location' must be specified")), prior_intercept = NULL, prior_PD = FALSE, algorithm = c("sampling", "meanfield", "fullrank"), adapt_delta = NULL) { algorithm <- match.arg(algorithm) validate_glm_formula(formula) data <- validate_data(data, if_missing = environment(formula)) call <- match.call(expand.dots = TRUE) mf <- match.call(expand.dots = FALSE) mf[[1L]] <- as.name("lm") mf$data <- data mf$x <- mf$y <- mf$singular.ok <- TRUE mf$qr <- FALSE mf$prior <- mf$prior_intercept <- mf$prior_PD <- mf$algorithm <- mf$adapt_delta <- NULL mf$method <- "model.frame" modelframe <- suppressWarnings(eval(mf, parent.frame())) mt <- attr(modelframe, "terms") Y <- model.response(modelframe, "numeric") X <- model.matrix(mt, modelframe, contrasts) w <- as.vector(model.weights(modelframe)) offset <- as.vector(model.offset(modelframe)) stanfit <- stan_lm.wfit(y = Y, x = X, w, offset, singular.ok = singular.ok, prior = prior, prior_intercept = prior_intercept, prior_PD = prior_PD, algorithm = algorithm, adapt_delta = adapt_delta, ...) 
if (algorithm != "optimizing" && !is(stanfit, "stanfit")) return(stanfit) fit <- nlist(stanfit, family = gaussian(), formula, offset, weights = w, x = X[,intersect(colnames(X), dimnames(stanfit)[[3]]), drop = FALSE], y = Y, data = data, prior.info = prior, algorithm, call, terms = mt, model = if (model) modelframe else NULL, na.action = attr(modelframe, "na.action"), contrasts = attr(X, "contrasts"), stan_function = "stan_lm") out <- stanreg(fit) out$xlevels <- .getXlevels(mt, modelframe) if (!x) out$x <- NULL if (!y) out$y <- NULL return(out) } rstanarm/R/pp_check.R0000644000176200001440000003447714551535205014225 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # #' Graphical posterior predictive checks #' #' Interface to the \link[bayesplot:PPC-overview]{PPC} (posterior predictive checking) module #' in the \pkg{\link{bayesplot}} package, providing various plots comparing the #' observed outcome variable \eqn{y} to simulated datasets \eqn{y^{rep}}{yrep} #' from the posterior predictive distribution. The \code{pp_check} method for #' \link{stanreg-objects} prepares the arguments required for the specified #' \pkg{bayesplot} PPC plotting function and then calls that function. 
It is #' also straightforward to use the functions from the \pkg{bayesplot} package #' directly rather than via the \code{pp_check} method. Examples of both are #' given below. #' #' @export #' @export pp_check #' @aliases pp_check #' @method pp_check stanreg #' @templateVar bdaRef (Ch. 6) #' @templateVar stanregArg object #' @template reference-bda #' @template reference-bayesvis #' @template args-stanreg-object #' @param plotfun A character string naming the \pkg{bayesplot} #' \link[bayesplot:PPC-overview]{PPC} function to use. The default is to call #' \code{\link[bayesplot:PPC-distributions]{ppc_dens_overlay}}. \code{plotfun} can be specified #' either as the full name of a \pkg{bayesplot} plotting function (e.g. #' \code{"ppc_hist"}) or can be abbreviated to the part of the name following #' the \code{"ppc_"} prefix (e.g. \code{"hist"}). To get the names of all #' available PPC functions see \code{\link[bayesplot]{available_ppc}}. #' @param nreps The number of \eqn{y^{rep}}{yrep} datasets to generate from the #' \link[=posterior_predict]{posterior predictive distribution} and show in #' the plots. The default depends on \code{plotfun}. For functions that plot #' each \code{yrep} dataset separately (e.g. \code{ppc_hist}), \code{nreps} #' defaults to a small value to make the plots readable. For functions that #' overlay many \code{yrep} datasets (e.g., \code{ppc_dens_overlay}) a larger #' number is used by default, and for other functions (e.g. \code{ppc_stat}) #' the default is to set \code{nreps} equal to the posterior sample size. #' @param ... Additonal arguments passed to the \pkg{\link{bayesplot}} function #' called. For many plotting functions \code{...} is optional, however for #' functions that require a \code{group} or \code{x} argument, these arguments #' should be specified in \code{...}. 
If specifying \code{group} and/or #' \code{x}, they can be provided as either strings naming variables (in which #' case they are searched for in the model frame) or as vectors containing the #' actual values of the variables. See the \strong{Examples} section, below. #' @param seed An optional \code{\link[=set.seed]{seed}} to pass to #' \code{\link{posterior_predict}}. #' #' @return \code{pp_check} returns a ggplot object that can be further #' customized using the \pkg{ggplot2} package. #' #' @note For binomial data, plots of \eqn{y} and \eqn{y^{rep}}{yrep} show the #' proportion of 'successes' rather than the raw count. Also for binomial #' models see \code{\link[bayesplot:PPC-errors]{ppc_error_binned}} for binned residual #' plots. #' #' @seealso #' \itemize{ #' \item The vignettes in the \pkg{bayesplot} package for many examples. #' Examples of posterior predictive checks can also be found in the #' \pkg{rstanarm} vignettes and demos. #' \item \code{\link[bayesplot]{PPC-overview}} (\pkg{bayesplot}) for links to #' the documentation for all the available plotting functions. #' \item \code{\link{posterior_predict}} for drawing from the posterior #' predictive distribution. #' \item \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}} to change the color scheme #' of the plots. 
#' } #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' fit <- stan_glmer( #' mpg ~ wt + am + (1|cyl), #' data = mtcars, #' iter = 400, # iter and chains small just to keep example quick #' chains = 2, #' refresh = 0 #' ) #' #' # Compare distribution of y to distributions of multiple yrep datasets #' pp_check(fit) #' pp_check(fit, plotfun = "boxplot", nreps = 10, notch = FALSE) #' pp_check(fit, plotfun = "hist", nreps = 3) #' #' \donttest{ #' # Same plot (up to RNG noise) using bayesplot package directly #' bayesplot::ppc_hist(y = mtcars$mpg, yrep = posterior_predict(fit, draws = 3)) #' #' # Check histograms of test statistics by level of grouping variable 'cyl' #' pp_check(fit, plotfun = "stat_grouped", stat = "median", group = "cyl") #' #' # Defining a custom test statistic #' q25 <- function(y) quantile(y, probs = 0.25) #' pp_check(fit, plotfun = "stat_grouped", stat = "q25", group = "cyl") #' #' # Scatterplot of two test statistics #' pp_check(fit, plotfun = "stat_2d", stat = c("mean", "sd")) #' #' # Scatterplot of y vs. average yrep #' pp_check(fit, plotfun = "scatter_avg") # y vs. average yrep #' # Same plot (up to RNG noise) using bayesplot package directly #' bayesplot::ppc_scatter_avg(y = mtcars$mpg, yrep = posterior_predict(fit)) #' #' # Scatterplots of y vs. 
several individual yrep datasets #' pp_check(fit, plotfun = "scatter", nreps = 3) #' #' # Same plot (up to RNG noise) using bayesplot package directly #' bayesplot::ppc_scatter(y = mtcars$mpg, yrep = posterior_predict(fit, draws = 3)) #' #' # yrep intervals with y points overlaid #' # by default 1:length(y) used on x-axis but can also specify an x variable #' pp_check(fit, plotfun = "intervals") #' pp_check(fit, plotfun = "intervals", x = "wt") + ggplot2::xlab("wt") #' #' # Same plot (up to RNG noise) using bayesplot package directly #' bayesplot::ppc_intervals(y = mtcars$mpg, yrep = posterior_predict(fit), #' x = mtcars$wt) + ggplot2::xlab("wt") #' #' # predictive errors #' pp_check(fit, plotfun = "error_hist", nreps = 6) #' pp_check(fit, plotfun = "error_scatter_avg_vs_x", x = "wt") + #' ggplot2::xlab("wt") #' #' # Example of a PPC for ordinal models (stan_polr) #' fit2 <- stan_polr(tobgp ~ agegp, data = esoph, method = "probit", #' prior = R2(0.2, "mean"), init_r = 0.1, #' refresh = 0) #' pp_check(fit2, plotfun = "bars", nreps = 500, prob = 0.5) #' pp_check(fit2, plotfun = "bars_grouped", group = esoph$agegp, #' nreps = 500, prob = 0.5) #' } #' } pp_check.stanreg <- function(object, plotfun = "dens_overlay", nreps = NULL, seed = NULL, ...) { if (used.optimizing(object)) STOP_not_optimizing("pp_check") if (is.stanmvreg(object)) { dots <- list(...) m <- dots[["m"]] if (is.null(m)) stop("Argument 'm' must be provided for stanmvreg objects.") } else m <- NULL plotfun_name <- .ppc_function_name(plotfun) plotfun <- get(plotfun_name, pos = asNamespace("bayesplot"), mode = "function") is_binomial_model <- is_binomial_ppc(object, m = m) y_yrep <- .ppc_y_and_yrep( object, seed = seed, nreps = .set_nreps(nreps, fun = plotfun_name), binned_resid_plot = isTRUE(plotfun_name == "ppc_error_binned"), ... ) args <- .ppc_args( object, y = y_yrep[["y"]], yrep = y_yrep[["yrep"]], fun = plotfun_name, ... 
) do.call(plotfun, args) } # internal ---------------------------------------------------------------- # check if binomial is_binomial_ppc <- function(object, ...) { if (is_polr(object) && !is_scobit(object)) { FALSE } else { is.binomial(family(object, ...)$family) } } # prepare y and yrep arguments to bayesplot function .ppc_y_and_yrep <- function(object, nreps = NULL, seed = NULL, binned_resid_plot = FALSE, ...) { y <- get_y(object, ...) if (binned_resid_plot) { yrep <- posterior_epred(object, ...) yrep <- yrep[1:nreps, , drop = FALSE] } else { yrep <- posterior_predict(object, draws = nreps, seed = seed, ...) } if (is_binomial_ppc(object, ...)) { # includes stan_polr's scobit models if (NCOL(y) == 2L) { trials <- rowSums(y) y <- y[, 1L] / trials if (!binned_resid_plot) yrep <- sweep(yrep, 2L, trials, "/") } else if (is.factor(y)) y <- fac2bin(y) } else if (is_polr(object)) { # excluding scobit y <- as.integer(y) yrep <- polr_yrep_to_numeric(yrep) } nlist(y, yrep) } # prepare 'group' and 'x' variable for certain plots .ppc_xvar <- .ppc_groupvar <- function(object, var = NULL, ...) { if (is.null(var) || !is.character(var)) return(var) mf <- model.frame(object, ...) vars <- colnames(mf) if (var %in% vars) return(mf[, var]) stop("Variable '", var, "' not found in model frame. ") } # # @param fun user's plotfun argument .ppc_function_name <- function(fun = character()) { if (!length(fun)) stop("Plotting function not specified.", call. = FALSE) if (identical(substr(fun, 1, 5), "mcmc_")) stop( "For 'mcmc_' functions use the 'plot' ", "method instead of 'pp_check'.", call. = FALSE ) if (!identical(substr(fun, 1, 4), "ppc_")) fun <- paste0("ppc_", fun) if (fun == "ppc_loo_pit") { warning( "'ppc_loo_pit' is deprecated. 
", "Use 'ppc_loo_pit_overlay' or 'ppc_loo_pit_qq' instead.", call.=FALSE ) fun <- "ppc_loo_pit_qq" } if (!fun %in% bayesplot::available_ppc()) stop( fun, " is not a valid PPC function name.", " Use bayesplot::available_ppc() for a list of available PPC functions." ) return(fun) } # prepare all arguments to pass to bayesplot function # @param object user's object # @param y,yrep returned by .ppc_y_and_yrep # @param fun string returned by .ppc_function_name # @param ... user's ... # @return named list # .ppc_args <- function(object, y, yrep, fun, ...) { funname <- fun fun <- match.fun(fun) dots <- list(...) dots[["y"]] <- as.numeric(y) dots[["yrep"]] <- yrep argnames <- names(formals(fun)) if (is.stanmvreg(object)) { m <- dots[["m"]] if (is.null(m)) stop("Argument 'm' must be provided for stanmvreg objects.") dots[["m"]] <- NULL # don't return m as part of bayesplot arguments } else m <- NULL if ("group" %in% argnames) { groupvar <- dots[["group"]] %ORifNULL% stop("This PPC requires the 'group' argument.", call. = FALSE) dots[["group"]] <- .ppc_groupvar(object, groupvar, m = m) } if ("x" %in% argnames) { xvar <- dots[["x"]] if (!is.null(xvar)) { dots[["x"]] <- .ppc_xvar(object, xvar, m = m) } else { if (funname %in% c("ppc_intervals", "ppc_ribbon")) { message("'x' not specified in '...'. Using x=1:length(y).") dots[["x"]] <- seq_along(y) } else { stop("This PPC requires the 'x' argument.", call. 
= FALSE) } } } if ("psis_object" %in% argnames && is.null(dots[["psis_object"]])) { dots[["psis_object"]] <- psis.stanreg(object) } else if ("lw" %in% argnames && is.null(dots[["lw"]])) { # for LOO predictive checks dots[["lw"]] <- weights(psis.stanreg(object)) } return(dots) } # set default nreps value based on plot .set_nreps <- function(nreps = NULL, fun = character()) { fun <- sub("ppc_", "", fun) switch(fun, # DISTRIBUTIONS "dens_overlay" = nreps %ORifNULL% 50, "dens_overlay_grouped" = nreps %ORifNULL% 50, "ecdf_overlay" = nreps %ORifNULL% 50, "ecdf_overlay_grouped" = nreps %ORifNULL% 50, "hist" = nreps %ORifNULL% 8, "dens" = nreps %ORifNULL% 8, "boxplot" = nreps %ORifNULL% 8, "freqpoly" = nreps %ORifNULL% 8, "freqpoly_grouped" = nreps %ORifNULL% 3, "violin_grouped" = nreps, # NULL ok "km_overlay" = nreps %ORifNULL% 50, # PIT-ECDFs "pit_ecdf" = .ignore_nreps(nreps), "pit_ecdf_grouped" = .ignore_nreps(nreps), # PREDICTIVE ERRORS "error_binned" = nreps %ORifNULL% 3, "error_hist" = nreps %ORifNULL% 3, "error_hist_grouped" = nreps %ORifNULL% 3, "error_scatter" = nreps %ORifNULL% 3, "error_scatter_avg" = nreps, # NULL ok "error_scatter_avg_vs_x" = nreps, # NULL ok "error_scatter_avg_grouped" = nreps, # NULL ok # SCATTERPLOTS "scatter" = nreps %ORifNULL% 3, "scatter_avg" = nreps, # NULL ok "scatter_avg_grouped" = nreps, # NULL ok # TEST-STATISTICS "stat" = .ignore_nreps(nreps), "stat_2d" = .ignore_nreps(nreps), "stat_grouped" = .ignore_nreps(nreps), "stat_freqpoly" = .ignore_nreps(nreps), "stat_freqpoly_grouped" = .ignore_nreps(nreps), # INTERVALS "intervals" = .ignore_nreps(nreps), "intervals_grouped" = .ignore_nreps(nreps), "ribbon" = .ignore_nreps(nreps), "ribbon_grouped" = .ignore_nreps(nreps), # DISCRETE ONLY "rootogram" = nreps, # NULL ok "bars" = nreps, # NULL ok "bars_grouped" = nreps, # NULL ok # LOO PLOTS "loo_pit" = .ignore_nreps(nreps), "loo_pit_overlay" = .ignore_nreps(nreps), "loo_pit_qq" = .ignore_nreps(nreps), "loo_intervals" = .ignore_nreps(nreps), 
"loo_ribbon" = .ignore_nreps(nreps), # otherwise function not found stop( "Plotting function not supported. ", "(If the plotting function is included in the output from ", "bayesplot::available_ppc() then it should be available via pp_check ", "and this error is probably a bug.)" ) ) } .ignore_nreps <- function(nreps) { if (!is.null(nreps)) warning("'nreps' is ignored for this PPC", call. = FALSE) return(NULL) } # convert a character matrix (returned by posterior_predict for ordinal models) to a # numeric matrix # # @param yrep character matrix polr_yrep_to_numeric <- function(yrep) { apply(yrep, 2L, function(x) as.integer(as.factor(x))) } rstanarm/R/stan_clogit.R0000644000176200001440000002116614370470372014750 0ustar liggesusers# Part of the rstanarm package for estimating model parameters # Copyright (C) 2017 Trustees of Columbia University # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Conditional logistic (clogit) regression models via Stan #' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' A model for case-control studies with optional prior distributions for the #' coefficients, intercept, and auxiliary parameters. 
#' #' @export #' @templateVar pkg survival #' @templateVar pkgfun clogit #' @templateVar sameargs model,offset #' @templateVar rareargs na.action,contrasts #' @templateVar fun stan_clogit #' @templateVar fitfun stan_glm.fit #' @template return-stanreg-object #' @template see-also #' @template args-priors #' @template args-prior_PD #' @template args-algorithm #' @template args-adapt_delta #' @template args-QR #' @template args-sparse #' @template args-dots #' #' @param formula,data,subset,na.action,contrasts Same as for \code{\link[lme4]{glmer}}, #' except that any global intercept included in the formula will be dropped. #' \emph{We strongly advise against omitting the \code{data} argument}. Unless #' \code{data} is specified (and is a data frame) many post-estimation #' functions (including \code{update}, \code{loo}, \code{kfold}) are not #' guaranteed to work properly. #' @param strata A factor indicating the groups in the data where the number of #' successes (possibly one) is fixed by the research design. It may be useful #' to use \code{\link{interaction}} or \code{\link[survival]{strata}} to #' create this factor. However, the \code{strata} argument must not rely on #' any object besides the \code{data} \code{\link{data.frame}}. #' @param prior_covariance Cannot be \code{NULL} when lme4-style group-specific #' terms are included in the \code{formula}. See \code{\link{decov}} for #' more information about the default arguments. Ignored when there are no #' group-specific terms. #' #' @details The \code{stan_clogit} function is mostly similar in syntax to #' \code{\link[survival]{clogit}} but rather than performing maximum #' likelihood estimation of generalized linear models, full Bayesian #' estimation is performed (if \code{algorithm} is \code{"sampling"}) via #' MCMC. The Bayesian model adds priors (independent by default) on the #' coefficients of the GLM. 
#' #' The \code{data.frame} passed to the \code{data} argument must be sorted by #' the variable passed to the \code{strata} argument. #' #' The \code{formula} may have group-specific terms like in #' \code{\link{stan_glmer}} but should not allow the intercept to vary by the #' stratifying variable, since there is no information in the data with which #' to estimate such deviations in the intercept. #' #' @seealso The vignette for Bernoulli and binomial models, which has more #' details on using \code{stan_clogit}. #' \url{https://mc-stan.org/rstanarm/articles/} #' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' dat <- infert[order(infert$stratum), ] # order by strata #' post <- stan_clogit(case ~ spontaneous + induced + (1 | education), #' strata = stratum, #' data = dat, #' subset = parity <= 2, #' QR = TRUE, #' chains = 2, iter = 500) # for speed only #' #' nd <- dat[dat$parity > 2, c("case", "spontaneous", "induced", "education", "stratum")] #' # next line would fail without case and stratum variables #' pr <- posterior_epred(post, newdata = nd) # get predicted probabilities #' #' # not a random variable b/c probabilities add to 1 within strata #' all.equal(rep(sum(nd$case), nrow(pr)), rowSums(pr)) #' } #' @importFrom lme4 findbars stan_clogit <- function(formula, data, subset, na.action = NULL, contrasts = NULL, ..., strata, prior = normal(autoscale=TRUE), prior_covariance = decov(), prior_PD = FALSE, algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), adapt_delta = NULL, QR = FALSE, sparse = FALSE) { algorithm <- match.arg(algorithm) data <- validate_data(data, if_missing = environment(formula)) call <- match.call(expand.dots = TRUE) mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "na.action", "strata"), table = names(mf), nomatch = 0L) mf <- mf[c(1L, m)] names(mf)[length(mf)] <- "weights" mf$data <- data err <- try(eval(mf$weights, data, enclos = NULL), silent = TRUE) if 
(inherits(err, "try-error")) { stop("the 'stratum' argument must be evaluatable solely within 'data'") } has_bars <- length(findbars(formula)) > 0 if (has_bars) { if (is.null(prior_covariance)) stop("'prior_covariance' can't be NULL.", call. = FALSE) mf[[1L]] <- quote(lme4::glFormula) mf$control <- make_glmerControl() glmod <- eval(mf, parent.frame()) X <- glmod$X mf <- glmod$fr Y <- mf[, as.character(glmod$formula[2L])] group <- glmod$reTrms group$strata <- glmod$strata <- as.factor(mf[,"(weights)"]) group$decov <- prior_covariance } else { validate_glm_formula(formula) mf[[1L]] <- as.name("model.frame") mf$drop.unused.levels <- TRUE mf <- eval(mf, parent.frame()) group <- list(strata = as.factor(mf[,"(weights)"])) mt <- attr(mf, "terms") X <- model.matrix(mt, mf, contrasts) Y <- array1D_check(model.response(mf, type = "any")) } contrasts <- attr(X, "contrasts") if (is.factor(Y)) { Y <- fac2bin(Y) } ord <- order(group$strata) if (any(diff(ord) <= 0)) { stop("Data must be sorted by 'strata' (in increasing order).") } offset <- model.offset(mf) %ORifNULL% double(0) weights <- double(0) mf <- check_constant_vars(mf) mt <- attr(mf, "terms") if (is.empty.model(mt)) stop("Predictors specified.", call. = FALSE) xint <- match("(Intercept)", colnames(X), nomatch = 0L) if (xint > 0L) { X <- X[, -xint, drop = FALSE] # I cannot remember why I was calling drop.terms() to get rid of the intercept # mt <- drop.terms(mt, dropx = xint) attr(mt, "intercept") <- 0L } f <- binomial(link = "logit") stanfit <- stan_glm.fit(x = X, y = Y, weights = weights, offset = offset, family = f, prior = prior, prior_PD = prior_PD, algorithm = algorithm, adapt_delta = adapt_delta, group = group, QR = QR, sparse = sparse, ...) 
if (algorithm != "optimizing" && !is(stanfit, "stanfit")) return(stanfit) f$link <- "clogit" f$linkinv <- function(eta, g = group$strata, successes = aggregate(Y, by = list(g), FUN = sum)$x) { denoms <- unlist(lapply(1:length(successes), FUN = function(j) { mark <- g == levels(g)[j] log_clogit_denom(sum(mark), successes[j], eta[mark]) })) exp(eta - denoms[as.integer(g)]) } f$linkfun <- log f$mu.eta <- function(eta) stop("'mu.eta' should not have been called") fit <- nlist(stanfit, algorithm, family = f, formula, data, offset, weights, x = X, y = Y, model = mf, terms = mt, call, na.action = attr(mf, "na.action"), contrasts = contrasts, stan_function = "stan_clogit", glmod = if(has_bars) glmod) out <- stanreg(fit) out$xlevels <- .getXlevels(mt, mf) class(out) <- c(class(out), if(has_bars) "lmerMod", "clogit") return(out) } log_clogit_denom <- function(N_j, D_j, eta_j) { if (D_j == 1 && N_j == NROW(eta_j)) return(log_sum_exp(eta_j)); if (D_j == 0) return(0) if (N_j == D_j) { if (D_j == 1) return(eta_j[N_j]) return(sum(eta_j[(N_j - 1):(N_j + 1)])) } else { N_jm1 <- N_j - 1 return( log_sum_exp2(log_clogit_denom(N_jm1, D_j, eta_j), log_clogit_denom(N_jm1, D_j - 1, eta_j) + eta_j[N_j]) ) } } rstanarm/NEWS.md0000644000176200001440000003236214370470372013214 0ustar liggesusers# rstanarm 2.21.3 ### Bug fixes * Fix bug where `loo()` with `k_threshold` argument specified would error if the model formula was a string instead of a formula object. (#454) * Fix bug where `loo()` with `k_threshold` argument specified would error for models fit with `stan_polr()`. (#450) * Fix bug where `stan_aov()` would use the wrong `singular.ok` logic. (#448) * Fix bug where contrasts info was dropped when subsetting the model matrix in `stan_glm()`. (#459) * Fix bug where `stan_glmer()` would error if `prior_aux=NULL`. (#482) * `posterior_predict()` and `posterior_epred()` don't error with `newdata` for intercept only models by allowing data frames with 0 columns and multiple rows. 
(#492) ### New features * New vignette on AB testing. (#409) * `stan_jm()` gains an offset term for the longitudinal submodel. (#415, @pamelanluna) * Effective number of parameters are computed for K-fold CV not just LOO CV. (#462) * `stan_clogit()` now allows outcome variable to be a factor. (#520) # rstanarm 2.21.1 * Compatible with rstan v2.21.1 * Consistent with new book [Regression and Other Stories](https://statmodeling.stat.columbia.edu/2020/07/08/regression-and-other-stories-is-available/) ### Backwards incompatible changes * `stan_jm()` is not available for 32bit Windows * Some improvements to prior distributions, as described in detail in the vignette *Prior Distributions for rstanarm Models* and book *Regression and Other Stories*. These changes shouldn't cause any existing code to error, but default priors have changed in some cases: - default prior on intercept is still Gaussian but the way the location and scale are determined has been updated (#432) - `autoscale` argument to functions like `normal()`, `student_t()`, etc., now defaults to `FALSE` except when used by default priors (default priors still do autoscalinng). This makes it simpler to specify non-default priors. (#432) ### Bug fixes * Fixed error in `kfold()` for `stan_gamm4()` models that used `random` argument (#435) * Fixed error in `posterior_predict()` and `posterior_linpred()` when using `newdata` with `family = mgcv::betar` (#406, #407) * `singular.ok` now rules out singular design matrices in `stan_lm()` (#402) * Fix a potential error when `data` is a `data.table` object (#434, @danschrage) ### New functions * New method `posterior_epred()` returns the posterior distribution of the conditional expectation, which is equivalent to (and may eventually entirely replace) setting argument `transform=TRUE` with `posterior_linpred()`. (#432) * Added convenience functions `logit()` and `invlogit()` that are just wrappers for `qlogis()` and `plogis()`. 
These were previously provided by the `arm` package. (#432) # rstanarm 2.19.3 ### Bug fixes * Allow the vignettes to knit on platforms that do not support version 2 of RMarkdown # rstanarm 2.19.2 ### Bug fixes * src/Makevars{.win} now uses a more robust way to find StanHeaders * Fixed bug where `ranef()` and `coef()` methods for `glmer`-style models printed the wrong output for certain combinations of varying intercepts and slopes. * Fixed a bug where `posterior_predict()` failed for `stan_glmer()` models estimated with `family = mgcv::betar`. * Fixed bug in `bayes_R2()` for bernoulli models. (Thanks to @mcol) * `loo_R2()` can now be called on the same fitted model object multiple times with identical (not just up to rng noise) results. (Thanks to @mcol) ### New features and improvements * New vignette on doing MRP using rstanarm. (Thanks to @lauken13) * 4x speedup for most GLMs (`stan_glm()`) and GAMs (`stan_gamm4()` without `random` argument). This comes from using Stan's new compound `_glm` functions (`normal_id_glm`, `bernoulli_logit_glm`, `poisson_log_glm`, `neg_binomial_2_log_glm`) under the hood whenever possible. (Thanks to @avehtari and @VMatthijs) * `compare_models()` is deprecated in favor of `loo_compare()` to keep up with the loo package ([loo::loo_compare()](https://mc-stan.org/loo/reference/loo_compare)) * The `kfold()` method now has a `cores` argument and parallelizes by fold rather than by Markov chain (unless otherwise specified), which should be much more efficient when many cores are available. * For `stan_glm()` with `algorithm='optimizing'`, Pareto smoothed importance sampling ([arxiv.org/abs/1507.02646](https://arxiv.org/abs/1507.02646), [mc-stan.org/loo/reference/psis.html](https://mc-stan.org/loo/reference/psis.html)) is now used to diagnose and improve inference (see https://avehtari.github.io/RAOS-Examples/BigData/bigdata.html). This also now means that we can use PSIS-LOO also when `algorithm='optimizing'`. 
(Thanks to @avehtari) * For `stan_glm()` the `"meanfield"` and `"fullrank"` ADVI algorithms also include the PSIS diagnostics and adjustments, but so far we have not seen any example where these would be better than optimzation or MCMC. # rstanarm 2.18.1 ### Bug fixes * `stan_clogit()` now works even when there are no common predictors * `prior.info()` works better with models produced by `stan_jm()` and `stan_mvmer()` ### New features and improvements * `stan_glm()` (only) gets a `mean_PPD` argument that when `FALSE` avoids drawing from the posterior predictive distribution in the Stan code * `posterior_linpred()` now works even if the model was estimated with `algorithm = "optimizing"` # rstanarm 2.17.4 ### Bug fixes * `stan_jm()` and `stan_mvmer()` now correctly include the intercept in the longitudinal submodel ### New features and improvements * Compatible with **loo** package version `>= 2.0` * `QR = TRUE` no longer ignores the `autoscale` argument and has better behavior when `autoscale = FALSE` * `posterior_linpred()` now has a draws argument like for `posterior_predict()` * Dynamic predictions are now supported in `posterior_traj()` for `stan_jm` models. * More options for K-fold CV, including manually specifying the folds or using helper functions to create them for particular model/data combinations. # rstanarm 2.17.3 Minor release for build fixes for Solaris and avoiding a test failure # rstanarm 2.17.2 Lots of good stuff in this release. ### Bug fixes * `stan_polr()` and `stan_lm()` handle the `K = 1` case better ### Important user-facing improvements * The prior_aux arguments now defaults to exponential rather than Cauchy. This should be a safer default. 
* The Stan programs do not drop any constants and should now be safe to use with the **bridgesampling** package * `hs()` and `hs_plus()` priors have new defaults based on a new paper by Aki Vehtari and Juho Piironen * `stan_gamm4()` is now more closely based on `mgcv::jagam()`, which may affect some estimates but the options remain largely the same * The `product_normal()` prior permits `df = 1`, which is a product of ... one normal variate * The build system is more conventional now. It should require less RAM to build from source but it is slower unless you utilize parallel make and LTO ### Big new features * `stan_jm()` and `stan_mvmer()` contributed by Sam Brilleman * `bayes_R2()` method to calculate a quantity similar to $R^2$ * `stan_nlmer()`, which is similar to `lme4::nlmer` but watch out for multimodal posterior distributions * `stan_clogit()`, which is similar to `survival::clogit` but accepts lme4-style group-specific terms * The `mgcv::betar` family is supported for the lme4-like modeling functions, allowing for beta regressions with lme4-style group terms and / or smooth nonlinear functions of predictors # rstanarm 2.15.3 ### Bug fixes * Fix to `stan_glmer()` Bernoulli models with multiple group-specific intercept terms that could result in draws from the wrong posterior distribution * Fix bug with contrasts in `stan_aov()` (thanks to Henrik Singmann) * Fix bug with `na.action` in `stan_glmer()` (thanks to Henrik Singmann) # rstanarm 2.15.1 Minor release with only changes to allow tests to pass on CRAN # rstanrm 2.14.2 ### Bug fixes * Fix for intercept with identity or square root link functions for the auxiliary parameter of a beta regression * Fix for special case where only the intercepts vary by group and a non-default prior is specified for their standard deviation * Fix for off-by-one error in some lme4-style models with multiple grouping terms ### New features * New methods `loo_linpred()`, `loo_pit()`, `loo_predict()`, and 
`loo_predictive_interval()` * Support for many more plotfuns in `pp_check()` that are implemented in the **bayesplot** package * Option to compute latent residuals in `stan_polr()` (Thanks to Nate Sanders) * The pairs plot now uses the ggplot2 package # rstanarm 2.14.1 ### Bug fixes * `VarCorr()` could return duplicates in cases where a `stan_{g}lmer` model used grouping factor level names with spaces * `The pairs()` function now works with group-specific parameters * The `stan_gamm4()` function works better now * Fix a problem with factor levels after estimating a model via `stan_lm()` ### New features * New model-fitting function(s) `stan_betareg()` (and `stan_betareg.fit()`) that uses the same likelihoods as those supported by the `betareg()` function in the **betareg** package (Thanks to Imad Ali) * New choices for priors on coefficients: `laplace()`, `lasso()`, `product_normal()` * The `hs()` and `hs_plus()` priors now have new `global_df` and `global_scale` arguments * `stan_{g}lmer()` models that only have group-specific intercept shifts are considerably faster now * Models with Student t priors and low degrees of freedom (that are not 1, 2, or 4) may work better now due to Cornish-Fisher transformations * Many functions for priors have gained an `autoscale` argument that defaults to `TRUE` and indicates that rstanarm should make internal changes to the prior based on the scales of the variables so that they default priors are weakly informative * The new `compare_models()` function does more extensive checking that the models being compared are compatible ### Deprecated arguments * The `prior_ops` argument to various model fitting functions is deprecated and replaced by a the `prior_aux` argument for the prior on the auxiliary parameter of various GLM-like models # rstanarm 2.13.1 ### Bug fixes * Fix bug in `reloo()` if data was not specified * Fix bug in `pp_validate()` that was only introduced on GitHub ### New features * Uses the new **bayesplot** and 
**rstantools** R packages * The new `prior_summary()` function can be used to figure out what priors were actually used * `stan_gamm4()` is better implemented, can be followed by `plot_nonlinear()`, `posterior_predict()` (with newdata), etc. * Hyperparameters (i.e. covariance matrices in general) for lme4 style models are now returned by `as.matrix()` and `as.data.frame()` * `pp_validate()` can now be used if optimization or variational Bayesian inference was used to estimate the original model # rstanarm 2.12.1 ### Bug fixes * Fix for bad bug in `posterior_predict()` when factor labels have spaces in lme4-style models * Fix when weights are used in Poisson models ### New features * `posterior_linpred()` gains an `XZ` argument to output the design matrix # rstanarm 2.11.1 ### Bug fixes * Requiring manually specifying offsets when model has an offset and newdata is not NULL ### New features * `stan_biglm()` function that somewhat supports `biglm::biglm` * `as.array()` method for stanreg objects # rstanarm 2.10.1 ### Bug fixes * Works with devtools now ### New features * `k_threshold` argument to `loo()` to do PSIS-LOO+ * `kfold()` for K-fold CV * Ability to use sparse X matrices (slowly) for many models if memory is an issue ### rstanarm 2.9.0-4 ### Bug fixes * `posterior_predict()` with newdata now works correctly for ordinal models * `stan_lm()` now works when intercept is omitted * `stan_glmer.fit()` no longer permit models with duplicative group-specific terms since they don't make sense and are usually a mistake on the user's part * `posterior_predict()` with lme4-style models no longer fails if there are spaces or colons in the levels of the grouping variables * `posterior_predict()` with ordinal models outputs a character matrix now ### New features * `pp_validate()` function based on the BayesValidate package by Sam Cook * `posterior_vs_prior()` function to visualize the effect of conditioning on the data * Works (again) with R versions back to 3.0.2 
(untested though) # rstanarm 2.9.0-3 ### Bug fixes * Fix problem with models that had group-specific coefficients, which were mislabled. Although the parameters were estimated correctly, users of previous versions of rstanarm should run such models again to obtain correct summaries and posterior predictions. Thanks to someone named Luke for pointing this problem out on stan-users. * Vignettes now view correctly on the CRAN webiste thanks to Yihui Xie * Fix problem with models without intercepts thanks to Paul-Christian Buerkner * Fix problem with specifying binomial 'size' for posterior_predict using newdata * Fix problem with lme4-style formulas that use the same grouping factor multiple times * Fix conclusion in rstanarm vignette thanks to someone named Michael ### New features * Group-specific design matrices are kept sparse throughout to reduce memory consumption * The `log_lik()` function now has a `newdata` argument * New vignette on hierarchical partial pooling # rstanarm 2.9.0-1 Initial CRAN release rstanarm/MD50000644000176200001440000006123614552326563012434 0ustar liggesusers6919925e09e0833154a1e88ab2bfd3ad *DESCRIPTION 9a6db979d83d1bf6665ce91bc0680fea *NAMESPACE 410816177fc9b2c7649f3ab1f46bd844 *NEWS.md d99de6d1db07f50dfb2d86eaabec008b *R/as.matrix.stanreg.R cd858bfd290da5f036be1052babb060c *R/bayes_R2.R 5c872f46c1bcef72e9d613c639f9b606 *R/data_block.R 751146549bd4fe1df4945f4d6502e51a *R/doc-QR.R ec81fc4c442dd384d3b69926d94ecb16 *R/doc-adapt_delta.R abc0c3c61e07ce5787746194505885f1 *R/doc-algorithms.R 552dd7647f0694420710d85bcba40330 *R/doc-datasets.R 7905a941dec99f0790c5391ebd644419 *R/doc-example_jm.R dbc21cb0b145fae79c2d6bbe14ed9aa9 *R/doc-example_model.R 9d6421c1ebee874bf21a4ba571fcb3a3 *R/doc-modeling-functions.R 565702b6137181b5e11ed3a47219b8e2 *R/doc-rstanarm-deprecated.R 695567d997f03aefb0deee69c7717aec *R/doc-rstanarm-package.R 231699330e252997db2552fc68a9113f *R/draws.R 65579e6f135527acf3b93bef0f96b6d7 *R/jm_data_block.R 
0dba9355a8ee55a953a3b4d3707f1314 *R/jm_make_assoc_parts.R cff2431b4c442ebc7b0afa69165e36d2 *R/jm_make_assoc_terms.R 956471cfb96db3dca7f2c23ae74f38bb *R/launch_shinystan.R 4ea2e5ea7a30f96d458dd2c27da396f6 *R/log_lik.R 0fd4a9c8c2a29e31b11e33bc6747e28e *R/loo-kfold.R 01ea5427282bd0ec30dc82ca3410cf8f *R/loo-prediction.R 3498577b20797af379452978bab17bdd *R/loo.R f8e76c85e32f46d2d21a17cb9fb5247a *R/misc.R 4ad533add7fc8fbf2740c303dd2b392c *R/neg_binomial_2.R 38364fb236b472255eab252bfab074cd *R/plots.R f85060759a99919459de05234ef857de *R/posterior_interval.R 995dd93f1a078f0cc1307abdd8a310cb *R/posterior_linpred.R 98ffc405670a4f9e97d3d2627e0d5457 *R/posterior_predict.R 5005dfc9af4b7269dadfa050ef3142b2 *R/posterior_survfit.R 4bf3d447862d8b4f134f00b171902927 *R/posterior_traj.R 10aa9729c0530818d2591879d31abf33 *R/posterior_vs_prior.R 169c81818aecdd2ee3c841357972d746 *R/pp_check.R 123a3486c40fb24555546c71c8b771d3 *R/pp_data.R a2f5afcb9e5342ce114e877c2b0a5774 *R/pp_validate.R 6a69ee38578ffffd1d8bd8d98932215c *R/predict.R 2eda4f65e06e33f59be2a1d949039d50 *R/predictive_error.R 60e0aa1be0860c800c3509b94d90457e *R/predictive_interval.R b4ed09aa2f9b3bb40d6d79e326e7d764 *R/print-and-summary.R a41bb76e32e39594c195867f3952e90f *R/prior_summary.R 7cc38ca22135bacc8d886e6cbe588513 *R/priors.R c120b4aa2985cbc7adb5f86d6e4d4701 *R/ps_check.R 3f87676fb7274f6d4c9000e28f477963 *R/simulate_b_pars.R 2b5c5328d5e2a874a140fec207261a32 *R/stan_aov.R fff3375340615f8e99d557630cb8bc24 *R/stan_betareg.R 0d2419a75c95739fbc8dc0fda020fd0b *R/stan_betareg.fit.R aa056e355ddb7e4a48e21a362473a90c *R/stan_biglm.R 0604c5946d2e4b1d8a18d07cddff5b56 *R/stan_biglm.fit.R e0e8fbb9d02c0591c4a676811a330ceb *R/stan_clogit.R 2f7bbf6d27e9595a869701a7b393c5fb *R/stan_gamm4.R 471c33b39c4806258465d6670652b56b *R/stan_glm.R 941082c1edf7fa2c8eb16990df5f145c *R/stan_glm.fit.R 9bed2c8df445f7dec36ef198e71feb66 *R/stan_glmer.R 6170c2bfb3e608693393f858588443b7 *R/stan_jm.R 11c3fe25b7ef276aeddf85fa9f4cf95d *R/stan_jm.fit.R 
59677eb4253f0ea2cea9f159ab2db04a *R/stan_lm.R 5d74f44950b9d441493daba91c9db478 *R/stan_lm.fit.R 91c8eeba90fa8b61cc419b60f42b19ea *R/stan_mvmer.R 4ca046646ed39e926907069ad8816729 *R/stan_nlmer.R a9ac0186a63b57c93d90c52ed954df5c *R/stan_polr.R a92019147f5e0305ac7c291a12f135f3 *R/stan_polr.fit.R cd5b7ef67eccc562b7e7552994264f37 *R/stanmodels.R bb0b82628cd3ccc417df11d522e58670 *R/stanmvreg-methods.R 5d6eaf3250bdcdd109a7a1953aec37f2 *R/stanmvreg.R e32c5bedcdd4f263ca39e80a6a640eb1 *R/stanreg-methods.R 510fe6715c44d140139c9d831a8f1bf0 *R/stanreg-objects.R 09987d26e2735ec1483a10fe48a19948 *R/stanreg.R 3975a677608ed5f2646b22a2f1955061 *R/stanreg_list.R b0744bcdf2317b4501864abe6f0f43d0 *R/zzz.R f61db891eb928123aba4412ca322f508 *build/partial.rdb 9e450d7c2f3ae4c63ef3e3e311fdd377 *build/vignette.rds 7f5b19e929131630ff17a4adcd6530d5 *cleanup cf27452a8deccdbc6e62ba0733a3dc0d *cleanup.win d91453a6f40bc2e6cef2e1ccdb3ebea7 *data/bball1970.rda ae88044c20cdfe9a799ac81cdc3ce987 *data/bball2006.rda f1a5a1f8af2c2461372d5ea50f5aba6c *data/kidiq.rda 73b19515b4e2ee9202fe8f872bb2db7e *data/mortality.rda dc25bb207d74955b59a6e6bc3170de5c *data/pbcLong.rda 5477e53789385028d19a91fe903a9c0c *data/pbcSurv.rda e3bfe37a7e0d0c09c5b0077fa438e788 *data/radon.rda f125b28036600b5de5dafdef0821acbc *data/roaches.rda 790284f10c4cefe67eb8c6d90ede8c3d *data/tumors.rda e19b6716f59737457cc2c0384f83f490 *data/wells.rda a6ea7cfef0262a8a90feca38ea79ab44 *demo/00Index 4576995d368f9dafe35761b5be0c7cba *demo/ARM_Ch03.R 59485e2de4d71f3c99ba7599cf63058d *demo/ARM_Ch04.R 55a6086ba2b24fac7014560071a949a2 *demo/ARM_Ch05.R 0fd55814536179e1e7e20750464fd540 *demo/ARM_Ch07.R e9b50a1ba4914f153c758127e6821ede *demo/ARM_Ch08.R 5f1276b0cee03dc46a78b361ec7ba499 *demo/ARM_Ch09.R b254042805c8b86c77137fc8e729f1a5 *demo/ARM_Ch12_13.R c9adca4f752fd8fd2b96c02b39c378d1 *demo/ARM_Ch14.R 72d7d667b1a3264a38478350c1712e5d *demo/CLEANUP.R e53995362738abd9c782f17e5dd5bb66 *demo/SETUP.R cebf6aed32627b6d5970de16faf5236d *inst/CITATION 
5063a6a1dab5a7b6d769ff0277514aaa *inst/doc/ab-testing.R c8d97f2026732a7d56aab2a7c1e55207 *inst/doc/ab-testing.Rmd 6d18c2f84ddf1a2b9b9b82f6577069ab *inst/doc/ab-testing.html a76e3a1f5354d4da1011b6eadd794a69 *inst/doc/aov.R 9b1ca8d20fe608600691146a9ed74b62 *inst/doc/aov.Rmd b5b07f86cbc34cf7a3863efe8e7bc2a2 *inst/doc/aov.html 40c04cb717ed5269f3fc1944123adb0f *inst/doc/betareg.R b4bd207f6d583290cc3e12829b4946c4 *inst/doc/betareg.Rmd 21cd927378119ee059098d15764c24c5 *inst/doc/betareg.html 312d1b791b287461db4ffc05b595a1b0 *inst/doc/binomial.R a099550b1f127c5e7ac4419dc63cefb1 *inst/doc/binomial.Rmd d738d8ca63771390196558f15c062826 *inst/doc/binomial.html 0c1f4a4896f3a849e5f20ffd566cc326 *inst/doc/children/SETTINGS-gg.txt 5a25a1c5f52b49283e2b302f1d86aa17 *inst/doc/children/SETTINGS-knitr.txt cadd3b9df0350a0114a0c445d10df04a *inst/doc/children/four_steps.txt 1311cbf5efc1b07c4714ffeda012bce1 *inst/doc/children/stan_glm_priors.txt 98450b143ec9bea8d0f24e39873383d4 *inst/doc/continuous.R c73e6f372d8f2725acc6c5fbb3cfdca6 *inst/doc/continuous.Rmd 1d5fff4c0a8eb57b252b40e7f3b509e6 *inst/doc/continuous.html a65df646660ec2cfdb3cf9a14d36daa2 *inst/doc/count.R 82c46c88e4aeea14c203d1e0f1a4d674 *inst/doc/count.Rmd 9d9f5a9691fc3b9dbfc867e4990f7d31 *inst/doc/count.html 101a397456dae1eb50cd1648bfad5f4d *inst/doc/glmer.R 5eb735c84ba11a0b6be40587a4c8e5a3 *inst/doc/glmer.Rmd 4430ab72387dadd35f413d203a6adcb3 *inst/doc/glmer.html 1baf73c220a0e1728aba52e537595ba0 *inst/doc/interaction.rda 2ba666e683d0ee87f7c51b9c567787c5 *inst/doc/jm.R f9a38ca8658f9ba8625320b877792394 *inst/doc/jm.Rmd 2d65915163b3bc19956203314cd39e24 *inst/doc/jm.html a87d51ad1c1a6ade9febe8b124ba9313 *inst/doc/lm.R d50be19705a8996d049aa862f32f77eb *inst/doc/lm.Rmd 5a1a4690ece3d29c9865c55b7f4e2dd2 *inst/doc/lm.html 1baf73c220a0e1728aba52e537595ba0 *inst/doc/mrp-files/interaction.rda 198678076ff2213169023ea8045baef4 *inst/doc/mrp-files/mrp.bib 36c30825047509fe2fde4c3492c0ff0b *inst/doc/mrp-files/mrp_sim.rda 
e1777c1f8d7853696fe5bdc32aa59053 *inst/doc/mrp-files/plot_data.rda 50e409e62175d4f6ff918ad79a6e16ad *inst/doc/mrp-files/preference_by_state.rda 2f3a4b4b36fdff66fb5e6059e03c5d5c *inst/doc/mrp-files/sample_alt.rda b074e8668ceb26acfd890deb8d2cc436 *inst/doc/mrp-files/state_plot_data.rda d448e1dc3115f6a4046909d206a7e8b4 *inst/doc/mrp-files/summary_by_poststrat_var.rda 1766527b299b01873371517b3842a1bc *inst/doc/mrp.R 99a6ebb48b3005e4e0fe4a4bb582364a *inst/doc/mrp.Rmd 198678076ff2213169023ea8045baef4 *inst/doc/mrp.bib 129377a743d137681cbc5cd8a0bf8b40 *inst/doc/mrp.html 36c30825047509fe2fde4c3492c0ff0b *inst/doc/mrp_sim.rda e1777c1f8d7853696fe5bdc32aa59053 *inst/doc/plot_data.rda 0e81325edc3a293a07e5aa31ecb00eb1 *inst/doc/polr.R def674381827f669846db29a2861c446 *inst/doc/polr.Rmd 75b972aa79988741fbae4dc05267b0a4 *inst/doc/polr.html 9074555b60fb5fc365d03defc39f005e *inst/doc/pooling.R dd775e009ebd0d18af96ff2417a633d3 *inst/doc/pooling.Rmd 2dddffd33d9a5a23ed87950f2a6ea64a *inst/doc/pooling.html 50e409e62175d4f6ff918ad79a6e16ad *inst/doc/preference_by_state.rda 75d4e0b5561fb0488c19d1a1802cdee4 *inst/doc/priors.R ac8199a8077317615e44de8074b54ed7 *inst/doc/priors.Rmd cce1b04421ac376ab606fbe7524cc42e *inst/doc/priors.html 48f586ab4094f941881e9263def8971e *inst/doc/rstanarm.R a21ef8be759dfda50544c5fae46cf2ac *inst/doc/rstanarm.Rmd d03fddf43cc555c7e5374fa47e81dad8 *inst/doc/rstanarm.html 2f3a4b4b36fdff66fb5e6059e03c5d5c *inst/doc/sample_alt.rda b074e8668ceb26acfd890deb8d2cc436 *inst/doc/state_plot_data.rda d448e1dc3115f6a4046909d206a7e8b4 *inst/doc/summary_by_poststrat_var.rda 26a5a7f4659a9c6bd1cab7d2e272c025 *inst/include/CODOLS.hpp ffb7b7dc1baa643f3af54923948455fc *inst/include/meta_header.hpp 0bd448bded496922a23b6ff5629f397e *inst/include/tests.cpp 7025a8b34a70cd4295abcce71daff208 *man/QR-argument.Rd ec604e63d92f506d3fbc0857d81a6801 *man/adapt_delta.Rd 0aba4947e9eceeb357f0fc798a58213f *man/as.matrix.stanreg.Rd 53c58cf802532fcdda4a351395740d22 *man/available-algorithms.Rd 
4cdc9077ea03c2b74a3af29e315f6da2 *man/available-models.Rd 7095cd8bb287aaeb806c16fa5b412858 *man/bayes_R2.stanreg.Rd 722264c352d84ead2091f07823d0c5bf *man/example_jm.Rd 8fa7b28a612eacae12848409fb17be8c *man/example_model.Rd 7b78070e4cf45eb7f5b1ac8d0aff641a *man/family.stanmvreg.Rd f27a8a4d68750791e562c8ece8a8b6cc *man/family.stanreg.Rd d30b67d89b4ff08c286c624b9dac80ad *man/figures/logo.svg 5fad10a5cc62c59ea429a5ce90191e2e *man/figures/stanlogo.png 7dc155876d58f3fe96ab0568527a9cdf *man/formula.stanreg.Rd 464b5439fd3ab44ff1345f5477ab34dd *man/get_y.Rd 039c1045df937169093e66410cfd695c *man/kfold.stanreg.Rd 255fc5ac3d6ea9b2b2b442c91c88d230 *man/launch_shinystan.stanreg.Rd 9a5b7b1b8fc1f45f0c2c55cb7f1f5155 *man/log_lik.stanreg.Rd 905baba8c4a6037d8779f385649c5bc3 *man/logit.Rd 4c63ea25a46b1256c9c9fc6d722b2850 *man/loo.stanreg.Rd 788fe130400c029f4fff0c420001cbc6 *man/loo_predict.stanreg.Rd 8e7082585ef14a02d3e9fb7079b13ee6 *man/model.frame.stanmvreg.Rd 93d3837d9824043003a5c480c8f2f435 *man/model.frame.stanreg.Rd 08164673b63907d82558b42f70f1939c *man/model.matrix.stanreg.Rd bd5a5575a1fc8c9d263b5e8c3e571882 *man/neg_binomial_2.Rd b136d1ff6e63a0a525be521153efef94 *man/pairs.stanreg.Rd 5c971346473d22639ac7f124c8aad31a *man/plot.predict.stanjm.Rd 75ee6eec2b587d129ba1ec003964d425 *man/plot.stanreg.Rd f48ad55caa9f9587c0bfbcdcdec0755c *man/plot.survfit.stanjm.Rd dc1ee592c444b66858cba0b2a25b53a2 *man/posterior_interval.stanreg.Rd ff885f5bf272541e15b5248f9d459717 *man/posterior_linpred.stanreg.Rd 74327e957416c16b24a49a9126b2f444 *man/posterior_predict.stanreg.Rd 30c0334809a1eb60cfb30ade91290bce *man/posterior_survfit.Rd 90c18f2048c7d7403774eb691359276c *man/posterior_traj.Rd a6f70dcda242750837edbecb89377f59 *man/posterior_vs_prior.Rd f253176659768a0dabfdaee34a6dfb92 *man/pp_check.stanreg.Rd 79a3f0bdcba58ba53198761984747fdc *man/pp_validate.Rd 9834fc9c2df501bd94347a9329ec3fd4 *man/predict.stanreg.Rd bcd298a781a1af8deb5d79cca819e144 *man/predictive_error.stanreg.Rd 
af5aa927688f0353ee4c84e2bd897ff2 *man/predictive_interval.stanreg.Rd fc6a40c6f43cc8157007c72c985dac62 *man/print.stanreg.Rd 92c8afe4745d755d7743f93d87c12527 *man/print.survfit.stanjm.Rd 218cc14b5e1f3eafa51d6f32877bacbc *man/prior_summary.stanreg.Rd e0515c3549b636944f1381a6e966c2ef *man/priors.Rd be556599a1e42559413b4222fd268c42 *man/ps_check.Rd 7bae546de2a4b0a2a77870d66b0dd252 *man/reexports.Rd a32540c0f1f2f8aeaa657f391ece3924 *man/rstanarm-datasets.Rd e7b6781e0a60e5bd65a3eb8747ebaa47 *man/rstanarm-deprecated.Rd 87e873aa577c5a5655ead65c99412949 *man/rstanarm-package.Rd e425efd76e1719bbb042a1f00a20029a *man/se.Rd c367679b422e6cc8484487ab4d0d17bf *man/stan_betareg.Rd 0967d0c7a7522b6c284980447bbacc4a *man/stan_biglm.Rd 680a6466471111a1b6fca5b3bc76b6f2 *man/stan_clogit.Rd eb13492fe0842ef4eea6e513845790b5 *man/stan_gamm4.Rd 2b1e6d4c932da3ce8799ea305f1eb294 *man/stan_glm.Rd 65aec4530959424223c2fe3601abdcd7 *man/stan_glmer.Rd 1e22d4b901a49d4273523e3ebe3a5470 *man/stan_jm.Rd 2a4488f1d23b5d50f9ff89b8f9fc56f5 *man/stan_lm.Rd 16cf800bd8ac043d6de6272843757543 *man/stan_mvmer.Rd 371dfb077097596ab1528942cdcb726b *man/stan_nlmer.Rd 788ce40c06ad4992d790547dd827080d *man/stan_polr.Rd 1f2a661127735c6840556aa6ae527e00 *man/stanmvreg-methods.Rd 89e24e97883515ddbe7ea83e38b1e2d0 *man/stanreg-draws-formats.Rd 903098d01e5c0ffed35810a07a667bc6 *man/stanreg-methods.Rd 3df1ee6d8126bdfb9feba7525425a28c *man/stanreg-objects.Rd 3375b274958ba0352f076cd7a824e957 *man/stanreg_list.Rd 468aa07cd17be79f8b068dd2f8b8ae11 *man/summary.stanreg.Rd 00903cd96902ccead116ad8813248e0b *man/terms.stanmvreg.Rd f0eb2cdc07ee52f650f5c6f841c91162 *man/terms.stanreg.Rd caac7e31241861b2b383ad3e69bb16fa *src/Makevars 6a5395b0299288c050a339fea3b9daf4 *src/Makevars.win 62cc06b82f7b66c6c2beb038ab23f988 *src/init.cpp c9e2a73cd82ec9606ce0fc3d8f32225e *src/rstanarm-win.def 4f70a2041b1b698f540c54624599a8f1 *src/stan_files/bernoulli.stan f4c06c7882ddbca028bec08a6d1544fd *src/stan_files/binomial.stan 
ebbba9925792c16137fa8c58d35af656 *src/stan_files/continuous.stan e002e2b08d4d5233412ef4f5e4911f7d *src/stan_files/count.stan 345c6316ccf4c635109a897ca7f59418 *src/stan_files/data/NKX.stan 87a8b51861677862620d337cc1f2654f *src/stan_files/data/data_assoc.stan f9f2526d793cd28644f1e1a7737273fc *src/stan_files/data/data_betareg.stan 7a25ab990bbad948cf06d6a812b65021 *src/stan_files/data/data_event.stan 88e3c495f3aa5c8198493b726b5ca9fb *src/stan_files/data/data_glm.stan aa29e009bd39d256f49db0fc14d4bfa2 *src/stan_files/data/data_mvmer.stan 6ebf053c19201f79bffa927b2710b86b *src/stan_files/data/dimensions_mvmer.stan ecead5d98b6e642ef32b585cdfc9bec3 *src/stan_files/data/glmer_stuff.stan f031688376f5231c09c0f2093b0e56f4 *src/stan_files/data/glmer_stuff2.stan e98f95466a644d6b32739c6d9842f8fb *src/stan_files/data/hyperparameters.stan a4aed2a1b0b89af646188064cadf3295 *src/stan_files/data/hyperparameters_assoc.stan c20384382dff5bf3500d577e32ebdb4f *src/stan_files/data/hyperparameters_event.stan 5efdf0b8b40ee428cdfddaf3dc7e7fa2 *src/stan_files/data/hyperparameters_mvmer.stan 0b23a20ee77643a66cc2615ac7530f19 *src/stan_files/data/weights_offset.stan 8fcc12284ac7a02cef0d7febe54a1332 *src/stan_files/functions/SSfunctions.stan 3d21a76540f4fe90956d4a5716af3113 *src/stan_files/functions/bernoulli_likelihoods.stan cb87e61813fcb6ad2f7a4d4db8909050 *src/stan_files/functions/binomial_likelihoods.stan 4f44323a5be2065ec9cbeb7cd421cfc1 *src/stan_files/functions/common_functions.stan acb1e0c74459970cd6ac42cc69855da5 *src/stan_files/functions/continuous_likelihoods.stan 694a66ab3e94bb691cfed9f508c435cb *src/stan_files/functions/count_likelihoods.stan 517ed0de0d9159ff9f49cb7a6e26dfc5 *src/stan_files/functions/jm_functions.stan bfaadd0abbbfd166cf6a571e23a66c23 *src/stan_files/functions/mvmer_functions.stan 827a72357425fcf07f4ef20b5967ca46 *src/stan_files/gqs/gen_quantities_mvmer.stan fd5fd876c760e9851f08021330964d0c *src/stan_files/jm.stan 087cf33be49cac7b9eca6c59d65ea3e1 *src/stan_files/lm.stan 
4c1e040c1d6853a353fbbe4509b3cddd *src/stan_files/model/assoc_evaluate.stan 01de7ab4043cfbcede30982bcf4566c1 *src/stan_files/model/eta_add_Zb.stan 5f94bb583fa9a1508dfd6fda700f891b *src/stan_files/model/eta_no_intercept.stan 1ac79d5547072b5c2213f43be1dbcd43 *src/stan_files/model/eta_z_no_intercept.stan a106d1b942797dfd69de3c6050d31794 *src/stan_files/model/event_lp.stan 3ecbcc584c49b4dbaf40bd9bddfdaeee *src/stan_files/model/make_eta.stan 8087b029207a9c2facb80103c87e4f8e *src/stan_files/model/make_eta_bern.stan b0c11c32941a46b7b4aff6baa0c01165 *src/stan_files/model/make_eta_tmp.stan e2a245c8388fde3b5798e2bfaea7c2d1 *src/stan_files/model/make_eta_tmp2.stan 719c2b621a4deff92ca0273957a9e30e *src/stan_files/model/make_eta_z.stan 2c3c9c102cdefa1db19ff9e1c4b1ae4a *src/stan_files/model/mvmer_lp.stan ff5d5a024299bb5258f95d12d990ce11 *src/stan_files/model/priors_betareg.stan 35073dbbf566596c46edcdf782ccdeec *src/stan_files/model/priors_glm.stan 9c3829677a4e18719c7af8d1860ed325 *src/stan_files/model/priors_mvmer.stan 0d5682005713600154e0f4ae5a32e067 *src/stan_files/mvmer.stan ac93673c1d66981d6a72f169a792abd7 *src/stan_files/parameters/parameters_assoc.stan 9ff39e19885c713dd9f7dd11882db389 *src/stan_files/parameters/parameters_betareg.stan 242afaaa3a17ee3cdbc51928a9e5759b *src/stan_files/parameters/parameters_event.stan 18f7467bb96af547bfe8013b8b6d41f2 *src/stan_files/parameters/parameters_glm.stan 65d327a6fe6eae499745feeb73df9029 *src/stan_files/parameters/parameters_mvmer.stan 2007a1a7f6339ee0e2ec3ea7750066e2 *src/stan_files/polr.stan 7e926ce03bac531df8f252aa484a80e0 *src/stan_files/pre/Brilleman_copyright.stan 8a418619ee32aa946c4fd0cd951e5404 *src/stan_files/pre/Columbia_copyright.stan 55d52013d81d7f661bbffd2e14985a99 *src/stan_files/pre/license.stan 9f75717663bcea1cda7471ab117d4a20 *src/stan_files/tdata/tdata_betareg.stan 9af03babf49a3b89efb548b75cdfb286 *src/stan_files/tdata/tdata_glm.stan 612d6adfad2af048ee7663d152d8a6f0 *src/stan_files/tdata/tdata_mvmer.stan 
09aa07a2c841e2f96f76dd6d5ec84512 *src/stan_files/tparameters/tparameters_betareg.stan 5734fbfa644465c1fc79707bf71a6fb1 *src/stan_files/tparameters/tparameters_glm.stan 24d75d01802f1a8479e87d3568bd09e6 *src/stan_files/tparameters/tparameters_mvmer.stan d9a36422dc66023a6e4e5cc2b1e353d8 *tests/testthat.R e4356e43bc8bd485cb071c697b361b19 *tests/testthat/Rplots.pdf 3a65bba9da5493737ba3c38be5bff538 *tests/testthat/helper.R 26a5a7f4659a9c6bd1cab7d2e272c025 *tests/testthat/include/CODOLS.hpp ffb7b7dc1baa643f3af54923948455fc *tests/testthat/include/meta_header.hpp 0bd448bded496922a23b6ff5629f397e *tests/testthat/include/tests.cpp 4f70a2041b1b698f540c54624599a8f1 *tests/testthat/stan_files/bernoulli.stan f4c06c7882ddbca028bec08a6d1544fd *tests/testthat/stan_files/binomial.stan ebbba9925792c16137fa8c58d35af656 *tests/testthat/stan_files/continuous.stan e002e2b08d4d5233412ef4f5e4911f7d *tests/testthat/stan_files/count.stan 345c6316ccf4c635109a897ca7f59418 *tests/testthat/stan_files/data/NKX.stan 87a8b51861677862620d337cc1f2654f *tests/testthat/stan_files/data/data_assoc.stan f9f2526d793cd28644f1e1a7737273fc *tests/testthat/stan_files/data/data_betareg.stan 7a25ab990bbad948cf06d6a812b65021 *tests/testthat/stan_files/data/data_event.stan 88e3c495f3aa5c8198493b726b5ca9fb *tests/testthat/stan_files/data/data_glm.stan aa29e009bd39d256f49db0fc14d4bfa2 *tests/testthat/stan_files/data/data_mvmer.stan 6ebf053c19201f79bffa927b2710b86b *tests/testthat/stan_files/data/dimensions_mvmer.stan ecead5d98b6e642ef32b585cdfc9bec3 *tests/testthat/stan_files/data/glmer_stuff.stan f031688376f5231c09c0f2093b0e56f4 *tests/testthat/stan_files/data/glmer_stuff2.stan e98f95466a644d6b32739c6d9842f8fb *tests/testthat/stan_files/data/hyperparameters.stan a4aed2a1b0b89af646188064cadf3295 *tests/testthat/stan_files/data/hyperparameters_assoc.stan c20384382dff5bf3500d577e32ebdb4f *tests/testthat/stan_files/data/hyperparameters_event.stan 5efdf0b8b40ee428cdfddaf3dc7e7fa2 
*tests/testthat/stan_files/data/hyperparameters_mvmer.stan 0b23a20ee77643a66cc2615ac7530f19 *tests/testthat/stan_files/data/weights_offset.stan 8fcc12284ac7a02cef0d7febe54a1332 *tests/testthat/stan_files/functions/SSfunctions.stan 3d21a76540f4fe90956d4a5716af3113 *tests/testthat/stan_files/functions/bernoulli_likelihoods.stan cb87e61813fcb6ad2f7a4d4db8909050 *tests/testthat/stan_files/functions/binomial_likelihoods.stan 4f44323a5be2065ec9cbeb7cd421cfc1 *tests/testthat/stan_files/functions/common_functions.stan acb1e0c74459970cd6ac42cc69855da5 *tests/testthat/stan_files/functions/continuous_likelihoods.stan 694a66ab3e94bb691cfed9f508c435cb *tests/testthat/stan_files/functions/count_likelihoods.stan 517ed0de0d9159ff9f49cb7a6e26dfc5 *tests/testthat/stan_files/functions/jm_functions.stan bfaadd0abbbfd166cf6a571e23a66c23 *tests/testthat/stan_files/functions/mvmer_functions.stan 827a72357425fcf07f4ef20b5967ca46 *tests/testthat/stan_files/gqs/gen_quantities_mvmer.stan fd5fd876c760e9851f08021330964d0c *tests/testthat/stan_files/jm.stan 087cf33be49cac7b9eca6c59d65ea3e1 *tests/testthat/stan_files/lm.stan 4c1e040c1d6853a353fbbe4509b3cddd *tests/testthat/stan_files/model/assoc_evaluate.stan 01de7ab4043cfbcede30982bcf4566c1 *tests/testthat/stan_files/model/eta_add_Zb.stan 5f94bb583fa9a1508dfd6fda700f891b *tests/testthat/stan_files/model/eta_no_intercept.stan 1ac79d5547072b5c2213f43be1dbcd43 *tests/testthat/stan_files/model/eta_z_no_intercept.stan a106d1b942797dfd69de3c6050d31794 *tests/testthat/stan_files/model/event_lp.stan 3ecbcc584c49b4dbaf40bd9bddfdaeee *tests/testthat/stan_files/model/make_eta.stan 8087b029207a9c2facb80103c87e4f8e *tests/testthat/stan_files/model/make_eta_bern.stan b0c11c32941a46b7b4aff6baa0c01165 *tests/testthat/stan_files/model/make_eta_tmp.stan e2a245c8388fde3b5798e2bfaea7c2d1 *tests/testthat/stan_files/model/make_eta_tmp2.stan 719c2b621a4deff92ca0273957a9e30e *tests/testthat/stan_files/model/make_eta_z.stan 2c3c9c102cdefa1db19ff9e1c4b1ae4a 
*tests/testthat/stan_files/model/mvmer_lp.stan ff5d5a024299bb5258f95d12d990ce11 *tests/testthat/stan_files/model/priors_betareg.stan 35073dbbf566596c46edcdf782ccdeec *tests/testthat/stan_files/model/priors_glm.stan 9c3829677a4e18719c7af8d1860ed325 *tests/testthat/stan_files/model/priors_mvmer.stan 0d5682005713600154e0f4ae5a32e067 *tests/testthat/stan_files/mvmer.stan ac93673c1d66981d6a72f169a792abd7 *tests/testthat/stan_files/parameters/parameters_assoc.stan 9ff39e19885c713dd9f7dd11882db389 *tests/testthat/stan_files/parameters/parameters_betareg.stan 242afaaa3a17ee3cdbc51928a9e5759b *tests/testthat/stan_files/parameters/parameters_event.stan 18f7467bb96af547bfe8013b8b6d41f2 *tests/testthat/stan_files/parameters/parameters_glm.stan 65d327a6fe6eae499745feeb73df9029 *tests/testthat/stan_files/parameters/parameters_mvmer.stan 2007a1a7f6339ee0e2ec3ea7750066e2 *tests/testthat/stan_files/polr.stan 7e926ce03bac531df8f252aa484a80e0 *tests/testthat/stan_files/pre/Brilleman_copyright.stan 8a418619ee32aa946c4fd0cd951e5404 *tests/testthat/stan_files/pre/Columbia_copyright.stan 55d52013d81d7f661bbffd2e14985a99 *tests/testthat/stan_files/pre/license.stan 9f75717663bcea1cda7471ab117d4a20 *tests/testthat/stan_files/tdata/tdata_betareg.stan 9af03babf49a3b89efb548b75cdfb286 *tests/testthat/stan_files/tdata/tdata_glm.stan 612d6adfad2af048ee7663d152d8a6f0 *tests/testthat/stan_files/tdata/tdata_mvmer.stan 09aa07a2c841e2f96f76dd6d5ec84512 *tests/testthat/stan_files/tparameters/tparameters_betareg.stan 5734fbfa644465c1fc79707bf71a6fb1 *tests/testthat/stan_files/tparameters/tparameters_glm.stan 24d75d01802f1a8479e87d3568bd09e6 *tests/testthat/stan_files/tparameters/tparameters_mvmer.stan 4f8a27eed5d5c3079bd457f9dd754fa0 *tests/testthat/test_loo.R d73fdfd28ccaff29f2879389851e6232 *tests/testthat/test_methods.R caa542efe88851872adca975668e1702 *tests/testthat/test_misc.R 1ee3c187ad7a0d8e93c296c43c64e009 *tests/testthat/test_plots.R c354a2bab6de87388503cdbc9314026c 
*tests/testthat/test_posterior_predict.R 81cf623d9a7a4b8b7ddb5d1fc847f230 *tests/testthat/test_pp_check.R e9c871c0ced82446f3c415b840e0d20f *tests/testthat/test_pp_validate.R d9d67e9b9ec8ebcb3341d1aca63b2952 *tests/testthat/test_predict.R ceb5e85bf0108693b0bcacae04badf9f *tests/testthat/test_stan_betareg.R 4bc551e136966e0a4d99f1cee9ad2f92 *tests/testthat/test_stan_clogit.R 94db814795c3886d3f50daf4d0799dff *tests/testthat/test_stan_functions.R e505ec722cd893ab0c78aad449d912ac *tests/testthat/test_stan_glm.R 4720c3aec4bf5160dd76b62a42834502 *tests/testthat/test_stan_glmer.R e7b9bb16b9a915e3abb3a02fed75a127 *tests/testthat/test_stan_jm.R 084a8b8dd50398082ee3f243676d2a15 *tests/testthat/test_stan_lm.R f4d1a6fed4894b42ea92d59599523e9a *tests/testthat/test_stan_mvmer.R f3964067ffa7077a8768e0b403a0da45 *tests/testthat/test_stan_nlmer.R 3651fd362a999945e27f02fe7f998aaa *tests/testthat/test_stan_polr.R 75e86fca32941a575e3d4177d30974ac *tools/make_cc.R c8d97f2026732a7d56aab2a7c1e55207 *vignettes/ab-testing.Rmd 9b1ca8d20fe608600691146a9ed74b62 *vignettes/aov.Rmd b4bd207f6d583290cc3e12829b4946c4 *vignettes/betareg.Rmd a099550b1f127c5e7ac4419dc63cefb1 *vignettes/binomial.Rmd 0c1f4a4896f3a849e5f20ffd566cc326 *vignettes/children/SETTINGS-gg.txt 5a25a1c5f52b49283e2b302f1d86aa17 *vignettes/children/SETTINGS-knitr.txt cadd3b9df0350a0114a0c445d10df04a *vignettes/children/four_steps.txt 1311cbf5efc1b07c4714ffeda012bce1 *vignettes/children/stan_glm_priors.txt c73e6f372d8f2725acc6c5fbb3cfdca6 *vignettes/continuous.Rmd 82c46c88e4aeea14c203d1e0f1a4d674 *vignettes/count.Rmd 5eb735c84ba11a0b6be40587a4c8e5a3 *vignettes/glmer.Rmd f9a38ca8658f9ba8625320b877792394 *vignettes/jm.Rmd d50be19705a8996d049aa862f32f77eb *vignettes/lm.Rmd 1baf73c220a0e1728aba52e537595ba0 *vignettes/mrp-files/interaction.rda 198678076ff2213169023ea8045baef4 *vignettes/mrp-files/mrp.bib 36c30825047509fe2fde4c3492c0ff0b *vignettes/mrp-files/mrp_sim.rda e1777c1f8d7853696fe5bdc32aa59053 *vignettes/mrp-files/plot_data.rda 
50e409e62175d4f6ff918ad79a6e16ad *vignettes/mrp-files/preference_by_state.rda 2f3a4b4b36fdff66fb5e6059e03c5d5c *vignettes/mrp-files/sample_alt.rda b074e8668ceb26acfd890deb8d2cc436 *vignettes/mrp-files/state_plot_data.rda d448e1dc3115f6a4046909d206a7e8b4 *vignettes/mrp-files/summary_by_poststrat_var.rda 99a6ebb48b3005e4e0fe4a4bb582364a *vignettes/mrp.Rmd def674381827f669846db29a2861c446 *vignettes/polr.Rmd dd775e009ebd0d18af96ff2417a633d3 *vignettes/pooling.Rmd ac8199a8077317615e44de8074b54ed7 *vignettes/priors.Rmd a21ef8be759dfda50544c5fae46cf2ac *vignettes/rstanarm.Rmd rstanarm/inst/0000755000176200001440000000000014551551771013071 5ustar liggesusersrstanarm/inst/doc/0000755000176200001440000000000014551552005013625 5ustar liggesusersrstanarm/inst/doc/count.R0000644000176200001440000000463614551550311015107 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ---- count-roaches-mcmc, results="hide"-------------------------------------- library(rstanarm) data(roaches) # Rescale roaches$roach1 <- roaches$roach1 / 100 # Estimate original model glm1 <- glm(y ~ roach1 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson) # Estimate Bayesian version with stan_glm stan_glm1 <- stan_glm(y ~ roach1 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson, prior = normal(0, 2.5), prior_intercept = normal(0, 5), seed = 12345) ## ---- count-roaches-comparison------------------------------------------------ round(rbind(glm = coef(glm1), stan_glm = coef(stan_glm1)), digits = 2) round(rbind(glm = 
summary(glm1)$coefficients[, "Std. Error"], stan_glm = se(stan_glm1)), digits = 3) ## ---- count-roaches-posterior_predict----------------------------------------- yrep <- posterior_predict(stan_glm1) ## ---- count-roaches-plot-pp_check1-------------------------------------------- prop_zero <- function(y) mean(y == 0) (prop_zero_test1 <- pp_check(stan_glm1, plotfun = "stat", stat = "prop_zero", binwidth = .005)) ## ---- count-roaches-negbin, results="hide"------------------------------------ stan_glm2 <- update(stan_glm1, family = neg_binomial_2) ## ---- count-roaches-plot-pp_check2, fig.width=7, out.width="80%"-------------- prop_zero_test2 <- pp_check(stan_glm2, plotfun = "stat", stat = "prop_zero", binwidth = 0.01) # Show graphs for Poisson and negative binomial side by side bayesplot_grid(prop_zero_test1 + ggtitle("Poisson"), prop_zero_test2 + ggtitle("Negative Binomial"), grid_args = list(ncol = 2)) ## ---- count-roaches-loo------------------------------------------------------- loo1 <- loo(stan_glm1, cores = 2) loo2 <- loo(stan_glm2, cores = 2) loo_compare(loo1, loo2) rstanarm/inst/doc/ab-testing.R0000644000176200001440000002322414551550132016007 0ustar liggesusers## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(collapse = TRUE) knitr::opts_chunk$set(eval = identical(Sys.getenv("NOT_CRAN"), "true")) library(rstanarm) ## ----------------------------------------------------------------------------- set.seed(123) group <- c(rep(1,10), rep(2,12)) group <- factor(c(rep("A",10), rep("B",12))) N <- length(group) hc <- sample(c(-1,1), N, replace = TRUE) effect <- c(3,5) lp <- effect[group] + 0.7*hc y <- rnorm(N, lp, 0.5) experiment <- data.frame(y = y, group = factor(group), hc = hc) experiment ## ----results='hide'----------------------------------------------------------- fit <- stan_glm(y ~ 0 + group + hc, data = experiment, family = gaussian(link="identity"), prior = 
normal(c(3,3,0), 1), seed = 123) ## ----------------------------------------------------------------------------- c(coef(fit), sigma = sigma(fit)) ## ----------------------------------------------------------------------------- #' Quantify Overlapping Proportion #' Compute how much of the smaller distribution overlaps with the larger (i.e. wider) distribution. #' @param large Posterior predictive samples that have larger range than \code{small}. #' @param small Posterior predictive samples that have smaller range than \code{large}. #' @param p Probability to compute prediction interval. #' @return A proportion between 0 and 1 indicating how much of \code{small} is contained in \code{large} given the credible interval specification. overlap_prop <- function(large, small, p = 1) { p_lwr <- (1-p)/2 p_upr <- 1 - p_lwr large_ci <- quantile(large, probs = c(p_lwr, p_upr)) left <- min(large_ci) right <- max(large_ci) indxs <- which(small >= left & small <= right) return(length(indxs)/length(small)) } #' Quantify Overlapping Posterior Predictive Distributions #' Quantify the overlap between posterior samples from two distributions. #' @param a Group A posterior predictive samples. #' @param b Group B posterior predictive samples. #' @param p Probability to compute credible interval. #' @return A proportion between 0 and 1 indicating how much of the credible intervals for \code{a} and \code{b} overlap with one another. 
overlap <- function(a, b, p = 1) { length_a <- dist(range(a)) length_b <- dist(range(b)) if (length_a >= length_b) { out <- overlap_prop(a, b, p) } else if (length_a < length_b) { out <- overlap_prop(b, a, p) } return(out) } ## ----fig.align='center', fig.height=8, fig.width=6---------------------------- pp_a <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = experiment$hc)) pp_b <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = experiment$hc)) pp_a_quant <- quantile(pp_a, probs = c(0.05,0.95)) pp_b_quant <- quantile(pp_b, probs = c(0.05,0.95)) overlap(pp_a, pp_b, p = 0.9) par(mfrow=c(2,1)) # group A hist(pp_a, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a_quant[1], lwd = 2, col = "red") abline(v = pp_a_quant[2], lwd = 2, col = "red") # group B hist(pp_b, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b_quant[1], lwd = 2, col = "red") abline(v = pp_b_quant[2], lwd = 2, col = "red") ## ----fig.align='center', fig.height=6, fig.width=10--------------------------- pp_a0 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = -1)) pp_b0 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = -1)) pp_a1 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = 1)) pp_b1 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = 1)) pp_a0_quant <- quantile(pp_a0, probs = c(0.05,0.95)) pp_b0_quant <- quantile(pp_b0, probs = c(0.05,0.95)) pp_a1_quant <- quantile(pp_a1, probs = c(0.05,0.95)) pp_b1_quant <- quantile(pp_b1, probs = c(0.05,0.95)) par(mfrow=c(2,2)) # group A, x = 0 hist(pp_a0, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A (hc=-1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a0_quant[1], lwd = 2, col = "red") abline(v = pp_a0_quant[2], lwd = 2, col = "red") 
# group B, x = 0 hist(pp_b0, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B (hc=-1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b0_quant[1], lwd = 2, col = "red") abline(v = pp_b0_quant[2], lwd = 2, col = "red") # group A, x = 1 hist(pp_a1, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A (hc=1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a1_quant[1], lwd = 2, col = "red") abline(v = pp_a1_quant[2], lwd = 2, col = "red") # group B, x = 1 hist(pp_b1, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B (hc=1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b1_quant[1], lwd = 2, col = "red") abline(v = pp_b1_quant[2], lwd = 2, col = "red") ## ----fig.align='center', fig.height=5, fig.width=5---------------------------- # prediction interval probabilities ci_p <- seq(0.1,1, by = 0.05) # compute proportions overlap_ab <- sapply(ci_p, function(s){overlap(pp_a, pp_b, s)}) # plot plot(ci_p, overlap_ab, type = "o", pch = 20, xaxt = "n", yaxt = "n", main = "Group A vs Group B", xlab = "Prediction Interval Probability (1-Risk)", ylab = "Overlap Proportion (Group Similarity)") axis(1, seq(0,1,by=0.1), cex.axis = 0.8) axis(2, seq(0,1,by=0.1), cex.axis = 0.8) abline(v = 0.5, lty = 2) ## ----results='hide', message=FALSE, warning=FALSE----------------------------- experiment_bin <- data.frame(group = factor(c("C","D")), y = c(10,14), trials = c(19,22)) fit_group_bin <- stan_glm(cbind(y, trials - y) ~ 0 + group, data = experiment_bin, family = binomial(link="logit"), seed = 123) ## ----fig.align='center', fig.height=5, fig.width=10--------------------------- # pp_c <- posterior_linpred(fit_group_bin, newdata = data.frame(group = factor("C")), transform = TRUE) # pp_d <- posterior_linpred(fit_group_bin, newdata = data.frame(group = factor("D")), transform = TRUE) # below doesn't work as expected (predictions are bigger than the number of trials) # pp_c <- 
posterior_predict(fit_group_bin, newdata = data.frame(group = factor("C"), trials = 19)) # pp_d <- posterior_predict(fit_group_bin, newdata = data.frame(group = factor("D"), trials = 22)) pp <- posterior_predict(fit_group_bin) pp_c <- pp[,1] pp_d <- pp[,2] pp_c_quant <- quantile(pp_c, probs = c(0.05,0.95)) pp_d_quant <- quantile(pp_d, probs = c(0.05,0.95)) # compute overlap overlap(pp_c, pp_d, p = 0.9) # plot # group C par(mfrow=c(1,2)) hist(pp_c, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group C", xlab = "Product Consumption", xlim = c(0,25)) abline(v = pp_c_quant[1], lwd = 2, col = "red") abline(v = pp_c_quant[2], lwd = 2, col = "red") # group D hist(pp_d, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group D", xlab = "Product Consumption", xlim = c(0,25)) abline(v = pp_d_quant[1], lwd = 2, col = "red") abline(v = pp_d_quant[2], lwd = 2, col = "red") ## ----fig.align='center', fig.height=5, fig.width=5---------------------------- # prediction interval probabilities ci_p <- rev(seq(0.1,1, by = 0.05)) # compute proportions overlap_cd <- sapply(ci_p, function(s){overlap(pp_c, pp_d, s)}) # plot plot(ci_p, overlap_cd, type = "o", pch = 20, xaxt = "n", yaxt = "n", main = "Group C vs Group D", xlab = "Prediction Interval Probability (1-Risk)", ylab = "Overlap Proportion (Group Similarity)") axis(1, seq(0,1,by=0.1), cex.axis = 0.8) axis(2, seq(0,1,by=0.1), cex.axis = 0.8) abline(v = 0.5, lty = 2) ## ----------------------------------------------------------------------------- group_a <- experiment$y[experiment$group == "A"] group_b <- experiment$y[experiment$group == "B"] # Relevant dplyr code # group_a <- experiment %>% filter(group == "A") %>% select(y) %>% unlist %>% unname # group_b <- experiment %>% filter(group == "B") %>% select(y) %>% unlist %>% unname t_test <- t.test(x=group_a, y=group_b) t_stat <- abs(t_test$statistic) p_value <- t_test$p.value print(p_value) # You can manually compute the p-value with the following code # p_value 
<- pt(-t_stat, t_test$parameter)*2 # you can manually compute the confidence intervals with the following code # group_a_mean <- mean(group_a) # group_b_mean <- mean(group_b) # v <- sqrt((var(group_a)/length(group_a)) + (var(group_b)/length(group_b))) # ci_lwr <- (group_a_mean - group_b_new_mean) - abs(qt(0.025, t_test$parameter[['df']])*v) # ci_upr <- (group_a_mean - group_b_new_mean) + abs(qt(0.025, t_test$parameter[['df']])*v) ## ----fig.align='center', fig.height=5, fig.width=5---------------------------- dof <- t_test$parameter[["df"]] x <- seq(-10,10,length.out = 1e3) plot(x, dt(x, dof), type = "l", main = "Distribution of Test Statistics Under Null Hypothesis", xlab = "t-statistic value", ylab = "t-distribution density") abline(v=-t_stat, col="red", lwd=2) abline(v=t_stat, col="red", lwd=2) ## ----results='hide'----------------------------------------------------------- fit_hier <- stan_glmer(y ~ 0 + (1 | group) + hc, prior = normal(0, 1), data = experiment, family = gaussian(link="identity"), seed = 123) ## ----------------------------------------------------------------------------- coef(fit_hier) fixef(fit_hier) ranef(fit_hier) rstanarm/inst/doc/ab-testing.html0000644000176200001440000071731514551550132016565 0ustar liggesusers Probabilistic A/B Testing with rstanarm

Probabilistic A/B Testing with rstanarm

Imad Ali

2024-01-16

Abstract

This case study shows how basic A/B testing using Stan and Bayesian methods can be used to facilitate business decisions. In practice, we find this approach useful given its ability to quantify domain-specific business knowledge and hypotheses through the use of prior distributions. Instead of using p-values and confidence intervals, we are able to perform inference with probability intervals estimated from posterior predictions. In addition to the results being highly interpretable, this approach allows us to quantify business risk.

Introduction

A/B testing is an experiment. It is essentially a randomized controlled trial in an industry setting. The test or experiment is conducted on a subset of users in order to determine if a change in service (e.g. user experience) will have a positive impact on the business, before rolling out that change to all the users. (Here we consider static A/B testing where inference is performed after the experiment. For A/B testing where the users are dynamically allocated to the outperforming group during the experiment consider multi-arm bandits.)

Here are a few stylized scenarios where A/B testing could provide useful insight:

  • If you change the order in which information is displayed on a service will users spend more time on the service?
  • If you personalize the cover art for streaming content are users more likely to stream that content?
  • Are users more responsive to a revised recommendation system?
  • Is there a noticeable difference between the three point accuracy of two basketball players?
  • In a drug trial, is there evidence that the treatment is better than the placebo?

Typically, A/B testing involves one group of people being served the existing content (control group, group A) while another group is served different content (treatment group, group B) and, through a measurable indicator, the business wants to determine if there is a difference in reaction between the two groups. If we compare the two groups and find that the difference in the indicator is large (relative to our uncertainty) then we can argue that the different content drove that change. Conversely, if the change was minimal then we may be hesitant to conclude that the different content resulted in any change in behavior at all. In that situation perhaps the content needs to be redesigned and retested.

Most A/B testing approaches used in practice typically rely on frequentist hypothesis testing methods. Not only are the results of these methods difficult to interpret, but they can also be misleading. Terms such as “p-values” and “confidence intervals” are often misinterpreted as probabilities directly related to the quantity of interest (e.g. the difference in means between two groups). P-values are also often used as cutoffs for business decisions. In other words, reaching a statistically significant result is often sufficient to convince a business to move forward with a particular decision.

We argue that these decisions should not be reductively derived from arbitrary cutoffs (e.g. a p-value of less than 0.05). Instead they should be determined by domain-specific experts who understand the industry, with statisticians providing interpretable results that can help these experts make more informed decisions. This case study provides a way for domain-specific experts to apply their knowledge to the statistical inference process of A/B testing through prior distributions. Additionally, the experts can quantify the risk they are willing to take and probabilistically incorporate this into the inference.

Some key benefits to the Bayesian approach outlined in this case study include,

  • Allowing domain-specific experts to apply their knowledge and appetite for risk to statistical inference.
  • Modeling the data rather than defining/computing a test statistic from the data. (This allows us to perform inference on the (predicted) data instead of the parameters.)
  • The ability to describe differences in groups probabilistically rather than using traditional hypothesis testing methods.
  • Quantifying null hypotheses in priors.

We use simple examples to show how to apply Bayesian inference to A/B testing using continuous and count data. The examples used here are analogous to the t-test and the Fisher’s exact test, but the methodology discussed can be applied to data that follow other distributions. The first section considers continuous data (assumed to be generated from the normal distribution) and the second section considers count data (assumed to be generated from the binomial distribution). (If you need a referesher on how convoluted hypothesis testing is, Appendix A goes over the interpretation of p-values using a two-sample t-test as an example.)

At a high-level, we stress that frequentist methods focus on the distribution of the test statistic as opposed to the quantity of interest (i.e. predictions or parameters). In such methods inference is done by understanding how the observed test statistic compares to the distribution of the test statistic under the null hypothesis. Alternatively, the Bayesian approach proposed here allows the statistician to perform inference directly on the quantity of interest (in this case predicted data), which is more transparent and informative in the context of A/B testing.

Continuous Data

This example is analogous to the two sample t-test (specifically Welch’s t-test) where the statistician is interested in testing if there is a noticeable difference between the means of two different samples.

Suppose an online streaming company is interested in testing whether ads affect the consumption of their service. The hypothesis is that reducing ads will increase hourly streaming consumption. Since this decision can be costly if a significant amount of revenue is derived from ads, it would be useful to conduct a test to evaluate the impact of ad reduction. One way to test this is to draw two random samples from the user base, serve them with different levels of ad content, and see if there is a substantial difference in streaming consumption (say hours per day). Suppose we treat the two groups in the following way,

  • Group A (control): streaming service contains ads.
  • Group B (treatment): streaming service contains no ads.

The data collected might look something like the following below. Each observation is a user’s average daily streaming consumption in hours. Suppose we also have an additional (binary) variable hc which defines whether a user is predisposed to being a high consumer of streaming content (a value of 1 represents a high consumer and a value of -1 represents a low consumer).

In order to determine if there is a difference between the groups we need to define a model that predicts the outcome for each group. The data has been generated from the normal distribution so it is appropriate to specify a normal likelihood. (Often we do not know how the data is generated and have to make an assumption about which distribution should be used to model the likelihood.) Since we are modeling the outcome we can include other variables, such as the high consumer indicator hc. Traditional hypothesis testing methods are focused on comparing the outcome of two groups. Here we model the outcome before comparing the groups. This allows us to include additional information in the model which will enable us to perform more granular inferences.

Next we need to specify prior distributions on each of these parameters. This is where the domain-specific expert can provide valuable input. For example, they may believe that (due to poor sampling) the sampled average of daily streaming hours is too low for each group. In such a situation a prior can be applied to coerce the estimated average closer to the value they feel is more appropriate and representative of the population.

Putting these pieces together gives us the model below. \(y\) is the outcome (average streaming hours) and \(sigma\) is the residual standard deviation ( i.e. the standard deviation of \(y\) conditional on the parameters and the data). \(\mu\) is the parameter associated with the variable \(group\) which defines group membership, and \(\beta\) is the parameter associated with the the high consumer indicator. One limitation of this approach is that \(\sigma\) does not vary among groups. However, in this case it is sufficient to assume that the outcome of both groups has the same standard deviation. (In order to allow the standard deviation to vary among groups the model would have to be fit in rstan, which would require defining the model in a Stan file.)

\[ \begin{align*} y_i \sim &\mathcal{N}(\mu_A \cdot groupA_i + \mu_B \cdot groupB_i + \beta \cdot high\_consumer_i, \sigma) \\ \mu_A \sim& \mathcal{N}(3,1) \\ \mu_B \sim& \mathcal{N}(3,1) \\ \beta \sim& \mathcal{N}(0,1) \\ & \mbox{(default prior specified on } \sigma \mbox{)} \end{align*} \]

With regard to priors, we have applied \(\mathcal{N}(3,1)\) distributions on both group effects. The reasoning behind this is twofold:

  1. Based on prior knowledge (past data and/or domain specific experts) we believe that users spend around three hours per day on the service (regardless of what our random sample says).
  2. We allow the hyperparameters for both group groups to be identical to quantify our belief that group B (which received the treatment) is not substantially different from group A. This can be interpreted as incorporating the belief underlying our null hypothesis into the prior. More importantly, this approach allows us to be more conservative when we do our inference. If we end up concluding that the two groups are different, we can say that the difference in behavior was so strong that it overcame our prior belief that the two groups are identical.

Now that we have established our model, we need to fit the model to the data so that we can estimate the parameters. We can do this using the rstanarm package which can fit a Bayesian linear regression model (using the stan_glm() function) without an intercept, and with group membership and additional variables as parameters. We fit the model below.

Recall that Stan uses a sampling algorithm to estimate the joint posterior distribution of the parameters which means that we have samples instead of point estimates for the parameter values. The medians for each parameter are provided below.

With these estimates it looks like Group A had an average consumption of about 3 hours while Group B had an average consumption of about 5 hours. This gives us a difference in consumption of approximately 2 hours. Unfortunately, this assessment does not say anything about how uncertain this difference is. We would like to be able to say something like “we are \(p\%\) sure that the two groups are different enough”.

We can quantify the uncertainty of how different the two estimates are by computing sample quantiles on the posterior predictive distribution. This is often referred to as a credible interval, although the preferred term is predictive interval when describing predictions (and posterior interval when describing parameters).

If we compute the \(90\%\) predictive interval then we can say that \(90\%\) of the posterior predictions for that group lie between that interval. In order for us to evaluate whether the two groups are different enough we can compute the overlap coefficient, which describes the overlap of the prediction intervals for each group as a proportion. For example, suppose there is a \(15\%\) overlap between the \(90\%\) prediction intervals in each of the two groups. This allows us to say, given that we are \(90\%\) certain about where the predictions lie, there’s a \(15\%\) chance that the two groups are similar.

The functions below compute the proportion of overlap between the two groups.

Below we compute the \(0.9\) prediction interval for both groups. Note that the prediction interval choice is arbitrary, and may vary depending on the applied context and the appetite for uncertainty. This is also where we recommend getting input from domain-specific experts. In this case we are willing to accept a \(10\%\) chance of being wrong about where the predictions lie. The closer the prediction interval is to \(1\) the more risk averse the business is with regards to inference.

After computing the \(90\%\) prediction interval for both groups we find an overlap proportion of approximately \(0.25\). Thus, given that we are \(90\%\) sure about our posterior predictions for the two groups, we are about \(75\%\) sure that the two groups are in fact different. Going back to the business context, we can conclude that we are \(75\%\) sure that reducing ads increases daily streaming consumption given our acceptable risk of being \(10\%\) wrong about daily streaming consumption.

Since we modeled the outcome using a predictor (in addition to group membership variables) we can vary the predictor as well as group membership for an observation for more detailed inference. Below we plot the prediction intervals for each group and high consumer variable combination. This allows to us compare the difference in average streaming hours among the two groups for those individuals that were categorized as high/low consumers.

pp_a0 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = -1))
pp_b0 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = -1))
pp_a1 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = 1))
pp_b1 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = 1))
pp_a0_quant <- quantile(pp_a0, probs = c(0.05,0.95))
pp_b0_quant <- quantile(pp_b0, probs = c(0.05,0.95))
pp_a1_quant <- quantile(pp_a1, probs = c(0.05,0.95))
pp_b1_quant <- quantile(pp_b1, probs = c(0.05,0.95))

par(mfrow=c(2,2))
# group A, x = 0
hist(pp_a0, breaks = 50, col = '#808080', border = '#FFFFFF',
     main = "Group A (hc=-1)",
     xlab = "Avg Streaming (hrs)",
     xlim = c(0,10))
abline(v = pp_a0_quant[1], lwd = 2, col = "red")
abline(v = pp_a0_quant[2], lwd = 2, col = "red")
# group B, x = 0
hist(pp_b0, breaks = 50, col = '#808080', border = '#FFFFFF',
     main = "Group B (hc=-1)",
     xlab = "Avg Streaming (hrs)",
     xlim = c(0,10))
abline(v = pp_b0_quant[1], lwd = 2, col = "red")
abline(v = pp_b0_quant[2], lwd = 2, col = "red")
# group A, x = 1
hist(pp_a1, breaks = 50, col = '#808080', border = '#FFFFFF',
     main = "Group A (hc=1)",
     xlab = "Avg Streaming (hrs)",
     xlim = c(0,10))
abline(v = pp_a1_quant[1], lwd = 2, col = "red")
abline(v = pp_a1_quant[2], lwd = 2, col = "red")
# group B, x = 1
hist(pp_b1, breaks = 50, col = '#808080', border = '#FFFFFF',
     main = "Group B (hc=1)",
     xlab = "Avg Streaming (hrs)",
     xlim = c(0,10))
abline(v = pp_b1_quant[1], lwd = 2, col = "red")
abline(v = pp_b1_quant[2], lwd = 2, col = "red")

In the plot below we show how the overlap proportion will vary as the prediction interval varies. To put it differently, it shows how the probabilistic difference between groups varies as risk varies. Notice that the more risk we take when defining our prediction interval (i.e. the closer the prediction interval is to 0) the lower the overlap proportion, and consequentially the more apparent the difference between the two groups.

Count Data

This example is analogous to Fisher’s exact test where the statistician is interested in testing differences in proportions (particularly in the form of a contingency table).

Now, suppose that the business wants to know whether a product sells better if there is a change to the online user interface (UI) that users interact with to buy the product. They run an experiment on two groups and obtain the following results,

  • Group C (control): 10 users out of a sample of 19 purchased the product with the default UI.
  • Group D (treatment): 14 users out of a sample of 22 purchased the product with the alternative UI.

Here we can assume that the data is binomially distributed, in which case we can define the model for the the two groups as follows,

\[ y_i \sim \mbox{Bin}(\mbox{logit}^{-1}(\mu_C \cdot groupC_i + \mu_D \cdot groupD_i), N_i)\\ \]

where \(\mu\) is the parameter for each group, \(group\) is a binary variable indicating group membership, \(y\) is the number of users that purchased the product and \(N\) is the total number of users in each group. Below we fit this model to the data.

Similar to the method described in the previous section we compute and plot the \(90\%\) prediction intervals for the posterior predictions in each group. We also compute the overlap proportion of these two sets of predictions.

Looking at the histograms it’s clear that there’s quite a bit of overlap between the two groups. The overlap proportion is about 0.7. So under our \(90\%\) prediction interval, there is a \(70\%\) chance that there is no difference in behavior when the UI changes. This might suggest that we don’t have strong evidence that the UI change encouraged a change in behavior.

Below we show how the overlap proportion varies based on the amount of risk we’re willing to take when we define our prediction intervals. Similar to the continuous example in the previous section, risk is inversely related to group similarity.

Note, this example involved a really small data set (only one observation for each group). But the same model can easily be extended to many observations within each group. Also, just as we described in the continuous example, we can define a more comprehensive model for the outcome if we had additional predictors.

Benefits of Bayesian Methods

The key benefits that we have discussed include the ability to probabilistically interpret the results of our inference, and the ability to incorporate prior beliefs (i.e. business knowledge and hypotheses) into our models.

Interpretation of probability

With regards to interpretation, there are some advantages with taking a Bayesian inference approach to A/B testing using Stan:

  1. The ability to communicate our results using the intuitive concept of probability.
  2. The ability to quantify business risk using probability when doing inference.

Quantifying our uncertainty probabilistically enables us to make statements like “based on the data collected, the model specified, and the risk we are willing to take; we are 80% certain that the two groups are different.” This is much more interpretable than statements like ‘with a p-value of less than 0.2 we can reject the null hypothesis that the two groups are identical’. While this is not exclusively a Bayesian benefit (i.e. we could have completely excluded priors from our models, estimating the parameters solely from the likelihood of the data), we took advantage of the fact that appropriately implemented Bayesian computational methods rely on robust sampling methods. These samples can then be transformed and used to make probabilistic statements about the posterior predictive distribution, and consequentially about the question being asked.

Incorporating prior beliefs

The ability to define a prior distribution on your parameters is a useful feature of Bayesian methods. Prior information can be incorporated in your model with two choices: the type of the distribution and how the distribution is parametrized.

The type of distribution relates to which distribution you choose to define on the parameters. In the continuous data example we chose the normal distribution. But, since the underlying data (hours streamed per day) cannot be negative, it might be more sensible to define a truncated normal distribution as the prior (which is straightforward to implement in rstan). This gives us the opportunity to model the data generation process more appropriately.

How the prior distribution is parameterized reflects your belief on the value that parameter takes. This gives us the opportunity to quantify business knowledge in prior distributions. In the continuous data example we showed how we parameterized the prior distribution for each group’s parameter to capture our prior belief that the two groups are similar. A similar approach can be taken for the treatment group in the count data example.

With these types of priors, if we concluded that the two groups are in fact different then we could really be sure that the treatment actually changed the treatment group’s behavior. In other words, the treatment group’s observed behavior overcame our prior belief. We could also tune this belief to be more or less strong by adjusting where most of the density/mass of the prior distribution sits. Applying this type of prior would help mitigate false-positive conclusions from this type of analysis.

Conclusion

Below is an abstracted summary of the inference process we’ve gone through to compare groups involved in A/B testing.

  1. Model the indicator that is being measured to track the difference between the two groups.
  2. Compute the prediction interval \(p\) over the posterior predictions of the two groups. \(1-p\) quantifies how much risk the business is willing to take in regards to the predicted indicator. The value of \(p\) should be driven by domain-specific experts.
  3. Compute the proportion \(o\) of how much each interval overlaps with one another. \(o\) defines the similarity between the two groups.

After implementing the steps above, we can construct the following conclusion: given there is a \((1-p) \cdot 100\) percent chance that we are wrong about the predictions from our model, there is a \((1-o) \cdot 100\) percent chance that the two groups are different.

The Bayesian methods outlined in this case study focused on modeling the data generation process and performing inference on the posterior predictive distribution of two groups. We did not need to worry about computing test statistics and determining the distribution of these statistics under the null hypothesis. Nor did we need to calculate p-values to figure out whether the groups involved in the A/B test are different. Instead we performed inference directly on the posterior predictions. By constructing prediction intervals and computing the overlap of these intervals we are able to probabilistically convey how sure we are about the difference between the two groups. Bayesian inference gives statisticians the ability to quantify business information/risk and enables them to communicate uncertainty unambiguously to decision makers, allowing more informed decisions to be made.

Acknowledgements

Thanks to Jonah Gabry and Charles Zhou for feedback on initial drafts.

References

Fisher’s exact test. Wikipedia. Available from https://en.wikipedia.org/wiki/Fisher%27s_exact_test.

Gallo, A. (2017) A Refresher on A/B Testing. Harvard Business Review. https://hbr.org/2017/06/a-refresher-on-ab-testing.

Goodrich, B., Gabry, J., Ali, I. & Brilleman, S. (2019). rstanarm: Bayesian applied regression modeling via Stan. R package version 2.17.4. https://mc-stan.org/.

Kruschke, J.K. (2015). Doing Bayesian Data Analysis - A Tutorial with R, JAGS, and Stan. Elsevier, New York, 2nd edition.

Overlap coefficient. Wikipedia. Available from https://en.wikipedia.org/wiki/Overlap_coefficient.

Stan Development Team (2019). RStan: the R interface to Stan. R package version 2.19.2. https://mc-stan.org/.

Student’s t-test. Wikipedia. Available from https://en.wikipedia.org/wiki/Student's_t-test.

Welch’s t-test. Wikipedia. Available from https://en.wikipedia.org/wiki/Welch%27s_t-test.

Appendix A: Refresher on p-values

Recall that frequentist methods of hypothesis testing involve constructing a test statistic with the available data. Then, using the distribution of that test statistic under the null hypothesis, you can determine the probability of observing statistics that are more extreme than the one calculated. This is known as a p-value. A small p-value suggests a small probability of observing a more extreme test statistic, which in turn means that it is unlikely for that statistic to have been generated under the null hypothesis. Since the statistic is computed from the data this suggests that the data itself is unlikely to have been generated under the null hypothesis. The value of how small a p-value should be to arrive at this conclusion is up to the statistician.

As an example consider the data associated with Group A and Group B in the continuous data section. The null hypothesis is whether the two groups have equal means. Below we compute Welch’s test statistic and p-value given the data.

The p-value in this case is really small, approximately zero. We can visualize this result. Since we know that the test statistic is t-distributed we can plot the distribution of the test statistic under the null, along with the test statistic calculated with the observed data. This is illustrated below. The red lines are the (two-tailed) test statistics calculated from the data.

Given the small p-value we can make the following sequence of conclusions:

  1. The computed test statistic is unlikely to occur under the null hypothesis.
  2. The data used to compute this statistic is unlikely to have been generated under the null hypothesis.
  3. Therefore the null hypothesis must be invalid and can be rejected, allowing us to conclude that the two groups are different.

Notice how far removed we are from the data and the observed data generation process. Once we calculate the test statistic we step away from the distribution of the data itself and start dealing with the distribution of the test statistic under the null. We were also unable to encode any prior belief or business knowledge into our inference.

Appendix B: Hierarchical Example

Here we show how to use hierarchical (or multilevel) models as an alternative modeling approach when performing A/B tests. Using the data in the continuous example we want to build a model where we account for group-level intercepts while allowing information to be shared among the groups.

\[ \begin{align*} y_{i=A} \sim &\mathcal{N}(\mu_A + \beta \cdot high\_consumer_{i=A}, \sigma) \\ y_{i=B} \sim &\mathcal{N}(\mu_B + \beta \cdot high\_consumer_{i=B}, \sigma) \\ \beta \sim& \mathcal{N}(0,1) \\ & \mbox{(default priors specified on covariance matrix and } \sigma \mbox{)} \end{align*} \]

Below we fit the model.

With this modeling approach we can perform the same inferences as we have shown above while accounting for the hierarchical nature of the data.

rstanarm/inst/doc/polr.R0000644000176200001440000000470514551551736014744 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ----polr-tobgp-mass---------------------------------------------------------- library(MASS) print(polr(tobgp ~ agegp + alcgp, data = esoph), digits = 1) ## ----polr-tobgp-mcmc, results="hide"------------------------------------------ library(rstanarm) post0 <- stan_polr(tobgp ~ agegp + alcgp, data = esoph, prior = R2(0.25), prior_counts = dirichlet(1), seed = 12345) ## ----------------------------------------------------------------------------- print(post0, digits = 1) ## ---- polr-tobgp-cutpoints, echo=FALSE---------------------------------------- zeta_medians <- round(apply(rstan::extract(post0$stanfit, pars = "zeta")[[1]], 2, median), digits = 2) ## ----polr-birthwt-recodes----------------------------------------------------- data("birthwt", package = "MASS") birthwt$race <- factor(birthwt$race, levels = 1:3, labels = c("white", "black", "other")) birthwt$bwt <- birthwt$bwt / 1000 # convert from grams to kilograms birthwt$low <- factor(birthwt$low, levels = 0:1, labels = c("no", "yes")) ## ----polr-stan_lm, results="hide"--------------------------------------------- post1 <- stan_lm(-bwt ~ smoke + age + race + ptl + ht + ftv, data = birthwt, prior = R2(0.5), seed = 12345) ## ----------------------------------------------------------------------------- print(post1) ## ----polr-birthwt-mcmc, results="hide"---------------------------------------- post2 <- stan_polr(low ~ smoke + age + race + ptl + ht + ftv, 
data = birthwt, prior = R2(0.5), prior_counts = dirichlet(c(1,1)), method = "probit", seed = 12345) ## ---- polr-loo-plot----------------------------------------------------------- plot(loo(post2)) ## ----polr-birthwt-comparison-------------------------------------------------- round(cbind(Linear = coef(post1), Ordinal = coef(post2), Rescaled = coef(post1) / sigma(post1)), 3) rstanarm/inst/doc/pooling.R0000644000176200001440000003044114551551754015433 0ustar liggesusers## ---- knitr-settings, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 7, out.width = "70%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ---- load-data--------------------------------------------------------------- library(rstanarm) data(bball1970) bball <- bball1970 print(bball) ## ---- N-K-y------------------------------------------------------------------- # A few quantities we'll use throughout N <- nrow(bball) K <- bball$AB y <- bball$Hits K_new <- bball$RemainingAB y_new <- bball$RemainingHits ## ---- create-objects, results="hold"------------------------------------------ batting_avg <- function(x) print(format(round(x, digits = 3), nsmall = 3), quote = FALSE) player_avgs <- y / K # player avgs through 45 AB tot_avg <- sum(y) / sum(K) # overall avg through 45 AB cat("Player averages through 45 at-bats:\n") batting_avg(player_avgs) cat("Overall average through 45 at-bats:\n") batting_avg(tot_avg) ## ---- echo=FALSE-------------------------------------------------------------- par(mfrow = c(1,3), las = 1) p_alpha <- function(alpha) { dnorm(alpha, -1, 1) } p_theta <- function(theta) { dnorm(log(theta) - log1p(-theta), -1, 1) / (theta - theta^2) } curve2 <- 
function(expr, limits, xlab, ...) { curve(expr, from = limits[1], to = limits[2], xlab = xlab, lwd = 3, bty = "l", ylab = "", cex.lab = 1.5, ...) } curve2(p_alpha, c(-3, 1), expression(alpha)) text(x = 0.25, y = 0.35, labels = expression(p(alpha)), cex = 1.5) curve2(p_theta, c(0, 1), expression(theta), col = "red", ylim = c(0, 2.5)) text(x = 0.575, y = 1.5, labels = expression(p(theta)), cex = 1.5, col = "red") curve2(p_alpha, c(-3, 1), expression(paste(alpha,", ", theta)), ylim = c(0, 2.5)) curve2(p_theta, c(0,1), col = "red", add = TRUE) text(x = -1, y = 0.65, labels = expression(p(alpha)), cex = 1.5) text(x = -0.5, y = 1.5, labels = expression(p(theta)), cex = 1.5, col = "red") ## ---- full-pooling, results="hide"-------------------------------------------- SEED <- 101 wi_prior <- normal(-1, 1) # weakly informative prior on log-odds fit_pool <- stan_glm(cbind(Hits, AB - Hits) ~ 1, data = bball, family = binomial("logit"), prior_intercept = wi_prior, seed = SEED) ## ---- summary-stats-function-------------------------------------------------- invlogit <- plogis # function(x) 1/(1 + exp(-x)) summary_stats <- function(posterior) { x <- invlogit(posterior) # log-odds -> probabilities t(apply(x, 2, quantile, probs = c(0.1, 0.5, 0.9))) } pool <- summary_stats(as.matrix(fit_pool)) # as.matrix extracts the posterior draws pool <- matrix(pool, # replicate to give each player the same estimates nrow(bball), ncol(pool), byrow = TRUE, dimnames = list(bball$Player, c("10%", "50%", "90%"))) batting_avg(pool) ## ---- no-pooling, results="hide"---------------------------------------------- fit_nopool <- update(fit_pool, formula = . 
~ 0 + Player, prior = wi_prior) nopool <- summary_stats(as.matrix(fit_nopool)) rownames(nopool) <- as.character(bball$Player) batting_avg(nopool) ## ---- no-pooling-print, echo=FALSE-------------------------------------------- batting_avg(nopool) ## ---- partial-pooling, results="hide"----------------------------------------- fit_partialpool <- stan_glmer(cbind(Hits, AB - Hits) ~ (1 | Player), data = bball, family = binomial("logit"), prior_intercept = wi_prior, seed = SEED) ## ---- partial-pooling-shift-draws--------------------------------------------- # shift each player's estimate by intercept (and then drop intercept) shift_draws <- function(draws) { sweep(draws[, -1], MARGIN = 1, STATS = draws[, 1], FUN = "+") } alphas <- shift_draws(as.matrix(fit_partialpool)) partialpool <- summary_stats(alphas) partialpool <- partialpool[-nrow(partialpool),] rownames(partialpool) <- as.character(bball$Player) batting_avg(partialpool) ## ---- plot-observed-vs-estimated---------------------------------------------- library(ggplot2) models <- c("complete pooling", "no pooling", "partial pooling") estimates <- rbind(pool, nopool, partialpool) colnames(estimates) <- c("lb", "median", "ub") plotdata <- data.frame(estimates, observed = rep(player_avgs, times = length(models)), model = rep(models, each = N), row.names = NULL) ggplot(plotdata, aes(x = observed, y = median, ymin = lb, ymax = ub)) + geom_hline(yintercept = tot_avg, color = "lightpink", size = 0.75) + geom_abline(intercept = 0, slope = 1, color = "skyblue") + geom_linerange(color = "gray60", size = 0.75) + geom_point(size = 2.5, shape = 21, fill = "gray30", color = "white", stroke = 0.2) + facet_grid(. 
~ model) + coord_fixed() + scale_x_continuous(breaks = c(0.2, 0.3, 0.4)) + labs(x = "Observed Hits / AB", y = "Predicted chance of hit") + ggtitle("Posterior Medians and 80% Intervals") ## ---- log_p_new--------------------------------------------------------------- newdata <- data.frame(Hits = y_new, AB = K_new, Player = bball$Player) fits <- list(Pooling = fit_pool, NoPooling = fit_nopool, PartialPooling = fit_partialpool) # compute log_p_new matrix with each of the models in 'fits' log_p_new_mats <- lapply(fits, log_lik, newdata = newdata) # for each matrix in the list take the row sums log_p_new <- sapply(log_p_new_mats, rowSums) M <- nrow(log_p_new) head(log_p_new) ## ---- log_p_new-mean---------------------------------------------------------- mean_log_p_new <- colMeans(log_p_new) round(sort(mean_log_p_new, decreasing = TRUE), digits = 1) ## ---- log_sum_exp------------------------------------------------------------- log_sum_exp <- function(u) { max_u <- max(u) a <- 0 for (n in 1:length(u)) { a <- a + exp(u[n] - max_u) } max_u + log(a) } # Or equivalently using vectorization log_sum_exp <- function(u) { max_u <- max(u) max_u + log(sum(exp(u - max_u))) } ## ---- log_mean_exp------------------------------------------------------------ log_mean_exp <- function(u) { M <- length(u) -log(M) + log_sum_exp(u) } ## ----comment=NA--------------------------------------------------------------- new_lps <- lapply(log_p_new_mats, function(x) apply(x, 2, log_mean_exp)) # sum over the data points new_lps_sums <- sapply(new_lps, sum) round(sort(new_lps_sums, decreasing = TRUE), digits = 1) ## ---- loo--------------------------------------------------------------------- loo_compare(loo(fit_partialpool), loo(fit_pool), loo(fit_nopool)) ## ---- ppd--------------------------------------------------------------------- newdata <- data.frame(Hits = y_new, AB = K_new, Player = bball$Player) ppd_pool <- posterior_predict(fit_pool, newdata) ppd_nopool <- posterior_predict(fit_nopool, 
newdata) ppd_partialpool <- posterior_predict(fit_partialpool, newdata) colnames(ppd_pool) <- colnames(ppd_nopool) <- colnames(ppd_partialpool) <- as.character(bball$Player) colMeans(ppd_partialpool) ## ---- clemente---------------------------------------------------------------- z_1 <- ppd_partialpool[, 1] clemente_80pct <- (y[1] + quantile(z_1, prob = c(0.1, 0.9))) / (K[1] + K_new[1]) batting_avg(clemente_80pct) ## ---- ppd-stats--------------------------------------------------------------- ppd_intervals <- function(x) t(apply(x, 2, quantile, probs = c(0.25, 0.75))) ppd_summaries <- (1 / K_new) * rbind(ppd_intervals(ppd_pool), ppd_intervals(ppd_nopool), ppd_intervals(ppd_partialpool)) df_ppd <- data.frame(player = rep(1:length(y_new), 3), y = rep(y_new / K_new, 3), lb = ppd_summaries[, "25%"], ub = ppd_summaries[, "75%"], model = rep(models, each = length(y_new))) ## ---- plot-ppd---------------------------------------------------------------- ggplot(df_ppd, aes(x=player, y=y, ymin=lb, ymax=ub)) + geom_linerange(color = "gray60", size = 2) + geom_point(size = 2.5, color = "skyblue4") + facet_grid(. 
~ model) + labs(x = NULL, y = "batting average") + scale_x_continuous(breaks = NULL) + ggtitle(expression( atop("Posterior Predictions for Batting Average in Remainder of Season", atop("50% posterior predictive intervals (gray bars); observed (blue dots)", "")))) ## ---- event-probabilities, results="hold"------------------------------------- draws_partialpool <- shift_draws(as.matrix(fit_partialpool)) thetas_partialpool <- plogis(draws_partialpool) thetas_partialpool <- thetas_partialpool[,-ncol(thetas_partialpool)] colnames(thetas_partialpool) <- as.character(bball$Player) ability_gt_400 <- thetas_partialpool > 0.4 cat("Pr(theta_n >= 0.400 | y)\n") colMeans(ability_gt_400)[c(1, 5, 10)] some_gt_350 <- apply(thetas_partialpool, 1, function(x) max(x) > 0.35) cat("Pr(at least one theta_n >= 0.350 | y)\n") mean(some_gt_350) ## ---- echo=FALSE-------------------------------------------------------------- thetas_pool <- plogis(as.matrix(fit_pool)) thetas_nopool <- plogis(as.matrix(fit_nopool)) some_gt_350_all <- sapply(list(thetas_pool, thetas_nopool, thetas_partialpool), function(x) apply(x, 1, max) > 0.35) chance_gt_350 <- round(100 * colMeans(some_gt_350_all)) ## ---- ranking----------------------------------------------------------------- reverse_rank <- function(x) 1 + length(x) - rank(x) # so lower rank is better rank <- apply(thetas_partialpool, 1, reverse_rank) t(apply(rank, 1, quantile, prob = c(0.1, 0.5, 0.9))) ## ---- plot-ranks-------------------------------------------------------------- df_rank <- data.frame(name = rep(bball$Player, each = M), rank = c(t(rank))) ggplot(df_rank, aes(rank)) + stat_count(width = 0.8) + facet_wrap(~ name) + scale_x_discrete("Rank", limits = c(1, 5, 10, 15)) + scale_y_discrete("Probability", limits = c(0, 0.1 * M, 0.2 * M), labels = c("0.0", "0.1", "0.2")) + ggtitle("Rankings for Partial Pooling Model") ## ---- plot-best-player-------------------------------------------------------- thetas_nopool <- 
plogis(as.matrix(fit_nopool)) colnames(thetas_nopool) <- as.character(bball$Player) rank_nopool <- apply(thetas_nopool, 1, reverse_rank) is_best_nopool <- rowMeans(rank_nopool == 1) is_best_partialpool <- rowMeans(rank == 1) df_is_best <- data.frame(unit = rep(bball$Player, 2), is_best = c(is_best_partialpool, is_best_nopool), model = rep(c("partial pooling", "no pooling"), each = N)) ggplot(df_is_best, aes(x=unit, y=is_best)) + geom_bar(stat = "identity") + facet_wrap(~ model) + scale_y_continuous(name = "Pr[player is best]") + ggtitle("Who is the Best Player?") + theme(axis.text.x = element_text(angle = -45, vjust = 1, hjust = 0)) ## ---- plot-ppc-stats-mean----------------------------------------------------- pp_check(fit_nopool, plotfun = "stat", stat = "mean") ## ---- plot-ppc-stats---------------------------------------------------------- tstat_plots <- function(model, stats) { lapply(stats, function(stat) { graph <- pp_check(model, plotfun = "stat", stat = stat, seed = SEED) # optional arguments graph + xlab(stat) + theme(legend.position = "none") }) } Tstats <- c("mean", "sd", "min", "max") ppcs_pool <- tstat_plots(fit_pool, Tstats) ppcs_nopool <- tstat_plots(fit_nopool, Tstats) ppcs_partialpool <- tstat_plots(fit_partialpool, Tstats) if (require(gridExtra)) { grid.arrange( arrangeGrob(grobs = ppcs_pool, nrow = 1, left = "Pooling"), arrangeGrob(grobs = ppcs_nopool, nrow = 1, left = "No Pooling"), arrangeGrob(grobs = ppcs_partialpool, nrow = 1, left = "Partial Pooling") ) } ## ---- p-value----------------------------------------------------------------- yrep <- posterior_predict(fit_nopool, seed = SEED) # seed is optional Ty <- sd(y) Tyrep <- apply(yrep, 1, sd) # tail-area probability p <- 1 - mean(Tyrep > Ty) print(p) ## ---- plot-ppc-y-vs-yrep------------------------------------------------------ pp_check(fit_partialpool, plotfun = "hist", nreps = 15, binwidth = 0.025) + ggtitle("Model: Partial Pooling") 
rstanarm/inst/doc/jm.R0000644000176200001440000001571714551550561014376 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ----setup_jm, include=FALSE, message=FALSE----------------------------------- knitr::opts_chunk$set(fig.width=10, fig.height=4) library(rstanarm) ## ----traj_figure, echo=FALSE-------------------------------------------------- # Plot observed longitudinal trajectories for log serum bilirubin ids <- c(25,31:33,36,38:40) pbcLong_subset <- pbcLong[pbcLong$id %in% ids, ] pbcLong_subset <- merge(pbcLong_subset, pbcSurv) pbcLong_subset$Died <- factor(pbcLong_subset$death, labels = c("No", "Yes")) patient_labels <- paste("Patient", 1:8) names(patient_labels) <- ids ggplot() + geom_line(aes(y = logBili, x = year, lty = Died), data = pbcLong_subset) + facet_wrap(~ id, ncol = 4, labeller = labeller(id = patient_labels)) + theme_bw() + ylab("Log serum bilirubin") + xlab("Time (years)") ## ----pbcLong------------------------------------------------------------------ head(pbcLong) ## ----pbcSurv------------------------------------------------------------------ head(pbcSurv) ## ----datasets_help, eval = FALSE---------------------------------------------- # help("datasets", package = "rstanarm") ## ----univariate_fit, results = "hold", message = FALSE, warning = FALSE------- library(rstanarm) mod1 <- stan_jm(formulaLong = logBili ~ sex + trt + year + (year | id), dataLong = pbcLong, formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, time_var = "year", chains = 1, refresh = 2000, seed = 12345) 
## ----print, echo = FALSE------------------------------------------------------ alpha_mod1 <- as.data.frame(mod1)[["Assoc|Long1|etavalue"]] alpha_median <- round(median(alpha_mod1), 3) print(mod1) ## ----summary------------------------------------------------------------------ summary(mod1, probs = c(.025,.975)) ## ----VarCorr------------------------------------------------------------------ as.data.frame(VarCorr(mod1)) ## ----assoc_etaslope, eval = FALSE--------------------------------------------- # mod2 <- stan_jm(formulaLong = logBili ~ sex + trt + year + (year | id), # dataLong = pbcLong, # formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, # dataEvent = pbcSurv, # assoc = c("etavalue", "etaslope"), # time_var = "year", # chains = 1, refresh = 2000, seed = 12345) ## ----fitmodel_mv_ev_ev, warning=FALSE, message=FALSE-------------------------- mod3 <- stan_jm( formulaLong = list( logBili ~ sex + trt + year + (year | id), albumin ~ sex + trt + year + (year | id)), formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataLong = pbcLong, dataEvent = pbcSurv, time_var = "year", chains = 1, refresh = 2000, seed = 12345) ## ----results_print------------------------------------------------------------ print(mod3) ## ----results_summary---------------------------------------------------------- summary(mod3, pars = "assoc") ## ----plots_872312------------------------------------------------------------- p1 <- posterior_traj(mod3, m = 1, ids = 6:8) pp1 <- plot(p1, plot_observed = TRUE) pp1 ## ----plots_555762------------------------------------------------------------- p2 <- posterior_traj(mod3, m = 2, ids = 6:8) pp2 <- plot(p2, plot_observed = TRUE) pp2 ## ----plots_65662-------------------------------------------------------------- p3 <- posterior_traj(mod3, m = 1, ids = 6:8, extrapolate = TRUE) pp3 <- plot(p3, plot_observed = TRUE, vline = TRUE) pp3 ## ----plots_998889------------------------------------------------------------- p4 <- 
posterior_traj(mod3, m = 2, ids = 6:8, extrapolate = TRUE) pp4 <- plot(p4, plot_observed = TRUE, vline = TRUE) pp4 ## ----plots_23812-------------------------------------------------------------- p5 <- posterior_survfit(mod3, ids = 6:8) pp5 <- plot(p5) pp5 ## ----plots_987321, fig.height=13---------------------------------------------- plot_stack_jm(yplot = list(pp3, pp4), survplot = pp5) ## ----newdata_23188------------------------------------------------------------ ndL <- pbcLong[pbcLong$id == 8, , drop = FALSE] ndE <- pbcSurv[pbcSurv$id == 8, , drop = FALSE] ndL$id <- paste0("new_patient") ndE$id <- paste0("new_patient") ## ----plots_999333------------------------------------------------------------- p6 <- posterior_traj(mod3, m = 1, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp6 <- plot(p6, plot_observed = TRUE, vline = TRUE) pp6 ## ----plots_122223------------------------------------------------------------- p7 <- posterior_traj(mod3, m = 2, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp7 <- plot(p7, plot_observed = TRUE, vline = TRUE) pp7 ## ----plots_65401-------------------------------------------------------------- p8 <- posterior_survfit(mod3, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp8 <- plot(p8) pp8 ## ----plots_0089231, fig.height=13--------------------------------------------- plot_stack_jm(yplot = list(pp6, pp7), survplot = pp8) ## ----b_pars_23123------------------------------------------------------------- c(ranef(mod3)[["Long1"]][["id"]][8,], ranef(mod3)[["Long2"]][["id"]][8,]) ## ----b_pars_5436765----------------------------------------------------------- colMeans(attr(p6, "b_new")) ## ----newdata_19213------------------------------------------------------------ ndL <- expand.grid(year = seq(0, 10, 1), sex = c("m", "f"), trt = 0:1) ndL$id <- rep(c("male_notrt", "female_notrt", "male_trt", "female_trt"), each = 11) ndL <- ndL[, c(4,1,2,3)] str(ndL) ## 
----plot_traj_218391--------------------------------------------------------- p1 <- posterior_traj(mod3, m = 1, newdataLong = ndL, dynamic = FALSE) plot(p1) + ggplot2::coord_cartesian(ylim = c(-10,15)) ## ----fixef_2132--------------------------------------------------------------- fixef(mod3)$Long1 ## ----ranef_5664--------------------------------------------------------------- VarCorr(mod3) ## ----standsurv---------------------------------------------------------------- p1 <- posterior_survfit(mod3, standardise = TRUE, times = 0) head(p1) # data frame with standardised survival probabilities plot(p1) # plot the standardised survival curve rstanarm/inst/doc/mrp.bib0000644000176200001440000000634613540072577015122 0ustar liggesusers@article{little1993post, title={Post-stratification: a modeler's perspective}, author={Little, Roderick JA}, journal={Journal of the American Statistical Association}, volume={88}, number={423}, pages={1001--1012}, year={1993}, publisher={Taylor \& Francis Group} } @article{park2004bayesian, title={Bayesian multilevel estimation with poststratification: state-level estimates from national polls}, author={Park, David K and Gelman, Andrew and Bafumi, Joseph}, journal={Political Analysis}, volume={12}, number={4}, pages={375--385}, year={2004}, publisher={Cambridge University Press} } @article{shirley2015hierarchical, title={Hierarchical models for estimating state and demographic trends in US death penalty public opinion}, author={Shirley, Kenneth E and Gelman, Andrew}, journal={Journal of the Royal Statistical Society: Series A (Statistics in Society)}, volume={178}, number={1}, pages={1--28}, year={2015}, publisher={Wiley Online Library} } @article{barr2013random, title={Random effects structure for confirmatory hypothesis testing: Keep it maximal}, author={Barr, Dale J and Levy, Roger and Scheepers, Christoph and Tily, Harry J}, journal={Journal of memory and language}, volume={68}, number={3}, pages={255--278}, year={2013}, 
publisher={Elsevier} } @article{ghitza2013deep, title={Deep interactions with MRP: Election turnout and voting patterns among small electoral subgroups}, author={Ghitza, Yair and Gelman, Andrew}, journal={American Journal of Political Science}, volume={57}, number={3}, pages={762--776}, year={2013}, publisher={Wiley Online Library} } @article{lei20172008, title={The 2008 election: A preregistered replication analysis}, author={Lei, Rayleigh and Gelman, Andrew and Ghitza, Yair}, journal={Statistics and Public Policy}, pages={1--8}, year={2017}, publisher={Taylor \& Francis} } @article{gelman2007struggles, title={Struggles with survey weighting and regression modeling}, author={Gelman, Andrew}, journal={Statistical Science}, pages={153--164}, year={2007}, publisher={JSTOR} } @article{lax2009should, title={How should we estimate public opinion in the states?}, author={Lax, Jeffrey R and Phillips, Justin H}, journal={American Journal of Political Science}, volume={53}, number={1}, pages={107--121}, year={2009}, publisher={Wiley Online Library} } @article{park2004bayesian, title={Bayesian multilevel estimation with poststratification: state-level estimates from national polls}, author={Park, David K and Gelman, Andrew and Bafumi, Joseph}, journal={Political Analysis}, volume={12}, number={4}, pages={375--385}, year={2004}, publisher={Cambridge University Press} } @article{gelman2005analysis, title={Analysis of variance-why it is more important than ever}, author={Gelman, Andrew and others}, journal={The annals of statistics}, volume={33}, number={1}, pages={1--53}, year={2005}, publisher={Institute of Mathematical Statistics} } @article{si2017bayesian, title={Bayesian hierarchical weighting adjustment and survey inference}, author={Si, Yajuan and Trangucci, Rob and Gabry, Jonah Sol and Gelman, Andrew}, journal={arXiv preprint arXiv:1707.08220}, year={2017} }rstanarm/inst/doc/binomial.html0000644000176200001440000144031714551550255016324 0ustar liggesusers Estimating 
Generalized Linear Models for Binary and Binomial Data with rstanarm

Estimating Generalized Linear Models for Binary and Binomial Data with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to estimate generalized linear models (GLMs) for binary (Bernoulli) and Binomial response variables using the stan_glm function in the rstanarm package.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data.
  2. Draw from the posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”. This vignette focuses on Step 1 when the likelihood is the product of conditionally independent binomial distributions (possibly with only one trial per observation).

Likelihood

For a binomial GLM the likelihood for one observation \(y\) can be written as a conditionally binomial PMF \[\binom{n}{y} \pi^{y} (1 - \pi)^{n - y},\] where \(n\) is the known number of trials, \(\pi = g^{-1}(\eta)\) is the probability of success and \(\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}\) is a linear predictor. For a sample of size \(N\), the likelihood of the entire sample is the product of \(N\) individual likelihood contributions.

Because \(\pi\) is a probability, for a binomial model the link function \(g\) maps between the unit interval (the support of \(\pi\)) and the set of all real numbers \(\mathbb{R}\). When applied to a linear predictor \(\eta\) with values in \(\mathbb{R}\), the inverse link function \(g^{-1}(\eta)\) therefore returns a valid probability between 0 and 1.

The two most common link functions used for binomial GLMs are the logit and probit functions. With the logit (or log-odds) link function \(g(x) = \ln{\left(\frac{x}{1-x}\right)}\), the likelihood for a single observation becomes

\[\binom{n}{y}\left(\text{logit}^{-1}(\eta)\right)^y \left(1 - \text{logit}^{-1}(\eta)\right)^{n-y} = \binom{n}{y} \left(\frac{e^{\eta}}{1 + e^{\eta}}\right)^{y} \left(\frac{1}{1 + e^{\eta}}\right)^{n - y}\]

and the probit link function \(g(x) = \Phi^{-1}(x)\) yields the likelihood

\[\binom{n}{y} \left(\Phi(\eta)\right)^{y} \left(1 - \Phi(\eta)\right)^{n - y},\]

where \(\Phi\) is the CDF of the standard normal distribution. The differences between the logit and probit functions are minor and – if, as rstanarm does by default, the probit is scaled so its slope at the origin matches the logit’s – the two link functions should yield similar results. With stan_glm, binomial models with a logit link function can typically be fit slightly faster than the identical model with a probit link because of how the two models are implemented in Stan. Unless the user has a specific reason to prefer the probit link, we recommend the logit simply because it will be slightly faster and more numerically stable.

In theory, there are infinitely many possible link functions, although in practice only a few are typically used. Other common choices are the cauchit and cloglog functions, which can also be used with stan_glm (every link function compatible with glm will work with stan_glm).

Priors

A full Bayesian analysis requires specifying prior distributions \(f(\alpha)\) and \(f(\boldsymbol{\beta})\) for the intercept and vector of regression coefficients. When using stan_glm, these distributions can be set using the prior_intercept and prior arguments. The stan_glm function supports a variety of prior distributions, which are explained in the rstanarm documentation (help(priors, package = 'rstanarm')).

As an example, suppose we have \(K\) predictors and believe — prior to seeing the data — that \(\alpha, \beta_1, \dots, \beta_K\) are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give \(\alpha\) and each of the \(\beta\)s this prior (with a scale of 1, say), in the call to stan_glm we would include the arguments prior_intercept = normal(0,1) and prior = normal(0,1).

If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. Step 1 in the “How to Use the rstanarm Package” vignette discusses one such example.

Posterior

With independent prior distributions, the joint posterior distribution for \(\alpha\) and \(\boldsymbol{\beta}\) is proportional to the product of the priors and the \(N\) likelihood contributions:

\[f\left(\alpha,\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N { g^{-1}\left(\eta_i\right)^{y_i} \left(1 - g^{-1}\left(\eta_i\right)\right)^{n_i-y_i}}.\]

This is the posterior distribution that stan_glm will draw from when using MCMC.

Logistic Regression Example

When the logit link function is used the model is often referred to as a logistic regression model (the inverse logit function is the CDF of the standard logistic distribution). As an example, here we will show how to carry out a few parts of the analysis from Chapter 5.4 of Gelman and Hill (2007) using stan_glm.

Gelman and Hill describe a survey of 3200 residents in a small area of Bangladesh suffering from arsenic contamination of groundwater. Respondents with elevated arsenic levels in their wells had been encouraged to switch their water source to a safe public or private well in the nearby area and the survey was conducted several years later to learn which of the affected residents had switched wells. The goal of the analysis presented by Gelman and Hill is to learn about the factors associated with switching wells.

To start, we’ll use dist (the distance from the respondent’s house to the nearest well with safe drinking water) as the only predictor of switch (1 if switched, 0 if not). Then we’ll expand the model by adding the arsenic level of the water in the resident’s own well as a predictor and compare this larger model to the original.

After loading the wells data, we first rescale the dist variable (measured in meters) so that it is measured in units of 100 meters. If we leave dist in its original units then the corresponding regression coefficient will represent the effect of the marginal meter, which is too small to have a useful interpretation.

Before estimating any models we can visualize the distribution of dist100 in the data:

In the plot above the blue bars correspond to the 1737 residents who said they switched wells and darker bars show the distribution of dist100 for the 1283 residents who didn’t switch. As we would expect, for the residents who switched wells, the distribution of dist100 is more concentrated at smaller distances.

A Bayesian version of Gelman and Hill’s initial logistic regression model can be estimated using the stan_glm function. Here we’ll use a Student t prior with 7 degrees of freedom and a scale of 2.5, which, as discussed above, is a reasonable default prior when coefficients should be close to zero but have some chance of being large.

(Intercept)     dist100 
      0.605      -0.621 

The formula, data and family arguments to stan_glm are specified in exactly the same way as for glm. We’ve also added the optional additional arguments chains (how many chains we want to execute), cores (how many cores we want the computer to utilize) and seed (for reproducibility). You can read about other possible arguments in the stan_glm documentation (help(stan_glm, package = 'rstanarm')).

To get a sense for the uncertainty in our estimates we can use the posterior_interval function to get Bayesian uncertainty intervals. The uncertainty intervals are computed by finding the relevant quantiles of the draws from the posterior distribution. For example, to compute 50% intervals we use:

              25%   75%
(Intercept)  0.57  0.65
dist100     -0.69 -0.56

For more on posterior_interval and interpreting the parameter estimates from a Bayesian model see Step 2 in the “How to Use the rstanarm Package” vignette.

Using the coefficient estimates we can plot the predicted probability of switch = 1 (as a function of dist100) together with the observed outcomes:

The plot shows that under this model the predicted probability of switching is a decent bit above 50% for residents living very close to wells with safe drinking water. As expected, larger values of dist100 are associated with lower predicted probabilities of switching. At the extreme (\(\approx 300\) meters), the probability is about 25%.

Next, we incorporate an additional predictor into the model: the arsenic level of water in the respondent’s well. According to Gelman and Hill, “At the levels present in the Bangladesh drinking water, the health risks from arsenic are roughly proportional to exposure, and so we would expect switching to be more likely from wells with high arsenic levels” (pg. 90). We only need to change the formula, so we can use the update function:

(Intercept)     dist100     arsenic 
      0.002      -0.896       0.462 

As expected the coefficient on arsenic is positive. The plot below shows distance on the x-axis and arsenic level on the y-axis with the predicted probability of well-switching mapped to the color of the background tiles (the lighter the color the higher the probability). The observed value of switch is indicated by the color of the points.

We can see that the black points (switch=1) are predominantly clustered in the upper-left region of the plot where the predicted probability of switching is highest.

Another way we can visualize the data and model is to follow Gelman and Hill and create separate plots for varying the arsenic level and distance. Here we’ll plot curves representing the predicted probability of switching for the minimum, maximum and quartile values of both variables.

We can compare our two models (with and without arsenic) using an approximation to Leave-One-Out (LOO) cross-validation, which is a method for estimating out of sample predictive performance and is implemented by the loo function in the loo package:


Computed from 4000 by 3020 log-likelihood matrix

         Estimate   SE
elpd_loo  -2040.1 10.4
p_loo         2.0  0.0
looic      4080.2 20.8
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.

Computed from 4000 by 3020 log-likelihood matrix

         Estimate   SE
elpd_loo  -1968.4 15.7
p_loo         3.2  0.1
looic      3936.9 31.3
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
     elpd_diff se_diff
fit2   0.0       0.0  
fit1 -71.7      12.2  

These results favor fit2 over fit1, as the estimated difference in elpd (the expected log pointwise predictive density for a new dataset) is so much larger than its standard error. LOO penalizes models for adding additional predictors (this helps counter overfitting), but in this case fit2 represents enough of an improvement over fit1 that the penalty for including arsenic is negligible (as it should be if arsenic is an important predictor).

The vignette for the stan_lm function also has an example of using the loo function where the results are quite a bit different from what we see here and some important additional considerations are discussed.

Conditional Logit Models

The previous example relies on the fact that observations are plausibly conditionally independent. In contrast, so-called “case-control studies” require that there are a fixed number of successes and failures within each stratum, and the question is which members of each stratum succeed and fail? The stan_clogit function estimates such a model and is very similar to the clogit function in the survival package. The main syntactical difference is that the clogit function requires that the user call the strata function in the model formula, whereas the stan_clogit function has a required strata argument. In addition, in the stan_clogit case the data must be sorted by the variable passed to strata. The advantage to these changes is that stan_clogit can optionally utilize the multilevel syntax from the lme4 package to specify group-specific terms, rather than the more limited multilevel structure supported by the frailty function in the survival package. The vignette for the stan_glmer function discusses the lme4-style syntax in more detail. For example,

stan_clogit
 family:       binomial [clogit]
 formula:      case ~ spontaneous + induced + (1 | parity)
 observations: 248
------
            Median MAD_SD
spontaneous 2.0    0.4   
induced     1.4    0.3   

Error terms:
 Groups Name        Std.Dev.
 parity (Intercept) 1.4     
Num. levels: parity 6 

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

The posterior predictions are also constrained such that there is exactly one success (in this case) for each of the strata and thus the posterior distribution of the probabilities are also so constrained:

Binomial Models

Although the example in this vignette focused on a binary response variable, we can use nearly identical code if we have the sum of multiple binary variables. For example, imagine a hypothetical dataset similar to the well-switching data but spanning multiple villages. Each observation (each row) of this data.frame corresponds to an entire village: switch[i] is the number of ‘yes’ responses to the well-switching question for village i, dist100[i] is the average distance to the closest well with clean water for village i, etc. We also now have a variable n where n[i] is the number of respondents from village i.

For this data we can estimate a similar model to the one we used in the binary case by changing the formula to

cbind(switch, n - switch) ~ dist100 + arsenic

The left-hand side is now a 2-column matrix where the first column is the number of ‘yes’ responses and the second column is the number of ‘no’ responses (or more generally, the number of successes and number of failures). The same model can also be specified using the proportion of ‘yes’ responses and the total number of responses in each village. This corresponds to the formula

prop_switch ~ dist100 + arsenic

where prop_switch = switch / n is the proportion of ‘yes’ responses. The total number of responses is provided using the weights argument. In this case we would add weights = n to the call to stan_glm.

An example of a similar model can also be found in Step 1 of the “How to Use the rstanarm Package” vignette.

Going Further

In the hypothetical scenario above, if we also have access to the observations for each individual in all of the villages (not just the aggregate data), then a natural extension would be to consider a multilevel model that takes advantage of the inherent multilevel structure of the data (individuals nested within villages). The vignette for the stan_glmer function discusses these models.

References

Gelman, A. and Hill, J. (2007). Data Analysis Using Regression and Multilevel/Hierarchical Models. Cambridge University Press, Cambridge, UK.

rstanarm/inst/doc/pooling.Rmd0000644000176200001440000017321414500256225015747 0ustar liggesusers--- title: "Hierarchical Partial Pooling for Repeated Binary Trials" author: "Bob Carpenter, Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes toc_depth: 3 --- ```{r, knitr-settings, include=FALSE} stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 7, out.width = "70%", fig.align = "center" ) ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette illustrates the effects on posterior inference of pooling data (a.k.a sharing strength) across units for repeated binary trial data. It provides R code to fit and check predictive models for three situations: (a) complete pooling, which assumes each unit is the same, (b) no pooling, which assumes the units are unrelated, and (c) partial pooling, where the similarity among the units is estimated. The note explains with working examples how to (i) fit the models using **rstanarm** and plot the results, (ii) estimate event probabilities, (iii) evaluate posterior predictive densities to evaluate model predictions on held-out data, (iv) rank units by chance of success, (v) perform multiple comparisons in several settings, (vi) replicate new data for posterior $p$-values, and (vii) perform graphical posterior predictive checks. The content of the vignette is based on Bob Carpenter's Stan tutorial *[Hierarchical Partial Pooling for Repeated Binary Trials](https://mc-stan.org/users/documentation/case-studies/pool-binary-trials.html)*, but here we show how to fit the models and carry out predictions and model checking and comparison using **rstanarm**. Most of the text is taken from the original, with some additions and subtractions to make the content more useful for **rstanarm** users. 
The Stan code from the original tutorial has also been entirely removed, as **rstanarm** will fit all of the models in Stan without the user having to write the underlying Stan programs. The Stan code in the original document is a good reference for anyone interested in how these models are estimated "under-the-hood", though the parameterizations used internally by **rstanarm** differ somewhat from those in the original. # Repeated Binary Trials Suppose that for each of $N$ units $n \in 1{:}N$, we observe $y_n$ successes out of $K_n$ trials. For example, the data may consist of * rat tumor development, with $y_n$ rats developing tumors of $K_n$ total rats in experimental control group $n \in 1{:}N$ (Tarone 1982) * surgical mortality, with $y_n$ surgical patients dying in $K_n$ surgeries for hospitals $n \in 1{:}N$ (Spiegelhalter et al. 1996) * baseball batting ability, with $y_n$ hits in $K_n$ at bats for baseball players $n \in 1{:}N$ (Efron and Morris 1975; Carpenter 2009) * machine learning system accuracy, with $y_n$ correct classifications out of $K_n$ examples for systems $n \in 1{:}N$ (ML conference proceedings; Kaggle competitions) In this vignette we use the small baseball data set of Efron and Morris (1975), but we also provide the rat control data of Tarone (1982), the surgical mortality data of Spiegelhalter et al. (1996) and the extended baseball data set of Carpenter (2009). ### Baseball Hits (Efron and Morris 1975) As a running example, we will use the data from Table 1 of (Efron and Morris 1975), which is included in **rstanarm** under the name `bball1970` (it was downloaded 24 Dec 2015 from [here](https://www.swarthmore.edu/NatSci/peverso1/Sports%20Data/JamesSteinData/Efron-Morris%20Baseball/EfronMorrisBB.txt)). It is drawn from the 1970 Major League Baseball season (from both leagues). 
```{r, load-data} library(rstanarm) data(bball1970) bball <- bball1970 print(bball) ``` ```{r, N-K-y} # A few quantities we'll use throughout N <- nrow(bball) K <- bball$AB y <- bball$Hits K_new <- bball$RemainingAB y_new <- bball$RemainingHits ``` The data separates the outcome from the initial 45 at-bats from the rest of the season. After running this code, `N` is the number of units (players). Then for each unit `n`, `K[n]` is the number of initial trials (at-bats), `y[n]` is the number of initial successes (hits), `K_new[n]` is the remaining number of trials (remaining at-bats), and `y_new[n]` is the number of successes in the remaining trials (remaining hits). The remaining data can be used to evaluate the predictive performance of our models conditioned on the observed data. That is, we will "train" on the first 45 at bats and see how well our various models do at predicting the rest of the season. # Pooling With *complete pooling*, each unit is assumed to have the same chance of success. With *no pooling*, each unit is assumed to have a completely unrelated chance of success. With *partial pooling*, each unit is assumed to have a different chance of success, but the data for all of the observed units informs the estimates for each unit. Partial pooling is typically accomplished through hierarchical models. Hierarchical models directly model the population of units. From a population model perspective, no pooling corresponds to infinite population variance, whereas complete pooling corresponds to zero population variance. In the following sections, all three types of pooling models will be fit for the baseball data. # Fitting the Models First we'll create some useful objects to use throughout the rest of this vignette. One of them is a function `batting_avg`, which just formats a number to include three decimal places to the right of zero when printing, as is customary for batting averages. 
```{r, create-objects, results="hold"} batting_avg <- function(x) print(format(round(x, digits = 3), nsmall = 3), quote = FALSE) player_avgs <- y / K # player avgs through 45 AB tot_avg <- sum(y) / sum(K) # overall avg through 45 AB cat("Player averages through 45 at-bats:\n") batting_avg(player_avgs) cat("Overall average through 45 at-bats:\n") batting_avg(tot_avg) ``` ## Complete Pooling The complete pooling model assumes a single parameter $\theta$ representing the chance of success for all units (in this case players). Assuming each player's at-bats are independent Bernoulli trials, the probability distribution for each player's number of hits $y_n$ is modeled as \[ p(y_n \, | \, \theta) \ = \ \mathsf{Binomial}(y_n \, | \, K_n, \theta). \] When viewed as a function of $\theta$ for fixed $y_n$, this is called the likelihood function. Assuming each player is independent leads to the complete data likelihood \[ p(y \, | \, \theta) = \prod_{n=1}^N \mathsf{Binomial}(y_n \, | \, K_n, \theta). \] Using `family=binomial("logit")`, the `stan_glm` function in **rstanarm** will parameterize the model in terms of the log-odds $\alpha$, which are defined by the logit transform as \[ \alpha = \mathrm{logit}(\theta) = \log \, \frac{\theta}{1 - \theta}. \] For example, $\theta = 0.25$ corresponds to odds of $.25$ to $.75$ (equivalently, $1$ to $3$), or log-odds of $\log .25 / .75 = -1.1$. The model is therefore \[ p(y_n \, | \, K_n, \alpha) \ = \ \mathsf{Binomial}(y_n \, | \, K_n, \ \mathrm{logit}^{-1}(\alpha)) \] The inverse logit function is the logistic [sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) from which logistic regression gets its name because the inverse logit function is also the standard logistic Cumulative Distribution Function (CDF), \[ \mathrm{logit}^{-1}(\alpha) = \frac{1}{1 + \exp(-\alpha)} = \theta. 
\] By construction, for any $\alpha \in (-\infty, \infty)$, $\mathrm{logit}^{-1}(\alpha) \in (0, 1)$; the sigmoid converts arbitrary log odds back to the probability scale. We will use a normal distribution with mean $-1$ and standard deviation $1$ as the prior on the log-odds $\alpha$. This is a weakly informative prior that places about 95% of the prior probability in the interval $(-3, 1)$, which inverse-logit transforms to the interval $(0.05, 0.73)$. The prior median $-1$ corresponds to a $0.27$ chance of success. In fact, an even narrower prior is actually motivated here from substantial baseball knowledge. The figure below shows both this prior on $\alpha$ as well as the prior it implies on the probability $\theta$. ```{r, echo=FALSE} par(mfrow = c(1,3), las = 1) p_alpha <- function(alpha) { dnorm(alpha, -1, 1) } p_theta <- function(theta) { dnorm(log(theta) - log1p(-theta), -1, 1) / (theta - theta^2) } curve2 <- function(expr, limits, xlab, ...) { curve(expr, from = limits[1], to = limits[2], xlab = xlab, lwd = 3, bty = "l", ylab = "", cex.lab = 1.5, ...) } curve2(p_alpha, c(-3, 1), expression(alpha)) text(x = 0.25, y = 0.35, labels = expression(p(alpha)), cex = 1.5) curve2(p_theta, c(0, 1), expression(theta), col = "red", ylim = c(0, 2.5)) text(x = 0.575, y = 1.5, labels = expression(p(theta)), cex = 1.5, col = "red") curve2(p_alpha, c(-3, 1), expression(paste(alpha,", ", theta)), ylim = c(0, 2.5)) curve2(p_theta, c(0,1), col = "red", add = TRUE) text(x = -1, y = 0.65, labels = expression(p(alpha)), cex = 1.5) text(x = -0.5, y = 1.5, labels = expression(p(theta)), cex = 1.5, col = "red") ``` To fit the model we call `stan_glm` with the formula `cbind(Hits, AB - Hits) ~ 1`. The left-hand side of the formula specifies the binomial outcome by providing the number of successes (hits) and failures (at-bats) for each player, and the right-hand side indicates that we want an intercept-only model. 
```{r, full-pooling, results="hide"} SEED <- 101 wi_prior <- normal(-1, 1) # weakly informative prior on log-odds fit_pool <- stan_glm(cbind(Hits, AB - Hits) ~ 1, data = bball, family = binomial("logit"), prior_intercept = wi_prior, seed = SEED) ``` The `summary` function will compute all sorts of summary statistics from the fitted model, but here we'll create a small function that will compute just a few posterior summary statistics that we'll want for each of the models we estimate. The `summary_stats` function, defined below, will take a matrix of posterior draws as its input, apply an inverse-logit transformation (to convert from log-odds to probabilities) and then compute the median and 80% interval. ```{r, summary-stats-function} invlogit <- plogis # function(x) 1/(1 + exp(-x)) summary_stats <- function(posterior) { x <- invlogit(posterior) # log-odds -> probabilities t(apply(x, 2, quantile, probs = c(0.1, 0.5, 0.9))) } pool <- summary_stats(as.matrix(fit_pool)) # as.matrix extracts the posterior draws pool <- matrix(pool, # replicate to give each player the same estimates nrow(bball), ncol(pool), byrow = TRUE, dimnames = list(bball$Player, c("10%", "50%", "90%"))) batting_avg(pool) ``` With more data, such as from more players or from the rest of the season, the posterior approaches a delta function around the maximum likelihood estimate and the posterior interval around the central posterior intervals will shrink. Nevertheless, even if we know a player's chance of success exactly, there is a large amount of uncertainty in running $K$ binary trials with that chance of success; using a binomial model fundamentally bounds our prediction accuracy. Although this model will be a good baseline for comparison, we have good reason to believe from a large amount of prior data (players with as many as 10,000 trials) that it is very unlikely that all baseball players have the same chance of success. 
## No Pooling A model with no pooling involves a separate chance-of-success parameter $\theta_n \in [0,1]$ for each player $n$, where the $\theta_n$ are assumed to be independent. **rstanarm** will again parameterize the model in terms of the log-odds, $\alpha_n = \mathrm{logit}(\theta_n)$, so the likelihood then uses the log-odds of success $\alpha_n$ for unit $n$ in modeling the number of successes $y_n$ as \[ p(y_n \, | \, \alpha_n) = \mathsf{Binomial}(y_n \, | \, K_n, \mathrm{logit}^{-1}(\alpha_n)). \] Assuming the $y_n$ are independent (conditional on $\theta$), this leads to the total data likelihood \[ p(y \, | \, \alpha) = \prod_{n=1}^N \mathsf{Binomial}(y_n \, | \, K_n, \mathrm{logit}^{-1}(\alpha_n)). \] To fit the model we need only tweak the model formula used for the full pooling model to drop the intercept and instead include as the only predictor the factor variable `Player`. This is equivalent to estimating a separate intercept on the log-odds scale for each player. We'll also use the `prior` (rather than `prior_intercept`) argument since `Player` is considered a predictor rather than an intercept from R's perspective. Using the same weakly informative prior now means that the each $\alpha_n$ gets a $\mathsf{Normal}(-1, 1)$ prior, independent of the others. ```{r, no-pooling, results="hide"} fit_nopool <- update(fit_pool, formula = . ~ 0 + Player, prior = wi_prior) nopool <- summary_stats(as.matrix(fit_nopool)) rownames(nopool) <- as.character(bball$Player) batting_avg(nopool) ``` ```{r, no-pooling-print, echo=FALSE} batting_avg(nopool) ``` Each 80% interval is much wider than the estimated interval for the population in the complete pooling model; this is to be expected---there are only 45 data units for each parameter here as opposed to 810 in the complete pooling case. If the units each had different numbers of trials, the intervals would also vary based on size. As the estimated chance of success goes up toward 0.5, the 80% intervals gets wider. 
This is to be expected for chance of success parameters, because the variance is maximized when $\theta = 0.5$. Based on our existing knowledge of baseball, the no-pooling model is almost certainly overestimating the high abilities and underestimating lower abilities (Ted Williams, 30 years prior to the year this data was collected, was the last player with a 40% observed success rate over a season, whereas 20% or less is too low for all but a few rare defensive specialists). ## Partial Pooling Complete pooling provides estimated abilities that are too narrowly distributed for the units and removes any chance of modeling population variation. Estimating each chance of success separately without any pooling provides estimated abilities that are too broadly distributed for the units and hence too variable. Clearly some amount of pooling between these two extremes is called for. But how much? A hierarchical model treats the players as belonging to a population of players. The properties of this population will be estimated along with player abilities, implicitly controlling the amount of pooling that is applied. The more variable the (estimate of the) population, the less pooling is applied. Mathematically, the hierarchical model places a prior on the abilities with parameters that are themselves estimated. This model can be estimated using the `stan_glmer` function. ```{r, partial-pooling, results="hide"} fit_partialpool <- stan_glmer(cbind(Hits, AB - Hits) ~ (1 | Player), data = bball, family = binomial("logit"), prior_intercept = wi_prior, seed = SEED) ``` Because `stan_glmer` (like `glmer`) estimates the varying intercepts for `Player` by estimating a single global intercept $\alpha_0$ and individual deviations from that intercept for each player $\delta_n = \alpha_n - \alpha_0$, to get the posterior distribution for each $\alpha_n$ we need to shift each of the posterior draws by the corresponding draw for the intercept. 
We can do this easily using the `sweep` function. ```{r, partial-pooling-shift-draws} # shift each player's estimate by intercept (and then drop intercept) shift_draws <- function(draws) { sweep(draws[, -1], MARGIN = 1, STATS = draws[, 1], FUN = "+") } alphas <- shift_draws(as.matrix(fit_partialpool)) partialpool <- summary_stats(alphas) partialpool <- partialpool[-nrow(partialpool),] rownames(partialpool) <- as.character(bball$Player) batting_avg(partialpool) ``` Here the estimates are less extreme than in the no-pooling case, which we should expect due to the partial pooling. It is also clear from the wide posteriors for the $\theta_n$ that there is considerable uncertainty in the estimates of chance-of-success on an unit-by-unit (player-by-player) basis. ## Observed vs. Estimated Chance of Success Figure 5.4 from (Gelman et al. 2013) plots the observed number of successes $y_n$ for the first $K_n$ trials versus the median and 80\% intervals for the estimated chance-of-success parameters $\theta_n$ in the posterior. The following R code reproduces a similar plot for our data. ```{r, plot-observed-vs-estimated} library(ggplot2) models <- c("complete pooling", "no pooling", "partial pooling") estimates <- rbind(pool, nopool, partialpool) colnames(estimates) <- c("lb", "median", "ub") plotdata <- data.frame(estimates, observed = rep(player_avgs, times = length(models)), model = rep(models, each = N), row.names = NULL) ggplot(plotdata, aes(x = observed, y = median, ymin = lb, ymax = ub)) + geom_hline(yintercept = tot_avg, color = "lightpink", size = 0.75) + geom_abline(intercept = 0, slope = 1, color = "skyblue") + geom_linerange(color = "gray60", size = 0.75) + geom_point(size = 2.5, shape = 21, fill = "gray30", color = "white", stroke = 0.2) + facet_grid(. 
~ model) + coord_fixed() + scale_x_continuous(breaks = c(0.2, 0.3, 0.4)) + labs(x = "Observed Hits / AB", y = "Predicted chance of hit") + ggtitle("Posterior Medians and 80% Intervals") ``` The horizontal axis is the observed rate of success, broken out by player (the overplotting is from players with the same number of successes---they all had the same number of trials in this data). The dots are the posterior medians with bars extending to cover the central 80% posterior interval. Players with the same observed rates are indistinguishable; any differences in estimates are due to MCMC error. The horizontal light pink line has an intercept equal to the overall success rate. The overall success rate is also the posterior mode (i.e., maximum likelihood estimate) for the complete pooling model. The diagonal blue line has intercept 0 and slope 1. Estimates falling on this line make up the maximum likelihood estimates for the no-pooling model. Overall, the plot makes the amount of pooling toward the prior evident. # Posterior Predictive Distribution After we have fit a model using some "training" data, we are usually interested in the predictions of the fitted model for new data, which we can use to * make predictions for new data points; e.g., predict how many hits will Roberto Clemente get in the rest of the season, * evaluate predictions against observed future data; e.g., how well did we predict how many hits Roberto Clemente actually got in the rest of the season, and * generate new simulated data to validate our model fits. With full Bayesian inference, we do not make a point estimate of parameters and use those predictions---we instead use an average of predictions weighted by the posterior. 
Given data $y$ and a model with parameters $\theta$, the posterior predictive distribution for new data $\tilde{y}$ is defined by \[ p(\tilde{y} \, | \, y) \ = \ \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta, \] where $\Theta$ is the support of the parameters $\theta$. What an integral of this form says is that $p(\tilde{y} \, | \, y)$ is defined as a weighted average over the legal parameter values $\theta \in \Theta$ of the likelihood function $p(\tilde{y} \, | \, \theta)$, with weights given by the posterior, $p(\theta \, | \, y)$. While we do not want to get sidetracked with the notational and mathematical subtleties of expectations here, the posterior predictive density reduces to the expectation of $p(\tilde{y} \, | \, \theta)$ conditioned on $y$. ### Evaluating Held-Out Data Predictions Because the posterior predictive density is formulated as an expectation over the posterior, it is possible to compute via MCMC. With $M$ draws $\theta^{(m)}$ from the posterior $p(\theta \, | \, y)$, the posterior predictive log density for new data $y^{\mathrm{new}}$ is given by the MCMC approximation \[ \log \frac{1}{M} \, \sum_{m=1}^M \ p\left( y^{\mathrm{new}} \, | \, \theta^{(m)} \right). \] In practice, this requires care to prevent underflow in floating point calculations; a robust calculation on the log scale is provided below. ### Simulating Replicated Data It is also straightforward to use forward simulation from the probability distribution of the data $p(y \, | \, \theta)$ to generate replicated data $y^{\mathrm{rep}}$ according to the posterior predictive distribution. (Recall that $p(y \, | \, \theta)$ is called the probability distribution when $\theta$ is fixed and the likelihood when $y$ is fixed.) 
With $M$ draws $\theta^{(m)}$ from the posterior $p(\theta \, | \, y)$, replicated data can be simulated by drawing a sequence of $M$ simulations $y^{\mathrm{rep} \ (m)}$, each drawn according to the distribution $p(y \, | \, \theta^{(m)})$. This latter random variate generation can usually be done efficiently (both computationally and statistically) by means of forward simulation from the probability distribution of the data; we provide an example below. ## Prediction for New Trials Efron and Morris's (1975) baseball data includes not only the observed hit rate in the initial 45 at bats, but also includes the data for how the player did for the rest of the season. The question arises as to how well these models predict a player's performance for the rest of the season based on their initial 45 at bats. ### Calibration A well calibrated statistical model is one in which the uncertainty in the predictions matches the uncertainty in further data. That is, if we estimate posterior 50% intervals for predictions on new data (here, number of hits in the rest of the season for each player), roughly 50% of the new data should fall in its predicted 50% interval. If the model is true in the sense of correctly describing the generative process of the data, then Bayesian inference is guaranteed to be well calibrated. Given that our models are rarely correct in this deep sense, in practice we are concerned with testing their calibration on quantities of interest. ### Sharpness Given two well calibrated models, the one that makes the more precise predictions in the sense of having narrower intervals is better predictively (Gneiting et al. 2007). To see this in an example, we would rather have a well-calibrated prediction that there's a 90% chance the number of hits for a player in the rest of the season will fall in $(120, 130)$ than a 90% prediction that the number of hits will fall in $(100, 150)$. 
For the models introduced here, a posterior that is a delta function provides the sharpest predictions. Even so, there is residual uncertainty due to the repeated trials; with $K^{\mathrm{new}}$ further trials and a fixed $\theta_n$ chance of success, the random variable $Y^{\mathrm{new}}_n$ denoting the number of further successes for unit $n$ has a standard deviation from the repeated binary trials of \[ \mathrm{sd}[Y^{\mathrm{new}}_n] \ = \ \sqrt{K^{\mathrm{new}}_n \ \theta_n \, (1 - \theta_n)}. \] ### Why Evaluate with the Predictive Posterior? The predictive posterior density directly measures the probability of seeing the new data. The higher the probability assigned to the new data, the better job the model has done at predicting the outcome. In the limit, an ideal model would perfectly predict the new outcome with no uncertainty (probability of 1 for a discrete outcome or a delta function at the true value for the density in a continuous outcome). This notion is related to the notion of sharpness discussed in the previous section, because if the new observations have higher predictive densities, they're probably within narrower posterior intervals (Gneiting et al. 2007). ### $\log E[p(\tilde{y} \, | \, \theta)]$ vs $E[\log p(\tilde{y} \, | \, \theta)]$ The log of posterior predictive density is defined in the obvious way as \[ \log p(\tilde{y} \, | \, y) = \log \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta. \] This is not a posterior expectation, but rather the log of a posterior expectation. In particular, it should not be confused with the posterior expectation of the log predictive density, which is given by \[ \int_{\Theta} \left( \log p(\tilde{y} \, | \, \theta) \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta. \] Although this is easy to compute in Stan in a stable fashion, it does not produce the same answer (as we show below). 
Because $-\log(u)$ is convex, a little wrangling with [Jensen's inequality](https://en.wikipedia.org/wiki/Jensen%27s_inequality) shows that the expectation of the log is less than or equal to the log of the expectation, \[ \int_{\Theta} \left( \, \log p(\tilde{y} \, | \, \theta) \, \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta \ \leq \ \log \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta \] We'll compute both expectations and demonstrate Jensen's inequality in our running example. The variables `K_new[n]` and `y_new[n]` hold the number of at bats (trials) and the number of hits (successes) for player (unit) `n`. With the held out data we can compute the log density of each data point using the `log_lik` function, which, like `posterior_predict`, accepts a `newdata` argument. The `log_lik` function will return an $M \times N$ matrix, where $M$ is the size of the posterior sample (the number of draws we obtained from the posterior distribution) and $N$ is the number of data points in `newdata`. We can then take the row sums of this matrix to sum over the data points. ```{r, log_p_new} newdata <- data.frame(Hits = y_new, AB = K_new, Player = bball$Player) fits <- list(Pooling = fit_pool, NoPooling = fit_nopool, PartialPooling = fit_partialpool) # compute log_p_new matrix with each of the models in 'fits' log_p_new_mats <- lapply(fits, log_lik, newdata = newdata) # for each matrix in the list take the row sums log_p_new <- sapply(log_p_new_mats, rowSums) M <- nrow(log_p_new) head(log_p_new) ``` We now have the distributions of `log_p_new` in a matrix with a column for each model. For each model, the posterior mean for `log_p_new` will give us \[ \int_{\Theta} \left( \log p(\tilde{y} \, | \, \theta) \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}). 
\] To compute this for each of the models we only need to take the mean of the corresponding column of `log_p_new`. ```{r, log_p_new-mean} mean_log_p_new <- colMeans(log_p_new) round(sort(mean_log_p_new, decreasing = TRUE), digits = 1) ``` From a predictive standpoint, the models are ranked by the amount of pooling they do, with complete pooling being the best, and no pooling being the worst predictively. All of these models do predictions by averaging over their posteriors, with the amount of posterior uncertainty also being ranked in reverse order of the amount of pooling they do. As we will now see, the ranking of the models can change when we compute the posterior expectation of the log predictive density. #### Posterior expectation of the log predictive density The straight path to calculate this would be to define a generated quantity $p(y^{\mathrm{new}} \, | \, y)$, look at the posterior mean computed by Stan, and take its log. That is, \[ \log p(y^{\mathrm{new}} \, | \, y) \ \approx \ \log \frac{1}{M} \, \sum_{m=1}^M p(y^{\mathrm{new}} \, | \, \theta^{(m)}). \] Unfortunately, this won't work in most cases because when we try to compute $p(y^{\mathrm{new}} \, | \, \theta^{(m)})$ directly, it is prone to underflow. For example, 2000 outcomes $y^{\mathrm{new}}_n$, each with likelihood 0.5 for $\theta^{(m)}$, will underflow, because $0.5^{2000}$ is smaller than the smallest positive number that a computer can represent using standard [double-precision floating point](https://en.wikipedia.org/wiki/IEEE_754-1985) (used by Stan, R, etc.). In contrast, if we work on the log scale, $\log p(y^{\mathrm{new}} \, | \, y)$ will not underflow. It's a sum of a bunch of terms of order 1. But we already saw we can't just average the log to get the log of the average. 
To avoid underflow, we're going to use the [log-sum-of-exponentials](https://en.wikipedia.org/wiki/LogSumExp) trick, which begins by noting the obvious, \[ \log \frac{1}{M} \, \sum_{m=1}^M \ p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \ = \ \log \frac{1}{M} \, \sum_{m=1}^M \ \exp \left( \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \right). \] We'll then write that last expression as \[ -\log M + \mathrm{log\_sum\_exp \, } \ \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \] We can compute $\mathrm{log\_sum\_exp}$ stably by subtracting the max value. Suppose $u = u_1, \ldots, u_M$, and $\max(u)$ is the largest $u_m$. We can calculate \[ \mathrm{log\_sum\_exp \, } \ u_m \ = \ \log \sum_{m=1}^M \exp(u_m) \ = \ \max(u) + \log \sum_{m=1}^M \exp(u_m - \max(u)). \] Because $u_m - \max(u) \leq 0$, the exponentiations cannot overflow. They may underflow to zero, but this will not lose precision because of the leading $\max(u)$ term; the only way underflow can arise is if $u_m - \max(u)$ is very small, meaning that it won't add significant digits to $\max(u)$ if it had not underflowed. We can implement $\mathrm{log\_sum\_exp}$ in R as follows: ```{r, log_sum_exp} log_sum_exp <- function(u) { max_u <- max(u) a <- 0 for (n in 1:length(u)) { a <- a + exp(u[n] - max_u) } max_u + log(a) } # Or equivalently using vectorization log_sum_exp <- function(u) { max_u <- max(u) max_u + log(sum(exp(u - max_u))) } ``` and then include the $-\log M$ term to make it `log_mean_exp`: ```{r, log_mean_exp} log_mean_exp <- function(u) { M <- length(u) -log(M) + log_sum_exp(u) } ``` We can then use it to compute the log posterior predictive densities for each of the models: ```{r comment=NA} new_lps <- lapply(log_p_new_mats, function(x) apply(x, 2, log_mean_exp)) # sum over the data points new_lps_sums <- sapply(new_lps, sum) round(sort(new_lps_sums, decreasing = TRUE), digits = 1) ``` Now the ranking is different! 
As expected, the values here are greater than the expectation of the log density due to Jensen's inequality. The partial pooling model appears to be making slightly better predictions than the full pooling model, which in turn is making slightly better predictions than the no pooling model. #### Approximating the expected log predictive density Vehtari, Gelman, and Gabry (2016) shows that the expected log predictive density can be approximated using the `loo` function for each model and then compared across models: ```{r, loo} loo_compare(loo(fit_partialpool), loo(fit_pool), loo(fit_nopool)) ``` The third column is the leave-one-out (loo) approximation to the expected log predictive density. This approximation is only asymptotically valid and with only 18 observations in this case, substantially underestimates the expected log predictive densities found in the previous subsection. Nevertheless, the relative ranking of the models is essentially the same with the pooled and partially pooled models being virtually indistinguishable but much better than the no pooling model. ## Predicting New Observations With **rstanarm** it is straightforward to generate draws from the posterior predictive distribution using the `posterior_predict` function. With this capability, we can either generate predictions for new data or we can apply it to the predictors we already have. There will be two sources of uncertainty in our predictions, the first being the uncertainty in $\theta$ in the posterior $p(\theta \, | \, y)$ and the second being the uncertainty due to the likelihood $p(\tilde{y} \, | \, \theta)$. We let $z_n$ be the number of successes for unit $n$ in $K^{\mathrm{new}}_n$ further trials. It might seem tempting to eliminate that second source of uncertainty and set $z_n^{(m)}$ to its expectation, $\theta_n^{(m)} \, K^{\mathrm{new}}$, at each draw $m$ from the posterior rather than simulating a new value. 
Or it might seem tempting to remove the first source of uncertainty and use the posterior mean (or median or mode or ...) rather than draws from the posterior. Either way, the resulting values would suffice for estimating the posterior mean, but would not capture the uncertainty in the prediction for $y^{\mathrm{new}}_n$ and would thus not be useful in estimating predictive standard deviations or quantiles or as the basis for decision making under uncertainty. In other words, the predictions would not be properly calibrated (in a sense we define below). To predict $z$ for each player we can use the following code: ```{r, ppd} newdata <- data.frame(Hits = y_new, AB = K_new, Player = bball$Player) ppd_pool <- posterior_predict(fit_pool, newdata) ppd_nopool <- posterior_predict(fit_nopool, newdata) ppd_partialpool <- posterior_predict(fit_partialpool, newdata) colnames(ppd_pool) <- colnames(ppd_nopool) <- colnames(ppd_partialpool) <- as.character(bball$Player) colMeans(ppd_partialpool) ``` Translating the posterior number of hits into a season batting average, $\frac{y_n + z_n}{K_n + K^{\mathrm{new}}_n}$, we get an 80% posterior interval of ```{r, clemente} z_1 <- ppd_partialpool[, 1] clemente_80pct <- (y[1] + quantile(z_1, prob = c(0.1, 0.9))) / (K[1] + K_new[1]) batting_avg(clemente_80pct) ``` for Roberto Clemente from the partial pooling model. Part of our uncertainty here is due to our uncertainty in Clemente's underlying chance of success, and part of our uncertainty is due to there being 367 remaining trials (at bats) modeled as binomial. In the remaining at bats for the season, Clemente's success rate (batting average) was $127 / 367 = 0.346$. For each model, the following plot shows each player's posterior predictive 50% interval for predicted batting average (success rate) in his remaining at bats (trials); the observed success rate in the remainder of the season is shown as a blue dot. 
```{r, ppd-stats} ppd_intervals <- function(x) t(apply(x, 2, quantile, probs = c(0.25, 0.75))) ppd_summaries <- (1 / K_new) * rbind(ppd_intervals(ppd_pool), ppd_intervals(ppd_nopool), ppd_intervals(ppd_partialpool)) df_ppd <- data.frame(player = rep(1:length(y_new), 3), y = rep(y_new / K_new, 3), lb = ppd_summaries[, "25%"], ub = ppd_summaries[, "75%"], model = rep(models, each = length(y_new))) ``` ```{r, plot-ppd} ggplot(df_ppd, aes(x=player, y=y, ymin=lb, ymax=ub)) + geom_linerange(color = "gray60", size = 2) + geom_point(size = 2.5, color = "skyblue4") + facet_grid(. ~ model) + labs(x = NULL, y = "batting average") + scale_x_continuous(breaks = NULL) + ggtitle(expression( atop("Posterior Predictions for Batting Average in Remainder of Season", atop("50% posterior predictive intervals (gray bars); observed (blue dots)", "")))) ``` We choose to plot 50% posterior intervals as they are a good single point for checking calibration. Rather than plotting the number of hits on the vertical axis, we have standardized all the predictions and outcomes to a success rate. Because each unit (player) has a different number of subsequent trials (at bats), the posterior intervals are relatively wider or narrower within the plots for each model (more trials imply narrower intervals for the average). Because each unit had the same number of initial observed trials, this variation is primarily due to the uncertainty from the binomial model of outcomes. ### Calibration With 50% intervals, we expect half of our estimates to lie outside their intervals in a well-calibrated model. If fewer than the expected number of outcomes lie in their estimated posterior intervals, we have reason to believe the model is not well calibrated---its posterior intervals are too narrow. This is also true if too many outcomes lie in their estimated posterior intervals---in this case the intervals are too broad. 
Of course, there is variation in the tests as the number of units lying in their intervals is itself a random variable (see the exercises), so in practice we are only looking for extreme values as indicators of miscalibration. Each of the models other than the complete pooling model appears to be reasonably well calibrated, and even the calibration for the complete pooling model is not bad (the variation in chance-of-success among players has low enough variance that the complete pooling model cannot be rejected as a possibility with only the amount of data we used here). ### Sharpness Consider the width of the posterior predictive intervals for the units across the models. The model with no pooling has the broadest posterior predictive intervals and the complete pooling model the narrowest. This is to be expected given the number of observations used to fit each model; 45 each in the no pooling case and 810 in the complete pooling case, and something in between for the partial pooling models. Because the log odds model is doing more pooling, its intervals are slightly narrower than that of the direct hierarchical model. For two well calibrated models, the one with the narrower posterior intervals is preferable because its predictions are tighter. The term introduced for this by Gneiting et al. (2007) is "sharpness." In the limit, a perfect model would provide a delta function at the true answer with a vanishing posterior interval. ## Estimating Event Probabilities The 80% interval in the partial pooling model coincidentally shows us that our model estimates a roughly 10% chance of Roberto Clemente batting 0.400 or better for the season based on batting 0.400 in his first 45 at bats. Not great, but non-trivial. Rather than fishing for the right quantile and hoping to get lucky, we can write a model to directly estimate event probabilities, such as whether Roberto Clemente's batting average is 0.400 or better for the season. 
Event probabilities are defined as expectations of indicator functions over parameters and data. For example, the probability of player $n$'s batting average being 0.400 or better conditioned on the data $y$ is defined by the conditional event probability \[ \mathrm{Pr}\left[ \frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400 \, \Big| \, y \right] \ = \ \int_{\Theta} \mathrm{I}\left[\frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400\right] \ p(z_n \, | \, \theta_n, K^{\mathrm{new}}_n) \ p(\theta \, | \, y, K) \ \mathrm{d}\theta. \] The indicator function $\mathrm{I}[c]$ evaluates to 1 if the condition $c$ is true and 0 if it is false. Because it is just another expectation with respect to the posterior, we can calculate this event probability using MCMC as \[ \mathrm{Pr}\left[\frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400 \, \Big| \, y \right] \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}\left[\frac{(y_n + z_n^{(m)})}{(45 + K^{\mathrm{new}}_n)} \geq 0.400\right]. \] This event is about the season batting average being greater than 0.400. What if we care about ability (chance of success), not batting average (success rate) for the rest of the season? Then we would instead compute $\mathrm{Pr}[\theta_n > 0.4]$. This is defined as a weighted average over the posterior and computed via MCMC as in the previous case. \[ \mathrm{Pr}\left[\theta_n \geq 0.400 \, | \, y \right] \ = \ \int_{\Theta} \mathrm{I}\left[\theta_n \geq 0.400\right] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[\theta_n^{(m)} \geq 0.400]. 
\] ```{r, event-probabilities, results="hold"} draws_partialpool <- shift_draws(as.matrix(fit_partialpool)) thetas_partialpool <- plogis(draws_partialpool) thetas_partialpool <- thetas_partialpool[,-ncol(thetas_partialpool)] colnames(thetas_partialpool) <- as.character(bball$Player) ability_gt_400 <- thetas_partialpool > 0.4 cat("Pr(theta_n >= 0.400 | y)\n") colMeans(ability_gt_400)[c(1, 5, 10)] some_gt_350 <- apply(thetas_partialpool, 1, function(x) max(x) > 0.35) cat("Pr(at least one theta_n >= 0.350 | y)\n") mean(some_gt_350) ``` ## Multiple Comparisons We snuck in a "multiple comparison" event in the last section, namely whether there was some player with a chance of success for hits of .350 or greater. With traditional significance testing over multiple trials, it is common to adjust for falsely rejecting the null hypothesis (a so-called Type I error) by inflating the conventional (and arguably far too low) 5% target for reporting "significance." For example, suppose we have our 18 players with ability parameters $\theta_n$ and we have $N$ null hypotheses of the form $H_0^n: \theta_n < 0.350$. Now suppose we evaluate each of these 18 hypotheses independently at the conventional $p = 0.05$ significance level, giving each a 5% chance of rejecting the null hypothesis in error. When we run all 18 hypothesis tests, the overall chance of falsely rejecting at least one of the null hypotheses is a whopping $1 - (1 - 0.05)^{18} = 0.60$. The traditional solution to this problem is to apply a Bonferroni adjustment to control the false rejection rate; the typical adjustment is to divide the $p$-value by the number of hypothesis tests in the "family" (that is, the collective test being done). Here that sets the rate to $p = 0.05/18$, or approximately $p = 0.003$, and results in a slightly less than 5% chance of falsely rejecting a null hypothesis. 
Although the Bonferroni correction does reduce the overall chance of falsely rejecting a null hypothesis, it also reduces the statistical power of the test to the same degree. This means that many null hypotheses will fail to be rejected in error. Rather than doing classical multiple comparison adjustments to adjust for false-discovery rate, such as a Bonferroni correction, Gelman et al. (2012) suggest using a hierarchical model to perform partial pooling instead. As already shown, hierarchical models partially pool the data, which pulls estimates toward the population mean with a strength determined by the amount of observed variation in the population (see also Figure 2 of (Gelman et al. 2012)). This automatically reduces the false-discovery rate, though not in a way that is intrinsically calibrated to false discovery, which is good, because reducing the overall false discovery rate in and of itself reduces the true discovery rate at the same time. The generated quantity `some_ability_gt_350` will be set to 1 if the maximum ability estimate in $\theta$ is greater than 0.35. And thus the posterior mean of this generated quantity will be the event probability \[ \mathrm{Pr}[\mathrm{max}(\theta) > 0.350] \ = \ \int_{\Theta} \mathrm{I}[\mathrm{max}(\theta) > 0.35] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \ \mathrm{I}[\mathrm{max}(\theta^{(m)}) > 0.35] \] where $\theta^{(m)}$ is the sequence of posterior draws for the ability parameter vector. Stan reports this value as the posterior mean of the generated quantity `some_ability_gt_350`, which takes on the value $\mathrm{I}[\mathrm{max}(\theta^{(m)}) > 0.35]$ in each iteration. 
```{r, echo=FALSE} thetas_pool <- plogis(as.matrix(fit_pool)) thetas_nopool <- plogis(as.matrix(fit_nopool)) some_gt_350_all <- sapply(list(thetas_pool, thetas_nopool, thetas_partialpool), function(x) apply(x, 1, max) > 0.35) chance_gt_350 <- round(100 * colMeans(some_gt_350_all)) ``` The probability estimate of there being a player with an ability (chance of success) greater than 0.350 is essentially zero in the complete pooling model and is essentially guaranteed in the no pooling model. The partially pooled estimates would not be considered significant at conventional $p = 0.05$ thresholds. One way to get a handle on what's going on is to inspect the posterior 80% intervals for chance-of-success estimates in the first graph above. ## Ranking In addition to multiple comparisons, we can use the simultaneous estimation of the ability parameters to rank the units. In this section, we rank ballplayers by (estimated) chance of success (i.e., batting ability). Of course, ranking players by ability makes no sense for the complete pooling model, where every player is assumed to have the same ability. ```{r, ranking} reverse_rank <- function(x) 1 + length(x) - rank(x) # so lower rank is better rank <- apply(thetas_partialpool, 1, reverse_rank) t(apply(rank, 1, quantile, prob = c(0.1, 0.5, 0.9))) ``` It is again abundantly clear from the posterior intervals that our uncertainty is very great after only 45 at bats. In the original Volume I BUGS [example](https://www.mrc-bsu.cam.ac.uk/software/bugs/the-bugs-project-the-bugs-book/bugs-book-examples/the-bugs-book-examples-chapter-2-2-7-1/) of surgical mortality, the posterior distribution over ranks was plotted for each hospital. It is now straightforward to reproduce that figure here for the baseball data. 
```{r, plot-ranks} df_rank <- data.frame(name = rep(bball$Player, each = M), rank = c(t(rank))) ggplot(df_rank, aes(rank)) + stat_count(width = 0.8) + facet_wrap(~ name) + scale_x_discrete("Rank", limits = c(1, 5, 10, 15)) + scale_y_discrete("Probability", limits = c(0, 0.1 * M, 0.2 * M), labels = c("0.0", "0.1", "0.2")) + ggtitle("Rankings for Partial Pooling Model") ``` #### Who has the Highest Chance of Success? We can use our ranking statistic to calculate the event probability for unit $n$ that the unit has the highest chance of success using MCMC as \[ \mathrm{Pr}[\theta_n = \max(\theta)] \ = \ \int_{\Theta} \mathrm{I}[\theta_n = \mathrm{max}(\theta)] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[\theta^{(m)}_n = \mathrm{max}(\theta^{(m)})]. \] Like our other models, the partial pooling mitigates the implicit multiple comparisons being done to calculate the probabilities of rankings. Contrast this with an approach that does a pairwise significance test and then applies a false-discovery correction. We can compute this straightforwardly using the rank data we have already computed or we could compute it directly as above. Because $\mathrm{Pr}[\theta_n = \theta_{n'}] = 0$ for $n \neq n'$, we don't have to worry about ties. 
```{r, plot-best-player} thetas_nopool <- plogis(as.matrix(fit_nopool)) colnames(thetas_nopool) <- as.character(bball$Player) rank_nopool <- apply(thetas_nopool, 1, reverse_rank) is_best_nopool <- rowMeans(rank_nopool == 1) is_best_partialpool <- rowMeans(rank == 1) df_is_best <- data.frame(unit = rep(bball$Player, 2), is_best = c(is_best_partialpool, is_best_nopool), model = rep(c("partial pooling", "no pooling"), each = N)) ggplot(df_is_best, aes(x=unit, y=is_best)) + geom_bar(stat = "identity") + facet_wrap(~ model) + scale_y_continuous(name = "Pr[player is best]") + ggtitle("Who is the Best Player?") + theme(axis.text.x = element_text(angle = -45, vjust = 1, hjust = 0)) ``` This question of which player has the highest chance of success (batting ability) doesn't even make sense in the complete pooling model, because the chance of success parameters are all the same by definition. In the other models, the amount of pooling directly determines the probabilities of being the best player. That is, the probability of being best goes down for high performing players with more pooling, whereas it goes up for below-average players. ## Graphical Posterior Predictive Checks We can simulate data from the predictive distribution and compare it to the original data used for fitting the model. If they are not consistent, then either our model is not capturing the aspects of the data we are probing with test statistics or the measurement we made is highly unlikely. That is, extreme $p$-values lead us to suspect there is something wrong with our model that deserves further exploration. In some cases, we are willing to work with models that are wrong in some measurable aspects, but accurately capture quantities of interest for an application. That is, it's possible for a model to capture some, but not all, aspects of a data set, and still be useful. ### Test Statistics and Bayesian $p$-Values A test statistic $T$ is a function from data to a real value. Following (Gelman et al. 
2013), we will concentrate on four specific test statistics for repeated binary trial data (though these choices are fairly general): minimum value, maximum value, sample mean, and sample standard deviation. Given a test statistic $T$ and data $y$, the Bayesian $p$-value has a direct definition as a probability, \[ p_B = \mathrm{Pr}[T(y^{\mathrm{rep}}) \geq T(y) \, | \, y]. \] Bayesian $p$-values, like their traditional counterparts, are probabilities, but not probabilities that a model is true. They simply measure discrepancies between the observed data and what we would expect if the model is true. Values of Bayesian $p$-values near 0 or 1 indicate that the data $y$ used to estimate the model is unlikely to have been generated by the estimated model. As with other forms of full Bayesian inference, our estimate is the full posterior, not just a point estimate. As with other Bayesian inferences, we average over the posterior rather than working from a point estimate of the parameters. Expanding this as an expectation of an indicator function, \[ p_B \ = \ \int_{\Theta, Y^{\mathrm{rep}}} \mathrm{I}[T(y^{\mathrm{rep}}) \geq T(y)] \ p(y^{\mathrm{rep}} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}y^{\mathrm{rep}} \ \mathrm{d}\theta. \] We treat $y^{\mathrm{rep}}$ as a parameter in parallel with $\theta$, integrating over possible values $y^{\mathrm{rep}} \in Y^{\mathrm{rep}}$. As usual, we use the integration sign in a general way intended to include summation, as with the discrete variable $y^{\mathrm{rep}}$. The formulation as an expectation leads to the obvious MCMC calculation based on posterior draws $y^{\mathrm{rep} (m)}$ for $m \in 1{:}M$, \[ p_B \approx \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[T(y^{\mathrm{rep} \ (m)}) \geq T(y)]. \] Using the `pp_check` function in **rstanarm**, we can easily reproduce Figure 6.12 from (Gelman et al. 2013), which shows the posterior predictive distribution for the test statistic, the observed value as a vertical line, and the $p$-value for each of the tests. 
First, here is just the plot for the no pooling model using the mean as the test statistic: ```{r, plot-ppc-stats-mean} pp_check(fit_nopool, plotfun = "stat", stat = "mean") ``` The `stat` argument can the be the name of any R function (including your own functions defined in the Global Environment) that takes a vector as an input and returns a scalar. To make plots for each of the models for several test statistics we can use the following code, which will create a list of ggplot objects for each model and then arrange everything in a single plot. ```{r, plot-ppc-stats} tstat_plots <- function(model, stats) { lapply(stats, function(stat) { graph <- pp_check(model, plotfun = "stat", stat = stat, seed = SEED) # optional arguments graph + xlab(stat) + theme(legend.position = "none") }) } Tstats <- c("mean", "sd", "min", "max") ppcs_pool <- tstat_plots(fit_pool, Tstats) ppcs_nopool <- tstat_plots(fit_nopool, Tstats) ppcs_partialpool <- tstat_plots(fit_partialpool, Tstats) if (require(gridExtra)) { grid.arrange( arrangeGrob(grobs = ppcs_pool, nrow = 1, left = "Pooling"), arrangeGrob(grobs = ppcs_nopool, nrow = 1, left = "No Pooling"), arrangeGrob(grobs = ppcs_partialpool, nrow = 1, left = "Partial Pooling") ) } ``` The only worrisomely extreme value visible in the plots is the $p$-value for standard deviation in the no-pooling model, where the vast majority of the simulated data sets under the model had standard deviations greater than the actual data. We didn't actually compute this $p$-value because extreme $p$-values are easy to detect visually and whether or not the $p$-value is less than $0.05$ or some other arbitrary value is of little use to us beyond what we can already see in the plot. 
However, if we did want to actually compute the $p$-value we can do so easily: ```{r, p-value} yrep <- posterior_predict(fit_nopool, seed = SEED) # seed is optional Ty <- sd(y) Tyrep <- apply(yrep, 1, sd) # tail-area probability p <- 1 - mean(Tyrep > Ty) print(p) ``` ### Comparing Observed and Replicated Data Following the advice of Gelman et al. (2013), we will take the fitted parameters of the data set and generate replicated data sets, then compare the replicated data sets visually to the observed data we used to fit the model. In this section we'll create the plots for the model using partial pooling, but the same plots can be made for the other models too. Again using **rstanarm**'s `pp_check` function, we can plot some of the simulated data sets along with the original data set to do a visual inspection as suggested by Gelman et al. (2013). For this type of posterior predictive check we set the `check` argument to `"distributions"` and we use `nreps` to specify how many replicated sets of data to generate from the posterior predictive distribution. Because our models have a binomial outcome, instead of plotting the number of successes (hits in this case) on the x-axis, `pp_check` will plot the proportion of successes. ```{r, plot-ppc-y-vs-yrep} pp_check(fit_partialpool, plotfun = "hist", nreps = 15, binwidth = 0.025) + ggtitle("Model: Partial Pooling") ``` These simulations are not unreasonable for a binomial likelihood, but they are more spread out than the actual data. In this case, this may actually have more to do with how the data were selected out of all the major league baseball players than the actual data distribution. Efron and Morris (1975, p 312) write > This sample was chosen because we wanted between 30 and 50 at bats to assure a satisfactory approximation of the binomial by the normal distribution while leaving the bulk of at bats to be estimated. 
We also wanted to include an unusually good hitter (Clemente) to test the method with at least one extreme parameter, a situation expected to be less favorable to Stein's estimator. Stein's estimator requires equal variances, or in this situation, equal at bats, so the remaining 17 players are all whom either the April 26 or May 3 *New York Times* reported with 45 at bats. # Discussion A hierarchical model introduces an estimation bias toward the population mean and the stronger the bias, the less variance there is in the estimates for the units. Exactly how much bias and variance is warranted can be estimated by further calibrating the model and testing where its predictions do not bear out. With very little data, there is very little we can do to gain sharp inferences other than provide more informative priors, which is well worth doing when prior information is available. On the other hand, with more data, the models provide similar results (see the exercises), and in the limit, all of the models (other than complete pooling) converge to posteriors that are delta functions around the empirical chance of success (i.e., the maximum likelihood estimate). Meanwhile, Bayesian inference is allowing us to make more accurate predictions with the data available before we hit that asymptotic regime. # Exercises 1. Generate fake data according to the pooling, no-pooling, and partial pooling models. Fit the model and consider the coverage of the posterior 80% intervals. 1. Try generating data where each player has a different number of at-bats (trials) and then fitting the models. What effect does the number of initial trials have on the posterior? Is there a way to quantify the effect? 1. In the section where we fit the complete pooling model we show a plot of the prior distribution on the probability of success $\theta$ implied by the $\mathsf{Normal}(-1,1)$ prior on the log-odds $\alpha$. 
If $\theta = \mathrm{logit}^{-1}(\alpha)$ and $p(\alpha) = \mathsf{Normal}(\alpha \,|\, -1, 1)$, what is $p(\theta)$? For a hint, see [here](https://en.wikipedia.org/wiki/Probability_density_function#Dependent_variables_and_change_of_variables). 1. How sensitive is the basic no-pooling model to the choice of prior? We used a somewhat informative prior due to our knowledge of baseball, but the prior could be made more or less informative. How, if at all, does this affect posterior inference? 1. What are some other test statistics that might be used to evaluate our model fit to data? Try some out using `pp_check(model, plotfun="stat", stat = "my_test")`, where `my_test` is your function that computes the test statistic. For example, to check the 25% quantile you could first define a function `q25 <- function(x) quantile(x, 0.25)` and then call `pp_check(model, plotfun = "stat", stat = "q25")`. 1. Discuss the difference between batting average and on-base percentage as random variables. Consider particularly the denominator (at-bat versus plate appearance). Is the denominator in these kinds of problems always a random variable itself? Why might this be important in inference? # References * Betancourt, M. and Girolami, M. (2015) Hamiltonian Monte Carlo for hierarchical models. *Current Trends in Bayesian Methodology with Applications* **79**. * Efron, B. and Morris, C. (1975) Data analysis using Stein's estimator and its generalizations. *Journal of the American Statistical Association* **70**(350), 311--319. [ [pdf](https://www.medicine.mcgill.ca/epidemiology/hanley/bios602/MultilevelData/EfronMorrisJASA1975.pdf) ] * Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013) *Bayesian Data Analysis*, 3rd Edition. Chapman & Hall/CRC Press, London. * Gelman, A. and Hill, J. (2007) *Data Analysis Using Regression and Multilevel-Hierarchical Models*. Cambridge University Press, Cambridge, United Kingdom. 
* Gelman, A., Hill, J., and Yajima, M. (2012) Why we (usually) don't have to worry about multiple comparisons. *Journal of Research on Educational Effectiveness* **5**, 189--211. [ [pdf](http://www.stat.columbia.edu/~gelman/research/published/multiple2f.pdf) ] * Gneiting, T., Balabdaoui, F., and Raftery, A. E. (2007) Probabilistic forecasts, calibration and sharpness. *Journal of the Royal Statistical Society: Series B* (Statistical Methodology), **69**(2), 243--268. * Lunn, D., Jackson, C., Best, N., Thomas, A., and Spiegelhalter, D. (2013) *The BUGS Book: A Practical Introduction to Bayesian Analysis*. Chapman & Hall/CRC Press. * Neal, R. M. (2003) Slice sampling. *Annals of Statistics* **31**(3):705--767. * Papaspiliopoulos, O., Roberts, G. O., and Skold, M. (2003) Non-centered parameterisations for hierarchical models and data augmentation. In *Bayesian Statistics 7: Proceedings of the Seventh Valencia International Meeting*, edited by Bernardo, J. M., Bayarri, M. J., Berger, J. O., Dawid, A. P., Heckerman, D., Smith, A. F. M., and West, M. Oxford University Press, Chicago. * Plummer, M., Best, N., Cowles, K., & Vines, K. (2006). CODA: Convergence diagnosis and output analysis for MCMC. *R News*, **6**(1), 7--11. * Spiegelhalter, D., Thomas, A., Best, N., & Gilks, W. (1996) BUGS 0.5 Examples. MRC Biostatistics Unit, Institute of Public health, Cambridge, UK. * Stan Development Team (2015) *Stan Modeling Language User's Guide and Reference Manual*. [ [web page] ](https://mc-stan.org/documentation/) * Tarone, R. E. (1982) The use of historical control information in testing for a trend in proportions. *Biometrics* **38**(1):215--220. * Vehtari, A, Gelman, A., & Gabry, J. (2016) Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. [ [pdf](https://arxiv.org/abs/1507.04544) ] # Additional Data Sets The following additional data sets have a similar structure to the baseball data used in this vignette and are included with **rstanarm**. 
#### Rat tumors (N = 71) Tarone (1982) provides a data set of tumor incidence in historical control groups of rats; specifically endometrial stromal polyps in female lab rats of type F344. The data set is taken from the book site for (Gelman et al. 2013): * To load: `data(tumors, package = "rstanarm")` * Data source: [http://www.stat.columbia.edu/~gelman/book/data/rats.asc](http://www.stat.columbia.edu/~gelman/book/data/rats.asc) #### Surgical mortality (N = 12) Spiegelhalter et al. (1996) provide a data set of mortality rates in 12 hospitals performing cardiac surgery in babies. We just manually entered the data from the paper; it is also available in the Stan example models repository in R format. * To load: `data(mortality, package = "rstanarm")` * Data source: Unknown #### Baseball hits 1996 AL (N = 308) Carpenter (2009) updates Efron and Morris's (1975) data set for the entire set of players for the entire 2006 American League season of Major League Baseball. The data was originally downloaded from the seanlahman.com, which is currently not working. * To load: `data(bball2006, package = "rstanarm")` * Data Source: [https://web.archive.org/web/20220618114439/https://lingpipe-blog.com/2009/09/23/](https://web.archive.org/web/20220618114439/https://lingpipe-blog.com/2009/09/23/) rstanarm/inst/doc/jm.Rmd0000644000176200001440000020131014370470372014701 0ustar liggesusers--- title: "Estimating Joint Models for Longitudinal and Time-to-Event Data with rstanarm" author: "Sam Brilleman" date: "`r Sys.Date()`" output: html_vignette: toc: true number_sections: false --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` ```{r setup_jm, include=FALSE, message=FALSE} knitr::opts_chunk$set(fig.width=10, fig.height=4) library(rstanarm) ``` # Preamble This vignette provides an introduction to the `stan_jm` modelling function in the __rstanarm__ package. 
The `stan_jm` function allows the user to estimate a shared parameter joint model for longitudinal and time-to-event data under a Bayesian framework. # Introduction Joint modelling can be broadly defined as the simultaneous estimation of two or more statistical models which traditionally would have been separately estimated. When we refer to a shared parameter joint model for longitudinal and time-to-event data, we generally mean the joint estimation of: 1) a longitudinal mixed effects model which analyses patterns of change in an outcome variable that has been measured repeatedly over time (for example, a clinical biomarker) and 2) a survival or time-to-event model which analyses the time until an event of interest occurs (for example, death or disease progression). Joint estimation of these so-called "submodels" is achieved by assuming they are correlated via individual-specific parameters (i.e. individual-level random effects). Over the last two decades the joint modelling of longitudinal and time-to-event data has received a significant amount of attention [1-5]. Methodological developments in the area have been motivated by a growing awareness of the benefits that a joint modelling approach can provide. In clinical or epidemiological research it is common for a clinical biomarker to be repeatedly measured over time on a given patient. In addition, it is common for time-to-event data, such as the patient-specific time from a defined origin (e.g. time of diagnosis of a disease) until a terminating clinical event such as death or disease progression to also be collected. The figure below shows observed longitudinal measurements (i.e. observed "trajectories") of log serum bilirubin for a small sample of patients with primary biliary cirrhosis. Solid lines are used for those patients who were still alive at the end of follow up, while dashed lines are used for those patients who died. 
From the plots, we can observe between-patient variation in the longitudinal trajectories for log serum bilirubin, with some patients showing an increase in the biomarker over time, others decreasing, and some remaining stable. Moreover, there is variation between patients in terms of the frequency and timing of the longitudinal measurements. \ ```{r traj_figure, echo=FALSE} # Plot observed longitudinal trajectories for log serum bilirubin ids <- c(25,31:33,36,38:40) pbcLong_subset <- pbcLong[pbcLong$id %in% ids, ] pbcLong_subset <- merge(pbcLong_subset, pbcSurv) pbcLong_subset$Died <- factor(pbcLong_subset$death, labels = c("No", "Yes")) patient_labels <- paste("Patient", 1:8) names(patient_labels) <- ids ggplot() + geom_line(aes(y = logBili, x = year, lty = Died), data = pbcLong_subset) + facet_wrap(~ id, ncol = 4, labeller = labeller(id = patient_labels)) + theme_bw() + ylab("Log serum bilirubin") + xlab("Time (years)") ``` From the perspective of clinical risk prediction, we may be interested in asking whether the between-patient variation in the log serum bilirubin trajectories provides meaningful prognostic information that can help us differentiate patients with regard to some clinical event of interest, such as death. Alternatively, from an epidemiological perspective we may wish to explore the potential for etiological associations between changes in log serum bilirubin and mortality. Joint modelling approaches provide us with a framework under which we can begin to answer these types of clinical and epidemiological questions. More formally, the motivations for undertaking a joint modelling analysis of longitudinal and time-to-event data might include one or more of the following: - One may be interested in how *underlying changes in the biomarker influence the occurrence of the event*. However, including the observed biomarker measurements directly into a time-to-event model as time-varying covariates poses several problems. 
For example, if the widely used Cox proportional hazards model is assumed for the time-to-event model then biomarker measurements need to be available for all patients at all failure times, which is unlikely to be the case [3]. If simple methods of imputation are used, such as the "last observation carried forward" method, then these are likely to induce bias [6]. Furthermore, the observed biomarker measurements may be subject to measurement error and therefore their inclusion as time-varying covariates may result in biased and inefficient estimates. In most cases, the measurement error will result in parameter estimates which are shrunk towards the null [7]. On the other hand, joint modelling approaches allow us to estimate the association between the biomarker (or some function of the biomarker trajectory, such as rate of change in the biomarker) and the risk of the event, whilst allowing for both the discrete time and measurement-error aspects of the observed biomarker. - One may be interested primarily in the evolution of the clinical biomarker but *may wish to account for what is known as informative dropout*. If the value of future (unobserved) biomarker measurements are related to the occurrence of the terminating event, then those unobserved biomarker measurements will be "missing not at random" [8,9]. In other words, biomarker measurements for patients who have an event will differ from those who do not have an event. Under these circumstances, inference based solely on observed measurements of the biomarker will be subject to bias. A joint modelling approach can help to adjust for informative dropout and has been shown to reduce bias in the estimated parameters associated with longitudinal changes in the biomarker [1,9,10]. - Joint models are naturally suited to the task of *dynamic risk prediction*. 
For example, joint modelling approaches have been used to develop prognostic models where predictions of event risk can be updated as new longitudinal biomarker measurements become available. Taylor et al. [11] jointly modelled longitudinal measurements of the prostate specific antigen (PSA) and time to clinical recurrence of prostate cancer. The joint model was then used to develop a web-based calculator which could provide real-time predictions of the probability of recurrence based on a patient's up to date PSA measurements. In this vignette, we describe the __rstanarm__ package's `stan_jm` modelling function. This modelling function allows users to fit a shared parameter joint model for longitudinal and time-to-event data under a Bayesian framework, with the backend estimation carried out using Stan. In Section 2 we describe the formulation of the joint model used by `stan_jm`. In Section 3 we present a variety of examples showing the usage of `stan_jm`. Note that some aspects of the estimation are covered in other vignettes, such as the priors [vignette](priors.html) which contains details on the prior distributions available for regression coefficients. # Technical details ## Model formulation A shared parameter joint model consists of related submodels which are specified separately for each of the longitudinal and time-to-event outcomes. These are therefore commonly referred to as the *longitudinal submodel(s)* and the *event submodel*. The longitudinal and event submodels are linked using shared individual-specific parameters, which can be parameterised in a number of ways. We describe each of these submodels below. ### Longitudinal submodel(s) We assume $y_{ijm}(t) = y_{im}(t_{ij})$ corresponds to the observed value of the $m^{th}$ $(m = 1,...,M)$ biomarker for individual $i$ $(i = 1,...,N)$ taken at time point $t_{ij}$, $j = 1,...,n_{im}$. 
We specify a (multivariate) generalised linear mixed model that assumes $y_{ijm}(t)$ follows a distribution in the exponential family with mean $\mu_{ijm}(t)$ and linear predictor $$ \eta_{ijm}(t) = g_m(\mu_{ijm}(t)) = \boldsymbol{x}^T_{ijm}(t) \boldsymbol{\beta}_m + \boldsymbol{z}^T_{ijm}(t) \boldsymbol{b}_{im} $$ where $\boldsymbol{x}^T_{ijm}(t)$ and $\boldsymbol{z}^T_{ijm}(t)$ are both row-vectors of covariates (which likely include some function of time, for example a linear slope, cubic splines, or polynomial terms) with associated vectors of fixed and individual-specific parameters $\boldsymbol{\beta}_m$ and $\boldsymbol{b}_{im}$, respectively, and $g_m$ is some known link function. The distribution and link function are allowed to differ over the $M$ longitudinal submodels. We let the vector $\boldsymbol{\beta} = \{ \boldsymbol{\beta}_m ; m = 1,...,M\}$ denote the collection of population-level parameters across the $M$ longitudinal submodels. We assume that the dependence across the different longitudinal submodels (i.e. the correlation between the different longitudinal biomarkers) is captured through a shared multivariate normal distribution for the individual-specific parameters; that is, we assume $$ \begin{pmatrix} \boldsymbol{b}_{i1} \\ \vdots \\ \boldsymbol{b}_{iM} \end{pmatrix} = \boldsymbol{b}_i \sim \mathsf{Normal} \left( 0 , \boldsymbol{\Sigma} \right) $$ for some unstructured variance-covariance matrix $\boldsymbol{\Sigma}$. ### Event submodel We assume that we also observe an event time $T_i = \mathsf{min} \left( T^*_i , C_i \right)$ where $T^*_i$ denotes the so-called "true" event time for individual $i$ (potentially unobserved) and $C_i$ denotes the censoring time. We define an event indicator $d_i = I(T^*_i \leq C_i)$. 
We then model the hazard of the event using a parametric proportional hazards regression model of the form $$ h_i(t) = h_0(t; \boldsymbol{\omega}) \mathsf{exp} \left( \boldsymbol{w}^T_i(t) \boldsymbol{\gamma} + \sum_{m=1}^M \sum_{q=1}^{Q_m} f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) \right) $$ where $h_i(t)$ is the hazard of the event for individual $i$ at time $t$, $h_0(t; \boldsymbol{\omega})$ is the baseline hazard at time $t$ given parameters $\boldsymbol{\omega}$, $\boldsymbol{w}^T_i(t)$ is a row-vector of individual-specific covariates (possibly time-dependent) with an associated vector of regression coefficients $\boldsymbol{\gamma}$ (log hazard ratios), $f_{mq}(.)$ are a set of known functions for $m=1,...,M$ and $q=1,...,Q_m$, and the $\alpha_{mq}$ are regression coefficients (log hazard ratios). The longitudinal and event submodels are assumed to be related via an "association structure", which is a set of functions each $\{ f_{mq} ; m = 1,...,M, q = 1,...,Q_m \}$ that may each be conditional on the population-level parameters from the longitudinal submodel $\boldsymbol{\beta}$, the individual-specific parameters $\boldsymbol{b}_{i}$, and the population-level parameters $\alpha_{mq}$ for $m=1,...,M$ and $q=1,...,Q_m$. That is, the association structure of the joint model is captured via the $\sum_{m=1}^M \sum_{q=1}^{Q_m} f_{mq}(\boldsymbol{\beta}_m, \boldsymbol{b}_{im}, \alpha_{mq}; t)$ term in the linear predictor of the event submodel. The $\alpha_{mq}$ are referred to as the "association parameters" since they quantify the strength of the association between the longitudinal and event processes. The various ways in which the association structure can be are described in the next section. 
The probability of individual $i$ still being event-free at time $t$, often referred to as the "survival probability", is defined as $$ S_i(t) = \text{Prob} \Big( T_i^* \geq t \Big) = \exp \Big( -H_i(t) \Big) $$ where $H_i(t) = \int_{s=0}^t h_i(s) ds$ is the cumulative hazard for individual $i$. We assume that the baseline hazard $h_0(t; \boldsymbol{\omega})$ is modelled parametrically. In the `stan_jm` modelling function the baseline hazard be specified as either: an approximation using B-splines on the log hazard scale (the default); a Weibull distribution; or an approximation using a piecewise constant function on the log hazard scale (sometimes referred to as piecewise exponential). The choice of baseline hazard can be made via the `basehaz` argument. In the case of the B-splines or piecewise constant baseline hazard, the user can control the flexibility by specifying the knots or degrees of freedom via the `basehaz_ops` argument. (Note that currently there is slightly limited post-estimation functionality available for models estimated with a piecewise constant baseline hazard, so this is perhaps the least preferable choice). ### Association structures As mentioned in the previous section, the dependence between the longitudinal and event submodels is captured through the association structure, which can be specified in a number of ways. The simplest association structure is likely to be $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{im}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) $$ and this is often referred to as a *current value* association structure since it assumes that the log hazard of the event at time $t$ is linearly associated with the value of the longitudinal submodel's linear predictor also evaluated at time $t$. This is the most common association structure used in the joint modelling literature to date. In the situation where the longitudinal submodel is based on an identity link function and normal error distribution (i.e. 
a linear mixed model) the *current value* association structure can be viewed as a method for including the underlying "true" value of the biomarker as a time-varying covariate in the event submodel.^[By "true" value of the biomarker, we mean the value of the biomarker which is not subject to measurement error or discrete time observation. Of course, for the expected value from the longitudinal submodel to be considered the so-called "true" underlying biomarker value, we would need to have specified the longitudinal submodel appropriately!] However, other association structures are also possible. For example, we could assume the log hazard of the event is linearly associated with the *current slope* (i.e. rate of change) of the longitudinal submodel's linear predictor, that is $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\eta_{im}(t)}{dt} $$ There are in fact a whole range of possible association structures, many of which have been discussed in the literature [14-16]. 
The `stan_jm` modelling function in the __rstanarm__ package allows for the following association structures, which are specified via the `assoc` argument: Current value (of the linear predictor or expected value) $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \mu_{im}(t) $$ Current slope (of the linear predictor or expected value) $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\eta_{im}(t)}{dt} \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\mu_{im}(t)}{dt} $$ Area under the curve (of the linear predictor or expected value) $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \int_0^t \eta_{im}(u) du \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \int_0^t \mu_{im}(u) du $$ Interactions between different biomarkers $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \eta_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \mu_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \mu_{im}(t) \mu_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' $$ Interactions between the biomarker (or it's slope) and observed data $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \eta_{im}(t) \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \mu_{im}(t) \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \frac{d\eta_{im}(t)}{dt} \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, 
\boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \frac{d\mu_{im}(t)}{dt} \text{ for some covariate value } c_{i}(t) $$ As well as using lagged values for any of the above. That is, replacing $t$ with $t-u$ where $u$ is some lag time, such that the hazard of the event at time $t$ is assumed to be associated with some function of the longitudinal submodel parameters at time $t-u$. Lastly, we can specify some time-independent function of the random effects, possibly including the fixed effect component. For example, $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \boldsymbol{b}_{im0} $$ or $$ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \Big( \boldsymbol{\beta}_{m0} + \boldsymbol{b}_{im0} \Big) $$ where $\boldsymbol{\beta}_{m0}$ is the population-level intercept for the $m^{th}$ longitudinal submodel and $\boldsymbol{b}_{im0}$ is the $i^{th}$ individual's random deviation from the population-level intercept for the $m^{th}$ longitudinal submodel. Note that more than one association structure can be specified, however, not all possible combinations are allowed. Moreover, if you are fitting a multivariate joint model (i.e. more than one longitudinal outcome) then you can optionally choose to use a different association structure(s) for linking each longitudinal submodel to the event submodel. To do this you can pass a list of length $M$ to the `assoc` argument. ### Assumptions Here we define a set of assumptions for the multivariate shared parameter joint model. 
The so-called conditional independence assumption of the shared parameter joint model postulates $$ y_{im}(t) \perp y_{im'}(t) \mid \boldsymbol{b}_i, \boldsymbol{\theta} \\ y_{im}(t) \perp y_{im}(t') \mid \boldsymbol{b}_i, \boldsymbol{\theta} \\ y_{im}(t) \perp T_i^* \mid \boldsymbol{b}_i, \boldsymbol{\theta} $$ for some $m \neq m'$ and $t \neq t'$, and where $\boldsymbol{\theta}$ denotes the combined vector of all remaining population-level parameters in the model. That is, conditional on the individual-specific parameters $\boldsymbol{b}_i$ and population-level parameters $\boldsymbol{\theta}$, the following are assumed: (i) any biomarker measurement for individual $i$ is independent of that individual's true event time $T_i^*$; (ii) any two measurements of the $m^{th}$ biomarker taken on the $i^{th}$ individual at two distinct time points $t$ and $t'$ (i.e. longitudinal or repeated measurements) are independent of one another; and (iii) any two measurements of two different biomarkers, taken on the $i^{th}$ individual at some time point $t$ are independent of one another. These conditional independence assumptions allow for a convenient factorisation of the full likelihood for joint model into the likelihoods for each of the component parts (i.e. the likelihood for the longitudinal submodel, the likelihood for the event submodel, and the likelihood for the distribution of the individual-specific parameters), which facilitates the estimation of the model. Moreover, we require two additional assumptions: (i) that the censoring process for the event outcome is independent of the true event time, that is $C_i \perp T_i^* \mid \boldsymbol{\theta}$ (i.e. uninformative censoring); and (ii) that the visiting process by which the observation times $t_{ijm}$ are determined is independent of the true event time $T_i^*$ and all missing future unobserved longitudinal biomarker measurements. 
### Log posterior distribution Under the conditional independence assumption, the log posterior for the $i^{th}$ individual can be specified as $$ \log p(\boldsymbol{\theta}, \boldsymbol{b}_{i} \mid \boldsymbol{y}_{i}, T_i, d_i) \propto \log \Bigg[ \Bigg( \prod_{m=1}^M \prod_{j=1}^{n_i} p(y_{ijm}(t) \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) \Bigg) p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta}) p(\boldsymbol{\theta}) \Bigg] $$ where $\boldsymbol{y}_i = \{ y_{ijm}(t); j = 1,...,n_i, m = 1,...,M \}$ denotes the collection of longitudinal biomarker data for individual $i$ and $\boldsymbol{\theta}$ denotes all remaining population-level parameters in the model. We can rewrite this log posterior as $$ \log p(\boldsymbol{\theta}, \boldsymbol{b}_{i} \mid \boldsymbol{y}_{i}, T_i, d_i) \propto \Bigg( \sum_{m=1}^M \sum_{j=1}^{n_i} \log p(y_{ijm}(t) \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) \Bigg) + \log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) + \log p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta}) + \log p(\boldsymbol{\theta}) $$ where $\sum_{j=1}^{n_{im}} \log p(y_{ijm} \mid \boldsymbol{b}_{i}, \boldsymbol{\theta})$ is the log likelihood for the $m^{th}$ longitudinal submodel, $\log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta})$ is the log likelihood for the event submodel, $\log p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta})$ is the log likelihood for the distribution of the group-specific parameters (i.e. random effects), and $\log p(\boldsymbol{\theta})$ represents the log likelihood for the joint prior distribution across all remaining unknown parameters.^[We refer the reader to the priors [vignette](priors.html) for a discussion of the possible prior distributions.] 
We can rewrite the log likelihood for the event submodel as $$ \log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) = d_i * \log h_i(T_i) - \int_0^{T_i} h_i(s) ds $$ and then use Gauss-Kronrod quadrature with $Q$ nodes to approximate $\int_0^{T_i} h_i(s) ds$, such that $$ \int_0^{T_i} h_i(s) ds \approx \frac{T_i}{2} \sum_{q=1}^{Q} w_q h_i \bigg( \frac{T_i(1+s_q)}{2} \bigg) $$ where $w_q$ and $s_q$, respectively, are the standardised weights and locations ("abscissa") for quadrature node $q$ $(q=1,...,Q)$ [17]. The default for the `stan_jm` modelling function is to use $Q=15$ quadrature nodes, however if the user wishes, they can choose between $Q=15$, $11$, or $7$ quadrature nodes (specified via the `qnodes` argument). Therefore, once we have an individual's event time $T_i$ we can evaluate the design matrices for the event submodel and longitudinal submodels at the $Q+1$ necessary time points (which are the event time $T_i$ and the quadrature points $\frac{T_i(1+s_q)}{2}$ for $q=1,...,Q$) and then pass these to Stan's data block. We can then evaluate the log likelihood for the event submodel by simply calculating the hazard $h_i(t)$ at those $Q+1$ time points and summing the quantities appropriately. This calculation will need to be performed each time we iterate through Stan's model block. A simplified example of the underlying Stan code used to fit the joint model can be found in [Brilleman et al. (2018)](https://github.com/stan-dev/stancon_talks/blob/master/2018/Contributed-Talks/03_brilleman/notebook.pdf) [12]. ## Model predictions Before discussing the methods by which we can generate posterior predictions, first let us define some additional relevant quantities. Let $\mathcal{D} = \{ \boldsymbol{y}_i, T_i, d_i; i = 1,...,N \}$ be the entire collection of outcome data in the sample. We will refer to this sample as the "training data". 
Let $T_{max} = \max \{ T_i; i = 1,...,N \}$ denote the maximum event or censoring time across the $i = 1,...,N$ individuals in our training data. ### Individual-specific predictions for in-sample individuals (for $0 \leq t \leq T_i$) We can generate posterior predictions for the longitudinal and time-to-event outcomes in the following manner. For the $i^{th}$ individual in our training data, a predicted value for the $m^{th}$ longitudinal biomarker at time $t$, denoted $y^*_{im}(t)$, can be generated from the posterior predictive distribution $$ p \Big( y^{*}_{im}(t) \mid \mathcal{D} \Big) = \int \int p \Big( y^{*}_{im}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) \space d \boldsymbol{b}_i \space d \boldsymbol{\theta} $$ and, similarly, a predicted probability of the $i^{th}$ individual being event-free at time $t$, denoted $S^*_i(t)$, can be generated from the posterior predictive distribution $$ p \Big( S^{*}_{i}(t) \mid \mathcal{D} \Big) = \int \int p \Big( S^{*}_i(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} $$ Note that for simplicity we have ignored the implicit conditioning on covariates; $\boldsymbol{x}_{im}(t)$ and $\boldsymbol{z}_{im}(t)$, for $m = 1,...,M$, and $\boldsymbol{w}_{i}(t)$. Since individual $i$ is included in the training data, it is easy for us to approximate these posterior predictive distributions by drawing from $p(y^{*}_{im}(t) \mid \boldsymbol{\theta}^{(l)}, \boldsymbol{b}_i^{(l)})$ and $p(S^{*}_i(t) \mid \boldsymbol{\theta}^{(l)}, \boldsymbol{b}_i^{(l)})$ where $\boldsymbol{\theta}^{(l)}$ and $\boldsymbol{b}_i^{(l)}$ are the $l^{th}$ $(l = 1,...,L)$ MCMC draws from the joint posterior distribution $p(\boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D})$. 
These draws from the posterior predictive distributions can be used for assessing the fit of the model. For example, - the draws from $p(y^{*}_{im}(t) \mid \mathcal{D})$ for $0 \leq t \leq T_i$ can be used to evaluate the fit of the longitudinal trajectory for the $m^{th}$ biomarker for the $i^{th}$ individual, and - the draws from $p(S^{*}_{i}(t) \mid \mathcal{D})$ for $0 \leq t \leq T_{max}$ can be averaged across the $N$ individuals to obtain a standardised survival curve (discussed in greater detail in later sections) which can then be compared to the observed survival curve, for example, the Kaplan-Meier curve. ### Individual-specific predictions for in-sample individuals (for $t > C_i$) However, given that we know the event or censoring time for each individual in our training data, it may make more sense to consider what will happen to censored individuals in our study when we look beyond their last known survival time (i.e. extrapolation). For an individual $i$, who was in our training data, and who was known to be event-free up until their censoring time $C_i$, we wish to draw from the conditional posterior predictive distribution for their longitudinal outcome at some time $t > C_i$, that is $$ p \Big( y^{*}_{im}(t) \mid \mathcal{D}, t > C_i \Big) = \int \int p \Big( y^{*}_{im}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i, t > C_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} $$ and the conditional posterior predictive distribution for their survival probability at some time $t > C_i$, that is $$ \begin{aligned} p \Big( S^{*}_{i}(t) \mid \mathcal{D}, t > C_i, T_i^* > C_i \Big) & = \frac {p \Big( S^{*}_{i}(t) \mid \mathcal{D} \Big)} {p \Big( S^{*}_{i}(C_i) \mid \mathcal{D} \Big)} \\ & = \int \int \frac {p \Big( S^{*}_i(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big)} {p \Big( S^{*}_i(C_i) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big)} \space p \Big( \boldsymbol{\theta}, 
\boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} \end{aligned} $$ These draws from the conditional posterior predictive distributions can be used to extrapolate into the future for individual $i$, conditional on their longitudinal biomarker data collected between baseline and their censoring time $C_i$. For example, - the draws from $p(y^{*}_{im}(t) \mid \mathcal{D}, t > C_i)$ for $C_i \leq t \leq T_{max}$ can be used to show the forecasted longitudinal trajectory for the $m^{th}$ biomarker for the $i^{th}$ individual, and - the draws from $p(S^{*}_{i}(t) \mid \mathcal{D}, t > C_i, T_i^* > C_i)$ for $C_i \leq t \leq T_{max}$ can be used to show the estimated conditional probability of individual $i$ remaining event-free into the future. ### Individual-specific predictions for out-of-sample individuals (i.e. dynamic predictions) **TBC.** Describe dynamic predictions under the framework of Rizopoulos (2011) [18]. These types of individual-specific predictions can be obtained using the `posterior_traj` and `posterior_survfit` functions by providing prediction data and specifying `dynamic = TRUE` (which is the default); see the examples provided below. ### Population-level (i.e. marginal) predictions We can also generate posterior predictions for the longitudinal and time-to-event outcomes that do not require any conditioning on observed outcome data for a specific individual. Here, we will discuss two ways in which this can be done. The first way is to "marginalise" over the distribution of the individual-specific parameters. We wish to generate a predicted value for the $m^{th}$ longitudinal biomarker at time $t$ for a new individual $k$ for whom we do not have any observed data. 
We will denote this prediction $y^*_{km}(t)$ and note that it can be generated from the posterior predictive distribution for the longitudinal outcome $$ \begin{aligned} p \Big( y^{*}_{km}(t) \mid \mathcal{D} \Big) & = \int \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \mid \mathcal{D} \Big) \space d \boldsymbol{\tilde{b}}_{k} \space d \boldsymbol{\theta} \\ & = \int \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\tilde{b}}_k \mid \boldsymbol{\theta} \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) \space d \boldsymbol{\tilde{b}}_{k} \space d \boldsymbol{\theta} \end{aligned} $$ and similarly for the survival probability $$ \begin{aligned} p \Big( S^{*}_{k}(t) \mid \mathcal{D} \Big) & = \int \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \mid \mathcal{D} \Big) d \boldsymbol{\tilde{b}}_k \space d \boldsymbol{\theta} \\ & = \int \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\tilde{b}}_k \mid \boldsymbol{\theta} \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\tilde{b}}_k \space d \boldsymbol{\theta} \\ \end{aligned} $$ We can obtain draws for $\boldsymbol{\tilde{b}}_k$ in the same manner as for the individual-specific parameters $\boldsymbol{b}_i$. That is, at the $l^{th}$ iteration of the MCMC sampler we draw $\boldsymbol{\tilde{b}}_k^{(l)}$ and store it^[These random draws from the posterior distribution of the group-specific parameters are stored each time a joint model is estimated using `stan_glmer`, `stan_mvmer`, or `stan_jm`; they are saved under an ID value called `"_NEW_"`]. 
However, individual $k$ did not provide any contribution to the training data and so we are effectively taking random draws from the posterior distribution for the individual-specific parameters. We are therefore effectively marginalising over the distribution of the group-specific coefficients when we obtain predictions using the draws $\boldsymbol{\tilde{b}}_k^{(l)}$ for $l = 1,\dots,L$. In other words, we are predicting for a new individual about whom we have no information except that they are drawn from the same population as the $i = 1,...,N$ individuals in the training data. Because these predictions will incorporate all the uncertainty associated with between-individual variation, our 95% credible intervals are likely to be very wide. These types of marginal predictions can be obtained using the `posterior_traj` and `posterior_survfit` functions by providing prediction data and specifying `dynamic = FALSE`; see the examples provided below. The second way is to effectively ignore the group-level structure in the model. That is, to predict using only the population-level parameters contributing to the model. For example, under an identity link function and normal error distribution (i.e. linear mixed effect longitudinal submodel), we would obtain draws from the distribution $y^{(l)}_{km}(t) \sim N \Big( \boldsymbol{x}^T_{km}(t) \boldsymbol{\beta}_m^{(l)}, \sigma_m^{(l)} \Big)$ where $\boldsymbol{\beta}_m^{(l)}$ and $\sigma_m^{(l)}$ are the population-level parameters and residual error standard deviation, respectively, for the $l^{th}$ draw of the MCMC samples. However, referring to this as a "marginal" prediction is somewhat misleading since we are not explicitly conditioning on the individual-specific parameters but we are implicitly assuming that we know they are equal to zero with absolute certainty. 
That is, we are actually drawing from the posterior predictive distribution for the longitudinal outcome $$ \begin{aligned} p \Big( y^{*}_{km}(t) \mid \mathcal{D} \Big) & = \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_k = 0 \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\theta} \\ \end{aligned} $$ and similarly for the survival probability $$ p \Big( S^{*}_{k}(t) \mid \mathcal{D} \Big) = \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_k = 0 \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\theta} \\ $$ These types of so-called "marginal" predictions cannot currently be obtained using the `posterior_traj` and `posterior_survfit` functions. ### Standardised survival probabilities All of the previously discussed population-level (i.e. marginal) predictions assumed implicit conditioning on some covariate values for the longitudinal submodel, $\boldsymbol{x}_{im}(t)$ and $\boldsymbol{z}_{im}(t)$ for $m = 1,...,M$, and for the event submodel, $\boldsymbol{w}_{i}(t)$. Even though we marginalise over the distribution of the individual-specific parameters we are still assuming that we obtained predictions for some known values of the covariates. However, sometimes we wish to marginalise (i.e. average) over the observed distribution of covariates as well. Here we discuss a method by which we can do that for the predicted survival probabilities. At any time $t$, it is possible to obtain a standardised survival probability by averaging the individual-specific survival probabilities. That is, we can obtain $$ S^*(t) = \frac{\sum_{i=1}^{N^{pred}} S_i^*(t)}{N^{pred}} $$ where $S_i^*(t)$ is the predicted survival probability for individual $i$ ($i = 1,\dots,N^{pred}$) at time $t$, and $N^{pred}$ is the number of individuals included in the prediction dataset. We refer to these predictions as *standardised survival probabilities*. Note however, that if $N^{pred}$ is not sufficiently large (e.g. 
we pass new data with just 2 individuals, say) then marginalising over their covariate distribution may not be meaningful and, similarly, their joint random effects distribution may be a poor representation of the random effects distribution for the entire population. It is better to calculate these standardised survival probabilities using a prediction dataset where, say, $N^{pred}$ is equal to the total number of individuals in the training data. ## Model extensions ### Delayed entry (left-truncation) **TBC.** ### Multilevel clustering **TBC.** ## Model comparison ### LOO/WAIC in the context of joint models **TBC.** # Usage examples ## Dataset used in the examples We use the Mayo Clinic's primary biliary cirrhosis (PBC) dataset in the examples below. The dataset contains 312 individuals with primary biliary cirrhosis who participated in a randomised placebo controlled trial of D-penicillamine conducted at the Mayo Clinic between 1974 and 1984 [19]. However, to ensure the examples run quickly, we use a small random subset of just 40 patients from the full data. These example data are contained in two separate data frames. 
The first data frame contains multiple-row per patient longitudinal biomarker information, as shown in ```{r pbcLong} head(pbcLong) ``` while the second data frame contains single-row per patient survival information, as shown in ```{r pbcSurv} head(pbcSurv) ``` The variables included across the two datasets can be defined as follows: - `age` in years - `albumin` serum albumin (g/dl) - `logBili` logarithm of serum bilirubin - `death` indicator of death at endpoint - `futimeYears` time (in years) between baseline and the earliest of death, transplantation or censoring - `id` numeric ID unique to each individual - `platelet` platelet count - `sex` gender (m = male, f = female) - `status` status at endpoint (0 = censored, 1 = transplant, 2 = dead) - `trt` binary treatment code (0 = placebo, 1 = D-penicillamine) - `year` time (in years) of the longitudinal measurements, taken as time since baseline A description of the example datasets can be found by accessing the following help documentation: ```{r datasets_help, eval = FALSE} help("datasets", package = "rstanarm") ``` ## Fitting the models ### Univariate joint model (current value association structure) In this example we fit a simple univariate joint model, with one normally distributed longitudinal marker, an association structure based on the current value of the linear predictor, and B-splines baseline hazard. To fit the model we use the joint (longitudinal and time-to-event) modelling function in the **rstanarm** package: `stan_jm`. When calling `stan_jm` we must, at a minimum, specify a formula object for each of the longitudinal and event submodels (through the arguments `formulaLong` and `formulaEvent`), the data frames which contain the variables for each of the longitudinal and event submodels (through the arguments `dataLong` and `dataEvent`), and the name of the variable representing time in the longitudinal submodel (through the argument `time_var`). 
The formula for the longitudinal submodel is specified using the **lme4** package formula style. That is `y ~ x + (random_effects | grouping_factor)`. In this example we specify that log serum bilirubin (`logBili`) follows a subject-specific linear trajectory. To do this we include a fixed intercept and fixed slope (`year`), as well as a random intercept and random slope for each subject `id` (`(year | id)`). The formula for the event submodel is specified using the **survival** package formula style. That is, the outcome on the left of the `~` needs to be of the format `Surv(event_time, event_indicator)` for single row per individual data, or `Surv(start_time, stop_time, event_indicator)` for multiple row per individual data. The latter allows for exogenous time-varying covariates to be included in the event submodel. In this example we assume that the log hazard of death is linearly related to gender (`sex`) and an indicator of treatment with D-penicillamine (`trt`). ```{r univariate_fit, results = "hold", message = FALSE, warning = FALSE} library(rstanarm) mod1 <- stan_jm(formulaLong = logBili ~ sex + trt + year + (year | id), dataLong = pbcLong, formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, time_var = "year", chains = 1, refresh = 2000, seed = 12345) ``` The argument `refresh = 2000` was specified so that Stan didn't provide us with excessive progress updates whilst fitting the model. However, if you are fitting a model that will take several minutes or hours to fit, then you may wish to request progress updates quite regularly, for example setting `refresh = 20` for every 20 iterations (by default the refresh argument is set to 1/10th of the total number of iterations). The fitted model is returned as an object of the S3 class `stanjm`. 
We have a variety of methods and post-estimation functions available for this class, including: `print`, `summary`, `plot`, `fixef`, `ranef`, `coef`, `VarCorr`, `posterior_interval`, `update`, and more. Here, we will examine the most basic output for the fitted joint model by typing `print(mod1)`: ```{r print, echo = FALSE} alpha_mod1 <- as.data.frame(mod1)[["Assoc|Long1|etavalue"]] alpha_median <- round(median(alpha_mod1), 3) print(mod1) ``` The "Long1|etavalue" row under "Event submodel" is our $\alpha_{mq}$ parameter ($m = 1$, $q = 1$). The estimated median tells us that for each one unit increase in an individual's underlying level of log serum bilirubin, their estimated log hazard of death increases by some amount. The median absolute deviation (MAD) is provided as a more robust estimate of the standard deviation of the posterior distribution. In this case the MAD_SD for the association parameter indicates there is quite large uncertainty around the estimated association between log serum bilirubin and risk of death (recall this is a small dataset). If we wanted some slightly more detailed output for each of the model parameters, as well as further details regarding the model estimation (for example computation time, number of longitudinal observations, number of individuals, type of baseline hazard, etc) we can instead use the `summary` method: ```{r summary} summary(mod1, probs = c(.025,.975)) ``` The easiest way to extract the correlation matrix for the random effects (aside from viewing the `print` output) is to use the `VarCorr` function (modelled on the `VarCorr` function from the **lme4** package). 
If you wish to extract the variances and covariances (instead of the standard deviations and correlations) then you can type the following to return a data frame with all of the relevant information: ```{r VarCorr} as.data.frame(VarCorr(mod1)) ``` ### Univariate joint model (current value and current slope association structure) In the previous example we were fitting a shared parameter joint model which assumed that the log hazard of the event (in this case the log hazard of death) at time *t* was linearly related to the subject-specific expected value of the longitudinal marker (in this case the expected value of log serum bilirubin) also at time *t*. This is the default association structure, although it could be explicitly specified by setting the `assoc = "etavalue"` argument. However, let's suppose we believe that the log hazard of death is actually related to both the *current value* of log serum bilirubin and the current *rate of change* in log serum bilirubin. To estimate this joint model we need to indicate that we want to also include the subject-specific slope (at time *t*) from the longitudinal submodel as part of the association structure. We do this by setting the `assoc` argument equal to a character vector `c("etavalue", "etaslope")` which indicates our desired association structure: ```{r assoc_etaslope, eval = FALSE} mod2 <- stan_jm(formulaLong = logBili ~ sex + trt + year + (year | id), dataLong = pbcLong, formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataEvent = pbcSurv, assoc = c("etavalue", "etaslope"), time_var = "year", chains = 1, refresh = 2000, seed = 12345) ``` In this example the subject-specific slope is actually constant across time *t* since we have a linear trajectory. Note however that we could still use the `"etaslope"` association structure even if we had a non-linear subject specific trajectory (for example modelled using cubic splines or polynomials). 
### Multivariate joint model (current value association structures) Suppose instead that we were interested in *two* repeatedly measured clinical biomarkers, log serum bilirubin and serum albumin, and their association with the risk of death. We may wish to model these two biomarkers, allowing for the correlation between them, and estimating their respective associations with the log hazard of death. We will fit a linear mixed effects submodel (identity link, normal distribution) for each biomarker with a patient-specific intercept and linear slope but no other covariates. In the event submodel we will include gender (`sex`) and treatment (`trt`) as baseline covariates. Each biomarker is assumed to be associated with the log hazard of death at time $t$ via its expected value at time $t$ (i.e. a *current value* association structure). The model we are going to fit can therefore be specified as: $$ y_{im}(t_{ijm}) \sim N(\mu_{im}(t_{ijm}), \sigma_m) $$ $$ \eta_{im}(t) = \mu_{im}(t) = \beta_{0m} + \beta_{1m} t + b_{0mi} + b_{1mi} t $$ $$ h_i(t) = h_0(t; \boldsymbol{\omega}) \exp(\gamma_1 w_{1i} + \gamma_2 w_{2i} + \alpha_{1i} \mu_{i1}(t) + \alpha_{2i} \mu_{i2}(t)) $$ where $t$ is time in years, and $w_{1i}$ and $w_{2i}$ are, respectively, the gender and treatment indicators for individual $i$. (Note that due to the very small sample size, the clinical findings from this analysis should not be overinterpreted!). 
```{r fitmodel_mv_ev_ev, warning=FALSE, message=FALSE} mod3 <- stan_jm( formulaLong = list( logBili ~ sex + trt + year + (year | id), albumin ~ sex + trt + year + (year | id)), formulaEvent = survival::Surv(futimeYears, death) ~ sex + trt, dataLong = pbcLong, dataEvent = pbcSurv, time_var = "year", chains = 1, refresh = 2000, seed = 12345) ``` We can now examine the output from the fitted model, for example \ ```{r results_print} print(mod3) ``` or we can examine the summary output for the association parameters alone: \ ```{r results_summary} summary(mod3, pars = "assoc") ``` ## Posterior predictions We can also access the range of post-estimation functions (described in the `stan_jm` and related help documentation; see for example `help(posterior_traj)` or `help(posterior_survfit)`). ### Predicted individual-specific longitudinal trajectory for in-sample individuals Predicted individual-specific biomarker values can be obtained using either the `posterior_traj` or `posterior_predict` function. The `posterior_traj` is preferable, because it can be used to obtain the biomarker values at a series of evenly spaced time points between baseline and the individual's event or censoring time by using the default `interpolate = TRUE` option. Whereas, the `posterior_predict` function only provides the predicted biomarker values at the observed time points, or the time points in the new data. Predicting the biomarker values at a series of evenly spaced time points can be convenient because they can be easily used for plotting the longitudinal trajectory. Moreover, by default the `posterior_traj` returns a data frame with variables corresponding to the individual ID, the time, the predicted mean biomarker value, the limits for the 95% credible interval (i.e. uncertainty interval for the predicted mean biomarker value), and limits for the 95% prediction interval (i.e. 
uncertainty interval for a predicted biomarker data point), where the level for the uncertainty intervals can be changed via the `prob` argument. Conversely, the `posterior_predict` function returns an $S$ by $N$ matrix of predictions where $S$ is the number of posterior draws and $N$ is the number of prediction time points (note that this return type can also be obtained for `posterior_traj` by specifying the argument `return_matrix = TRUE`). As an example, let's plot the predicted individual-specific longitudinal trajectories for each of the two biomarkers (log serum bilirubin and serum albumin) in the multivariate joint model estimated above. We will do this for three individuals (IDs 6, 7 and 8) who were included in the model estimation. Here are the plots for log serum bilirubin: ```{r plots_872312} p1 <- posterior_traj(mod3, m = 1, ids = 6:8) pp1 <- plot(p1, plot_observed = TRUE) pp1 ``` and here are the plots for serum albumin: ```{r plots_555762} p2 <- posterior_traj(mod3, m = 2, ids = 6:8) pp2 <- plot(p2, plot_observed = TRUE) pp2 ``` The `m` argument specifies which biomarker we want to predict for (only relevant for a multivariate joint model). The `ids` argument is optional, and specifies a subset of individuals for whom we want to predict. In the plotting method, the `plot_observed = TRUE` specifies that we want to include the observed biomarker values in the plot of the longitudinal trajectory. If we wanted to extrapolate the trajectory forward from the event or censoring time for each individual, then this can be easily achieved by specifying `extrapolate = TRUE` in the `posterior_traj` call. 
For example, here is the plot for log serum bilirubin with extrapolation: ```{r plots_65662} p3 <- posterior_traj(mod3, m = 1, ids = 6:8, extrapolate = TRUE) pp3 <- plot(p3, plot_observed = TRUE, vline = TRUE) pp3 ``` and for serum albumin with extrapolation: ```{r plots_998889} p4 <- posterior_traj(mod3, m = 2, ids = 6:8, extrapolate = TRUE) pp4 <- plot(p4, plot_observed = TRUE, vline = TRUE) pp4 ``` Here, we included the `vline = TRUE` which adds a vertical dashed line at the timing of the individual's event or censoring time. The interpolation and extrapolation of the biomarker trajectory can be further controlled through the `control` argument to the `posterior_traj` function; for example, we could specify the number of time points at which to predict, the distance by which to extrapolate, and so on. We could customize these plots further, for example, by using any of the __ggplot2__ functionality or using the additional arguments described in `help(plot.predict.stanjm)`. ### Predicted individual-specific survival curves for in-sample individuals Predicted individual-specific survival probabilities and/or survival curves can be obtained using the `posterior_survfit` function. The function by default returns a data frame with the individual ID, the time, and the predicted survival probability (posterior mean and limits for the 95% credible interval). The uncertainty level for the credible interval can be changed via the `prob` argument. By default, individual-specific survival probabilities are calculated *conditional* on the individual's last known survival time. When we are predicting survival probabilities for individuals that were used in the estimation of the model (i.e. in-sample individuals, where no new covariate data is provided), then the individual's "last known survival time" will be their event or censoring time. 
(Note that if we didn't want to condition on the individual's last known survival time, then we could specify `condition = FALSE`, but we probably wouldn't want to do this unless we were calculating marginal or standardised survival probabilities, which are discussed later). The default argument `extrapolate = TRUE` specifies that the individual-specific conditional survival probabilities will be calculated at evenly spaced time points between the individual's last known survival time and the maximum follow up time that was observed in the estimation sample. The behaviour of the extrapolation can be further controlled via the `control` argument. If we were to specify `extrapolate = FALSE` then the survival probabilities would only be calculated at one time point, which could be specified in the `times` argument (or otherwise would default to the individual's last known survival time). As an example, let us plot the predicted individual-specific conditional survival curve for the same three individuals that were used in the previous example. The predicted survival curve will be obtained under the multivariate joint model estimated above. \ ```{r plots_23812} p5 <- posterior_survfit(mod3, ids = 6:8) pp5 <- plot(p5) pp5 ``` We could customize the plot further, for example, by using any of the __ggplot2__ functionality or using the additional arguments described in `help(plot.survfit.stanjm)`. ### Combined plot of longitudinal trajectories and survival curves The package also provides a convenience plotting function, which combines plots of the individual-specific longitudinal trajectories, and the individual-specific survival function. We can demonstrate this by replotting the predictions for the three individuals in the previous example: ```{r plots_987321, fig.height=13} plot_stack_jm(yplot = list(pp3, pp4), survplot = pp5) ``` Here we can see the strong relationship between the underlying values of the biomarkers and mortality. 
Patient `8` who, relative to patients `6` and `7`, has a higher underlying value for log serum bilirubin and a lower underlying value for serum albumin at the end of their follow up has a far worse predicted probability of survival. ### Predicted individual-specific longitudinal trajectory and survival curve for out-of-sample individuals (i.e. dynamic predictions) Let us take an individual from our training data, in this case the individual with subject ID value `8`. However, we will pretend this individual was not a member of our training data and rather that they are a new individual for whom we have obtained new biomarker measurements. Our goal is to obtain predictions for the longitudinal trajectory for this individual, and their conditional survival curve, conditional on the biomarker measurements we currently have available for them. First, let's extract the data for subject `8` and then rename their subject ID value so that they appear to be an individual who was not included in our training dataset: ```{r newdata_23188} ndL <- pbcLong[pbcLong$id == 8, , drop = FALSE] ndE <- pbcSurv[pbcSurv$id == 8, , drop = FALSE] ndL$id <- paste0("new_patient") ndE$id <- paste0("new_patient") ``` Note that we have both the longitudinal data and event data for this new individual. We require data for both submodels because we are going to generate *dynamic predictions* that require drawing new individual-specific parameters (i.e. random effects) for this individual conditional on their observed data. That means we need to evaluate the likelihood for the full joint model and that requires both the longitudinal and event data (note however that the status indicator `death` will be ignored, since it is assumed that the individual we are predicting for is still alive at the time we wish to generate the predictions). 
Now we can pass this data to the `posterior_traj` function in the same way as for the in-sample individuals, except we will now specify the `newdataLong` and `newdataEvent` arguments. We will also specify the `last_time` argument so that the function knows which variable in the event data specifies the individual's last known survival time (the default behaviour is to use the time of the last biomarker measurement). Our predictions for this new individual for the log serum bilirubin trajectory can be obtained using: ```{r plots_999333} p6 <- posterior_traj(mod3, m = 1, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp6 <- plot(p6, plot_observed = TRUE, vline = TRUE) pp6 ``` and for the serum albumin trajectory: ```{r plots_122223} p7 <- posterior_traj(mod3, m = 2, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp7 <- plot(p7, plot_observed = TRUE, vline = TRUE) pp7 ``` For the conditional survival probabilities we use similar information, provided to the `posterior_survfit` function: ```{r plots_65401} p8 <- posterior_survfit(mod3, newdataLong = ndL, newdataEvent = ndE, last_time = "futimeYears") pp8 <- plot(p8) pp8 ``` We can then use the `plot_stack_jm` function, as we saw in a previous example, to stack the plots of the longitudinal trajectory and the conditional survival curve: ```{r plots_0089231, fig.height=13} plot_stack_jm(yplot = list(pp6, pp7), survplot = pp8) ``` Here we see that the predicted longitudinal trajectories and conditional survival curve for this individual, obtained using the dynamic predictions approach, are similar to the predictions we obtained when we used their individual-specific parameters from the original model estimation. This is because in both situations we are conditioning on the same outcome data. **Side note:** We can even compare the estimated individual specific parameters obtained under the two approaches. 
For example, here is the posterior mean for the estimated individual-specific parameters for individual `8` from the fitted model: ```{r b_pars_23123} c(ranef(mod3)[["Long1"]][["id"]][8,], ranef(mod3)[["Long2"]][["id"]][8,]) ``` and here is the mean of the draws for the individual-specific parameters for individual `8` under the dynamic predictions approach: ```{r b_pars_5436765} colMeans(attr(p6, "b_new")) ``` ### Predicted population-level longitudinal trajectory Suppose we wanted to predict the longitudinal trajectory for each of the biomarkers, marginalising over the distribution of the individual-specific parameters. To do this, we can pass a new data frame with the covariate values we want to use in the predictions. Here, we will demonstrate this by obtaining the predicted trajectory for log serum bilirubin, under the multivariate joint model that was estimated previously. Our prediction data will require the variables `year`, `sex` and `trt`, since these were the covariates used in the longitudinal submodel. We will predict the value of log serum bilirubin at years 0 through 10, for each combination of `sex` and `trt`. We also need to include the `id` variable in our prediction data because this is relevant to the longitudinal submodel. Since we want to marginalise over the individual-specific parameters (i.e. individual-level random effects) we need to note two things: - First, the values for the `id` variable **must not** match any individual used in the model estimation. Here, we use the following `id` values: `"male_notrt"`, `"female_notrt"`, `"male_trt"`, and `"female_trt"`, since each individual in our prediction data represents a different combination of `sex` and `trt`. However, we could have given the individuals any `id` value just as long as it didn't match an individual who was used in the model estimation - Second, we need to specify the argument `dynamic = FALSE` when calling `posterior_traj`. 
This specifies that we do not want to draw new individual-specific parameters conditional on outcome data observed up to some time $t$. Instead, we want predictions that marginalise over the distribution of individual-specific parameters and are therefore conditional *only on covariates* and not conditional on outcome data for the new individuals. Here is our prediction data: ```{r newdata_19213} ndL <- expand.grid(year = seq(0, 10, 1), sex = c("m", "f"), trt = 0:1) ndL$id <- rep(c("male_notrt", "female_notrt", "male_trt", "female_trt"), each = 11) ndL <- ndL[, c(4,1,2,3)] str(ndL) ``` And to predict the marginal longitudinal trajectory for log serum bilirubin under each covariate profile and plot it we can type: ```{r plot_traj_218391} p1 <- posterior_traj(mod3, m = 1, newdataLong = ndL, dynamic = FALSE) plot(p1) + ggplot2::coord_cartesian(ylim = c(-10,15)) ``` Because we are marginalising over the distribution of the individual-specific parameters, we are incorporating all the variation related to between-individual differences, and therefore the prediction interval is wide (shown by the shaded area around the marginal longitudinal trajectory). The magnitude of the effects of both `sex` and `trt` are relatively small compared to the population-level effect of `year` and the between-individual variation in the intercept and slope. For example, here are the point estimates for the population-level effects of `sex`, `trt`, and `year`: ```{r fixef_2132} fixef(mod3)$Long1 ``` and here are the standard deviations for the individual-level random effects: ```{r ranef_5664} VarCorr(mod3) ``` This shows us that the point estimates for the population-level effects of `sex` and `trt` are 0.57 and -0.10, respectively, whereas the standard deviation for the individual-specific intercept and slope parameters are 1.24 and 0.19; hence, any differences due to the population-level effects of gender and treatment (i.e. 
differences in the black line across the four panels of the plot) are swamped by the width of the uncertainty intervals (i.e. the grey shaded areas). ### Standardised survival curves In this example we show how a standardised survival curve can be obtained, where the $i = 1,...,N^{pred}$ individuals used in generating the standardised survival curve are the same individuals that were used in estimating the model. We will obtain the survival curve for the multivariate joint model estimated in an earlier example (`mod3`). The `standardise = TRUE` argument to `posterior_survfit` specifies that we want to obtain individual-specific predictions of the survival curve and then average these. Because, in practical terms, we need to obtain survival probabilities at time $t$ for each individual and then average them, we want to explicitly specify the values of $t$ we want to use (and the same values of $t$ will be used for all individuals). We specify the values of $t$ to use via the `times` argument; here we will predict the standardised survival curve at time 0 and then for convenience we can just specify `extrapolate = TRUE` (which is the default anyway) which will mean we automatically predict at 10 evenly spaced time points between 0 and the maximum event or censoring time. ```{r standsurv} p1 <- posterior_survfit(mod3, standardise = TRUE, times = 0) head(p1) # data frame with standardised survival probabilities plot(p1) # plot the standardised survival curve ``` # References 1. Henderson R, Diggle P, Dobson A. Joint modelling of longitudinal measurements and event time data. *Biostatistics* 2000;**1**(4):465-80. 2. Wulfsohn MS, Tsiatis AA. A joint model for survival and longitudinal data measured with error. *Biometrics* 1997;**53**(1):330-9. 3. Tsiatis AA, Davidian M. Joint modeling of longitudinal and time-to-event data: An overview. *Stat Sinica* 2004;**14**(3):809-34. 4. Gould AL, Boye ME, Crowther MJ, Ibrahim JG, Quartey G, Micallef S, et al. 
Joint modeling of survival and longitudinal non-survival data: current methods and issues. Report of the DIA Bayesian joint modeling working group. *Stat Med*. 2015;**34**(14):2181-95. 5. Rizopoulos D. *Joint Models for Longitudinal and Time-to-Event Data: With Applications in R* CRC Press; 2012. 6. Liu G, Gould AL. Comparison of alternative strategies for analysis of longitudinal trials with dropouts. *J Biopharm Stat* 2002;**12**(2):207-26. 7. Prentice RL. Covariate Measurement Errors and Parameter-Estimation in a Failure Time Regression-Model. *Biometrika* 1982;**69**(2):331-42. 8. Baraldi AN, Enders CK. An introduction to modern missing data analyses. *J Sch Psychol* 2010;**48**(1):5-37. 9. Philipson PM, Ho WK, Henderson R. Comparative review of methods for handling drop-out in longitudinal studies. *Stat Med* 2008;**27**(30):6276-98. 10. Pantazis N, Touloumi G. Bivariate modelling of longitudinal measurements of two human immunodeficiency type 1 disease progression markers in the presence of informative drop-outs. *Applied Statistics* 2005;**54**:405-23. 11. Taylor JM, Park Y, Ankerst DP, et al. Real-time individual predictions of prostate cancer recurrence using joint models. *Biometrics* 2013;**69**(1):206-13. 12. Brilleman SL, Crowther MJ, Moreno-Betancur M, Buros Novik J, Wolfe R. Joint longitudinal and time-to-event models via Stan. *In: Proceedings of StanCon 2018.* https://github.com/stan-dev/stancon_talks 12. Stan Development Team. *rstanarm: Bayesian applied regression modeling via Stan.* R package version 2.14.1. https://mc-stan.org/. 2016. 13. R Core Team. *R: A language and environment for statistical computing.* Vienna, Austria: R Foundation for Statistical Computing; 2015. 14. Crowther MJ, Lambert PC, Abrams KR. Adjusting for measurement error in baseline prognostic biomarkers included in a time-to-event analysis: a joint modelling approach. *BMC Med Res Methodol* 2013;**13**. 15. Hickey GL, Philipson P, Jorgensen A, Kolamunnage-Dona R. 
Joint modelling of time-to-event and multivariate longitudinal outcomes: recent developments and issues. *BMC Med Res Methodol* 2016;**16**(1):117. 16. Rizopoulos D, Ghosh P. A Bayesian semiparametric multivariate joint model for multiple longitudinal outcomes and a time-to-event. *Stat Med*. 2011;**30**(12):1366-80. 17. Laurie DP. Calculation of Gauss-Kronrod quadrature rules. *Math Comput* 1997;**66**(219):1133-45. 18. Rizopoulos D. Dynamic Predictions and Prospective Accuracy in Joint Models for Longitudinal and Time-to-Event Data. *Biometrics* 2011;**67**(3):819-829. 19. Therneau T, Grambsch P. *Modeling Survival Data: Extending the Cox Model* Springer-Verlag, New York; 2000. ISBN: 0-387-98784-3 rstanarm/inst/doc/priors.html0000644000176200001440000012551214551551756016053 0ustar liggesusers Prior Distributions for rstanarm Models

Prior Distributions for rstanarm Models

Jonah Gabry and Ben Goodrich

2024-01-16

July 2020 Update

As of July 2020 there are a few changes to prior distributions:

  • Except for in default priors, autoscale now defaults to FALSE. This means that when specifying custom priors you no longer need to manually set autoscale=FALSE every time you use a distribution.

  • There are minor changes to the default priors on the intercept and (non-hierarchical) regression coefficients. See Default priors and scale adjustments below.

We recommend the new book Regression and Other Stories, which discusses the background behind the default priors in rstanarm and also provides examples of specifying non-default priors.

Introduction

This vignette provides an overview of how the specification of prior distributions works in the rstanarm package. It is still a work in progress and more content will be added in future versions of rstanarm. Before reading this vignette it is important to first read the How to Use the rstanarm Package vignette, which provides a general overview of the package.

Every modeling function in rstanarm offers a subset of the arguments in the table below which are used for specifying prior distributions for the model parameters.


Argument Used in Applies to
prior_intercept All modeling functions except stan_polr and stan_nlmer Model intercept, after centering predictors.
prior All modeling functions Regression coefficients. Does not include coefficients that vary by group in a multilevel model (see prior_covariance).
prior_aux stan_glm*, stan_glmer*, stan_gamm4, stan_nlmer Auxiliary parameter, e.g. error SD (interpretation depends on the GLM).
prior_covariance stan_glmer*, stan_gamm4, stan_nlmer Covariance matrices in multilevel models with varying slopes and intercepts. See the stan_glmer vignette for details on this prior.

* stan_glm also implies stan_glm.nb. stan_glmer implies stan_lmer and stan_glmer.nb.


The stan_polr, stan_betareg, and stan_gamm4 functions also provide additional arguments specific only to those models:

Argument Used only in Applies to
prior_smooth stan_gamm4 Prior for hyperparameters in GAMs (lower values yield less flexible smooth functions).
prior_counts stan_polr Prior counts of an ordinal outcome (when predictors at sample means).
prior_z stan_betareg Coefficients in the model for phi.
prior_intercept_z stan_betareg Intercept in the model for phi.
prior_phi stan_betareg phi, if not modeled as function of predictors.


To specify these arguments the user provides a call to one of the various available functions for specifying priors (e.g., prior = normal(0, 1), prior = cauchy(c(0, 1), c(1, 2.5))). The documentation for these functions can be found at help("priors"). The rstanarm documentation and the other vignettes provide many examples of using these arguments to specify priors and the documentation for these arguments on the help pages for the various rstanarm modeling functions (e.g., help("stan_glm")) also explains which distributions can be used when specifying each of the prior-related arguments.


Default (Weakly Informative) Prior Distributions

With very few exceptions, the default priors in rstanarm —the priors used if the arguments in the tables above are untouched— are not flat priors. Rather, the defaults are intended to be weakly informative. That is, they are designed to provide moderate regularization and help stabilize computation. For many (if not most) applications the defaults will perform well, but this is not guaranteed (there are no default priors that make sense for every possible model specification).

The way rstanarm attempts to make priors weakly informative by default is to internally adjust the scales of the priors. How this works (and, importantly, how to turn it off) is explained below, but first we can look at the default priors in action by fitting a basic linear regression model with the stan_glm function. For specifying priors, the stan_glm function accepts the arguments prior_intercept, prior, and prior_aux. To use the default priors we just leave those arguments at their defaults (i.e., we don’t specify them):

The prior_summary function provides a concise summary of the priors used:

Priors for model 'default_prior_test' 
------
Intercept (after predictors centered)
  Specified prior:
    ~ normal(location = 20, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 20, scale = 15)

Coefficients
  Specified prior:
    ~ normal(location = [0,0], scale = [2.5,2.5])
  Adjusted prior:
    ~ normal(location = [0,0], scale = [15.40,30.20])

Auxiliary (sigma)
  Specified prior:
    ~ exponential(rate = 1)
  Adjusted prior:
    ~ exponential(rate = 0.17)
------
See help('prior_summary.stanreg') for more details

Starting from the bottom up, we can see that:

  • Auxiliary: sigma, the error standard deviation, has a default prior that is \(\mathsf{exponential}(1)\). However, as a result of the automatic rescaling, the actual scale used was 6.03.

  • Coefficients: By default the regression coefficients (in this case the coefficients on the wt and am variables) are treated as a priori independent with normal priors centered at 0 and with scale (standard deviation) \(2.5\). Like for sigma, in order for the default to be weakly informative rstanarm will adjust the scales of the priors on the coefficients. As a result, the prior scales actually used were 15.40 and 30.20.

  • Intercept: For the intercept, the default prior is normal with mean \(0\) and standard deviation \(2.5\), but in this case the standard deviation was adjusted to 15.07. There is also a note in parentheses informing you that the prior applies to the intercept after all predictors have been centered (a similar note can be found in the documentation of the prior_intercept argument). In many cases the value of \(y\) when \(x=0\) is not meaningful and it is easier to think about the value when \(x = \bar{x}\). Therefore placing a prior on the intercept after centering the predictors typically makes it easier to specify a reasonable prior for the intercept. (Note: the user does not need to manually center the predictors.)

To disable the centering of the predictors, you need to omit the intercept from the model formula and include a column of ones as a predictor (which cannot be named "(Intercept)" in the data.frame). Then you can specify a prior “coefficient” for the column of ones.

The next two subsections describe how the rescaling works and how to easily disable it if desired.

Default priors and scale adjustments

Automatic scale adjustments happen in two cases:

  1. When the default priors are used.
  2. When the user sets autoscale=TRUE when specifying their own prior (e.g., normal(0, 3, autoscale=TRUE)). See help("priors") for a list of distributions to see which have an autoscale argument.

Here we describe how the default priors work for the intercept, regression coefficients, and (if applicable) auxiliary parameters. Autoscaling when not using default priors works analogously (if autoscale=TRUE).

Assume we have outcome \(y\) and predictors \(x_1,\ldots,x_k\) and our model has linear predictor

\[ \alpha + \beta_1 x_1 + \dots + \beta_K x_K. \]

Regression coefficients

The default prior on regression coefficients \(\beta_k\) is

\[ \beta_k \sim \mathsf{Normal}(0, \, 2.5 \cdot s_y/s_x) \] where \(s_x = \text{sd}(x)\) and \[ s_y = \begin{cases} \text{sd}(y) & \text{if } \:\: {\tt family=gaussian(link)}, \\ 1 & \text{otherwise}. \end{cases} \]

This corresponds to prior = normal(0, 2.5, autoscale = TRUE) in rstanarm code.

Intercept

The intercept is assigned a prior indirectly. The prior_intercept argument refers to the intercept after all predictors have been centered (internally by rstanarm). That is, instead of placing the prior on the expected value of \(y\) when \(x=0\), we place a prior on the expected value of \(y\) when \(x = \bar{x}\). The default prior for this centered intercept, say \(\alpha_c\), is

\[ \alpha_c \sim \mathsf{Normal}(m_y, \, 2.5 \cdot s_y) \] where

\[ m_y = \begin{cases} \bar{y} & \text{if } \:\: {\tt family=gaussian(link="identity")}, \\ 0 & \text{otherwise} \end{cases} \] and \(s_y\) is the same as above (either 1 or \(\text{sd(y)}\)).

Auxiliary parameters

The default prior on the auxiliary parameter (residual standard deviation for Gaussian, shape for gamma, reciprocal dispersion for negative binomial, etc.) is an exponential distribution with rate \(1/s_y\)

\[ \text{aux} \sim \mathsf{Exponential}(1/s_y) \] where \(s_y\) is the same as above (either 1 or \(\text{sd(y)}\)).

This corresponds to prior_aux = exponential(1, autoscale=TRUE) in rstanarm code.

Note on data-based priors

Because the scaling is based on the scales of the predictors (and possibly the outcome) these are technically data-dependent priors. However, since these priors are quite wide (and in most cases rather conservative), the amount of information used is weak and mainly takes into account the order of magnitude of the variables. This enables rstanarm to offer defaults that are reasonable for many models.

Disabling prior scale adjustments

To disable automatic rescaling simply specify a prior other than the default. rstanarm versions up to and including version 2.19.3 used to require you to explicitly set the autoscale argument to FALSE, but now autoscaling only happens by default for the default priors. To use autoscaling with manually specified priors you have to set autoscale = TRUE. For example, this prior specification will not include any autoscaling:

We can verify that the prior scales weren’t adjusted by checking prior_summary:

Priors for model 'test_no_autoscale' 
------
Intercept (after predictors centered)
 ~ student_t(df = 4, location = 0, scale = 10)

Coefficients
 ~ normal(location = [0,0], scale = [5,5])

Auxiliary (sigma)
 ~ half-cauchy(location = 0, scale = 3)
------
See help('prior_summary.stanreg') for more details


How to Specify Flat Priors (and why you typically shouldn’t)

Uninformative is usually unwarranted and unrealistic (flat is frequently frivolous and fictional)

When “non-informative” or “uninformative” is used in the context of prior distributions, it typically refers to a flat (uniform) distribution or a nearly flat distribution. Sometimes it may also be used to refer to the parameterization-invariant Jeffreys prior. Although rstanarm does not prevent you from using very diffuse or flat priors, unless the data is very strong it is wise to avoid them.

Rarely is it appropriate in any applied setting to use a prior that gives the same (or nearly the same) probability mass to values near zero as it gives values bigger than the age of the universe in nanoseconds. Even a much narrower prior than that, e.g., a normal distribution with \(\sigma = 500\), will tend to put much more probability mass on unreasonable parameter values than reasonable ones. In fact, using the prior \(\theta \sim \mathsf{Normal(\mu = 0, \sigma = 500)}\) implies some strange prior beliefs. For example, you believe a priori that \(P(|\theta| < 250) < P(|\theta| > 250)\), which can easily be verified by doing the calculation with the normal CDF

[1] "Pr(-250 < theta < 250) = 0.38"

or via approximation with Monte Carlo draws:

[1] "Pr(-250 < theta < 250) = 0.38"
_There is much more probability mass outside the interval (-250, 250)._

There is much more probability mass outside the interval (-250, 250).


This will almost never correspond to the prior beliefs of a researcher about a parameter in a well-specified applied regression model and yet priors like \(\theta \sim \mathsf{Normal(\mu = 0, \sigma = 500)}\) (and more extreme) remain quite popular.

Even when you know very little, a flat or very wide prior will almost never be the best approximation to your beliefs about the parameters in your model that you can express using rstanarm (or other software). Some amount of prior information will be available. For example, even if there is nothing to suggest a priori that a particular coefficient will be positive or negative, there is almost always enough information to suggest that different orders of magnitude are not equally likely. Making use of this information when setting a prior scale parameter is simple —one heuristic is to set the scale an order of magnitude bigger than you suspect it to be— and has the added benefit of helping to stabilize computations.

A more in-depth discussion of non-informative vs weakly informative priors is available in the case study How the Shape of a Weakly Informative Prior Affects Inferences.

Specifying flat priors

rstanarm will use flat priors if NULL is specified rather than a distribution. For example, to use a flat prior on regression coefficients you would specify prior=NULL:

In this case we let rstanarm use the default priors for the intercept and error standard deviation (we could change that if we wanted), but the coefficient on the wt variable will have a flat prior. To double check that indeed a flat prior was used for the coefficient on wt we can call prior_summary:

Priors for model 'flat_prior_test' 
------
Intercept (after predictors centered)
  Specified prior:
    ~ normal(location = 20, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 20, scale = 15)

Coefficients
 ~ flat

Auxiliary (sigma)
  Specified prior:
    ~ exponential(rate = 1)
  Adjusted prior:
    ~ exponential(rate = 0.17)
------
See help('prior_summary.stanreg') for more details


Informative Prior Distributions

Although the default priors tend to work well, prudent use of more informative priors is encouraged. For example, suppose we have a linear regression model \[y_i \sim \mathsf{Normal}\left(\alpha + \beta_1 x_{1,i} + \beta_2 x_{2,i}, \, \sigma\right)\] and we have evidence (perhaps from previous research on the same topic) that approximately \(\beta_1 \in (-15, -5)\) and \(\beta_2 \in (-1, 1)\). An example of an informative prior for \(\boldsymbol{\beta} = (\beta_1, \beta_2)'\) could be

\[ \boldsymbol{\beta} \sim \mathsf{Normal} \left( \begin{pmatrix} -10 \\ 0 \end{pmatrix}, \begin{pmatrix} 5^2 & 0 \\ 0 & 2^2 \end{pmatrix} \right), \] which sets the prior means at the midpoints of the intervals and then allows for some wiggle room on either side. If the data are highly informative about the parameter values (enough to overwhelm the prior) then this prior will yield similar results to a non-informative prior. But as the amount of data and/or the signal-to-noise ratio decrease, using a more informative prior becomes increasingly important.

If the variables y, x1, and x2 are in the data frame dat then this model can be specified as

We left the priors for the intercept and error standard deviation at their defaults, but informative priors can be specified for those parameters in an analogous manner.

rstanarm/inst/doc/glmer.R0000644000176200001440000000543514551550347015074 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ---- results = "hide"-------------------------------------------------------- library(rstanarm) data(roaches) roaches$roach1 <- roaches$roach1 / 100 roaches$log_exposure2 <- log(roaches$exposure2) post <- stan_gamm4( y ~ s(roach1) + treatment + log_exposure2, random = ~(1 | senior), data = roaches, family = neg_binomial_2, QR = TRUE, cores = 2, chains = 2, adapt_delta = 0.99, seed = 12345 ) ## ----------------------------------------------------------------------------- plot_nonlinear(post) ## ----------------------------------------------------------------------------- data("Orange", package = "datasets") Orange$age <- Orange$age / 100 Orange$circumference <- Orange$circumference / 100 ## ---- warning=TRUE------------------------------------------------------------ startvec <- c(Asym = 2, xmid = 7.25, scal = 3.5) library(lme4) nm1 <- nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, start = startvec) summary(nm1) ## ---- echo = FALSE------------------------------------------------------------ grep("^SS[[:lower:]]+", ls("package:stats"), value = TRUE) ## ---- results = "hide"-------------------------------------------------------- post1 <- stan_nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, cores = 2, seed = 12345, init_r = 0.5) ## ----------------------------------------------------------------------------- post1 ## 
----------------------------------------------------------------------------- plot(post1, regex_pars = "^[b]") ## ----------------------------------------------------------------------------- nd <- data.frame(age = 1:20, Tree = factor("6", levels = 1:6)) PPD <- posterior_predict(post1, newdata = nd) PPD_df <- data.frame(age = as.factor(rep(1:20, each = nrow(PPD))), circumference = c(PPD)) ggplot(PPD_df, aes(age, circumference)) + geom_boxplot() ## ---- eval = FALSE------------------------------------------------------------ # post3 <- stan_nlmer(conc ~ SSfol(Dose, Time, lKe, lKa, lCl) ~ # (0 + lKe + lKa + lCl | Subject), data = Theoph, # cores = 2, seed = 12345, # QR = TRUE, init_r = 0.25, adapt_delta = 0.999) # pairs(post3, regex_pars = "^l") # pairs(post3, regex_pars = "igma") rstanarm/inst/doc/mrp.html0000644000176200001440000323746214551551710015334 0ustar liggesusers MRP with rstanarm

MRP with rstanarm

Lauren Kennedy and Jonah Gabry

2024-01-16

Inference about the population is one of the main aims of statistical methodology. Multilevel regression and post-stratification (MRP) (Little 1993; Lax and Phillips 2009; Park, Gelman, and Bafumi 2004) has been shown to be an effective method of adjusting the sample to be more representative of the population for a set of key variables. Recent work has demonstrated the effectiveness of MRP when there are a number of suspected interactions between these variables (Ghitza and Gelman 2013), replicated by Lei, Gelman, and Ghitza (2017). While Ghitza and Gelman (2013) use approximate marginal maximum likelihood estimates, Lei, Gelman, and Ghitza (2017) implement a fully Bayesian approach through Stan.

The rstanarm package allows the user to conduct complicated regression analyses in Stan with the simplicity of standard formula notation in R. The purpose of this vignette is to demonstrate the utility of rstanarm when conducting MRP analyses. We will not delve into the details of conducting logistic regression with rstanarm as this is already covered in other vignettes.

Most of the code for data manipulation and plotting is not shown in the text but is available in the R markdown source code on GitHub.

The Data

Three data sets are simulated by the function simulate_mrp_data(), which is defined in the source code for this R markdown document (and printed in the appendix). The first, sample, contains \(n\) observations from the individuals that form our sample (i.e., \(n\) rows). For each individual we have their age (recorded as membership within a specific age bracket), ethnicity, income level (recorded as membership within a specific bracket), and gender. Participants were randomly sampled from a state.

MRP is often used for dichotomous fixed choice questions (e.g., McCain’s share of two party vote (Ghitza and Gelman 2013); support for George W Bush, (Park, Gelman, and Bafumi 2004); or support for the death penalty (Shirley and Gelman 2015)), so we will use a binary variable as the outcome in this vignette. However, MRP can also be used if there are more than two categories or if the outcome is continuous.

As this is a simple toy example, we will describe the proportion of the population who would choose to adopt a cat over a dog, given the opportunity. We will simulate data using a function that is included in the appendix of this document. The simulate_mrp_data() function simulates a sample from a much larger population. It returns a list including the sample, population poststratification matrix and the true population preference for cats.

List of 3
 $ sample   :'data.frame':  1200 obs. of  7 variables:
  ..$ cat_pref: num [1:1200] 1 1 1 1 0 0 1 1 1 1 ...
  ..$ male    : num [1:1200] 0 0 0 0 0 0 0 1 0 0 ...
  ..$ age     : num [1:1200] 5 6 3 6 1 5 7 6 5 7 ...
  ..$ eth     : num [1:1200] 3 1 2 1 1 1 3 3 2 3 ...
  ..$ income  : num [1:1200] 3 1 2 1 3 1 2 1 1 1 ...
  ..$ state   : num [1:1200] 19 45 21 47 12 38 2 20 11 34 ...
  ..$ id      : num [1:1200] 1 2 3 4 5 6 7 8 9 10 ...
 $ poststrat:'data.frame':  6300 obs. of  6 variables:
  ..$ male  : num [1:6300] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ eth   : num [1:6300] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ age   : num [1:6300] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ income: num [1:6300] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ state : num [1:6300] 1 2 3 4 5 6 7 8 9 10 ...
  ..$ N     : num [1:6300] 103741 104862 164704 133049 167578 ...
 $ true_popn:'data.frame':  6300 obs. of  6 variables:
  ..$ male    : num [1:6300] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ eth     : num [1:6300] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ age     : num [1:6300] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ income  : num [1:6300] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ state   : num [1:6300] 1 2 3 4 5 6 7 8 9 10 ...
  ..$ cat_pref: num [1:6300] 0.5 0.426 0.269 0.574 0.332 ...
     cat_pref male age eth income state   id
1           1    0   5   3      3    19    1
2           1    0   6   1      1    45    2
3           1    0   3   2      2    21    3
4           1    0   6   1      1    47    4
5           0    0   1   1      3    12    5
6           0    0   5   1      1    38    6
1195        0    0   6   3      2    21 1195
1196        1    0   3   3      1    46 1196
1197        1    0   5   1      2    48 1197
1198        0    1   1   1      1    14 1198
1199        0    0   1   3      1    12 1199
1200        0    1   3   2      2    12 1200

The variables describing the individual (age, ethnicity, income level and gender) will be used to match the sample to the population of interest. To do this we will need to form a post-stratification table, which contains the number of people in each possible combination of the post-stratification variables. We have 4 variables with 2 (male), 7 (age), 3 (ethnicity) and 3 (income) levels, so there are 2x7x3x3 different levels. Participants are also selected from a state (50), increasing the number of possible levels to \(6300\).

To make inference about the population, we will also need the proportion of individuals in each post stratification cell at the population level. We will use this information to update the estimate of our outcome variable from the sample so that is more representative of the population. This is particularly helpful if there is a belief that the sample has some bias (e.g., a greater proportion of females responded than males), and that the bias impacts the outcome variable (e.g., maybe women are more likely to pick a cat than men). For each possible combination of factors, the post-stratification table shows the proportion/number of the population in that cell (rather than the proportion/number in the sample in the cell).

Below we read in the poststrat data from our simulated data list.

     male eth age income state      N
1       0   1   1      1     1 103741
2       0   1   1      1     2 104862
3       0   1   1      1     3 164704
4       0   1   1      1     4 133049
5       0   1   1      1     5 167578
6       0   1   1      1     6 109814
6295    1   3   7      3    45  10061
6296    1   3   7      3    46  13055
6297    1   3   7      3    47  12578
6298    1   3   7      3    48  13754
6299    1   3   7      3    49   9937
6300    1   3   7      3    50   9646

One of the benefits of using a simulated data set for this example is that the actual population level probability of cat preference is known for each post-stratification cell. In real world data analysis, we don’t have this luxury, but we will use it later in this case study to check the predictions of the model. Details regarding the simulation of this data are available in the appendix.

     male eth age income state  cat_pref
1       0   1   1      1     1 0.5000000
2       0   1   1      1     2 0.4255575
3       0   1   1      1     3 0.2689414
4       0   1   1      1     4 0.5744425
5       0   1   1      1     5 0.3318122
6       0   1   1      1     6 0.6224593
6295    1   3   7      3    45 0.7502601
6296    1   3   7      3    46 0.8581489
6297    1   3   7      3    47 0.9241418
6298    1   3   7      3    48 0.6456563
6299    1   3   7      3    49 0.4255575
6300    1   3   7      3    50 0.9308616

Exploring Graphically

Before we begin with the MRP analysis, we first explore the data set with some basic visualizations.

Comparing sample to population

The aim of this analysis is to obtain a population estimation of cat preference given our sample of \(1200\). We can see in the following plot the difference in proportions between the sample and the population. Horizontal panels represent each variable. Bars represent the proportion of the sample (solid) and population (dashed) in each category (represented by colour and the x-axis). For ease of viewing, we ordered the states in terms of the proportion of the sample in that state that was observed. We will continue this formatting choice throughout this vignette.

Effect of the post-stratification variable on preference for cats

Secondly, we consider the evidence of different proportions across different levels of a post-stratification variable, which we should consider for each of the post-stratification variables. Here we break down the proportion of individuals who would prefer a cat (y-axis) by different levels (x-axis) of the post-stratification variable (horizontal panels). We can see from this figure that there appears to be differences in cat preference for the different levels of post-stratification variables. Given the previous figure, which suggested that the sample was different to the population in the share of different levels of these variables, this should suggest that using the sample to estimate cat preference may not give accurate estimates of cat preference in the population.

Interaction effect

Thirdly, we demonstrate visually that there is an interaction between age and gender and compare to a case where there is no interaction. Here a simulated interaction effect between age (x-axis) and gender (color), right panel, is contrasted with no interaction effect (left panel). While both panels demonstrate a difference between the genders on the outcome variable (y-axis), only the second panel shows this difference changing with the variable on the x-axis.

Design effect

Lastly we look at the difference in cat preference between states, which will form the basis for the multi-level component of our analysis. Participants were randomly selected from particular states. Plotting the state (x-axis) against the overall proportion of participants who prefer cats (y-axis) demonstrates state differences. The downward slope is because we ordered the x-axis by the proportion of cat preference for ease of viewing. We also include a second plot with a horizontal line to represent the overall preference for cats in the total population, according to the sample.

MRP with rstanarm

From visual inspection, it appears that different levels of post-stratification variable have different preferences for cats. Our survey also appears to have sampling bias; indicating that some groups were over/under sampled relative to the population. The net effect of this is that we could not make good population level estimates of cat preference straight from our sample. Our aim is to infer the preference for cats in the population using the post-stratification variables to account for systematic differences between the sample and population. Using rstanarm, this becomes a simple procedure.

The first step is to use a multi-level logistic regression model to predict preference for cats in the sample given the variables that we will use to post-stratify. Note that we actually have more rows in the post-stratification matrix than we have observed units, so there are some cells in the poststrat matrix that we don’t observe. We can use a multi-level model to partially pool information across the different levels within each variable to assist with this. In the model described below, we use a fixed intercept for gender, and hierarchically modeled varying intercepts for each of the other factors.

Let \(\theta_{j}\) denote the preference for cats in the \(j\)th poststratification cell. The non-hierarchical part of the model can be written as

\[\theta_j= logit^{-1}(X_{j}\beta),\]

where here \(X\) only contains an indicator for male or female and an interaction term with age.

Adding the varying intercepts for the other variables the model becomes

\[ \theta_j = logit^{-1}( X_{j}\beta + \alpha_{\rm state[j]}^{\rm state} + \alpha_{\rm age[j]}^{\rm age} + \alpha_{\rm eth[j]}^{\rm eth} + \alpha_{\rm inc[j]}^{\rm inc} ) \] with

$$
\begin{align*}
\alpha_{\rm state[j]}^{\rm state} & \sim N(0,\sigma^{\rm state}) \\
\alpha_{\rm age[j]}^{\rm age} & \sim N(0,\sigma^{\rm age}) \\
\alpha_{\rm eth[j]}^{\rm eth} & \sim N(0,\sigma^{\rm eth}) \\
\alpha_{\rm inc[j]}^{\rm inc} & \sim N(0,\sigma^{\rm inc}) \\
\end{align*}
$$

Each of \(\sigma^{\rm state}\), \(\sigma^{\rm age}\), \(\sigma^{\rm eth}\), and \(\sigma^{\rm inc}\) are estimated from the data (in this case using rstanarm’s default priors), which is beneficial as it means we share information between the levels of each variable and we can prevent levels with less data from being too sensitive to the few observed values. This also helps with the levels we don’t observe at all, as it will use information from the levels that we do observe. For more on the benefits of this type of model, see Gelman and others (2005), and see Ghitza and Gelman (2013) and Si et al. (2017) for more complicated extensions that involve deep interactions and structured prior distributions.

Here is the model specified using the stan_glmer() function in rstanarm, which uses the same formula syntax as the glmer() function from the lme4 package:

stan_glmer
 family:       binomial [logit]
 formula:      cat_pref ~ factor(male) + factor(male) * factor(age) + (1 | state) + 
       (1 | age) + (1 | eth) + (1 | income)
 observations: 1200
------
                           Median MAD_SD
(Intercept)                 0.6    0.9  
factor(male)1              -0.3    0.5  
factor(age)2               -0.1    1.1  
factor(age)3               -0.4    0.8  
factor(age)4                0.7    0.8  
factor(age)5                0.4    0.8  
factor(age)6                1.1    0.8  
factor(age)7                0.9    0.8  
factor(male)1:factor(age)2  0.2    1.7  
factor(male)1:factor(age)3 -0.7    0.8  
factor(male)1:factor(age)4 -1.2    0.7  
factor(male)1:factor(age)5 -1.0    0.7  
factor(male)1:factor(age)6 -0.5    0.6  
factor(male)1:factor(age)7 -1.1    0.6  

Error terms:
 Groups Name        Std.Dev.
 state  (Intercept) 1.10    
 age    (Intercept) 1.04    
 eth    (Intercept) 0.92    
 income (Intercept) 0.72    
Num. levels: state 50, age 7, eth 3, income 3 

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

As a first pass to check whether the model is performing well, note that there are no warnings about divergences, failure to converge or tree depth. If these errors do occur, more information on how to alleviate them is provided here.

Population Estimate

From this we get a summary of the baseline log odds of cat preference at the first element of each factor (i.e., male = 0, age = 1) for each state, plus estimates on variability of the intercept for state, ethnicity, age and income. While this is interesting, currently all we have achieved is a model that predicts cat preference given a number of factor-type predictors in a sample. What we would like to do is estimate cat preference in the population by accounting for differences between our sample and the population. We use the posterior_linpred() function to obtain posterior estimates for cat preference given the proportion of people in the population in each level of the factors included in the model.

 mean    sd 
0.567 0.024 

We can compare this to the estimate we would have made if we had just used the sample:

[1] 0.681

We can also add it to the last figure to graphically represent the difference between the sample and population estimate.

As this is simulated data, we can look directly at the preference for cats that we simulated from to consider how good our estimate is.

[1] 0.561

Which we will also add to the figure.

Our MRP estimate is barely off, while our sample estimate is off by more than 10 percentage points. This indicates that using MRP helps to make estimates for the population from our sample that are more accurate.

Estimates for states

One of the nice benefits of using MRP to make inference about the population is that we can change the population of interest. In the previous paragraph we inferred the preference for cats in the whole population. We can also infer the preference for cats in a single state. In the following code we post-stratify for each state in turn. Note that we can reuse the predictive model from the previous step and update for different population demographics. This is particularly useful for complicated cases or large data sets where the model takes some time to fit.

As before, first we use the proportion of the population in each combination of the post-stratification groups to estimate the proportion of people who preferred cats in the population, only in this case the population of interest is the state.

   State model_state_pref sample_state_pref true_state_pref  N
1      1           0.5150            0.5000          0.5966  2
2      2           0.6659            0.8571          0.5315  7
3      3           0.4451            0.5385          0.3803 13
4      4           0.7603            0.8846          0.6590 26
5      5           0.4266            0.5200          0.4439 50
6      6           0.6810            0.8947          0.6982 19
7      7           0.7130            0.8250          0.6386 40
8      8           0.7021            0.9167          0.7850 12
9      9           0.7201            0.8571          0.6788 14
10    10           0.6134            0.7727          0.5966 44
11    11           0.7331            0.8824          0.7850 17
12    12           0.3903            0.4737          0.4012 57
13    13           0.7145            1.0000          0.7690  5
14    14           0.6059            0.6818          0.5966 22
15    15           0.2026            0.2000          0.1181 15
16    16           0.3854            0.4706          0.3599 34
17    17           0.2175            0.1538          0.2169 13
18    18           0.3462            0.0000          0.2169  2
19    19           0.4719            0.6000          0.4656  5
20    20           0.7897            0.8974          0.7850 39
21    21           0.3841            0.5250          0.3599 40
22    22           0.7624            0.9167          0.8755 12
23    23           0.5944            0.7333          0.5751 30
24    24           0.6283            1.0000          0.7690  2
25    25           0.7392            0.8261          0.8002 23
26    26           0.7577            0.8704          0.7524 54
27    27           0.2637            0.3500          0.2657 20
28    28           0.5651            0.6667          0.4656  3
29    29           0.5366            0.6757          0.5095 37
30    30           0.5206            0.5946          0.5315 37
31    31           0.5971            0.7143          0.7350  7
32    32           0.4213            0.5000          0.3399 42
33    33           0.6079            0.7353          0.6178 68
34    34           0.5807            0.7200          0.4656 25
35    35           0.5959            0.7200          0.6590 25
36    36           0.5286            0.6250          0.5751  8
37    37           0.6960            0.8571          0.8855  7
38    38           0.2945            0.3750          0.3205 48
39    39           0.7256            0.8824          0.8533 17
40    40           0.3228            0.3684          0.2657 19
41    41           0.7675            0.9286          0.6982 14
42    42           0.6741            0.8235          0.5751 34
43    43           0.5281            0.6667          0.5315  6
44    44           0.6588            0.8000          0.4439 15
45    45           0.6623            0.8077          0.5315 26
46    46           0.6496            0.7750          0.6788 40
47    47           0.7204            0.8621          0.8002 29
48    48           0.5114            0.6545          0.4224 55
49    49           0.2343            0.2500          0.2487  8
50    50           0.8116            1.0000          0.8146 13

Here we see similar findings to when we considered the population as a whole. While estimates for cat preference (in percent) using the sample are off by

mean  max 
  14   36 

the MRP based estimates are much closer to the actual percentage,

mean  max 
   6   21 

and especially when the sample size for that population is relatively small. This is easier to see graphically, so we will continue to add additional layers to the previous figure. Here we add model estimates, represented by triangles, and the true population cat preference, represented as transparent circles.

Other formats

Alternate methods of modelling

Previously we used a binary outcome variable. An alternative form of this model is to aggregate the data to the poststrat cell level and model the number of successes (or endorsement of cat preference in this case) out of the total number of people in that cell. To do this we need to create two n x 1 outcome variables, N_cat_pref (number in cell who prefer cats) and N (number in the poststrat cell).

We then can use these two outcome variables to model the data using the binomial distribution.

stan_glmer
 family:       binomial [logit]
 formula:      cbind(N_cat_pref, N - N_cat_pref) ~ factor(male) + factor(male) * 
       factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | 
       income)
 observations: 940
------
                           Median MAD_SD
(Intercept)                 0.7    0.9  
factor(male)1              -0.3    0.5  
factor(age)2               -0.2    1.0  
factor(age)3               -0.4    0.8  
factor(age)4                0.7    0.8  
factor(age)5                0.3    0.8  
factor(age)6                1.0    0.8  
factor(age)7                0.8    0.7  
factor(male)1:factor(age)2  0.2    1.6  
factor(male)1:factor(age)3 -0.8    0.8  
factor(male)1:factor(age)4 -1.3    0.7  
factor(male)1:factor(age)5 -1.0    0.7  
factor(male)1:factor(age)6 -0.6    0.6  
factor(male)1:factor(age)7 -1.1    0.6  

Error terms:
 Groups Name        Std.Dev.
 state  (Intercept) 1.10    
 age    (Intercept) 1.12    
 eth    (Intercept) 0.87    
 income (Intercept) 0.73    
Num. levels: state 50, age 7, eth 3, income 3 

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Like before, we can use the posterior_linpred() function to obtain an estimate of the preference for cats in the population.

 mean    sd 
0.568 0.023 

As we should, we get the same answer as when we fit the model using the binary outcome. The two ways are equivalent, so we can use whichever form is most convenient for the data at hand. More details on these two forms of binomial models are available here.

Appendix

Examples of other formulas

The formulas for fitting so-called “mixed-effects” models in rstanarm are the same as those in the lme4 package. A table of examples can be found in Table 2 of the vignette for the lme4 package, available here.

Code to simulate the data

Here is the source code for the simulate_mrp_data() function, which is based on some code provided by Aki Vehtari.

# Simulate data for an MRP example: a poststratification table, the true
# population cat preference in every cell, and a biased sample of size n.
# Returns a list with elements `sample`, `poststrat`, and `true_popn`.
# NOTE: output depends on the RNG state; call set.seed() first for
# reproducibility.
function(n) {
  J <- c(2, 3, 7, 3, 50) # male or not, eth, age, income level, state
  poststrat <- as.data.frame(array(NA, c(prod(J), length(J)+1))) # Columns of post-strat matrix, plus one for size
  colnames(poststrat) <- c("male", "eth", "age","income", "state",'N')
  count <- 0
  # Enumerate every combination of the demographic variables, one row per
  # poststratification cell; male is coded 0/1, the other variables from 1.
  for (i1 in 1:J[1]){
    for (i2 in 1:J[2]){
      for (i3 in 1:J[3]){
        for (i4 in 1:J[4]){
          for (i5 in 1:J[5]){
              count <- count + 1
              # Fill them in so we know what category we are referring to
              poststrat[count, 1:5] <- c(i1-1, i2, i3,i4,i5)
          }
        }
      }
    }
  }
  # Proportion in each sample in the population
  p_male <- c(0.52, 0.48)
  p_eth <- c(0.5, 0.2, 0.3)
  p_age <- c(0.2,.1,0.2,0.2, 0.10, 0.1, 0.1)
  p_income<-c(.50,.35,.15)
  p_state_tmp<-runif(50,10,20)
  p_state<-p_state_tmp/sum(p_state_tmp)
  poststrat$N<-0
  # Cell sizes for a notional population of 250 million, assuming the
  # demographic variables are independent of each other.
  for (j in seq_len(prod(J))){
    poststrat$N[j] <- round(250e6 * p_male[poststrat[j,1]+1] * p_eth[poststrat[j,2]] *
      p_age[poststrat[j,3]]*p_income[poststrat[j,4]]*p_state[poststrat[j,5]]) #Adjust the N to be the number observed in each category in each group
  }

  # Now let's adjust for the probability of response
  p_response_baseline <- 0.01
  p_response_male <- c(2, 0.8) / 2.8
  p_response_eth <- c(1, 1.2, 2.5) / 4.7
  p_response_age <- c(1, 0.4, 1, 1.5,  3, 5, 7) / 18.9
  p_response_inc <- c(1, 0.9, 0.8) / 2.7
  p_response_state <- rbeta(50, 1, 1)
  p_response_state <- p_response_state / sum(p_response_state)
  p_response <- rep(NA, prod(J))
  for (j in seq_len(prod(J))) {
    p_response[j] <-
      p_response_baseline * p_response_male[poststrat[j, 1] + 1] *
      p_response_eth[poststrat[j, 2]] * p_response_age[poststrat[j, 3]] *
      p_response_inc[poststrat[j, 4]] * p_response_state[poststrat[j, 5]]
  }
  # Draw each respondent's cell with probability proportional to cell size
  # times response probability -- this is what induces the sampling bias.
  people <- sample(prod(J), n, replace = TRUE, prob = poststrat$N * p_response)

  ## For respondent i, people[i] is that person's poststrat cell,
  ## some number between 1 and prod(J) = 6300
  n_cell <- rep(NA, prod(J))
  for (j in seq_len(prod(J))) {
    n_cell[j] <- sum(people == j)
  }

  # True effects (on the logit scale) used to generate cat preference,
  # including an age-by-gender interaction.
  coef_male <- c(0,-0.3)
  # coef_age has 8 values but only the first 7 are indexed (age has 7 levels)
  coef_age <- c(0,-0.2,-0.3, 0.4, 0.5, 0.7, 0.8, 0.9)
  coef_eth <- c(0, 0.6, 0.9)
  coef_income <- c(0,-0.2, 0.6)
  coef_state <- c(0, round(rnorm(49, 0, 1), 1))
  coef_age_male <- t(cbind(c(0, .1, .23, .3, .43, .5, .6),
                           c(0, -.1, -.23, -.5, -.43, -.5, -.6)))
  true_popn <- data.frame(poststrat[, 1:5], cat_pref = rep(NA, prod(J)))
  for (j in seq_len(prod(J))) {
    true_popn$cat_pref[j] <- plogis(
      coef_male[poststrat[j, 1] + 1] +
        coef_eth[poststrat[j, 2]] + coef_age[poststrat[j, 3]] +
        coef_income[poststrat[j, 4]] + coef_state[poststrat[j, 5]] +
        coef_age_male[poststrat[j, 1] + 1, poststrat[j, 3]]
      )
  }

  #male or not, eth, age, income level, state, city
  y <- rbinom(n, 1, true_popn$cat_pref[people])
  male <- poststrat[people, 1]
  eth <- poststrat[people, 2]
  age <- poststrat[people, 3]
  income <- poststrat[people, 4]
  state <- poststrat[people, 5]

  sample <- data.frame(cat_pref = y,
                       male, age, eth, income, state,
                       id = seq_along(people))

  #Make all numeric:
  for (i in seq_len(ncol(poststrat))) {
    poststrat[, i] <- as.numeric(poststrat[, i])
  }
  for (i in seq_len(ncol(true_popn))) {
    true_popn[, i] <- as.numeric(true_popn[, i])
  }
  for (i in seq_len(ncol(sample))) {
    sample[, i] <- as.numeric(sample[, i])
  }
  list(
    sample = sample,
    poststrat = poststrat,
    true_popn = true_popn
  )
}

References

Gelman, Andrew, and others. 2005. “Analysis of Variance-Why It Is More Important Than Ever.” The Annals of Statistics 33 (1). Institute of Mathematical Statistics: 1–53.

Ghitza, Yair, and Andrew Gelman. 2013. “Deep Interactions with Mrp: Election Turnout and Voting Patterns Among Small Electoral Subgroups.” American Journal of Political Science 57 (3). Wiley Online Library: 762–76.

Lax, Jeffrey R, and Justin H Phillips. 2009. “How Should We Estimate Public Opinion in the States?” American Journal of Political Science 53 (1). Wiley Online Library: 107–21.

Lei, Rayleigh, Andrew Gelman, and Yair Ghitza. 2017. “The 2008 Election: A Preregistered Replication Analysis.” Statistics and Public Policy. Taylor & Francis, 1–8.

Little, Roderick JA. 1993. “Post-Stratification: A Modeler’s Perspective.” Journal of the American Statistical Association 88 (423). Taylor & Francis Group: 1001–12.

Park, David K, Andrew Gelman, and Joseph Bafumi. 2004. “Bayesian Multilevel Estimation with Poststratification: State-Level Estimates from National Polls.” Political Analysis 12 (4). Cambridge University Press: 375–85.

Shirley, Kenneth E, and Andrew Gelman. 2015. “Hierarchical Models for Estimating State and Demographic Trends in Us Death Penalty Public Opinion.” Journal of the Royal Statistical Society: Series A (Statistics in Society) 178 (1). Wiley Online Library: 1–28.

Si, Yajuan, Rob Trangucci, Jonah Sol Gabry, and Andrew Gelman. 2017. “Bayesian Hierarchical Weighting Adjustment and Survey Inference.” arXiv Preprint arXiv:1707.08220.

rstanarm/inst/doc/mrp.Rmd0000644000176200001440000010314413722762571015104 0ustar liggesusers--- title: "MRP with rstanarm" author: "Lauren Kennedy and Jonah Gabry" date: "`r Sys.Date()`" output: html_vignette: toc: yes bibliography: mrp-files/mrp.bib --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r packages-1, message=FALSE} library(rstanarm) library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) # options(mc.cores = 4) ``` ```{r packages-2, eval=FALSE, message=FALSE} library(dplyr) library(tidyr) ``` Inference about the population is one the main aims of statistical methodology. Multilevel regression and post-stratification (MRP) [@little1993post; @lax2009should; @park2004bayesian] has been shown to be an effective method of adjusting the sample to be more representative of the population for a set of key variables. Recent work has demonstrated the effectiveness of MRP when there are a number of suspected interactions between these variables [@ghitza2013deep], replicated by @lei20172008. While @ghitza2013deep use approximate marginal maximum likelihood estimates; @lei20172008 implement a fully Bayesian approach through Stan. The **rstanarm** package allows the user to conduct complicated regression analyses in Stan with the simplicity of standard formula notation in R. The purpose of this vignette is to demonstrate the utility of **rstanarm** when conducting MRP analyses. We will not delve into the details of conducting logistic regression with rstanarm as this is already covered in [other vignettes](https://mc-stan.org/rstanarm/articles/). Most of the code for data manipulation and plotting is not shown in the text but is available in the R markdown [source code on GitHub](https://github.com/stan-dev/rstanarm/blob/master/vignettes/mrp.Rmd). 
```{r, include=FALSE, collapse=TRUE} simulate_mrp_data <- function(n) { J <- c(2, 3, 7, 3, 50) # male or not, eth, age, income level, state poststrat <- as.data.frame(array(NA, c(prod(J), length(J)+1))) # Columns of post-strat matrix, plus one for size colnames(poststrat) <- c("male", "eth", "age","income", "state",'N') count <- 0 for (i1 in 1:J[1]){ for (i2 in 1:J[2]){ for (i3 in 1:J[3]){ for (i4 in 1:J[4]){ for (i5 in 1:J[5]){ count <- count + 1 # Fill them in so we know what category we are referring to poststrat[count, 1:5] <- c(i1-1, i2, i3,i4,i5) } } } } } # Proportion in each sample in the population p_male <- c(0.52, 0.48) p_eth <- c(0.5, 0.2, 0.3) p_age <- c(0.2,.1,0.2,0.2, 0.10, 0.1, 0.1) p_income<-c(.50,.35,.15) p_state_tmp<-runif(50,10,20) p_state<-p_state_tmp/sum(p_state_tmp) poststrat$N<-0 for (j in 1:prod(J)){ poststrat$N[j] <- round(250e6 * p_male[poststrat[j,1]+1] * p_eth[poststrat[j,2]] * p_age[poststrat[j,3]]*p_income[poststrat[j,4]]*p_state[poststrat[j,5]]) #Adjust the N to be the number observed in each category in each group } # Now let's adjust for the probability of response p_response_baseline <- 0.01 p_response_male <- c(2, 0.8) / 2.8 p_response_eth <- c(1, 1.2, 2.5) / 4.7 p_response_age <- c(1, 0.4, 1, 1.5, 3, 5, 7) / 18.9 p_response_inc <- c(1, 0.9, 0.8) / 2.7 p_response_state <- rbeta(50, 1, 1) p_response_state <- p_response_state / sum(p_response_state) p_response <- rep(NA, prod(J)) for (j in 1:prod(J)) { p_response[j] <- p_response_baseline * p_response_male[poststrat[j, 1] + 1] * p_response_eth[poststrat[j, 2]] * p_response_age[poststrat[j, 3]] * p_response_inc[poststrat[j, 4]] * p_response_state[poststrat[j, 5]] } people <- sample(prod(J), n, replace = TRUE, prob = poststrat$N * p_response) ## For respondent i, people[i] is that person's poststrat cell, ## some number between 1 and 32 n_cell <- rep(NA, prod(J)) for (j in 1:prod(J)) { n_cell[j] <- sum(people == j) } coef_male <- c(0,-0.3) coef_eth <- c(0, 0.6, 0.9) coef_age <- 
c(0,-0.2,-0.3, 0.4, 0.5, 0.7, 0.8, 0.9) coef_income <- c(0,-0.2, 0.6) coef_state <- c(0, round(rnorm(49, 0, 1), 1)) coef_age_male <- t(cbind(c(0, .1, .23, .3, .43, .5, .6), c(0, -.1, -.23, -.5, -.43, -.5, -.6))) true_popn <- data.frame(poststrat[, 1:5], cat_pref = rep(NA, prod(J))) for (j in 1:prod(J)) { true_popn$cat_pref[j] <- plogis( coef_male[poststrat[j, 1] + 1] + coef_eth[poststrat[j, 2]] + coef_age[poststrat[j, 3]] + coef_income[poststrat[j, 4]] + coef_state[poststrat[j, 5]] + coef_age_male[poststrat[j, 1] + 1, poststrat[j, 3]] ) } #male or not, eth, age, income level, state, city y <- rbinom(n, 1, true_popn$cat_pref[people]) male <- poststrat[people, 1] eth <- poststrat[people, 2] age <- poststrat[people, 3] income <- poststrat[people, 4] state <- poststrat[people, 5] sample <- data.frame(cat_pref = y, male, age, eth, income, state, id = 1:length(people)) #Make all numeric: for (i in 1:ncol(poststrat)) { poststrat[, i] <- as.numeric(poststrat[, i]) } for (i in 1:ncol(true_popn)) { true_popn[, i] <- as.numeric(true_popn[, i]) } for (i in 1:ncol(sample)) { sample[, i] <- as.numeric(sample[, i]) } list( sample = sample, poststrat = poststrat, true_popn = true_popn ) } ``` # The Data Three data sets are simulated by the function `simulate_mrp_data()`, which is defined in the [source code](https://github.com/stan-dev/rstanarm/blob/master/vignettes/mrp.Rmd) for this R markdown document (and printed in the appendix). The first, `sample`, contains $n$ observations from the individuals that form our sample (i.e., $n$ rows). For each individual we have their age (recorded as membership within a specific age bracket), ethnicity, income level (recorded as membership within a specific bracket), and gender. Participants were randomly sampled from a state. 
MRP is often used for dichotomous fixed choice questions (e.g., McCain's share of two party vote [@ghitza2013deep]; support for George W Bush, [@park2004bayesian]; or support for the death penalty [@shirley2015hierarchical]), so we will use a binary variable as the outcome in this vignette. However, MRP can also be used if there are more than two categories or if the outcome is continuous. As this is a simple toy example, we will describe the proportion of the population who would choose to adopt a cat over a dog, given the opportunity. We will simulate data using a function that is included in the appendix of this document. The `simulate_mrp_data()` function simulates a sample from a much larger population. It returns a list including the sample, population poststratification matrix and the true population preference for cats. ```{r include=FALSE, eval=FALSE} mrp_sim <- simulate_mrp_data(n=1200) save(mrp_sim, file = "mrp-files/mrp_sim.rda", version = 2) ``` ```{r eval=FALSE} mrp_sim <- simulate_mrp_data(n=1200) str(mrp_sim) ``` ```{r, echo=FALSE} load("mrp-files/mrp_sim.rda") str(mrp_sim) ``` ```{r, message=FALSE} sample <- mrp_sim[["sample"]] rbind(head(sample), tail(sample)) ``` The variables describing the individual (age, ethnicity, income level and gender) will be used to match the sample to the population of interest. To do this we will need to form a post-stratification table, which contains the number of people in each possible combination of the post-stratification variables. We have 4 variables with 2 (male), 7 (age), 3 (ethnicity) and 3 (income) levels, so there are 2x7x3x3 different levels. Participants are also selected from a state (50), increasing the number of possible levels to $6300$. To make inference about the population, we will also need the proportion of individuals in each post stratification cell at the *population* level. 
We will use this information to update the estimate of our outcome variable from the sample so that is more representative of the population. This is particularly helpful if there is a belief that the sample has some bias (e.g., a greater proportion of females responded than males), and that the bias impacts the outcome variable (e.g., maybe women are more likely to pick a cat than men). For each possible combination of factors, the post-stratification table shows the proportion/number of the population in that cell (rather than the proportion/number in the sample in the cell). Below we read in the poststrat data our simulated data list. ```{r message=FALSE} poststrat <- mrp_sim[["poststrat"]] rbind(head(poststrat), tail(poststrat)) ``` One of the benefits of using a simulated data set for this example is that the actual population level probability of cat preference is known for each post-stratification cell. In real world data analysis, we don't have this luxury, but we will use it later in this case study to check the predictions of the model. Details regarding the simulation of this data are available in the appendix. ```{r message=FALSE} true_popn <- mrp_sim[["true_popn"]] rbind(head(true_popn), tail(true_popn)) ``` # Exploring Graphically Before we begin with the MRP analysis, we first explore the data set with some basic visualizations. ## Comparing sample to population The aim of this analysis is to obtain a *population* estimation of cat preference given our sample of $4626$. We can see in the following plot the difference in proportions between the sample and the population. Horizontal panels represent each variable. Bars represent the proportion of the sample (solid) and population (dashed) in each category (represented by colour and the x-axis). For ease of viewing, we ordered the states in terms of the proportion of the sample in that state that was observed. We will continue this formatting choice thoughout this vignette. 
```{r order-states} sample$state <- factor(sample$state, levels=1:50) sample$state <- with(sample, factor(state, levels=order(table(state)))) true_popn$state <- factor(true_popn$state,levels = levels(sample$state)) poststrat$state <- factor(poststrat$state,levels = levels(sample$state)) ``` ```{r state-and-pop-data-for-plots, eval=FALSE, include=FALSE} # not evaluated to avoid tidyverse dependency income_popn <- poststrat %>% group_by(income) %>% summarize(Num=sum(N)) %>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Income',CAT=income) %>% ungroup() income_data <- sample %>% group_by(income) %>% summarise(Num=n()) %>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Income',CAT=income) %>% ungroup() income<-rbind(income_data[,2:6],income_popn[,2:6]) age_popn <- poststrat%>% group_by(age)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Age',CAT=age)%>% ungroup() age_data <- sample%>% group_by(age)%>% summarise(Num=n())%>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Age',CAT=age)%>% ungroup() age <- rbind(age_data[,2:6],age_popn[,2:6] ) eth_popn <- poststrat%>% group_by(eth)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Ethnicity',CAT=eth)%>% ungroup() eth_data <- sample%>% group_by(eth)%>% summarise(Num=n())%>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Ethnicity',CAT=eth)%>% ungroup() eth<-rbind(eth_data[,2:6],eth_popn[,2:6]) male_popn <- poststrat%>% group_by(male)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Male',CAT=male)%>% ungroup() male_data <- sample%>% group_by(male)%>% summarise(Num=n())%>% mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Male',CAT=male)%>% ungroup() male <- rbind(male_data[,2:6],male_popn[,2:6]) state_popn <- poststrat%>% group_by(state)%>% summarize(Num=sum(N))%>% mutate(PROP=Num/sum(poststrat$N),TYPE='Popn',VAR='State',CAT=state)%>% ungroup() state_plot_data <- sample%>% group_by(state)%>% summarise(Num=n())%>% 
mutate(PROP=Num/nrow(sample),TYPE='Sample',VAR='State',CAT=state)%>% ungroup() state_plot_data <- rbind(state_plot_data[,2:6],state_popn[,2:6]) state_plot_data$TYPE <- factor(state_plot_data$TYPE, levels = c("Sample","Popn")) plot_data <- rbind(male,eth,age,income) plot_data$TYPE <- factor(plot_data$TYPE, levels = c("Sample","Popn")) save(state_plot_data, file = "mrp-files/state_plot_data.rda", version = 2) save(plot_data, file = "mrp-files/plot_data.rda", version = 2) ``` ```{r plot-data, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"} load("mrp-files/plot_data.rda") # created in previous chunk ggplot(data=plot_data, aes(x=as.factor(CAT), y=PROP, group=as.factor(TYPE), linetype=as.factor(TYPE))) + geom_point(stat="identity",colour='black')+ geom_line()+ facet_wrap( ~ VAR, scales = "free",nrow=1,ncol=5)+ theme_bw()+ scale_fill_manual(values=c('#1f78b4','#33a02c', '#e31a1c','#ff7f00','#8856a7'),guide=FALSE)+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c('0%','25%',"50%","75%","100%"))+ scale_alpha_manual(values=c(1, .3))+ ylab('Proportion')+ labs(alpha='')+ theme(legend.position="bottom", axis.title.y=element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), legend.text=element_text(size=10), axis.text=element_text(size=10), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) load("mrp-files/state_plot_data.rda") # created in previous chunk ggplot(data=state_plot_data, aes(x=as.factor(CAT), y=PROP, group=as.factor(TYPE), linetype=as.factor(TYPE))) + geom_point(stat="identity",colour='black')+ geom_line()+ facet_wrap( ~ VAR)+ theme_bw()+ scale_fill_manual(values=c('#1f78b4','#33a02c', '#e31a1c','#ff7f00','#8856a7'),guide=FALSE)+ scale_y_continuous(breaks=c(0,.025,.05,1), labels=c('0%','2.5%',"5%","100%"),expand=c(0,0),limits=c(0,.06))+ scale_alpha_manual(values=c(1, .3))+ ylab('Proportion')+ labs(alpha='')+ theme(legend.position="bottom", axis.title.y=element_blank(), 
axis.title.x=element_blank(), legend.title=element_blank(), legend.text=element_text(size=10), axis.text.y=element_text(size=10), axis.text.x=element_text(size=8,angle=90), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ``` # Effect of the post-stratification variable on preference for cats Secondly, we consider the evidence of different proportions across different levels of a post-stratification variable, which we should consider for each of the post-stratification variables. Here we break down the proportion of individuals who would prefer a cat (*y-axis*) by different levels (*x-axis*) of the post-stratification variable (*horizontal panels*). We can see from this figure that there appear to be differences in cat preference for the different levels of post-stratification variables. Given the previous figure, which suggested that the sample was different to the population in the share of different levels of these variables, this should suggest that using the sample to estimate cat preference may not give accurate estimates of cat preference in the population. 
```{r, eval=FALSE, echo=FALSE} # not evaluated to avoid dependency on tidyverse #Summarise summary_by_poststrat_var <- sample %>% gather(variable,category,c("income","eth","age","male")) %>% group_by(variable,category) %>% #Wald confidence interval summarise(y_mean=mean(cat_pref),y_sd=sqrt(mean(cat_pref)*(1-mean(cat_pref))/n())) %>% ungroup() summary_by_poststrat_var$variable <- as.factor(summary_by_poststrat_var$variable) levels(summary_by_poststrat_var$variable) <- list('Age'='age','Ethnicity'='eth','Income'='income','Male'='male') save(summary_by_poststrat_var, file = "mrp-files/summary_by_poststrat_var.rda", version = 2) ``` ```{r plot-summary-by-poststrat-var, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"} load("mrp-files/summary_by_poststrat_var.rda") # created in previous chunk ggplot(data=summary_by_poststrat_var, aes(x=as.factor(category), y=y_mean,group=1)) + geom_errorbar(aes(ymin=y_mean-y_sd, ymax=y_mean+y_sd), width=0)+ geom_line()+ geom_point()+ scale_colour_manual(values=c('#1f78b4','#33a02c','#e31a1c','#ff7f00', '#8856a7'))+theme_bw()+ facet_wrap(~variable,scales = "free_x",nrow=1,ncol=5)+ scale_y_continuous(breaks=c(.5,.75,1), labels=c("50%","75%", "100%"), limits=c(0.4-.4*.05,.9),expand = c(0,0))+ labs(x="",y="Cat preference")+ theme(legend.position="none", axis.title.y=element_text(size=10), axis.title.x=element_blank(), axis.text=element_text(size=10), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ``` ## Interaction effect Thirdly, we demonstrate visually that there is an interaction between age and gender and compare to a case where there is no interaction. Here a simulated interaction effect between age (*x-axis*) and gender (*color*), right panel, is contrasted with no interaction effect (*left panel*). While both panels demonstrate a difference between the genders on the outcome variable (*y-axis*), only the second panel shows this difference changing with the variable on the x-axis. 
```{r interaction-summary, eval=FALSE, echo=FALSE} # not evaluated to avoid dependency on tidyverse #Summarise interaction <- sample %>% gather(variable, category, c("age", "eth")) %>% group_by(variable, category, male) %>% summarise(y_mean = mean(cat_pref), y_sd = sqrt(mean(cat_pref) * (1 - mean(cat_pref)) / n())) %>% ungroup() #Tidy for nice facet labels interaction$variable <- as.factor(interaction$variable) levels(interaction$variable) <- list('Ethnicity' = 'eth', 'Age' = 'age') save(interaction, file = "mrp-files/interaction.rda", version = 2) ``` ```{r plot-interaction, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"} load("mrp-files/interaction.rda") # created in previous chunk ggplot(data=interaction, aes(x=as.factor(category), y=y_mean, colour=as.factor(male),group=as.factor(male))) + geom_errorbar(aes(ymin=y_mean-y_sd, ymax=y_mean+y_sd),width=0 )+ geom_line(aes(x=as.factor(category), y=y_mean,colour=as.factor(male)))+ geom_point()+ facet_wrap(~variable,scales = "free_x",nrow=1,ncol=2)+ labs(x="",y="Cat preference",colour='Gender')+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%",'25%',"50%","75%", "100%"), limits=c(0,1),expand=c(0,0))+ scale_colour_manual(values=c('#4575b4','#d73027'))+theme_bw()+ theme(axis.title=element_text(size=10), axis.text=element_text(size=10), legend.position='none', strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ``` ## Design effect Lastly we look at the difference in cat preference between states, which will form the basis for the multi-level component of our analysis. Participants were randomly selected from particular states. Plotting the state (*x-axis*) against the overall proportion of participants who prefer cats (*y-axis*) demonstrates state differences. The downward slope is because we ordered the x-axis by the proportion of cat preference for ease of viewing. 
We also include second plot with a horizontal line to represent the overall preference for cats in the total population, according to the sample. ```{r, eval=FALSE, echo=FALSE} # not evaluated to avoid dependency on tidyverse #Summarise by state preference_by_state <- sample %>% group_by(state) %>% summarise(y_mean = mean(cat_pref), y_sd = sqrt(mean(cat_pref) * (1 - mean(cat_pref)) / n())) %>% ungroup() save(preference_by_state, file = "mrp-files/preference_by_state.rda", version = 2) ``` ```{r, echo=FALSE, fig.height = 4, fig.width = 8, fig.align = "center"} load("mrp-files/preference_by_state.rda") compare <- ggplot(data=preference_by_state, aes(x=state, y=y_mean,group=1)) + geom_ribbon(aes(ymin=y_mean-y_sd,ymax=y_mean+y_sd,x=state),fill='lightgrey',alpha=.7)+ geom_line(aes(x=state, y=y_mean))+ geom_point()+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%","25%","50%","75%","100%"), limits=c(0,1), expand=c(0,0))+ scale_x_discrete(drop=FALSE)+ scale_colour_manual(values=c('#1f78b4','#33a02c','#e31a1c','#ff7f00', '#8856a7'))+ theme_bw()+ labs(x="States",y="Cat preference")+ theme(legend.position="none", axis.title=element_text(size=10), axis.text.y=element_text(size=10), axis.text.x=element_text(angle=90,size=8), legend.title=element_text(size=10), legend.text=element_text(size=10)) compare2 <- ggplot()+ geom_hline(yintercept = mean(sample$cat_pref),size=.8)+ geom_text(aes(x = 5.2, y = mean(sample$cat_pref)+.025, label = "Sample"))+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%","25%","50%","75%","100%"), limits=c(-0.25,1.25),expand=c(0,0))+ theme_bw()+ labs(x="Popn",y="")+ theme(legend.position="none", axis.title.y=element_blank(), axis.title.x=element_text(size=10), axis.text=element_blank(), axis.ticks=element_blank(), legend.title=element_text(size=10), legend.text=element_text(size=10)) bayesplot_grid(compare,compare2, grid_args = list(nrow=1, widths = c(8,1))) ``` # MRP with rstanarm From visual inspection, it appears that different 
levels of post-stratification variable have different preferences for cats. Our survey also appears to have sampling bias, indicating that some groups were over/under sampled relative to the population. The net effect of this is that we could not make good population level estimates of cat preference straight from our sample. Our aim is to infer the preference for cats in the *population* using the post-stratification variables to account for systematic differences between the sample and population. Using rstanarm, this becomes a simple procedure. The first step is to use a multi-level logistic regression model to predict preference for cats in the sample given the variables that we will use to post-stratify. Note that we actually have more rows in the post-stratification matrix than we have observed units, so there are some cells in the poststrat matrix that we don't observe. We can use a multi-level model to partially pool information across the different levels within each variable to assist with this. In the model described below, we use a fixed intercept for gender, and hierarchically modeled varying intercepts for each of the other factors. Let $\theta_{j}$ denote the preference for cats in the $j$th poststratification cell. The non-hierarchical part of the model can be written as $$\theta_j= logit^{-1}(X_{j}\beta),$$ where here $X$ only contains an indicator for male or female and an interaction term with age. 
Adding the varying intercepts for the other variables the model becomes $$ \theta_j = logit^{-1}( X_{j}\beta + \alpha_{\rm state[j]}^{\rm state} + \alpha_{\rm age[j]}^{\rm age} + \alpha_{\rm eth[j]}^{\rm eth} + \alpha_{\rm inc[j]}^{\rm inc} ) $$ with $$ \begin{align*} \alpha_{\rm state[j]}^{\rm state} & \sim N(0,\sigma^{\rm state}) \\ \alpha_{\rm age[j]}^{\rm age} & \sim N(0,\sigma^{\rm age})\\ \alpha_{\rm eth[j]}^{\rm eth} & \sim N(0,\sigma^{\rm eth})\\ \alpha_{\rm inc[j]}^{\rm inc} &\sim N(0,\sigma^{\rm inc}) \\ \end{align*} $$ Each of $\sigma^{\rm state}$, $\sigma^{\rm age}$, $\sigma^{\rm eth}$, and $\sigma^{\rm inc}$ is estimated from the data (in this case using rstanarm's default priors), which is beneficial as it means we share information between the levels of each variable and we can prevent levels with less data from being too sensitive to the few observed values. This also helps with the levels we don't observe at all, as it will use information from the levels that we do observe. For more on the benefits of this type of model, see @gelman2005analysis, and see @ghitza2013deep and @si2017bayesian for more complicated extensions that involve deep interactions and structured prior distributions. Here is the model specified using the `stan_glmer()` function in rstanarm, which uses the same formula syntax as the `glmer()` function from the lme4 package: ```{r, message=FALSE, warning=FALSE, results='hide'} fit <- stan_glmer( cat_pref ~ factor(male) + factor(male) * factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | income), family = binomial(link = "logit"), data = sample ) ``` ```{r} print(fit) ``` As a first pass to check whether the model is performing well, note that there are no warnings about divergences, failure to converge or tree depth. If these errors do occur, more information on how to alleviate them is provided [here](https://mc-stan.org/rstanarm/articles/rstanarm.html#step-3-criticize-the-model). 
## Population Estimate From this we get a summary of the baseline log odds of cat preference at the first element of each factor (i.e., male = 0, age = 1) for each state, plus estimates on variability of the intercept for state, ethnicity, age and income. While this is interesting, currently all we have achieved is a model that predicts cat preference given a number of factor-type predictors in a sample. What we would like to do is estimate cat preference in the population by accounting for differences between our sample and the population. We use the `posterior_linpred()` function to obtain posterior estimates for cat preference given the proportion of people in the *population* in each level of the factors included in the model. ```{r, message=FALSE} posterior_prob <- posterior_linpred(fit, transform = TRUE, newdata = poststrat) poststrat_prob <- posterior_prob %*% poststrat$N / sum(poststrat$N) model_popn_pref <- c(mean = mean(poststrat_prob), sd = sd(poststrat_prob)) round(model_popn_pref, 3) ``` We can compare this to the estimate we would have made if we had just used the sample: ```{r, message=FALSE} sample_popn_pref <- mean(sample$cat_pref) round(sample_popn_pref, 3) ``` We can also add it to the last figure to graphically represent the difference between the sample and population estimate. ```{r, message=FALSE,fig.height = 4, fig.width = 8, fig.align = "center"} compare2 <- compare2 + geom_hline(yintercept = model_popn_pref[1], colour = '#2ca25f', size = 1) + geom_text(aes(x = 5.2, y = model_popn_pref[1] + .025), label = "MRP", colour = '#2ca25f') bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ``` As this is simulated data, we can look directly at the preference for cats that we simulated from to consider how good our estimate is. ```{r, message=FALSE} true_popn_pref <- sum(true_popn$cat_pref * poststrat$N) / sum(poststrat$N) round(true_popn_pref, 3) ``` Which we will also add to the figure. 
```{r, echo=FALSE, message=FALSE,fig.height = 4, fig.width = 8, fig.align = "center"} compare2 <- compare2 + geom_hline(yintercept = mean(true_popn_pref), linetype = 'dashed', size = .8) + geom_text(aes(x = 5.2, y = mean(true_popn_pref) - .025), label = "True") bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ``` Our MRP estimate is barely off, while our sample estimate is off by more than 10 percentage points. This indicates that using MRP helps to make estimates for the population from our sample that are more accurate. ## Estimates for states One of the nice benefits of using MRP to make inference about the population is that we can change the population of interest. In the previous paragraph we inferred the preference for cats in the whole population. We can also infer the preference for cats in a single state. In the following code we post-stratify for each state in turn. Note that we can reuse the predictive model from the previous step and update for different population demographics. This is particularly useful for complicated cases or large data sets where the model takes some time to fit. As before, first we use the proportion of the population in each combination of the post-stratification groups to estimate the proportion of people who preferred cats in the population, only in this case the population of interest is the state. 
```{r, message=FALSE} state_df <- data.frame( State = 1:50, model_state_sd = rep(-1, 50), model_state_pref = rep(-1, 50), sample_state_pref = rep(-1, 50), true_state_pref = rep(-1, 50), N = rep(-1, 50) ) for(i in 1:length(levels(as.factor(poststrat$state)))) { poststrat_state <- poststrat[poststrat$state == i, ] posterior_prob_state <- posterior_linpred( fit, transform = TRUE, draws = 1000, newdata = as.data.frame(poststrat_state) ) poststrat_prob_state <- (posterior_prob_state %*% poststrat_state$N) / sum(poststrat_state$N) #This is the estimate for popn in state: state_df$model_state_pref[i] <- round(mean(poststrat_prob_state), 4) state_df$model_state_sd[i] <- round(sd(poststrat_prob_state), 4) #This is the estimate for sample state_df$sample_state_pref[i] <- round(mean(sample$cat_pref[sample$state == i]), 4) #And what is the actual popn? state_df$true_state_pref[i] <- round(sum(true_popn$cat_pref[true_popn$state == i] * poststrat_state$N) / sum(poststrat_state$N), digits = 4) state_df$N[i] <- length(sample$cat_pref[sample$state == i]) } state_df[c(1,3:6)] state_df$State <- factor(state_df$State, levels = levels(sample$state)) ``` Here we see similar findings to when we considered the population as a whole. While estimates for cat preference (in percent) using the sample are off by ```{r} round(100 * c( mean = mean(abs(state_df$sample_state_pref-state_df$true_state_pref), na.rm = TRUE), max = max(abs(state_df$sample_state_pref-state_df$true_state_pref), na.rm = TRUE) )) ``` the MRP based estimates are much closer to the actual percentage, ```{r} round(100 * c( mean = mean(abs(state_df$model_state_pref-state_df$true_state_pref)), max = max(abs(state_df$model_state_pref-state_df$true_state_pref)) )) ``` and especially when the sample size for that population is relatively small. This is easier to see graphically, so we will continue to add additional layers to the previous figure. 
Here we add model estimates,represented by triangles, and the true population cat preference, represented as transparent circles. ```{r, message=FALSE, echo=FALSE, fig.height = 4, fig.width = 8, fig.align = "center",warning=FALSE, fig.align = "center"} #Summarise by state compare <- compare + geom_point(data=state_df, mapping=aes(x=State, y=model_state_pref), inherit.aes=TRUE,colour='#238b45')+ geom_line(data=state_df, mapping=aes(x=State, y=model_state_pref,group=1), inherit.aes=TRUE,colour='#238b45')+ geom_ribbon(data=state_df,mapping=aes(x=State,ymin=model_state_pref-model_state_sd, ymax=model_state_pref+model_state_sd,group=1), inherit.aes=FALSE,fill='#2ca25f',alpha=.3)+ geom_point(data=state_df, mapping=aes(x=State, y=true_state_pref), alpha=.5,inherit.aes=TRUE)+ geom_line(data=state_df, mapping=aes(x=State, y=true_state_pref), inherit.aes = TRUE,linetype='dashed') bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ``` # Other formats ## Alternate methods of modelling Previously we used a binary outcome variable. An alternative form of this model is to aggregate the data to the poststrat cell level and model the number of successes (or endorsement of cat preference in this case) out of the total number of people in that cell. To do this we need to create two n x 1 outcome variables, `N_cat_pref` (number in cell who prefer cats) and `N` (number in the poststrat cell). ```{r, eval=FALSE} # not evaluated to avoid dependency on tidyverse sample_alt <- sample %>% group_by(male, age, income, state, eth) %>% summarise(N_cat_pref = sum(cat_pref), N = n()) %>% ungroup() ``` ```{r, include=FALSE} load("mrp-files/sample_alt.rda") ``` We then can use these two outcome variables to model the data using the binomial distribution. 
```{r, message=FALSE, warning=FALSE, results='hide'} fit2 <- stan_glmer( cbind(N_cat_pref, N - N_cat_pref) ~ factor(male) + factor(male) * factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | income), family = binomial("logit"), data = sample_alt, refresh = 0 ) ``` ```{r} print(fit2) ``` Like before, we can use the `posterior_linpred()` function to obtain an estimate of the preference for cats in the population. ```{r, message=FALSE} posterior_prob_alt <- posterior_linpred(fit2, transform = TRUE, newdata = poststrat) poststrat_prob_alt <- posterior_prob_alt %*% poststrat$N / sum(poststrat$N) model_popn_pref_alt <- c(mean = mean(poststrat_prob_alt), sd = sd(poststrat_prob_alt)) round(model_popn_pref_alt, 3) ``` As we should, we get the same answer as when we fit the model using the binary outcome. The two ways are equivalent, so we can use whichever form is most convenient for the data at hand. More details on these two forms of binomial models are available [here](https://mc-stan.org/rstanarm/articles/binomial.html). # Appendix ### Examples of other formulas The formulas for fitting so-called "mixed-effects" models in **rstanarm** are the same as those in the **lme4** package. A table of examples can be found in Table 2 of the vignette for the **lme4** package, available [here](https://CRAN.R-project.org/package=lme4/vignettes/lmer.pdf). ### Code to simulate the data Here is the source code for the `simulate_mrp_data()` function, which is based off of some code provided by Aki Vehtari. 
```{r} print(simulate_mrp_data) ``` # References rstanarm/inst/doc/priors.R0000644000176200001440000000516514551551756015311 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ---- default-prior-1, results="hide"----------------------------------------- library("rstanarm") default_prior_test <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1) ## ---- default-prior-summary--------------------------------------------------- prior_summary(default_prior_test) ## ---- echo=FALSE-------------------------------------------------------------- priors <- prior_summary(default_prior_test) fr2 <- function(x) format(round(x, 2), nsmall = 2) ## ---- no-autoscale, results="hide"-------------------------------------------- test_no_autoscale <- update( default_prior_test, prior = normal(0, 5), prior_intercept = student_t(4, 0, 10), prior_aux = cauchy(0, 3) ) ## ---- no-autoscale-prior-summary---------------------------------------------- prior_summary(test_no_autoscale) ## ----------------------------------------------------------------------------- p <- 1 - 2 * pnorm(-250, mean = 0, sd = 500) print(paste("Pr(-250 < theta < 250) =", round(p, 2))) ## ---- fig.cap="_There is much more probability mass outside the interval (-250, 250)._"---- theta <- rnorm(1e5, mean = 0, sd = 500) p_approx <- mean(abs(theta) < 250) print(paste("Pr(-250 < theta < 250) =", round(p_approx, 2))) d <- data.frame(theta, clr = abs(theta) > 250) library(ggplot2) ggplot(d, aes(x = theta, fill = clr)) + geom_histogram(binwidth = 5, show.legend = FALSE) + 
scale_y_continuous(name = "", labels = NULL, expand = c(0,0)) + scale_x_continuous(name = expression(theta), breaks = c(-1000, -250, 250, 1000)) ## ---- flat-prior-1, echo=FALSE, results="hide"-------------------------------- flat_prior_test <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL, iter = 10, chains = 1) ## ---- flat-prior-2, eval=FALSE------------------------------------------------ # flat_prior_test <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL) ## ---- flat-prior-summary------------------------------------------------------ prior_summary(flat_prior_test) ## ---- eval=FALSE-------------------------------------------------------------- # my_prior <- normal(location = c(-10, 0), scale = c(5, 2)) # stan_glm(y ~ x1 + x2, data = dat, prior = my_prior) rstanarm/inst/doc/continuous.R0000644000176200001440000001513414551550300016156 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ---- continuous-kidiq-mcmc,results="hide"------------------------------------ library(rstanarm) data(kidiq) post1 <- stan_glm(kid_score ~ mom_hs, data = kidiq, family = gaussian(link = "identity"), seed = 12345) post2 <- update(post1, formula = . ~ mom_iq) post3 <- update(post1, formula = . ~ mom_hs + mom_iq) (post4 <- update(post1, formula = . 
~ mom_hs * mom_iq)) ## ---- continuous-kidiq-print, echo=FALSE-------------------------------------- print(post4) ## ---- continuous-kidiq-plot1a------------------------------------------------- base <- ggplot(kidiq, aes(x = mom_hs, y = kid_score)) + geom_point(size = 1, position = position_jitter(height = 0.05, width = 0.1)) + scale_x_continuous(breaks = c(0,1), labels = c("No HS", "HS")) base + geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], color = "skyblue4", size = 1) ## ---- continuous-kidiq-plot1b------------------------------------------------- draws <- as.data.frame(post1) colnames(draws)[1:2] <- c("a", "b") base + geom_abline(data = draws, aes(intercept = a, slope = b), color = "skyblue", size = 0.2, alpha = 0.25) + geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], color = "skyblue4", size = 1) ## ---- continuous-kidiq-plot2-------------------------------------------------- draws <- as.data.frame(as.matrix(post2)) colnames(draws)[1:2] <- c("a", "b") ggplot(kidiq, aes(x = mom_iq, y = kid_score)) + geom_point(size = 1) + geom_abline(data = draws, aes(intercept = a, slope = b), color = "skyblue", size = 0.2, alpha = 0.25) + geom_abline(intercept = coef(post2)[1], slope = coef(post2)[2], color = "skyblue4", size = 1) ## ---- continuous-kidiq-plot3-------------------------------------------------- reg0 <- function(x, ests) cbind(1, 0, x) %*% ests reg1 <- function(x, ests) cbind(1, 1, x) %*% ests args <- list(ests = coef(post3)) kidiq$clr <- factor(kidiq$mom_hs, labels = c("No HS", "HS")) lgnd <- guide_legend(title = NULL) base2 <- ggplot(kidiq, aes(x = mom_iq, fill = relevel(clr, ref = "HS"))) + geom_point(aes(y = kid_score), shape = 21, stroke = .2, size = 1) + guides(color = lgnd, fill = lgnd) + theme(legend.position = "right") base2 + stat_function(fun = reg0, args = args, aes(color = "No HS"), size = 1.5) + stat_function(fun = reg1, args = args, aes(color = "HS"), size = 1.5) ## ---- 
continuous-kidiq-plot4-------------------------------------------------- reg0 <- function(x, ests) cbind(1, 0, x, 0 * x) %*% ests reg1 <- function(x, ests) cbind(1, 1, x, 1 * x) %*% ests args <- list(ests = coef(post4)) base2 + stat_function(fun = reg0, args = args, aes(color = "No HS"), size = 1.5) + stat_function(fun = reg1, args = args, aes(color = "HS"), size = 1.5) ## ---- continuous-kidiq-loo---------------------------------------------------- # Compare them with loo loo1 <- loo(post1, cores = 2) loo2 <- loo(post2, cores = 2) loo3 <- loo(post3, cores = 2) loo4 <- loo(post4, cores = 2) (comp <- loo_compare(loo1, loo2, loo3, loo4)) ## ---- continuous-kidiq-loo-2-------------------------------------------------- loo_compare(loo1, loo4) ## ---- continuous-kidiq-loo-3-------------------------------------------------- loo_compare(loo3, loo4) loo_compare(loo2, loo4) ## ---- continuous-kidiq-pp_check1---------------------------------------------- pp_check(post4, plotfun = "hist", nreps = 5) ## ---- continuous-kidiq-pp_check2---------------------------------------------- pp_check(post4, plotfun = "stat", stat = "mean") ## ---- continuous-kidiq-pp_check3---------------------------------------------- pp_check(post4, plotfun = "stat_2d", stat = c("mean", "sd")) ## ---- continuous-kidiq-posterior_predict-------------------------------------- IQ_SEQ <- seq(from = 75, to = 135, by = 5) y_nohs <- posterior_predict(post4, newdata = data.frame(mom_hs = 0, mom_iq = IQ_SEQ)) y_hs <- posterior_predict(post4, newdata = data.frame(mom_hs = 1, mom_iq = IQ_SEQ)) dim(y_hs) ## ---- continuous-kidiq-plot-predict, fig.width=7------------------------------ par(mfrow = c(1:2), mar = c(5,4,2,1)) boxplot(y_hs, axes = FALSE, outline = FALSE, ylim = c(10,170), xlab = "Mom IQ", ylab = "Predicted Kid IQ", main = "Mom HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) axis(2, las = 1) boxplot(y_nohs, outline = FALSE, col = "red", axes = FALSE, ylim = c(10,170), xlab = "Mom IQ", ylab = 
NULL, main = "Mom No HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) ## ---- continuous-kidiq-validation, eval=FALSE, include=FALSE------------------ # # # External Validation # # source(paste0(ROOT, "ARM/Ch.3/kids_before1987.data.R"), # # local = kidiq, verbose = FALSE) # # source(paste0(ROOT, "ARM/Ch.3/kids_after1987.data.R"), # # local = kidiq, verbose = FALSE) # # post5 <- stan_lm(ppvt ~ hs + afqt, data = kidiq, # # prior = R2(location = 0.25, what = "mean"), seed = SEED) # # y_ev <- posterior_predict(post5, newdata = # # data.frame(hs = kidiq$hs_ev, afqt = kidiq$afqt_ev)) # # par(mfrow = c(1,1)) # # hist(-sweep(y_ev, 2, STATS = kidiq$ppvt_ev, FUN = "-"), prob = TRUE, # # xlab = "Predictive Errors in ppvt", main = "", las = 2) ## ---- continuous-clotting-mle, results='hide'--------------------------------- clotting <- data.frame( u = c(5,10,15,20,30,40,60,80,100), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12)) summary(glm(lot1 ~ log(u), data = clotting, family = Gamma)) summary(glm(lot2 ~ log(u), data = clotting, family = Gamma)) ## ---- continuous-clotting-mcmc, results="hide"-------------------------------- clotting2 <- with(clotting, data.frame( log_plasma = rep(log(u), 2), clot_time = c(lot1, lot2), lot_id = factor(rep(c(1,2), each = length(u))) )) fit <- stan_glm(clot_time ~ log_plasma * lot_id, data = clotting2, family = Gamma, prior_intercept = normal(0, 1, autoscale = TRUE), prior = normal(0, 1, autoscale = TRUE), seed = 12345) ## ----------------------------------------------------------------------------- print(fit, digits = 3) rstanarm/inst/doc/binomial.Rmd0000644000176200001440000004131414214422264016065 0ustar liggesusers--- title: "Estimating Generalized Linear Models for Binary and Binomial Data with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # 
Introduction This vignette explains how to estimate generalized linear models (GLMs) for binary (Bernoulli) and Binomial response variables using the `stan_glm` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of conditionally independent binomial distributions (possibly with only one trial per observation). # Likelihood For a binomial GLM the likelihood for one observation $y$ can be written as a conditionally binomial PMF $$\binom{n}{y} \pi^{y} (1 - \pi)^{n - y},$$ where $n$ is the known number of trials, $\pi = g^{-1}(\eta)$ is the probability of success and $\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor. For a sample of size $N$, the likelihood of the entire sample is the product of $N$ individual likelihood contributions. Because $\pi$ is a probability, for a binomial model the _link_ function $g$ maps between the unit interval (the support of $\pi$) and the set of all real numbers $\mathbb{R}$. When applied to a linear predictor $\eta$ with values in $\mathbb{R}$, the inverse link function $g^{-1}(\eta)$ therefore returns a valid probability between 0 and 1. The two most common link functions used for binomial GLMs are the [logit](https://en.wikipedia.org/wiki/Logit) and [probit](https://en.wikipedia.org/wiki/Probit) functions. 
With the logit (or log-odds) link function $g(x) = \ln{\left(\frac{x}{1-x}\right)}$, the likelihood for a single observation becomes $$\binom{n}{y}\left(\text{logit}^{-1}(\eta)\right)^y \left(1 - \text{logit}^{-1}(\eta)\right)^{n-y} = \binom{n}{y} \left(\frac{e^{\eta}}{1 + e^{\eta}}\right)^{y} \left(\frac{1}{1 + e^{\eta}}\right)^{n - y}$$ and the probit link function $g(x) = \Phi^{-1}(x)$ yields the likelihood $$\binom{n}{y} \left(\Phi(\eta)\right)^{y} \left(1 - \Phi(\eta)\right)^{n - y},$$ where $\Phi$ is the CDF of the standard normal distribution. The differences between the logit and probit functions are minor and -- if, as __rstanarm__ does by default, the probit is scaled so its slope at the origin matches the logit's -- the two link functions should yield similar results. With `stan_glm`, binomial models with a logit link function can typically be fit slightly faster than the identical model with a probit link because of how the two models are implemented in Stan. Unless the user has a specific reason to prefer the probit link, we recommend the logit simply because it will be slightly faster and more numerically stable. In theory, there are infinitely many possible link functions, although in practice only a few are typically used. Other common choices are the `cauchit` and `cloglog` functions, which can also be used with `stan_glm` (every link function compatible with`glm` will work with `stan_glm`). 
# Priors ```{r, child="children/stan_glm_priors.txt"} ``` # Posterior With independent prior distributions, the joint posterior distribution for $\alpha$ and $\boldsymbol{\beta}$ is proportional to the product of the priors and the $N$ likelihood contributions: $$f\left(\alpha,\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N { g^{-1}\left(\eta_i\right)^{y_i} \left(1 - g^{-1}\left(\eta_i\right)\right)^{n_i-y_i}}.$$ This is posterior distribution that `stan_glm` will draw from when using MCMC. # Logistic Regression Example When the logit link function is used the model is often referred to as a logistic regression model (the inverse logit function is the CDF of the standard logistic distribution). As an example, here we will show how to carry out a few parts of the analysis from Chapter 5.4 of [Gelman and Hill (2007)](http://www.stat.columbia.edu/~gelman/arm/) using `stan_glm`. Gelman and Hill describe a survey of 3200 residents in a small area of Bangladesh suffering from arsenic contamination of groundwater. Respondents with elevated arsenic levels in their wells had been encouraged to switch their water source to a safe public or private well in the nearby area and the survey was conducted several years later to learn which of the affected residents had switched wells. The goal of the analysis presented by Gelman and Hill is to learn about the factors associated with switching wells. To start, we'll use `dist` (the distance from the respondent's house to the nearest well with safe drinking water) as the only predictor of `switch` (1 if switched, 0 if not). Then we'll expand the model by adding the arsenic level of the water in the resident's own well as a predictor and compare this larger model to the original. After loading the `wells` data, we first rescale the `dist` variable (measured in meters) so that it is measured in units of 100 meters. 
If we leave `dist` in its original units then the corresponding regression coefficient will represent the effect of the marginal meter, which is too small to have a useful interpretation. ```{r binom-arsenic-data} library(rstanarm) data(wells) wells$dist100 <- wells$dist / 100 ``` Before estimating any models we can visualize the distribution of `dist100` in the data: ```{r, binom-arsenic-plot-dist100, fig.height=3} ggplot(wells, aes(x = dist100, y = ..density.., fill = switch == 1)) + geom_histogram() + scale_fill_manual(values = c("gray30", "skyblue")) ``` In the plot above the blue bars correspond to the `r sum(rstanarm::wells$switch == 1)` residents who said they switched wells and darker bars show the distribution of `dist100` for the `r sum(rstanarm::wells$switch == 0)` residents who didn't switch. As we would expect, for the residents who switched wells, the distribution of `dist100` is more concentrated at smaller distances. A Bayesian version of Gelman and Hill's initial logistic regression model can be estimated using the `stan_glm` function. Here we'll use a Student t prior with 7 degrees of freedom and a scale of 2.5, which, as discussed above, is a reasonable default prior when coefficients should be close to zero but have some chance of being large. ```{r, binom-arsenic-mcmc, results="hide"} t_prior <- student_t(df = 7, location = 0, scale = 2.5) fit1 <- stan_glm(switch ~ dist100, data = wells, family = binomial(link = "logit"), prior = t_prior, prior_intercept = t_prior, cores = 2, seed = 12345) ``` ```{r, binom-arsenic-print, echo=FALSE} (coef_fit1 <- round(coef(fit1), 3)) ``` The `formula`, `data` and `family` arguments to `stan_glm` are specified in exactly the same way as for `glm`. We've also added the optional additional arguments `chains` (how many chains we want to execute), `cores` (how many cores we want the computer to utilize) and `seed` (for reproducibility). 
You can read about other possible arguments in the `stan_glm` documentation (`help(stan_glm, package = 'rstanarm')`). To get a sense for the uncertainty in our estimates we can use the `posterior_interval` function to get Bayesian uncertainty intervals. The uncertainty intervals are computed by finding the relevant quantiles of the draws from the posterior distribution. For example, to compute 50% intervals we use: ```{r, binom-arsenic-ci} round(posterior_interval(fit1, prob = 0.5), 2) ``` For more on `posterior_interval` and interpreting the parameter estimates from a Bayesian model see Step 2 in the ["How to Use the __rstanarm__ Package"](rstanarm.html) vignette. Using the coefficient estimates we can plot the predicted probability of `switch = 1` (as a function of `dist100`) together with the observed outcomes: ```{r, binom-arsenic-plot-model} # Predicted probability as a function of x pr_switch <- function(x, ests) plogis(ests[1] + ests[2] * x) # A function to slightly jitter the binary data jitt <- function(...) { geom_point(aes_string(...), position = position_jitter(height = 0.05, width = 0.1), size = 2, shape = 21, stroke = 0.2) } ggplot(wells, aes(x = dist100, y = switch, color = switch)) + scale_y_continuous(breaks = c(0, 0.5, 1)) + jitt(x="dist100") + stat_function(fun = pr_switch, args = list(ests = coef(fit1)), size = 2, color = "gray35") ``` The plot shows that under this model the predicted probability of switching is a decent bit above 50% for residents living very close to wells with safe drinking water. As expected, larger values of `dist100` are associated with lower predicted probabilities of switching. At the extreme ($\approx 300$ meters), the probability is about 25%. Next, we incorporate an additional predictor into the model: the arsenic level of water in the respondent's well. 
According to Gelman and Hill, "At the levels present in the Bangladesh drinking water, the health risks from arsenic are roughly proportional to exposure, and so we would expect switching to be more likely from wells with high arsenic levels" (pg. 90). We only need to change the formula, so we can use the `update` function: ```{r binom-arsenic-mcmc2, results="hide"} fit2 <- update(fit1, formula = switch ~ dist100 + arsenic) ``` ```{r} (coef_fit2 <- round(coef(fit2), 3)) ``` As expected the coefficient on `arsenic` is positive. The plot below shows distance on the x-axis and arsenic level on the y-axis with the predicted probability of well-switching mapped to the color of the background tiles (the lighter the color the higher the probability). The observed value of `switch` is indicated by the color of the points. ```{r,echo=FALSE} theme_update(legend.position = "right") ``` ```{r, binom-arsenic-plot-model2} pr_switch2 <- function(x, y, ests) plogis(ests[1] + ests[2] * x + ests[3] * y) grid <- expand.grid(dist100 = seq(0, 4, length.out = 100), arsenic = seq(0, 10, length.out = 100)) grid$prob <- with(grid, pr_switch2(dist100, arsenic, coef(fit2))) ggplot(grid, aes(x = dist100, y = arsenic)) + geom_tile(aes(fill = prob)) + geom_point(data = wells, aes(color = factor(switch)), size = 2, alpha = 0.85) + scale_fill_gradient() + scale_color_manual("switch", values = c("white", "black"), labels = c("No", "Yes")) ``` We can see that the black points (`switch=1`) are predominantly clustered in the upper-left region of the plot where the predicted probability of switching is highest. Another way we can visualize the data and model is to follow Gelman and Hill and create separate plots for varying the arsenic level and distance. Here we'll plot curves representing the predicted probability of switching for the minimum, maximum and quartile values of both variables. 
```{r,echo=FALSE} theme_update(legend.position = "none") ``` ```{r, binom-arsenic-plot-model2-alt} # Quantiles q_ars <- quantile(wells$dist100, seq(0, 1, 0.25)) q_dist <- quantile(wells$arsenic, seq(0, 1, 0.25)) base <- ggplot(wells) + xlim(c(0, NA)) + scale_y_continuous(breaks = c(0, 0.5, 1)) vary_arsenic <- base + jitt(x="arsenic", y="switch", color="switch") vary_dist <- base + jitt(x="dist100", y="switch", color="switch") for (i in 1:5) { vary_dist <- vary_dist + stat_function(fun = pr_switch2, color = "gray35", args = list(ests = coef(fit2), y = q_dist[i])) vary_arsenic <- vary_arsenic + stat_function(fun = pr_switch2, color = "gray35", args = list(ests = coef(fit2), x = q_ars[i])) } bayesplot_grid(vary_dist, vary_arsenic, grid_args = list(ncol = 2)) ``` We can compare our two models (with and without `arsenic`) using an approximation to Leave-One-Out (LOO) cross-validation, which is a method for estimating out of sample predictive performance and is implemented by the `loo` function in the __loo__ package: ```{r, binom-arsenic-loo} (loo1 <- loo(fit1)) (loo2 <- loo(fit2)) loo_compare(loo1, loo2) ``` These results favor `fit2` over `fit1`, as the estimated difference in `elpd` (the expected log pointwise predictive density for a new dataset) is so much larger than its standard error. LOO penalizes models for adding additional predictors (this helps counter overfitting), but in this case `fit2` represents enough of an improvement over `fit1` that the penalty for including `arsenic` is negligible (as it should be if `arsenic` is an important predictor). The [vignette](lm.html) for the `stan_lm` function also has an example of using the `loo` function where the results are quite a bit different from what we see here and some important additional considerations are discussed. # Conditional Logit Models The previous example relies on the fact that observations are plausibly conditionally independent. 
In contrast, so-called "case-control studies" require that there are a fixed number of successes and failures within each stratum, and the question is _which_ members of each stratum succeed and fail? The `stan_clogit` function estimates such a model and is very similar to the `clogit` function in the **survival** package. The main syntactical difference is that the `clogit` function requires that the user call the `strata` function in the model formula, whereas the `stan_clogit` function has a required `strata` argument. In addition, in the `stan_clogit` case the data must be sorted by the variable passed to `strata`. The advantage to these changes is that `stan_clogit` can optionally utilize the multilevel syntax from the **lme4** package to specify group-specific terms, rather than the more limited multilevel structure supported by the `frailty` function in the **survival** package. The [vignette](glmer.html) for the `stan_glmer` function discusses the lme4-style syntax in more detail. For example, ```{r, results = "hide"} post <- stan_clogit(case ~ spontaneous + induced + (1 | parity), data = infert[order(infert$stratum), ], # order necessary strata = stratum, QR = TRUE, cores = 2, seed = 12345) ``` ```{r} post ``` The posterior predictions are also constrained such that there is exactly one success (in this case) for each of the strata and thus the posterior distribution of the probabilities are also so constrained: ```{r} PPD <- posterior_predict(post) stopifnot(rowSums(PPD) == max(infert$stratum)) PLP <- posterior_linpred(post, transform = TRUE) stopifnot(round(rowSums(PLP)) == max(infert$stratum)) ``` # Binomial Models Although the example in this vignette focused on a binary response variable, we can use nearly identical code if we have the sum of multiple binary variables. For example, image a hypothetical dataset similar to the well-switching data but spanning multiple villages. 
Each observation (each row) of this `data.frame` corresponds to an entire village: `switch[i]` is the number of 'yes' responses to the well-switching question for village `i`, `dist100[i]` is the average distance to the closest well with clean water for village `i`, etc. We also now have a variable `n` where `n[i]` is the number of respondents from village `i`. For this data we can estimate a similar model to the one we used in the binary case by changing the formula to `cbind(switch, n - switch) ~ dist100 + arsenic` The left-hand side is now a 2-column matrix where the first column is the number of 'yes' responses and the second column is the number of 'no' responses (or more generally, the number of successes and number of failures). The same model can also be specified using the proportion of 'yes' responses and the total number of responses in each village. This corresponds to the formula `prop_switch ~ dist100 + arsenic` where `prop_switch = switch / n` is the proportion of 'yes' responses. The total number of responses is provided using the `weights` argument. In this case we would add `weights = n` to the call to `stan_glm`. An example of a similar model can also be found in __Step 1__ of the ["How to Use the __rstanarm__ Package"](rstanarm.html) vignette. # Going Further In the hypothetical scenario above, if we also have access to the observations for each individual in all of the villages (not just the aggregate data), then a natural extension would be to consider a multilevel model that takes advantage of the inherent multilevel structure of the data (individuals nested within villages). The [vignette](glmer.html) for the `stan_glmer` function discusses these models. # References Gelman, A. and Hill, J. (2007). _Data Analysis Using Regression and Multilevel/Hierarchical Models._ Cambridge University Press, Cambridge, UK. 
rstanarm/inst/doc/summary_by_poststrat_var.rda0000644000176200001440000000104113540753420021474 0ustar liggesusers r0b```b`fcd`b2Y# '(.M,O//.).)J,/K,b``)Ҽ@7fBHQdI-K)QfT(ӵ$#/393/9?&⛘f&krNb1HEi%@0b>7?0PZJK@iK.qt .?wXkOv>Zns751 *SЌ,/׃ JL @! CdYK@ɕXV4 |Mrstanarm/inst/doc/preference_by_state.rda0000644000176200001440000000202513540753420020325 0ustar liggesusers]}L[UkY`"C7kLCd[e+miJ!Ѩ"M椓MSD pMPT6d, 戴ݗxs{=Ԡ*U1 #c L2is8>a-m~x Q| ! f@@DT|*_FB |9$ YCAE#}AGr H لnۺ8pՐ $!o-Z4h+fq4W@-3] .eZ]ioJ5~M4FKi)z>-J+襭tqb!T&1HK#191) mHp4.GN RU}IK9+*\U}h"g16? 4> ~Gw'^ EEݎ%/'xf\I|!FM'a)će늇`d&Z)Ų̌51&_춭ZAz 0Wk*xa&sOW|K]/,LM'L:=cďaB'0w5#\T>۸w ;z^{8|rI, Spevb\Q{VX[tBCwG Q%O}*uV6? nC4 Sr(}յ:1sC2qy<_/wYN#ntN"[`&rstanarm/inst/doc/mrp-files/0000755000176200001440000000000013540753420015524 5ustar liggesusersrstanarm/inst/doc/mrp-files/mrp.bib0000644000176200001440000000634613540072577017020 0ustar liggesusers@article{little1993post, title={Post-stratification: a modeler's perspective}, author={Little, Roderick JA}, journal={Journal of the American Statistical Association}, volume={88}, number={423}, pages={1001--1012}, year={1993}, publisher={Taylor \& Francis Group} } @article{park2004bayesian, title={Bayesian multilevel estimation with poststratification: state-level estimates from national polls}, author={Park, David K and Gelman, Andrew and Bafumi, Joseph}, journal={Political Analysis}, volume={12}, number={4}, pages={375--385}, year={2004}, publisher={Cambridge University Press} } @article{shirley2015hierarchical, title={Hierarchical models for estimating state and demographic trends in US death penalty public opinion}, author={Shirley, Kenneth E and Gelman, Andrew}, journal={Journal of the Royal Statistical Society: Series A (Statistics in Society)}, volume={178}, number={1}, pages={1--28}, year={2015}, publisher={Wiley Online Library} } @article{barr2013random, title={Random effects structure for confirmatory hypothesis testing: Keep it maximal}, author={Barr, Dale J and Levy, Roger and 
Scheepers, Christoph and Tily, Harry J}, journal={Journal of memory and language}, volume={68}, number={3}, pages={255--278}, year={2013}, publisher={Elsevier} } @article{ghitza2013deep, title={Deep interactions with MRP: Election turnout and voting patterns among small electoral subgroups}, author={Ghitza, Yair and Gelman, Andrew}, journal={American Journal of Political Science}, volume={57}, number={3}, pages={762--776}, year={2013}, publisher={Wiley Online Library} } @article{lei20172008, title={The 2008 election: A preregistered replication analysis}, author={Lei, Rayleigh and Gelman, Andrew and Ghitza, Yair}, journal={Statistics and Public Policy}, pages={1--8}, year={2017}, publisher={Taylor \& Francis} } @article{gelman2007struggles, title={Struggles with survey weighting and regression modeling}, author={Gelman, Andrew}, journal={Statistical Science}, pages={153--164}, year={2007}, publisher={JSTOR} } @article{lax2009should, title={How should we estimate public opinion in the states?}, author={Lax, Jeffrey R and Phillips, Justin H}, journal={American Journal of Political Science}, volume={53}, number={1}, pages={107--121}, year={2009}, publisher={Wiley Online Library} } @article{park2004bayesian, title={Bayesian multilevel estimation with poststratification: state-level estimates from national polls}, author={Park, David K and Gelman, Andrew and Bafumi, Joseph}, journal={Political Analysis}, volume={12}, number={4}, pages={375--385}, year={2004}, publisher={Cambridge University Press} } @article{gelman2005analysis, title={Analysis of variance-why it is more important than ever}, author={Gelman, Andrew and others}, journal={The annals of statistics}, volume={33}, number={1}, pages={1--53}, year={2005}, publisher={Institute of Mathematical Statistics} } @article{si2017bayesian, title={Bayesian hierarchical weighting adjustment and survey inference}, author={Si, Yajuan and Trangucci, Rob and Gabry, Jonah Sol and Gelman, Andrew}, journal={arXiv preprint 
arXiv:1707.08220}, year={2017} }rstanarm/inst/doc/mrp-files/summary_by_poststrat_var.rda0000644000176200001440000000104113540753420023372 0ustar liggesusers r0b```b`fcd`b2Y# '(.M,O//.).)J,/K,b``)Ҽ@7fBHQdI-K)QfT(ӵ$#/393/9?&⛘f&krNb1HEi%@0b>7?0PZJK@iK.qt .?wXkOv>Zns751 *SЌ,/׃ JL @! CdYK@ɕXV4 |Mrstanarm/inst/doc/mrp-files/preference_by_state.rda0000644000176200001440000000202513540753420022223 0ustar liggesusers]}L[UkY`"C7kLCd[e+miJ!Ѩ"M椓MSD pMPT6d, 戴ݗxs{=Ԡ*U1 #c L2is8>a-m~x Q| ! f@@DT|*_FB |9$ YCAE#}AGr H لnۺ8pՐ $!o-Z4h+fq4W@-3] .eZ]ioJ5~M4FKi)z>-J+襭tqb!T&1HK#191) mHp4.GN RU}IK9+*\U}h"g16? 4> ~Gw'^ EEݎ%/'xf\I|!FM'a)će늇`d&Z)Ų̌51&_춭ZAz 0Wk*xa&sOW|K]/,LM'L:=cďaB'0w5#\T>۸w ;z^{8|rI, Spevb\Q{VX[tBCwG Q%O}*uV6? nC4 Sr(}յ:1sC2qy<_/wYN#ntN"[`&rstanarm/inst/doc/mrp-files/sample_alt.rda0000644000176200001440000000503413540753420020337 0ustar liggesusers]Ys7=l \ r>/d\vmW%bBuG7n{l½<$vyR) O>ՁzPϐ;[^D6[Wq~(x17e/0|&L_Aˎ y`><~aV<-"㤘]#Xggjk#js[0RYlv;s{bna,܆`s;q>4Qou8 GŞ0֠ga24 ]'{|~)([&k%+2Z8x뀯FCWZx-y=}1I퇵tbjaبQ̼KkF^y0I^O,Y 1kyCڃ}G[ݾ8)bY[gu5އ>LSF\ɻ]k(hm]%cqxgZ~]+u*S?Ӆ4QށN:rss>0.+ Z+z^kƛ  G!GV>NX\-.PڸʾbXg~uָ9kkjzkvU݈wcAëu-^447Ɠ\ +y{4?֮~=gfuJxAAN9.}ԑsWFS}-ǫS O0ޚm޳[s;ԯ<~pw?yxh \|=80~BZ<4kŗ)8U5ySqICɳʕ淕)3Zƅ76zܯ8ld0.Q_L/up`r\q߶?Ug}7XYojB8Ky5 KqX72jtrMǵw1X=:G#=xNp |/zleo_mM}193IʅưgqMcKU>cLOy,I3#_Y+N~wNhZﵸ9 eզ8?>Ӥf7-`6U54ڦ|m77|\7{[z?/gWmo} ))=_xYG~zy7^eL}Ƴrstanarm/inst/doc/mrp-files/state_plot_data.rda0000644000176200001440000000264313540753420021370 0ustar liggesusersSUTxY%$ 4dw(-n^mh2,&)Hªir(IRyԒ4L$ɬ~`jty{E=gvsְ8CqSx85Cƹq#[M,Кc8N1ݑĜq8S(}A? gK Ε0XB C))#(K%BhX =Es RW()8Iz]E{BqG`ꂺ3=%|k#cS$_ t|=ľm3O>O%K{H>÷gחO~Χ1>" ռN}|/s6y" Y[i3rl<oϿΏV{5|1yqڃ|ܸ~ޮ~ڊ9/#!<ϏϟC~O`ߦ bΦ}] h_mM|wKr:^mޥՏ\! 
UƷk7)ګvZ,ǗܗѾj֝ݛqi&_$q_ υ7=LOJ@((!cvQTrp&n9yiUʨu@CW6tCUwK_puڑ=PeSL: _{NAmaGHp8tZC}N=q ;cnݨ&GVVTmZX -bp&k Ǖٶ֋wmjIm N*w@u7e+:E96ߵzʢI?@=w8i޿oB?G8 ށwFN[Fmv]qno X\ S:r{`6T]XuS?Tn z`WQd'PwVPQw!o>pBqnzWF _=χχxs7䚋QkKFSŜK{nBK=l*X"H6) <+N>@kREhCZPb6 P&5,&™*sB3,D) c6VJLRCXRA%N0Fɘ)Tʂd쑨Y611+=Ѱ *U%._zOH?\aN 3v aHzAh.MDQTX,D6100()P!h]m"mzvP,z?+Lrstanarm/inst/doc/mrp-files/interaction.rda0000644000176200001440000000113313540753420020531 0ustar liggesusers r0b```b`fcd`b2Y# '+I-JL.c`` i^ -FfĎQfI-K) %yə%PfT490pҀ/1F;0h-FK24 B>$<w:ho`u.ۂ%OLV:? KH9_}y'2˾ܛ}So?H~fWbJ{ы]OV4;2ͺI,=mmKק-~u(n ɰR[O\5%'hՕV=!mJ5]_s5zÙ;oU_rsܭ x.~iuO;Ϳ9~vUدs=38>-v_%ylv"+-.?_ԙ1&AaO=}߬~.Гs^bn*,9B92rRaĒ"X`M˱U&d*SЬ,/׃YL @o CgY˃@ɕXV4fz8frstanarm/inst/doc/mrp-files/mrp_sim.rda0000644000176200001440000006562613540753420017701 0ustar liggesusers E@h@s7D$A/"p#"yˌ{uEYT&B2Y&d2Yطy=}g&o=7]U9U9㤳?E>ώ{D~qa+ݑݎ|vwW6D;8#)/6|SەT>mh[[__R:m'՛$T}ӌkGRI5w=Sq(.+ޥmGSB~)_ǵ+zsæ=򷴽J{{:W7m)kvZ?Z*>'Ŷ+I^s3#I(ŖO:Jsz*Xo.y{Sl{hv(5I7W=MO5כJI+/qGSRvSh<gR:jb5Վiۑ6׾b\z+')R=Ii.;&o)=%#.KOjWUSXK٧v5UNSmOSck_jw_-n8L[zX{'\IhR7hj)6c'K[?*'htMN/mM[V?9vJs[(jW5m9T9ebgv455ul\j~TZz4W'mMWSY;Q:-e檯RRoSi쿧{r3MGK$(|6n$mz|=5?T|m.yŖ+6\.zsR{J^s;9劯qGT$\s׷Sn{7󅦖kuSS-5W_5|ZesO*WnK;}rbͽoj-5Rqoy;m,~v;T*V/rַ㶗\bOIn-iҿ;܇;`4|q}O[`Ƕ1>oo\A?rr*\80]14ۑdO]lyW|2ŵ7N8XO[G^O#ڟ4=bш^/4|=qI8/z^٧?3>ǕwQmOLH{[NsR~$=Ώ,珸ǍꋙgzΣq+ipy n qLs.q*qq1qs}ҼNٞXy.]McgZ91od87|;.i=ꍍ1kzQL}qcn롔u'?ĵ&|9 19Mqt\Cz3vŬO$֙wR܉[71M/m%#I"z:&Ώq??\?Iv|\q0mK{?W.;v=N9Fϸ_ۓK~\uoG^i9Iz&Sᘤw?q&iW~4ޒ]WEL=^N}@qK\;x'nqV[w=}Fr:Ǖ/zckOvž.>M͟Z(e(%v%!}oSR뗐>X'ٱbK'ţ3uq iH_:nGL_(}O\MҎ[|7w>)N^_Ĕ/yމ :~&ħRIbיiSy3?O|)QƽoZ}tVb/9ΦRWE-5ZץΧ%W,\ѣv͗~J_캮d{9.Q,5ɍvM;/Q4}by81Ǵ/Jh_L-Mw8O/ҎS#e\N;RRb환?}F˗O?RI~/^I/mK='_Rh0u{Ŏb6g_?mMbsq1?kP8Mݟ;_t,튻8dUw=A^O;;/vO?]|]I9SOَb_y"]1S}>e;Q-m.OףB=?DOvjb}H?F)a7SE1LYoIrK_jʢCLT?-Jӵ+Q^t=xNjGZ+m\*Z^"]bcWb+5>/%7qL;^7^OJE!1ޖx说 ?wפIr:/b͟}*~84.Ӟoo׭IkI+F6_Tכ$'} վ|\:M'g/oORS"7R?}~\eb}7._v]6Χ|^hߤ8ϋ6{)9AiG<.1.pߚgm?|mjS߄I&i^/uݞoq:~cگ/I+u#wBED'.>< t%mc{E|koۡ NnslӗdPo:vu}\0?uqI/v`=yKx<ҕϔqz_'sdsGlt#;9R.U~Q^W`=~>qh/Gݗ|? ! 
Ѻs|g|b9?G>x/a?6c=7w?.O!^b~?*\@7aqڇr%Ŀ|ali/x^Wtqh}$}wvvf=ϳ㤿/ |a~!}wNcyc~z |a9?GݓO{ gyG~]y?CA:եI_?kE~~}Ǎ.ykw>oa?^}>(HR?ΎK[׭wLnhG/c;Zf:o/?]g{+@\߷.:xvRo__2;-q'k>^ll |a9?/~zڻߜhW]s~opv %8/ Bx?'s5|?>ڱoЏw~Gs|1n{;Gҝ>טzp_pv9|a:!_X˷wy߇q? EQ=p7}w:y{;! @?d~+ wܘG1^|G{lٝG{1:_߄Is7?8姿*\G99'Ϸϯ>\ Ǔ\9\Y?W?O:SNISyzr<_ߏIPXx o$o+v\ڳU3ceԋveaL#ڇ{z۟~_Ҟo' 1q~_~!_E@"/dc7PwCn( 廡(1_(1_%( _%( _()_()_|wQ;wG(廣e(_e(_(9_(9_(_W(_W(%_W(%_WU(_WU(_W(5_W(5_W5( _נ5( _נ(-_ע(-_עu(_סu(_ס(=_ף(=_ף|Q@tAq?y=f`o ȿo ȿo ȿo  =!'{B~O =!'{B~O ^ {A~/ ^ {A~/ !7ސ{C~o !7ސ{C~o} >@~} >@~ȿo!Fȿo!Fȿo!Fȿo!F }!/B~_ }!/B~_ ~A~? ~A~?ȿ o &ȿ o &ȿ o &ȿ o &!?C~!?C~ȿo!fȿo!fȿo!fȿo!fȿo[ ȿo[ ȿo[ ȿo[  ?@ ?@ȿo[!Vȿo[!Vȿo[!Vȿo[!V! ?B@! ?B@C~-vۃhOP廂hgl_~ ڟ B/A=/? ͯAAAAAAAAAAAAAAAA0 C0 C0 C0 C0 C0 C0 C0 C!@!@!@!@!@!@!@!@( B( B( B( B( B( B( B( BaAaAaAaAaAaAaAaA8C8C8C8C8C8C8C8CG@G@G@G@G@G@G@G@$ GB$ GB$ GB$ GB$ GB$ GB$ GB$ GBQGAQGAQGAQGAQGAQGAQGAQGA4 GC4 GC4 GC4 GC4 GC4 GC4 GC4 GC1 @1 @1 @1 @1 @1 @1 @1 @, B, B, B, B, B, B, B, BqAqAqAqAqAqAqAqA<C<C<C<C<C<C<C<C '@ '@ '@ '@ '@ '@ '@ '@bv 1@vx0L;M<~&~́'gKAy5`7ÕCAaXp"xL ?MM| >39 :q Xu`1VE _WU _WU@ :_W@ N* *WUU*j"WU _WU_WM* U_UU_U_5_ WW5_ Wwj5_j>j" _ Wy _ Ww5_ W5_=9;W&?t]kjbdKG!ntǿvygW^y%_ ջ[o&R's=r?ALow5zqB"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8#DQ;ۃC/_;G_~< <1cǁǃ`;#>>铐> 铑>.HwAOA+]>G(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`ޘ8wь߼vn;GMto+Y{z<v_9fw?SPZho/l@=VvOߙґ;Ldy֎wXOik3gXE'M*kLewݖ䇦ܽ/v]֎5}^MkQUaq{s?{7.SNnˣv~F/=n~瞝m}ڲy;ݸRkǛszG}}ϟzwnd׿fq]ehtuRzg1?d7t2{SV~Kc][ML띭SM V~&EVϛ5;ߵvlQcηr.=lu׍550˞,(wqEvk`kkn[|2#_kG \Vn%vo^ZhW[r]bz7|ڸ[7ʭzWmv,?@{X5_r-Yr-^r6 ^xنxUn;&WW6v>.lQ]ߎxuo K4˿jk/gVU\īMr[^yr.4ZmnzxrcVjū\Նv~զ*k6W7C.զo@?īWu_4AW"^j܈xUxUxj=JīU{[5W+ߍz[WαֽaZxUnīU߆īu_ ^ա?kZ=VYz˷ j U Ζo%jīՈW[5ƺ^W^F{a g 
^q>(x5j. īWӿf_Ax5\e^ ^ʹKn吳ҳnf?30g"^Ax+?ҋ>mʽ(_g ^Fx5W3深!^-!0+7J뫻-W_+W+ݖ_+^YW&/cJw[~Z~Z4yT+^Y~+W)^y+xe,ɍWO`p;,}~iW6_}mN ;_;>S&omPk`rɍsڶn4m[MV_L7x;=v`߷[_oWboAY}&Y=n3^c\[oX;+WT kϳ-]r/ zVv2ke!$/EU[t˿ra>Yv/i9WC oAv}U^Y yaU+k0YC( r_zJؔ[dr+;L5ў[,B\K/S1Χ8V~=~zq;*떞~aqm|tb}z 3ug/k!^Gxt3ɮ/ \cu|īe^ā]#܋CM,īyW/#^MGx5iīWs eī|^Bq|īfc>x5- ī=H1v#Cv sï_!^BN&ī9W~W ^=|{6D8ÈWOc4!1īG=#ѮMb]-Z ī|=x5|j`43WO!^=s{zl}:F;A?zzrqgZ_Wh`+]W2*^YZ lt?~~lr?x+^Y~+W |WV|Wxe!W v@n7:mNN;ـ%lHu/r'ݿ[34`I5;7V~~*7؎%[{W{-nb{7֎-_&@uOOYzUa]߆Vڵmc9hV˿֮X}![Y-_Ry`UGXk/vad18G5[{05_:<ZoauGX^ճP|h>e~ڷ]c[v~;Y~{)&~Sr扵Y߁3n\gb{̳tç7#P/;:dq>%-ž߷r x^}Λ^~0pqZw&A-?g =]|?|sWC;B-x_~ @Ў`o~gysOE]}j}(7wxj<īW5FZ.Cfg#^ͳ4 ^FZIAZx닥W a_RīEW5W?`]=j+!^EZx56˿j6bGZ9xUx_ī1x埾_b ^=B?{鳖~jSOS[zZԃxVK?xӈW/AzY x_Ջ/SE M^y'!?+gZځuW^WOî"^={_Dv'x,3Z_^LމJSU2y'Z:+JVD4yTzWW)^╵WU^VavnʽvW3ƓY?Nwvܫ'UZlz8/69,rk{wh;gL~&\M Lottn CHk=֮G VNfM_Ͽ޾4oν>7{n{밝5gw[0{6Tޜ`ox됻wrM[M-[-cM^O,ߖDc]yfǭe_շ i^noߘj 9ۭB|Wi%_`*Ƚ[լ n_fvk-^NMr[WZuWkhvX[|W,^͵rj!^;Dk][Qnū\WލWuO@U~W=L.F[یxU{;x۲Nnxkx{߀x~߄oXx볍W{-r[,^l@pܚVn=jī [+j#^-Cڀxr[xjXK/]_x2 W5땈WkVaՈWkVo_W:kNˎoCZu@X[lWbܯjjV nzCZrV"^D7oLīyKZz:9n<Ws^/W/!^@x5o`_līWV#;cYiوW3"^x5~)KW  ^MGx |>l<ī~ī9W.^Ak}eel}x+^Y~+X ~ШAA+˧xWF+xeL/+xeLnLZF`ƵYantwqw ~bmXxIY}]q3;+Uا~;_{pտ@};֮;ڳ+څ&跏x8f5}u,*ħ5ܹLoٴꭝvEE׶KLzjOj$dW4.42- =' ?7`XUx۸t_'l7;|\5r1,{|H;_۳z?@gՇ` v|п_qݦ ˲*˿jžͲN,V̙/gu?mFV߳3<q<;MQ yY=CgW^A?6Ю|~ɟ?r]q㬽-L|h6P9Ʃ5c1; ~K67"uW oRīWˎΞnqxt7v[9W3C4jFZx{շlgռ-X>j 9].īGf"^-ks +.Ft0wZ@.Vī٧]gxj1?g͝h:ZzZ_Gx5E엜x5q*īf!^8vA}' ?#^E}3^B~2 īY3ng#^M9Ov~jռ3qi,ī>9OCNސx2sQ/s^FzvƧGzjCWOGy %a|qÈW{]GXӰˤ},#wO?:8=A.գ~=a ^M؇]'E}5L2 X={W_+˯իJҊWȯxe*^+"-^_zcv~-d7aXڿ'&ooP/{:t>VϢGL#3[EWFE+,P ,ӯzġ'Xz>̆r9a`?r;B| ' /v`?a_ܱ~<|g&l㷨T~K ԋQ5?#x5j9j9W5j + jVwj/%GZ9Ks?jf}Ӯ:˝xj ԋxv}>օ Aj.ɐx|qUrӒw_||#^-Dx5jV~ ڿjo,=嚌SXOCzv})ī>|WODgrGߏzj8#^=x"SW#^M9C(x̃8x,K0r;#~Iī^@SBz뻧o|v,Az)ī) WSyՔ+NCjZ_Y~^L^WW _+^YW&/cJ5__VM^j,WzL╝WW&O+^v͎;Q}6n\>{_vgKݭ7{jZ?p|6 
|nqYn~0QζDю`H:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСCG:*DQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQQrCEQEQEQEQEQEQEQE3((((((((fmEQEQEQEQEQEQEQEQrBEQEQEQEQEQEQEQE3((((((((fN(((((((("8 QD(e.EQl%zOE1((EQl%B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fb(jG|`{`xKa`'H7oGG<6x,8x̑ycg;#}' }"'"}'!}2'#.H)HwE+ҧZZa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0F3QKT.Ώ{밍3>3^ܓW_5rw<Uq*W?~`K:\uh}?PO5Kp~S\ݲɿݯ|ym?WޮϐPm<%'~aAwO s7wxa5|^ć*_zof⎠_mn|{5_ᐟzS>۟S|Uύ𕪭 va~ʡݘt̲WNyvmU?:T.yeXŜO.\:2I=+Xӎԟ~CA6=soNd;Yc?|N>CG]B=3^8ceO)gïj?;T>G+qpAC0?nLNl/=!`Oll>#l{T zoAþ7cW=Y/? 3?=Y١]ЏϠ7l:Gv١ >OM.Ӵ?!vO0n/^|/20zW=/ԛ0!]9YI/ߏwڏ~jC0?nL{08~IkWO\|3O=22?@ڡk#/v~ԛr|aq~`C0?nL~9^N_ϳ}Onvs|u//z9n'|@8oOk/0Nup^rƏyF6WVtߏY苣Wqsnt}G6󤭕.T٭rS:mGGdvk>$w)_:UwH'ϬKsP=׆zoSkʥ%Ի[UrofU>s?Pς;ܶ8 ^qmrY_}~U9>5?'ޔ񈋷?poi.O9Ӵ;x`17\qGjsK=]93>zO7j'vҮԟa=O5wLϩeb9C>ڑK}{h z[?{^ާ@~)#Yp_]6cڅl/g!88kos\lgz˩!n缸g<ậ'n uaM =sv!^s_jC=ߌO)_סn׺iӕ~%+ vcvb{9yUmu.Ϝ|;iN?iG\OP?g$z]\'ٯ7l{m衍C0?nL~߳QP/Shl>#~IwPO+ԏWޔSzÏ?7x#%7^O[u!^s`}yCg90_Od?<7F&Я^N_ϳ}#s]zoG~kz9n6p1AOC;u!֍kg/lg>wO_r+ ǯ3bvb{?!  vf=~==?ӯi_<9~O~y_Q a>wuOwǼ_4V?&cOq}`A=X/S쏮pF}`G^77Is|`Ws|\r6~z=&.Ǘd{8l-|;Q;iWvyӎԟ'h70N:|_B=3;hG#r{kЏ_7t3}m}Ua>q>>s]?7Ġۡb` '? ~ٯPoW]Lz~ǥ/5!{ڑ׹O}(r>#|הFiX/ܮ>s4ܧݿ1ؓ~ q>7c rsn|{z_\}l' P#s=v$zo7?z9nA='53r6_wj. rh?bv a}muo' Pge?9f=ޮ U*%f=Å ߣ:ɿc?d-wvzx>;}"Xgg6uwkz9n~ͯkúB~.c?!g7ܐB|~%6L?\؉96[k3揻ZA=X/˅! mvuߟ\ۿ5vV'a|q_ʹ7su'p ̍/9g識^.G| z{;KoQ]x}`ڵ}ei:K&5&>k&QsЯ9~籸L;:egzmM[>;~K?_ ~_LuB=^\9w\{?PC(Ws` ~nLN>sѯs?ܮ@>Sy?"M,q?ܻe~~>%ztԣOk<O a?uZ|9{kyw|>}߅a>>󿋳ا|z9n+2]Q ~A1]G0vҮOi8ԟ|ˌ x ko|}v~)Oۂomvan JC\o^ ~vz׹/>#||߳xϴb0>gh/_ӿ 0ݎ%>߽1nU/gwZ_|;~[vS~ujg:p >F3s]?d;}=ᾍN?c=9nq?k;3r6_Ysp_n|Y8o1Ά܇a?n߇'v?0O;W}F6^P!nEطD/l=S+ ]vمÍ vb{wD8)<i?Ё!1U? 
ٸ}ٮ}at;SU.y[yϯ\?q^s]MܟvO린^юĬο¸x{8{g3{Mo(\gh|lǡ/??F~ܩڋ|~~{Xqմ/{ ǿ-pc}`2G?}3=n=¿#fЛrߗ`څ)vcv*{|q2?~޹??w+$Fgli7#Grߥg!.YK>rW׫o0Uov ~O+C0?nLNl/0~I0A;2n]m4ϻ-/!yiC=C;h7~N9 Sƴ; ~?jF5?jx ;8}UJOSTʧvaWؾ_{;_c#7|rOU.<~ύB=s?gʿ=ĤFzo,86跴gl>G?r+yvy!?˟suZXW~v.O9Ӵ|pչƲg+Wn:W^9c_>i>VvI+Xӎԟj7@0al#/KH#6R_<{з {ho v'ޔS=7rt|C0?nLNl/=!`O,G_;;?:vߑ7a m&֝`OqǓ.]wWГԏx_zSΆ׿_u*׽[%ȡ]ߏ/i'0_xs?Nzr|$w] /\܁4au]bڅ)vcگ{ybgW޽'f;jw/r[߭wqݮnzwJ޽'؍eC-wYK>s~Ynu=߷\zRѤFߗ0cϓXk9"!C;yϓ9{7ynU8> qw] }dA,gIƉ~ RKv}e/iWK6[k9> v󌏛q]g=>v4!록 ߛL _/ƧǢ]p?įCJ~/cdMQߏ?J]K}>6%>oi?\wv}v_Gs>Aƴn>f}-a~vҮ~C#uۯ:%g'NJp!록_oҎO)K=.;`=[|/ؑ~|o]?'eg?d5؟X.<wz <ܗv Ym:SXP/ {}$} )7*|Fiډ8v=ov=oo-ۛguqH3S]Ew%N?>_7'?S-7Z;+C=ޮ ׋Ǫ v~<~̦hpKX/ Ͼ eg?d3a9Ca_ofo߬{;3N۵r}o7vEط~FBNmt؇^ῂ]߯Ϙ}ͻq{k"K/us]Ao?~?~`_M/#{=RƫfW[Lv:!^ūʄxU2^U&īʔjvujx5ū~W =HAEm7*vo&\>{_vgKݭ7m/=_fA{6}nqYn;\^ݮ{ջg<ѻguVG L)D rstanarm/inst/doc/mrp-files/plot_data.rda0000644000176200001440000000115713540753420020167 0ustar liggesusers r0b```b`fcd`b2Y# ',/OI,Id`` i9 P<L;73Ǽ$^u" M8_o.G6@ԙ1@H:*_/{1h ]~i2]vhɀS579=p!}0?_pwuO6?X|Տ,vi1vyA#P{P>йBڟ9s_ecc9[h|=l?OIvدc`*elL[v_@МYj3wΜqsv<.N?̼`69 i-/t@'}R?{_KK;fCac#n' 78Q@03bCGLx0u( -',5BEs rR<<4}90mpKA (7F&"#ZYRIKfdKϥ aT܁ ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(collapse = TRUE) knitr::opts_chunk$set(eval = identical(Sys.getenv("NOT_CRAN"), "true")) library(rstanarm) ``` ## Abstract This case study shows how basic A/B testing using Stan and Bayesian methods can be used to facilitate business decisions. In practice, we find this approach useful given its ability to quantify domain-specific business knowledge and hypotheses through the use of prior distributions. Instead of using p-values and confidence intervals, we are able to perform inference with _probability_ intervals estimated from posterior predictions. In addition to the results being highly interpretable, this approach allows us to quantify business risk. ## Introduction A/B testing is an experiment. 
It is essentially a randomized controlled trial in an industry setting. The test or experiment is conducted on a subset of users in order to determine if a change in service (e.g. user experience) will have a positive impact on the business, before rolling out that change to all the users. (Here we consider _static_ A/B testing where inference is performed after the experiment. For A/B testing where the users are _dynamically_ allocated to the outperforming group during the experiment consider _multi-arm bandits_.) Here are a few stylized scenarios where A/B testing could provide useful insight: * If you change the order in which information is displayed on a service will users spend more time on the service? * If you personalize the cover art for streaming content are users more likely to stream that content? * Are users more responsive to a revised recommendation system? * Is there a noticeable difference between the three point accuracy of two basketball players? * In a drug trial, is there evidence that the treatment is better than the placebo? Typically, A/B testing involves one group of people being served the existing content (control group, group A) while another group is served different content (treatment group, group B) and, through a measurable indicator, the business wants to determine if there is a difference in reaction between the two groups. If we compare the two groups and find that the difference in the indicator is large (relative to our uncertainty) then we can argue that the different content drove that change. Conversely, if the change was minimal then we may be hesitant to conclude that the different content resulted in any change in behavior at all. In that situation perhaps the content needs to be redesigned and retested. Most A/B testing approaches used in practice typically rely on frequentist hypothesis testing methods. Not only are the results of these methods difficult to interpret, but they can also be misleading. 
Terms such as "p-values" and "confidence intervals" are often misinterpreted as probabilities directly related to the quantity of interest (e.g. the difference in means between two groups). P-values are also often used as cutoffs for business decisions. In other words, reaching a statistically significant result is often sufficient to convince a business to move forward with a particular decision. We argue that these decisions should not be reductively derived from arbitrary cutoffs (e.g. a p-value of less than 0.05). Instead they should be determined by domain-specific experts who understand the industry, with statisticians providing interpretable results that can help these experts make more informed decisions. This case study provides a way for domain-specific experts to apply their knowledge to the statistical inference process of A/B testing through prior distributions. Additionally, the experts can quantify the risk they are willing to take and probabilistically incorporate this into the inference. Some key benefits to the Bayesian approach outlined in this case study include, * Allowing domain-specific experts to apply their knowledge and appetite for risk to statistical inference. * Modeling the data rather than defining/computing a test statistic from the data. (This allows us to perform inference on the (predicted) data instead of the parameters.) * The ability to describe differences in groups probabilistically rather than using traditional hypothesis testing methods. * Quantifying null hypotheses in priors. We use simple examples to show how to apply Bayesian inference to A/B testing using continuous and count data. The examples used here are analogous to the t-test and the Fisher's exact test, but the methodology discussed can be applied to data that follow other distributions. 
The first section considers continuous data (assumed to be generated from the normal distribution) and the second section considers count data (assumed to be generated from the binomial distribution). (If you need a refresher on how convoluted hypothesis testing is, Appendix A goes over the interpretation of p-values using a two-sample t-test as an example.) At a high-level, we stress that frequentist methods focus on the distribution of the test statistic as opposed to the quantity of interest (i.e. predictions or parameters). In such methods inference is done by understanding how the observed test statistic compares to the distribution of the test statistic under the null hypothesis. Alternatively, the Bayesian approach proposed here allows the statistician to perform inference directly on the quantity of interest (in this case predicted data), which is more transparent and informative in the context of A/B testing.

## Continuous Data

_This example is analogous to the two sample t-test (specifically Welch's t-test) where the statistician is interested in testing if there is a noticeable difference between the means of two different samples._

Suppose an online streaming company is interested in testing whether ads affect the consumption of their service. The hypothesis is that reducing ads will increase hourly streaming consumption. Since this decision can be costly if a significant amount of revenue is derived from ads, it would be useful to conduct a test to evaluate the impact of ad reduction. One way to test this is to draw two random samples from the user base, serve them with different levels of ad content, and see if there is a substantial difference in streaming consumption (say hours per day). Suppose we treat the two groups in the following way,

* Group A (control): streaming service contains ads.
* Group B (treatment): streaming service contains no ads.

The data collected might look something like the following below.
Each observation is a user's average daily streaming consumption in hours. Suppose we also have an additional (binary) variable `hc` which defines whether a user is predisposed to being a _high consumer_ of streaming content (a value of 1 represents a high consumer and a value of -1 represents a low consumer).

```{r}
set.seed(123)
# Group membership: 10 users in the control group (A), 12 in the treatment group (B)
group <- factor(c(rep("A",10), rep("B",12)))
N <- length(group)
# High-consumer indicator: 1 = high consumer, -1 = low consumer
hc <- sample(c(-1,1), N, replace = TRUE)
# True group means (hours streamed per day) used to simulate the outcome;
# indexing by the factor `group` uses its underlying integer codes (A -> 1, B -> 2)
effect <- c(3,5)
lp <- effect[group] + 0.7*hc
y <- rnorm(N, lp, 0.5)
experiment <- data.frame(y = y, group = group, hc = hc)
experiment
```

In order to determine if there is a difference between the groups we need to define a model that predicts the outcome for each group. The data has been generated from the normal distribution so it is appropriate to specify a normal likelihood. (Often we do not know how the data is generated and have to make an assumption about which distribution should be used to model the likelihood.) Since we are _modeling_ the outcome we can include other variables, such as the high consumer indicator `hc`. Traditional hypothesis testing methods are focused on comparing the outcome of two groups. Here we model the outcome before comparing the groups. This allows us to include additional information in the model which will enable us to perform more granular inferences.

Next we need to specify prior distributions on each of these parameters. This is where the domain-specific expert can provide valuable input. For example, they may believe that (due to poor sampling) the sampled average of daily streaming hours is too low for each group. In such a situation a prior can be applied to coerce the estimated average closer to the value they feel is more appropriate and representative of the population.

Putting these pieces together gives us the model below. $y$ is the outcome (average streaming hours) and $\sigma$ is the residual standard deviation (i.e.
the standard deviation of $y$ conditional on the parameters and the data). $\mu$ is the parameter associated with the variable $group$ which defines group membership, and $\beta$ is the parameter associated with the high consumer indicator. One limitation of this approach is that $\sigma$ does not vary among groups. However, in this case it is sufficient to assume that the outcome of both groups has the same standard deviation. (In order to allow the standard deviation to vary among groups the model would have to be fit in [**rstan**](https://mc-stan.org/rstan/), which would require defining the model in a Stan file.)

$$
\begin{align*}
y_i \sim &\mathcal{N}(\mu_A \cdot groupA_i + \mu_B \cdot groupB_i + \beta \cdot high\_consumer_i, \sigma) \\
\mu_A \sim& \mathcal{N}(3,1) \\
\mu_B \sim& \mathcal{N}(3,1) \\
\beta \sim& \mathcal{N}(0,1) \\
& \mbox{(default prior specified on } \sigma \mbox{)}
\end{align*}
$$

With regard to priors, we have applied $\mathcal{N}(3,1)$ distributions on both group effects. The reasoning behind this is twofold:

1. Based on prior knowledge (past data and/or domain specific experts) we believe that users spend around three hours per day on the service (regardless of what our random sample says).
2. We allow the hyperparameters for both groups to be identical to quantify our belief that group B (which received the treatment) is not substantially different from group A. This can be interpreted as incorporating the belief underlying our null hypothesis into the prior. More importantly, this approach allows us to be more conservative when we do our inference. If we end up concluding that the two groups are different, we can say that the difference in behavior was so strong that it overcame our prior belief that the two groups are identical.

Now that we have established our model, we need to fit the model to the data so that we can estimate the parameters.
We can do this using the [**rstanarm**](https://mc-stan.org/rstanarm/) package which can fit a Bayesian linear regression model (using the `stan_glm()` function) without an intercept, and with group membership and additional variables as parameters. We fit the model below. ```{r results='hide'} fit <- stan_glm(y ~ 0 + group + hc, data = experiment, family = gaussian(link="identity"), prior = normal(c(3,3,0), 1), seed = 123) ``` Recall that Stan uses a sampling algorithm to estimate the joint posterior distribution of the parameters which means that we have samples instead of point estimates for the parameter values. The medians for each parameter are provided below. ```{r} c(coef(fit), sigma = sigma(fit)) ``` With these estimates it looks like Group A had an average consumption of about 3 hours while Group B had an average consumption of about 5 hours. This gives us a difference in consumption of approximately 2 hours. Unfortunately, this assessment does not say anything about how uncertain this difference is. We would like to be able to say something like "we are $p\%$ sure that the two groups are different enough". We can quantify the uncertainty of how different the two estimates are by computing sample quantiles on the posterior predictive distribution. This is often referred to as a credible interval, although the preferred term is _predictive interval_ when describing predictions (and _posterior interval_ when describing parameters). If we compute the $90\%$ predictive interval then we can say that $90\%$ of the posterior predictions for that group lie between that interval. In order for us to evaluate whether the two groups are different enough we can compute the [overlap coefficient](https://en.wikipedia.org/wiki/Overlap_coefficient), which describes the overlap of the prediction intervals for each group as a proportion. For example, suppose there is a $15\%$ overlap between the $90\%$ prediction intervals in each of the two groups. 
This allows us to say, given that we are $90\%$ certain about where the predictions lie, there's a $15\%$ chance that the two groups are similar. The functions below compute the proportion of overlap between the two groups.

```{r}
#' Quantify Overlapping Proportion
#' Compute how much of the smaller distribution overlaps with the larger (i.e. wider) distribution.
#' @param large Posterior predictive samples that have larger range than \code{small}.
#' @param small Posterior predictive samples that have smaller range than \code{large}.
#' @param p Probability to compute prediction interval.
#' @return A proportion between 0 and 1 indicating how much of \code{small} is contained in \code{large} given the credible interval specification.
overlap_prop <- function(large, small, p = 1) {
  # Tail probability on each side of the central 100*p% interval
  tail_p <- (1 - p) / 2
  interval <- quantile(large, probs = c(tail_p, 1 - tail_p))
  lo <- min(interval)
  hi <- max(interval)
  # Fraction of the smaller sample that falls inside the interval of the larger one
  inside <- which(small >= lo & small <= hi)
  length(inside) / length(small)
}

#' Quantify Overlapping Posterior Predictive Distributions
#' Quantify the overlap between posterior samples from two distributions.
#' @param a Group A posterior predictive samples.
#' @param b Group B posterior predictive samples.
#' @param p Probability to compute credible interval.
#' @return A proportion between 0 and 1 indicating how much of the credible intervals for \code{a} and \code{b} overlap with one another.
overlap <- function(a, b, p = 1) {
  # The sample with the wider range supplies the interval; the narrower
  # sample is measured against it.
  width_a <- dist(range(a))
  width_b <- dist(range(b))
  if (width_a >= width_b) {
    overlap_prop(a, b, p)
  } else {
    overlap_prop(b, a, p)
  }
}
```

Below we compute the $0.9$ prediction interval for both groups. Note that the prediction interval choice is arbitrary, and may vary depending on the applied context and the appetite for uncertainty. This is also where we recommend getting input from domain-specific experts.
In this case we are willing to accept a $10\%$ chance of being wrong about where the predictions lie. The closer the prediction interval is to $1$ the more risk averse the business is with regards to inference. ```{r fig.align='center', fig.height=8, fig.width=6} pp_a <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = experiment$hc)) pp_b <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = experiment$hc)) pp_a_quant <- quantile(pp_a, probs = c(0.05,0.95)) pp_b_quant <- quantile(pp_b, probs = c(0.05,0.95)) overlap(pp_a, pp_b, p = 0.9) par(mfrow=c(2,1)) # group A hist(pp_a, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a_quant[1], lwd = 2, col = "red") abline(v = pp_a_quant[2], lwd = 2, col = "red") # group B hist(pp_b, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b_quant[1], lwd = 2, col = "red") abline(v = pp_b_quant[2], lwd = 2, col = "red") ``` After computing the $90\%$ prediction interval for both groups we find an overlap proportion of approximately $0.25$. Thus, given that we are $90\%$ sure about our posterior predictions for the two groups, we are about $75\%$ sure that the two groups are in fact different. Going back to the business context, we can conclude that we are $75\%$ sure that reducing ads increases daily streaming consumption given our acceptable risk of being $10\%$ wrong about daily streaming consumption. Since we modeled the outcome using a predictor (in addition to group membership variables) we can vary the predictor as well as group membership for an observation for more detailed inference. Below we plot the prediction intervals for each group and high consumer variable combination. This allows to us compare the difference in average streaming hours among the two groups for those individuals that were categorized as high/low consumers. 
```{r fig.align='center', fig.height=6, fig.width=10} pp_a0 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = -1)) pp_b0 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = -1)) pp_a1 <- posterior_predict(fit, newdata = data.frame(group = factor("A"), hc = 1)) pp_b1 <- posterior_predict(fit, newdata = data.frame(group = factor("B"), hc = 1)) pp_a0_quant <- quantile(pp_a0, probs = c(0.05,0.95)) pp_b0_quant <- quantile(pp_b0, probs = c(0.05,0.95)) pp_a1_quant <- quantile(pp_a1, probs = c(0.05,0.95)) pp_b1_quant <- quantile(pp_b1, probs = c(0.05,0.95)) par(mfrow=c(2,2)) # group A, x = 0 hist(pp_a0, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A (hc=-1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a0_quant[1], lwd = 2, col = "red") abline(v = pp_a0_quant[2], lwd = 2, col = "red") # group B, x = 0 hist(pp_b0, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B (hc=-1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b0_quant[1], lwd = 2, col = "red") abline(v = pp_b0_quant[2], lwd = 2, col = "red") # group A, x = 1 hist(pp_a1, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group A (hc=1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_a1_quant[1], lwd = 2, col = "red") abline(v = pp_a1_quant[2], lwd = 2, col = "red") # group B, x = 1 hist(pp_b1, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group B (hc=1)", xlab = "Avg Streaming (hrs)", xlim = c(0,10)) abline(v = pp_b1_quant[1], lwd = 2, col = "red") abline(v = pp_b1_quant[2], lwd = 2, col = "red") ``` In the plot below we show how the overlap proportion will vary as the prediction interval varies. To put it differently, it shows how the probabilistic difference between groups varies as risk varies. Notice that the more risk we take when defining our prediction interval (i.e. 
the closer the prediction interval is to 0) the lower the overlap proportion, and consequentially the more apparent the difference between the two groups.

```{r fig.align='center', fig.height=5, fig.width=5}
# prediction interval probabilities
ci_p <- seq(0.1,1, by = 0.05)
# compute proportions
overlap_ab <- sapply(ci_p, function(s){overlap(pp_a, pp_b, s)})
# plot
plot(ci_p, overlap_ab, type = "o", pch = 20, xaxt = "n", yaxt = "n",
     main = "Group A vs Group B",
     xlab = "Prediction Interval Probability (1-Risk)",
     ylab = "Overlap Proportion (Group Similarity)")
axis(1, seq(0,1,by=0.1), cex.axis = 0.8)
axis(2, seq(0,1,by=0.1), cex.axis = 0.8)
abline(v = 0.5, lty = 2)
```

## Count Data

_This example is analogous to Fisher's exact test where the statistician is interested in testing differences in proportions (particularly in the form of a contingency table)._

Now, suppose that the business wants to know whether a product sells better if there is a change to the online user interface (UI) that users interact with to buy the product. They run an experiment on two groups and obtain the following results,

* Group C (control): 10 users out of a sample of 19 purchased the product with the default UI.
* Group D (treatment): 14 users out of a sample of 22 purchased the product with the alternative UI.

Here we can assume that the data is binomially distributed, in which case we can define the model for the two groups as follows,

$$
y_i \sim \mbox{Bin}(\mbox{logit}^{-1}(\mu_C \cdot groupC_i + \mu_D \cdot groupD_i), N_i)\\
$$

where $\mu$ is the parameter for each group, $group$ is a binary variable indicating group membership, $y$ is the number of users that purchased the product and $N$ is the total number of users in each group. Below we fit this model to the data.
```{r results='hide', message=FALSE, warning=FALSE} experiment_bin <- data.frame(group = factor(c("C","D")), y = c(10,14), trials = c(19,22)) fit_group_bin <- stan_glm(cbind(y, trials - y) ~ 0 + group, data = experiment_bin, family = binomial(link="logit"), seed = 123) ``` Similar to the method described in the previous section we compute and plot the $90\%$ prediction intervals for the posterior predictions in each group. We also compute the overlap proportion of these two sets of predictions. ```{r fig.align='center', fig.height=5, fig.width=10} # pp_c <- posterior_linpred(fit_group_bin, newdata = data.frame(group = factor("C")), transform = TRUE) # pp_d <- posterior_linpred(fit_group_bin, newdata = data.frame(group = factor("D")), transform = TRUE) # below doesn't work as expected (predictions are bigger than the number of trials) # pp_c <- posterior_predict(fit_group_bin, newdata = data.frame(group = factor("C"), trials = 19)) # pp_d <- posterior_predict(fit_group_bin, newdata = data.frame(group = factor("D"), trials = 22)) pp <- posterior_predict(fit_group_bin) pp_c <- pp[,1] pp_d <- pp[,2] pp_c_quant <- quantile(pp_c, probs = c(0.05,0.95)) pp_d_quant <- quantile(pp_d, probs = c(0.05,0.95)) # compute overlap overlap(pp_c, pp_d, p = 0.9) # plot # group C par(mfrow=c(1,2)) hist(pp_c, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group C", xlab = "Product Consumption", xlim = c(0,25)) abline(v = pp_c_quant[1], lwd = 2, col = "red") abline(v = pp_c_quant[2], lwd = 2, col = "red") # group D hist(pp_d, breaks = 50, col = '#808080', border = '#FFFFFF', main = "Group D", xlab = "Product Consumption", xlim = c(0,25)) abline(v = pp_d_quant[1], lwd = 2, col = "red") abline(v = pp_d_quant[2], lwd = 2, col = "red") ``` Looking at the histograms it's clear that there's quite a bit of overlap between the two groups. The overlap proportion is about 0.7. 
So under our $90\%$ prediction interval, there is a $70\%$ chance that there is no difference in behavior when the UI changes. This might suggest that we don't have strong evidence that the UI change encouraged a change in behavior. Below we show how the overlap proportion varies based on the amount of risk we're willing to take when we define our prediction intervals. Similar to the continuous example in the previous section, risk is inversely related to group similarity.

```{r fig.align='center', fig.height=5, fig.width=5}
# prediction interval probabilities
ci_p <- rev(seq(0.1,1, by = 0.05))
# compute proportions
overlap_cd <- sapply(ci_p, function(s){overlap(pp_c, pp_d, s)})
# plot
plot(ci_p, overlap_cd, type = "o", pch = 20, xaxt = "n", yaxt = "n",
     main = "Group C vs Group D",
     xlab = "Prediction Interval Probability (1-Risk)",
     ylab = "Overlap Proportion (Group Similarity)")
axis(1, seq(0,1,by=0.1), cex.axis = 0.8)
axis(2, seq(0,1,by=0.1), cex.axis = 0.8)
abline(v = 0.5, lty = 2)
```

Note, this example involved a really small data set (only one observation for each group). But the same model can easily be extended to many observations within each group. Also, just as we described in the continuous example, we can define a more comprehensive model for the outcome if we had additional predictors.

## Benefits of Bayesian Methods

The key benefits that we have discussed include the ability to probabilistically interpret the results of our inference, and the ability to incorporate prior beliefs (i.e. business knowledge and hypotheses) into our models.

**Interpretation of probability**

With regards to interpretation, there are some advantages with taking a Bayesian inference approach to A/B testing using Stan:

1. The ability to communicate our results using the intuitive concept of probability.
2. The ability to quantify business risk using probability when doing inference.
Quantifying our uncertainty probabilistically enables us to make statements like "based on the data collected, the model specified, and the risk we are willing to take; we are 80% certain that the two groups are different." This is much more interpretable than statements like 'with a p-value of less than 0.2 we can reject the null hypothesis that the two groups are identical'. While this is not exclusively a Bayesian benefit (i.e. we could have completely excluded priors from our models, estimating the parameters solely from the likelihood of the data), we took advantage of the fact that appropriately implemented Bayesian computational methods rely on robust sampling methods. These samples can then be transformed and used to make probabilistic statements about the posterior predictive distribution, and consequentially about the question being asked. **Incorporating prior beliefs** The ability to define a prior distribution on your parameters is a useful feature of Bayesian methods. Prior information can be incorporated in your model with two choices: the type of the distribution and how the distribution is parametrized. The type of distribution relates to which distribution you choose to define on the parameters. In the continuous data example we chose the normal distribution. But, since the underlying data (hours streamed per day) cannot be negative, it might be more sensible to define a truncated normal distribution as the prior (which is straightforward to implement in rstan). This gives us the opportunity to model the data generation process more appropriately. How the prior distribution is parameterized reflects your belief on the value that parameter takes. This gives us the opportunity to quantify business knowledge in prior distributions. In the continuous data example we showed how we parameterized the prior distribution for each group's parameter to capture our prior belief that the two groups are similar. 
A similar approach can be taken for the treatment group in the count data example. With these types of priors, if we concluded that the two groups are in fact different then we could really be sure that the treatment actually changed the treatment group's behavior. In other words, the treatment group's observed behavior overcame our prior belief. We could also tune this belief to be more or less strong by adjusting where most of the density/mass of the prior distribution sits. Applying this type of prior would help mitigate false-positive conclusions from this type of analysis. ## Conclusion Below is an abstracted summary of the inference process we've gone through to compare groups involved in A/B testing. 1. Model the indicator that is being measured to track the difference between the two groups. 2. Compute the prediction interval $p$ over the posterior predictions of the two groups. $1-p$ quantifies how much risk the business is willing to take in regards to the predicted indicator. The value of $p$ should be driven by domain-specific experts. 3. Compute the proportion $o$ of how much each interval overlaps with one another. $o$ defines the similarity between the two groups. After implementing the steps above, we can construct the following conclusion: given there is a $(1-p) \cdot 100$ percent chance that we are wrong about the predictions from our model, there is a $(1-o) \cdot 100$ percent chance that the two groups are different. The Bayesian methods outlined in this case study focused on modeling the data generation process and performing inference on the posterior predictive distribution of two groups. We did not need to worry about computing test statistics and determining the distribution of these statistics under the null hypothesis. Nor did we need to calculate p-values to figure out whether the groups involved in the A/B test are different. Instead we performed inference directly on the posterior predictions. 
By constructing prediction intervals and computing the overlap of these intervals we are able to probabilistically convey how sure we are about the difference between the two groups. Bayesian inference gives statisticians the ability to quantify business information/risk and enables them to communicate uncertainty unambiguously to decision makers, allowing more informed decisions to be made.

## Acknowledgements

Thanks to Jonah Gabry and Charles Zhou for feedback on initial drafts.

## References

Fisher's exact test. Wikipedia. Available from https://en.wikipedia.org/wiki/Fisher%27s_exact_test.

Gallo, A. (2017) A Refresher on A/B Testing. _Harvard Business Review_. https://hbr.org/2017/06/a-refresher-on-ab-testing.

Goodrich, B., Gabry, J., Ali, I. & Brilleman, S. (2019). rstanarm: Bayesian applied regression modeling via Stan. R package version 2.17.4. https://mc-stan.org/.

Kruschke, J.K. (2015). _Doing Bayesian Data Analysis - A Tutorial with R, JAGS, and Stan_. Elsevier, New York, 2nd edition.

Overlap coefficient. Wikipedia. Available from https://en.wikipedia.org/wiki/Overlap_coefficient

Stan Development Team (2019). RStan: the R interface to Stan. R package version 2.19.2. https://mc-stan.org/.

Student's t-test. Wikipedia. Available from https://en.wikipedia.org/wiki/Student's_t-test.

Welch's t-test. Wikipedia. Available from https://en.wikipedia.org/wiki/Welch%27s_t-test.

## Appendix A: Refresher on p-values

Recall that frequentist methods of hypothesis testing involve constructing a test statistic with the available data. Then, using the distribution of that test statistic under the null hypothesis, you can determine the probability of observing statistics that are more extreme than the one calculated. This is known as a p-value. A small p-value suggests a small probability of observing a more extreme test statistic, which in turn means that it is unlikely for that statistic to have been generated under the null hypothesis.
Since the statistic is computed from the data this suggests that the data itself is unlikely to have been generated under the null hypothesis. The value of how small a p-value should be to arrive at this conclusion is up to the statistician. As an example consider the data associated with Group A and Group B in the continuous data section. The null hypothesis is whether the two groups have equal means. Below we compute Welch's test statistic and p-value given the data.

```{r}
group_a <- experiment$y[experiment$group == "A"]
group_b <- experiment$y[experiment$group == "B"]

# Relevant dplyr code
# group_a <- experiment %>% filter(group == "A") %>% select(y) %>% unlist %>% unname
# group_b <- experiment %>% filter(group == "B") %>% select(y) %>% unlist %>% unname

t_test <- t.test(x=group_a, y=group_b)
t_stat <- abs(t_test$statistic)
p_value <- t_test$p.value
print(p_value)

# You can manually compute the p-value with the following code
# p_value <- pt(-t_stat, t_test$parameter)*2

# you can manually compute the confidence intervals with the following code
# group_a_mean <- mean(group_a)
# group_b_mean <- mean(group_b)
# v <- sqrt((var(group_a)/length(group_a)) + (var(group_b)/length(group_b)))
# ci_lwr <- (group_a_mean - group_b_mean) - abs(qt(0.025, t_test$parameter[['df']])*v)
# ci_upr <- (group_a_mean - group_b_mean) + abs(qt(0.025, t_test$parameter[['df']])*v)
```

The p-value in this case is really small, approximately zero. We can visualize this result. Since we know that the test statistic is t-distributed we can plot the distribution of the test statistic under the null, along with the test statistic calculated with the observed data. This is illustrated below. The red lines are the (two-tailed) test statistics calculated from the data.
```{r fig.align='center', fig.height=5, fig.width=5} dof <- t_test$parameter[["df"]] x <- seq(-10,10,length.out = 1e3) plot(x, dt(x, dof), type = "l", main = "Distribution of Test Statistics Under Null Hypothesis", xlab = "t-statistic value", ylab = "t-distribution density") abline(v=-t_stat, col="red", lwd=2) abline(v=t_stat, col="red", lwd=2) ``` Given the small p-value we can make the following sequence of conclusions: 1. The computed test statistic is unlikely to occur under the null hypothesis. 2. The data used to compute this statistic is unlikely to have been generated under the null hypothesis. 3. Therefore the null hypothesis must be invalid and can be rejected, allowing us to conclude that the two groups are different. Notice how far removed we are from the data and the observed data generation process. Once we calculate the test statistic we step away from the distribution of the data itself and start dealing with the distribution of the test statistic under the null. We were also unable to encode any prior belief or business knowledge into our inference. ## Appendix B: Hierarchical Example Here we show how to use hierarchical (or multilevel) models as an alternative modeling approach when performing A/B tests. Using the data in the continuous example we want to build a model where we account for group-level intercepts while allowing information to be shared among the groups. $$ \begin{align*} y_{i=A} \sim &\mathcal{N}(\mu_A + \beta \cdot high\_consumer_{i=A}, \sigma) \\ y_{i=B} \sim &\mathcal{N}(\mu_B + \beta \cdot high\_consumer_{i=B}, \sigma) \\ \beta \sim& \mathcal{N}(0,1) \\ & \mbox{(default priors specified on covariance matrix and } \sigma \mbox{)} \end{align*} $$ Below we fit the model. 
```{r results='hide'} fit_hier <- stan_glmer(y ~ 0 + (1 | group) + hc, prior = normal(0, 1), data = experiment, family = gaussian(link="identity"), seed = 123) ``` ```{r} coef(fit_hier) fixef(fit_hier) ranef(fit_hier) ``` With this modeling approach we can perform the same inferences as we have shown above while accounting for the hierarchical nature of the data. rstanarm/inst/doc/count.html0000644000176200001440000021101614551550312015643 0ustar liggesusers Estimating Generalized Linear Models for Count Data with rstanarm

Estimating Generalized Linear Models for Count Data with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to estimate generalized linear models (GLMs) for count data using the stan_glm function in the rstanarm package.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data
  2. Draw from posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”. This vignette focuses on Step 1 for Poisson and negative binomial regression models using the stan_glm function.

Likelihood

If the outcome for a single observation \(y\) is assumed to follow a Poisson distribution, the likelihood for one observation can be written as a conditionally Poisson PMF

\[\tfrac{1}{y!} \lambda^y e^{-\lambda},\]

where \(\lambda = E(y | \mathbf{x}) = g^{-1}(\eta)\) and \(\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}\) is a linear predictor. For the Poisson distribution it is also true that \(\lambda = Var(y | \mathbf{x})\), i.e. the mean and variance are both \(\lambda\). Later in this vignette we also show how to estimate a negative binomial regression, which relaxes this assumption of equal conditional mean and variance of \(y\).

Because the rate parameter \(\lambda\) must be positive, for a Poisson GLM the link function \(g\) maps between the positive real numbers \(\mathbb{R}^+\) (the support of \(\lambda\)) and the set of all real numbers \(\mathbb{R}\). When applied to a linear predictor \(\eta\) with values in \(\mathbb{R}\), the inverse link function \(g^{-1}(\eta)\) therefore returns a positive real number.

Although other link functions are possible, the canonical link function for a Poisson GLM is the log link \(g(x) = \ln{(x)}\). With the log link, the inverse link function is simply the exponential function and the likelihood for a single observation becomes

\[\frac{g^{-1}(\eta)^y}{y!} e^{-g^{-1}(\eta)} = \frac{e^{\eta y}}{y!} e^{-e^\eta}.\]

Priors

A full Bayesian analysis requires specifying prior distributions \(f(\alpha)\) and \(f(\boldsymbol{\beta})\) for the intercept and vector of regression coefficients. When using stan_glm, these distributions can be set using the prior_intercept and prior arguments. The stan_glm function supports a variety of prior distributions, which are explained in the rstanarm documentation (help(priors, package = 'rstanarm')).

As an example, suppose we have \(K\) predictors and believe — prior to seeing the data — that \(\alpha, \beta_1, \dots, \beta_K\) are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give \(\alpha\) and each of the \(\beta\)s this prior (with a scale of 1, say), in the call to stan_glm we would include the arguments prior_intercept = normal(0,1) and prior = normal(0,1).

If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. Step 1 in the “How to Use the rstanarm Package” vignette discusses one such example.

Posterior

With independent prior distributions, the joint posterior distribution for \(\alpha\) and \(\boldsymbol{\beta}\) in the Poisson model is proportional to the product of the priors and the \(N\) likelihood contributions:

\[f\left(\alpha,\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N { \frac{g^{-1}(\eta_i)^{y_i}}{y_i!} e^{-g^{-1}(\eta_i)}}.\]

This is the posterior distribution that stan_glm will draw from when using MCMC.

Poisson and Negative Binomial Regression Example

This example comes from Chapter 8.3 of Gelman and Hill (2007).

We want to make inferences about the efficacy of a certain pest management system at reducing the number of roaches in urban apartments. Here is how Gelman and Hill describe the experiment (pg. 161):

[…] the treatment and control were applied to 160 and 104 apartments, respectively, and the outcome measurement \(y_i\) in each apartment \(i\) was the number of roaches caught in a set of traps. Different apartments had traps for different numbers of days […]

In addition to an intercept, the regression predictors for the model are the pre-treatment number of roaches roach1, the treatment indicator treatment, and a variable indicating whether the apartment is in a building restricted to elderly residents senior. Because the number of days for which the roach traps were used is not the same for all apartments in the sample, we include it as an exposure, which slightly changes the model described in the Likelihood section above in that the rate parameter \(\lambda_i = exp(\eta_i)\) is multiplied by the exposure \(u_i\) giving us \(y_i \sim Poisson(u_i \lambda_i)\). This is equivalent to adding \(\ln{(u_i)}\) to the linear predictor \(\eta_i\) and it can be specified using the offset argument to stan_glm.

The formula, data, family, and offset arguments to stan_glm can be specified in exactly the same way as for glm. The poisson family function defaults to using the log link, but to write code readable to someone not familiar with the defaults we should be explicit and use family = poisson(link = "log").

We’ve also specified some optional arguments. The chains argument controls how many Markov chains are executed, the cores argument controls the number of cores utilized by the computer when fitting the model. We also provided a seed so that we have the option to deterministically reproduce these results at any time. The stan_glm function has many other optional arguments that allow for more user control over the way estimation is performed. The documentation for stan_glm has more information about these controls as well as other topics related to GLM estimation.

Here are the point estimates and uncertainties from the glm fit and stan_glm fit, which we see are nearly identical:

         (Intercept) roach1 treatment senior
glm             3.09    0.7     -0.52  -0.38
stan_glm        3.09    0.7     -0.52  -0.38
         (Intercept) roach1 treatment senior
glm            0.021  0.009     0.025  0.033
stan_glm       0.021  0.009     0.024  0.034

(Note: the dataset we have is slightly different from the one used in Gelman and Hill (2007), which leads to slightly different parameter estimates than those shown in the book even when copying the glm call verbatim. Also, we have rescaled the roach1 predictor. For the purposes of this example, the actual estimates are less important than the process.)

Gelman and Hill next show how to compare the observed data to replicated datasets from the model to check the quality of the fit. Here we don’t show the original code used by Gelman and Hill because it’s many lines, requiring several loops and some care to get the matrix multiplications right (see pg. 161-162). On the other hand, the rstanarm package makes this easy. We can generate replicated datasets with a single line of code using the posterior_predict function:

By default posterior_predict will generate a dataset for each set of parameter draws from the posterior distribution. That is, yrep will be an \(S \times N\) matrix, where \(S\) is the size of the posterior sample and \(N\) is the number of data points. Each row of yrep represents a full dataset generated from the posterior predictive distribution. For more about the importance of the posterior_predict function, see the “How to Use the rstanarm Package” vignette.

Gelman and Hill take the simulated datasets and for each of them compute the proportion of zeros and compare to the observed proportion in the original data. We can do this easily using the pp_check function, which generates graphical comparisons of the data y and replicated datasets yrep.

The value of the test statistic (in this case the proportion of zeros) computed from the sample y is the dark blue vertical line. More than 30% of these observations are zeros, whereas the replicated datasets all contain less than 1% zeros (light blue histogram). This is a sign that we should consider a model that more accurately accounts for the large proportion of zeros in the data. Gelman and Hill show how we can do this using an overdispersed Poisson regression. To illustrate the use of a different stan_glm model, here we will instead try negative binomial regression, which is also used for overdispersed or zero-inflated count data. The negative binomial distribution allows the (conditional) mean and variance of \(y\) to differ unlike the Poisson distribution. To fit the negative binomial model we can either use the stan_glm.nb function or, equivalently, change the family we specify in the call to stan_glm to neg_binomial_2 instead of poisson. To do the latter we can just use update:

We now use pp_check again, this time to check the proportion of zeros in the replicated datasets under the negative binomial model:

This is a much better fit, as the proportion of zeros in the data falls nicely near the center of the distribution of the proportion of zeros among the replicated datasets. The observed proportion of zeros is quite plausible under this model.

We could have also made these plots manually without using the pp_check function because we have the yrep datasets created by posterior_predict. The pp_check function takes care of this for us, but yrep can be used directly to carry out other posterior predictive checks that aren’t automated by pp_check.

When comparing the models using the loo package we also see a clear preference for the negative binomial model

          elpd_diff se_diff
stan_glm2     0.0       0.0
stan_glm1 -5345.4     706.8

which is not surprising given the better fit we’ve already observed from the posterior predictive checks.

References

Gelman, A. and Hill, J. (2007). Data Analysis Using Regression and Multilevel/Hierarchical Models. Cambridge University Press, Cambridge, UK.

rstanarm/inst/doc/sample_alt.rda0000644000176200001440000000503413540753420016441 0ustar liggesusers]Ys7=l \ r>/d\vmW%bBuG7n{l½<$vyR) O>ՁzPϐ;[^D6[Wq~(x17e/0|&L_Aˎ y`><~aV<-"㤘]#Xggjk#js[0RYlv;s{bna,܆`s;q>4Qou8 GŞ0֠ga24 ]'{|~)([&k%+2Z8x뀯FCWZx-y=}1I퇵tbjaبQ̼KkF^y0I^O,Y 1kyCڃ}G[ݾ8)bY[gu5އ>LSF\ɻ]k(hm]%cqxgZ~]+u*S?Ӆ4QށN:rss>0.+ Z+z^kƛ  G!GV>NX\-.PڸʾbXg~uָ9kkjzkvU݈wcAëu-^447Ɠ\ +y{4?֮~=gfuJxAAN9.}ԑsWFS}-ǫS O0ޚm޳[s;ԯ<~pw?yxh \|=80~BZ<4kŗ)8U5ySqICɳʕ淕)3Zƅ76zܯ8ld0.Q_L/up`r\q߶?Ug}7XYojB8Ky5 KqX72jtrMǵw1X=:G#=xNp |/zleo_mM}193IʅưgqMcKU>cLOy,I3#_Y+N~wNhZﵸ9 eզ8?>Ӥf7-`6U54ڦ|m77|\7{[z?/gWmo} ))=_xYG~zy7^eL}Ƴrstanarm/inst/doc/count.Rmd0000644000176200001440000002477014214422264015432 0ustar liggesusers--- title: "Estimating Generalized Linear Models for Count Data with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate generalized linear models (GLMs) for count data using the `stan_glm` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 for Poisson and negative binomial regression models using the `stan_glm` function. # Likelihood If the outcome for a single observation $y$ is assumed to follow a Poisson distribution, the likelihood for one observation can be written as a conditionally Poisson PMF $$\tfrac{1}{y!} \lambda^y e^{-\lambda},$$ where $\lambda = E(y | \mathbf{x}) = g^{-1}(\eta)$ and $\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor. For the Poisson distribution it is also true that $\lambda = Var(y | \mathbf{x})$, i.e. the mean and variance are both $\lambda$. 
Later in this vignette we also show how to estimate a negative binomial regression, which relaxes this assumption of equal conditional mean and variance of $y$. Because the rate parameter $\lambda$ must be positive, for a Poisson GLM the _link_ function $g$ maps between the positive real numbers $\mathbb{R}^+$ (the support of $\lambda$) and the set of all real numbers $\mathbb{R}$. When applied to a linear predictor $\eta$ with values in $\mathbb{R}$, the inverse link function $g^{-1}(\eta)$ therefore returns a positive real number. Although other link functions are possible, the canonical link function for a Poisson GLM is the log link $g(x) = \ln{(x)}$. With the log link, the inverse link function is simply the exponential function and the likelihood for a single observation becomes $$\frac{g^{-1}(\eta)^y}{y!} e^{-g^{-1}(\eta)} = \frac{e^{\eta y}}{y!} e^{-e^\eta}.$$ # Priors ```{r, child="children/stan_glm_priors.txt"} ``` # Posterior With independent prior distributions, the joint posterior distribution for $\alpha$ and $\boldsymbol{\beta}$ in the Poisson model is proportional to the product of the priors and the $N$ likelihood contributions: $$f\left(\alpha,\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N { \frac{g^{-1}(\eta_i)^{y_i}}{y_i!} e^{-g^{-1}(\eta_i)}}.$$ This is the posterior distribution that `stan_glm` will draw from when using MCMC. # Poisson and Negative Binomial Regression Example This example comes from Chapter 8.3 of [Gelman and Hill (2007)](http://www.stat.columbia.edu/~gelman/arm/). We want to make inferences about the efficacy of a certain pest management system at reducing the number of roaches in urban apartments. Here is how Gelman and Hill describe the experiment (pg. 161): > [...] 
the treatment and control were applied to 160 and 104 apartments, respectively, and the outcome measurement $y_i$ in each apartment $i$ was the number of roaches caught in a set of traps. Different apartments had traps for different numbers of days [...] In addition to an intercept, the regression predictors for the model are the pre-treatment number of roaches `roach1`, the treatment indicator `treatment`, and a variable indicating whether the apartment is in a building restricted to elderly residents `senior`. Because the number of days for which the roach traps were used is not the same for all apartments in the sample, we include it as an exposure, which slightly changes the model described in the __Likelihood__ section above in that the rate parameter $\lambda_i = exp(\eta_i)$ is multiplied by the exposure $u_i$ giving us $y_i \sim Poisson(u_i \lambda_i)$. This is equivalent to adding $\ln{(u_i)}$ to the linear predictor $\eta_i$ and it can be specified using the `offset` argument to `stan_glm`. ```{r, count-roaches-mcmc, results="hide"} library(rstanarm) data(roaches) # Rescale roaches$roach1 <- roaches$roach1 / 100 # Estimate original model glm1 <- glm(y ~ roach1 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson) # Estimate Bayesian version with stan_glm stan_glm1 <- stan_glm(y ~ roach1 + treatment + senior, offset = log(exposure2), data = roaches, family = poisson, prior = normal(0, 2.5), prior_intercept = normal(0, 5), seed = 12345) ``` The `formula`, `data`, `family`, and `offset` arguments to `stan_glm` can be specified in exactly the same way as for `glm`. The `poisson` family function defaults to using the log link, but to write code readable to someone not familiar with the defaults we should be explicit and use `family = poisson(link = "log")`. We've also specified some optional arguments. 
The `chains` argument controls how many Markov chains are executed, the `cores` argument controls the number of cores utilized by the computer when fitting the model. We also provided a seed so that we have the option to deterministically reproduce these results at any time. The `stan_glm` function has many other optional arguments that allow for more user control over the way estimation is performed. The documentation for `stan_glm` has more information about these controls as well as other topics related to GLM estimation. Here are the point estimates and uncertainties from the `glm` fit and `stan_glm` fit, which we see are nearly identical: ```{r, count-roaches-comparison} round(rbind(glm = coef(glm1), stan_glm = coef(stan_glm1)), digits = 2) round(rbind(glm = summary(glm1)$coefficients[, "Std. Error"], stan_glm = se(stan_glm1)), digits = 3) ``` (Note: the dataset we have is slightly different from the one used in Gelman and Hill (2007), which leads to slightly different parameter estimates than those shown in the book even when copying the `glm` call verbatim. Also, we have rescaled the `roach1` predictor. For the purposes of this example, the actual estimates are less important than the process.) Gelman and Hill next show how to compare the observed data to replicated datasets from the model to check the quality of the fit. Here we don't show the original code used by Gelman and Hill because it's many lines, requiring several loops and some care to get the matrix multiplications right (see pg. 161-162). On the other hand, the __rstanarm__ package makes this easy. We can generate replicated datasets with a single line of code using the `posterior_predict` function: ```{r, count-roaches-posterior_predict} yrep <- posterior_predict(stan_glm1) ``` By default `posterior_predict` will generate a dataset for each set of parameter draws from the posterior distribution. 
That is, `yrep` will be an $S \times N$ matrix, where $S$ is the size of the posterior sample and $N$ is the number of data points. Each row of `yrep` represents a full dataset generated from the posterior predictive distribution. For more about the importance of the `posterior_predict` function, see the ["How to Use the __rstanarm__ Package"](rstanarm.html) vignette. Gelman and Hill take the simulated datasets and for each of them compute the proportion of zeros and compare to the observed proportion in the original data. We can do this easily using the `pp_check` function, which generates graphical comparisons of the data `y` and replicated datasets `yrep`. ```{r, count-roaches-plot-pp_check1} prop_zero <- function(y) mean(y == 0) (prop_zero_test1 <- pp_check(stan_glm1, plotfun = "stat", stat = "prop_zero", binwidth = .005)) ``` The value of the test statistic (in this case the proportion of zeros) computed from the sample `y` is the dark blue vertical line. More than 30% of these observations are zeros, whereas the replicated datasets all contain less than 1% zeros (light blue histogram). This is a sign that we should consider a model that more accurately accounts for the large proportion of zeros in the data. Gelman and Hill show how we can do this using an overdispersed Poisson regression. To illustrate the use of a different `stan_glm` model, here we will instead try [negative binomial](https://en.wikipedia.org/wiki/Negative_binomial_distribution) regression, which is also used for overdispersed or zero-inflated count data. The negative binomial distribution allows the (conditional) mean and variance of $y$ to differ unlike the Poisson distribution. To fit the negative binomial model we can either use the `stan_glm.nb` function or, equivalently, change the `family` we specify in the call to `stan_glm` to `neg_binomial_2` instead of `poisson`. 
To do the latter we can just use `update`: ```{r, count-roaches-negbin, results="hide"} stan_glm2 <- update(stan_glm1, family = neg_binomial_2) ``` We now use `pp_check` again, this time to check the proportion of zeros in the replicated datasets under the negative binomial model: ```{r, count-roaches-plot-pp_check2, fig.width=7, out.width="80%"} prop_zero_test2 <- pp_check(stan_glm2, plotfun = "stat", stat = "prop_zero", binwidth = 0.01) # Show graphs for Poisson and negative binomial side by side bayesplot_grid(prop_zero_test1 + ggtitle("Poisson"), prop_zero_test2 + ggtitle("Negative Binomial"), grid_args = list(ncol = 2)) ``` This is a much better fit, as the proportion of zeros in the data falls nicely near the center of the distribution of the proportion of zeros among the replicated datasets. The observed proportion of zeros is quite plausible under this model. We could have also made these plots manually without using the `pp_check` function because we have the `yrep` datasets created by `posterior_predict`. The `pp_check` function takes care of this for us, but `yrep` can be used directly to carry out other posterior predictive checks that aren't automated by `pp_check`. When comparing the models using the __loo__ package we also see a clear preference for the negative binomial model ```{r, count-roaches-loo} loo1 <- loo(stan_glm1, cores = 2) loo2 <- loo(stan_glm2, cores = 2) loo_compare(loo1, loo2) ``` which is not surprising given the better fit we've already observed from the posterior predictive checks. # References Gelman, A. and Hill, J. (2007). _Data Analysis Using Regression and Multilevel/Hierarchical Models._ Cambridge University Press, Cambridge, UK. 
rstanarm/inst/doc/aov.Rmd0000644000176200001440000001662713722762571015104 0ustar liggesusers--- title: "Estimating ANOVA Models with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate ANalysis Of VAriance (ANOVA) models using the `stan_aov` function in the __rstanarm__ package ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of independent normal distributions. We also demonstrate that Step 2 is not entirely automatic because it is sometimes necessary to specify some additional tuning parameters in order to obtain optimally efficient results. # Likelihood The likelihood for one observation under a linear model can be written as a conditionally normal PDF $$\frac{1}{\sigma_{\epsilon} \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma_{\epsilon}}\right)^2},$$ where $\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor and $\sigma_{\epsilon}$ is the standard deviation of the error in predicting the outcome, $y$. The likelihood of the entire sample is the product of $N$ individual likelihood contributions. An ANOVA model can be considered a special case of the above linear regression model where each of the $K$ predictors in $\mathbf{x}$ is a dummy variable indicating membership in a group. An equivalent linear predictor can be written as $\mu_j = \alpha + \alpha_j$, which expresses the conditional expectation of the outcome in the $j$-th group as the sum of a common mean, $\alpha$, and a group-specific deviation from the common mean, $\alpha_j$. 
# Priors If we view the ANOVA model as a special case of a linear regression model with only dummy variables as predictors, then the model could be estimated using the prior specification in the `stan_lm` function. In fact, this is exactly how the `stan_aov` function is coded. These functions require the user to specify a value for the prior location (by default the mode) of the $R^2$, the proportion of variance in the outcome attributable to the predictors under a linear model. This prior specification is appealing in an ANOVA context because of the fundamental identity $$SS_{\mbox{total}} = SS_{\mbox{model}} + SS_{\mbox{error}},$$ where $SS$ stands for sum-of-squares. If we normalize this identity, we obtain the tautology $1 = R^2 + \left(1 - R^2\right)$ but it is reasonable to expect a researcher to have a plausible guess for $R^2$ before conducting an ANOVA. See the [vignette](lm.html) for the `stan_lm` function (regularized linear models) for more information on this approach. If we view the ANOVA model as a difference of means, then the model could be estimated using the prior specification in the `stan_lmer` function. In the syntax popularized by the __lme4__ package, `y ~ 1 + (1|group)` represents a likelihood where $\mu_j = \alpha + \alpha_j$ and $\alpha_j$ is normally distributed across the $J$ groups with mean zero and some unknown standard deviation. The `stan_lmer` function specifies that this standard deviation has a Gamma prior with, by default, both its shape and scale parameters equal to $1$, which is just a standard exponential distribution. However, the shape and scale parameters can be specified as other positive values. This approach also requires specifying a prior distribution on the standard deviation of the errors that is independent of the prior distribution for each $\alpha_j$. See the [vignette](glmer.html) for the `stan_glmer` function (__lme4__-style models using __rstanarm__) for more information on this approach. 
# Example We will utilize an example from the __HSAUR3__ package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book _A Handbook of Statistical Analyses Using R (3rd Edition)_ (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results. The model in section 4.3.1 analyzes an experiment where rats were subjected to different diets in order to see how much weight they gained. The experimental factors were whether their diet had low or high protein and whether the protein was derived from beef or cereal. Before seeing the data, one might expect that a moderate proportion of the variance in weight gain might be attributed to protein (source) in the diet. The frequentist ANOVA estimates can be obtained: ```{r aov-weightgain-aov} data("weightgain", package = "HSAUR3") coef(aov(weightgain ~ source * type, data = weightgain)) ``` To obtain Bayesian estimates we can prepend `stan_` to `aov` and specify the prior location of the $R^2$ as well as optionally the number of cores that the computer is allowed to utilize: ```{r aov-weightgain-mcmc, results="hide"} library(rstanarm) post1 <- stan_aov(weightgain ~ source * type, data = weightgain, prior = R2(location = 0.5), adapt_delta = 0.999, seed = 12345) post1 ``` ```{r, echo=FALSE} print(post1) ``` Here we have specified `adapt_delta = 0.999` to decrease the stepsize and largely prevent divergent transitions. See the Troubleshooting section in the main rstanarm [vignette](rstanarm.html) for more details about `adapt_delta`. Also, our prior guess that $R^2 = 0.5$ was overly optimistic. However, the frequentist estimates presumably overfit the data even more. 
Alternatively, we could prepend `stan_` to `lmer` and specify the corresponding priors ```{r, aov-weightgain-stan_lmer, eval=FALSE} post2 <- stan_lmer(weightgain ~ 1 + (1|source) + (1|type) + (1|source:type), data = weightgain, prior_intercept = cauchy(), prior_covariance = decov(shape = 2, scale = 2), adapt_delta = 0.999, seed = 12345) ``` Comparing these two models using the `loo` function in the __loo__ package reveals a negligible preference for the first approach that is almost entirely due to its having a smaller number of effective parameters as a result of the more regularizing priors. However, the difference is so small that it may seem advantageous to present the second results which are more in line with a mainstream Bayesian approach to an ANOVA model. # Conclusion This vignette has compared and contrasted two approaches to estimating an ANOVA model with Bayesian techniques using the __rstanarm__ package. They both have the same likelihood, so the (small in this case) differences in the results are attributable to differences in the priors. The `stan_aov` approach just calls `stan_lm` and thus only requires a prior location on the $R^2$ of the linear model. This seems rather easy to do in the context of an ANOVA decomposition of the total sum-of-squares in the outcome into model sum-of-squares and residual sum-of-squares. The `stan_lmer` approach just calls `stan_glm` but specifies a normal prior with mean zero for the deviations from $\alpha$ across groups. This is more in line with what most Bayesians would do naturally --- particularly if the factors were considered "random" --- but also requires a prior for $\alpha$, $\sigma$, and the standard deviation of the normal prior on the group-level intercepts. The `stan_lmer` approach is very flexible and might be more appropriate for more complicated experimental designs. 
rstanarm/inst/doc/lm.Rmd0000644000176200001440000004417513722762571014726 0ustar liggesusers--- title: "Estimating Regularized Linear Models with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate linear models using the `stan_lm` function in the __rstanarm__ package. ```{r, child = "children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of independent normal distributions. The goal of the __rstanarm__ package is to make Bayesian estimation of common regression models routine. That goal can be partially accomplished by providing interfaces that are similar to the popular formula-based interfaces to frequentist estimators of those regression models. But fully accomplishing that goal sometimes entails utilizing priors that applied researchers are unaware that they prefer. These priors are intended to work well for any data that a user might pass to the interface that was generated according to the assumptions of the likelihood function. It is important to distinguish between priors that are easy for applied researchers to _specify_ and priors that are easy for applied researchers to _conceptualize_. The prior described below emphasizes the former but we outline its derivation so that applied researchers may feel more comfortable utilizing it. 
# Likelihood The likelihood for one observation under a linear model can be written as a conditionally normal PDF $$\frac{1}{\sigma_{\epsilon} \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma_{\epsilon}}\right)^2},$$ where $\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor and $\sigma_{\epsilon}$ is the standard deviation of the error in predicting the outcome, $y$. The likelihood of the entire sample is the product of $N$ individual likelihood contributions. It is well-known that the likelihood of the sample is maximized when the sum-of-squared residuals is minimized, which occurs when $$ \widehat{\boldsymbol{\beta}} = \left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}, $$ $$ \widehat{\alpha} = \overline{y} - \overline{\mathbf{x}}^\top \widehat{\boldsymbol{\beta}}, $$ $$ \widehat{\sigma}_{\epsilon}^2 = \frac{\left(\mathbf{y} - \widehat{\alpha} - \mathbf{X} \widehat{ \boldsymbol{\beta}}\right)^\top \left(\mathbf{y} - \widehat{\alpha} - \mathbf{X} \widehat{ \boldsymbol{\beta}}\right)}{N},$$ where $\overline{\mathbf{x}}$ is a vector that contains the sample means of the $K$ predictors, $\mathbf{X}$ is a $N \times K$ matrix of _centered_ predictors, $\mathbf{y}$ is a $N$-vector of outcomes and $\overline{y}$ is the sample mean of the outcome. # QR Decomposition The `lm` function in R actually performs a QR decomposition of the design matrix, $\mathbf{X} = \mathbf{Q}\mathbf{R}$, where $\mathbf{Q}^\top \mathbf{Q} = \mathbf{I}$ and $\mathbf{R}$ is upper triangular. Thus, the OLS solution for the coefficients can be written as $\left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y} = \mathbf{R}^{-1} \mathbf{Q}^\top \mathbf{y}$. The `lm` function utilizes the QR decomposition for numeric stability reasons, but the QR decomposition is also useful for thinking about priors in a Bayesian version of the linear model. 
In addition, writing the likelihood in terms of $\mathbf{Q}$ allows it to be evaluated in a very efficient manner in Stan. # Priors The key innovation in the `stan_lm` function in the __rstanarm__ package is the prior for the parameters in the QR-reparameterized model. To understand this prior, think about the equations that characterize the maximum likelihood solutions before observing the data on $\mathbf{X}$ and especially $\mathbf{y}$. What would the prior distribution of $\boldsymbol{\theta} = \mathbf{Q}^\top \mathbf{y}$ be? We can write its $k$-th element as $\theta_k = \rho_k \sigma_Y \sqrt{N - 1}$ where $\rho_k$ is the correlation between the $k$th column of $\mathbf{Q}$ and the outcome, $\sigma_Y$ is the standard deviation of the outcome, and $\frac{1}{\sqrt{N-1}}$ is the standard deviation of the $k$th column of $\mathbf{Q}$. Then let $\boldsymbol{\rho} = \sqrt{R^2}\mathbf{u}$ where $\mathbf{u}$ is a unit vector that is uniformly distributed on the surface of a hypersphere. Consequently, $R^2 = \boldsymbol{\rho}^\top \boldsymbol{\rho}$ is the familiar coefficient of determination for the linear model. An uninformative prior on $R^2$ would be standard uniform, which is a special case of a Beta distribution with both shape parameters equal to $1$. A non-uniform prior on $R^2$ is somewhat analogous to ridge regression, which is popular in data mining and produces better out-of-sample predictions than least squares because it penalizes $\boldsymbol{\beta}^\top \boldsymbol{\beta}$, usually after standardizing the predictors. An informative prior on $R^2$ effectively penalizes $\boldsymbol{\rho}^\top \boldsymbol{\rho}$, which encourages $\boldsymbol{\beta} = \mathbf{R}^{-1} \boldsymbol{\theta}$ to be closer to the origin. 
Lewandowski, Kurowicka, and Joe (2009) derives a distribution for a correlation matrix that depends on a single shape parameter $\eta > 0$, which implies the variance of one variable given the remaining $K$ variables is $\mathrm{Beta}\left(\eta,\frac{K}{2}\right)$. Thus, the $R^2$ is distributed $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ and any prior information about the location of $R^2$ can be used to choose a value of the hyperparameter $\eta$. The `R2(location, what)` function in the __rstanarm__ package supports four ways of choosing $\eta$: 1. `what = "mode"` and `location` is some prior mode on the $\left(0,1\right)$ interval. This is the default but since the mode of a $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ distribution is $\frac{\frac{K}{2} - 1}{\frac{K}{2} + \eta - 2}$ the mode only exists if $K > 2$. If $K \leq 2$, then the user must specify something else for `what`. 2. `what = "mean"` and `location` is some prior mean on the $\left(0,1\right)$ interval, where the mean of a $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ distribution is $\frac{\frac{K}{2}}{\frac{K}{2} + \eta}$. 3. `what = "median"` and `location` is some prior median on the $\left(0,1\right)$ interval. The median of a $\mathrm{Beta}\left(\frac{K}{2},\eta\right)$ distribution is not available in closed form but if $K > 2$ it is approximately equal to $\frac{\frac{K}{2} - \frac{1}{3}}{\frac{K}{2} + \eta - \frac{2}{3}}$. Regardless of whether $K > 2$, the `R2` function can numerically solve for the value of $\eta$ that is consistent with a given prior median utilizing the quantile function. 4. `what = "log"` and `location` is some (negative) prior value for $\mathbb{E} \ln R^2 = \psi\left(\frac{K}{2}\right)- \psi\left(\frac{K}{2}+\eta\right)$, where $\psi\left(\cdot\right)$ is the `digamma` function. Again, given a prior value for the left-hand side it is easy to numerically solve for the corresponding value of $\eta$. 
There is no default value for the `location` argument of the `R2` function. This is an _informative_ prior on $R^2$, which must be chosen by the user in light of the research project. However, specifying `location = 0.5` is often safe, in which case $\eta = \frac{K}{2}$ regardless of whether `what` is `"mode"`, `"mean"`, or `"median"`. In addition, it is possible to specify `NULL`, in which case a standard uniform on $R^2$ is utilized. We set $\sigma_y = \omega s_y$ where $s_y$ is the sample standard deviation of the outcome and $\omega > 0$ is an unknown scale parameter to be estimated. The only prior for $\omega$ that does not contravene Bayes' theorem in this situation is Jeffreys prior, $f\left(\omega\right) \propto \frac{1}{\omega}$, which is proportional to a Jeffreys prior on the unknown $\sigma_y$, $f\left(\sigma_y\right) \propto \frac{1}{\sigma_y} = \frac{1}{\omega \widehat{\sigma}_y} \propto \frac{1}{\omega}$. This parameterization and prior makes it easy for Stan to work with any continuous outcome variable, no matter what its units of measurement are. It would seem that we need a prior for $\sigma_{\epsilon}$, but our prior beliefs about $\sigma_{\epsilon} = \omega s_y \sqrt{1 - R^2}$ are already implied by our prior beliefs about $\omega$ and $R^2$. That only leaves a prior for $\alpha = \overline{y} - \overline{\mathbf{x}}^\top \mathbf{R}^{-1} \boldsymbol{\theta}$. The default choice is an improper uniform prior, but a normal prior can also be specified such as one with mean zero and standard deviation $\frac{\sigma_y}{\sqrt{N}}$. # Posterior The previous sections imply a posterior distribution for $\omega$, $\alpha$, $\mathbf{u}$, and $R^2$. 
The parameters of interest can then be recovered as generated quantities: * $\sigma_y = \omega s_y$ * $\sigma_{\epsilon} = \sigma_y \sqrt{1 - R^2}$ * $\boldsymbol{\beta} = \mathbf{R}^{-1} \mathbf{u} \sigma_y \sqrt{R^2 \left(N-1\right)}$ The implementation actually utilizes an improper uniform prior on $\ln \omega$. Consequently, if $\ln \omega = 0$, then the marginal standard deviation of the outcome _implied by the model_ is the same as the sample standard deviation of the outcome. If $\ln \omega > 0$, then the marginal standard deviation of the outcome implied by the model exceeds the sample standard deviation, so the model overfits the data. If $\ln \omega < 0$, then the marginal standard deviation of the outcome implied by the model is less than the sample standard deviation, so the model _underfits_ the data or that the data-generating process is nonlinear. Given the regularizing nature of the prior on $R^2$, a minor underfit would be considered ideal if the goal is to obtain good out-of-sample predictions. If the model badly underfits or overfits the data, then you may want to reconsider the model. # Example We will utilize an example from the __HSAUR3__ package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book _A Handbook of Statistical Analyses Using R (3rd Edition)_ (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results. The model in section 5.3.1 analyzes an experiment where clouds were seeded with different amounts of silver iodide to see if there was increased rainfall. This effect could vary according to covariates, which (except for `time`) are interacted with the treatment variable. Most people would probably be skeptical that cloud hacking could explain very much of the variation in rainfall and thus the prior mode of the $R^2$ would probably be fairly small. 
The frequentist estimator of this model can be replicated by executing ```{r lm-clouds-ols} data("clouds", package = "HSAUR3") ols <- lm(rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + time, data = clouds) round(coef(ols), 3) ``` Note that we have _not_ looked at the estimated $R^2$ or $\sigma$ for the ordinary least squares model. We can estimate a Bayesian version of this model by prepending `stan_` to the `lm` call, specifying a prior mode for $R^2$, and optionally specifying how many cores the computer may utilize: ```{r lm-clouds-mcmc, results='hide'} library(rstanarm) post <- stan_lm( rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + time, data = clouds, prior = R2(location = 0.2), seed = 12345 ) post ``` ```{r, echo=FALSE} print(post) ``` In this case, the "Bayesian point estimates", which are represented by the posterior medians, appear quite different from the ordinary least squares estimates. However, the `log-fit_ratio` (i.e. $\ln \omega$) is quite small, indicating that the model only slightly overfits the data when the prior derived above is utilized. Thus, it would be safe to conclude that the ordinary least squares estimator considerably overfits the data since there are only $24$ observations to estimate $12$ parameters with and no prior information on the parameters. Also, it is not obvious what the estimated average treatment effect is since the treatment variable, `seeding`, is interacted with four other correlated predictors. However, it is easy to estimate or visualize the average treatment effect (ATE) using __rstanarm__'s `posterior_predict` function. 
```{r lm-clouds-ate-plot} clouds_cf <- clouds clouds_cf$seeding[] <- "yes" y1_rep <- posterior_predict(post, newdata = clouds_cf) clouds_cf$seeding[] <- "no" y0_rep <- posterior_predict(post, newdata = clouds_cf) qplot(x = c(y1_rep - y0_rep), geom = "histogram", xlab = "Estimated ATE") ``` As can be seen, the treatment effect is not estimated precisely and is almost as likely to be negative as it is to be positive. # Alternative Approach The prior derived above works well in many situations and is quite simple to _use_ since it only requires the user to specify the prior location of the $R^2$. Nevertheless, the implications of the prior are somewhat difficult to _conceptualize_. Thus, it is perhaps worthwhile to compare to another estimator of a linear model that simply puts independent Cauchy priors on the regression coefficients. This simpler approach can be executed by calling the `stan_glm` function with `family = gaussian()` and specifying the priors: ```{r lm-clouds-simple, results="hide"} simple <- stan_glm( rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + time, data = clouds, family = gaussian(), prior = cauchy(), prior_intercept = cauchy(), seed = 12345 ) ``` We can compare the two approaches using an approximation to Leave-One-Out (LOO) cross-validation, which is implemented by the `loo` function in the __loo__ package. ```{r lm-clouds-loo, warning=TRUE} (loo_post <- loo(post)) loo_compare(loo_post, loo(simple)) ``` The results indicate that the first approach is expected to produce better out-of-sample predictions but the Warning messages are at least as important. Many of the estimated shape parameters for the Generalized Pareto distribution are above $0.5$ in the model with Cauchy priors, which indicates that these estimates are only going to converge slowly to the true out-of-sample deviance measures. Thus, with only $24$ observations, they should not be considered reliable. 
The more complicated prior derived above is stronger --- as evidenced by the fact that the effective number of parameters is about half of that in the simpler approach and $12$ for the maximum likelihood estimator --- and only has a few of the $24$ Pareto shape estimates in the "danger zone". We might want to reexamine these observations ```{r lm-clouds-plot-loo} plot(loo_post, label_points = TRUE) ``` because the posterior is sensitive to them but, overall, the results seem tolerable. In general, we would expect the joint prior derived here to work better when there are many predictors relative to the number of observations. Placing independent, heavy-tailed priors on the coefficients neither reflects the beliefs of the researcher nor conveys enough information to stabilize all the computations. # Conclusion This vignette has discussed the prior distribution utilized in the `stan_lm` function, which has the same likelihood and a similar syntax as the `lm` function in R but adds the ability to express prior beliefs about the location of the $R^2$, which is the familiar proportion of variance in the outcome variable that is attributable to the predictors under a linear model. Since the $R^2$ is a well-understood bounded scalar, it is easy to specify prior information about it, whereas other Bayesian approaches require the researcher to specify a joint prior distribution for the regression coefficients (and the intercept and error variance). However, most researchers have little inclination to specify all these prior distributions thoughtfully and take a short-cut by specifying one prior distribution that is taken to apply to all the regression coefficients as if they were independent of each other (and the intercept and error variance). This short-cut is available in the `stan_glm` function and is described in more detail in other __rstanarm__ vignettes for Generalized Linear Models (GLMs), which can be found by navigating up one level. 
We are optimistic that this prior on the $R^2$ will greatly help in accomplishing our goal for __rstanarm__ of making Bayesian estimation of regression models routine. The same approach is used to specify a prior in ANOVA models (see `stan_aov`) and proportional-odds models for ordinal outcomes (see `stan_polr`). Finally, the `stan_biglm` function can be used when the design matrix is too large for the `qr` function to process. The `stan_biglm` function inputs the output of the `biglm` function in the __biglm__ package, which utilizes an incremental QR decomposition that does not require the entire dataset to be loaded into memory simultaneously. However, the `biglm` function needs to be called in a particular way in order to work with `stan_biglm`. In particular, the means of the columns of the design matrix, the sample mean of the outcome, and the sample standard deviation of the outcome all need to be passed to the `stan_biglm` function, as well as a flag indicating whether the model really does include an intercept. Also, the number of columns of the design matrix currently cannot exceed the number of rows. Although `stan_biglm` should run fairly quickly and without much memory, the resulting object is a fairly plain `stanfit` object rather than an enhanced `stanreg` object like that produced by `stan_lm`. Many of the enhanced capabilities of a `stanreg` object depend on being able to access the full design matrix, so doing posterior predictions, posterior checks, etc. with the output of `stan_biglm` would require some custom R code. # References Lewandowski, D., Kurowicka D., and Joe, H. (2009). Generating random correlation matrices based on vines and extended onion method. _Journal of Multivariate Analysis_. __100__(9), 1989--2001. 
rstanarm/inst/doc/aov.R0000644000176200001440000000256714551550134014550 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ----aov-weightgain-aov------------------------------------------------------- data("weightgain", package = "HSAUR3") coef(aov(weightgain ~ source * type, data = weightgain)) ## ----aov-weightgain-mcmc, results="hide"-------------------------------------- library(rstanarm) post1 <- stan_aov(weightgain ~ source * type, data = weightgain, prior = R2(location = 0.5), adapt_delta = 0.999, seed = 12345) post1 ## ---- echo=FALSE-------------------------------------------------------------- print(post1) ## ---- aov-weightgain-stan_lmer, eval=FALSE------------------------------------ # post2 <- stan_lmer(weightgain ~ 1 + (1|source) + (1|type) + (1|source:type), # data = weightgain, prior_intercept = cauchy(), # prior_covariance = decov(shape = 2, scale = 2), # adapt_delta = 0.999, seed = 12345) rstanarm/inst/doc/state_plot_data.rda0000644000176200001440000000264313540753420017472 0ustar liggesusersSUTxY%$ 4dw(-n^mh2,&)Hªir(IRyԒ4L$ɬ~`jty{E=gvsְ8CqSx85Cƹq#[M,Кc8N1ݑĜq8S(}A? gK Ε0XB C))#(K%BhX =Es RW()8Iz]E{BqG`ꂺ3=%|k#cS$_ t|=ľm3O>O%K{H>÷gחO~Χ1>" ռN}|/s6y" Y[i3rl<oϿΏV{5|1yqڃ|ܸ~ޮ~ڊ9/#!<ϏϟC~O`ߦ bΦ}] h_mM|wKr:^mޥՏ\! 
UƷk7)ګvZ,ǗܗѾj֝ݛqi&_$q_ υ7=LOJ@((!cvQTrp&n9yiUʨu@CW6tCUwK_puڑ=PeSL: _{NAmaGHp8tZC}N=q ;cnݨ&GVVTmZX -bp&k Ǖٶ֋wmjIm N*w@u7e+:E96ߵzʢI?@=w8i޿oB?G8 ށwFN[Fmv]qno X\ S:r{`6T]XuS?Tn z`WQd'PwVPQw!o>pBqnzWF _=χχxs7䚋QkKFSŜK{nBK=l*X"H6) <+N>@kREhCZPb6 P&5,&™*sB3,D) c6VJLRCXRA%N0Fɘ)Tʂd쑨Y611+=Ѱ *U%._zOH?\aN 3v aHzAh.MDQTX,D6100()P!h]m"mzvP,z?+Lrstanarm/inst/doc/jm.html0000644000176200001440000256446614551550562015156 0ustar liggesusers Estimating Joint Models for Longitudinal and Time-to-Event Data with rstanarm

Estimating Joint Models for Longitudinal and Time-to-Event Data with rstanarm

Sam Brilleman

2024-01-16

Preamble

This vignette provides an introduction to the stan_jm modelling function in the rstanarm package. The stan_jm function allows the user to estimate a shared parameter joint model for longitudinal and time-to-event data under a Bayesian framework.

Introduction

Joint modelling can be broadly defined as the simultaneous estimation of two or more statistical models which traditionally would have been separately estimated. When we refer to a shared parameter joint model for longitudinal and time-to-event data, we generally mean the joint estimation of: 1) a longitudinal mixed effects model which analyses patterns of change in an outcome variable that has been measured repeatedly over time (for example, a clinical biomarker) and 2) a survival or time-to-event model which analyses the time until an event of interest occurs (for example, death or disease progression). Joint estimation of these so-called “submodels” is achieved by assuming they are correlated via individual-specific parameters (i.e. individual-level random effects).

Over the last two decades the joint modelling of longitudinal and time-to-event data has received a significant amount of attention [1-5]. Methodological developments in the area have been motivated by a growing awareness of the benefits that a joint modelling approach can provide. In clinical or epidemiological research it is common for a clinical biomarker to be repeatedly measured over time on a given patient. In addition, it is common for time-to-event data, such as the patient-specific time from a defined origin (e.g. time of diagnosis of a disease) until a terminating clinical event such as death or disease progression to also be collected. The figure below shows observed longitudinal measurements (i.e. observed “trajectories”) of log serum bilirubin for a small sample of patients with primary biliary cirrhosis. Solid lines are used for those patients who were still alive at the end of follow up, while dashed lines are used for those patients who died. From the plots, we can observe between-patient variation in the longitudinal trajectories for log serum bilirubin, with some patients showing an increase in the biomarker over time, others decreasing, and some remaining stable. Moreover, there is variation between patients in terms of the frequency and timing of the longitudinal measurements.

From the perspective of clinical risk prediction, we may be interested in asking whether the between-patient variation in the log serum bilirubin trajectories provides meaningful prognostic information that can help us differentiate patients with regard to some clinical event of interest, such as death. Alternatively, from an epidemiological perspective we may wish to explore the potential for etiological associations between changes in log serum bilirubin and mortality. Joint modelling approaches provide us with a framework under which we can begin to answer these types of clinical and epidemiological questions.

More formally, the motivations for undertaking a joint modelling analysis of longitudinal and time-to-event data might include one or more of the following:

  • One may be interested in how underlying changes in the biomarker influence the occurrence of the event. However, including the observed biomarker measurements directly into a time-to-event model as time-varying covariates poses several problems. For example, if the widely used Cox proportional hazards model is assumed for the time-to-event model then biomarker measurements need to be available for all patients at all failure times, which is unlikely to be the case [3]. If simple methods of imputation are used, such as the “last observation carried forward” method, then these are likely to induce bias [6]. Furthermore, the observed biomarker measurements may be subject to measurement error and therefore their inclusion as time-varying covariates may result in biased and inefficient estimates. In most cases, the measurement error will result in parameter estimates which are shrunk towards the null [7]. On the other hand, joint modelling approaches allow us to estimate the association between the biomarker (or some function of the biomarker trajectory, such as rate of change in the biomarker) and the risk of the event, whilst allowing for both the discrete time and measurement-error aspects of the observed biomarker.

  • One may be interested primarily in the evolution of the clinical biomarker but may wish to account for what is known as informative dropout. If the value of future (unobserved) biomarker measurements are related to the occurrence of the terminating event, then those unobserved biomarker measurements will be “missing not at random” [8,9]. In other words, biomarker measurements for patients who have an event will differ from those who do not have an event. Under these circumstances, inference based solely on observed measurements of the biomarker will be subject to bias. A joint modelling approach can help to adjust for informative dropout and has been shown to reduce bias in the estimated parameters associated with longitudinal changes in the biomarker [1,9,10].

  • Joint models are naturally suited to the task of dynamic risk prediction. For example, joint modelling approaches have been used to develop prognostic models where predictions of event risk can be updated as new longitudinal biomarker measurements become available. Taylor et al. [11] jointly modelled longitudinal measurements of the prostate specific antigen (PSA) and time to clinical recurrence of prostate cancer. The joint model was then used to develop a web-based calculator which could provide real-time predictions of the probability of recurrence based on a patient’s up to date PSA measurements.

In this vignette, we describe the rstanarm package’s stan_jm modelling function. This modelling function allows users to fit a shared parameter joint model for longitudinal and time-to-event data under a Bayesian framework, with the backend estimation carried out using Stan. In Section 2 we describe the formulation of the joint model used by stan_jm. In Section 3 we present a variety of examples showing the usage of stan_jm.

Note that some aspects of the estimation are covered in other vignettes, such as the priors vignette which contains details on the prior distributions available for regression coefficients.

Technical details

Model formulation

A shared parameter joint model consists of related submodels which are specified separately for each of the longitudinal and time-to-event outcomes. These are therefore commonly referred to as the longitudinal submodel(s) and the event submodel. The longitudinal and event submodels are linked using shared individual-specific parameters, which can be parameterised in a number of ways. We describe each of these submodels below.

Longitudinal submodel(s)

We assume \(y_{ijm}(t) = y_{im}(t_{ij})\) corresponds to the observed value of the \(m^{th}\) \((m = 1,...,M)\) biomarker for individual \(i\) \((i = 1,...,N)\) taken at time point \(t_{ij}\), \(j = 1,...,n_{im}\). We specify a (multivariate) generalised linear mixed model that assumes \(y_{ijm}(t)\) follows a distribution in the exponential family with mean \(\mu_{ijm}(t)\) and linear predictor

\[ \eta_{ijm}(t) = g_m(\mu_{ijm}(t)) = \boldsymbol{x}^T_{ijm}(t) \boldsymbol{\beta}_m + \boldsymbol{z}^T_{ijm}(t) \boldsymbol{b}_{im} \]

where \(\boldsymbol{x}^T_{ijm}(t)\) and \(\boldsymbol{z}^T_{ijm}(t)\) are both row-vectors of covariates (which likely include some function of time, for example a linear slope, cubic splines, or polynomial terms) with associated vectors of fixed and individual-specific parameters \(\boldsymbol{\beta}_m\) and \(\boldsymbol{b}_{im}\), respectively, and \(g_m\) is some known link function. The distribution and link function are allowed to differ over the \(M\) longitudinal submodels. We let the vector \(\boldsymbol{\beta} = \{ \boldsymbol{\beta}_m ; m = 1,...,M\}\) denote the collection of population-level parameters across the \(M\) longitudinal submodels. We assume that the dependence across the different longitudinal submodels (i.e. the correlation between the different longitudinal biomarkers) is captured through a shared multivariate normal distribution for the individual-specific parameters; that is, we assume

\[ \begin{pmatrix} \boldsymbol{b}_{i1} \\ \vdots \\ \boldsymbol{b}_{iM} \end{pmatrix} = \boldsymbol{b}_i \sim \mathsf{Normal} \left( 0 , \boldsymbol{\Sigma} \right) \]

for some unstructured variance-covariance matrix \(\boldsymbol{\Sigma}\).

Event submodel

We assume that we also observe an event time \(T_i = \mathsf{min} \left( T^*_i , C_i \right)\) where \(T^*_i\) denotes the so-called “true” event time for individual \(i\) (potentially unobserved) and \(C_i\) denotes the censoring time. We define an event indicator \(d_i = I(T^*_i \leq C_i)\). We then model the hazard of the event using a parametric proportional hazards regression model of the form

\[ h_i(t) = h_0(t; \boldsymbol{\omega}) \mathsf{exp} \left( \boldsymbol{w}^T_i(t) \boldsymbol{\gamma} + \sum_{m=1}^M \sum_{q=1}^{Q_m} f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) \right) \]

where \(h_i(t)\) is the hazard of the event for individual \(i\) at time \(t\), \(h_0(t; \boldsymbol{\omega})\) is the baseline hazard at time \(t\) given parameters \(\boldsymbol{\omega}\), \(\boldsymbol{w}^T_i(t)\) is a row-vector of individual-specific covariates (possibly time-dependent) with an associated vector of regression coefficients \(\boldsymbol{\gamma}\) (log hazard ratios), \(f_{mq}(.)\) are a set of known functions for \(m=1,...,M\) and \(q=1,...,Q_m\), and the \(\alpha_{mq}\) are regression coefficients (log hazard ratios).

The longitudinal and event submodels are assumed to be related via an “association structure”, which is a set of functions each \(\{ f_{mq} ; m = 1,...,M, q = 1,...,Q_m \}\) that may each be conditional on the population-level parameters from the longitudinal submodel \(\boldsymbol{\beta}\), the individual-specific parameters \(\boldsymbol{b}_{i}\), and the population-level parameters \(\alpha_{mq}\) for \(m=1,...,M\) and \(q=1,...,Q_m\). That is, the association structure of the joint model is captured via the \(\sum_{m=1}^M \sum_{q=1}^{Q_m} f_{mq}(\boldsymbol{\beta}_m, \boldsymbol{b}_{im}, \alpha_{mq}; t)\) term in the linear predictor of the event submodel. The \(\alpha_{mq}\) are referred to as the “association parameters” since they quantify the strength of the association between the longitudinal and event processes. The various ways in which the association structure can be are described in the next section.

The probability of individual \(i\) still being event-free at time \(t\), often referred to as the “survival probability”, is defined as

\[ S_i(t) = \text{Prob} \Big( T_i^* \geq t \Big) = \exp \Big( -H_i(t) \Big) \]

where \(H_i(t) = \int_{s=0}^t h_i(s) ds\) is the cumulative hazard for individual \(i\).

We assume that the baseline hazard \(h_0(t; \boldsymbol{\omega})\) is modelled parametrically. In the stan_jm modelling function the baseline hazard be specified as either: an approximation using B-splines on the log hazard scale (the default); a Weibull distribution; or an approximation using a piecewise constant function on the log hazard scale (sometimes referred to as piecewise exponential). The choice of baseline hazard can be made via the basehaz argument. In the case of the B-splines or piecewise constant baseline hazard, the user can control the flexibility by specifying the knots or degrees of freedom via the basehaz_ops argument. (Note that currently there is slightly limited post-estimation functionality available for models estimated with a piecewise constant baseline hazard, so this is perhaps the least preferable choice).

Association structures

As mentioned in the previous section, the dependence between the longitudinal and event submodels is captured through the association structure, which can be specified in a number of ways. The simplest association structure is likely to be

\[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{im}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \]

and this is often referred to as a current value association structure since it assumes that the log hazard of the event at time \(t\) is linearly associated with the value of the longitudinal submodel’s linear predictor also evaluated at time \(t\). This is the most common association structure used in the joint modelling literature to date. In the situation where the longitudinal submodel is based on an identity link function and normal error distribution (i.e. a linear mixed model) the current value association structure can be viewed as a method for including the underlying “true” value of the biomarker as a time-varying covariate in the event submodel.1

However, other association structures are also possible. For example, we could assume the log hazard of the event is linearly associated with the current slope (i.e. rate of change) of the longitudinal submodel’s linear predictor, that is

\[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\eta_{im}(t)}{dt} \]

There are in fact a whole range of possible association structures, many of which have been discussed in the literature [14-16].

The stan_jm modelling function in the rstanarm package allows for the following association structures, which are specified via the assoc argument:

Current value (of the linear predictor or expected value) \[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \mu_{im}(t) \]

Current slope (of the linear predictor or expected value) \[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\eta_{im}(t)}{dt} \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \frac{d\mu_{im}(t)}{dt} \]

Area under the curve (of the linear predictor or expected value) \[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \int_0^t \eta_{im}(u) du \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \int_0^t \mu_{im}(u) du \]

Interactions between different biomarkers \[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \eta_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \eta_{im}(t) \mu_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \mu_{im}(t) \mu_{im'}(t) \text{ for some } m = m' \text{ or } m \neq m' \]

Interactions between the biomarker (or it’s slope) and observed data \[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \eta_{im}(t) \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \mu_{im}(t) \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \frac{d\eta_{im}(t)}{dt} \text{ for some covariate value } c_{i}(t) \\ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} c_{i}(t) \frac{d\mu_{im}(t)}{dt} \text{ for some covariate value } c_{i}(t) \]

As well as using lagged values for any of the above. That is, replacing \(t\) with \(t-u\) where \(u\) is some lag time, such that the hazard of the event at time \(t\) is assumed to be associated with some function of the longitudinal submodel parameters at time \(t-u\).

Lastly, we can specify some time-independent function of the random effects, possibly including the fixed effect component. For example,

\[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \boldsymbol{b}_{im0} \]

or

\[ f_{mq}(\boldsymbol{\beta}, \boldsymbol{b}_{i}, \alpha_{mq}; t) = \alpha_{mq} \Big( \boldsymbol{\beta}_{m0} + \boldsymbol{b}_{im0} \Big) \]

where \(\boldsymbol{\beta}_{m0}\) is the population-level intercept for the \(m^{th}\) longitudinal submodel and \(\boldsymbol{b}_{im0}\) is the \(i^{th}\) individual’s random deviation from the population-level intercept for the \(m^{th}\) longitudinal submodel.

Note that more than one association structure can be specified, however, not all possible combinations are allowed. Moreover, if you are fitting a multivariate joint model (i.e. more than one longitudinal outcome) then you can optionally choose to use a different association structure(s) for linking each longitudinal submodel to the event submodel. To do this you can pass a list of length \(M\) to the assoc argument.

Assumptions

Here we define a set of assumptions for the multivariate shared parameter joint model.

The so-called conditional independence assumption of the shared parameter joint model postulates

\[ y_{im}(t) \perp y_{im'}(t) \mid \boldsymbol{b}_i, \boldsymbol{\theta} \\ y_{im}(t) \perp y_{im}(t') \mid \boldsymbol{b}_i, \boldsymbol{\theta} \\ y_{im}(t) \perp T_i^* \mid \boldsymbol{b}_i, \boldsymbol{\theta} \]

for some \(m \neq m'\) and \(t \neq t'\), and where \(\boldsymbol{\theta}\) denotes the combined vector of all remaining population-level parameters in the model. That is, conditional on the individual-specific parameters \(\boldsymbol{b}_i\) and population-level parameters \(\boldsymbol{\theta}\), the following are assumed: (i) any biomarker measurement for individual \(i\) is independent of that individual’s true event time \(T_i^*\); (ii) any two measurements of the \(m^{th}\) biomarker taken on the \(i^{th}\) individual at two distinct time points \(t\) and \(t'\) (i.e. longitudinal or repeated measurements) are independent of one another; and (iii) any two measurements of two different biomarkers, taken on the \(i^{th}\) individual at some time point \(t\) are independent of one another. These conditional independence assumptions allow for a convenient factorisation of the full likelihood for joint model into the likelihoods for each of the component parts (i.e. the likelihood for the longitudinal submodel, the likelihood for the event submodel, and the likelihood for the distribution of the individual-specific parameters), which facilitates the estimation of the model.

Moreover, we require two additional assumptions: (i) that the censoring process for the event outcome is independent of the true event time, that is \(C_i \perp T_i^* \mid \boldsymbol{\theta}\) (i.e. uninformative censoring); and (ii) that the visiting process by which the observation times \(t_{ijm}\) are determined is independent of the true event time \(T_i^*\) and all missing future unobserved longitudinal biomarker measurements.

Log posterior distribution

Under the conditional independence assumption, the log posterior for the \(i^{th}\) individual can be specified as

\[ \log p(\boldsymbol{\theta}, \boldsymbol{b}_{i} \mid \boldsymbol{y}_{i}, T_i, d_i) \propto \log \Bigg[ \Bigg( \prod_{m=1}^M \prod_{j=1}^{n_i} p(y_{ijm}(t) \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) \Bigg) p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta}) p(\boldsymbol{\theta}) \Bigg] \]

where \(\boldsymbol{y}_i = \{ y_{ijm}(t); j = 1,...,n_i, m = 1,...,M \}\) denotes the collection of longitudinal biomarker data for individual \(i\) and \(\boldsymbol{\theta}\) denotes all remaining population-level parameters in the model.

We can rewrite this log posterior as

\[ \log p(\boldsymbol{\theta}, \boldsymbol{b}_{i} \mid \boldsymbol{y}_{i}, T_i, d_i) \propto \Bigg( \sum_{m=1}^M \sum_{j=1}^{n_i} \log p(y_{ijm}(t) \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) \Bigg) + \log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) + \log p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta}) + \log p(\boldsymbol{\theta}) \]

where \(\sum_{j=1}^{n_{im}} \log p(y_{ijm} \mid \boldsymbol{b}_{i}, \boldsymbol{\theta})\) is the log likelihood for the \(m^{th}\) longitudinal submodel, \(\log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta})\) is the log likelihood for the event submodel, \(\log p(\boldsymbol{b}_{i} \mid \boldsymbol{\theta})\) is the log likelihood for the distribution of the group-specific parameters (i.e. random effects), and \(\log p(\boldsymbol{\theta})\) represents the log density for the joint prior distribution across all remaining unknown parameters.2

We can rewrite the log likelihood for the event submodel as

\[ \log p(T_i, d_i \mid \boldsymbol{b}_{i}, \boldsymbol{\theta}) = d_i * \log h_i(T_i) - \int_0^{T_i} h_i(s) ds \]

and then use Gauss-Kronrod quadrature with \(Q\) nodes to approximate \(\int_0^{T_i} h_i(s) ds\), such that

\[ \int_0^{T_i} h_i(s) ds \approx \frac{T_i}{2} \sum_{q=1}^{Q} w_q h_i \bigg( \frac{T_i(1+s_q)}{2} \bigg) \]

where \(w_q\) and \(s_q\), respectively, are the standardised weights and locations (“abscissa”) for quadrature node \(q\) \((q=1,...,Q)\) [17]. The default for the stan_jm modelling function is to use \(Q=15\) quadrature nodes, however if the user wishes, they can choose between \(Q=15\), \(11\), or \(7\) quadrature nodes (specified via the qnodes argument).

Therefore, once we have an individual’s event time \(T_i\) we can evaluate the design matrices for the event submodel and longitudinal submodels at the \(Q+1\) necessary time points (which are the event time \(T_i\) and the quadrature points \(\frac{T_i(1+s_q)}{2}\) for \(q=1,...,Q\)) and then pass these to Stan’s data block. We can then evaluate the log likelihood for the event submodel by simply calculating the hazard \(h_i(t)\) at those \(Q+1\) time points and summing the quantities appropriately. This calculation will need to be performed each time we iterate through Stan’s model block. A simplified example of the underlying Stan code used to fit the joint model can be found in Brilleman et al. (2018) [12].

Model predictions

Before discussing the methods by which we can generate posterior predictions, first let us define some additional relevant quantities. Let \(\mathcal{D} = \{ \boldsymbol{y}_i, T_i, d_i; i = 1,...,N \}\) be the entire collection of outcome data in the sample. We will refer to this sample as the “training data”. Let \(T_{max} = \max \{ T_i; i = 1,...,N \}\) denote the maximum event or censoring time across the \(i = 1,...,N\) individuals in our training data.

Individual-specific predictions for in-sample individuals (for \(0 \leq t \leq T_i\))

We can generate posterior predictions for the longitudinal and time-to-event outcomes in the following manner. For the \(i^{th}\) individual in our training data, a predicted value for the \(m^{th}\) longitudinal biomarker at time \(t\), denoted \(y^*_{im}(t)\), can be generated from the posterior predictive distribution

\[ p \Big( y^{*}_{im}(t) \mid \mathcal{D} \Big) = \int \int p \Big( y^{*}_{im}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) \space d \boldsymbol{b}_i \space d \boldsymbol{\theta} \]

and, similarly, a predicted probability of the \(i^{th}\) individual being event-free at time \(t\), denoted \(S^*_i(t)\), can be generated from the posterior predictive distribution

\[ p \Big( S^{*}_{i}(t) \mid \mathcal{D} \Big) = \int \int p \Big( S^{*}_i(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} \]

Note that for simplicity we have ignored the implicit conditioning on covariates; \(\boldsymbol{x}_{im}(t)\) and \(\boldsymbol{z}_{im}(t)\), for \(m = 1,...,M\), and \(\boldsymbol{w}_{i}(t)\). Since individual \(i\) is included in the training data, it is easy for us to approximate these posterior predictive distributions by drawing from \(p(y^{*}_{im}(t) \mid \boldsymbol{\theta}^{(l)}, \boldsymbol{b}_i^{(l)})\) and \(p(S^{*}_i(t) \mid \boldsymbol{\theta}^{(l)}, \boldsymbol{b}_i^{(l)})\) where \(\boldsymbol{\theta}^{(l)}\) and \(\boldsymbol{b}_i^{(l)}\) are the \(l^{th}\) \((l = 1,...,L)\) MCMC draws from the joint posterior distribution \(p(\boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D})\).

These draws from the posterior predictive distributions can be used for assessing the fit of the model. For example,

  • the draws from \(p(y^{*}_{im}(t) \mid \mathcal{D})\) for \(0 \leq t \leq T_i\) can be used to evaluate the fit of the longitudinal trajectory for the \(m^{th}\) biomarker for the \(i^{th}\) individual, and

  • the draws from \(p(S^{*}_{i}(t) \mid \mathcal{D})\) for \(0 \leq t \leq T_{max}\) can be averaged across the \(N\) individuals to obtain a standardised survival curve (discussed in greater detail in later sections) which can then be compared to the observed survival curve, for example, the Kaplan-Meier curve.

Individual-specific predictions for in-sample individuals (for \(t > C_i\))

However, given that we know the event or censoring time for each individual in our training data, it may make more sense to consider what will happen to censored individuals in our study when we look beyond their last known survival time (i.e. extrapolation).

For an individual \(i\), who was in our training data, and who was known to be event-free up until their censoring time \(C_i\), we wish to draw from the conditional posterior predictive distribution for their longitudinal outcome at some time \(t > C_i\), that is

\[ p \Big( y^{*}_{im}(t) \mid \mathcal{D}, t > C_i \Big) = \int \int p \Big( y^{*}_{im}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i, t > C_i \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} \]

and the conditional posterior predictive distribution for their survival probability at some time \(t > C_i\), that is

\[ \begin{aligned} p \Big( S^{*}_{i}(t) \mid \mathcal{D}, t > C_i, T_i^* > C_i \Big) & = \frac {p \Big( S^{*}_{i}(t) \mid \mathcal{D} \Big)} {p \Big( S^{*}_{i}(C_i) \mid \mathcal{D} \Big)} \\ & = \int \int \frac {p \Big( S^{*}_i(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big)} {p \Big( S^{*}_i(C_i) \mid \boldsymbol{\theta}, \boldsymbol{b}_i \Big)} \space p \Big( \boldsymbol{\theta}, \boldsymbol{b}_i \mid \mathcal{D} \Big) d \boldsymbol{b}_i \space d \boldsymbol{\theta} \end{aligned} \]

These draws from the conditional posterior predictive distributions can be used to extrapolate into the future for individual \(i\), conditional on their longitudinal biomarker data collected between baseline and their censoring time \(C_i\). For example,

  • the draws from \(p(y^{*}_{im}(t) \mid \mathcal{D}, t > C_i)\) for \(C_i \leq t \leq T_{max}\) can be used to show the forecasted longitudinal trajectory for the \(m^{th}\) biomarker for the \(i^{th}\) individual, and

  • the draws from \(p(S^{*}_{i}(t) \mid \mathcal{D}, t > C_i, T_i^* > C_i)\) for \(C_i \leq t \leq T_{max}\) can be used to show the estimated conditional probability of individual \(i\) remaining event-free into the future.

Individual-specific predictions for out-of-sample individuals (i.e. dynamic predictions)

TBC. Describe dynamic predictions under the framework of Rizopoulos (2011) [18]. These types of individual-specific predictions can be obtained using the posterior_traj and posterior_survfit functions by providing prediction data and specifying dynamic = TRUE (which is the default); see the examples provided below.

Population-level (i.e. marginal) predictions

We can also generate posterior predictions for the longitudinal and time-to-event outcomes that do not require any conditioning on observed outcome data for a specific individual. Here, we will discuss two ways in which this can be done.

The first way is to “marginalise” over the distribution of the individual-specific parameters. We wish to generate a predicted value for the \(m^{th}\) longitudinal biomarker at time \(t\) for a new individual \(k\) for whom we do not have any observed data. We will denote this prediction \(y^*_{km}(t)\) and note that it can be generated from the posterior predictive distribution for the longitudinal outcome

\[ \begin{aligned} p \Big( y^{*}_{km}(t) \mid \mathcal{D} \Big) & = \int \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \mid \mathcal{D} \Big) \space d \boldsymbol{\tilde{b}}_{k} \space d \boldsymbol{\theta} \\ & = \int \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\tilde{b}}_k \mid \boldsymbol{\theta} \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) \space d \boldsymbol{\tilde{b}}_{k} \space d \boldsymbol{\theta} \end{aligned} \]

and similarly for the survival probability

\[ \begin{aligned} p \Big( S^{*}_{k}(t) \mid \mathcal{D} \Big) & = \int \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \mid \mathcal{D} \Big) d \boldsymbol{b}_k \space d \boldsymbol{\theta} \\ & = \int \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{\tilde{b}}_k \Big) p \Big( \boldsymbol{\tilde{b}}_k \mid \boldsymbol{\theta} \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{b}_k \space d \boldsymbol{\theta} \\ \end{aligned} \]

We can obtain draws for \(\boldsymbol{\tilde{b}}_k\) in the same manner as for the individual-specific parameters \(\boldsymbol{b}_i\). That is, at the \(l^{th}\) iteration of the MCMC sampler we draw \(\boldsymbol{\tilde{b}}_k^{(l)}\) and store it3. However, individual \(k\) did not provide any contribution to the training data and so we are effectively taking random draws from the posterior distribution for the individual-specific parameters. We are therefore effectively marginalising over the distribution of the group-specific coefficients when we obtain predictions using the draws \(\boldsymbol{\tilde{b}}_k^{(l)}\) for \(l = 1,\dots,L\). In other words, we are predicting for a new individual about whom we have no information except that they are drawn from the same population as the \(i = 1,...,N\) individuals in the training data. Because these predictions will incorporate all the uncertainty associated with between-individual variation our 95% credible intervals are likely to be very wide. These types of marginal predictions can be obtained using the posterior_traj and posterior_survfit functions by providing prediction data and specifying dynamic = FALSE; see the examples provided below.

The second way is to effectively ignore the group-level structure in the model. That is, to predict with only the population-level parameters contributing to the model. For example, under an identity link function and normal error distribution (i.e. linear mixed effect longitudinal submodel), we would obtain draws from the distribution \(y^{(l)}_{km}(t) \sim N \Big( \boldsymbol{x}^T_{km}(t) \boldsymbol{\beta}_m^{(l)}, \sigma_m^{(l)} \Big)\) where \(\boldsymbol{\beta}_m^{(l)}\) and \(\sigma_m^{(l)}\) are the population-level parameters and residual error standard deviation, respectively, for the \(l^{th}\) draw of the MCMC samples. However, referring to this as a “marginal” prediction is somewhat misleading since we are not explicitly conditioning on the individual-specific parameters but we are implicitly assuming that we know they are equal to zero with absolute certainty. That is, we are actually drawing from the posterior predictive distribution for the longitudinal outcome

\[ \begin{aligned} p \Big( y^{*}_{km}(t) \mid \mathcal{D} \Big) & = \int p \Big( y^{*}_{km}(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_k = 0 \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\theta} \\ \end{aligned} \] and similarly for the survival probability

\[ p \Big( S^{*}_{k}(t) \mid \mathcal{D} \Big) = \int p \Big( S^{*}_k(t) \mid \boldsymbol{\theta}, \boldsymbol{b}_k = 0 \Big) p \Big( \boldsymbol{\theta} \mid \mathcal{D} \Big) d \boldsymbol{\theta} \\ \]

These types of so-called “marginal” predictions can not currently be obtained using the posterior_traj and posterior_survfit functions.

Standardised survival probabilities

All of the previously discussed population-level (i.e. marginal) predictions assumed implicit conditioning on some covariate values for the longitudinal submodel, \(\boldsymbol{x}_{im}(t)\) and \(\boldsymbol{z}_{im}(t)\) for \(m = 1,...,M\), and for the event submodel, \(\boldsymbol{w}_{i}(t)\). Even though we marginalise over the distribution of the individual-specific parameters we were still assuming that we obtained predictions for some known values of the covariates. However, sometimes we wish to marginalise (i.e. average) over the observed distribution of covariates as well. Here we discuss a method by which we can do that for the predicted survival probabilities.

At any time \(t\), it is possible to obtain a standardised survival probability by averaging the individual-specific survival probabilities. That is, we can obtain

\[ S^*(t) = \frac{\sum_{i=1}^{N^{pred}} S_i^*(t)}{N^{pred}} \]

where \(S_i^*(t)\) is the predicted survival probability for individual \(i\) (\(i = 1,\dots,N^{pred}\)) at time \(t\), and \(N^{pred}\) is the number of individuals included in the prediction dataset. We refer to these predictions as standardised survival probabilities.

Note however, that if \(N^{pred}\) is not sufficiently large (e.g. we pass new data with just 2 individuals, say) then marginalising over their covariate distribution may not be meaningful and, similarly, their joint random effects distribution may be a poor representation of the random effects distribution for the entire population. It is better to calculate these standardised survival probabilities using a prediction dataset where, say, \(N^{pred}\) is equal to the total number of individuals in the training data.

Model extensions

Delayed entry (left-truncation)

TBC.

Multilevel clustering

TBC.

Model comparison

LOO/WAIC in the context of joint models

TBC.

Usage examples

Dataset used in the examples

We use the Mayo Clinic’s primary biliary cirrhosis (PBC) dataset in the examples below. The dataset contains 312 individuals with primary biliary cirrhosis who participated in a randomised placebo controlled trial of D-penicillamine conducted at the Mayo Clinic between 1974 and 1984 [19]. However, to ensure the examples run quickly, we use a small random subset of just 40 patients from the full data.

These example data are contained in two separate data frames. The first data frame contains multiple-row per patient longitudinal biomarker information, as shown in

  id      age sex trt      year     logBili albumin platelet
1  1 58.76523   f   1 0.0000000  2.67414865    2.60      190
2  1 58.76523   f   1 0.5256674  3.05870707    2.94      183
3  2 56.44627   f   1 0.0000000  0.09531018    4.14      221
4  2 56.44627   f   1 0.4982888 -0.22314355    3.60      188
5  2 56.44627   f   1 0.9993155  0.00000000    3.55      161
6  2 56.44627   f   1 2.1026694  0.64185389    3.92      122

while the second data frame contains single-row per patient survival information, as shown in

   id      age sex trt futimeYears status death
1   1 58.76523   f   1    1.095140      2     1
3   2 56.44627   f   1   14.151951      0     0
12  3 70.07255   m   1    2.770705      2     1
16  4 54.74059   f   1    5.270363      2     1
23  5 38.10541   f   0    4.120465      1     0
29  6 66.25873   f   0    6.852841      2     1

The variables included across the two datasets can be defined as follows:

  • age in years
  • albumin serum albumin (g/dl)
  • logBili logarithm of serum bilirubin
  • death indicator of death at endpoint
  • futimeYears time (in years) between baseline and the earliest of death, transplantation or censoring
  • id numeric ID unique to each individual
  • platelet platelet count
  • sex gender (m = male, f = female)
  • status status at endpoint (0 = censored, 1 = transplant, 2 = dead)
  • trt binary treatment code (0 = placebo, 1 = D-penicillamine)
  • year time (in years) of the longitudinal measurements, taken as time since baseline

A description of the example datasets can be found by accessing the following help documentation:

Fitting the models

Univariate joint model (current value association structure)

In this example we fit a simple univariate joint model, with one normally distributed longitudinal marker, an association structure based on the current value of the linear predictor, and B-splines baseline hazard. To fit the model we use the joint (longitudinal and time-to-event) modelling function in the rstanarm package: stan_jm. When calling stan_jm we must, at a minimum, specify a formula object for each of the longitudinal and event submodels (through the arguments formulaLong and formulaEvent), the data frames which contain the variables for each of the longitudinal and event submodels (through the arguments dataLong and dataEvent), and the name of the variable representing time in the longitudinal submodel (through the argument time_var).

The formula for the longitudinal submodel is specified using the lme4 package formula style. That is y ~ x + (random_effects | grouping_factor). In this example we specify that log serum bilirubin (logBili) follows a subject-specific linear trajectory. To do this we include a fixed intercept and fixed slope (year), as well as a random intercept and random slope for each subject id ((year | id)).

The formula for the event submodel is specified using the survival package formula style. That is, the outcome on the left of the ~ needs to be of the format Surv(event_time, event_indicator) for single row per individual data, or Surv(start_time, stop_time, event_indicator) for multiple row per individual data. The latter allows for exogenous time-varying covariates to be included in the event submodel. In this example we assume that the log hazard of death is linearly related to gender (sex) and an indicator of treatment with D-penicillamine (trt).

Fitting a univariate joint model.

Please note the warmup may be much slower than later iterations!

SAMPLING FOR MODEL 'jm' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000372 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.72 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 20.255 seconds (Warm-up)
Chain 1:                19.962 seconds (Sampling)
Chain 1:                40.217 seconds (Total)
Chain 1: 

The argument refresh = 2000 was specified so that Stan didn’t provide us with excessive progress updates whilst fitting the model. However, if you are fitting a model that will take several minutes or hours to fit, then you may wish to request progress updates quite regularly, for example setting refresh = 20 for every 20 iterations (by default the refresh argument is set to 1/10th of the total number of iterations).

The fitted model is returned as an object of the S3 class stanjm. We have a variety of methods and post-estimation functions available for this class, including: print, summary, plot, fixef, ranef, coef, VarCorr, posterior_interval, update, and more. Here, we will examine the most basic output for the fitted joint model by typing print(mod1):

stan_jm
 formula (Long1): logBili ~ sex + trt + year + (year | id)
 family  (Long1): gaussian [identity]
 formula (Event): survival::Surv(futimeYears, death) ~ sex + trt
 baseline hazard: bs
 assoc:           etavalue (Long1)
------

Longitudinal submodel: logBili
            Median MAD_SD
(Intercept)  0.255  0.633
sexf         0.521  0.651
trt         -0.170  0.385
year         0.210  0.041
sigma        0.353  0.018

Event submodel:
                Median MAD_SD exp(Median)
(Intercept)     -3.184  0.607  0.041     
sexf            -0.338  0.566  0.713     
trt             -0.750  0.454  0.472     
Long1|etavalue   1.362  0.263  3.902     
b-splines-coef1 -0.913  1.017     NA     
b-splines-coef2  0.473  0.821     NA     
b-splines-coef3 -1.765  1.201     NA     
b-splines-coef4  0.324  1.597     NA     
b-splines-coef5 -0.037  1.710     NA     
b-splines-coef6 -0.888  1.697     NA     

Group-level error terms:
 Groups Name              Std.Dev. Corr
 id     Long1|(Intercept) 1.3084       
        Long1|year        0.1898   0.50
Num. levels: id 40 

Sample avg. posterior predictive distribution 
of longitudinal outcomes:
               Median MAD_SD
Long1|mean_PPD 0.588  0.027 

------
For info on the priors used see help('prior_summary.stanreg').

The “Long1|etavalue” row under “Event submodel” is our \(\alpha_{mq}\) parameter (\(m = 1\), \(q = 1\)). The estimated median (1.362) tells us that for each one unit increase in an individual’s underlying level of log serum bilirubin, their estimated log hazard of death increases by some amount. The median absolute deviation (MAD) is provided as a more robust estimate of the standard deviation of the posterior distribution. In this case the MAD_SD for the association parameter indicates there is quite large uncertainty around the estimated association between log serum bilirubin and risk of death (recall this is a small dataset).

If we wanted some slightly more detailed output for each of the model parameters, as well as further details regarding the model estimation (for example computation time, number of longitudinal observations, number of individuals, type of baseline hazard, etc) we can instead use the summary method:


Model Info:

 function:        stan_jm
 formula (Long1): logBili ~ sex + trt + year + (year | id)
 family  (Long1): gaussian [identity]
 formula (Event): survival::Surv(futimeYears, death) ~ sex + trt
 baseline hazard: bs
 assoc:           etavalue (Long1)
 algorithm:       sampling
 priors:          see help('prior_summary')
 sample:          1000 (posterior sample size)
 num obs:         304 (Long1)
 num subjects:    40
 num events:      29 (72.5%)
 groups:          id (40)
 runtime:         0.6 mins

Estimates:
                                                mean     sd       2.5%  
Long1|(Intercept)                                0.263    0.635   -0.934
Long1|sexf                                       0.505    0.618   -0.722
Long1|trt                                       -0.163    0.413   -1.047
Long1|year                                       0.212    0.042    0.138
Long1|sigma                                      0.354    0.017    0.323
Long1|mean_PPD                                   0.587    0.029    0.533
Event|(Intercept)                               -3.183    0.603   -4.373
Event|sexf                                      -0.340    0.573   -1.460
Event|trt                                       -0.763    0.470   -1.777
Event|b-splines-coef1                           -0.977    1.058   -3.307
Event|b-splines-coef2                            0.432    0.839   -1.364
Event|b-splines-coef3                           -1.831    1.212   -4.221
Event|b-splines-coef4                            0.305    1.633   -2.910
Event|b-splines-coef5                           -0.079    1.715   -3.481
Event|b-splines-coef6                           -1.119    1.831   -5.310
Assoc|Long1|etavalue                             1.380    0.257    0.922
Sigma[id:Long1|(Intercept),Long1|(Intercept)]    1.712    0.431    0.999
Sigma[id:Long1|year,Long1|(Intercept)]           0.124    0.074    0.000
Sigma[id:Long1|year,Long1|year]                  0.036    0.017    0.014
log-posterior                                 -328.548   10.303 -349.653
                                                97.5% 
Long1|(Intercept)                                1.508
Long1|sexf                                       1.717
Long1|trt                                        0.706
Long1|year                                       0.299
Long1|sigma                                      0.387
Long1|mean_PPD                                   0.644
Event|(Intercept)                               -2.031
Event|sexf                                       0.808
Event|trt                                        0.179
Event|b-splines-coef1                            0.896
Event|b-splines-coef2                            1.956
Event|b-splines-coef3                            0.452
Event|b-splines-coef4                            3.467
Event|b-splines-coef5                            3.195
Event|b-splines-coef6                            1.827
Assoc|Long1|etavalue                             1.933
Sigma[id:Long1|(Intercept),Long1|(Intercept)]    2.725
Sigma[id:Long1|year,Long1|(Intercept)]           0.283
Sigma[id:Long1|year,Long1|year]                  0.080
log-posterior                                 -308.625

Diagnostics:
                                              mcse  Rhat  n_eff
Long1|(Intercept)                             0.035 1.025  320 
Long1|sexf                                    0.030 1.009  433 
Long1|trt                                     0.028 1.004  221 
Long1|year                                    0.003 1.013  268 
Long1|sigma                                   0.001 0.999  790 
Long1|mean_PPD                                0.001 1.001  932 
Event|(Intercept)                             0.020 0.999  952 
Event|sexf                                    0.018 0.999 1018 
Event|trt                                     0.014 0.999 1086 
Event|b-splines-coef1                         0.035 1.000  909 
Event|b-splines-coef2                         0.030 0.999  784 
Event|b-splines-coef3                         0.047 1.000  675 
Event|b-splines-coef4                         0.067 0.999  588 
Event|b-splines-coef5                         0.071 0.999  588 
Event|b-splines-coef6                         0.075 0.999  593 
Assoc|Long1|etavalue                          0.009 0.999  799 
Sigma[id:Long1|(Intercept),Long1|(Intercept)] 0.034 1.004  165 
Sigma[id:Long1|year,Long1|(Intercept)]        0.005 0.999  212 
Sigma[id:Long1|year,Long1|year]               0.001 0.999  232 
log-posterior                                 0.795 1.000  168 

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

The easiest way to extract the correlation matrix for the random effects (aside from viewing the print output) is to use the VarCorr function (modelled on the VarCorr function from the lme4 package). If you wish to extract the variances and covariances (instead of the standard deviations and correlations) then you can type the following to return a data frame with all of the relevant information:

  grp              var1       var2       vcov     sdcor
1  id Long1|(Intercept)       <NA> 1.71178525 1.3083521
2  id        Long1|year       <NA> 0.03603564 0.1898306
3  id Long1|(Intercept) Long1|year 0.12382877 0.4985753

Univariate joint model (current value and current slope association structure)

In the previous example we were fitting a shared parameter joint model which assumed that the log hazard of the event (in this case the log hazard of death) at time t was linearly related to the subject-specific expected value of the longitudinal marker (in this case the expected value of log serum bilirubin) also at time t. This is the default association structure, although it could be explicitly specified by setting the assoc = "etavalue" argument.

However, let’s suppose we believe that the log hazard of death is actually related to both the current value of log serum bilirubin and the current rate of change in log serum bilirubin. To estimate this joint model we need to indicate that we want to also include the subject-specific slope (at time t) from the longitudinal submodel as part of the association structure. We do this by setting the assoc argument equal to a character vector c("etavalue", "etaslope") which indicates our desired association structure:

In this example the subject-specific slope is actually constant across time t since we have a linear trajectory. Note however that we could still use the "etaslope" association structure even if we had a non-linear subject specific trajectory (for example modelled using cubic splines or polynomials).

Multivariate joint model (current value association structures)

Suppose instead that we were interested in two repeatedly measured clinical biomarkers, log serum bilirubin and serum albumin, and their association with the risk of death. We may wish to model these two biomarkers, allowing for the correlation between them, and estimating their respective associations with the log hazard of death. We will fit a linear mixed effects submodel (identity link, normal distribution) for each biomarker with a patient-specific intercept and linear slope but no other covariates. In the event submodel we will include gender (sex) and treatment (trt) as baseline covariates. Each biomarker is assumed to be associated with the log hazard of death at time \(t\) via its expected value at time \(t\) (i.e. a current value association structure).

The model we are going to fit can therefore be specified as:

\[ y_{im}(t_{ijm}) \sim N(\mu_{im}(t_{ijm}), \sigma_m) \]

\[ \eta_{im}(t) = \mu_{im}(t) = \beta_{0m} + \beta_{1m} t + b_{0mi} + b_{1mi} t \]

\[ h_i(t) = h_0(t; \boldsymbol{\omega}) \exp(\gamma_1 w_{1i} + \gamma_2 w_{2i} + \alpha_1 \mu_{i1}(t) + \alpha_2 \mu_{i2}(t)) \]

where \(t\) is time in years, and \(w_{1i}\) and \(w_{2i}\) are, respectively, the gender and treatment indicators for individual \(i\).

(Note that due to the very small sample size, the clinical findings from this analysis should not be overinterpreted!).

Fitting a multivariate joint model.

Please note the warmup may be much slower than later iterations!

SAMPLING FOR MODEL 'jm' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000506 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 5.06 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 36.18 seconds (Warm-up)
Chain 1:                30.184 seconds (Sampling)
Chain 1:                66.364 seconds (Total)
Chain 1: 

We can now examine the output from the fitted model, for example

stan_jm
 formula (Long1): logBili ~ sex + trt + year + (year | id)
 family  (Long1): gaussian [identity]
 formula (Long2): albumin ~ sex + trt + year + (year | id)
 family  (Long2): gaussian [identity]
 formula (Event): survival::Surv(futimeYears, death) ~ sex + trt
 baseline hazard: bs
 assoc:           etavalue (Long1), etavalue (Long2)
------

Longitudinal submodel 1: logBili
            Median MAD_SD
(Intercept)  0.238  0.567
sexf         0.523  0.563
trt         -0.109  0.336
year         0.224  0.045
sigma        0.354  0.016

Longitudinal submodel 2: albumin
            Median MAD_SD
(Intercept)  3.474  0.225
sexf         0.057  0.237
trt          0.006  0.168
year        -0.156  0.024
sigma        0.290  0.014

Event submodel:
                Median   MAD_SD   exp(Median)
(Intercept)        7.009    2.805 1106.933   
sexf              -0.047    0.720    0.954   
trt               -0.514    0.481    0.598   
Long1|etavalue     0.806    0.286    2.239   
Long2|etavalue    -3.126    0.868    0.044   
b-splines-coef1   -0.984    1.144       NA   
b-splines-coef2    0.493    0.888       NA   
b-splines-coef3   -2.594    1.335       NA   
b-splines-coef4   -0.561    1.892       NA   
b-splines-coef5   -1.259    1.907       NA   
b-splines-coef6   -2.656    1.862       NA   

Group-level error terms:
 Groups Name              Std.Dev. Corr             
 id     Long1|(Intercept) 1.2385                    
        Long1|year        0.1870    0.50            
        Long2|(Intercept) 0.5162   -0.65 -0.51      
        Long2|year        0.0962   -0.56 -0.80  0.45
Num. levels: id 40 

Sample avg. posterior predictive distribution 
of longitudinal outcomes:
               Median MAD_SD
Long1|mean_PPD 0.588  0.028 
Long2|mean_PPD 3.342  0.022 

------
For info on the priors used see help('prior_summary.stanreg').

or we can examine the summary output for the association parameters alone:


Model Info:

 function:        stan_jm
 formula (Long1): logBili ~ sex + trt + year + (year | id)
 family  (Long1): gaussian [identity]
 formula (Long2): albumin ~ sex + trt + year + (year | id)
 family  (Long2): gaussian [identity]
 formula (Event): survival::Surv(futimeYears, death) ~ sex + trt
 baseline hazard: bs
 assoc:           etavalue (Long1), etavalue (Long2)
 algorithm:       sampling
 priors:          see help('prior_summary')
 sample:          1000 (posterior sample size)
 num obs:         304 (Long1), 304 (Long2)
 num subjects:    40
 num events:      29 (72.5%)
 groups:          id (40)
 runtime:         1.1 mins

Estimates:
                       mean   sd     2.5%   25%    50%    75%    97.5%
Assoc|Long1|etavalue  0.803  0.290  0.236  0.611  0.806  0.995  1.355 
Assoc|Long2|etavalue -3.165  0.862 -4.973 -3.730 -3.126 -2.570 -1.574 

Diagnostics:
                     mcse  Rhat  n_eff
Assoc|Long1|etavalue 0.009 0.999 951  
Assoc|Long2|etavalue 0.034 1.001 632  

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Posterior predictions

We can also access the range of post-estimation functions (described in the stan_jm and related help documentation; see for example help(posterior_traj) or help(posterior_survfit)).

Predicted individual-specific longitudinal trajectory for in-sample individuals

Predicted individual-specific biomarker values can be obtained using either the posterior_traj or posterior_predict function. The posterior_traj is preferable, because it can be used to obtain the biomarker values at a series of evenly spaced time points between baseline and the individual’s event or censoring time by using the default interpolate = TRUE option. Whereas, the posterior_predict function only provides the predicted biomarker values at the observed time points, or the time points in the new data. Predicting the biomarker values at a series of evenly spaced time points can be convenient because they can be easily used for plotting the longitudinal trajectory. Moreover, by default the posterior_traj returns a data frame with variables corresponding to the individual ID, the time, the predicted mean biomarker value, the limits for the 95% credible interval (i.e. uncertainty interval for the predicted mean biomarker value), and limits for the 95% prediction interval (i.e. uncertainty interval for a predicted biomarker data point), where the level for the uncertainty intervals can be changed via the prob argument. Conversely, the posterior_predict function returns an \(S\) by \(N\) matrix of predictions where \(S\) is the number of posterior draws and \(N\) is the number of prediction time points (note that this return type can also be obtained for posterior_traj by specifying the argument return_matrix = TRUE).

As an example, let’s plot the predicted individual-specific longitudinal trajectories for each of the two biomarkers (log serum bilirubin and serum albumin) in the multivariate joint model estimated above. We will do this for three individuals (IDs 6, 7 and 8) who were included in the model estimation.

Here are the plots for log serum bilirubin:

and here are the plots for serum albumin:

The m argument specifies which biomarker we want to predict for (only relevant for a multivariate joint model). The ids argument is optional, and specifies a subset of individuals for whom we want to predict. In the plotting method, the plot_observed = TRUE specifies that we want to include the observed biomarker values in the plot of the longitudinal trajectory.

If we wanted to extrapolate the trajectory forward from the event or censoring time for each individual, then this can be easily achieved by specifying extrapolate = TRUE in the posterior_traj call. For example, here is the plot for log serum bilirubin with extrapolation:

and for serum albumin with extrapolation:

Here, we included the vline = TRUE which adds a vertical dashed line at the timing of the individual’s event or censoring time. The interpolation and extrapolation of the biomarker trajectory can be further controlled through the control argument to the posterior_traj function; for example, we could specify the number of time points at which to predict, the distance by which to extrapolate, and so on.

We could customize these plots further, for example, by using any of the ggplot2 functionality or using the additional arguments described in help(plot.predict.stanjm).

Predicted individual-specific survival curves for in-sample individuals

Predicted individual-specific survival probabilities and/or survival curves can be obtained using the posterior_survfit function. The function by default returns a data frame with the individual ID, the time, and the predicted survival probability (posterior mean and limits for the 95% credible interval). The uncertainty level for the credible interval can be changed via the prob argument. By default, individual-specific survival probabilities are calculated conditional on the individual’s last known survival time. When we are predicting survival probabilities for individuals that were used in the estimation of the model (i.e. in-sample individuals, where no new covariate data is provided), then the individual’s “last known survival time” will be their event or censoring time. (Note that if we didn’t want to condition on the individual’s last known survival time, then we could specify condition = FALSE, but we probably wouldn’t want to do this unless we were calculating marginal or standardised survival probabilities, which are discussed later).

The default argument extrapolate = TRUE specifies that the individual-specific conditional survival probabilities will be calculated at evenly spaced time points between the individual’s last known survival time and the maximum follow up time that was observed in the estimation sample. The behaviour of the extrapolation can be further controlled via the control argument. If we were to specify extrapolate = FALSE then the survival probabilities would only be calculated at one time point, which could be specified in the times argument (or otherwise would default to the individual’s last known survival time).

As an example, let’s plot the predicted individual-specific conditional survival curve for the same three individuals that were used in the previous example. The predicted survival curve will be obtained under the multivariate joint model estimated above.

We could customize the plot further, for example, by using any of the ggplot2 functionality or using the additional arguments described in help(plot.survfit.stanjm).

Combined plot of longitudinal trajectories and survival curves

The package also provides a convenience plotting function, which combines plots of the individual-specific longitudinal trajectories, and the individual-specific survival function. We can demonstrate this by replotting the predictions for the three individuals in the previous example:

Here we can see the strong relationship between the underlying values of the biomarkers and mortality. Patient 8 who, relative to patients 6 and 7, has a higher underlying value for log serum bilirubin and a lower underlying value for serum albumin at the end of their follow up has a far worse predicted probability of survival.

Predicted individual-specific longitudinal trajectory and survival curve for out-of-sample individuals (i.e. dynamic predictions)

Let us take an individual from our training data, in this case the individual with subject ID value 8. However, we will pretend this individual was not a member of our training data and rather that they are a new individual for whom we have obtained new biomarker measurements. Our goal is to obtain predictions for the longitudinal trajectory for this individual, and their conditional survival curve, both conditional on the biomarker measurements we currently have available for them.

First, let’s extract the data for subject 8 and then rename their subject ID value so that they appear to be an individual who was not included in our training dataset:

Note that we have both the longitudinal data and event data for this new individual. We require data for both submodels because we are going to generate dynamic predictions that require drawing new individual-specific parameters (i.e. random effects) for this individual conditional on their observed data. That means we need to evaluate the likelihood for the full joint model and that requires both the longitudinal and event data (note however that the status indicator death will be ignored, since it is assumed that the individual we are predicting for is still alive at the time we wish to generate the predictions).

Now we can pass this data to the posterior_traj function in the same way as for the in-sample individuals, except we will now specify the newdataLong and newdataEvent arguments. We will also specify the last_time argument so that the function knows which variable in the event data specifies the individual’s last known survival time (the default behaviour is to use the time of the last biomarker measurement).

Our predictions for this new individual for the log serum bilirubin trajectory can be obtained using:

Drawing new random effects for 1 individuals. Monitoring progress:

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

and for the serum albumin trajectory:

Drawing new random effects for 1 individuals. Monitoring progress:

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

For the conditional survival probabilities we use similar information, provided to the posterior_survfit function:

Drawing new random effects for 1 individuals. Monitoring progress:

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

We can then use the plot_stack_jm function, as we saw in a previous example, to stack the plots of the longitudinal trajectory and the conditional survival curve:

Here we see that the predicted longitudinal trajectories and conditional survival curve for this individual, obtained using the dynamic predictions approach, are similar to the predictions we obtained when we used their individual-specific parameters from the original model estimation. This is because in both situations we are conditioning on the same outcome data.

Side note: We can even compare the estimated individual specific parameters obtained under the two approaches. For example, here is the posterior mean for the estimated individual-specific parameters for individual 8 from the fitted model:

$`(Intercept)`
[1] -1.713588

$year
[1] 0.1109282

$`(Intercept)`
[1] 0.4775566

$year
[1] -0.05163334

and here is the mean of the draws for the individual-specific parameters for individual 8 under the dynamic predictions approach:

b[Long1|(Intercept) id:new_patient]        b[Long1|year id:new_patient] 
                        -1.72072281                          0.10716182 
b[Long2|(Intercept) id:new_patient]        b[Long2|year id:new_patient] 
                         0.45668406                         -0.03562197 

Predicted population-level longitudinal trajectory

Suppose we wanted to predict the longitudinal trajectory for each of the biomarkers, marginalising over the distribution of the individual-specific parameters. To do this, we can pass a new data frame with the covariate values we want to use in the predictions. Here, we will demonstrate this by obtaining the predicted trajectory for log serum bilirubin, under the multivariate joint model that was estimated previously. Our prediction data will require the variables year, sex and trt, since these were the covariates used in the longitudinal submodel.

We will predict the value of log serum bilirubin at years 0 through 10, for each combination of sex and trt. We also need to include the id variable in our prediction data because this is relevant to the longitudinal submodel. Since we want to marginalise over the individual-specific parameters (i.e. individual-level random effects) we need to note two things:

  • First, the values for the id variable must not match any individual used in the model estimation. Here, we use the following id values: "male_notrt", "female_notrt", "male_trt", and "female_trt", since each individual in our prediction data represents a different combination of sex and trt. However, we could have given the individuals any id value just as long as it didn’t match an individual who was used in the model estimation

  • Second, we need to specify the argument dynamic = FALSE when calling posterior_traj. This specifies that we do not want to draw new individual-specific parameters conditional on outcome data observed up to some time \(t\). Instead, we want predictions that marginalise over the distribution of individual-specific parameters and are therefore conditional only on covariates and not conditional on outcome data for the new individuals.

Here is our prediction data:

'data.frame':   44 obs. of  4 variables:
 $ id  : chr  "male_notrt" "male_notrt" "male_notrt" "male_notrt" ...
 $ year: num  0 1 2 3 4 5 6 7 8 9 ...
 $ sex : Factor w/ 2 levels "m","f": 1 1 1 1 1 1 1 1 1 1 ...
 $ trt : int  0 0 0 0 0 0 0 0 0 0 ...

And to predict the marginal longitudinal trajectory for log serum bilirubin under each covariate profile and plot it we can type:

Because we are marginalising over the distribution of the individual-specific parameters, we are incorporating all the variation related to between-individual differences, and therefore the prediction interval is wide (shown by the shaded area around the marginal longitudinal trajectory). The magnitude of the effects of both sex and trt are relatively small compared to the population-level effect of year and the between-individual variation in the intercept and slope. For example, here are the point estimates for the population-level effects of sex, trt, and year:

(Intercept)        sexf         trt        year 
  0.2379999   0.5234954  -0.1092264   0.2242435 

and here are the standard deviations for the individual-level random effects:

 Groups Name              Std.Dev. Corr                
 id     Long1|(Intercept) 1.23855                      
        Long1|year        0.18703   0.498              
        Long2|(Intercept) 0.51616  -0.649 -0.510       
        Long2|year        0.09620  -0.562 -0.799  0.448

This shows us that the point estimates for the population-level effects of sex and trt are 0.52 and -0.11, respectively, whereas the standard deviations for the individual-specific intercept and slope parameters are 1.24 and 0.19; hence, any differences due to the population-level effects of gender and treatment (i.e. differences in the black line across the four panels of the plot) are swamped by the width of the uncertainty intervals (i.e. the grey shaded areas).

Standardised survival curves

In this example we show how a standardised survival curve can be obtained, where the \(i = 1,...,N^{pred}\) individuals used in generating the standardised survival curve are the same individuals that were used in estimating the model. We will obtain the survival curve for the multivariate joint model estimated in an earlier example (mod3). The standardise = TRUE argument to posterior_survfit specifies that we want to obtain individual-specific predictions of the survival curve and then average these. Because, in practical terms, we need to obtain survival probabilities at time \(t\) for each individual and then average them, we want to explicitly specify the values of \(t\) we want to use (and the same values of \(t\) will be used for all individuals). We specify the values of \(t\) to use via the times argument; here we will predict the standardised survival curve at time 0 and then for convenience we can just specify extrapolate = TRUE (which is the default anyway) which will mean we automatically predict at 10 evenly spaced time points between 0 and the maximum event or censoring time.

    year survpred  ci_lb  ci_ub
1 0.0000   1.0000 1.0000 1.0000
2 1.0154   0.8321 0.7655 0.8831
3 2.0307   0.7334 0.6598 0.7843
4 3.0461   0.6847 0.6159 0.7350
5 4.0614   0.6423 0.5737 0.6925
6 5.0768   0.6009 0.5329 0.6495

References

  1. Henderson R, Diggle P, Dobson A. Joint modelling of longitudinal measurements and event time data. Biostatistics 2000;1(4):465-80.
  2. Wulfsohn MS, Tsiatis AA. A joint model for survival and longitudinal data measured with error. Biometrics 1997;53(1):330-9.
  3. Tsiatis AA, Davidian M. Joint modeling of longitudinal and time-to-event data: An overview. Stat Sinica 2004;14(3):809-34.
  4. Gould AL, Boye ME, Crowther MJ, Ibrahim JG, Quartey G, Micallef S, et al. Joint modeling of survival and longitudinal non-survival data: current methods and issues. Report of the DIA Bayesian joint modeling working group. Stat Med. 2015;34(14):2181-95.
  5. Rizopoulos D. Joint Models for Longitudinal and Time-to-Event Data: With Applications in R CRC Press; 2012.
  6. Liu G, Gould AL. Comparison of alternative strategies for analysis of longitudinal trials with dropouts. J Biopharm Stat 2002;12(2):207-26.
  7. Prentice RL. Covariate Measurement Errors and Parameter-Estimation in a Failure Time Regression-Model. Biometrika 1982;69(2):331-42.
  8. Baraldi AN, Enders CK. An introduction to modern missing data analyses. J Sch Psychol 2010;48(1):5-37.
  9. Philipson PM, Ho WK, Henderson R. Comparative review of methods for handling drop-out in longitudinal studies. Stat Med 2008;27(30):6276-98.
  10. Pantazis N, Touloumi G. Bivariate modelling of longitudinal measurements of two human immunodeficiency type 1 disease progression markers in the presence of informative drop-outs. Applied Statistics 2005;54:405-23.
  11. Taylor JM, Park Y, Ankerst DP, et al. Real-time individual predictions of prostate cancer recurrence using joint models. Biometrics 2013;69(1):206-13.
  12. Brilleman SL, Crowther MJ, Moreno-Betancur M, Buros Novik J, Wolfe R. Joint longitudinal and time-to-event models via Stan. In: Proceedings of StanCon 2018. https://github.com/stan-dev/stancon_talks
  13. Stan Development Team. rstanarm: Bayesian applied regression modeling via Stan. R package version 2.14.1. https://mc-stan.org/. 2016.
  14. R Core Team. R: A language and environment for statistical computing. Vienna, Austria: R Foundation for Statistical Computing; 2015.
  15. Crowther MJ, Lambert PC, Abrams KR. Adjusting for measurement error in baseline prognostic biomarkers included in a time-to-event analysis: a joint modelling approach. BMC Med Res Methodol 2013;13.
  16. Hickey GL, Philipson P, Jorgensen A, Kolamunnage-Dona R. Joint modelling of time-to-event and multivariate longitudinal outcomes: recent developments and issues. BMC Med Res Methodol 2016;16(1):117.
  17. Rizopoulos D, Ghosh P. A Bayesian semiparametric multivariate joint model for multiple longitudinal outcomes and a time-to-event. Stat Med. 2011;30(12):1366-80.
  18. Laurie DP. Calculation of Gauss-Kronrod quadrature rules. Math Comput 1997;66(219):1133-45.
  19. Rizopoulos D. Dynamic Predictions and Prospective Accuracy in Joint Models for Longitudinal and Time-to-Event Data. Biometrics 2011;67(3):819-829.
  20. Therneau T, Grambsch P. Modeling Survival Data: Extending the Cox Model Springer-Verlag, New York; 2000. ISBN: 0-387-98784-3

  1. By “true” value of the biomarker, we mean the value of the biomarker which is not subject to measurement error or discrete time observation. Of course, for the expected value from the longitudinal submodel to be considered the so-called “true” underlying biomarker value, we would need to have specified the longitudinal submodel appropriately!

  2. We refer the reader to the priors vignette for a discussion of the possible prior distributions.

  3. These random draws from the posterior distribution of the group-specific parameters are stored each time a joint model is estimated using stan_glmer, stan_mvmer, or stan_jm; they are saved under an ID value called "_NEW_"

rstanarm/inst/doc/glmer.html0000644000176200001440000042536114551550347015643 0ustar liggesusers Estimating Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm

Estimating Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to use the stan_lmer, stan_glmer, stan_nlmer, and stan_gamm4 functions in the rstanarm package to estimate linear and generalized (non-)linear models with parameters that may vary across groups. Before continuing, we recommend reading the vignettes (navigate up one level) for the various ways to use the stan_glm function. The Hierarchical Partial Pooling vignette also has examples of both stan_glm and stan_glmer.

GLMs with group-specific terms

Models with this structure are referred to by many names: multilevel models, (generalized) linear mixed (effects) models (GLMM), hierarchical (generalized) linear models, etc. In the simplest case, the model for an outcome can be written as \[\mathbf{y} = \alpha + \mathbf{X} \boldsymbol{\beta} + \mathbf{Z} \mathbf{b} + \boldsymbol{\epsilon},\] where \(\mathbf{X}\) is a matrix of predictors that is analogous to that in Generalized Linear Models and \(\mathbf{Z}\) is a matrix that encodes deviations in the predictors across specified groups.

The terminology for the unknowns in the model is diverse. To frequentists, the error term consists of \(\mathbf{Z}\mathbf{b} + \boldsymbol{\epsilon}\) and the observations within each group are not independent conditional on \(\mathbf{X}\) alone. Since \(\mathbf{b}\) is considered part of the random error term, frequentists allow themselves to make distributional assumptions about \(\mathbf{b}\), invariably that it is distributed multivariate normal with mean vector zero and structured covariance matrix \(\boldsymbol{\Sigma}\). If \(\epsilon_i\) is also distributed (univariate) normal with mean zero and standard deviation \(\sigma\), then \(\mathbf{b}\) can be integrated out, which implies \[\mathbf{y} \thicksim \mathcal{N}\left(\alpha + \mathbf{X}\boldsymbol{\beta}, \sigma^2 \mathbf{I}+\mathbf{Z}^\top \boldsymbol{\Sigma} \mathbf{Z} \right),\] and it is possible to maximize this likelihood function by choosing proposals for the parameters \(\alpha\), \(\boldsymbol{\beta}\), and (the free elements of) \(\boldsymbol{\Sigma}\).

Consequently, frequentists refer to \(\mathbf{b}\) as the random effects because they capture the random deviation in the effects of predictors from one group to the next. In contradistinction, \(\alpha\) and \(\boldsymbol{\beta}\) are referred to as fixed effects because they are the same for all groups. Moreover, \(\alpha\) and \(\boldsymbol{\beta}\) persist in the model in hypothetical replications of the analysis that draw the members of the groups afresh every time, whereas \(\mathbf{b}\) would differ from one replication to the next. Consequently, \(\mathbf{b}\) is not a “parameter” to be estimated because parameters are unknown constants that are fixed in repeated sampling.

Bayesians condition on the data in-hand without reference to repeated sampling and describe their beliefs about the unknowns with prior distributions before observing the data. Thus, the likelihood in a simple hierarchical model in rstanarm is \[\mathbf{y} \thicksim \mathcal{N}\left(\alpha + \mathbf{X}\boldsymbol{\beta} + \mathbf{Z}\mathbf{b}, \sigma^2 \mathbf{I}\right)\] and the observations are independent conditional on \(\mathbf{X}\) and \(\mathbf{Z}\). In this formulation, there are

  • intercept(s) and coefficients that are common across groups
  • deviations in the intercept(s) and / or coefficients that vary across groups

Bayesians are compelled to state their prior beliefs about all unknowns and the usual assumption (which is maintained in rstanarm) is that \(\mathbf{b} \thicksim \mathcal{N}\left(\mathbf{0},\boldsymbol{\Sigma}\right),\) but it is then necessary to state prior beliefs about \(\boldsymbol{\Sigma}\), in addition to \(\alpha\), \(\boldsymbol{\beta}\), and \(\sigma\).

One of the many challenges of fitting models to data comprising multiple groupings is confronting the tradeoff between validity and precision. An analysis that disregards between-group heterogeneity can yield parameter estimates that are wrong if there is between-group heterogeneity but would be relatively precise if there actually were no between-group heterogeneity. Group-by-group analyses, on the other hand, are valid but produce estimates that are relatively imprecise. While complete pooling or no pooling of data across groups is sometimes called for, models that ignore the grouping structures in the data tend to underfit or overfit (Gelman et al., 2013). Hierarchical modeling provides a compromise by allowing parameters to vary by group at lower levels of the hierarchy while estimating common parameters at higher levels. Inference for each group-level parameter is informed not only by the group-specific information contained in the data but also by the data for other groups as well. This is commonly referred to as borrowing strength or shrinkage.

In rstanarm, these models can be estimated using the stan_lmer and stan_glmer functions, which are similar in syntax to the lmer and glmer functions in the lme4 package. However, rather than performing (restricted) maximum likelihood (RE)ML estimation, Bayesian estimation is performed via MCMC. The Bayesian model adds independent prior distributions on the regression coefficients (in the same way as stan_glm) as well as priors on the terms of a decomposition of the covariance matrices of the group-specific parameters. These priors are discussed in greater detail below.

Priors on covariance matrices

In this section we discuss a flexible family of prior distributions for the unknown covariance matrices of the group-specific coefficients.

Overview

For each group, we assume the vector of varying slopes and intercepts is a zero-mean random vector following a multivariate Gaussian distribution with an unknown covariance matrix to be estimated. Unfortunately, expressing prior information about a covariance matrix is not intuitive and can also be computationally challenging. When the covariance matrix is not \(1\times 1\), it is often both much more intuitive and efficient to work instead with the correlation matrix and variances. When the covariance matrix is \(1\times 1\), we still denote it as \(\boldsymbol{\Sigma}\) but most of the details in this section do not apply.

The variances are in turn decomposed into the product of a simplex vector (probability vector) and the trace of the implied covariance matrix, which is defined as the sum of its diagonal elements. Finally, this trace is set equal to the product of the order of the matrix and the square of a scale parameter. This implied prior on a covariance matrix is represented by the decov (short for decomposition of covariance) function in rstanarm.

Details

Using the decomposition described above, the prior used for a correlation matrix \(\Omega\) is called the LKJ distribution and has a probability density function proportional to the determinant of the correlation matrix raised to a power of \(\zeta\) minus one:

\[ f(\Omega | \zeta) \propto \text{det}(\Omega)^{\zeta - 1}, \quad \zeta > 0. \]

The shape of this prior depends on the value of the regularization parameter, \(\zeta\) in the following ways:

  • If \(\zeta = 1\) (the default), then the LKJ prior is jointly uniform over all correlation matrices of the same dimension as \(\Omega\).
  • If \(\zeta > 1\), then the mode of the distribution is the identity matrix. The larger the value of \(\zeta\) the more sharply peaked the density is at the identity matrix.
  • If \(0 < \zeta < 1\), then the density has a trough at the identity matrix.

The \(J \times J\) covariance matrix \(\Sigma\) of a random vector \(\boldsymbol{\theta} = (\theta_1, \dots, \theta_J)\) has diagonal entries \({\Sigma}_{jj} = \sigma^2_j = \text{var}(\theta_j)\). Therefore, the trace of the covariance matrix is equal to the sum of the variances. We set the trace equal to the product of the order of the covariance matrix and the square of a positive scale parameter \(\tau\):

\[\text{tr}(\Sigma) = \sum_{j=1}^{J} \Sigma_{jj} = J\tau^2.\]

The vector of variances is set equal to the product of a simplex vector \(\boldsymbol{\pi}\) — which is non-negative and sums to 1 — and the scalar trace: \(J \tau^2 \boldsymbol{\pi}\). Each element \(\pi_j\) of \(\boldsymbol{\pi}\) then represents the proportion of the trace (total variance) attributable to the corresponding variable \(\theta_j\).

For the simplex vector \(\boldsymbol{\pi}\) we use a symmetric Dirichlet prior, which has a single concentration parameter \(\gamma > 0\):

  • If \(\gamma = 1\) (the default), then the prior is jointly uniform over the space of simplex vectors with \(J\) elements.
  • If \(\gamma > 1\), then the prior mode corresponds to all variables having the same (proportion of total) variance, which can be used to ensure that the posterior variances are not zero. As the concentration parameter approaches infinity, this mode becomes more pronounced.
  • If \(0 < \gamma < 1\), then the variances are more polarized.

If all the elements of \(\boldsymbol{\theta}\) were multiplied by the same number \(k\), the trace of their covariance matrix would increase by a factor of \(k^2\). For this reason, it is sensible to use a scale-invariant prior for \(\tau\). We choose a Gamma distribution, with shape and scale parameters both set to \(1\) by default, implying a unit-exponential distribution. Users can set the shape hyperparameter to some value greater than one to ensure that the posterior trace is not zero. In the case where \(\boldsymbol{\Sigma}\) is \(1\times 1\), \(\tau\) is the cross-group standard deviation in the parameters and its square is the variance (so the Gamma prior with its shape and scale directly applies to the cross-group standard deviation in the parameters).

Comparison with lme4

There are several advantages to estimating these models using rstanarm rather than the lme4 package. There are also a few drawbacks. In this section we briefly discuss what we find to be the two most important advantages as well as an important disadvantage.

Advantage: better uncertainty estimates

While lme4 uses (restricted) maximum likelihood (RE)ML estimation, rstanarm enables full Bayesian inference via MCMC to be performed. It is well known that (RE)ML tends to underestimate uncertainties because it relies on point estimates of hyperparameters. Full Bayes, on the other hand, propagates the uncertainty in the hyperparameters throughout all levels of the model and provides more appropriate estimates of uncertainty for models that consist of a mix of common and group-specific parameters.

Advantage: incorporate prior information

The stan_glmer and stan_lmer functions allow the user to specify prior distributions over the regression coefficients as well as any unknown covariance matrices. There are various reasons to specify priors, from helping to stabilize computation to incorporating important information into an analysis that does not enter through the data.

Disadvantage: speed

The benefits of full Bayesian inference (via MCMC) come with a cost. Fitting models with (RE)ML will tend to be much faster than fitting a similar model using MCMC. Speed comparable to lme4 can be obtained with rstanarm using approximate Bayesian inference via the mean-field and full-rank variational algorithms (see help("rstanarm-package", "rstanarm") for details). These algorithms can be useful to narrow the set of candidate models in large problems, but MCMC should always be used for final statistical inference.

Relationship to glmer

In the lme4 package, there is a fundamental distinction between the way that Linear Mixed Models and Generalized Linear Mixed Models are estimated. In Linear Mixed Models, \(\mathbf{b}\) can be integrated out analytically, leaving a likelihood function that can be maximized over proposals for the parameters. To estimate a Linear Mixed Model, one can call the lmer function.

Generalized Linear Mixed Models are appropriate when the conditional mean of the outcome is determined by an inverse link function, \(\boldsymbol{\mu} = g\left(\alpha + \mathbf{X} \boldsymbol{\beta} + \mathbf{Z}\mathbf{b}\right)\). If \(g\left(\cdot\right)\) is not the identity function, then it is not possible to integrate out \(\mathbf{b}\) analytically and numerical integration must be used. To estimate a Generalized Linear Mixed Model, one can call the glmer function and specify the family argument.

In the rstanarm package, there is no such fundamental distinction; in fact stan_lmer simply calls stan_glmer with family = gaussian(link = "identity"). Bayesians do not (have to) integrate \(\mathbf{b}\) out of the likelihood and if \(\mathbf{b}\) is not of interest, then the margins of its posterior distribution can simply be ignored.

Relationship to gamm4

The rstanarm package includes a stan_gamm4 function that is similar to the gamm4 function in the gamm4 package, which is in turn similar to the gamm function in the mgcv package. The substring gamm stands for Generalized Additive Mixed Models, which differ from Generalized Additive Models (GAMs) due to the presence of group-specific terms that can be specified with the syntax of lme4. Both GAMs and GAMMs include nonlinear functions of (non-categorical) predictors called “smooths”. In the example below, so-called “thin-plate splines” are used to model counts of roaches where we might fear that the number of roaches in the current period is an exponentially increasing function of the number of roaches in the previous period. Unlike stan_glmer, in stan_gamm4 it is necessary to specify group-specific terms as a one-sided formula that is passed to the random argument as in the lme function in the nlme package.

Here we see that the relationship between past and present roaches is estimated to be nonlinear. For a small number of past roaches, the function is steep and then it appears to flatten out, although we become highly uncertain about the function in the rare cases where the number of past roaches is large.

Relationship to nlmer

The stan_gamm4 function allows designated predictors to have a nonlinear effect on what would otherwise be called the “linear” predictor in Generalized Linear Models. The stan_nlmer function is similar to the nlmer function in the lme4 package, and essentially allows a wider range of nonlinear functions that relate the linear predictor to the conditional expectation of a Gaussian outcome.

To estimate an example model with the nlmer function in the lme4 package, we start by rescaling the outcome and main predictor(s) by a constant

Although doing so has no substantive effect on the inferences obtained, it is numerically much easier for Stan and for lme4 to work with variables whose units are such that the estimated parameters tend to be single-digit numbers that are not too close to zero. The nlmer function requires that the user pass starting values to the ironically-named self-starting non-linear function:

Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
not positive definite or contains NA values: falling back to var-cov estimated from RX
Nonlinear mixed model fit by maximum likelihood  ['nlmerMod']
Formula: circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym | Tree
   Data: Orange

     AIC      BIC   logLik deviance df.resid 
   -49.2    -41.4     29.6    -59.2       30 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.9170 -0.5421  0.1754  0.7116  1.6820 

Random effects:
 Groups   Name Variance Std.Dev.
 Tree     Asym 0.100149 0.31646 
 Residual      0.006151 0.07843 
Number of obs: 35, groups:  Tree, 5

Fixed effects:
     Estimate Std. Error t value
Asym   1.9205     0.1558   12.32
xmid   7.2791     0.3444   21.14
scal   3.4807     0.2631   13.23

Correlation of Fixed Effects:
     Asym  xmid 
xmid 0.384      
scal 0.362 0.762

Note the warning messages indicating difficulty estimating the variance-covariance matrix. Although lme4 has a fallback mechanism, the need to utilize it suggests that the sample is too small to sustain the asymptotic assumptions underlying the maximum likelihood estimator.

In the above example, we use the SSlogis function, which is a lot like the logistic CDF, but with an additional Asym argument that need not be one and indicates what value the function approaches for large values of the first argument. In this case, we can interpret the asymptote as the maximum possible circumference for an orange. However, this asymptote is allowed to vary from tree to tree using the Asym | Tree syntax, which reflects an assumption that the asymptote for a randomly-selected tree deviates from the asymptote for the population of orange trees in a Gaussian fashion with mean zero and an unknown standard deviation.

The nlmer function supports user-defined non-linear functions, whereas the stan_nlmer function only supports the pre-defined non-linear functions starting with SS in the stats package, which are

 [1] "SSasymp"     "SSasympOff"  "SSasympOrig" "SSbiexp"     "SSfol"      
 [6] "SSfpl"       "SSgompertz"  "SSlogis"     "SSmicmen"    "SSweibull"  

To fit essentially the same model using Stan’s implementation of MCMC, we add a stan_ prefix

stan_nlmer
 family:       gaussian [inv_SSlogis]
 formula:      circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym | Tree
 observations: 35
------
     Median MAD_SD
Asym 1.9    0.1   
xmid 7.2    0.4   
scal 3.4    0.3   

Auxiliary parameter(s):
      Median MAD_SD
sigma 0.1    0.0   

Error terms:
 Groups   Name Std.Dev.
 Tree     Asym 0.311   
 Residual      0.089   
Num. levels: Tree 5 

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

In stan_nlmer, it is not necessary to supply starting values; however, in this case it was necessary to specify the init_r argument so that the randomly-chosen starting values were not more than \(0.5\) away from zero (in the unconstrained parameter space). The default value of \(2.0\) produced suboptimal results.

As can be seen, the posterior medians and estimated standard deviations in the MCMC case are quite similar to the maximum likelihood estimates and estimated standard errors. However, stan_nlmer produces uncertainty estimates for the tree-specific deviations in the asymptote, which are considerable.

As can be seen, the age of the tree has a non-linear effect on the predicted circumference of the tree (here for an out-of-sample tree):

If we were pharmacologists, we could evaluate drug concentration using a first-order compartment model, such as

However, in this case the posterior distribution is bimodal. Thus, you should always be running many chains when using Stan, especially stan_nlmer.

Conclusion

There are model fitting functions in the rstanarm package that can do essentially all of what can be done in the lme4 and gamm4 packages — in the sense that they can fit models with multilevel structure and / or nonlinear relationships — and propagate the uncertainty in the parameter estimates to the predictions and other functions of interest. The documentation of lme4 and gamm4 has various warnings that acknowledge that the estimated standard errors, confidence intervals, etc. are not entirely correct, even from a frequentist perspective.

A frequentist point estimate would also completely miss the second mode in the last example with stan_nlmer. Thus, there is considerable reason to prefer the rstanarm variants of these functions for regression modeling. The only disadvantage is the execution time required to produce an answer that properly captures the uncertainty in the estimates of complicated models such as these.

rstanarm/inst/doc/interaction.rda0000644000176200001440000000113313540753420016633 0ustar liggesusers r0b```b`fcd`b2Y# '+I-JL.c`` i^ -FfĎQfI-K) %yə%PfT490pҀ/1F;0h-FK24 B>$<w:ho`u.ۂ%OLV:? KH9_}y'2˾ܛ}So?H~fWbJ{ы]OV4;2ͺI,=mmKק-~u(n ɰR[O\5%'hՕV=!mJ5]_s5zÙ;oU_rsܭ x.~iuO;Ϳ9~vUدs=38>-v_%ylv"+-.?_ԙ1&AaO=}߬~.Гs^bn*,9B92rRaĒ"X`M˱U&d*SЬ,/׃YL @o CgY˃@ɕXV4fz8frstanarm/inst/doc/polr.Rmd0000644000176200001440000003134713722762571015267 0ustar liggesusers--- title: "Estimating Ordinal Regression Models with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate models for ordinal outcomes using the `stan_polr` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1. One of the strengths of doing MCMC with Stan --- as opposed to a Gibbs sampler --- is that reparameterizations are essentially costless, which allows the user to specify priors on parameters that are either more intuitive, numerically stable, or computationally efficient without changing the posterior distribution of the parameters that enter the likelihood. Advantageous parameterizations are already built into the Stan programs used in the __rstanarm__ package, so it is just a matter of using these vignettes to explain how the priors work in the context of these reparameterizations. # Likelihood Ordinal outcomes fall in one of $J$ categories. 
One way to motivate an ordinal model is to introduce a latent variable, $y^\ast$, that is related to the observed outcomes via an observation mechanism: $$y=\begin{cases} 1 & \mbox{if }y^{\ast}<\zeta_{1}\\ 2 & \mbox{if }\zeta_{1}\leq y^{\ast}<\zeta_{2}\\ \vdots\\ J & \mbox{if }\zeta_{J-1}\leq y^{\ast} \end{cases},$$ where $\boldsymbol{\zeta}$ is a vector of cutpoints of length $J-1$. Then $y^\ast$ is modeled as a linear function of $K$ predictors $$y^\ast = \mu + \epsilon = \mathbf{x}^\top \boldsymbol{\beta} + \epsilon,$$ where $\epsilon$ has mean zero and unit scale but can be specified as being drawn from one of several distributions. Note that there is no "intercept" in this model since the data cannot distinguish an intercept from the cutpoints. However, if $J = 2$, then $\zeta_1$ can be referred to as either the cutpoint or the intercept. A Bayesian can treat $y^\ast$ as another unknown parameter, although for computational efficiency the Stan code essentially integrates each $y^\ast$ out of the posterior distribution, leaving the posterior distribution of $\boldsymbol{\beta}$ and $\boldsymbol{\zeta}$. Nevertheless, it is useful to motivate the model theoretically as if $y^\ast$ were just an unknown parameter with a distribution truncated by the relevant element(s) of $\boldsymbol{\zeta}$. # Priors If $y^\ast$ were observed we would simply have a linear regression model for it, and the description of the priors in the vignette entitled ["Estimating Linear Models with the __rstanarm__ Package"](lm.html) would apply directly. Another way to say the same thing is _conditional_ on a realization of $y^\ast$, we have a linear regression model and the description of the priors in the other [vignette](lm.html) does apply (and should be read before continuing with this subsection). 
The `stan_lm` function essentially specifies a prior on $\boldsymbol{\theta} = \mathbf{R}^{-1} \boldsymbol{\beta}$, where $\mathbf{R}$ is the upper triangular matrix in the QR decomposition of the design matrix, $\mathbf{X} = \mathbf{Q} \mathbf{R}$. Furthermore, in `stan_lm`, $\sigma_{\epsilon} = \sigma_y \sqrt{1 - R^2}$ where $R^2$ is the proportion of variance in the outcome that is attributable to the coefficients in a linear model. The main difference in the context of a model for an ordinal outcome is that the scale of $y^\ast$ is not identified by the data. Thus, the ordinal model specifies that $\sigma_{\epsilon} = 1$, which implies that $\sigma_{y^\ast} = 1 / \sqrt{1 - R^2}$ is an intermediate parameter rather than a primitive parameter. It is somewhat more difficult to specify a prior value for the $R^2$ in an ordinal model because $R^2$ refers to the proportion of variance in the \emph{unobservable} $y^\ast$ that is attributable to the predictors under a linear model. In general, the $R^2$ tends to be lower in an ordinal model than in a linear model where the continuous outcome is observed. The other difference is that an ordinal model does not have a global intercept but rather a vector of $J-1$ cutpoints. The implied prior on these cutpoints used by the __rstanarm__ package is somewhat novel. The user instead specifies a Dirichlet prior on $\Pr\left(y=j \, \left.\right| \, \overline{\mathbf{x}} \right)$, which is to say the prior probability of the outcome falling in each of the $J$ categories given that the predictors are at their sample means. The Dirichlet prior is for a simplex random variable, whose elements are non-negative and sum to $1$. The Dirichlet PDF can be written as $$f\left(\boldsymbol{\pi}|\boldsymbol{\alpha}\right) \propto \prod_{j=1}^J{\pi_j^{\alpha_j - 1}}, $$ where $\boldsymbol{\pi}$ is a simplex vector such that $\pi_j = \Pr\left(y=j \, \left.\right| \, \overline{\mathbf{x}} \right)$. 
The Dirichlet prior is one of the easiest to specify because the so-called "concentration" hyperparameters $\boldsymbol{\alpha}$ can be interpreted as prior counts, i.e., prior observations for each of the J categories (although they need not be integers). If $\alpha_j = 1$ for every $j$ (the default used by __rstanarm__) then the Dirichlet prior is jointly uniform over the space of these simplexes. This corresponds to a prior count of one observation falling in each of the $J$ ordinal categories when the predictors are at their sample means and conveys the reasonable but weak prior information that no category has probability zero. If, for each $j$, $\alpha_j = \alpha > 1$ then the prior mode is that the $J$ categories are equiprobable, with prior probability $1/J$ of the outcome falling in each of the $J$ categories. The larger the value of $\alpha$ the more sharply peaked the distribution is at the mode. The $j$-th cutpoint $\zeta_j$ is then given by $$\zeta_j = F_{y^\ast}^{-1}\left(\sum_{i=1}^j{\pi_i}\right),$$ where $F_{y^\ast}^{-1}$ is an inverse CDF function, which depends on the assumed distribution of $y^\ast$. Common choices include the normal and logistic distributions. The scale parameter of this distribution is again $\sigma_{y^\ast} = 1/\sqrt{1 - R^2}$. In short, by making each $\zeta_j$ a function of $\boldsymbol{\pi}$, it allows us to specify a Dirichlet prior on $\boldsymbol{\pi}$, which is simpler than specifying a prior on $\boldsymbol{\zeta}$ directly. # Example In this section, we start with an ordinal model of tobacco consumption as a function of age and alcohol consumption. 
Frequentist estimates can be obtained using the `polr` function in the __MASS__ package: ```{r polr-tobgp-mass} library(MASS) print(polr(tobgp ~ agegp + alcgp, data = esoph), digits = 1) ``` To obtain Bayesian estimates, we prepend `stan_` and specify the priors: ```{r polr-tobgp-mcmc, results="hide"} library(rstanarm) post0 <- stan_polr(tobgp ~ agegp + alcgp, data = esoph, prior = R2(0.25), prior_counts = dirichlet(1), seed = 12345) ``` ```{r} print(post0, digits = 1) ``` ```{r, polr-tobgp-cutpoints, echo=FALSE} zeta_medians <- round(apply(rstan::extract(post0$stanfit, pars = "zeta")[[1]], 2, median), digits = 2) ``` The point estimates, represented by the posterior medians, are qualitatively similar to the maximum-likelihood estimates but are somewhat shrunk toward zero due to the regularizing prior on the coefficients. Since these cutpoints are actually _known_, it would be more appropriate for the model to take that into account, but `stan_polr` does not currently support that. Next, we utilize an example from the __MASS__ package where low birthweight is the binary outcome of interest. First, we recode some of the variables: ```{r polr-birthwt-recodes} data("birthwt", package = "MASS") birthwt$race <- factor(birthwt$race, levels = 1:3, labels = c("white", "black", "other")) birthwt$bwt <- birthwt$bwt / 1000 # convert from grams to kilograms birthwt$low <- factor(birthwt$low, levels = 0:1, labels = c("no", "yes")) ``` It is usually a good idea to rescale variables by constants so that all the numbers are in single or double digits. We start by estimating a linear model for birthweight in kilograms, flipping the sign so that positive coefficients are associated with _lower_ birthweights. 
```{r polr-stan_lm, results="hide"} post1 <- stan_lm(-bwt ~ smoke + age + race + ptl + ht + ftv, data = birthwt, prior = R2(0.5), seed = 12345) ``` ```{r} print(post1) ``` Next, we estimate an "ordinal" model for the incidence of low birthweight, which is defined as a birth weight of less than $2.5$ kilograms. Even though this outcome is binary, a binary variable is a special case of an ordinal variable with $J=2$ categories and is acceptable to `stan_polr`. We can think of `bwt` as something proportional to $y^\ast$ and pretend that it is not observed, forcing us to estimate an ordinal model. ```{r polr-birthwt-mcmc, results="hide"} post2 <- stan_polr(low ~ smoke + age + race + ptl + ht + ftv, data = birthwt, prior = R2(0.5), prior_counts = dirichlet(c(1,1)), method = "probit", seed = 12345) ``` ```{r, polr-loo-plot} plot(loo(post2)) ``` This prior seems to have worked well in this case because none of the points in the plot are above $0.5$, which would have indicated the the posterior is very sensitive to those observations. If we compare the estimated coefficients, ```{r polr-birthwt-comparison} round(cbind(Linear = coef(post1), Ordinal = coef(post2), Rescaled = coef(post1) / sigma(post1)), 3) ``` they have the same signs and similar magnitudes, with the exception of the "Intercept". In an ordinal model where the outcome only has $J=2$ categories, this "Intercept" is actually $\zeta_1$, but it is more conventional to call it the "Intercept" so that it agrees with `stan_glm` when `family = binomial(link = 'probit')`. Recall that $\sigma_{\epsilon} = 1$ in an ordinal model, so if we rescale the coefficients from a linear model by dividing by the posterior median of $\sigma$, the resulting coefficients are even closer to those of the ordinal model. This illustrates the fundamental similarity between a linear model for a continuous observed outcome and a linear model for a latent $y^\ast$ that generates an ordinal observed outcome. 
The main difference is when the outcome is continuous and observed, we can estimate the scale of the errors meaningfully. When the outcome is ordinal, we can only fix the scale of the latent errors to $1$ arbitrarily. Finally, when $J = 2$, the `stan_polr` function allows you to specify non-`NULL` values of the `shape` and `rate` arguments, which implies a "scobit" likelihood where the probability of success is given by $F\left(y^\ast \right)^\alpha$, where $F\left(\right)$ is the logistic CDF and $\alpha > 0$ is a skewing parameter that has a gamma prior with a given `shape` and `rate`. If $\alpha \neq 1$, then the relationship between $y^\ast$ and the probability of success is asymmetric. In principle, it seems appropriate to estimate $\alpha$ but in practice, a lot of data is needed to estimate $\alpha$ with adequate precision. In the previous example, if we specify `shape = 2` and `rate = 2` to reflect the prior beliefs that $\alpha$ is expected to be $1$ but has a variance of $\frac{1}{2}$, then the `loo` calculation yields many Pareto shape parameters that are excessively large. However, with more than $189$ observations, such a model may be more fruitful. # Conclusion The posterior distribution for an ordinal model requires priors on the coefficients and the cutpoints. The priors used by the `stan_polr` function are unconventional but should work well for a variety of problems. The prior on the coefficients is essentially the same as that used by the `stan_lm` function but omits a scale parameter because the standard deviation of the latent $y^\ast$ is not identified by the data. The cutpoints are conditionally deterministic given a simplex vector for the probability of falling in each of the $J$ ordinal categories given that the predictors are at their sample means. Thus, a Dirichlet prior --- which is relatively easy to specify and has a good default of jointly uniform --- on this simplex completes the posterior distribution. 
This approach provides an alternative to `stan_glm` with `family = binomial()` even if the outcome variable has only two categories. The `stan_glm` function has more options for the prior on the coefficients and the prior on the intercept (which can be interpreted as the first cutpoint when $J = 2$). However, it may be more difficult to obtain efficient sampling with those priors. rstanarm/inst/doc/rstanarm.html0000644000176200001440000055726514551551771016377 0ustar liggesusers How to Use the rstanarm Package

How to Use the rstanarm Package

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette provides an overview of how to use the functions in the rstanarm package that focuses on commonalities. The other rstanarm vignettes go into the particularities of each of the individual model-estimating functions.

The goal of the rstanarm package is to make Bayesian estimation routine for the most common regression models that applied researchers use. This will enable researchers to avoid the counter-intuitiveness of the frequentist approach to probability and statistics with only minimal changes to their existing R scripts.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data.
  2. Draw from posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Step 1 is necessarily model-specific and is covered in more detail in the other vignettes that cover specific forms of the marginal prior distribution and likelihood of the outcome. It is somewhat more involved than the corresponding first step of a frequentist analysis, which only requires that the likelihood of the outcome be specified. However, the default priors in the rstanarm package should work well in the majority of cases. Steps 2, 3, and 4 are the focus of this vignette because they are largely not specific to how the joint distribution in Step 1 is specified.

The key concept in Step 3 and Step 4 is the posterior predictive distribution, which is the distribution of the outcome implied by the model after having used the observed data to update our beliefs about the unknown parameters. Frequentists, by definition, have no posterior predictive distribution and frequentist predictions are subtly different from what applied researchers seek. Maximum likelihood estimates do not condition on the observed outcome data and so the uncertainty in the estimates pertains to the variation in the sampling distribution of the estimator, i.e. the distribution of estimates that would occur if we could repeat the process of drawing a random sample from a well-defined population and apply the estimator to each sample. It is possible to construct a distribution of predictions under the frequentist paradigm but it evokes the hypothetical of repeating the process of drawing a random sample, applying the estimator each time, and generating point predictions of the outcome. In contrast, the posterior predictive distribution conditions on the observed outcome data in hand to update beliefs about the unknowns and the variation in the resulting distribution of predictions reflects the remaining uncertainty in our beliefs about the unknowns.

Step 1: Specify a posterior distribution

For the sake of discussion, we need some posterior distribution to draw from. We will utilize an example from the HSAUR3 package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book A Handbook of Statistical Analyses Using R (3rd Edition) (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results.

The model in section 6.3.2 pertains to whether a survey respondent agrees or disagrees with a conservative statement about the role of women in society, which is modeled as a function of the gender and education of the respondents. The posterior distribution — with independent priors — can be written as \[f\left(\alpha,\beta_1,\beta_2|\mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) f\left(\beta_1\right) f\left(\beta_2\right) \times \prod_{i=1}^J { g^{-1}\left(\eta_i\right)^{y_i} \left(1 - g^{-1}\left(\eta_i\right)\right)^{n_i-y_i}},\] where \(\eta_i = \alpha + \beta_1 \mbox{education}_i + \beta_2 \mbox{Female}_i\) is the linear predictor and a function of an intercept \(\left(\alpha\right)\), a coefficient on the years of education \(\left(\beta_1\right)\), and an intercept-shift \(\left(\beta_2\right)\) for the case where the respondent is female. These data are organized such that \(y_i\) is the number of respondents who agree with the statement that have the same level of education and the same gender, and \(n_i - y_i\) is the number of such people who disagree with the statement. The inverse link function, \(p = g^{-1}\left(\eta_i \right)\), for a binomial likelihood can be one of several Cumulative Distribution Functions (CDFs) but in this case is the standard logistic CDF, \(g^{-1}\left(\eta_i \right)=\frac{1}{1 + e^{-\eta_i}}\).

Suppose we believe — prior to seeing the data — that \(\alpha\), \(\beta_1\), and \(\beta_2\) are probably close to zero, are as likely to be positive as they are to be negative, but have a small chance of being quite far from zero. These beliefs can be represented by Student t distributions with a few degrees of freedom in order to produce moderately heavy tails. In particular, we will specify seven degrees of freedom. Note that these purported beliefs may well be more skeptical than your actual beliefs, which are probably that women and people with more education have less conservative societal views.

Note on “prior beliefs” and default priors

In this vignette we use the term “prior beliefs” to refer in generality to the information content of the prior distribution (conditional on the model). Sometimes previous research on the topic of interest motivates beliefs about model parameters, but other times such work may not exist or several studies may make contradictory claims. Regardless, we nearly always have some knowledge that should be reflected in our choice of prior distributions. For example, no one believes a logistic regression coefficient will be greater than five in absolute value if the predictors are scaled reasonably. You may also have seen examples of so-called “non-informative” (or “vague”, “diffuse”, etc.) priors like a normal distribution with a variance of 1000. When data are reasonably scaled, these priors are almost always a bad idea for various reasons (they give non-trivial weight to extreme values, reduce computational efficiency, etc). The default priors in rstanarm are designed to be weakly informative, by which we mean that they avoid placing unwarranted prior weight on nonsensical parameter values and provide some regularization to avoid overfitting, but also do allow for extreme values if warranted by the data. If additional information is available, the weakly informative defaults can be replaced with more informative priors.

Step 2: Draw from the posterior distribution

The likelihood for the sample is just the product over the \(J\) groups of \[g^{-1}\left(\eta_i \right)^{y_i} \left(1 - g^{-1}\left(\eta_i \right)\right)^{n_i-y_i},\] which can be maximized over \(\alpha\), \(\beta_1\), and \(\beta_2\) to obtain frequentist estimates by calling

             Estimate Std. Error z value Pr(>|z|)
(Intercept)     2.509      0.184  13.646    0.000
education      -0.271      0.015 -17.560    0.000
genderFemale   -0.011      0.084  -0.136    0.892

The p-value for the null hypothesis that \(\beta_1 = 0\) is very small, while the p-value for the null hypothesis that \(\beta_2 = 0\) is very large. However, frequentist p-values are awkward because they do not pertain to the probability that a scientific hypothesis is true but rather to the probability of observing a \(z\)-statistic that is so large (in magnitude) if the null hypothesis were true. The desire to make probabilistic statements about a scientific hypothesis is one reason why many people are drawn to the Bayesian approach.

A model with the same likelihood but Student t priors with seven degrees of freedom can be specified using the rstanarm package in a similar way by prepending stan_ to the glm call and specifying priors (and optionally the number of cores on your computer to utilize):

stan_glm
 family:       binomial [logit]
 formula:      cbind(agree, disagree) ~ education + gender
 observations: 42
 predictors:   3
------
             Median MAD_SD
(Intercept)   2.5    0.2  
education    -0.3    0.0  
genderFemale  0.0    0.1  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

As can be seen, the “Bayesian point estimates” — which are represented by the posterior medians — are very similar to the maximum likelihood estimates. Frequentists would ask whether the point estimate is greater in magnitude than double the estimated standard deviation of the sampling distribution. But here we simply have estimates of the standard deviation of the marginal posterior distributions, which are based on a scaling of the Median Absolute Deviation (MAD) from the posterior medians to obtain a robust estimator of the posterior standard deviation. In addition, we can use the posterior_interval function to obtain a Bayesian uncertainty interval for \(\beta_1\):

          2.5% 97.5%
education -0.3 -0.24

Unlike frequentist confidence intervals — which are not interpretable in terms of post-data probabilities — the Bayesian uncertainty interval indicates we believe after seeing the data that there is a \(0.95\) probability that \(\beta_1\) is between \(-0.3\) and \(-0.24\). Alternatively, we could say that there is essentially zero probability that \(\beta_1 > 0\), although frequentists cannot make such a claim coherently.

Many of the post-estimation methods that are available for a model that is estimated by glm are also available for a model that is estimated by stan_glm. For example,

                  Median     MAD_SD
(Intercept)   2.52098276 0.18285768
education    -0.27153061 0.01556542
genderFemale -0.01262136 0.08463091
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max.       NA's 
-0.3076575 -0.0359870 -0.0041319 -0.0003265  0.0660755  0.2822688          1 
             (Intercept)   education genderFemale
(Intercept)    1.0000000 -0.93963167  -0.23059559
education     -0.9396317  1.00000000  -0.02463045
genderFemale  -0.2305956 -0.02463045   1.00000000

rstanarm does provide a confint method, although it is reserved for computing confidence intervals in the case that the user elects to estimate a model by (penalized) maximum likelihood. When using full Bayesian inference (the rstanarm default) or approximate Bayesian inference the posterior_interval function should be used to obtain Bayesian uncertainty intervals.

Step 3: Criticize the model

The launch_shinystan function in the shinystan package provides almost all the tools you need to visualize the posterior distribution and diagnose any problems with the Markov chains. In this case, the results are fine and to verify that, you can call

which will open a web browser that drives the visualizations.

For the rest of this subsection, we focus on what users can do programmatically to evaluate whether a model is adequate. A minimal requirement for Bayesian estimates is that the model should fit the data that the estimates conditioned on. The key function here is posterior_predict, which can be passed a new data.frame to predict out-of-sample, but in this case is omitted to obtain in-sample posterior predictions:

[1] 4000   42

The resulting matrix has rows equal to the number of posterior simulations, which in this case is \(4000\), and columns equal to the number of observations in the original dataset, which is \(42\) combinations of education and gender. Each element of this matrix is a predicted number of respondents with that value of education and gender who agreed with the survey question and thus should be reasonably close to the observed proportion of agreements in the data. We can create a plot to check this:

Posterior predictive boxplots vs. observed datapoints

Posterior predictive boxplots vs. observed datapoints

Here the boxplots provide the median, interquartile range, and hinges of the posterior predictive distribution for a given gender and level of education, while the red points represent the corresponding observed data. As can be seen, the model predicts the observed data fairly well for six to sixteen years of education but predicts less well for very low or very high levels of education where there are fewer data.

Consequently, we might consider a model where education has a quadratic effect on agreement, which is easy to specify using R’s formula-based syntax.

stan_glm
 family:       binomial [logit]
 formula:      cbind(agree, disagree) ~ education + gender + I(education^2)
 observations: 42
 predictors:   4
------
               Median MAD_SD
(Intercept)     2.1    0.4  
education      -0.2    0.1  
genderFemale    0.0    0.1  
I(education^2)  0.0    0.0  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Frequentists would test the null hypothesis that the coefficient on the squared level of education is zero. Bayesians might ask whether such a model is expected to produce better out-of-sample predictions than a model with only the level of education. The latter question can be answered using leave-one-out cross-validation or the approximation thereof provided by the loo function in the loo package, for which a method is provided by the rstanarm package.

First, we verify that the posterior is not too sensitive to any particular observation in the dataset.

There are only one or two moderate outliers (whose statistics are greater than \(0.5\)), which should not have too much of an effect on the resulting model comparison:

                  elpd_diff se_diff
womensrole_bglm_1  0.0       0.0   
womensrole_bglm_2 -0.7       1.6   

In this case, there is little difference in the expected log pointwise predictive density between the two models, so we are essentially indifferent between them after taking into account that the second model estimates an additional parameter. The “LOO Information Criterion (LOOIC)”


Computed from 4000 by 42 log-likelihood matrix

         Estimate   SE
elpd_loo   -104.8  9.5
p_loo         4.2  1.7
looic       209.7 18.9
------
Monte Carlo SE of elpd_loo is NA.

Pareto k diagnostic values:
                         Count Pct.    Min. n_eff
(-Inf, 0.5]   (good)     40    95.2%   1532      
 (0.5, 0.7]   (ok)        1     2.4%   209       
   (0.7, 1]   (bad)       0     0.0%   <NA>      
   (1, Inf)   (very bad)  1     2.4%   2000      
See help('pareto-k-diagnostic') for details.

has the same purpose as the Akaike Information Criterion (AIC) that is used by frequentists. Both are intended to estimate the expected log predictive density (ELPD) for a new dataset. However, the AIC ignores priors and assumes that the posterior distribution is multivariate normal, whereas the functions from the loo package used here do not assume that the posterior distribution is multivariate normal and integrate over uncertainty in the parameters. This only assumes that any one observation can be omitted without having a major effect on the posterior distribution, which can be judged using the plots above.

Step 4: Analyze manipulations of predictors

Frequentists attempt to interpret the estimates of the model, which is difficult except when the model is linear, has no inverse link function, and contains no interaction terms. Bayesians can avoid this difficulty simply by inspecting the posterior predictive distribution at different levels of the predictors. For example,

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -39.00  -24.00  -20.00  -19.71  -16.00    0.00 

As can be seen, out of \(100\) women who have a college degree versus \(100\) women with only a high school degree, we would expect about \(20\) fewer college-educated women to agree with the question. There is an even chance that the difference is between \(24\) and \(16\), a one-in-four chance that it is greater, and a one-in-four chance that it is less.

Troubleshooting

This section provides suggestions for how to proceed when you encounter warning messages generated by the modeling functions in the rstanarm package. The example models below are used just for the purposes of concisely demonstrating certain difficulties and possible remedies (we won’t worry about the merit of the models themselves). The references at the end provide more information on the relevant issues.

Markov chains did not converge

Recommendation: run the chains for more iterations.

By default, all rstanarm modeling functions will run four randomly initialized Markov chains, each for 2000 iterations (including a warmup period of 1000 iterations that is discarded). All chains must converge to the target distribution for inferences to be valid. For most models, the default settings are sufficient, but if you see a warning message about Markov chains not converging, the first thing to try is increasing the number of iterations. This can be done by specifying the iter argument (e.g. iter = 3000).

One way to monitor whether a chain has converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. This is the motivation for the Gelman and Rubin potential scale reduction statistic Rhat. The Rhat statistic measures the ratio of the average variance of the draws within each chain to the variance of the pooled draws across chains; if all chains are at equilibrium, these will be the same and Rhat will be one. If the chains have not converged to a common distribution, the Rhat statistic will tend to be greater than one.

Gelman and Rubin’s recommendation is that the independent Markov chains be initialized with diffuse starting values for the parameters and sampled until all values for Rhat are below 1.1. When any Rhat values are above 1.1 rstanarm will print a warning message like this:

Markov chains did not converge! Do not analyze results! 

To illustrate how to check the Rhat values after fitting a model using rstanarm we’ll fit two models and run them for different numbers of iterations.

Warning: There were 2 chains where the estimated Bayesian Fraction of Missing Information was low. See
https://mc-stan.org/misc/warnings.html#bfmi-low
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 2.83, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
Warning: Markov chains did not converge! Do not analyze results!

Here the first model leads to the warning message about convergence but the second model does not. Indeed, we can see that many Rhat values are much bigger than 1 for the first model:

  (Intercept)           cyl          drat            wt          qsec 
     2.341370      3.218821      2.270997      1.334041      1.350006 
           vs          gear          carb      mean_PPD log-posterior 
     2.335472      1.542550      1.948308      1.715434      1.564986 

Since we didn’t get a warning for the second model we shouldn’t find any parameters with an Rhat far from 1:

[1] FALSE

Details on the computation of Rhat and some of its limitations can be found in Stan Modeling Language User’s Guide and Reference Manual.

Divergent transitions

Recommendation: increase the target acceptance rate adapt_delta.

Hamiltonian Monte Carlo (HMC), the MCMC algorithm used by Stan, works by simulating the evolution of a Hamiltonian system. Stan uses a symplectic integrator to approximate the exact solution of the Hamiltonian dynamics. When the step size parameter is too large relative to the curvature of the log posterior this approximation can diverge and threaten the validity of the sampler. rstanarm will print a warning if there are any divergent transitions after the warmup period, in which case the posterior sample may be biased. The recommended method is to increase the adapt_delta parameter – target average proposal acceptance probability in the adaptation – which will in turn reduce the step size. Each of the modeling functions accepts an adapt_delta argument, so to increase adapt_delta you can simply change the value from the default value to a value closer to \(1\). To reduce the frequency with which users need to manually set adapt_delta, the default value depends on the prior distribution used (see help("adapt_delta", package = "rstanarm") for details).

The downside to increasing the target acceptance rate – and, as a consequence, decreasing the step size – is that sampling will tend to be slower. Intuitively, this is because a smaller step size means that more steps are required to explore the posterior distribution. Since the validity of the estimates is not guaranteed if there are any post-warmup divergent transitions, the slower sampling is a minor cost.

Maximum treedepth exceeded

Recommendation: increase the maximum allowed treedepth max_treedepth.

Configuring the No-U-Turn-Sampler (the variant of HMC used by Stan) involves putting a cap on the depth of the trees that it evaluates during each iteration. This is controlled through a maximum depth parameter max_treedepth. When the maximum allowed tree depth is reached it indicates that NUTS is terminating prematurely to avoid excessively long execution time. If rstanarm prints a warning about transitions exceeding the maximum treedepth you should try increasing the max_treedepth parameter using the optional control argument. For example, to increase max_treedepth to 20 (the default used by rstanarm is 15) you can provide the argument control = list(max_treedepth = 20) to any of the rstanarm modeling functions. If you do not see a warning about hitting the maximum treedepth (which is rare), then you do not need to worry.

Conclusion

In this vignette, we have gone through the four steps of a Bayesian analysis. The first step — specifying the posterior distribution — varies considerably from one analysis to the next because the likelihood function employed differs depending on the nature of the outcome variable and our prior beliefs about the parameters in the model vary not only from situation to situation but from researcher to researcher. However, given a posterior distribution and given that this posterior distribution can be drawn from using the rstanarm package, the remaining steps are conceptually similar across analyses. The key is to draw from the posterior predictive distribution of the outcome, which is what the model predicts the outcome to be after having updated our beliefs about the unknown parameters with the observed data. Posterior predictive distributions can be used for model checking and for making inferences about how manipulations of the predictors would affect the outcome.

Of course, all of this assumes that you have obtained draws from the posterior distribution faithfully. The functions in the rstanarm package will throw warnings if there is evidence that the draws are tainted, and we have discussed some steps to remedy these problems. For the most part, the model-fitting functions in the rstanarm package are unlikely to produce many such warnings, but they may appear in more complicated models.

If the posterior distribution that you specify in the first step cannot be sampled from using the rstanarm package, then it is often possible to create a hand-written program in the Stan language so that the posterior distribution can be drawn from using the rstan package. See the documentation for the rstan package or https://mc-stan.org for more details about this more advanced usage of Stan. However, many relatively simple models can be fit using the rstanarm package without writing any code in the Stan language, which is illustrated for each estimating function in the rstanarm package in the other vignettes.

References

Betancourt, M. J., & Girolami, M. (2013). Hamiltonian Monte Carlo for hierarchical models. arXiv preprint.

Stan Development Team. (2015). Stan modeling language user’s guide and reference manual, Version 2.9.0. https://mc-stan.org/documentation/. See the ‘Hamiltonian Monte Carlo Sampling’ chapter.

Gelman, A., & Rubin, D. B. (1992). Inference from iterative simulation using multiple sequences. Statistical Science, 7(4), 457 – 472.

Gelman, A., & Shirley, K. (2011). Inference from simulations and monitoring convergence. In S. Brooks, A. Gelman, G. Jones, & X. Meng (Eds.), Handbook of Markov chain Monte Carlo. Boca Raton: Chapman & Hall/CRC.

rstanarm/inst/doc/betareg.R0000644000176200001440000000604114551550162015364 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ----simulated-data, fig.height=5--------------------------------------------- SEED <- 1234 set.seed(SEED) eta <- c(1, -0.2) gamma <- c(1.8, 0.4) N <- 200 x <- rnorm(N, 2, 2) z <- rnorm(N, 0, 2) mu <- binomial(link = logit)$linkinv(eta[1] + eta[2]*x) phi <- binomial(link = log)$linkinv(gamma[1] + gamma[2]*z) y <- rbeta(N, mu * phi, (1 - mu) * phi) dat <- data.frame(cbind(y, x, z)) hist(dat$y, col = "darkgrey", border = F, main = "Distribution of Outcome Variable", xlab = "y", breaks = 20, freq = F) ## ----simulated-fit, results = "hide"------------------------------------------ library(rstanarm) fit1 <- stan_betareg(y ~ x | z, data = dat, link = "logit", link.phi = "log", cores = 2, seed = 12345) fit2 <- stan_betareg(y ~ -1 + x , data = dat, link = "logit", link.phi = "log", cores = 2, seed = 12345) round(coef(fit1), 2) round(coef(fit2), 2) ## ----simulated-fit-print, echo=FALSE------------------------------------------ round(coef(fit1), 2) round(coef(fit2), 2) ## ----print-priors------------------------------------------------------------- prior_summary(fit1) ## ----simulated-analysis, fig.height=5----------------------------------------- library(ggplot2) library(bayesplot) bayesplot_grid( pp_check(fit1), pp_check(fit2), xlim = c(0,1), ylim = c(0,4), titles = c("True Model: y ~ x | z", "False Model: y ~ x - 1"), grid_args = list(ncol = 2) ) ## 
----simulated-loo------------------------------------------------------------ loo1 <- loo(fit1) loo2 <- loo(fit2) loo_compare(loo1, loo2) ## ---- gas-fit, results="hide"------------------------------------------------- library(rstanarm) data("GasolineYield", package = "betareg") gas_fit1 <- stan_betareg(yield ~ temp + batch, data = GasolineYield, link = "logit", seed = 12345) gas_fit2 <- stan_betareg(yield ~ temp + batch | pressure, data = GasolineYield, link = "logit", seed = 12345) round(coef(gas_fit1), 2) round(coef(gas_fit2), 2) ## ---- gas-print, echo=FALSE--------------------------------------------------- round(coef(gas_fit1), 2) round(coef(gas_fit2), 2) ## ----gas-analysis, fig.height=5----------------------------------------------- library(ggplot2) bayesplot_grid( pp_check(gas_fit1), pp_check(gas_fit2), xlim = c(0,1), ylim = c(0,5), titles = c("gas_fit1", "gas_fit2"), grid_args = list(ncol = 2) ) ## ---- gas-loo----------------------------------------------------------------- gas_loo1 <- loo(gas_fit1) gas_loo2 <- loo(gas_fit2) loo_compare(gas_loo1, gas_loo2) rstanarm/inst/doc/children/0000755000176200001440000000000013722762571015427 5ustar liggesusersrstanarm/inst/doc/children/SETTINGS-gg.txt0000644000176200001440000000016013540537566020022 0ustar liggesusers```{r, SETTINGS-gg, include=TRUE} library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ``` rstanarm/inst/doc/children/stan_glm_priors.txt0000644000176200001440000000244513340675562021376 0ustar liggesusersA full Bayesian analysis requires specifying prior distributions $f(\alpha)$ and $f(\boldsymbol{\beta})$ for the intercept and vector of regression coefficients. When using `stan_glm`, these distributions can be set using the `prior_intercept` and `prior` arguments. The `stan_glm` function supports a variety of prior distributions, which are explained in the __rstanarm__ documentation (`help(priors, package = 'rstanarm')`). 
As an example, suppose we have $K$ predictors and believe --- prior to seeing the data --- that $\alpha, \beta_1, \dots, \beta_K$ are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give $\alpha$ and each of the $\beta$s this prior (with a scale of 1, say), in the call to `stan_glm` we would include the arguments `prior_intercept = normal(0,1)` and `prior = normal(0,1)`. If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. __Step 1__ in the "How to Use the __rstanarm__ Package" vignette discusses one such example. rstanarm/inst/doc/children/SETTINGS-knitr.txt0000644000176200001440000000045513722762571020561 0ustar liggesusers```{r, SETTINGS-knitr, include=FALSE} stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ``` rstanarm/inst/doc/children/four_steps.txt0000644000176200001440000000137013340675562020361 0ustar liggesusersThe four steps of a Bayesian analysis are 1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data 2. Draw from posterior distribution using Markov Chain Monte Carlo (MCMC). 3. Evaluate how well the model fits the data and possibly revise the model. 4. 
Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s). rstanarm/inst/doc/mrp_sim.rda0000644000176200001440000006562613540753420016003 0ustar liggesusers E@h@s7D$A/"p#"yˌ{uEYT&B2Y&d2Yطy=}g&o=7]U9U9㤳?E>ώ{D~qa+ݑݎ|vwW6D;8#)/6|SەT>mh[[__R:m'՛$T}ӌkGRI5w=Sq(.+ޥmGSB~)_ǵ+zsæ=򷴽J{{:W7m)kvZ?Z*>'Ŷ+I^s3#I(ŖO:Jsz*Xo.y{Sl{hv(5I7W=MO5כJI+/qGSRvSh<gR:jb5Վiۑ6׾b\z+')R=Ii.;&o)=%#.KOjWUSXK٧v5UNSmOSck_jw_-n8L[zX{'\IhR7hj)6c'K[?*'htMN/mM[V?9vJs[(jW5m9T9ebgv455ul\j~TZz4W'mMWSY;Q:-e檯RRoSi쿧{r3MGK$(|6n$mz|=5?T|m.yŖ+6\.zsR{J^s;9劯qGT$\s׷Sn{7󅦖kuSS-5W_5|ZesO*WnK;}rbͽoj-5Rqoy;m,~v;T*V/rַ㶗\bOIn-iҿ;܇;`4|q}O[`Ƕ1>oo\A?rr*\80]14ۑdO]lyW|2ŵ7N8XO[G^O#ڟ4=bш^/4|=qI8/z^٧?3>ǕwQmOLH{[NsR~$=Ώ,珸ǍꋙgzΣq+ipy n qLs.q*qq1qs}ҼNٞXy.]McgZ91od87|;.i=ꍍ1kzQL}qcn롔u'?ĵ&|9 19Mqt\Cz3vŬO$֙wR܉[71M/m%#I"z:&Ώq??\?Iv|\q0mK{?W.;v=N9Fϸ_ۓK~\uoG^i9Iz&Sᘤw?q&iW~4ޒ]WEL=^N}@qK\;x'nqV[w=}Fr:Ǖ/zckOvž.>M͟Z(e(%v%!}oSR뗐>X'ٱbK'ţ3uq iH_:nGL_(}O\MҎ[|7w>)N^_Ĕ/yމ :~&ħRIbיiSy3?O|)QƽoZ}tVb/9ΦRWE-5ZץΧ%W,\ѣv͗~J_캮d{9.Q,5ɍvM;/Q4}by81Ǵ/Jh_L-Mw8O/ҎS#e\N;RRb환?}F˗O?RI~/^I/mK='_Rh0u{Ŏb6g_?mMbsq1?kP8Mݟ;_t,튻8dUw=A^O;;/vO?]|]I9SOَb_y"]1S}>e;Q-m.OףB=?DOvjb}H?F)a7SE1LYoIrK_jʢCLT?-Jӵ+Q^t=xNjGZ+m\*Z^"]bcWb+5>/%7qL;^7^OJE!1ޖx说 ?wפIr:/b͟}*~84.Ӟoo׭IkI+F6_Tכ$'} վ|\:M'g/oORS"7R?}~\eb}7._v]6Χ|^hߤ8ϋ6{)9AiG<.1.pߚgm?|mjS߄I&i^/uݞoq:~cگ/I+u#wBED'.>< t%mc{E|koۡ NnslӗdPo:vu}\0?uqI/v`=yKx<ҕϔqz_'sdsGlt#;9R.U~Q^W`=~>qh/Gݗ|? ! Ѻs|g|b9?G>x/a?6c=7w?.O!^b~?*\@7aqڇr%Ŀ|ali/x^Wtqh}$}wvvf=ϳ㤿/ |a~!}wNcyc~z |a9?GݓO{ gyG~]y?CA:եI_?kE~~}Ǎ.ykw>oa?^}>(HR?ΎK[׭wLnhG/c;Zf:o/?]g{+@\߷.:xvRo__2;-q'k>^ll |a9?/~zڻߜhW]s~opv %8/ Bx?'s5|?>ڱoЏw~Gs|1n{;Gҝ>טzp_pv9|a:!_X˷wy߇q? EQ=p7}w:y{;! 
@?d~+ wܘG1^|G{lٝG{1:_߄Is7?8姿*\G99'Ϸϯ>\ Ǔ\9\Y?W?O:SNISyzr<_ߏIPXx o$o+v\ڳU3ceԋveaL#ڇ{z۟~_Ҟo' 1q~_~!_E@"/dc7PwCn( 廡(1_(1_%( _%( _()_()_|wQ;wG(廣e(_e(_(9_(9_(_W(_W(%_W(%_WU(_WU(_W(5_W(5_W5( _נ5( _נ(-_ע(-_עu(_סu(_ס(=_ף(=_ף|Q@tAq?y=f`o ȿo ȿo ȿo  =!'{B~O =!'{B~O ^ {A~/ ^ {A~/ !7ސ{C~o !7ސ{C~o} >@~} >@~ȿo!Fȿo!Fȿo!Fȿo!F }!/B~_ }!/B~_ ~A~? ~A~?ȿ o &ȿ o &ȿ o &ȿ o &!?C~!?C~ȿo!fȿo!fȿo!fȿo!fȿo[ ȿo[ ȿo[ ȿo[  ?@ ?@ȿo[!Vȿo[!Vȿo[!Vȿo[!V! ?B@! ?B@C~-vۃhOP廂hgl_~ ڟ B/A=/? ͯAAAAAAAAAAAAAAAA0 C0 C0 C0 C0 C0 C0 C0 C!@!@!@!@!@!@!@!@( B( B( B( B( B( B( B( BaAaAaAaAaAaAaAaA8C8C8C8C8C8C8C8CG@G@G@G@G@G@G@G@$ GB$ GB$ GB$ GB$ GB$ GB$ GB$ GBQGAQGAQGAQGAQGAQGAQGAQGA4 GC4 GC4 GC4 GC4 GC4 GC4 GC4 GC1 @1 @1 @1 @1 @1 @1 @1 @, B, B, B, B, B, B, B, BqAqAqAqAqAqAqAqA<C<C<C<C<C<C<C<C '@ '@ '@ '@ '@ '@ '@ '@bv 1@vx0L;M<~&~́'gKAy5`7ÕCAaXp"xL ?MM| >39 :q Xu`1VE _WU _WU@ :_W@ N* *WUU*j"WU _WU_WM* U_UU_U_5_ WW5_ Wwj5_j>j" _ Wy _ Ww5_ W5_=9;W&?t]kjbdKG!ntǿvygW^y%_ ջ[o&R's=r?ALow5zqB"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8#DQ;ۃC/_;G_~< <1cǁǃ`;#>>铐> 铑>.HwAOA+]>G(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`(Q0`ޘ8wь߼vn;GMto+Y{z<v_9fw?SPZho/l@=VvOߙґ;Ldy֎wXOik3gXE'M*kLewݖ䇦ܽ/v]֎5}^MkQUaq{s?{7.SNnˣv~F/=n~瞝m}ڲy;ݸRkǛszG}}ϟzwnd׿fq]ehtuRzg1?d7t2{SV~Kc][ML띭SM V~&EVϛ5;ߵvlQcηr.=lu׍550˞,(wqEvk`kkn[|2#_kG \Vn%vo^ZhW[r]bz7|ڸ[7ʭzWmv,?@{X5_r-Yr-^r6 ^xنxUn;&WW6v>.lQ]ߎxuo K4˿jk/gVU\īMr[^yr.4ZmnzxrcVjū\Նv~զ*k6W7C.զo@?īWu_4AW"^j܈xUxUxj=JīU{[5W+ߍz[WαֽaZxUnīU߆īu_ ^ա?kZ=VYz˷ j U Ζo%jīՈW[5ƺ^W^F{a g ^q>(x5j. 
īWӿf_Ax5\e^ ^ʹKn吳ҳnf?30g"^Ax+?ҋ>mʽ(_g ^Fx5W3深!^-!0+7J뫻-W_+W+ݖ_+^YW&/cJw[~Z~Z4yT+^Y~+W)^y+xe,ɍWO`p;,}~iW6_}mN ;_;>S&omPk`rɍsڶn4m[MV_L7x;=v`߷[_oWboAY}&Y=n3^c\[oX;+WT kϳ-]r/ zVv2ke!$/EU[t˿ra>Yv/i9WC oAv}U^Y yaU+k0YC( r_zJؔ[dr+;L5ў[,B\K/S1Χ8V~=~zq;*떞~aqm|tb}z 3ug/k!^Gxt3ɮ/ \cu|īe^ā]#܋CM,īyW/#^MGx5iīWs eī|^Bq|īfc>x5- ī=H1v#Cv sï_!^BN&ī9W~W ^=|{6D8ÈWOc4!1īG=#ѮMb]-Z ī|=x5|j`43WO!^=s{zl}:F;A?zzrqgZ_Wh`+]W2*^YZ lt?~~lr?x+^Y~+W |WV|Wxe!W v@n7:mNN;ـ%lHu/r'ݿ[34`I5;7V~~*7؎%[{W{-nb{7֎-_&@uOOYzUa]߆Vڵmc9hV˿֮X}![Y-_Ry`UGXk/vad18G5[{05_:<ZoauGX^ճP|h>e~ڷ]c[v~;Y~{)&~Sr扵Y߁3n\gb{̳tç7#P/;:dq>%-ž߷r x^}Λ^~0pqZw&A-?g =]|?|sWC;B-x_~ @Ў`o~gysOE]}j}(7wxj<īW5FZ.Cfg#^ͳ4 ^FZIAZx닥W a_RīEW5W?`]=j+!^EZx56˿j6bGZ9xUx_ī1x埾_b ^=B?{鳖~jSOS[zZԃxVK?xӈW/AzY x_Ջ/SE M^y'!?+gZځuW^WOî"^={_Dv'x,3Z_^LމJSU2y'Z:+JVD4yTzWW)^╵WU^VavnʽvW3ƓY?Nwvܫ'UZlz8/69,rk{wh;gL~&\M Lottn CHk=֮G VNfM_Ͽ޾4oν>7{n{밝5gw[0{6Tޜ`ox됻wrM[M-[-cM^O,ߖDc]yfǭe_շ i^noߘj 9ۭB|Wi%_`*Ƚ[լ n_fvk-^NMr[WZuWkhvX[|W,^͵rj!^;Dk][Qnū\WލWuO@U~W=L.F[یxU{;x۲Nnxkx{߀x~߄oXx볍W{-r[,^l@pܚVn=jī [+j#^-Cڀxr[xjXK/]_x2 W5땈WkVaՈWkVo_W:kNˎoCZu@X[lWbܯjjV nzCZrV"^D7oLīyKZz:9n<Ws^/W/!^@x5o`_līWV#;cYiوW3"^x5~)KW  ^MGx |>l<ī~ī9W.^Ak}eel}x+^Y~+X ~ШAA+˧xWF+xeL/+xeLnLZF`ƵYantwqw ~bmXxIY}]q3;+Uا~;_{pտ@};֮;ڳ+څ&跏x8f5}u,*ħ5ܹLoٴꭝvEE׶KLzjOj$dW4.42- =' ?7`XUx۸t_'l7;|\5r1,{|H;_۳z?@gՇ` v|п_qݦ ˲*˿jžͲN,V̙/gu?mFV߳3<q<;MQ yY=CgW^A?6Ю|~ɟ?r]q㬽-L|h6P9Ʃ5c1; ~K67"uW oRīWˎΞnqxt7v[9W3C4jFZx{շlgռ-X>j 9].īGf"^-ks +.Ft0wZ@.Vī٧]gxj1?g͝h:ZzZ_Gx5E엜x5q*īf!^8vA}' ?#^E}3^B~2 īY3ng#^M9Ov~jռ3qi,ī>9OCNސx2sQ/s^FzvƧGzjCWOGy %a|qÈW{]GXӰˤ},#wO?:8=A.գ~=a ^M؇]'E}5L2 X={W_+˯իJҊWȯxe*^+"-^_zcv~-d7aXڿ'&ooP/{:t>VϢGL#3[EWFE+,P ,ӯzġ'Xz>̆r9a`?r;B| ' /v`?a_ܱ~<|g&l㷨T~K ԋQ5?#x5j9j9W5j + jVwj/%GZ9Ks?jf}Ӯ:˝xj ԋxv}>օ Aj.ɐx|qUrӒw_||#^-Dx5jV~ ڿjo,=嚌SXOCzv})ī>|WODgrGߏzj8#^=x"SW#^M9C(x̃8x,K0r;#~Iī^@SBz뻧o|v,Az)ī) WSyՔ+NCjZ_Y~^L^WW _+^YW&/cJ5__VM^j,WzL╝WW&O+^v͎;Q}6n\>{_vgKݭ7{jZ?p|6 
|nqYn~0QζDю`H:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСC:tСCG:*DQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQQrCEQEQEQEQEQEQEQE3((((((((fmEQEQEQEQEQEQEQEQrBEQEQEQEQEQEQEQE3((((((((fN(((((((("8 QD(e.EQl%zOE1((EQl%B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fS,d.3ŌSX)2bFƩh, L1#T4{|Ebq*=>B"1S8b!s)fdrOH32NEcǧX\$fb(jG|`{`xKa`'H7oGG<6x,8x̑ycg;#}' }"'"}'!}2'#.H)HwE+ҧZZa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0Fa?Q0F3QKT.Ώ{밍3>3^ܓW_5rw<Uq*W?~`K:\uh}?PO5Kp~S\ݲɿݯ|ym?WޮϐPm<%'~aAwO s7wxa5|^ć*_zof⎠_mn|{5_ᐟzS>۟S|Uύ𕪭 va~ʡݘt̲WNyvmU?:T.yeXŜO.\:2I=+Xӎԟ~CA6=soNd;Yc?|N>CG]B=3^8ceO)gïj?;T>G+qpAC0?nLNl/=!`Oll>#l{T zoAþ7cW=Y/? 3?=Y١]ЏϠ7l:Gv١ >OM.Ӵ?!vO0n/^|/20zW=/ԛ0!]9YI/ߏwڏ~jC0?nL{08~IkWO\|3O=22?@ڡk#/v~ԛr|aq~`C0?nL~9^N_ϳ}Onvs|u//z9n'|@8oOk/0Nup^rƏyF6WVtߏY苣Wqsnt}G6󤭕.T٭rS:mGGdvk>$w)_:UwH'ϬKsP=׆zoSkʥ%Ի[UrofU>s?Pς;ܶ8 ^qmrY_}~U9>5?'ޔ񈋷?poi.O9Ӵ;x`17\qGjsK=]93>zO7j'vҮԟa=O5wLϩeb9C>ڑK}{h z[?{^ާ@~)#Yp_]6cڅl/g!88kos\lgz˩!n缸g<ậ'n uaM =sv!^s_jC=ߌO)_סn׺iӕ~%+ vcvb{9yUmu.Ϝ|;iN?iG\OP?g$z]\'ٯ7l{m衍C0?nL~߳QP/Shl>#~IwPO+ԏWޔSzÏ?7x#%7^O[u!^s`}yCg90_Od?<7F&Я^N_ϳ}#s]zoG~kz9n6p1AOC;u!֍kg/lg>wO_r+ ǯ3bvb{?!  vf=~==?ӯi_<9~O~y_Q a>wuOwǼ_4V?&cOq}`A=X/S쏮pF}`G^77Is|`Ws|\r6~z=&.Ǘd{8l-|;Q;iWvyӎԟ'h70N:|_B=3;hG#r{kЏ_7t3}m}Ua>q>>s]?7Ġۡb` '? ~ٯPoW]Lz~ǥ/5!{ڑ׹O}(r>#|הFiX/ܮ>s4ܧݿ1ؓ~ q>7c rsn|{z_\}l' P#s=v$zo7?z9nA='53r6_wj. rh?bv a}muo' Pge?9f=ޮ U*%f=Å ߣ:ɿc?d-wvzx>;}"Xgg6uwkz9n~ͯkúB~.c?!g7ܐB|~%6L?\؉96[k3揻ZA=X/˅! mvuߟ\ۿ5vV'a|q_ʹ7su'p ̍/9g識^.G| z{;KoQ]x}`ڵ}ei:K&5&>k&QsЯ9~籸L;:egzmM[>;~K?_ ~_LuB=^\9w\{?PC(Ws` ~nLN>sѯs?ܮ@>Sy?"M,q?ܻe~~>%ztԣOk<O a?uZ|9{kyw|>}߅a>>󿋳ا|z9n+2]Q ~A1]G0vҮOi8ԟ|ˌ x ko|}v~)Oۂomvan JC\o^ ~vz׹/>#||߳xϴb0>gh/_ӿ 0ݎ%>߽1nU/gwZ_|;~[vS~ujg:p >F3s]?d;}=ᾍN?c=9nq?k;3r6_Ysp_n|Y8o1Ά܇a?n߇'v?0O;W}F6^P!nEطD/l=S+ ]vمÍ vb{wD8)<i?Ё!1U? 
ٸ}ٮ}at;SU.y[yϯ\?q^s]MܟvO린^юĬο¸x{8{g3{Mo(\gh|lǡ/??F~ܩڋ|~~{Xqմ/{ ǿ-pc}`2G?}3=n=¿#fЛrߗ`څ)vcv*{|q2?~޹??w+$Fgli7#Grߥg!.YK>rW׫o0Uov ~O+C0?nLNl/0~I0A;2n]m4ϻ-/!yiC=C;h7~N9 Sƴ; ~?jF5?jx ;8}UJOSTʧvaWؾ_{;_c#7|rOU.<~ύB=s?gʿ=ĤFzo,86跴gl>G?r+yvy!?˟suZXW~v.O9Ӵ|pչƲg+Wn:W^9c_>i>VvI+Xӎԟj7@0al#/KH#6R_<{з {ho v'ޔS=7rt|C0?nLNl/=!`O,G_;;?:vߑ7a m&֝`OqǓ.]wWГԏx_zSΆ׿_u*׽[%ȡ]ߏ/i'0_xs?Nzr|$w] /\܁4au]bڅ)vcگ{ybgW޽'f;jw/r[߭wqݮnzwJ޽'؍eC-wYK>s~Ynu=߷\zRѤFߗ0cϓXk9"!C;yϓ9{7ynU8> qw] }dA,gIƉ~ RKv}e/iWK6[k9> v󌏛q]g=>v4!록 ߛL _/ƧǢ]p?įCJ~/cdMQߏ?J]K}>6%>oi?\wv}v_Gs>Aƴn>f}-a~vҮ~C#uۯ:%g'NJp!록_oҎO)K=.;`=[|/ؑ~|o]?'eg?d5؟X.<wz <ܗv Ym:SXP/ {}$} )7*|Fiډ8v=ov=oo-ۛguqH3S]Ew%N?>_7'?S-7Z;+C=ޮ ׋Ǫ v~<~̦hpKX/ Ͼ eg?d3a9Ca_ofo߬{;3N۵r}o7vEط~FBNmt؇^ῂ]߯Ϙ}ͻq{k"K/us]Ao?~?~`_M/#{=RƫfW[Lv:!^ūʄxU2^U&īʔjvujx5ū~W =HAEm7*vo&\>{_vgKݭ7m/=_fA{6}nqYn;\^ݮ{ջg<ѻguVG L)D rstanarm/inst/doc/continuous.Rmd0000644000176200001440000004027314214422264016504 0ustar liggesusers--- title: "Estimating Generalized Linear Models for Continuous Data with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to estimate linear and generalized linear models (GLMs) for continuous response variables using the `stan_glm` function in the __rstanarm__ package. For GLMs for discrete outcomes see the vignettes for [binary/binomial](binomial.html) and [count](count.html) outcomes. ```{r, child="children/four_steps.txt"} ``` This vignette primarily focuses on Steps 1 and 2 when the likelihood is the product of conditionally independent continuous distributions. Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html), although this vignette does also give a few examples of model checking and generating predictions. 
# Likelihood In the simplest case a GLM for a continuous outcome is simply a linear model and the likelihood for one observation is a conditionally normal PDF $$\frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma}\right)^2},$$ where $\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ is a linear predictor and $\sigma$ is the standard deviation of the error in predicting the outcome, $y$. More generally, a linear predictor $\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}$ can be related to the conditional mean of the outcome via a link function $g$ that serves as a map between the range of values on which the outcome is defined and the space on which the linear predictor is defined. For the linear model described above no transformation is needed and so the link function is taken to be the identity function. However, there are cases in which a link function is used for Gaussian models; the log link, for example, can be used to log transform the (conditional) expected value of the outcome when it is constrained to be positive. Like the `glm` function, the `stan_glm` function uses R's family objects. The family objects for continuous outcomes compatible with `stan_glm` are the `gaussian`, `Gamma`, and `inverse.gaussian` distributions. All of the link functions provided by these family objects are also compatible with `stan_glm`. For example, for a Gamma GLM, where we assume that observations are conditionally independent Gamma random variables, common link functions are the log and inverse links. Regardless of the distribution and link function, the likelihood for the entire sample is the product of the likelihood contributions of the individual observations. 
# Priors ```{r, child="children/stan_glm_priors.txt"} ``` # Posterior With independent prior distributions, the joint posterior distribution for $\alpha$ and $\boldsymbol{\beta}$ is proportional to the product of the priors and the $N$ likelihood contributions: $$f\left(\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N {f(y_i|\eta_i)},$$ where $\mathbf{X}$ is the matrix of predictors and $\eta$ the linear predictor. This is the posterior distribution that `stan_glm` will draw from when using MCMC. # Linear Regression Example The `stan_lm` function, which has its own [vignette](lm.html), fits regularized linear models using a novel means of specifying priors for the regression coefficients. Here we focus on using the `stan_glm` function, which can be used to estimate linear models with independent priors on the regression coefficients. To illustrate the usage of `stan_glm` and some of the post-processing functions in the __rstanarm__ package we'll use a simple example from Chapter 3 of [Gelman and Hill (2007)](http://www.stat.columbia.edu/~gelman/arm/): > We shall fit a series of regressions predicting cognitive test scores of three- and four-year-old children given characteristics of their mothers, using data from a survey of adult American women and their children (a subsample from the National Longitudinal Survey of Youth). Using two predictors -- a binary indicator for whether the mother has a high-school degree (`mom_hs`) and the mother's score on an IQ test (`mom_iq`) -- we will fit four contending models. The first two models will each use just one of the predictors, the third will use both, and the fourth will also include a term for the interaction of the two predictors. For these models we'll use the default weakly informative priors for `stan_glm`, which are currently set to `normal(0,10)` for the intercept and `normal(0,5)` for the other regression coefficients.
For an overview of the many other available prior distributions see `help("prior", package = "rstanarm")`. ```{r, continuous-kidiq-mcmc,results="hide"} library(rstanarm) data(kidiq) post1 <- stan_glm(kid_score ~ mom_hs, data = kidiq, family = gaussian(link = "identity"), seed = 12345) post2 <- update(post1, formula = . ~ mom_iq) post3 <- update(post1, formula = . ~ mom_hs + mom_iq) (post4 <- update(post1, formula = . ~ mom_hs * mom_iq)) ``` ```{r, continuous-kidiq-print, echo=FALSE} print(post4) ``` Following Gelman and Hill's example, we make some plots overlaying the estimated regression lines on the data. ```{r, continuous-kidiq-plot1a} base <- ggplot(kidiq, aes(x = mom_hs, y = kid_score)) + geom_point(size = 1, position = position_jitter(height = 0.05, width = 0.1)) + scale_x_continuous(breaks = c(0,1), labels = c("No HS", "HS")) base + geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], color = "skyblue4", size = 1) ``` There are several ways we could add the uncertainty in our estimates to the plot. One way is to also plot the estimated regression line at each draw from the posterior distribution.
To do this we can extract the posterior draws from the fitted model object using the `as.matrix` or `as.data.frame` methods: ```{r, continuous-kidiq-plot1b} draws <- as.data.frame(post1) colnames(draws)[1:2] <- c("a", "b") base + geom_abline(data = draws, aes(intercept = a, slope = b), color = "skyblue", size = 0.2, alpha = 0.25) + geom_abline(intercept = coef(post1)[1], slope = coef(post1)[2], color = "skyblue4", size = 1) ``` For the second model we can make the same plot but the x-axis will show the continuous predictor `mom_iq`: ```{r, continuous-kidiq-plot2} draws <- as.data.frame(as.matrix(post2)) colnames(draws)[1:2] <- c("a", "b") ggplot(kidiq, aes(x = mom_iq, y = kid_score)) + geom_point(size = 1) + geom_abline(data = draws, aes(intercept = a, slope = b), color = "skyblue", size = 0.2, alpha = 0.25) + geom_abline(intercept = coef(post2)[1], slope = coef(post2)[2], color = "skyblue4", size = 1) ``` For the third and fourth models, each of which uses both predictors, we can plot the continuous `mom_iq` on the x-axis and use color to indicate which points correspond to the different subpopulations defined by `mom_hs`. 
We also now plot two regression lines, one for each subpopulation: ```{r, continuous-kidiq-plot3} reg0 <- function(x, ests) cbind(1, 0, x) %*% ests reg1 <- function(x, ests) cbind(1, 1, x) %*% ests args <- list(ests = coef(post3)) kidiq$clr <- factor(kidiq$mom_hs, labels = c("No HS", "HS")) lgnd <- guide_legend(title = NULL) base2 <- ggplot(kidiq, aes(x = mom_iq, fill = relevel(clr, ref = "HS"))) + geom_point(aes(y = kid_score), shape = 21, stroke = .2, size = 1) + guides(color = lgnd, fill = lgnd) + theme(legend.position = "right") base2 + stat_function(fun = reg0, args = args, aes(color = "No HS"), size = 1.5) + stat_function(fun = reg1, args = args, aes(color = "HS"), size = 1.5) ``` ```{r, continuous-kidiq-plot4} reg0 <- function(x, ests) cbind(1, 0, x, 0 * x) %*% ests reg1 <- function(x, ests) cbind(1, 1, x, 1 * x) %*% ests args <- list(ests = coef(post4)) base2 + stat_function(fun = reg0, args = args, aes(color = "No HS"), size = 1.5) + stat_function(fun = reg1, args = args, aes(color = "HS"), size = 1.5) ``` ## Model comparison One way we can compare the four contending models is to use an approximation to Leave-One-Out (LOO) cross-validation, which is implemented by the `loo` function in the __loo__ package: ```{r, continuous-kidiq-loo} # Compare them with loo loo1 <- loo(post1, cores = 2) loo2 <- loo(post2, cores = 2) loo3 <- loo(post3, cores = 2) loo4 <- loo(post4, cores = 2) (comp <- loo_compare(loo1, loo2, loo3, loo4)) ``` In this case the fourth model is preferred as it has the highest expected log predicted density (`elpd_loo`) or, equivalently, the lowest value of the LOO Information Criterion (`looic`). The fourth model is preferred by a lot over the first model ```{r, continuous-kidiq-loo-2} loo_compare(loo1, loo4) ``` because the difference in `elpd` is so much larger than the standard error. 
However, the preference of the fourth model over the others isn't as strong: ```{r, continuous-kidiq-loo-3} loo_compare(loo3, loo4) loo_compare(loo2, loo4) ``` ## The posterior predictive distribution The posterior predictive distribution is the distribution of the outcome implied by the model after using the observed data to update our beliefs about the unknown parameters. When simulating observations from the posterior predictive distribution we use the notation $y^{\rm rep}$ (for _replicate_) when we use the same observations of $X$ that were used to estimate the model parameters. When $X$ contains new observations we use the notation $\tilde{y}$ to refer to the posterior predictive simulations. Simulating data from the posterior predictive distribution using the observed predictors is useful for checking the fit of the model. Drawing from the posterior predictive distribution at interesting values of the predictors also lets us visualize how a manipulation of a predictor affects (a function of) the outcome(s). ### Graphical posterior predictive checks The `pp_check` function generates a variety of plots comparing the observed outcome $y$ to simulated datasets $y^{\rm rep}$ from the posterior predictive distribution using the same observations of the predictors $X$ as we used to fit the model. Here we show a few of the possible displays. The documentation at `help("pp_check.stanreg", package = "rstanarm")` has details on all of the `pp_check` options. First we'll look at a plot directly comparing the distributions of $y$ and $y^{\rm rep}$. The following call to `pp_check` will create a plot juxtaposing the histogram of $y$ and histograms of five $y^{\rm rep}$ datasets: ```{r, continuous-kidiq-pp_check1} pp_check(post4, plotfun = "hist", nreps = 5) ``` The idea is that if the model is a good fit to the data we should be able to generate data $y^{\rm rep}$ from the posterior predictive distribution that looks a lot like the observed data $y$.
That is, given $y$, the $y^{\rm rep}$ we generate should be plausible. Another useful plot we can make using `pp_check` shows the distribution of a test quantity $T(y^{\rm rep})$ compared to $T(y)$, the value of the quantity in the observed data. When the argument `plotfun = "stat"` is specified, `pp_check` will simulate $S$ datasets $y_1^{\rm rep}, \dots, y_S^{\rm rep}$, each containing $N$ observations. Here $S$ is the size of the posterior sample (the number of MCMC draws from the posterior distribution of the model parameters) and $N$ is the length of $y$. We can then check if $T(y)$ is consistent with the distribution of $\left(T(y_1^{\rm rep}), \dots, T(y_S^{\rm rep})\right)$. In the plot below we see that the mean of the observations is plausible when compared to the distribution of the means of the $S$ $y^{\rm rep}$ datasets: ```{r, continuous-kidiq-pp_check2} pp_check(post4, plotfun = "stat", stat = "mean") ``` Using `plotfun="stat_2d"` we can also specify two test quantities and look at a scatterplot: ```{r, continuous-kidiq-pp_check3} pp_check(post4, plotfun = "stat_2d", stat = c("mean", "sd")) ``` ### Generating predictions The `posterior_predict` function is used to generate replicated data $y^{\rm rep}$ or predictions for future observations $\tilde{y}$. Here we show how to use `posterior_predict` to generate predictions of the outcome `kid_score` for a range of different values of `mom_iq` and for both subpopulations defined by `mom_hs`. ```{r, continuous-kidiq-posterior_predict} IQ_SEQ <- seq(from = 75, to = 135, by = 5) y_nohs <- posterior_predict(post4, newdata = data.frame(mom_hs = 0, mom_iq = IQ_SEQ)) y_hs <- posterior_predict(post4, newdata = data.frame(mom_hs = 1, mom_iq = IQ_SEQ)) dim(y_hs) ``` We now have two matrices, `y_nohs` and `y_hs`. Each matrix has as many columns as there are values of `IQ_SEQ` and as many rows as the size of the posterior sample.
One way to show the predictors is to plot the predictions for the two groups of kids side by side: ```{r, continuous-kidiq-plot-predict, fig.width=7} par(mfrow = c(1:2), mar = c(5,4,2,1)) boxplot(y_hs, axes = FALSE, outline = FALSE, ylim = c(10,170), xlab = "Mom IQ", ylab = "Predicted Kid IQ", main = "Mom HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) axis(2, las = 1) boxplot(y_nohs, outline = FALSE, col = "red", axes = FALSE, ylim = c(10,170), xlab = "Mom IQ", ylab = NULL, main = "Mom No HS") axis(1, at = 1:ncol(y_hs), labels = IQ_SEQ, las = 3) ``` ```{r, continuous-kidiq-validation, eval=FALSE, include=FALSE} # # External Validation # source(paste0(ROOT, "ARM/Ch.3/kids_before1987.data.R"), # local = kidiq, verbose = FALSE) # source(paste0(ROOT, "ARM/Ch.3/kids_after1987.data.R"), # local = kidiq, verbose = FALSE) # post5 <- stan_lm(ppvt ~ hs + afqt, data = kidiq, # prior = R2(location = 0.25, what = "mean"), seed = SEED) # y_ev <- posterior_predict(post5, newdata = # data.frame(hs = kidiq$hs_ev, afqt = kidiq$afqt_ev)) # par(mfrow = c(1,1)) # hist(-sweep(y_ev, 2, STATS = kidiq$ppvt_ev, FUN = "-"), prob = TRUE, # xlab = "Predictive Errors in ppvt", main = "", las = 2) ``` # Gamma Regression Example Gamma regression is often used when the response variable is continuous and positive, and the _coefficient of variation_ (rather than the variance) is constant. We'll use one of the standard examples of Gamma regression, which is taken from McCullagh & Nelder (1989). This example is also given in the documentation for R's `glm` function. The outcome of interest is the clotting time of blood (in seconds) for "normal plasma diluted to nine different percentage concentrations with prothrombin-free plasma; clotting was induced by two lots of thromboplastin" (p. 300). 
The help page for R's `glm` function presents the example as follows: ```{r, continuous-clotting-mle, results='hide'} clotting <- data.frame( u = c(5,10,15,20,30,40,60,80,100), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12)) summary(glm(lot1 ~ log(u), data = clotting, family = Gamma)) summary(glm(lot2 ~ log(u), data = clotting, family = Gamma)) ``` To fit the analogous Bayesian models we can simply substitute `stan_glm` for `glm` above. However, instead of fitting separate models we can also reshape the data slightly and fit a model interacting lot with plasma concentration: ```{r, continuous-clotting-mcmc, results="hide"} clotting2 <- with(clotting, data.frame( log_plasma = rep(log(u), 2), clot_time = c(lot1, lot2), lot_id = factor(rep(c(1,2), each = length(u))) )) fit <- stan_glm(clot_time ~ log_plasma * lot_id, data = clotting2, family = Gamma, prior_intercept = normal(0, 1, autoscale = TRUE), prior = normal(0, 1, autoscale = TRUE), seed = 12345) ``` ```{r} print(fit, digits = 3) ``` In the output above, the estimate reported for `shape` is for the shape parameter of the Gamma distribution. The _reciprocal_ of the shape parameter can be interpreted similarly to what `summary.glm` refers to as the dispersion parameter. # References Gelman, A. and Hill, J. (2007). _Data Analysis Using Regression and Multilevel/Hierarchical Models._ Cambridge University Press, Cambridge, UK. McCullagh, P. and Nelder, J. A. (1989). _Generalized Linear Models._ Chapman and Hall/CRC Press, New York. 
rstanarm/inst/doc/glmer.Rmd0000644000176200001440000004771714370470372015424 0ustar liggesusers--- title: "Estimating Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to use the `stan_lmer`, `stan_glmer`, `stan_nlmer`, and `stan_gamm4` functions in the __rstanarm__ package to estimate linear and generalized (non-)linear models with parameters that may vary across groups. Before continuing, we recommend reading the vignettes (navigate up one level) for the various ways to use the `stan_glm` function. The _Hierarchical Partial Pooling_ vignette also has examples of both `stan_glm` and `stan_glmer`. # GLMs with group-specific terms Models with this structure are refered to by many names: multilevel models, (generalized) linear mixed (effects) models (GLMM), hierarchical (generalized) linear models, etc. In the simplest case, the model for an outcome can be written as $$\mathbf{y} = \alpha + \mathbf{X} \boldsymbol{\beta} + \mathbf{Z} \mathbf{b} + \boldsymbol{\epsilon},$$ where $\mathbf{X}$ is a matrix predictors that is analogous to that in Generalized Linear Models and $\mathbf{Z}$ is a matrix that encodes deviations in the predictors across specified groups. The terminology for the unknowns in the model is diverse. To frequentists, the error term consists of $\mathbf{Z}\mathbf{b} + \boldsymbol{\epsilon}$ and the observations within each group are _not_ independent conditional on $\mathbf{X}$ alone. Since, $\mathbf{b}$ is considered part of the random error term, frequentists allow themselves to make distributional assumptions about $\mathbf{b}$, invariably that it is distributed multivariate normal with mean vector zero and structured covariance matrix $\boldsymbol{\Sigma}$. 
If $\epsilon_i$ is also distributed (univariate) normal with mean zero and standard deviation $\sigma$, then $\mathbf{b}$ can be integrated out, which implies $$\mathbf{y} \thicksim \mathcal{N}\left(\alpha + \mathbf{X}\boldsymbol{\beta}, \sigma^2 \mathbf{I}+\mathbf{Z}^\top \boldsymbol{\Sigma} \mathbf{Z} \right),$$ and it is possible to maximize this likelihood function by choosing proposals for the parameters $\alpha$, $\boldsymbol{\beta}$, and (the free elements of) $\boldsymbol{\Sigma}$. Consequently, frequentists refer to $\mathbf{b}$ as the _random effects_ because they capture the random deviation in the effects of predictors from one group to the next. In contradistinction, $\alpha$ and $\boldsymbol{\beta}$ are referred to as _fixed effects_ because they are the same for all groups. Moreover, $\alpha$ and $\boldsymbol{\beta}$ persist in the model in hypothetical replications of the analysis that draw the members of the groups afresh every time, whereas $\mathbf{b}$ would differ from one replication to the next. Consequently, $\mathbf{b}$ is not a "parameter" to be estimated because parameters are unknown constants that are fixed in repeated sampling. Bayesians condition on the data in-hand without reference to repeated sampling and describe their _beliefs_ about the unknowns with prior distributions before observing the data. Thus, the likelihood in a simple hierarchical model in __rstanarm__ is $$\mathbf{y} \thicksim \mathcal{N}\left(\alpha + \mathbf{X}\boldsymbol{\beta} + \mathbf{Z}\mathbf{b}, \sigma^2 \mathbf{I}\right)$$ and the observations are independent conditional on $\mathbf{X}$ and $\mathbf{Z}$.
In this formulation, there are * intercept(s) and coefficients that are _common across groups_ * deviations in the intercept(s) and / or coefficients that _vary across groups_ Bayesians are compelled to state their prior beliefs about all unknowns and the usual assumption (which is maintained in __rstanarm__) is that $\mathbf{b} \thicksim \mathcal{N}\left(\mathbf{0},\boldsymbol{\Sigma}\right),$ but it is then necessary to state prior beliefs about $\boldsymbol{\Sigma}$, in addition to $\alpha$, $\boldsymbol{\beta}$, and $\sigma$. One of the many challenges of fitting models to data comprising multiple groupings is confronting the tradeoff between validity and precision. An analysis that disregards between-group heterogeneity can yield parameter estimates that are wrong if there is between-group heterogeneity but would be relatively precise if there actually were no between-group heterogeneity. Group-by-group analyses, on the other hand, are valid but produce estimates that are relatively imprecise. While complete pooling or no pooling of data across groups is sometimes called for, models that ignore the grouping structures in the data tend to underfit or overfit (Gelman et al., 2013). Hierarchical modeling provides a compromise by allowing parameters to vary by group at lower levels of the hierarchy while estimating common parameters at higher levels. Inference for each group-level parameter is informed not only by the group-specific information contained in the data but also by the data for other groups as well. This is commonly referred to as _borrowing strength_ or _shrinkage_. In __rstanarm__, these models can be estimated using the `stan_lmer` and `stan_glmer` functions, which are similar in syntax to the `lmer` and `glmer` functions in the __lme4__ package. However, rather than performing (restricted) maximum likelihood (RE)ML estimation, Bayesian estimation is performed via MCMC.
The Bayesian model adds independent prior distributions on the regression coefficients (in the same way as `stan_glm`) as well as priors on the terms of a decomposition of the covariance matrices of the group-specific parameters. These priors are discussed in greater detail below. # Priors on covariance matrices In this section we discuss a flexible family of prior distributions for the unknown covariance matrices of the group-specific coefficients. ### Overview For each group, we assume the vector of varying slopes and intercepts is a zero-mean random vector following a multivariate Gaussian distribution with an unknown covariance matrix to be estimated. Unfortunately, expressing prior information about a covariance matrix is not intuitive and can also be computationally challenging. When the covariance matrix is not $1\times 1$, it is often both much more intuitive and efficient to work instead with the __correlation__ matrix and variances. When the covariance matrix is $1\times 1$, we still denote it as $\boldsymbol{\Sigma}$ but most of the details in this section do not apply. The variances are in turn decomposed into the product of a simplex vector (probability vector) and the trace of the implied covariance matrix, which is defined as the sum of its diagonal elements. Finally, this trace is set equal to the product of the order of the matrix and the square of a scale parameter. This implied prior on a covariance matrix is represented by the `decov` (short for decomposition of covariance) function in __rstanarm__. ### Details Using the decomposition described above, the prior used for a correlation matrix $\Omega$ is called the LKJ distribution and has a probability density function proportional to the determinant of the correlation matrix raised to a power of $\zeta$ minus one: $$ f(\Omega | \zeta) \propto \text{det}(\Omega)^{\zeta - 1}, \quad \zeta > 0. 
$$ The shape of this prior depends on the value of the regularization parameter, $\zeta$ in the following ways: * If $\zeta = 1$ (the default), then the LKJ prior is jointly uniform over all correlation matrices of the same dimension as $\Omega$. * If $\zeta > 1$, then the mode of the distribution is the identity matrix. The larger the value of $\zeta$ the more sharply peaked the density is at the identity matrix. * If $0 < \zeta < 1$, then the density has a trough at the identity matrix. The $J \times J$ covariance matrix $\Sigma$ of a random vector $\boldsymbol{\theta} = (\theta_1, \dots, \theta_J)$ has diagonal entries ${\Sigma}_{jj} = \sigma^2_j = \text{var}(\theta_j)$. Therefore, the trace of the covariance matrix is equal to the sum of the variances. We set the trace equal to the product of the order of the covariance matrix and the square of a positive scale parameter $\tau$: $$\text{tr}(\Sigma) = \sum_{j=1}^{J} \Sigma_{jj} = J\tau^2.$$ The vector of variances is set equal to the product of a simplex vector $\boldsymbol{\pi}$ --- which is non-negative and sums to 1 --- and the scalar trace: $J \tau^2 \boldsymbol{\pi}$. Each element $\pi_j$ of $\boldsymbol{\pi}$ then represents the proportion of the trace (total variance) attributable to the corresponding variable $\theta_j$. For the simplex vector $\boldsymbol{\pi}$ we use a symmetric Dirichlet prior, which has a single _concentration_ parameter $\gamma > 0$: * If $\gamma = 1$ (the default), then the prior is jointly uniform over the space of simplex vectors with $J$ elements. * If $\gamma > 1$, then the prior mode corresponds to all variables having the same (proportion of total) variance, which can be used to ensure that the posterior variances are not zero. As the concentration parameter approaches infinity, this mode becomes more pronounced. * If $0 < \gamma < 1$, then the variances are more polarized. 
If all the elements of $\boldsymbol{\theta}$ were multiplied by the same number $k$, the trace of their covariance matrix would increase by a factor of $k^2$. For this reason, it is sensible to use a scale-invariant prior for $\tau$. We choose a Gamma distribution, with shape and scale parameters both set to $1$ by default, implying a unit-exponential distribution. Users can set the shape hyperparameter to some value greater than one to ensure that the posterior trace is not zero. In the case where $\boldsymbol{\Sigma}$ is $1\times 1$, $\tau$ is the cross-group standard deviation in the parameters and its square is the variance (so the Gamma prior with its shape and scale directly applies to the cross-group standard deviation in the parameters). # Comparison with __lme4__ There are several advantages to estimating these models using __rstanarm__ rather than the __lme4__ package. There are also a few drawbacks. In this section we briefly discuss what we find to be the two most important advantages as well as an important disadvantage. ### Advantage: better uncertainty estimates While __lme4__ uses (restricted) maximum likelihood (RE)ML estimation, __rstanarm__ enables full Bayesian inference via MCMC to be performed. It is well known that (RE)ML tends to underestimate uncertainties because it relies on point estimates of hyperparameters. Full Bayes, on the other hand, propagates the uncertainty in the hyperparameters throughout all levels of the model and provides more appropriate estimates of uncertainty for models that consist of a mix of common and group-specific parameters. ### Advantage: incorporate prior information The `stan_glmer` and `stan_lmer` functions allow the user to specify prior distributions over the regression coefficients as well as any unknown covariance matrices. There are various reasons to specify priors, from helping to stabilize computation to incorporating important information into an analysis that does not enter through the data. 
### Disadvantage: speed The benefits of full Bayesian inference (via MCMC) come with a cost. Fitting models with (RE)ML will tend to be much faster than fitting a similar model using MCMC. Speed comparable to __lme4__ can be obtained with __rstanarm__ using approximate Bayesian inference via the mean-field and full-rank variational algorithms (see `help("rstanarm-package", "rstanarm")` for details). These algorithms can be useful to narrow the set of candidate models in large problems, but MCMC should always be used for final statistical inference. # Relationship to `glmer` In the __lme4__ package, there is a fundamental distinction between the way that Linear Mixed Models and Generalized Linear Mixed Models are estimated. In Linear Mixed Models, $\mathbf{b}$ can be integrated out analytically, leaving a likelihood function that can be maximized over proposals for the parameters. To estimate a Linear Mixed Model, one can call the `lmer` function. Generalized Linear Mixed Models are appropriate when the conditional mean of the outcome is determined by an inverse link function, $\boldsymbol{\mu} = g\left(\alpha + \mathbf{X} \boldsymbol{\beta} + \mathbf{Z}\mathbf{b}\right)$. If $g\left(\cdot\right)$ is not the identity function, then it is not possible to integrate out $\mathbf{b}$ analytically and numerical integration must be used. To estimate a Generalized Linear Mixed Model, one can call the `glmer` function and specify the `family` argument. In the __rstanarm__ package, there is no such fundamental distinction; in fact `stan_lmer` simply calls `stan_glmer` with `family = gaussian(link = "identity")`. Bayesians do not (have to) integrate $\mathbf{b}$ out of the likelihood and if $\mathbf{b}$ is not of interest, then the margins of its posterior distribution can simply be ignored. 
# Relationship to `gamm4` The __rstanarm__ package includes a `stan_gamm4` function that is similar to the `gamm4` function in the __gamm4__ package, which is in turn similar to the `gamm` function in the __mgcv__ package. The substring `gamm` stands for Generalized Additive Mixed Models, which differ from Generalized Additive Models (GAMs) due to the presence of group-specific terms that can be specified with the syntax of __lme4__. Both GAMs and GAMMs include nonlinear functions of (non-categorical) predictors called "smooths". In the example below, so-called "thin-plate splines" are used to model counts of roaches where we might fear that the number of roaches in the current period is an exponentially increasing function of the number of roaches in the previous period. Unlike `stan_glmer`, in `stan_gamm4` it is necessary to specify group-specific terms as a one-sided formula that is passed to the `random` argument as in the `lme` function in the __nlme__ package. ```{r, results = "hide"} library(rstanarm) data(roaches) roaches$roach1 <- roaches$roach1 / 100 roaches$log_exposure2 <- log(roaches$exposure2) post <- stan_gamm4( y ~ s(roach1) + treatment + log_exposure2, random = ~(1 | senior), data = roaches, family = neg_binomial_2, QR = TRUE, cores = 2, chains = 2, adapt_delta = 0.99, seed = 12345 ) ``` ```{r} plot_nonlinear(post) ``` Here we see that the relationship between past and present roaches is estimated to be nonlinear. For a small number of past roaches, the function is steep and then it appears to flatten out, although we become highly uncertain about the function in the rare cases where the number of past roaches is large. # Relationship to `nlmer` The `stan_gamm4` function allows designated predictors to have a nonlinear effect on what would otherwise be called the "linear" predictor in Generalized Linear Models. 
The `stan_nlmer` function is similar to the `nlmer` function in the __lme4__ package, and essentially allows a wider range of nonlinear functions that relate the linear predictor to the conditional expectation of a Gaussian outcome. To estimate an example model with the `nlmer` function in the __lme4__ package, we start by rescaling the outcome and main predictor(s) by a constant ```{r} data("Orange", package = "datasets") Orange$age <- Orange$age / 100 Orange$circumference <- Orange$circumference / 100 ``` Although doing so has no substantive effect on the inferences obtained, it is numerically much easier for Stan and for __lme4__ to work with variables whose units are such that the estimated parameters tend to be single-digit numbers that are not too close to zero. The `nlmer` function requires that the user pass starting values to the ironically-named self-starting non-linear function: ```{r, warning=TRUE} startvec <- c(Asym = 2, xmid = 7.25, scal = 3.5) library(lme4) nm1 <- nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, start = startvec) summary(nm1) ``` Note the warning messages indicating difficulty estimating the variance-covariance matrix. Although __lme4__ has a fallback mechanism, the need to utilize it suggests that the sample is too small to sustain the asymptotic assumptions underlying the maximum likelihood estimator. In the above example, we use the `SSlogis` function, which is a lot like the logistic CDF, but with an additional `Asym` argument that need not be one and indicates what value the function approaches for large values of the first argument. In this case, we can interpret the asymptote as the maximum possible circumference for an orange. 
However, this asymptote is allowed to vary from tree to tree using the `Asym | Tree` syntax, which reflects an assumption that the asymptote for a randomly-selected tree deviates from the asymptote for the population of orange trees in a Gaussian fashion with mean zero and an unknown standard deviation. The `nlmer` function supports user-defined non-linear functions, whereas the `stan_nlmer` function only supports the pre-defined non-linear functions starting with `SS` in the __stats__ package, which are ```{r, echo = FALSE} grep("^SS[[:lower:]]+", ls("package:stats"), value = TRUE) ``` To fit essentially the same model using Stan's implementation of MCMC, we add a `stan_` prefix ```{r, results = "hide"} post1 <- stan_nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree, data = Orange, cores = 2, seed = 12345, init_r = 0.5) ``` ```{r} post1 ``` In `stan_nlmer`, it is not necessary to supply starting values; however, in this case it was necessary to specify the `init_r` argument so that the randomly-chosen starting values were not more than $0.5$ away from zero (in the unconstrained parameter space). The default value of $2.0$ produced suboptimal results. As can be seen, the posterior medians and estimated standard deviations in the MCMC case are quite similar to the maximum likelihood estimates and estimated standard errors. However, `stan_nlmer` produces uncertainty estimates for the tree-specific deviations in the asymptote, which are considerable. 
```{r}
plot(post1, regex_pars = "^[b]")
```

As can be seen, the age of the tree has a non-linear effect on the predicted
circumference of the tree (here for an out-of-sample tree):

```{r}
nd <- data.frame(age = 1:20, Tree = factor("6", levels = 1:6))
PPD <- posterior_predict(post1, newdata = nd)
PPD_df <- data.frame(age = as.factor(rep(1:20, each = nrow(PPD))),
                     circumference = c(PPD))
ggplot(PPD_df, aes(age, circumference)) + geom_boxplot()
```

If we were pharmacologists, we could evaluate drug concentration using a
first-order compartment model, such as

```{r, eval = FALSE}
post3 <- stan_nlmer(conc ~ SSfol(Dose, Time, lKe, lKa, lCl) ~
                      (0 + lKe + lKa + lCl | Subject),
                    data = Theoph, cores = 2, seed = 12345,
                    QR = TRUE, init_r = 0.25, adapt_delta = 0.999)
pairs(post3, regex_pars = "^l")
pairs(post3, regex_pars = "igma")
```

However, in this case the posterior distribution is bimodal. Thus, you should
always be running many chains when using Stan, especially `stan_nlmer`.

# Conclusion

There are model fitting functions in the **rstanarm** package that can do
essentially all of what can be done in the **lme4** and **gamm4** packages ---
in the sense that they can fit models with multilevel structure and / or
nonlinear relationships --- and propagate the uncertainty in the parameter
estimates to the predictions and other functions of interest. The
documentation of **lme4** and **gamm4** has various warnings that acknowledge
that the estimated standard errors, confidence intervals, etc. are not
entirely correct, even from a frequentist perspective. A frequentist point
estimate would also completely miss the second mode in the last example with
`stan_nlmer`. Thus, there is considerable reason to prefer the **rstanarm**
variants of these functions for regression modeling. The only disadvantage is
the execution time required to produce an answer that properly captures the
uncertainty in the estimates of complicated models such as these.
rstanarm/inst/doc/betareg.Rmd0000644000176200001440000002252113722762571015716 0ustar liggesusers--- title: "Modeling Rates/Proportions using Beta Regression with rstanarm" author: "Imad Ali, Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette explains how to model continuous outcomes on the open unit interval using the `stan_betareg` function in the __rstanarm__ package. ```{r, child="children/four_steps.txt"} ``` Steps 3 and 4 are covered in more depth by the vignette entitled ["How to Use the __rstanarm__ Package"](rstanarm.html). This vignette focuses on Step 1 when the likelihood is the product of beta distributions. # Likelihood Beta regression uses the beta distribution as the likelihood for the data, $$ f(y_i | a, b) = \frac{y_i^{(a-1)}(1-y_i)^{(b-1)}}{B(a,b)} $$ where $B(\cdot)$ is the beta function. The shape parameters for the distribution are $a$ and $b$ and enter into the model according to the following transformations, $$ a = \mu\cdot\phi \\ b = (1-\mu)\cdot\phi $$ Let $g_1(\cdot)$ be some link function. Then, in the specification of the shape parameters above, $\mu = g_1^{-1}(\mathbf{X}\boldsymbol{\beta})$, where $\boldsymbol{X}$ is a $N\times K$ dimensional matrix of predictors, and $\boldsymbol{\beta}$ is a $K$ dimensional vector of parameters associated with each predictor. In the simplest case (with only one set of regressors), $\phi$ is a scalar parameter. Alternatively, it is possible to model $\phi$ using a second set of regressors $\mathbf{Z}$. In this context let $g_2(\cdot)$ be some link function that is not necessarily identical to $g_1(\cdot)$. Then $\phi = g_2^{-1}(\mathbf{Z}\boldsymbol{\gamma})$, where $\boldsymbol{\gamma}$ is a $J$ dimensional vector of parameters associated with the $N\times J$ dimensional matrix of predictors $\mathbf{Z}$. 
After substituting the shape parameter values in, the likelihood used in beta regression takes the following form, $$ f(y_i | \mu, \phi) = \frac{y_i^{(\mu\phi-1)}(1-y_i)^{((1-\mu)\phi-1)}}{B(\mu\phi,(1-\mu)\phi)} $$ # Priors A full Bayesian analysis requires specifying prior distributions $f(\boldsymbol{\beta})$ and $f(\phi)$ for the vector of regression coefficients and $\phi$. When using `stan_betareg`, these distributions can be set using the `prior_intercept`, `prior`, and `prior_phi` arguments. The `stan_betareg` function supports a variety of prior distributions, which are explained in the __rstanarm__ documentation (`help(priors, package = 'rstanarm')`). When modeling $\phi$ with a linear predictor a full Bayesian analysis requires specifying the prior distributions $f(\boldsymbol{\beta})$ and $f(\boldsymbol{\gamma})$. In `stan_betareg` the prior distributions on $\boldsymbol{\gamma}$ can be set using the `prior_intercept_z` and `prior_z` arguments. As an example, suppose we have $K$ predictors and believe --- prior to seeing the data --- that $\beta_1, \dots, \beta_K$ and $\phi$ are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give $\phi$ and each of the $\beta$s this prior (with a scale of 1, say), in the call to `stan_betareg` we would include the arguments `prior_intercept = normal(0,1)`, `prior = normal(0,1)`, and `prior_phi = normal(0,1)`. If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. __Step 1__ in the "How to Use the __rstanarm__ Package" vignette discusses one such example. 
After fitting the model we can use the `prior_summary` function to print information about the prior distributions used when fitting the model. # Posterior When using only a *single set of regressors*, the posterior distribution of $\boldsymbol{\beta}$ and $\phi$ is proportional to the product of the likelihood contributions, the $K$ priors on the $\beta_k$ parameters, and $\phi$, $$ f(\boldsymbol{\beta},\phi|\mathbf{y},\mathbf{X}) \propto \prod_{i=1}^N f(y_i | a, b) \times \prod_{k=1}^K f(\beta_k) \times f(\phi) $$ When using *two sets of regressors*, the posterior distribution of $\boldsymbol{\beta}$ and $\boldsymbol{\gamma}$ is proportional to the product of the likelihood contribution, the $K$ priors on the $\beta_k$ parameters, and the $J$ priors on the $\gamma_j$ parameters, $$ f(\boldsymbol{\beta},\boldsymbol{\gamma}|\mathbf{y},\mathbf{X}) \propto \prod_{i=1}^N f(y_i | a, b) \times \prod_{k=1}^K f(\beta_k) \times \prod_{j=1}^J f(\gamma_j) $$ # An Example Using Simulated Data In this example the outcome variable $\mathbf{y}$ is simulated in a way that warrants the use of beta regression. It is worth mentioning that the data generation process is quite convoluted, which is apparent in the identification of the likelihood above. The data simulated below uses the logistic link function on the first set of regressors and the log link function on the second set of regressors. ```{r simulated-data, fig.height=5} SEED <- 1234 set.seed(SEED) eta <- c(1, -0.2) gamma <- c(1.8, 0.4) N <- 200 x <- rnorm(N, 2, 2) z <- rnorm(N, 0, 2) mu <- binomial(link = logit)$linkinv(eta[1] + eta[2]*x) phi <- binomial(link = log)$linkinv(gamma[1] + gamma[2]*z) y <- rbeta(N, mu * phi, (1 - mu) * phi) dat <- data.frame(cbind(y, x, z)) hist(dat$y, col = "darkgrey", border = F, main = "Distribution of Outcome Variable", xlab = "y", breaks = 20, freq = F) ``` The model can be fit by calling `stan_betareg`, using the appropriate link functions. 
```{r simulated-fit, results = "hide"} library(rstanarm) fit1 <- stan_betareg(y ~ x | z, data = dat, link = "logit", link.phi = "log", cores = 2, seed = 12345) fit2 <- stan_betareg(y ~ -1 + x , data = dat, link = "logit", link.phi = "log", cores = 2, seed = 12345) round(coef(fit1), 2) round(coef(fit2), 2) ``` ``` {r simulated-fit-print, echo=FALSE} round(coef(fit1), 2) round(coef(fit2), 2) ``` For clarity we can use `prior_summary` to print the information about the prior distributions used to fit the models. The priors used in `fit1` are provided below. ``` {r print-priors} prior_summary(fit1) ``` The usual posterior analyses are available in **rstanarm**. The plots below illustrate simulated values of the outcome variable. The incorrect model noticeably fails to capture the top of the distribution consistently in comparison to the true model. ```{r simulated-analysis, fig.height=5} library(ggplot2) library(bayesplot) bayesplot_grid( pp_check(fit1), pp_check(fit2), xlim = c(0,1), ylim = c(0,4), titles = c("True Model: y ~ x | z", "False Model: y ~ x - 1"), grid_args = list(ncol = 2) ) ``` We can also compare models by evaluating the expected log pointwise predictive density (`elpd`), which can be calculated using the `loo` method, which provides an interface for __rstanarm__ models to the functionality in the __loo__ package. ``` {r simulated-loo} loo1 <- loo(fit1) loo2 <- loo(fit2) loo_compare(loo1, loo2) ``` The difference in `elpd` is negative indicating that the expected predictive accuracy for the first model is higher. # An Example Using Gasoline Data In some applied contexts it may be necessary to work with an outcome variable that is a proportion. If the proportion is bound on the open unit interval then beta regression can be considered a reasonable estimation method. The `betareg` package provides a dataset on the proportion of crude oil converted to gasoline after distillation and fractionation. This variable is defined as yield. 
Below `stan_betareg` is used to model yield as a function of temperature,
pressure, and the batch of conditions.

```{r, gas-fit, results="hide"}
library(rstanarm)
data("GasolineYield", package = "betareg")
gas_fit1 <- stan_betareg(yield ~ temp + batch,
                         data = GasolineYield, link = "logit",
                         seed = 12345)
gas_fit2 <- stan_betareg(yield ~ temp + batch | pressure,
                         data = GasolineYield, link = "logit",
                         seed = 12345)
round(coef(gas_fit1), 2)
round(coef(gas_fit2), 2)
```
``` {r, gas-print, echo=FALSE}
round(coef(gas_fit1), 2)
round(coef(gas_fit2), 2)
```

The plots below illustrate simulated values of gasoline yield. While the first
model accounts for variation in batch conditions, its predictions look
somewhat uniform rather than resembling the peaked and right-skewed behavior
of the true data. The second model does a somewhat better job at capturing the
shape of the distribution, however its location is off as it is centered
around 0.50 rather than 0.20.

```{r gas-analysis, fig.height=5}
library(ggplot2)
bayesplot_grid(
  pp_check(gas_fit1), pp_check(gas_fit2),
  xlim = c(0,1),
  ylim = c(0,5),
  titles = c("gas_fit1", "gas_fit2"),
  grid_args = list(ncol = 2)
)
```
``` {r, gas-loo}
gas_loo1 <- loo(gas_fit1)
gas_loo2 <- loo(gas_fit2)
loo_compare(gas_loo1, gas_loo2)
```

Evaluating the expected log predictive distribution using `loo` reveals that
the second of the two models is preferred.

# References

Ferrari, SLP and Cribari-Neto, F (2004) "Beta Regression for Modeling Rates
and Proportions". _Journal of Applied Statistics._ Vol. 31, No. 07, p799-815.
rstanarm/inst/doc/mrp.R0000644000176200001440000005137214551551707014566 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE-------------------------------------------
stopifnot(require(knitr))
opts_chunk$set(
  comment=NA,
  message = FALSE,
  warning = FALSE,
  eval = identical(Sys.getenv("NOT_CRAN"), "true"),
  dev = "png",
  dpi = 150,
  fig.asp = 0.618,
  fig.width = 5,
  out.width = "60%",
  fig.align = "center"
)
## ----packages-1, message=FALSE------------------------------------------------
library(rstanarm)
library(ggplot2)
library(bayesplot)
theme_set(bayesplot::theme_default())
# options(mc.cores = 4)
## ----packages-2, eval=FALSE, message=FALSE------------------------------------
# library(dplyr)
# library(tidyr)
## ---- include=FALSE, collapse=TRUE--------------------------------------------
# Simulate data for the MRP (multilevel regression and poststratification)
# vignette.
#
# Argument:
#   n - number of survey respondents to draw.
#
# Returns a list with three data frames (all columns coerced to numeric):
#   sample    - the n simulated survey responses: binary outcome cat_pref
#               plus demographic cell indices and a respondent id,
#   poststrat - the poststratification table: one row per demographic cell
#               with its simulated population count N,
#   true_popn - the true cell-level preference probability cat_pref.
#
# NOTE(review): the result depends on the RNG state (runif, rbeta, sample,
# rnorm, rbinom are all drawn in a fixed order), so callers should set.seed()
# first for reproducibility.
simulate_mrp_data <- function(n) {
  J <- c(2, 3, 7, 3, 50) # male or not, eth, age, income level, state
  poststrat <- as.data.frame(array(NA, c(prod(J), length(J)+1)))
  # Columns of post-strat matrix, plus one for size
  colnames(poststrat) <- c("male", "eth", "age","income", "state",'N')
  # Enumerate every combination of the five demographic factors; male is
  # coded 0/1, the other factors are coded 1..J[k].
  count <- 0
  for (i1 in 1:J[1]){
    for (i2 in 1:J[2]){
      for (i3 in 1:J[3]){
        for (i4 in 1:J[4]){
          for (i5 in 1:J[5]){
            count <- count + 1
            # Fill them in so we know what category we are referring to
            poststrat[count, 1:5] <- c(i1-1, i2, i3,i4,i5)
          }
        }
      }
    }
  }
  # Proportion in each sample in the population
  p_male <- c(0.52, 0.48)
  p_eth <- c(0.5, 0.2, 0.3)
  p_age <- c(0.2,.1,0.2,0.2, 0.10, 0.1, 0.1)
  p_income<-c(.50,.35,.15)
  p_state_tmp<-runif(50,10,20)
  p_state<-p_state_tmp/sum(p_state_tmp)
  # Population count per cell: 250 million times the product of the
  # marginal proportions (factors are treated as independent).
  poststrat$N<-0
  for (j in 1:prod(J)){
    poststrat$N[j] <- round(250e6 * p_male[poststrat[j,1]+1] * p_eth[poststrat[j,2]] * p_age[poststrat[j,3]]*p_income[poststrat[j,4]]*p_state[poststrat[j,5]])
    #Adjust the N to be the number observed in each category in each group
  }
  # Now let's adjust for the probability of response
  p_response_baseline <- 0.01
  p_response_male <- c(2, 0.8) / 2.8
  p_response_eth <- c(1, 1.2, 2.5) / 4.7
  p_response_age <- c(1, 0.4, 1, 1.5, 3, 5, 7) / 18.9
  p_response_inc <- c(1, 0.9, 0.8) / 2.7
  p_response_state <- rbeta(50, 1, 1)
  p_response_state <- p_response_state / sum(p_response_state)
  # Cell-level response probability: baseline times the per-factor
  # response multipliers.
  p_response <- rep(NA, prod(J))
  for (j in 1:prod(J)) {
    p_response[j] <-
      p_response_baseline * p_response_male[poststrat[j, 1] + 1] *
      p_response_eth[poststrat[j, 2]] *
      p_response_age[poststrat[j, 3]] *
      p_response_inc[poststrat[j, 4]] *
      p_response_state[poststrat[j, 5]]
  }
  # Sampling weight is population size times response propensity, so the
  # sample is biased relative to the population by construction.
  people <- sample(prod(J), n, replace = TRUE,
                   prob = poststrat$N * p_response)
  ## For respondent i, people[i] is that person's poststrat cell,
  ## some number between 1 and prod(J)
  # n_cell counts respondents per cell; it is computed here but not
  # included in the returned list.
  n_cell <- rep(NA, prod(J))
  for (j in 1:prod(J)) {
    n_cell[j] <- sum(people == j)
  }
  # True logit-scale coefficients for the preference model.
  coef_male <- c(0,-0.3)
  coef_eth <- c(0, 0.6, 0.9)
  # NOTE(review): coef_age has 8 entries but only indices 1..7 are ever
  # used (J[3] == 7); the last entry appears to be unused -- confirm.
  coef_age <- c(0,-0.2,-0.3, 0.4, 0.5, 0.7, 0.8, 0.9)
  coef_income <- c(0,-0.2, 0.6)
  coef_state <- c(0, round(rnorm(49, 0, 1), 1))
  # Age-by-gender interaction: rows index gender (0/1 + 1), columns age.
  coef_age_male <- t(cbind(c(0, .1, .23, .3, .43, .5, .6),
                           c(0, -.1, -.23, -.5, -.43, -.5, -.6)))
  # True preference probability for each cell via inverse logit.
  true_popn <- data.frame(poststrat[, 1:5], cat_pref = rep(NA, prod(J)))
  for (j in 1:prod(J)) {
    true_popn$cat_pref[j] <- plogis(
      coef_male[poststrat[j, 1] + 1] +
        coef_eth[poststrat[j, 2]] +
        coef_age[poststrat[j, 3]] +
        coef_income[poststrat[j, 4]] +
        coef_state[poststrat[j, 5]] +
        coef_age_male[poststrat[j, 1] + 1, poststrat[j, 3]]
    )
  }
  #male or not, eth, age, income level, state, city
  # Draw each respondent's binary outcome from their cell's true
  # preference probability, then attach the cell's demographics.
  y <- rbinom(n, 1, true_popn$cat_pref[people])
  male <- poststrat[people, 1]
  eth <- poststrat[people, 2]
  age <- poststrat[people, 3]
  income <- poststrat[people, 4]
  state <- poststrat[people, 5]
  sample <- data.frame(cat_pref = y, male, age, eth, income, state,
                       id = 1:length(people))
  #Make all numeric:
  for (i in 1:ncol(poststrat)) {
    poststrat[, i] <- as.numeric(poststrat[, i])
  }
  for (i in 1:ncol(true_popn)) {
    true_popn[, i] <- as.numeric(true_popn[, i])
  }
  for (i in 1:ncol(sample)) {
    sample[, i] <- as.numeric(sample[, i])
  }
  list(
    sample = sample,
    poststrat = poststrat,
    true_popn = true_popn
  )
}
## ----include=FALSE,
eval=FALSE------------------------------------------------ # mrp_sim <- simulate_mrp_data(n=1200) # save(mrp_sim, file = "mrp-files/mrp_sim.rda", version = 2) ## ----eval=FALSE--------------------------------------------------------------- # mrp_sim <- simulate_mrp_data(n=1200) # str(mrp_sim) ## ---- echo=FALSE-------------------------------------------------------------- load("mrp-files/mrp_sim.rda") str(mrp_sim) ## ---- message=FALSE----------------------------------------------------------- sample <- mrp_sim[["sample"]] rbind(head(sample), tail(sample)) ## ----message=FALSE------------------------------------------------------------ poststrat <- mrp_sim[["poststrat"]] rbind(head(poststrat), tail(poststrat)) ## ----message=FALSE------------------------------------------------------------ true_popn <- mrp_sim[["true_popn"]] rbind(head(true_popn), tail(true_popn)) ## ----order-states------------------------------------------------------------- sample$state <- factor(sample$state, levels=1:50) sample$state <- with(sample, factor(state, levels=order(table(state)))) true_popn$state <- factor(true_popn$state,levels = levels(sample$state)) poststrat$state <- factor(poststrat$state,levels = levels(sample$state)) ## ----state-and-pop-data-for-plots, eval=FALSE, include=FALSE------------------ # # not evaluated to avoid tidyverse dependency # income_popn <- poststrat %>% # group_by(income) %>% # summarize(Num=sum(N)) %>% # mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Income',CAT=income) %>% # ungroup() # income_data <- sample %>% # group_by(income) %>% # summarise(Num=n()) %>% # mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Income',CAT=income) %>% # ungroup() # income<-rbind(income_data[,2:6],income_popn[,2:6]) # # age_popn <- poststrat%>% # group_by(age)%>% # summarize(Num=sum(N))%>% # mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Age',CAT=age)%>% # ungroup() # age_data <- sample%>% # group_by(age)%>% # summarise(Num=n())%>% # 
mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Age',CAT=age)%>% # ungroup() # age <- rbind(age_data[,2:6],age_popn[,2:6] ) # # eth_popn <- poststrat%>% # group_by(eth)%>% # summarize(Num=sum(N))%>% # mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Ethnicity',CAT=eth)%>% # ungroup() # eth_data <- sample%>% # group_by(eth)%>% # summarise(Num=n())%>% # mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Ethnicity',CAT=eth)%>% # ungroup() # eth<-rbind(eth_data[,2:6],eth_popn[,2:6]) # # male_popn <- poststrat%>% # group_by(male)%>% # summarize(Num=sum(N))%>% # mutate(PROP=Num/sum(Num),TYPE='Popn',VAR='Male',CAT=male)%>% # ungroup() # male_data <- sample%>% # group_by(male)%>% # summarise(Num=n())%>% # mutate(PROP=Num/sum(Num),TYPE='Sample',VAR='Male',CAT=male)%>% # ungroup() # male <- rbind(male_data[,2:6],male_popn[,2:6]) # # state_popn <- poststrat%>% # group_by(state)%>% # summarize(Num=sum(N))%>% # mutate(PROP=Num/sum(poststrat$N),TYPE='Popn',VAR='State',CAT=state)%>% # ungroup() # # state_plot_data <- sample%>% # group_by(state)%>% # summarise(Num=n())%>% # mutate(PROP=Num/nrow(sample),TYPE='Sample',VAR='State',CAT=state)%>% # ungroup() # # state_plot_data <- rbind(state_plot_data[,2:6],state_popn[,2:6]) # state_plot_data$TYPE <- factor(state_plot_data$TYPE, levels = c("Sample","Popn")) # # plot_data <- rbind(male,eth,age,income) # plot_data$TYPE <- factor(plot_data$TYPE, levels = c("Sample","Popn")) # # save(state_plot_data, file = "mrp-files/state_plot_data.rda", version = 2) # save(plot_data, file = "mrp-files/plot_data.rda", version = 2) ## ----plot-data, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"---- load("mrp-files/plot_data.rda") # created in previous chunk ggplot(data=plot_data, aes(x=as.factor(CAT), y=PROP, group=as.factor(TYPE), linetype=as.factor(TYPE))) + geom_point(stat="identity",colour='black')+ geom_line()+ facet_wrap( ~ VAR, scales = "free",nrow=1,ncol=5)+ theme_bw()+ scale_fill_manual(values=c('#1f78b4','#33a02c', 
'#e31a1c','#ff7f00','#8856a7'),guide=FALSE)+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c('0%','25%',"50%","75%","100%"))+ scale_alpha_manual(values=c(1, .3))+ ylab('Proportion')+ labs(alpha='')+ theme(legend.position="bottom", axis.title.y=element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), legend.text=element_text(size=10), axis.text=element_text(size=10), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) load("mrp-files/state_plot_data.rda") # created in previous chunk ggplot(data=state_plot_data, aes(x=as.factor(CAT), y=PROP, group=as.factor(TYPE), linetype=as.factor(TYPE))) + geom_point(stat="identity",colour='black')+ geom_line()+ facet_wrap( ~ VAR)+ theme_bw()+ scale_fill_manual(values=c('#1f78b4','#33a02c', '#e31a1c','#ff7f00','#8856a7'),guide=FALSE)+ scale_y_continuous(breaks=c(0,.025,.05,1), labels=c('0%','2.5%',"5%","100%"),expand=c(0,0),limits=c(0,.06))+ scale_alpha_manual(values=c(1, .3))+ ylab('Proportion')+ labs(alpha='')+ theme(legend.position="bottom", axis.title.y=element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), legend.text=element_text(size=10), axis.text.y=element_text(size=10), axis.text.x=element_text(size=8,angle=90), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ## ---- eval=FALSE, echo=FALSE-------------------------------------------------- # # not evaluated to avoid dependency on tidyverse # # #Summarise # summary_by_poststrat_var <- sample %>% # gather(variable,category,c("income","eth","age","male")) %>% # group_by(variable,category) %>% # #Wald confidence interval # summarise(y_mean=mean(cat_pref),y_sd=sqrt(mean(cat_pref)*(1-mean(cat_pref))/n())) %>% # ungroup() # summary_by_poststrat_var$variable <- as.factor(summary_by_poststrat_var$variable) # levels(summary_by_poststrat_var$variable) <- list('Age'='age','Ethnicity'='eth','Income'='income','Male'='male') # # save(summary_by_poststrat_var, file = 
"mrp-files/summary_by_poststrat_var.rda", # version = 2) ## ----plot-summary-by-poststrat-var, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"---- load("mrp-files/summary_by_poststrat_var.rda") # created in previous chunk ggplot(data=summary_by_poststrat_var, aes(x=as.factor(category), y=y_mean,group=1)) + geom_errorbar(aes(ymin=y_mean-y_sd, ymax=y_mean+y_sd), width=0)+ geom_line()+ geom_point()+ scale_colour_manual(values=c('#1f78b4','#33a02c','#e31a1c','#ff7f00', '#8856a7'))+theme_bw()+ facet_wrap(~variable,scales = "free_x",nrow=1,ncol=5)+ scale_y_continuous(breaks=c(.5,.75,1), labels=c("50%","75%", "100%"), limits=c(0.4-.4*.05,.9),expand = c(0,0))+ labs(x="",y="Cat preference")+ theme(legend.position="none", axis.title.y=element_text(size=10), axis.title.x=element_blank(), axis.text=element_text(size=10), strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ## ----interaction-summary, eval=FALSE, echo=FALSE------------------------------ # # not evaluated to avoid dependency on tidyverse # # #Summarise # interaction <- sample %>% # gather(variable, category, c("age", "eth")) %>% # group_by(variable, category, male) %>% # summarise(y_mean = mean(cat_pref), # y_sd = sqrt(mean(cat_pref) * (1 - mean(cat_pref)) / n())) %>% # ungroup() # # #Tidy for nice facet labels # interaction$variable <- as.factor(interaction$variable) # levels(interaction$variable) <- list('Ethnicity' = 'eth', 'Age' = 'age') # save(interaction, file = "mrp-files/interaction.rda", version = 2) ## ----plot-interaction, echo=FALSE, fig.height = 4, fig.width = 7, fig.align = "center"---- load("mrp-files/interaction.rda") # created in previous chunk ggplot(data=interaction, aes(x=as.factor(category), y=y_mean, colour=as.factor(male),group=as.factor(male))) + geom_errorbar(aes(ymin=y_mean-y_sd, ymax=y_mean+y_sd),width=0 )+ geom_line(aes(x=as.factor(category), y=y_mean,colour=as.factor(male)))+ geom_point()+ facet_wrap(~variable,scales = 
"free_x",nrow=1,ncol=2)+ labs(x="",y="Cat preference",colour='Gender')+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%",'25%',"50%","75%", "100%"), limits=c(0,1),expand=c(0,0))+ scale_colour_manual(values=c('#4575b4','#d73027'))+theme_bw()+ theme(axis.title=element_text(size=10), axis.text=element_text(size=10), legend.position='none', strip.text=element_text(size=10), strip.background = element_rect(fill='grey92')) ## ---- eval=FALSE, echo=FALSE-------------------------------------------------- # # not evaluated to avoid dependency on tidyverse # # #Summarise by state # preference_by_state <- sample %>% # group_by(state) %>% # summarise(y_mean = mean(cat_pref), # y_sd = sqrt(mean(cat_pref) * (1 - mean(cat_pref)) / n())) %>% # ungroup() # # save(preference_by_state, file = "mrp-files/preference_by_state.rda", version = 2) ## ---- echo=FALSE, fig.height = 4, fig.width = 8, fig.align = "center"--------- load("mrp-files/preference_by_state.rda") compare <- ggplot(data=preference_by_state, aes(x=state, y=y_mean,group=1)) + geom_ribbon(aes(ymin=y_mean-y_sd,ymax=y_mean+y_sd,x=state),fill='lightgrey',alpha=.7)+ geom_line(aes(x=state, y=y_mean))+ geom_point()+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%","25%","50%","75%","100%"), limits=c(0,1), expand=c(0,0))+ scale_x_discrete(drop=FALSE)+ scale_colour_manual(values=c('#1f78b4','#33a02c','#e31a1c','#ff7f00', '#8856a7'))+ theme_bw()+ labs(x="States",y="Cat preference")+ theme(legend.position="none", axis.title=element_text(size=10), axis.text.y=element_text(size=10), axis.text.x=element_text(angle=90,size=8), legend.title=element_text(size=10), legend.text=element_text(size=10)) compare2 <- ggplot()+ geom_hline(yintercept = mean(sample$cat_pref),size=.8)+ geom_text(aes(x = 5.2, y = mean(sample$cat_pref)+.025, label = "Sample"))+ scale_y_continuous(breaks=c(0,.25,.5,.75,1), labels=c("0%","25%","50%","75%","100%"), limits=c(-0.25,1.25),expand=c(0,0))+ theme_bw()+ labs(x="Popn",y="")+ 
theme(legend.position="none", axis.title.y=element_blank(), axis.title.x=element_text(size=10), axis.text=element_blank(), axis.ticks=element_blank(), legend.title=element_text(size=10), legend.text=element_text(size=10)) bayesplot_grid(compare,compare2, grid_args = list(nrow=1, widths = c(8,1))) ## ---- message=FALSE, warning=FALSE, results='hide'---------------------------- fit <- stan_glmer( cat_pref ~ factor(male) + factor(male) * factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | income), family = binomial(link = "logit"), data = sample ) ## ----------------------------------------------------------------------------- print(fit) ## ---- message=FALSE----------------------------------------------------------- posterior_prob <- posterior_linpred(fit, transform = TRUE, newdata = poststrat) poststrat_prob <- posterior_prob %*% poststrat$N / sum(poststrat$N) model_popn_pref <- c(mean = mean(poststrat_prob), sd = sd(poststrat_prob)) round(model_popn_pref, 3) ## ---- message=FALSE----------------------------------------------------------- sample_popn_pref <- mean(sample$cat_pref) round(sample_popn_pref, 3) ## ---- message=FALSE,fig.height = 4, fig.width = 8, fig.align = "center"------- compare2 <- compare2 + geom_hline(yintercept = model_popn_pref[1], colour = '#2ca25f', size = 1) + geom_text(aes(x = 5.2, y = model_popn_pref[1] + .025), label = "MRP", colour = '#2ca25f') bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ## ---- message=FALSE----------------------------------------------------------- true_popn_pref <- sum(true_popn$cat_pref * poststrat$N) / sum(poststrat$N) round(true_popn_pref, 3) ## ---- echo=FALSE, message=FALSE,fig.height = 4, fig.width = 8, fig.align = "center"---- compare2 <- compare2 + geom_hline(yintercept = mean(true_popn_pref), linetype = 'dashed', size = .8) + geom_text(aes(x = 5.2, y = mean(true_popn_pref) - .025), label = "True") bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 
1))) ## ---- message=FALSE----------------------------------------------------------- state_df <- data.frame( State = 1:50, model_state_sd = rep(-1, 50), model_state_pref = rep(-1, 50), sample_state_pref = rep(-1, 50), true_state_pref = rep(-1, 50), N = rep(-1, 50) ) for(i in 1:length(levels(as.factor(poststrat$state)))) { poststrat_state <- poststrat[poststrat$state == i, ] posterior_prob_state <- posterior_linpred( fit, transform = TRUE, draws = 1000, newdata = as.data.frame(poststrat_state) ) poststrat_prob_state <- (posterior_prob_state %*% poststrat_state$N) / sum(poststrat_state$N) #This is the estimate for popn in state: state_df$model_state_pref[i] <- round(mean(poststrat_prob_state), 4) state_df$model_state_sd[i] <- round(sd(poststrat_prob_state), 4) #This is the estimate for sample state_df$sample_state_pref[i] <- round(mean(sample$cat_pref[sample$state == i]), 4) #And what is the actual popn? state_df$true_state_pref[i] <- round(sum(true_popn$cat_pref[true_popn$state == i] * poststrat_state$N) / sum(poststrat_state$N), digits = 4) state_df$N[i] <- length(sample$cat_pref[sample$state == i]) } state_df[c(1,3:6)] state_df$State <- factor(state_df$State, levels = levels(sample$state)) ## ----------------------------------------------------------------------------- round(100 * c( mean = mean(abs(state_df$sample_state_pref-state_df$true_state_pref), na.rm = TRUE), max = max(abs(state_df$sample_state_pref-state_df$true_state_pref), na.rm = TRUE) )) ## ----------------------------------------------------------------------------- round(100 * c( mean = mean(abs(state_df$model_state_pref-state_df$true_state_pref)), max = max(abs(state_df$model_state_pref-state_df$true_state_pref)) )) ## ---- message=FALSE, echo=FALSE, fig.height = 4, fig.width = 8, fig.align = "center",warning=FALSE, fig.align = "center"---- #Summarise by state compare <- compare + geom_point(data=state_df, mapping=aes(x=State, y=model_state_pref), inherit.aes=TRUE,colour='#238b45')+ 
geom_line(data=state_df, mapping=aes(x=State, y=model_state_pref,group=1), inherit.aes=TRUE,colour='#238b45')+ geom_ribbon(data=state_df,mapping=aes(x=State,ymin=model_state_pref-model_state_sd, ymax=model_state_pref+model_state_sd,group=1), inherit.aes=FALSE,fill='#2ca25f',alpha=.3)+ geom_point(data=state_df, mapping=aes(x=State, y=true_state_pref), alpha=.5,inherit.aes=TRUE)+ geom_line(data=state_df, mapping=aes(x=State, y=true_state_pref), inherit.aes = TRUE,linetype='dashed') bayesplot_grid(compare, compare2, grid_args = list(nrow = 1, widths = c(8, 1))) ## ---- eval=FALSE-------------------------------------------------------------- # # not evaluated to avoid dependency on tidyverse # sample_alt <- sample %>% # group_by(male, age, income, state, eth) %>% # summarise(N_cat_pref = sum(cat_pref), N = n()) %>% # ungroup() ## ---- include=FALSE----------------------------------------------------------- load("mrp-files/sample_alt.rda") ## ---- message=FALSE, warning=FALSE, results='hide'---------------------------- fit2 <- stan_glmer( cbind(N_cat_pref, N - N_cat_pref) ~ factor(male) + factor(male) * factor(age) + (1 | state) + (1 | age) + (1 | eth) + (1 | income), family = binomial("logit"), data = sample_alt, refresh = 0 ) ## ----------------------------------------------------------------------------- print(fit2) ## ---- message=FALSE----------------------------------------------------------- posterior_prob_alt <- posterior_linpred(fit2, transform = TRUE, newdata = poststrat) poststrat_prob_alt <- posterior_prob_alt %*% poststrat$N / sum(poststrat$N) model_popn_pref_alt <- c(mean = mean(poststrat_prob_alt), sd = sd(poststrat_prob_alt)) round(model_popn_pref_alt, 3) ## ----------------------------------------------------------------------------- print(simulate_mrp_data) rstanarm/inst/doc/aov.html0000644000176200001440000005250014551550134015303 0ustar liggesusers Estimating ANOVA Models with rstanarm

Estimating ANOVA Models with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to estimate ANalysis Of VAriance (ANOVA) models using the stan_aov function in the rstanarm package.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data.
  2. Draw from the posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”. This vignette focuses on Step 1 when the likelihood is the product of independent normal distributions. We also demonstrate that Step 2 is not entirely automatic because it is sometimes necessary to specify some additional tuning parameters in order to obtain optimally efficient results.

Likelihood

The likelihood for one observation under a linear model can be written as a conditionally normal PDF \[\frac{1}{\sigma_{\epsilon} \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma_{\epsilon}}\right)^2},\] where \(\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}\) is a linear predictor and \(\sigma_{\epsilon}\) is the standard deviation of the error in predicting the outcome, \(y\). The likelihood of the entire sample is the product of \(N\) individual likelihood contributions.

An ANOVA model can be considered a special case of the above linear regression model where each of the \(K\) predictors in \(\mathbf{x}\) is a dummy variable indicating membership in a group. An equivalent linear predictor can be written as \(\mu_j = \alpha + \alpha_j\), which expresses the conditional expectation of the outcome in the \(j\)-th group as the sum of a common mean, \(\alpha\), and a group-specific deviation from the common mean, \(\alpha_j\).

Priors

If we view the ANOVA model as a special case of a linear regression model with only dummy variables as predictors, then the model could be estimated using the prior specification in the stan_lm function. In fact, this is exactly how the stan_aov function is coded. These functions require the user to specify a value for the prior location (by default the mode) of the \(R^2\), the proportion of variance in the outcome attributable to the predictors under a linear model. This prior specification is appealing in an ANOVA context because of the fundamental identity \[SS_{\mbox{total}} = SS_{\mbox{model}} + SS_{\mbox{error}},\] where \(SS\) stands for sum-of-squares. If we normalize this identity, we obtain the tautology \(1 = R^2 + \left(1 - R^2\right)\) but it is reasonable to expect a researcher to have a plausible guess for \(R^2\) before conducting an ANOVA. See the vignette for the stan_lm function (regularized linear models) for more information on this approach.

If we view the ANOVA model as a difference of means, then the model could be estimated using the prior specification in the stan_lmer function. In the syntax popularized by the lme4 package, y ~ 1 + (1|group) represents a likelihood where \(\mu_j = \alpha + \alpha_j\) and \(\alpha_j\) is normally distributed across the \(J\) groups with mean zero and some unknown standard deviation. The stan_lmer function specifies that this standard deviation has a Gamma prior with, by default, both its shape and scale parameters equal to \(1\), which is just a standard exponential distribution. However, the shape and scale parameters can be specified as other positive values. This approach also requires specifying a prior distribution on the standard deviation of the errors that is independent of the prior distribution for each \(\alpha_j\). See the vignette for the stan_glmer function (lme4-style models using rstanarm) for more information on this approach.

Example

We will utilize an example from the HSAUR3 package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book A Handbook of Statistical Analyses Using R (3rd Edition) (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results.

The model in section 4.3.1 analyzes an experiment where rats were subjected to different diets in order to see how much weight they gained. The experimental factors were whether their diet had low or high protein and whether the protein was derived from beef or cereal. Before seeing the data, one might expect that a moderate proportion of the variance in weight gain might be attributed to protein (source) in the diet. The frequentist ANOVA estimates can be obtained:

         (Intercept)         sourceCereal              typeLow 
               100.0                -14.1                -20.8 
sourceCereal:typeLow 
                18.8 

To obtain Bayesian estimates we can prepend stan_ to aov and specify the prior location of the \(R^2\) as well as optionally the number of cores that the computer is allowed to utilize:

stan_aov
 family:       gaussian [identity]
 formula:      weightgain ~ source * type
 observations: 40
 predictors:   4
------
                     Median MAD_SD
(Intercept)           98.7    4.6 
sourceCereal         -12.7    6.2 
typeLow              -18.7    6.4 
sourceCereal:typeLow  17.1    8.5 

Auxiliary parameter(s):
              Median MAD_SD
R2             0.2    0.1  
log-fit_ratio  0.0    0.1  
sigma         14.7    1.7  

ANOVA-like table:
                    Median MAD_SD
Mean Sq source      554.4  409.4 
Mean Sq type        968.6  592.4 
Mean Sq source:type 729.5  682.8 

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Here we have specified adapt_delta = 0.999 to decrease the stepsize and largely prevent divergent transitions. See the Troubleshooting section in the main rstanarm vignette for more details about adapt_delta. Also, our prior guess that \(R^2 = 0.5\) was overly optimistic. However, the frequentist estimates presumably overfit the data even more.

Alternatively, we could prepend stan_ to lmer and specify the corresponding priors

Comparing these two models using the loo function in the loo package reveals a negligible preference for the first approach that is almost entirely due to its having a smaller number of effective parameters as a result of the more regularizing priors. However, the difference is so small that it may seem advantageous to present the second results which are more in line with a mainstream Bayesian approach to an ANOVA model.

Conclusion

This vignette has compared and contrasted two approaches to estimating an ANOVA model with Bayesian techniques using the rstanarm package. They both have the same likelihood, so the (small in this case) differences in the results are attributable to differences in the priors.

The stan_aov approach just calls stan_lm and thus only requires a prior location on the \(R^2\) of the linear model. This seems rather easy to do in the context of an ANOVA decomposition of the total sum-of-squares in the outcome into model sum-of-squares and residual sum-of-squares.

The stan_lmer approach just calls stan_glm but specifies a normal prior with mean zero for the deviations from \(\alpha\) across groups. This is more in line with what most Bayesians would do naturally — particularly if the factors were considered “random” — but also requires a prior for \(\alpha\), \(\sigma\), and the standard deviation of the normal prior on the group-level intercepts. The stan_lmer approach is very flexible and might be more appropriate for more complicated experimental designs.

rstanarm/inst/doc/rstanarm.Rmd0000644000176200001440000006310214370470372016127 0ustar liggesusers--- title: "How to Use the rstanarm Package" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # Introduction This vignette provides an _overview_ of how to use the functions in the __rstanarm__ package that focuses on commonalities. The other __rstanarm__ vignettes go into the particularities of each of the individual model-estimating functions. The goal of the __rstanarm__ package is to make Bayesian estimation _routine_ for the most common regression models that applied researchers use. This will enable researchers to avoid the counter-intuitiveness of the frequentist approach to probability and statistics with only minimal changes to their existing R scripts. ```{r, child="children/four_steps.txt"} ``` Step 1 is necessarily model-specific and is covered in more detail in the other vignettes that cover specific forms of the marginal prior distribution and likelihood of the outcome. It is somewhat more involved than the corresponding first step of a frequentist analysis, which only requires that the likelihood of the outcome be specified. However, the default priors in the __rstanarm__ package should work well in the majority of cases. Steps 2, 3, and 4 are the focus of this vignette because they are largely not specific to how the joint distribution in Step 1 is specified. The key concept in Step 3 and Step 4 is the posterior predictive distribution, which is the distribution of the outcome implied by the model after having used the observed data to update our beliefs about the unknown parameters. Frequentists, by definition, have no posterior predictive distribution and frequentist predictions are subtly different from what applied researchers seek. 
Maximum likelihood estimates do _not_ condition on the observed outcome data and so the uncertainty in the estimates pertains to the variation in the sampling distribution of the estimator, i.e. the distribution of estimates that would occur if we could repeat the process of drawing a random sample from a well-defined population and apply the estimator to each sample. It is possible to construct a distribution of predictions under the frequentist paradigm but it evokes the hypothetical of repeating the process of drawing a random sample, applying the estimator each time, and generating point predictions of the outcome. In contrast, the posterior predictive distribution conditions on the observed outcome data in hand to update beliefs about the unknowns and the variation in the resulting distribution of predictions reflects the remaining uncertainty in our beliefs about the unknowns. # Step 1: Specify a posterior distribution For the sake of discussion, we need some posterior distribution to draw from. We will utilize an example from the __HSAUR3__ package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book _A Handbook of Statistical Analyses Using R (3rd Edition)_ (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results. The model in section 6.3.2 pertains to whether a survey respondent agrees or disagrees with a conservative statement about the role of women in society, which is modeled as a function of the gender and education of the respondents. 
The posterior distribution --- with independent priors --- can be written as $$f\left(\alpha,\beta_1,\beta_2|\mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) f\left(\beta_1\right) f\left(\beta_2\right) \times \prod_{i=1}^J { g^{-1}\left(\eta_i\right)^{y_i} \left(1 - g^{-1}\left(\eta_i\right)\right)^{n_i-y_i}},$$ where $\eta_i = \alpha + \beta_1 \mbox{education}_i + \beta_2 \mbox{Female}_i$ is the linear predictor and a function of an intercept $\left(\alpha\right)$, a coefficient on the years of education $\left(\beta_1\right)$, and an intercept-shift $\left(\beta_2\right)$ for the case where the respondent is female. These data are organized such that $y_i$ is the number of respondents who agree with the statement that have the same level of education and the same gender, and $n_i - y_i$ is the number of such people who disagree with the statement. The inverse link function, $p = g^{-1}\left(\eta_i \right)$, for a binomial likelihood can be one of several Cumulative Distribution Functions (CDFs) but in this case is the standard logistic CDF, $g^{-1}\left(\eta_i \right)=\frac{1}{1 + e^{-\eta_i}}$. Suppose we believe --- prior to seeing the data --- that $\alpha$, $\beta_1$, and $\beta_2$ are probably close to zero, are as likely to be positive as they are to be negative, but have a small chance of being quite far from zero. These beliefs can be represented by Student t distributions with a few degrees of freedom in order to produce moderately heavy tails. In particular, we will specify seven degrees of freedom. Note that these purported beliefs may well be more skeptical than your actual beliefs, which are probably that women and people with more education have less conservative societal views. ### Note on "prior beliefs" and default priors In this vignette we use the term "prior beliefs" to refer in generality to the information content of the prior distribution (conditional on the model). 
Sometimes previous research on the topic of interest motivates beliefs about model parameters, but other times such work may not exist or several studies may make contradictory claims. Regardless, we nearly always have _some_ knowledge that should be reflected in our choice of prior distributions. For example, no one believes a logistic regression coefficient will be greater than five in absolute value if the predictors are scaled reasonably. You may also have seen examples of so-called "non-informative" (or "vague", "diffuse", etc.) priors like a normal distribution with a variance of 1000. When data are reasonably scaled, these priors are almost always a bad idea for various reasons (they give non-trivial weight to extreme values, reduce computational efficiency, etc). The default priors in __rstanarm__ are designed to be _weakly informative_, by which we mean that they avoid placing unwarranted prior weight on nonsensical parameter values and provide some regularization to avoid overfitting, but also do allow for extreme values if warranted by the data. If additional information is available, the weakly informative defaults can be replaced with more informative priors. # Step 2: Draw from the posterior distribution The likelihood for the sample is just the product over the $J$ groups of $$g^{-1}\left(\eta_i \right)^{y_i} \left(1 - g^{-1}\left(\eta_i \right)\right)^{n_i-y_i},$$ which can be maximized over $\alpha$, $\beta_1$, and $\beta_2$ to obtain frequentist estimates by calling ```{r rstanarm-mle, eval = TRUE} data("womensrole", package = "HSAUR3") womensrole$total <- womensrole$agree + womensrole$disagree womensrole_glm_1 <- glm(cbind(agree, disagree) ~ education + gender, data = womensrole, family = binomial(link = "logit")) round(coef(summary(womensrole_glm_1)), 3) ``` The p-value for the null hypothesis that $\beta_1 = 0$ is very small, while the p-value for the null hypothesis that $\beta_2 = 0$ is very large. 
However, frequentist p-values are awkward because they do not pertain to the probability that a scientific hypothesis is true but rather to the probability of observing a $z$-statistic that is so large (in magnitude) if the null hypothesis were true. The desire to make probabilistic statements about a scientific hypothesis is one reason why many people are drawn to the Bayesian approach. A model with the same likelihood but Student t priors with seven degrees of freedom can be specified using the __rstanarm__ package in a similar way by prepending `stan_` to the `glm` call and specifying priors (and optionally the number of cores on your computer to utilize): ```{r rstanarm-mcmc, results="hide", eval = TRUE} library(rstanarm) womensrole_bglm_1 <- stan_glm(cbind(agree, disagree) ~ education + gender, data = womensrole, family = binomial(link = "logit"), prior = student_t(df = 7, 0, 5), prior_intercept = student_t(df = 7, 0, 5), cores = 2, seed = 12345) womensrole_bglm_1 ``` ```{r, echo=FALSE, eval = TRUE} print(womensrole_bglm_1) ``` As can be seen, the "Bayesian point estimates" --- which are represented by the posterior medians --- are very similar to the maximum likelihood estimates. Frequentists would ask whether the point estimate is greater in magnitude than double the estimated standard deviation of the sampling distribution. But here we simply have estimates of the standard deviation of the marginal posterior distributions, which are based on a scaling of the Median Absolute Deviation (MAD) from the posterior medians to obtain a robust estimator of the posterior standard deviation. 
In addition, we can use the `posterior_interval` function to obtain a Bayesian uncertainty interval for $\beta_1$: ```{r rstanarm-ci, eval = TRUE} ci95 <- posterior_interval(womensrole_bglm_1, prob = 0.95, pars = "education") round(ci95, 2) ``` Unlike frequentist confidence intervals --- which are _not_ interpretable in terms of post-data probabilities --- the Bayesian uncertainty interval indicates we believe after seeing the data that there is a $0.95$ probability that $\beta_1$ is between `ci95[1,1]` and `ci95[1,2]`. Alternatively, we could say that there is essentially zero probability that $\beta_1 > 0$, although frequentists cannot make such a claim coherently. Many of the post-estimation methods that are available for a model that is estimated by `glm` are also available for a model that is estimated by `stan_glm`. For example, ```{r rstanarm-methods, eval = TRUE} cbind(Median = coef(womensrole_bglm_1), MAD_SD = se(womensrole_bglm_1)) summary(residuals(womensrole_bglm_1)) # not deviance residuals cov2cor(vcov(womensrole_bglm_1)) ``` __rstanarm__ does provide a `confint` method, although it is reserved for computing confidence intervals in the case that the user elects to estimate a model by (penalized) maximum likelihood. When using full Bayesian inference (the __rstanarm__ default) or approximate Bayesian inference the `posterior_interval` function should be used to obtain Bayesian uncertainty intervals. # Step 3: Criticize the model The `launch_shinystan` function in the __shinystan__ package provides almost all the tools you need to visualize the posterior distribution and diagnose any problems with the Markov chains. In this case, the results are fine and to verify that, you can call ```{r rstanarm-shinystan, eval = FALSE} launch_shinystan(womensrole_bglm_1, ppd = FALSE) ``` which will open a web browser that drives the visualizations. For the rest of this subsection, we focus on what users can do programmatically to evaluate whether a model is adequate. 
A minimal requirement for Bayesian estimates is that the model should fit the data that the estimates conditioned on. The key function here is `posterior_predict`, which can be passed a new `data.frame` to predict out-of-sample, but in this case is omitted to obtain in-sample posterior predictions: ```{r rstanarm-posterior_predict, eval = TRUE} y_rep <- posterior_predict(womensrole_bglm_1) dim(y_rep) ``` The resulting matrix has rows equal to the number of posterior simulations, which in this case is $2000$ and columns equal to the number of observations in the original dataset, which is $42$ combinations of education and gender. Each element of this matrix is a predicted number of respondents with that value of education and gender who agreed with the survey question and thus should be reasonably close to the observed proportion of agreements in the data. We can create a plot to check this: ```{r rstanarm-criticism-plot, fig.width=8, out.width="90%", fig.cap="Posterior predictive boxplots vs. 
observed datapoints", eval = TRUE} par(mfrow = 1:2, mar = c(5,3.7,1,0) + 0.1, las = 3) boxplot(sweep(y_rep[,womensrole$gender == "Male"], 2, STATS = womensrole$total[womensrole$gender == "Male"], FUN = "/"), axes = FALSE, main = "Male", pch = NA, xlab = "Years of Education", ylab = "Proportion of Agrees") with(womensrole, axis(1, at = education[gender == "Male"] + 1, labels = 0:20)) axis(2, las = 1) with(womensrole[womensrole$gender == "Male",], points(education + 1, agree / (agree + disagree), pch = 16, col = "red")) boxplot(sweep(y_rep[,womensrole$gender == "Female"], 2, STATS = womensrole$total[womensrole$gender == "Female"], FUN = "/"), axes = FALSE, main = "Female", pch = NA, xlab = "Years of Education", ylab = "") with(womensrole, axis(1, at = education[gender == "Female"] + 1, labels = 0:20)) with(womensrole[womensrole$gender == "Female",], points(education + 1, agree / (agree + disagree), pch = 16, col = "red")) ``` Here the boxplots provide the median, interquartile range, and hinges of the posterior predictive distribution for a given gender and level of education, while the red points represent the corresponding observed data. As can be seen, the model predicts the observed data fairly well for six to sixteen years of education but predicts less well for very low or very high levels of education where there are less data. Consequently, we might consider a model where education has a quadratic effect on agreement, which is easy to specify using R's formula-based syntax. ```{r, rstanarm-update, results="hide", eval = TRUE} (womensrole_bglm_2 <- update(womensrole_bglm_1, formula. = . ~ . + I(education^2))) ``` ```{r, echo=FALSE} print(womensrole_bglm_2) ``` Frequentists would test the null hypothesis that the coefficient on the squared level of education is zero. Bayesians might ask whether such a model is expected to produce better out-of-sample predictions than a model with only the level of education. 
The latter question can be answered using leave-one-out cross-validation or the approximation thereof provided by the `loo` function in the __loo__ package, for which a method is provided by the __rstanarm__ package. ```{r rstanarm-loo, eval = TRUE} loo_bglm_1 <- loo(womensrole_bglm_1) loo_bglm_2 <- loo(womensrole_bglm_2) ``` First, we verify that the posterior is not too sensitive to any particular observation in the dataset. ```{r rstanarm-loo-plot, fig.width=7, out.width="70%", eval = TRUE} par(mfrow = 1:2, mar = c(5,3.8,1,0) + 0.1, las = 3) plot(loo_bglm_1, label_points = TRUE) plot(loo_bglm_2, label_points = TRUE) ``` There are only one or two moderate outliers (whose statistics are greater than $0.5$), which should not have too much of an effect on the resulting model comparison: ```{r, rstanarm-loo-compare, eval = TRUE} loo_compare(loo_bglm_1, loo_bglm_2) ``` In this case, there is little difference in the expected log pointwise deviance between the two models, so we are essentially indifferent between them after taking into account that the second model estimates an additional parameter. The "LOO Information Criterion (LOOIC)" ```{r, rstanarm-loo-print, eval = TRUE} loo_bglm_1 ``` has the same purpose as the Akaike Information Criterion (AIC) that is used by frequentists. Both are intended to estimate the expected log predicted density (ELPD) for a new dataset. However, the AIC ignores priors and assumes that the posterior distribution is multivariate normal, whereas the functions from the loo package used here do not assume that the posterior distribution is multivariate normal and integrate over uncertainty in the parameters. This only assumes that any one observation can be omitted without having a major effect on the posterior distribution, which can be judged using the plots above. 
# Step 4: Analyze manipulations of predictors Frequentists attempt to interpret the estimates of the model, which is difficult except when the model is linear, has no inverse link function, and contains no interaction terms. Bayesians can avoid this difficulty simply by inspecting the posterior predictive distribution at different levels of the predictors. For example, ```{r, rstanarm-posterior_predict-manipulate, eval = TRUE} # note: in newdata we want agree and disagree to sum to the number of people we # want to predict for. the values of agree and disagree don't matter so long as # their sum is the desired number of trials. we need to explicitly imply the # number of trials like this because our original data are aggregate. if we had # bernoulli data then it would be a given we wanted to predict for single # individuals. newdata <- data.frame(agree = c(0,0), disagree = c(100,100), education = c(12,16), gender = factor("Female", levels = c("Male", "Female"))) y_rep <- posterior_predict(womensrole_bglm_2, newdata) summary(apply(y_rep, 1, diff)) ``` As can be seen, out of $100$ women who have a college degree versus $100$ women with only a high school degree, we would expect about $20$ fewer college-educated women to agree with the question. There is an even chance that the difference is between $24$ and $16$, a one-in-four chance that it is greater, and one-in-four chance that it is less. # Troubleshooting This section provides suggestions for how to proceed when you encounter warning messages generated by the modeling functions in the __rstanarm__ package. The example models below are used just for the purposes of concisely demonstrating certain difficulties and possible remedies (we won't worry about the merit of the models themselves). The references at the end provide more information on the relevant issues. ### Markov chains did not converge __Recommendation:__ run the chains for more iterations.
By default, all __rstanarm__ modeling functions will run four randomly initialized Markov chains, each for 2000 iterations (including a warmup period of 1000 iterations that is discarded). All chains must converge to the target distribution for inferences to be valid. For most models, the default settings are sufficient, but if you see a warning message about Markov chains not converging, the first thing to try is increasing the number of iterations. This can be done by specifying the `iter` argument (e.g. `iter = 3000`). One way to monitor whether a chain has converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. This is the motivation for the Gelman and Rubin potential scale reduction statistic Rhat. The Rhat statistic measures the ratio of the average variance of the draws within each chain to the variance of the pooled draws across chains; if all chains are at equilibrium, these will be the same and Rhat will be one. If the chains have not converged to a common distribution, the Rhat statistic will tend to be greater than one. Gelman and Rubin's recommendation is that the independent Markov chains be initialized with diffuse starting values for the parameters and sampled until all values for Rhat are below 1.1. When any Rhat values are above 1.1 __rstanarm__ will print a warning message like this: Markov chains did not converge! Do not analyze results! To illustrate how to check the Rhat values after fitting a model using __rstanarm__ we'll fit two models and run them for different numbers of iterations. ```{r, rstanarm-rhat-fit, results='hide', warning=TRUE, eval = TRUE} bad_rhat <- stan_glm(mpg ~ ., data = mtcars, iter = 20, chains = 2, seed = 12345) good_rhat <- update(bad_rhat, iter = 1000, chains = 2, seed = 12345) ``` Here the first model leads to the warning message about convergence but the second model does not. 
Indeed, we can see that many Rhat values are much bigger than 1 for the first model: ```{r, rstanarm-rhat-bad, eval = TRUE} rhat <- summary(bad_rhat)[, "Rhat"] rhat[rhat > 1.1] ``` Since we didn't get a warning for the second model we shouldn't find any parameters with an Rhat far from 1: ```{r, rstanarm-rhat-good, eval = TRUE} any(summary(good_rhat)[, "Rhat"] > 1.1) ``` Details on the computation of Rhat and some of its limitations can be found in [Stan Modeling Language User's Guide and Reference Manual](https://mc-stan.org/users/documentation/). ### Divergent transitions __Recommendation:__ increase the target acceptance rate `adapt_delta`.
Hamiltonian Monte Carlo (HMC), the MCMC algorithm used by [Stan](https://mc-stan.org), works by simulating the evolution of a Hamiltonian system. Stan uses a [symplectic integrator](https://en.wikipedia.org/wiki/Symplectic_integrator) to approximate the exact solution of the Hamiltonian dynamics. When the step size parameter is too large relative to the curvature of the log posterior this approximation can diverge and threaten the validity of the sampler. __rstanarm__ will print a warning if there are any divergent transitions after the warmup period, in which case the posterior sample may be biased. The recommended method is to increase the `adapt_delta` parameter -- target average proposal acceptance probability in the adaptation -- which will in turn reduce the step size. Each of the modeling functions accepts an `adapt_delta` argument, so to increase `adapt_delta` you can simply change the value from the default value to a value closer to $1$. To reduce the frequency with which users need to manually set `adapt_delta`, the default value depends on the prior distribution used (see `help("adapt_delta", package = "rstanarm")` for details). The downside to increasing the target acceptance rate -- and, as a consequence, decreasing the step size -- is that sampling will tend to be slower. Intuitively, this is because a smaller step size means that more steps are required to explore the posterior distribution. Since the validity of the estimates is not guaranteed if there are any post-warmup divergent transitions, the slower sampling is a minor cost. ### Maximum treedepth exceeded __Recommendation:__ increase the maximum allowed treedepth `max_treedepth`.
Configuring the No-U-Turn-Sampler (the variant of HMC used by Stan) involves putting a cap on the depth of the trees that it evaluates during each iteration. This is controlled through a maximum depth parameter `max_treedepth`. When the maximum allowed tree depth is reached it indicates that NUTS is terminating prematurely to avoid excessively long execution time. If __rstanarm__ prints a warning about transitions exceeding the maximum treedepth you should try increasing the `max_treedepth` parameter using the optional `control` argument. For example, to increase `max_treedepth` to 20 (the default used by __rstanarm__ is 15) you can provide the argument `control = list(max_treedepth = 20)` to any of the __rstanarm__ modeling functions. If you do not see a warning about hitting the maximum treedepth (which is rare), then you do not need to worry. # Conclusion In this vignette, we have gone through the four steps of a Bayesian analysis. The first step --- specifying the posterior distribution --- varies considerably from one analysis to the next because the likelihood function employed differs depending on the nature of the outcome variable and our prior beliefs about the parameters in the model vary not only from situation to situation but from researcher to researcher. However, given a posterior distribution and given that this posterior distribution can be drawn from using the __rstanarm__ package, the remaining steps are conceptually similar across analyses. The key is to draw from the posterior predictive distribution of the outcome, which is what the model predicts the outcome to be after having updated our beliefs about the unknown parameters with the observed data. Posterior predictive distributions can be used for model checking and for making inferences about how manipulations of the predictors would affect the outcome. Of course, all of this assumes that you have obtained draws from the posterior distribution faithfully.
The functions in the __rstanarm__ package will throw warnings if there is evidence that the draws are tainted, and we have discussed some steps to remedy these problems. For the most part, the model-fitting functions in the __rstanarm__ package are unlikely to produce many such warnings, but they may appear in more complicated models. If the posterior distribution that you specify in the first step cannot be sampled from using the __rstanarm__ package, then it is often possible to create a hand-written program in the Stan language so that the posterior distribution can be drawn from using the __rstan__ package. See the documentation for the __rstan__ package or https://mc-stan.org for more details about this more advanced usage of Stan. However, many relatively simple models can be fit using the __rstanarm__ package without writing any code in the Stan language, which is illustrated for each estimating function in the __rstanarm__ package in the other [vignettes](index.html). # References Betancourt, M. J., & Girolami, M. (2013). Hamiltonian Monte Carlo for hierarchical models. [arXiv preprint](https://arxiv.org/abs/1312.0906). Stan Development Team. (2015). _Stan modeling language user's guide and reference manual, Version 2.9.0_. https://mc-stan.org/documentation/. See the 'Hamiltonian Monte Carlo Sampling' chapter. Gelman, A., & Rubin, D. B. (1992). Inference from iterative simulation using multiple sequences. _Statistical Science_, 7(4), 457 -- 472. Gelman, A., & Shirley, K. (2011). Inference from simulations and monitoring convergence. In S. Brooks, A. Gelman, G. Jones, & X. Meng (Eds.), _Handbook of Markov chain Monte Carlo_. Boca Raton: Chapman & Hall/CRC.
rstanarm/inst/doc/plot_data.rda0000644000176200001440000000115713540753420016271 0ustar liggesusers r0b```b`fcd`b2Y# ',/OI,Id`` i9 P<L;73Ǽ$^u" M8_o.G6@ԙ1@H:*_/{1h ]~i2]vhɀS579=p!}0?_pwuO6?X|Տ,vi1vyA#P{P>йBڟ9s_ecc9[h|=l?OIvدc`*elL[v_@МYj3wΜqsv<.N?̼`69 i-/t@'}R?{_KK;fCac#n' 78Q@03bCGLx0u( -',5BEs rR<<4}90mpKA (7F&"#ZYRIKfdKϥ aT܁ Estimating Regularized Linear Models with rstanarm

Estimating Regularized Linear Models with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to estimate linear models using the stan_lm function in the rstanarm package.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data
  2. Draw from posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”. This vignette focuses on Step 1 when the likelihood is the product of independent normal distributions.

The goal of the rstanarm package is to make Bayesian estimation of common regression models routine. That goal can be partially accomplished by providing interfaces that are similar to the popular formula-based interfaces to frequentist estimators of those regression models. But fully accomplishing that goal sometimes entails utilizing priors that applied researchers are unaware that they prefer. These priors are intended to work well for any data that a user might pass to the interface that was generated according to the assumptions of the likelihood function.

It is important to distinguish between priors that are easy for applied researchers to specify and priors that are easy for applied researchers to conceptualize. The prior described below emphasizes the former but we outline its derivation so that applied researchers may feel more comfortable utilizing it.

Likelihood

The likelihood for one observation under a linear model can be written as a conditionally normal PDF \[\frac{1}{\sigma_{\epsilon} \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma_{\epsilon}}\right)^2},\] where \(\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}\) is a linear predictor and \(\sigma_{\epsilon}\) is the standard deviation of the error in predicting the outcome, \(y\). The likelihood of the entire sample is the product of \(N\) individual likelihood contributions.

It is well-known that the likelihood of the sample is maximized when the sum-of-squared residuals is minimized, which occurs when \[ \widehat{\boldsymbol{\beta}} = \left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}, \] \[ \widehat{\alpha} = \overline{y} - \overline{\mathbf{x}}^\top \widehat{\boldsymbol{\beta}}, \] \[ \widehat{\sigma}_{\epsilon}^2 = \frac{\left(\mathbf{y} - \widehat{\alpha} - \mathbf{X} \widehat{ \boldsymbol{\beta}}\right)^\top \left(\mathbf{y} - \widehat{\alpha} - \mathbf{X} \widehat{ \boldsymbol{\beta}}\right)}{N},\] where \(\overline{\mathbf{x}}\) is a vector that contains the sample means of the \(K\) predictors, \(\mathbf{X}\) is a \(N \times K\) matrix of centered predictors, \(\mathbf{y}\) is a \(N\)-vector of outcomes and \(\overline{y}\) is the sample mean of the outcome.

QR Decomposition

The lm function in R actually performs a QR decomposition of the design matrix, \(\mathbf{X} = \mathbf{Q}\mathbf{R}\), where \(\mathbf{Q}^\top \mathbf{Q} = \mathbf{I}\) and \(\mathbf{R}\) is upper triangular. Thus, the OLS solution for the coefficients can be written as \(\left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y} = \mathbf{R}^{-1} \mathbf{Q}^\top \mathbf{y}\). The lm function utilizes the QR decomposition for numeric stability reasons, but the QR decomposition is also useful for thinking about priors in a Bayesian version of the linear model. In addition, writing the likelihood in terms of \(\mathbf{Q}\) allows it to be evaluated in a very efficient manner in Stan.

Priors

The key innovation in the stan_lm function in the rstanarm package is the prior for the parameters in the QR-reparameterized model. To understand this prior, think about the equations that characterize the maximum likelihood solutions before observing the data on \(\mathbf{X}\) and especially \(\mathbf{y}\).

What would the prior distribution of \(\boldsymbol{\theta} = \mathbf{Q}^\top \mathbf{y}\) be? We can write its \(k\)-th element as \(\theta_k = \rho_k \sigma_Y \sqrt{N - 1}\) where \(\rho_k\) is the correlation between the \(k\)th column of \(\mathbf{Q}\) and the outcome, \(\sigma_Y\) is the standard deviation of the outcome, and \(\frac{1}{\sqrt{N-1}}\) is the standard deviation of the \(k\)th column of \(\mathbf{Q}\). Then let \(\boldsymbol{\rho} = \sqrt{R^2}\mathbf{u}\) where \(\mathbf{u}\) is a unit vector that is uniformly distributed on the surface of a hypersphere. Consequently, \(R^2 = \boldsymbol{\rho}^\top \boldsymbol{\rho}\) is the familiar coefficient of determination for the linear model.

An uninformative prior on \(R^2\) would be standard uniform, which is a special case of a Beta distribution with both shape parameters equal to \(1\). A non-uniform prior on \(R^2\) is somewhat analogous to ridge regression, which is popular in data mining and produces better out-of-sample predictions than least squares because it penalizes \(\boldsymbol{\beta}^\top \boldsymbol{\beta}\), usually after standardizing the predictors. An informative prior on \(R^2\) effectively penalizes \(\boldsymbol{\rho}^\top \boldsymbol{\rho}\), which encourages \(\boldsymbol{\beta} = \mathbf{R}^{-1} \boldsymbol{\theta}\) to be closer to the origin.

Lewandowski, Kurowicka, and Joe (2009) derives a distribution for a correlation matrix that depends on a single shape parameter \(\eta > 0\), which implies the variance of one variable given the remaining \(K\) variables is \(\mathrm{Beta}\left(\eta,\frac{K}{2}\right)\). Thus, the \(R^2\) is distributed \(\mathrm{Beta}\left(\frac{K}{2},\eta\right)\) and any prior information about the location of \(R^2\) can be used to choose a value of the hyperparameter \(\eta\). The R2(location, what) function in the rstanarm package supports four ways of choosing \(\eta\):

  1. what = "mode" and location is some prior mode on the \(\left(0,1\right)\) interval. This is the default but since the mode of a \(\mathrm{Beta}\left(\frac{K}{2},\eta\right)\) distribution is \(\frac{\frac{K}{2} - 1}{\frac{K}{2} + \eta - 2}\) the mode only exists if \(K > 2\). If \(K \leq 2\), then the user must specify something else for what.
  2. what = "mean" and location is some prior mean on the \(\left(0,1\right)\) interval, where the mean of a \(\mathrm{Beta}\left(\frac{K}{2},\eta\right)\) distribution is \(\frac{\frac{K}{2}}{\frac{K}{2} + \eta}\).
  3. what = "median" and location is some prior median on the \(\left(0,1\right)\) interval. The median of a \(\mathrm{Beta}\left(\frac{K}{2},\eta\right)\) distribution is not available in closed form but if \(K > 2\) it is approximately equal to \(\frac{\frac{K}{2} - \frac{1}{3}}{\frac{K}{2} + \eta - \frac{2}{3}}\). Regardless of whether \(K > 2\), the R2 function can numerically solve for the value of \(\eta\) that is consistent with a given prior median utilizing the quantile function.
  4. what = "log" and location is some (negative) prior value for \(\mathbb{E} \ln R^2 = \psi\left(\frac{K}{2}\right)- \psi\left(\frac{K}{2}+\eta\right)\), where \(\psi\left(\cdot\right)\) is the digamma function. Again, given a prior value for the left-hand side it is easy to numerically solve for the corresponding value of \(\eta\).

There is no default value for the location argument of the R2 function. This is an informative prior on \(R^2\), which must be chosen by the user in light of the research project. However, specifying location = 0.5 is often safe, in which case \(\eta = \frac{K}{2}\) regardless of whether what is "mode", "mean", or "median". In addition, it is possible to specify NULL, in which case a standard uniform on \(R^2\) is utilized.

We set \(\sigma_y = \omega s_y\) where \(s_y\) is the sample standard deviation of the outcome and \(\omega > 0\) is an unknown scale parameter to be estimated. The only prior for \(\omega\) that does not contravene Bayes’ theorem in this situation is Jeffreys prior, \(f\left(\omega\right) \propto \frac{1}{\omega}\), which is proportional to a Jeffreys prior on the unknown \(\sigma_y\), \(f\left(\sigma_y\right) \propto \frac{1}{\sigma_y} = \frac{1}{\omega s_y} \propto \frac{1}{\omega}\). This parameterization and prior makes it easy for Stan to work with any continuous outcome variable, no matter what its units of measurement are.

It would seem that we need a prior for \(\sigma_{\epsilon}\), but our prior beliefs about \(\sigma_{\epsilon} = \omega s_y \sqrt{1 - R^2}\) are already implied by our prior beliefs about \(\omega\) and \(R^2\). That only leaves a prior for \(\alpha = \overline{y} - \overline{\mathbf{x}}^\top \mathbf{R}^{-1} \boldsymbol{\theta}\). The default choice is an improper uniform prior, but a normal prior can also be specified such as one with mean zero and standard deviation \(\frac{\sigma_y}{\sqrt{N}}\).

Posterior

The previous sections imply a posterior distribution for \(\omega\), \(\alpha\), \(\mathbf{u}\), and \(R^2\). The parameters of interest can then be recovered as generated quantities:

  • \(\sigma_y = \omega s_y\)
  • \(\sigma_{\epsilon} = \sigma_y \sqrt{1 - R^2}\)
  • \(\boldsymbol{\beta} = \mathbf{R}^{-1} \mathbf{u} \sigma_y \sqrt{R^2 \left(N-1\right)}\)

The implementation actually utilizes an improper uniform prior on \(\ln \omega\). Consequently, if \(\ln \omega = 0\), then the marginal standard deviation of the outcome implied by the model is the same as the sample standard deviation of the outcome. If \(\ln \omega > 0\), then the marginal standard deviation of the outcome implied by the model exceeds the sample standard deviation, so the model overfits the data. If \(\ln \omega < 0\), then the marginal standard deviation of the outcome implied by the model is less than the sample standard deviation, so the model underfits the data or that the data-generating process is nonlinear. Given the regularizing nature of the prior on \(R^2\), a minor underfit would be considered ideal if the goal is to obtain good out-of-sample predictions. If the model badly underfits or overfits the data, then you may want to reconsider the model.

Example

We will utilize an example from the HSAUR3 package by Brian S. Everitt and Torsten Hothorn, which is used in their 2014 book A Handbook of Statistical Analyses Using R (3rd Edition) (Chapman & Hall / CRC). This book is frequentist in nature and we will show how to obtain the corresponding Bayesian results.

The model in section 5.3.1 analyzes an experiment where clouds were seeded with different amounts of silver iodide to see if there was increased rainfall. This effect could vary according to covariates, which (except for time) are interacted with the treatment variable. Most people would probably be skeptical that cloud hacking could explain very much of the variation in rainfall and thus the prior mode of the \(R^2\) would probably be fairly small.

The frequentist estimator of this model can be replicated by executing

                    (Intercept)                      seedingyes 
                         -0.346                          15.683 
                            sne                      cloudcover 
                          0.420                           0.388 
                     prewetness            echomotionstationary 
                          4.108                           3.153 
                           time                  seedingyes:sne 
                         -0.045                          -3.197 
          seedingyes:cloudcover           seedingyes:prewetness 
                         -0.486                          -2.557 
seedingyes:echomotionstationary 
                         -0.562 

Note that we have not looked at the estimated \(R^2\) or \(\sigma\) for the ordinary least squares model. We can estimate a Bayesian version of this model by prepending stan_ to the lm call, specifying a prior mode for \(R^2\), and optionally specifying how many cores the computer may utilize:

stan_lm
 family:       gaussian [identity]
 formula:      rainfall ~ seeding * (sne + cloudcover + prewetness + echomotion) + 
       time
 observations: 24
 predictors:   11
------
                                Median MAD_SD
(Intercept)                      2.4    2.3  
seedingyes                       6.8    3.8  
sne                              0.2    0.7  
cloudcover                       0.2    0.2  
prewetness                       1.7    2.8  
echomotionstationary             1.4    1.5  
time                             0.0    0.0  
seedingyes:sne                  -1.4    1.0  
seedingyes:cloudcover           -0.2    0.2  
seedingyes:prewetness           -1.1    3.5  
seedingyes:echomotionstationary -0.2    2.0  

Auxiliary parameter(s):
              Median MAD_SD
R2            0.3    0.1   
log-fit_ratio 0.0    0.1   
sigma         2.6    0.4   

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

In this case, the “Bayesian point estimates”, which are represented by the posterior medians, appear quite different from the ordinary least squares estimates. However, the log-fit_ratio (i.e. \(\ln \omega\)) is quite small, indicating that the model only slightly overfits the data when the prior derived above is utilized. Thus, it would be safe to conclude that the ordinary least squares estimator considerably overfits the data since there are only \(24\) observations to estimate \(12\) parameters with and no prior information on the parameters.

Also, it is not obvious what the estimated average treatment effect is since the treatment variable, seeding, is interacted with four other correlated predictors. However, it is easy to estimate or visualize the average treatment effect (ATE) using rstanarm’s posterior_predict function.

As can be seen, the treatment effect is not estimated precisely and is almost as likely to be negative as it is to be positive.

Alternative Approach

The prior derived above works well in many situations and is quite simple to use since it only requires the user to specify the prior location of the \(R^2\). Nevertheless, the implications of the prior are somewhat difficult to conceptualize. Thus, it is perhaps worthwhile to compare to another estimator of a linear model that simply puts independent Cauchy priors on the regression coefficients. This simpler approach can be executed by calling the stan_glm function with family = gaussian() and specifying the priors:

We can compare the two approaches using an approximation to Leave-One-Out (LOO) cross-validation, which is implemented by the loo function in the loo package.

Warning: Found 1 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 1 times to compute the ELPDs for the problematic observations directly.

Computed from 4000 by 24 log-likelihood matrix

         Estimate   SE
elpd_loo    -60.3  5.3
p_loo         5.9  2.4
looic       120.5 10.6
------
Monte Carlo SE of elpd_loo is NA.

Pareto k diagnostic values:
                         Count Pct.    Min. n_eff
(-Inf, 0.5]   (good)     21    87.5%   850       
 (0.5, 0.7]   (ok)        2     8.3%   372       
   (0.7, 1]   (bad)       1     4.2%   116       
   (1, Inf)   (very bad)  0     0.0%   <NA>      
See help('pareto-k-diagnostic') for details.
Warning: Found 3 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 3 times to compute the ELPDs for the problematic observations directly.
       elpd_diff se_diff
post    0.0       0.0   
simple -1.1       3.0   

The results indicate that the first approach is expected to produce better out-of-sample predictions but the Warning messages are at least as important. Many of the estimated shape parameters for the Generalized Pareto distribution are above \(0.5\) in the model with Cauchy priors, which indicates that these estimates are only going to converge slowly to the true out-of-sample deviance measures. Thus, with only \(24\) observations, they should not be considered reliable. The more complicated prior derived above is stronger — as evidenced by the fact that the effective number of parameters is about half of that in the simpler approach and \(12\) for the maximum likelihood estimator — and only has a few of the \(24\) Pareto shape estimates in the “danger zone”. We might want to reexamine these observations because the posterior is sensitive to them but, overall, the results seem tolerable.

In general, we would expect the joint prior derived here to work better when there are many predictors relative to the number of observations. Placing independent, heavy-tailed priors on the coefficients neither reflects the beliefs of the researcher nor conveys enough information to stabilize all the computations.

Conclusion

This vignette has discussed the prior distribution utilized in the stan_lm function, which has the same likelihood and a similar syntax as the lm function in R but adds the ability to express prior beliefs about the location of the \(R^2\), which is the familiar proportion of variance in the outcome variable that is attributable to the predictors under a linear model. Since the \(R^2\) is a well-understood bounded scalar, it is easy to specify prior information about it, whereas other Bayesian approaches require the researcher to specify a joint prior distribution for the regression coefficients (and the intercept and error variance).

However, most researchers have little inclination to specify all these prior distributions thoughtfully and take a short-cut by specifying one prior distribution that is taken to apply to all the regression coefficients as if they were independent of each other (and the intercept and error variance). This short-cut is available in the stan_glm function and is described in more detail in other rstanarm vignettes for Generalized Linear Models (GLMs), which can be found by navigating up one level.

We are optimistic that this prior on the \(R^2\) will greatly help in accomplishing our goal for rstanarm of making Bayesian estimation of regression models routine. The same approach is used to specify a prior in ANOVA models (see stan_aov) and proportional-odds models for ordinal outcomes (see stan_polr).

Finally, the stan_biglm function can be used when the design matrix is too large for the qr function to process. The stan_biglm function inputs the output of the biglm function in the biglm package, which utilizes an incremental QR decomposition that does not require the entire dataset to be loaded into memory simultaneously. However, the biglm function needs to be called in a particular way in order to work with stan_biglm. In particular, the means of the columns of the design matrix, the sample mean of the outcome, and the sample standard deviation of the outcome all need to be passed to the stan_biglm function, as well as a flag indicating whether the model really does include an intercept. Also, the number of columns of the design matrix currently cannot exceed the number of rows. Although stan_biglm should run fairly quickly and without much memory, the resulting object is a fairly plain stanfit object rather than an enhanced stanreg object like that produced by stan_lm. Many of the enhanced capabilities of a stanreg object depend on being able to access the full design matrix, so doing posterior predictions, posterior checks, etc. with the output of stan_biglm would require some custom R code.

References

Lewandowski, D., Kurowicka D., and Joe, H. (2009). Generating random correlation matrices based on vines and extended onion method. Journal of Multivariate Analysis. 100(9), 1989–2001.

rstanarm/inst/doc/pooling.html0000644000176200001440000161227014551551755016206 0ustar liggesusers Hierarchical Partial Pooling for Repeated Binary Trials

Hierarchical Partial Pooling for Repeated Binary Trials

Bob Carpenter, Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette illustrates the effects on posterior inference of pooling data (a.k.a sharing strength) across units for repeated binary trial data. It provides R code to fit and check predictive models for three situations: (a) complete pooling, which assumes each unit is the same, (b) no pooling, which assumes the units are unrelated, and (c) partial pooling, where the similarity among the units is estimated. The note explains with working examples how to (i) fit the models using rstanarm and plot the results, (ii) estimate event probabilities, (iii) evaluate posterior predictive densities to evaluate model predictions on held-out data, (iv) rank units by chance of success, (v) perform multiple comparisons in several settings, (vi) replicate new data for posterior \(p\)-values, and (vii) perform graphical posterior predictive checks.

The content of the vignette is based on Bob Carpenter’s Stan tutorial Hierarchical Partial Pooling for Repeated Binary Trials, but here we show how to fit the models and carry out predictions and model checking and comparison using rstanarm. Most of the text is taken from the original, with some additions and subtractions to make the content more useful for rstanarm users. The Stan code from the original tutorial has also been entirely removed, as rstanarm will fit all of the models in Stan without the user having to write the underlying Stan programs. The Stan code in the original document is a good reference for anyone interested in how these models are estimated “under-the-hood”, though the parameterizations used internally by rstanarm differ somewhat from those in the original.

Repeated Binary Trials

Suppose that for each of \(N\) units \(n \in 1{:}N\), we observe \(y_n\) successes out of \(K_n\) trials. For example, the data may consist of

  • rat tumor development, with \(y_n\) rats developing tumors of \(K_n\) total rats in experimental control group \(n \in 1{:}N\) (Tarone 1982)

  • surgical mortality, with \(y_n\) surgical patients dying in \(K_n\) surgeries for hospitals \(n \in 1{:}N\) (Spiegelhalter et al. 1996)

  • baseball batting ability, with \(y_n\) hits in \(K_n\) at bats for baseball players \(n \in 1{:}N\) (Efron and Morris 1975; Carpenter 2009)

  • machine learning system accuracy, with \(y_n\) correct classifications out of \(K_n\) examples for systems \(n \in 1{:}N\) (ML conference proceedings; Kaggle competitions)

In this vignette we use the small baseball data set of Efron and Morris (1975), but we also provide the rat control data of Tarone (1982), the surgical mortality data of Spiegelhalter et al. (1996) and the extended baseball data set of Carpenter (2009).

Baseball Hits (Efron and Morris 1975)

As a running example, we will use the data from Table 1 of (Efron and Morris 1975), which is included in rstanarm under the name bball1970 (it was downloaded 24 Dec 2015 from here). It is drawn from the 1970 Major League Baseball season (from both leagues).

       Player AB Hits RemainingAB RemainingHits
1    Clemente 45   18         367           127
2    Robinson 45   17         426           127
3      Howard 45   16         521           144
4   Johnstone 45   15         275            61
5       Berry 45   14         418           114
6     Spencer 45   14         466           126
7   Kessinger 45   13         586           155
8    Alvarado 45   12         138            29
9       Santo 45   11         510           137
10    Swaboda 45   11         200            46
11 Petrocelli 45   10         538           142
12  Rodriguez 45   10         186            42
13      Scott 45   10         435           132
14      Unser 45   10         277            73
15   Williams 45   10         591           195
16 Campaneris 45    9         558           159
17     Munson 45    8         408           129
18      Alvis 45    7          70            14

The data separates the outcome from the initial 45 at-bats from the rest of the season. After running this code, N is the number of units (players). Then for each unit n, K[n] is the number of initial trials (at-bats), y[n] is the number of initial successes (hits), K_new[n] is the remaining number of trials (remaining at-bats), and y_new[n] is the number of successes in the remaining trials (remaining hits).

The remaining data can be used to evaluate the predictive performance of our models conditioned on the observed data. That is, we will “train” on the first 45 at bats and see how well our various models do at predicting the rest of the season.

Pooling

With complete pooling, each unit is assumed to have the same chance of success. With no pooling, each unit is assumed to have a completely unrelated chance of success. With partial pooling, each unit is assumed to have a different chance of success, but the data for all of the observed units informs the estimates for each unit.

Partial pooling is typically accomplished through hierarchical models. Hierarchical models directly model the population of units. From a population model perspective, no pooling corresponds to infinite population variance, whereas complete pooling corresponds to zero population variance.

In the following sections, all three types of pooling models will be fit for the baseball data.

Fitting the Models

First we’ll create some useful objects to use throughout the rest of this vignette. One of them is a function batting_avg, which just formats a number to include three decimal places to the right of zero when printing, as is customary for batting averages.

Player averages through 45 at-bats:
 [1] 0.400 0.378 0.356 0.333 0.311 0.311 0.289 0.267 0.244 0.244 0.222 0.222
[13] 0.222 0.222 0.222 0.200 0.178 0.156
Overall average through 45 at-bats:
[1] 0.265

Complete Pooling

The complete pooling model assumes a single parameter \(\theta\) representing the chance of success for all units (in this case players).

Assuming each player’s at-bats are independent Bernoulli trials, the probability distribution for each player’s number of hits \(y_n\) is modeled as

\[ p(y_n \, | \, \theta) \ = \ \mathsf{Binomial}(y_n \, | \, K_n, \theta). \]

When viewed as a function of \(\theta\) for fixed \(y_n\), this is called the likelihood function.

Assuming each player is independent leads to the complete data likelihood

\[ p(y \, | \, \theta) = \prod_{n=1}^N \mathsf{Binomial}(y_n \, | \, K_n, \theta). \]

Using family=binomial("logit"), the stan_glm function in rstanarm will parameterize the model in terms of the log-odds \(\alpha\), which are defined by the logit transform as

\[ \alpha = \mathrm{logit}(\theta) = \log \, \frac{\theta}{1 - \theta}. \]

For example, \(\theta = 0.25\) corresponds to odds of \(.25\) to \(.75\) (equivalently, \(1\) to \(3\)), or log-odds of \(\log(0.25 / 0.75) \approx -1.1\).

The model is therefore

\[ p(y_n \, | \, K_n, \alpha) \ = \ \mathsf{Binomial}(y_n \, | \, K_n, \ \mathrm{logit}^{-1}(\alpha)) \]

The inverse logit function is the logistic sigmoid from which logistic regression gets its name because the inverse logit function is also the standard logistic Cumulative Distribution Function (CDF),

\[ \mathrm{logit}^{-1}(\alpha) = \frac{1}{1 + \exp(-\alpha)} = \theta. \]

By construction, for any \(\alpha \in (-\infty, \infty)\), \(\mathrm{logit}^{-1}(\alpha) \in (0, 1)\); the sigmoid converts arbitrary log odds back to the probability scale.

We will use a normal distribution with mean \(-1\) and standard deviation \(1\) as the prior on the log-odds \(\alpha\). This is a weakly informative prior that places about 95% of the prior probability in the interval \((-3, 1)\), which inverse-logit transforms to the interval \((0.05, 0.73)\). The prior median \(-1\) corresponds to a \(0.27\) chance of success. In fact, an even narrower prior is actually motivated here from substantial baseball knowledge.

The figure below shows both this prior on \(\alpha\) as well as the prior it implies on the probability \(\theta\).

To fit the model we call stan_glm with the formula cbind(Hits, AB - Hits) ~ 1. The left-hand side of the formula specifies the binomial outcome by providing the number of successes (hits) and failures (at-bats) for each player, and the right-hand side indicates that we want an intercept-only model.

The summary function will compute all sorts of summary statistics from the fitted model, but here we’ll create a small function that will compute just a few posterior summary statistics that we’ll want for each of the models we estimate. The summary_stats function, defined below, will take a matrix of posterior draws as its input, apply an inverse-logit transformation (to convert from log-odds to probabilities) and then compute the median and 80% interval.

           10%   50%   90%  
Clemente   0.245 0.265 0.285
Robinson   0.245 0.265 0.285
Howard     0.245 0.265 0.285
Johnstone  0.245 0.265 0.285
Berry      0.245 0.265 0.285
Spencer    0.245 0.265 0.285
Kessinger  0.245 0.265 0.285
Alvarado   0.245 0.265 0.285
Santo      0.245 0.265 0.285
Swaboda    0.245 0.265 0.285
Petrocelli 0.245 0.265 0.285
Rodriguez  0.245 0.265 0.285
Scott      0.245 0.265 0.285
Unser      0.245 0.265 0.285
Williams   0.245 0.265 0.285
Campaneris 0.245 0.265 0.285
Munson     0.245 0.265 0.285
Alvis      0.245 0.265 0.285

With more data, such as from more players or from the rest of the season, the posterior approaches a delta function around the maximum likelihood estimate and the central posterior intervals will shrink. Nevertheless, even if we know a player’s chance of success exactly, there is a large amount of uncertainty in running \(K\) binary trials with that chance of success; using a binomial model fundamentally bounds our prediction accuracy.

Although this model will be a good baseline for comparison, we have good reason to believe from a large amount of prior data (players with as many as 10,000 trials) that it is very unlikely that all baseball players have the same chance of success.

No Pooling

A model with no pooling involves a separate chance-of-success parameter \(\theta_n \in [0,1]\) for each player \(n\), where the \(\theta_n\) are assumed to be independent.

rstanarm will again parameterize the model in terms of the log-odds, \(\alpha_n = \mathrm{logit}(\theta_n)\), so the likelihood then uses the log-odds of success \(\alpha_n\) for unit \(n\) in modeling the number of successes \(y_n\) as

\[ p(y_n \, | \, \alpha_n) = \mathsf{Binomial}(y_n \, | \, K_n, \mathrm{logit}^{-1}(\alpha_n)). \]

Assuming the \(y_n\) are independent (conditional on \(\theta\)), this leads to the total data likelihood

\[ p(y \, | \, \alpha) = \prod_{n=1}^N \mathsf{Binomial}(y_n \, | \, K_n, \mathrm{logit}^{-1}(\alpha_n)). \]

To fit the model we need only tweak the model formula used for the complete pooling model to drop the intercept and instead include as the only predictor the factor variable Player. This is equivalent to estimating a separate intercept on the log-odds scale for each player. We’ll also use the prior (rather than prior_intercept) argument since Player is considered a predictor rather than an intercept from R’s perspective. Using the same weakly informative prior now means that each \(\alpha_n\) gets a \(\mathsf{Normal}(-1, 1)\) prior, independent of the others.

            
parameters   10%   50%   90%  
  Clemente   0.300 0.386 0.473
  Robinson   0.279 0.366 0.458
  Howard     0.263 0.344 0.435
  Johnstone  0.244 0.326 0.414
  Berry      0.227 0.305 0.393
  Spencer    0.226 0.306 0.389
  Kessinger  0.209 0.284 0.370
  Alvarado   0.190 0.266 0.352
  Santo      0.172 0.244 0.330
  Swaboda    0.172 0.243 0.328
  Petrocelli 0.154 0.223 0.305
  Rodriguez  0.157 0.226 0.305
  Scott      0.156 0.224 0.306
  Unser      0.156 0.225 0.303
  Williams   0.156 0.225 0.305
  Campaneris 0.138 0.204 0.282
  Munson     0.124 0.185 0.258
  Alvis      0.108 0.166 0.241

Each 80% interval is much wider than the estimated interval for the population in the complete pooling model; this is to be expected—there are only 45 data units for each parameter here as opposed to 810 in the complete pooling case. If the units each had different numbers of trials, the intervals would also vary based on size.

As the estimated chance of success goes up toward 0.5, the 80% intervals get wider. This is to be expected for chance of success parameters, because the variance is maximized when \(\theta = 0.5\).

Based on our existing knowledge of baseball, the no-pooling model is almost certainly overestimating the high abilities and underestimating lower abilities (Ted Williams, 30 years prior to the year this data was collected, was the last player with a 40% observed success rate over a season, whereas 20% or less is too low for all but a few rare defensive specialists).

Partial Pooling

Complete pooling provides estimated abilities that are too narrowly distributed for the units and removes any chance of modeling population variation. Estimating each chance of success separately without any pooling provides estimated abilities that are too broadly distributed for the units and hence too variable. Clearly some amount of pooling between these two extremes is called for. But how much?

A hierarchical model treats the players as belonging to a population of players. The properties of this population will be estimated along with player abilities, implicitly controlling the amount of pooling that is applied. The more variable the (estimate of the) population, the less pooling is applied. Mathematically, the hierarchical model places a prior on the abilities with parameters that are themselves estimated.

This model can be estimated using the stan_glmer function.

Because stan_glmer (like glmer) estimates the varying intercepts for Player by estimating a single global intercept \(\alpha_0\) and individual deviations from that intercept for each player \(\delta_n = \alpha_n - \alpha_0\), to get the posterior distribution for each \(\alpha_n\) we need to shift each of the posterior draws by the corresponding draw for the intercept. We can do this easily using the sweep function.

            
parameters   10%   50%   90%  
  Clemente   0.249 0.283 0.346
  Robinson   0.247 0.280 0.339
  Howard     0.244 0.277 0.332
  Johnstone  0.240 0.273 0.324
  Berry      0.237 0.271 0.316
  Spencer    0.237 0.271 0.315
  Kessinger  0.232 0.267 0.310
  Alvarado   0.228 0.265 0.304
  Santo      0.224 0.262 0.298
  Swaboda    0.223 0.261 0.297
  Petrocelli 0.216 0.258 0.293
  Rodriguez  0.217 0.258 0.294
  Scott      0.217 0.259 0.294
  Unser      0.216 0.259 0.295
  Williams   0.217 0.259 0.293
  Campaneris 0.212 0.256 0.290
  Munson     0.204 0.252 0.286
  Alvis      0.196 0.249 0.285

Here the estimates are less extreme than in the no-pooling case, which we should expect due to the partial pooling. It is also clear from the wide posteriors for the \(\theta_n\) that there is considerable uncertainty in the estimates of chance-of-success on an unit-by-unit (player-by-player) basis.

Observed vs. Estimated Chance of Success

Figure 5.4 from (Gelman et al. 2013) plots the observed number of successes \(y_n\) for the first \(K_n\) trials versus the median and 80% intervals for the estimated chance-of-success parameters \(\theta_n\) in the posterior. The following R code reproduces a similar plot for our data.

The horizontal axis is the observed rate of success, broken out by player (the overplotting is from players with the same number of successes—they all had the same number of trials in this data). The dots are the posterior medians with bars extending to cover the central 80% posterior interval. Players with the same observed rates are indistinguishable; any differences in estimates are due to MCMC error.

The horizontal red line has an intercept equal to the overall success rate. The overall success rate is also the posterior mode (i.e., maximum likelihood estimate) for the complete pooling model. The diagonal blue line has intercept 0 and slope 1. Estimates falling on this line make up the maximum likelihood estimates for the no-pooling model. Overall, the plot makes the amount of pooling toward the prior evident.

Posterior Predictive Distribution

After we have fit a model using some “training” data, we are usually interested in the predictions of the fitted model for new data, which we can use to

  • make predictions for new data points; e.g., predict how many hits will Roberto Clemente get in the rest of the season,

  • evaluate predictions against observed future data; e.g., how well did we predict how many hits Roberto Clemente actually got in the rest of the season, and

  • generate new simulated data to validate our model fits.

With full Bayesian inference, we do not make a point estimate of parameters and use it for prediction—we instead use an average of predictions weighted by the posterior.

Given data \(y\) and a model with parameters \(\theta\), the posterior predictive distribution for new data \(\tilde{y}\) is defined by

\[ p(\tilde{y} \, | \, y) \ = \ \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta, \]

where \(\Theta\) is the support of the parameters \(\theta\). What an integral of this form says is that \(p(\tilde{y} \, | \, y)\) is defined as a weighted average over the legal parameter values \(\theta \in \Theta\) of the likelihood function \(p(\tilde{y} \, | \, \theta)\), with weights given by the posterior, \(p(\theta \, | \, y)\). While we do not want to get sidetracked with the notational and mathematical subtleties of expectations here, the posterior predictive density reduces to the expectation of \(p(\tilde{y} \, | \, \theta)\) conditioned on \(y\).

Evaluating Held-Out Data Predictions

Because the posterior predictive density is formulated as an expectation over the posterior, it is possible to compute via MCMC. With \(M\) draws \(\theta^{(m)}\) from the posterior \(p(\theta \, | \, y)\), the posterior predictive log density for new data \(y^{\mathrm{new}}\) is given by the MCMC approximation

\[ \log \frac{1}{M} \, \sum_{m=1}^M \ p\left( y^{\mathrm{new}} \, | \, \theta^{(m)} \right). \]

In practice, this requires care to prevent underflow in floating point calculations; a robust calculation on the log scale is provided below.

Simulating Replicated Data

It is also straightforward to use forward simulation from the probability distribution of the data \(p(y \, | \, \theta)\) to generate replicated data \(y^{\mathrm{rep}}\) according to the posterior predictive distribution. (Recall that \(p(y \, | \, \theta)\) is called the probability distribution when \(\theta\) is fixed and the likelihood when \(y\) is fixed.)

With \(M\) draws \(\theta^{(m)}\) from the posterior \(p(\theta \, | \, y)\), replicated data can be simulated by drawing a sequence of \(M\) simulations \(y^{\mathrm{rep} \ (m)}\), with each drawn according to the distribution \(p(y \, | \, \theta^{(m)})\). This latter random variate generation can usually be done efficiently (both computationally and statistically) by means of forward simulation from the probability distribution of the data; we provide an example below.

Prediction for New Trials

Efron and Morris’s (1975) baseball data includes not only the observed hit rate in the initial 45 at bats, but also includes the data for how the player did for the rest of the season. The question arises as to how well these models predict a player’s performance for the rest of the season based on their initial 45 at bats.

Calibration

A well calibrated statistical model is one in which the uncertainty in the predictions matches the uncertainty in further data. That is, if we estimate posterior 50% intervals for predictions on new data (here, number of hits in the rest of the season for each player), roughly 50% of the new data should fall in its predicted 50% interval. If the model is true in the sense of correctly describing the generative process of the data, then Bayesian inference is guaranteed to be well calibrated. Given that our models are rarely correct in this deep sense, in practice we are concerned with testing their calibration on quantities of interest.

Sharpness

Given two well calibrated models, the one that makes the more precise predictions in the sense of having narrower intervals is better predictively (Gneiting et al. 2007). To see this in an example, we would rather have a well-calibrated prediction that there’s a 90% chance the number of hits for a player in the rest of the season will fall in \((120, 130)\) than a 90% prediction that the number of hits will fall in \((100, 150)\).

For the models introduced here, a posterior that is a delta function provides the sharpest predictions. Even so, there is residual uncertainty due to the repeated trials; with \(K^{\mathrm{new}}\) further trials and a fixed \(\theta_n\) chance of success, the random variable \(Y^{\mathrm{new}}_n\) denoting the number of further successes for unit \(n\) has a standard deviation from the repeated binary trials of

\[ \mathrm{sd}[Y^{\mathrm{new}}_n] \ = \ \sqrt{K^{\mathrm{new}}_n \ \theta_n \, (1 - \theta_n)}. \]

Why Evaluate with the Predictive Posterior?

The predictive posterior density directly measures the probability of seeing the new data. The higher the probability assigned to the new data, the better job the model has done at predicting the outcome. In the limit, an ideal model would perfectly predict the new outcome with no uncertainty (probability of 1 for a discrete outcome or a delta function at the true value for the density in a continuous outcome). This notion is related to the notion of sharpness discussed in the previous section, because if the new observations have higher predictive densities, they’re probably within narrower posterior intervals (Gneiting et al. 2007).

\(\log E[p(\tilde{y} \, | \, \theta)]\) vs \(E[\log p(\tilde{y} \, | \, \theta)]\)

The log of posterior predictive density is defined in the obvious way as

\[ \log p(\tilde{y} \, | \, y) = \log \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta. \]

This is not a posterior expectation, but rather the log of a posterior expectation. In particular, it should not be confused with the posterior expectation of the log predictive density, which is given by

\[ \int_{\Theta} \left( \log p(\tilde{y} \, | \, \theta) \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta. \]

Although this is easy to compute in Stan in a stable fashion, it does not produce the same answer (as we show below).

Because \(-\log(u)\) is convex, a little wrangling with Jensen’s inequality shows that the expectation of the log is less than or equal to the log of the expectation,

\[ \int_{\Theta} \left( \, \log p(\tilde{y} \, | \, \theta) \, \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta \ \leq \ \log \int_{\Theta} p(\tilde{y} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}\theta \]

We’ll compute both expectations and demonstrate Jensen’s inequality in our running example.

The variables K_new[n] and y_new[n] hold the number of at bats (trials) and the number of hits (successes) for player (unit) n. With the held out data we can compute the log density of each data point using the log_lik function, which, like posterior_predict, accepts a newdata argument. The log_lik function will return an \(M \times N\) matrix, where \(M\) is the size of the posterior sample (the number of draws we obtained from the posterior distribution) and \(N\) is the number of data points in newdata. We can then take the row sums of this matrix to sum over the data points.

       Pooling NoPooling PartialPooling
[1,] -87.50510 -270.1454      -90.21324
[2,] -73.97575 -310.3988      -82.12971
[3,] -80.95743 -250.4451      -82.61155
[4,] -77.54662 -281.8998     -130.43606
[5,] -74.39093 -172.3741      -97.82334
[6,] -88.18157 -171.3092      -93.85154

We now have the distributions of log_p_new in a matrix with a column for each model.

For each model, the posterior mean for log_p_new will give us

\[ \int_{\Theta} \left( \log p(\tilde{y} \, | \, \theta) \right) \ p(\theta \, | \, y) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}). \]

To compute this for each of the models we only need to take the mean of the corresponding column of log_p_new.

       Pooling PartialPooling      NoPooling 
         -81.8          -99.5         -207.8 

From a predictive standpoint, the models are ranked by the amount of pooling they do, with complete pooling being the best, and no pooling being the worst predictively. All of these models do predictions by averaging over their posteriors, with the amount of posterior uncertainty also being ranked in reverse order of the amount of pooling they do.

As we will now see, the ranking of the models can change when we compute the posterior expectation of the log predictive density.

Posterior expectation of the log predictive density

The straight path to calculate this would be to define a generated quantity \(p(y^{\mathrm{new}} \, | \, y)\), look at the posterior mean computed by Stan, and take its log. That is,

\[ \log p(y^{\mathrm{new}} \, | \, y) \ \approx \ \log \frac{1}{M} \, \sum_{m=1}^M p(y^{\mathrm{new}} \, | \, \theta^{(m)}). \]

Unfortunately, this won’t work in most cases because when we try to compute \(p(y^{\mathrm{new}} \, | \, \theta^{(m)})\) directly, it is prone to underflow. For example, 2000 outcomes \(y^{\mathrm{new}}_n\), each with likelihood 0.5 for \(\theta^{(m)}\), will underflow, because \(0.5^{2000}\) is smaller than the smallest positive number that a computer can represent using standard double-precision floating point (used by Stan, R, etc.).

In contrast, if we work on the log scale, \(\log p(y^{\mathrm{new}} \, | \, y)\) will not underflow. It’s a sum of a bunch of terms of order 1. But we already saw we can’t just average the log to get the log of the average.

To avoid underflow, we’re going to use the log-sum-of-exponentials trick, which begins by noting the obvious,

\[ \log \frac{1}{M} \, \sum_{m=1}^M \ p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \ = \ \log \frac{1}{M} \, \sum_{m=1}^M \ \exp \left( \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \right). \]

We’ll then write that last expression as

\[ -\log M + \mathrm{log\_sum\_exp \, } \ \log p(y^{\mathrm{new}} \, | \, \theta^{(m)}) \]

We can compute \(\mathrm{log\_sum\_exp}\) stably by subtracting the max value. Suppose \(u = u_1, \ldots, u_M\), and \(\max(u)\) is the largest \(u_m\). We can calculate

\[ \mathrm{log\_sum\_exp \, } \ u_m \ = \ \log \sum_{m=1}^M \exp(u_m) \ = \ \max(u) + \log \sum_{m=1}^M \exp(u_m - \max(u)). \]

Because \(u_m - \max(u) \leq 0\), the exponentiations cannot overflow. They may underflow to zero, but this will not lose precision because of the leading \(\max(u)\) term; the only way underflow can arise is if \(u_m - \max(u)\) is very small, meaning that it won’t add significant digits to \(\max(u)\) if it had not underflowed.

We can implement \(\mathrm{log\_sum\_exp}\) in R as follows:

and then include the \(-\log M\) term to make it log_mean_exp:

We can then use it to compute the log posterior predictive densities for each of the models:

PartialPooling        Pooling      NoPooling 
         -71.9          -73.1          -81.5 

Now the ranking is different! As expected, the values here are greater than the expectation of the log density due to Jensen’s inequality. The partial pooling model appears to be making slightly better predictions than the full pooling model, which in turn is making slightly better predictions than the no pooling model.

Approximating the expected log predictive density

Vehtari, Gelman, and Gabry (2016) shows that the expected log predictive density can be approximated using the loo function for each model and then compared across models:

                elpd_diff se_diff
fit_pool         0.0       0.0   
fit_partialpool -0.1       0.5   
fit_nopool      -6.0       2.6   

The elpd_diff column shows differences in the leave-one-out (loo) approximation to the expected log predictive density. This approximation is only asymptotically valid and with only 18 observations in this case, substantially underestimates the expected log predictive densities found in the previous subsection. Nevertheless, the relative ranking of the models is essentially the same with the pooled and partially pooled models being virtually indistinguishable but much better than the no pooling model.

Predicting New Observations

With rstanarm it is straightforward to generate draws from the posterior predictive distribution using the posterior_predict function. With this capability, we can either generate predictions for new data or we can apply it to the predictors we already have.

There will be two sources of uncertainty in our predictions, the first being the uncertainty in \(\theta\) in the posterior \(p(\theta \, | \, y)\) and the second being the uncertainty due to the likelihood \(p(\tilde{y} \, | \, \theta)\).

We let \(z_n\) be the number of successes for unit \(n\) in \(K^{\mathrm{new}}_n\) further trials. It might seem tempting to eliminate that second source of uncertainty and set \(z_n^{(m)}\) to its expectation, \(\theta_n^{(m)} \, K^{\mathrm{new}}_n\), at each draw \(m\) from the posterior rather than simulating a new value. Or it might seem tempting to remove the first source of uncertainty and use the posterior mean (or median or mode or …) rather than draws from the posterior. Either way, the resulting values would suffice for estimating the posterior mean, but would not capture the uncertainty in the prediction for \(y^{\mathrm{new}}_n\) and would thus not be useful in estimating predictive standard deviations or quantiles or as the basis for decision making under uncertainty. In other words, the predictions would not be properly calibrated (in a sense we define below).

To predict \(z\) for each player we can use the following code:

  Clemente   Robinson     Howard  Johnstone      Berry    Spencer  Kessinger 
 107.10175  122.31225  147.40850   76.71650  114.76625  127.48775  157.96650 
  Alvarado      Santo    Swaboda Petrocelli  Rodriguez      Scott      Unser 
  36.75125  133.25050   52.41350  138.17950   47.71725  112.11400   71.30550 
  Williams Campaneris     Munson      Alvis 
 151.83725  141.54625  101.34425   17.06025 

Translating the posterior number of hits into a season batting average, \(\frac{y_n + z_n}{K_n + K^{\mathrm{new}}_n}\), we get an 80% posterior interval of

  10%   90% 
0.257 0.359 

for Roberto Clemente from the partial pooling model. Part of our uncertainty here is due to our uncertainty in Clemente’s underlying chance of success, and part of our uncertainty is due to there being 367 remaining trials (at bats) modeled as binomial. In the remaining at bats for the season, Clemente’s success rate (batting average) was \(127 / 367 = 0.346\).

For each model, the following plot shows each player’s posterior predictive 50% interval for predicted batting average (success rate) in his remaining at bats (trials); the observed success rate in the remainder of the season is shown as a blue dot.

We choose to plot 50% posterior intervals as they are a good single point for checking calibration. Rather than plotting the number of hits on the vertical axis, we have standardized all the predictions and outcomes to a success rate. Because each unit (player) has a different number of subsequent trials (at bats), the posterior intervals are relatively wider or narrower within the plots for each model (more trials imply narrower intervals for the average). Because each unit had the same number of initial observed trials, this variation is primarily due to the uncertainty from the binomial model of outcomes.

Calibration

With 50% intervals, we expect half of our estimates to lie outside their intervals in a well-calibrated model. If fewer than the expected number of outcomes lie in their estimated posterior intervals, we have reason to believe the model is not well calibrated—its posterior intervals are too narrow. This is also true if too many outcomes lie in their estimated posterior intervals—in this case the intervals are too broad. Of course, there is variation in the tests as the number of units lying in their intervals is itself a random variable (see the exercises), so in practice we are only looking for extreme values as indicators of miscalibration.

Each of the models other than the complete pooling model appears to be reasonably well calibrated, and even the calibration for the complete pooling model is not bad (the variation in chance-of-success among players has low enough variance that the complete pooling model cannot be rejected as a possibility with only the amount of data we used here).

Sharpness

Consider the width of the posterior predictive intervals for the units across the models. The model with no pooling has the broadest posterior predictive intervals and the complete pooling model the narrowest. This is to be expected given the number of observations used to fit each model; 45 each in the no pooling case and 810 in the complete pooling case, and relatively something in between for the partial pooling models. Because the log odds model is doing more pooling, its intervals are slightly narrower than that of the direct hierarchical model.

For two well calibrated models, the one with the narrower posterior intervals is preferable because its predictions are tighter. The term introduced for this by Gneiting et al. (2007) is “sharpness.” In the limit, a perfect model would provide a delta function at the true answer with a vanishing posterior interval.

Estimating Event Probabilities

The 80% interval in the partial pooling model coincidentally shows us that our model estimates a roughly 10% chance of Roberto Clemente batting 0.400 or better for the season based on batting 0.400 in his first 45 at bats. Not great, but non-trivial. Rather than fishing for the right quantile and hoping to get lucky, we can write a model to directly estimate event probabilities, such as the probability that Roberto Clemente’s batting average is 0.400 or better for the season.

Event probabilities are defined as expectations of indicator functions over parameters and data. For example, the probability of player \(n\)’s batting average being 0.400 or better conditioned on the data \(y\) is defined by the conditional event probability

\[ \mathrm{Pr}\left[ \frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400 \, \Big| \, y \right] \ = \ \int_{\Theta} \mathrm{I}\left[\frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400\right] \ p(z_n \, | \, \theta_n, K^{\mathrm{new}}_n) \ p(\theta \, | \, y, K) \ \mathrm{d}\theta. \]

The indicator function \(\mathrm{I}[c]\) evaluates to 1 if the condition \(c\) is true and 0 if it is false. Because it is just another expectation with respect to the posterior, we can calculate this event probability using MCMC as

\[ \mathrm{Pr}\left[\frac{(y_n + z_n)}{(45 + K^{\mathrm{new}}_n)} \geq 0.400 \, \Big| \, y \right] \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}\left[\frac{(y_n + z_n^{(m)})}{(45 + K^{\mathrm{new}}_n)} \geq 0.400\right]. \]

This event is about the season batting average being greater than 0.400. What if we care about ability (chance of success), not batting average (success rate) for the rest of the season? Then we would ask what \(\mathrm{Pr}[\theta_n \geq 0.400 \, | \, y]\) is. This is defined as a weighted average over the posterior and computed via MCMC as in the previous case.

\[ \mathrm{Pr}\left[\theta_n \geq 0.400 \, | \, y \right] \ = \ \int_{\Theta} \mathrm{I}\left[\theta_n \geq 0.400\right] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[\theta_n^{(m)} \geq 0.400]. \]

Pr(theta_n >= 0.400 | y)
Clemente    Berry  Swaboda 
 0.02250  0.00375  0.00075 
Pr(at least one theta_n >= 0.350 | y)
[1] 0.23725

Multiple Comparisons

We snuck in a “multiple comparison” event in the last section, namely whether there was some player with a chance of success for hits of 0.350 or greater.

With traditional significance testing over multiple trials, it is common to adjust for falsely rejecting the null hypothesis (a so-called Type I error) by inflating the conventional (and arguably far too low) 5% target for reporting “significance.”

For example, suppose we have our 18 players with ability parameters \(\theta_n\) and we have \(N\) null hypotheses of the form \(H_0^n: \theta_n < 0.350\). Now suppose we evaluate each of these 18 hypotheses independently at the conventional \(p = 0.05\) significance level, giving each a 5% chance of rejecting the null hypothesis in error. When we run all 18 hypothesis tests, the overall chance of falsely rejecting at least one of the null hypotheses is a whopping \(1 - (1 - 0.05)^{18} = 0.60\).

The traditional solution to this problem is to apply a Bonferroni adjustment to control the false rejection rate; the typical adjustment is to divide the \(p\)-value by the number of hypothesis tests in the “family” (that is, the collective test being done). Here that sets the rate to \(p = 0.05/18\), or approximately \(p = 0.003\), and results in a slightly less than 5% chance of falsely rejecting a null hypothesis in error.

Although the Bonferroni correction does reduce the overall chance of falsely rejecting a null hypothesis, it also reduces the statistical power of the test to the same degree. This means that many false null hypotheses will erroneously fail to be rejected (Type II errors).

Rather than applying classical multiple comparison adjustments, such as a Bonferroni correction, to control the false-discovery rate, Gelman et al. (2012) suggest using a hierarchical model to perform partial pooling instead. As already shown, hierarchical models partially pool the data, which pulls estimates toward the population mean with a strength determined by the amount of observed variation in the population (see also Figure 2 of (Gelman et al. 2012)). This automatically reduces the false-discovery rate, though not in a way that is intrinsically calibrated to false discovery, which is good, because reducing the overall false discovery rate in and of itself reduces the true discovery rate at the same time.

The generated quantity some_ability_gt_350 will be set to 1 if the maximum ability estimate in \(\theta\) is greater than 0.35. And thus the posterior mean of this generated quantity will be the event probability

\[ \mathrm{Pr}[\mathrm{max}(\theta) > 0.350] \ = \ \int_{\Theta} \mathrm{I}[\mathrm{max}(\theta) > 0.35] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \ \mathrm{I}[\mathrm{max}(\theta^{(m)}) > 0.35] \]

where \(\theta^{(m)}\) is the sequence of posterior draws for the ability parameter vector. Stan reports this value as the posterior mean of the generated quantity some_ability_gt_350, which takes on the value \(\mathrm{I}[\mathrm{max}(\theta^{(m)}) > 0.35]\) in each iteration.

The probability estimate of there being a player with an ability (chance of success) greater than 0.350 is essentially zero in the complete pooling model and is essentially guaranteed in the no pooling model. The partially pooled estimates would not be considered significant at conventional p=0.05 thresholds. One way to get a handle on what’s going on is to inspect the posterior 80% intervals for chance-of-success estimates in the first graph above.

Ranking

In addition to multiple comparisons, we can use the simultaneous estimation of the ability parameters to rank the units. In this section, we rank ballplayers by (estimated) chance of success (i.e., batting ability).

Of course, ranking players by ability makes no sense for the complete pooling model, where every player is assumed to have the same ability.

            
parameters   10% 50% 90%
  Clemente     1   5  13
  Robinson     1   5  14
  Howard       1   6  15
  Johnstone    2   7  15
  Berry        2   8  15
  Spencer      2   8  15
  Kessinger    2   9  16
  Alvarado     3   9  16
  Santo        3  10  17
  Swaboda      3  10  17
  Petrocelli   4  11  17
  Rodriguez    4  11  17
  Scott        4  11  17
  Unser        4  11  17
  Williams     4  11  17
  Campaneris   4  12  17
  Munson       5  13  18
  Alvis        5  13  18

It is again abundantly clear from the posterior intervals that our uncertainty is very great after only 45 at bats.

In the original Volume I BUGS example of surgical mortality, the posterior distribution over ranks was plotted for each hospital. It is now straightforward to reproduce that figure here for the baseball data.

Who has the Highest Chance of Success?

We can use our ranking statistic to calculate the event probability for unit \(n\) that the unit has the highest chance of success using MCMC as

\[ \mathrm{Pr}[\theta_n = \max(\theta)] \ = \ \int_{\Theta} \mathrm{I}[\theta_n = \mathrm{max}(\theta)] \ p(\theta \, | \, y, K) \ \mathrm{d}\theta \ \approx \ \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[\theta^{(m)}_n = \mathrm{max}(\theta^{(m)})]. \]

Like our other models, the partial pooling mitigates the implicit multiple comparisons being done to calculate the probabilities of rankings. Contrast this with an approach that does a pairwise significance test and then applies a false-discovery correction.

We can compute this straightforwardly using the rank data we have already computed or we could compute it directly as above. Because \(\mathrm{Pr}[\theta_n = \theta_{n'}] = 0\) for \(n \neq n'\), we don’t have to worry about ties.

This question of which player has the highest chance of success (batting ability) doesn’t even make sense in the complete pooling model, because the chance of success parameters are all the same by definition. In the other models, the amount of pooling directly determines the probabilities of being the best player. That is, the probability of being best goes down for high performing players with more pooling, whereas it goes up for below-average players.

Graphical Posterior Predictive Checks

We can simulate data from the predictive distribution and compare it to the original data used for fitting the model. If they are not consistent, then either our model is not capturing the aspects of the data we are probing with test statistics or the measurement we made is highly unlikely. That is, extreme \(p\)-values lead us to suspect there is something wrong with our model that deserves further exploration.

In some cases, we are willing to work with models that are wrong in some measurable aspects, but accurately capture quantities of interest for an application. That is, it’s possible for a model to capture some, but not all, aspects of a data set, and still be useful.

Test Statistics and Bayesian \(p\)-Values

A test statistic \(T\) is a function from data to a real value. Following (Gelman et al. 2013), we will concentrate on four specific test statistics for repeated binary trial data (though these choices are fairly general): minimum value, maximum value, sample mean, and sample standard deviation.

Given a test statistic \(T\) and data \(y\), the Bayesian \(p\)-value has a direct definition as a probability,

\[ p_B = \mathrm{Pr}[T(y^{\mathrm{rep}}) \geq T(y) \, | \, y]. \]

Bayesian \(p\)-values, like their traditional counterparts, are probabilities, but not probabilities that a model is true. They simply measure discrepancies between the observed data and what we would expect if the model is true.

Values of Bayesian \(p\)-values near 0 or 1 indicate that the data \(y\) used to estimate the model is unlikely to have been generated by the estimated model. As with other forms of full Bayesian inference, our estimate is the full posterior, not just a point estimate.

As with other Bayesian inferences, we average over the posterior rather than working from a point estimate of the parameters. Expanding this as an expectation of an indicator function,

\[ p_B \ = \ \int_{\Theta} \int_{Y^{\mathrm{rep}}} \mathrm{I}[T(y^{\mathrm{rep}}) \geq T(y)] \ p(y^{\mathrm{rep}} \, | \, \theta) \ p(\theta \, | \, y) \ \mathrm{d}y^{\mathrm{rep}} \, \mathrm{d}\theta. \]

We treat \(y^{\mathrm{rep}}\) as a parameter in parallel with \(\theta\), integrating over possible values \(y^{\mathrm{rep}} \in Y^{\mathrm{rep}}\). As usual, we use the integration sign in a general way intended to include summation, as with the discrete variable \(y^{\mathrm{rep}}\).

The formulation as an expectation leads to the obvious MCMC calculation based on posterior draws \(y^{\mathrm{rep} (m)}\) for \(m \in 1{:}M\),

\[ p_B \approx \frac{1}{M} \, \sum_{m=1}^M \mathrm{I}[T(y^{\mathrm{rep} \ (m)}) \geq T(y)]. \]

Using the pp_check function in rstanarm, we can easily reproduce Figure 6.12 from (Gelman et al. 2013), which shows the posterior predictive distribution for the test statistic, the observed value as a vertical line, and the \(p\)-value for each of the tests. First, here is just the plot for the no pooling model using the mean as the test statistic:

The stat argument can be the name of any R function (including your own functions defined in the Global Environment) that takes a vector as an input and returns a scalar.

To make plots for each of the models for several test statistics we can use the following code, which will create a list of ggplot objects for each model and then arrange everything in a single plot.

The only worrisomely extreme value visible in the plots is the \(p\)-value for standard deviation in the no-pooling model, where the vast majority of the simulated data sets under the model had standard deviations greater than the actual data.

We didn’t actually compute this \(p\)-value because extreme \(p\)-values are easy to detect visually and whether or not the \(p\)-value is less than \(0.05\) or some other arbitrary value is of little use to us beyond what we can already see in the plot. However, if we did want to actually compute the \(p\)-value we can do so easily:

[1] 0.01325

Comparing Observed and Replicated Data

Following the advice of Gelman et al. (2013), we will take the fitted parameters of the data set and generate replicated data sets, then compare the replicated data sets visually to the observed data we used to fit the model. In this section we’ll create the plots for the model using partial pooling, but the same plots can be made for the other models too.

Again using rstanarm’s pp_check function, we can plot some of the simulated data sets along with the original data set to do a visual inspection as suggested by Gelman et al. (2013). For this type of posterior predictive check we set the check argument to "distributions" and we use nreps to specify how many replicated sets of data to generate from the posterior predictive distribution. Because our models have a binomial outcome, instead of plotting the number of successes (hits in this case) on the x-axis, pp_check will plot the proportion of successes.

These simulations are not unreasonable for a binomial likelihood, but they are more spread out than the actual data. In this case, this may actually have more to do with how the data were selected out of all the major league baseball players than the actual data distribution. Efron and Morris (1975, p 312) write

This sample was chosen because we wanted between 30 and 50 at bats to assure a satisfactory approximation of the binomial by the normal distribution while leaving the bulk of at bats to be estimated. We also wanted to include an unusually good hitter (Clemente) to test the method with at least one extreme parameter, a situation expected to be less favorable to Stein’s estimator. Stein’s estimator requires equal variances, or in this situation, equal at bats, so the remaining 17 players are all whom either the April 26 or May 3 New York Times reported with 45 at bats.

Discussion

A hierarchical model introduces an estimation bias toward the population mean and the stronger the bias, the less variance there is in the estimates for the units. Exactly how much bias and variance is warranted can be estimated by further calibrating the model and testing where its predictions do not bear out.

With very little data, there is very little we can do to gain sharp inferences other than provide more informative priors, which is well worth doing when prior information is available.

On the other hand, with more data, the models provide similar results (see the exercises), and in the limit, all of the models (other than complete pooling) converge to posteriors that are delta functions around the empirical chance of success (i.e., the maximum likelihood estimate). Meanwhile, Bayesian inference is allowing us to make more accurate predictions with the data available before we hit that asymptotic regime.

Exercises

  1. Generate fake data according to the pooling, no-pooling, and partial pooling models. Fit the model and consider the coverage of the posterior 80% intervals.

  2. Try generating data where each player has a different number of at-bats (trials) and then fitting the models. What effect does the number of initial trials have on the posterior? Is there a way to quantify the effect?

  3. In the section where we fit the complete pooling model we show a plot of the prior distribution on the probability of success \(\theta\) implied by the \(\mathsf{Normal}(-1,1)\) prior on the log-odds \(\alpha\). If \(\theta = \mathrm{logit}^{-1}(\alpha)\) and \(p(\alpha) = \mathsf{Normal}(\alpha \,|\, -1, 1)\), what is \(p(\theta)\)? For a hint, see here.

  4. How sensitive is the basic no-pooling model to the choice of prior? We used a somewhat informative prior due to our knowledge of baseball, but the prior could be made more or less informative. How, if at all, does this affect posterior inference?

  5. What are some other test statistics that might be used to evaluate our model fit to data? Try some out using pp_check(model, plotfun="stat", stat = "my_test"), where my_test is your function that computes the test statistic. For example, to check the 25% quantile you could first define a function q25 <- function(x) quantile(x, 0.25) and then call pp_check(model, plotfun = "stat", stat = "q25").

  6. Discuss the difference between batting average and on-base percentage as random variables. Consider particularly the denominator (at-bat versus plate appearance). Is the denominator in these kinds of problems always a random variable itself? Why might this be important in inference?

References

  • Betancourt, M. and Girolami, M. (2015) Hamiltonian Monte Carlo for hierarchical models. Current Trends in Bayesian Methodology with Applications 79.

  • Efron, B. and Morris, C. (1975) Data analysis using Stein’s estimator and its generalizations. Journal of the American Statistical Association 70(350), 311–319. [ pdf]

  • Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013) Bayesian Data Analysis, 3rd Edition. Chapman & Hall/CRC Press, London.

  • Gelman, A. and Hill, J. (2007) Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge University Press, Cambridge, United Kingdom.

  • Gelman, A., Hill, J., and Yajima, M. (2012) Why we (usually) don’t have to worry about multiple comparisons. Journal of Research on Educational Effectiveness 5, 189–211. [ pdf]

  • Gneiting, T., Balabdaoui, F., and Raftery, A. E. (2007) Probabilistic forecasts, calibration and sharpness. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 69(2), 243–268.

  • Lunn, D., Jackson, C., Best, N., Thomas, A., and Spiegelhalter, D. (2013) The BUGS Book: A Practical Introduction to Bayesian Analysis. Chapman & Hall/CRC Press.

  • Neal, R. M. (2003) Slice sampling. Annals of Statistics 31(3):705–767.

  • Papaspiliopoulos, O., Roberts, G. O., and Skold, M. (2003) Non-centered parameterisations for hierarchical models and data augmentation. In Bayesian Statistics 7: Proceedings of the Seventh Valencia International Meeting, edited by Bernardo, J. M., Bayarri, M. J., Berger, J. O., Dawid, A. P., Heckerman, D., Smith, A. F. M., and West, M. Oxford University Press, Chicago.

  • Plummer, M., Best, N., Cowles, K., & Vines, K. (2006). CODA: Convergence diagnosis and output analysis for MCMC. R News, 6(1), 7–11.

  • Spiegelhalter, D., Thomas, A., Best, N., & Gilks, W. (1996) BUGS 0.5 Examples. MRC Biostatistics Unit, Institute of Public health, Cambridge, UK.

  • Stan Development Team (2015) Stan Modeling Language User’s Guide and Reference Manual. [web page]

  • Tarone, R. E. (1982) The use of historical control information in testing for a trend in proportions. Biometrics 38(1):215–220.

  • Vehtari, A, Gelman, A., & Gabry, J. (2016) Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. [ pdf]

Additional Data Sets

The following additional data sets have a similar structure to the baseball data used in this vignette and are included with rstanarm.

Rat tumors (N = 71)

Tarone (1982) provides a data set of tumor incidence in historical control groups of rats; specifically endometrial stromal polyps in female lab rats of type F344. The data set is taken from the book site for (Gelman et al. 2013):

Surgical mortality (N = 12)

Spiegelhalter et al. (1996) provide a data set of mortality rates in 12 hospitals performing cardiac surgery in babies. We just manually entered the data from the paper; it is also available in the Stan example models repository in R format.

  • To load: data(mortality, package = "rstanarm")
  • Data source: Unknown

Baseball hits 2006 AL (N = 308)

Carpenter (2009) updates Efron and Morris’s (1975) data set for the entire set of players for the entire 2006 American League season of Major League Baseball. The data was originally downloaded from seanlahman.com, which is currently not working.

rstanarm/inst/doc/continuous.html0000644000176200001440000265674714551550300016750 0ustar liggesusers Estimating Generalized Linear Models for Continuous Data with rstanarm

Estimating Generalized Linear Models for Continuous Data with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to estimate linear and generalized linear models (GLMs) for continuous response variables using the stan_glm function in the rstanarm package. For GLMs for discrete outcomes see the vignettes for binary/binomial and count outcomes.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data
  2. Draw from posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

This vignette primarily focuses on Steps 1 and 2 when the likelihood is the product of conditionally independent continuous distributions. Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”, although this vignette does also give a few examples of model checking and generating predictions.

Likelihood

In the simplest case a GLM for a continuous outcome is simply a linear model and the likelihood for one observation is a conditionally normal PDF \[\frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{1}{2} \left(\frac{y - \mu}{\sigma}\right)^2},\] where \(\mu = \alpha + \mathbf{x}^\top \boldsymbol{\beta}\) is a linear predictor and \(\sigma\) is the standard deviation of the error in predicting the outcome, \(y\).

More generally, a linear predictor \(\eta = \alpha + \mathbf{x}^\top \boldsymbol{\beta}\) can be related to the conditional mean of the outcome via a link function \(g\) that serves as a map between the range of values on which the outcome is defined and the space on which the linear predictor is defined. For the linear model described above no transformation is needed and so the link function is taken to be the identity function. However, there are cases in which a link function is used for Gaussian models; the log link, for example, can be used to log transform the (conditional) expected value of the outcome when it is constrained to be positive.

Like the glm function, the stan_glm function uses R’s family objects. The family objects for continuous outcomes compatible with stan_glm are the gaussian, Gamma, and inverse.gaussian distributions. All of the link functions provided by these family objects are also compatible with stan_glm. For example, for a Gamma GLM, where we assume that observations are conditionally independent Gamma random variables, common link functions are the log and inverse links.

Regardless of the distribution and link function, the likelihood for the entire sample is the product of the likelihood contributions of the individual observations.

Priors

A full Bayesian analysis requires specifying prior distributions \(f(\alpha)\) and \(f(\boldsymbol{\beta})\) for the intercept and vector of regression coefficients. When using stan_glm, these distributions can be set using the prior_intercept and prior arguments. The stan_glm function supports a variety of prior distributions, which are explained in the rstanarm documentation (help(priors, package = 'rstanarm')).

As an example, suppose we have \(K\) predictors and believe — prior to seeing the data — that \(\alpha, \beta_1, \dots, \beta_K\) are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give \(\alpha\) and each of the \(\beta\)s this prior (with a scale of 1, say), in the call to stan_glm we would include the arguments prior_intercept = normal(0,1) and prior = normal(0,1).

If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. Step 1 in the “How to Use the rstanarm Package” vignette discusses one such example.

Posterior

With independent prior distributions, the joint posterior distribution for \(\alpha\) and \(\boldsymbol{\beta}\) is proportional to the product of the priors and the \(N\) likelihood contributions:

\[f\left(\boldsymbol{\beta} | \mathbf{y},\mathbf{X}\right) \propto f\left(\alpha\right) \times \prod_{k=1}^K f\left(\beta_k\right) \times \prod_{i=1}^N {f(y_i|\eta_i)},\]

where \(\mathbf{X}\) is the matrix of predictors and \(\eta\) the linear predictor. This is the posterior distribution that stan_glm will draw from when using MCMC.

Linear Regression Example

The stan_lm function, which has its own vignette, fits regularized linear models using a novel means of specifying priors for the regression coefficients. Here we focus using the stan_glm function, which can be used to estimate linear models with independent priors on the regression coefficients.

To illustrate the usage of stan_glm and some of the post-processing functions in the rstanarm package we’ll use a simple example from Chapter 3 of Gelman and Hill (2007):

We shall fit a series of regressions predicting cognitive test scores of three- and four-year-old children given characteristics of their mothers, using data from a survey of adult American women and their children (a subsample from the National Longitudinal Survey of Youth).

Using two predictors – a binary indicator for whether the mother has a high-school degree (mom_hs) and the mother’s score on an IQ test (mom_iq) – we will fit four contending models. The first two models will each use just one of the predictors, the third will use both, and the fourth will also include a term for the interaction of the two predictors.

For these models we’ll use the default weakly informative priors for stan_glm, which are currently set to normal(0,10) for the intercept and normal(0,5) for the other regression coefficients. For an overview of the many other available prior distributions see help("priors", package = "rstanarm").

stan_glm
 family:       gaussian [identity]
 formula:      kid_score ~ mom_hs + mom_iq + mom_hs:mom_iq
 observations: 434
 predictors:   4
------
              Median MAD_SD
(Intercept)   -10.1   13.3 
mom_hs         49.6   15.1 
mom_iq          1.0    0.1 
mom_hs:mom_iq  -0.5    0.2 

Auxiliary parameter(s):
      Median MAD_SD
sigma 18.0    0.6  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Following Gelman and Hill’s example, we make some plots overlaying the estimated regression lines on the data.

There are several ways we could add the uncertainty in our estimates to the plot. One way is to also plot the estimated regression line at each draw from the posterior distribution. To do this we can extract the posterior draws from the fitted model object using the as.matrix or as.data.frame methods:

For the second model we can make the same plot but the x-axis will show the continuous predictor mom_iq:

For the third and fourth models, each of which uses both predictors, we can plot the continuous mom_iq on the x-axis and use color to indicate which points correspond to the different subpopulations defined by mom_hs. We also now plot two regression lines, one for each subpopulation:

Model comparison

One way we can compare the four contending models is to use an approximation to Leave-One-Out (LOO) cross-validation, which is implemented by the loo function in the loo package:

      elpd_diff se_diff
post4   0.0       0.0  
post3  -3.5       2.8  
post2  -6.2       4.1  
post1 -42.4       8.7  

In this case the fourth model is preferred as it has the highest expected log predicted density (elpd_loo) or, equivalently, the lowest value of the LOO Information Criterion (looic). The fourth model is preferred by a lot over the first model

      elpd_diff se_diff
post4   0.0       0.0  
post1 -42.4       8.7  

because the difference in elpd is so much larger than the standard error. However, the preference of the fourth model over the others isn’t as strong:

      elpd_diff se_diff
post4  0.0       0.0   
post3 -3.5       2.8   
      elpd_diff se_diff
post4  0.0       0.0   
post2 -6.2       4.1   

The posterior predictive distribution

The posterior predictive distribution is the distribution of the outcome implied by the model after using the observed data to update our beliefs about the unknown parameters. When simulating observations from the posterior predictive distribution we use the notation \(y^{\rm rep}\) (for replicate) when we use the same observations of \(X\) that were used to estimate the model parameters. When \(X\) contains new observations we use the notation \(\tilde{y}\) to refer to the posterior predictive simulations.

Simulating data from the posterior predictive distribution using the observed predictors is useful for checking the fit of the model. Drawing from the posterior predictive distribution at interesting values of the predictors also lets us visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Graphical posterior predictive checks

The pp_check function generates a variety of plots comparing the observed outcome \(y\) to simulated datasets \(y^{\rm rep}\) from the posterior predictive distribution using the same observations of the predictors \(X\) as we used to fit the model. Here we show a few of the possible displays. The documentation at help("pp_check.stanreg", package = "rstanarm") has details on all of the pp_check options.

First we’ll look at a plot directly comparing the distributions of \(y\) and \(y^{\rm rep}\). The following call to pp_check will create a plot juxtaposing the histogram of \(y\) and histograms of five \(y^{\rm rep}\) datasets:

The idea is that if the model is a good fit to the data we should be able to generate data \(y^{\rm rep}\) from the posterior predictive distribution that looks a lot like the observed data \(y\). That is, given \(y\), the \(y^{\rm rep}\) we generate should be plausible.

Another useful plot we can make using pp_check shows the distribution of a test quantity \(T(y^{\rm rep})\) compared to \(T(y)\), the value of the quantity in the observed data. When the argument plotfun = "stat" is specified, pp_check will simulate \(S\) datasets \(y_1^{\rm rep}, \dots, y_S^{\rm rep}\), each containing \(N\) observations. Here \(S\) is the size of the posterior sample (the number of MCMC draws from the posterior distribution of the model parameters) and \(N\) is the length of \(y\). We can then check if \(T(y)\) is consistent with the distribution of \(\left(T(y_1^{\rm rep}), \dots, T(y_S^{\rm rep})\right)\). In the plot below we see that the mean of the observations is plausible when compared to the distribution of the means of the \(S\) \(y^{\rm rep}\) datasets:

Using plotfun="stat_2d" we can also specify two test quantities and look at a scatterplot:

Generating predictions

The posterior_predict function is used to generate replicated data \(y^{\rm rep}\) or predictions for future observations \(\tilde{y}\). Here we show how to use posterior_predict to generate predictions of the outcome kid_score for a range of different values of mom_iq and for both subpopulations defined by mom_hs.

[1] 4000   13

We now have two matrices, y_nohs and y_hs. Each matrix has as many columns as there are values of IQ_SEQ and as many rows as the size of the posterior sample. One way to show the predictions is to plot the predictions for the two groups of kids side by side:

Gamma Regression Example

Gamma regression is often used when the response variable is continuous and positive, and the coefficient of variation (rather than the variance) is constant.

We’ll use one of the standard examples of Gamma regression, which is taken from McCullagh & Nelder (1989). This example is also given in the documentation for R’s glm function. The outcome of interest is the clotting time of blood (in seconds) for “normal plasma diluted to nine different percentage concentrations with prothrombin-free plasma; clotting was induced by two lots of thromboplastin” (p. 300).

The help page for R’s glm function presents the example as follows:

To fit the analogous Bayesian models we can simply substitute stan_glm for glm above. However, instead of fitting separate models we can also reshape the data slightly and fit a model interacting lot with plasma concentration:

stan_glm
 family:       Gamma [inverse]
 formula:      clot_time ~ log_plasma * lot_id
 observations: 18
 predictors:   4
------
                   Median MAD_SD
(Intercept)        -0.016  0.006
log_plasma          0.015  0.003
lot_id2            -0.007  0.013
log_plasma:lot_id2  0.008  0.006

Auxiliary parameter(s):
      Median MAD_SD
shape 7.931  2.782 

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

In the output above, the estimate reported for shape is for the shape parameter of the Gamma distribution. The reciprocal of the shape parameter can be interpreted similarly to what summary.glm refers to as the dispersion parameter.

References

Gelman, A. and Hill, J. (2007). Data Analysis Using Regression and Multilevel/Hierarchical Models. Cambridge University Press, Cambridge, UK.

McCullagh, P. and Nelder, J. A. (1989). Generalized Linear Models. Chapman and Hall/CRC Press, New York.

rstanarm/inst/doc/priors.Rmd0000644000176200001440000003741014370470372015621 0ustar liggesusers--- title: "Prior Distributions for rstanarm Models" author: "Jonah Gabry and Ben Goodrich" date: "`r Sys.Date()`" output: html_vignette: toc: yes --- ```{r, child="children/SETTINGS-knitr.txt"} ``` ```{r, child="children/SETTINGS-gg.txt"} ``` # July 2020 Update As of July 2020 there are a few changes to prior distributions: * Except for in default priors, `autoscale` now defaults to `FALSE`. This means that when specifying custom priors you no longer need to manually set `autoscale=FALSE` every time you use a distribution. * There are minor changes to the default priors on the intercept and (non-hierarchical) regression coefficients. See **Default priors and scale adjustments** below. We recommend the new book [Regression and Other Stories](https://avehtari.github.io/ROS-Examples/), which discusses the background behind the default priors in **rstanarm** and also provides examples of specifying non-default priors. # Introduction This vignette provides an overview of how the specification of prior distributions works in the __rstanarm__ package. It is still a work in progress and more content will be added in future versions of __rstanarm__. Before reading this vignette it is important to first read the [How to Use the __rstanarm__ Package](rstanarm.html) vignette, which provides a general overview of the package. Every modeling function in __rstanarm__ offers a subset of the arguments in the table below which are used for specifying prior distributions for the model parameters.
| Argument | Used in | Applies to | | ------------- | ------------- | ------------- | | `prior_intercept` | All modeling functions except `stan_polr` and `stan_nlmer`| Model intercept, after centering predictors.| | `prior` | All modeling functions| Regression coefficients. Does _not_ include coefficients that vary by group in a multilevel model (see `prior_covariance`).| | `prior_aux` | `stan_glm`\*, `stan_glmer`\*, `stan_gamm4`, `stan_nlmer`| Auxiliary parameter, e.g. error SD (interpretation depends on the GLM).| | `prior_covariance` | `stan_glmer`\*, `stan_gamm4`, `stan_nlmer`| Covariance matrices in multilevel models with varying slopes and intercepts. See the [`stan_glmer` vignette](https://mc-stan.org/rstanarm/articles/glmer.html) for details on this prior.| \* `stan_glm` also implies `stan_glm.nb`. `stan_glmer` implies `stan_lmer` and `stan_glmer.nb`.
The `stan_polr`, `stan_betareg`, and `stan_gamm4` functions also provide additional arguments specific only to those models: | Argument | Used only in | Applies to | | ------------- | ------------- | ------------- | | `prior_smooth` | `stan_gamm4` | Prior for hyperparameters in GAMs (lower values yield less flexible smooth functions). | | `prior_counts` | `stan_polr` | Prior counts of an _ordinal_ outcome (when predictors at sample means). | | `prior_z` | `stan_betareg`| Coefficients in the model for `phi`.| | `prior_intercept_z` | `stan_betareg`| Intercept in the model for `phi`. | | `prior_phi` | `stan_betareg`| `phi`, if not modeled as function of predictors. |
To specify these arguments the user provides a call to one of the various available functions for specifying priors (e.g., `prior = normal(0, 1)`, `prior = cauchy(c(0, 1), c(1, 2.5))`). The documentation for these functions can be found at `help("priors")`. The __rstanarm__ documentation and the other [vignettes](index.html) provide many examples of using these arguments to specify priors and the documentation for these arguments on the help pages for the various __rstanarm__ modeling functions (e.g., `help("stan_glm")`) also explains which distributions can be used when specifying each of the prior-related arguments.
# Default (Weakly Informative) Prior Distributions With very few exceptions, the default priors in __rstanarm__ ---the priors used if the arguments in the tables above are untouched--- are _not_ flat priors. Rather, the defaults are intended to be _weakly informative_. That is, they are designed to provide moderate regularization and help stabilize computation. For many (if not most) applications the defaults will perform well, but this is not guaranteed (there are no default priors that make sense for every possible model specification). The way __rstanarm__ attempts to make priors weakly informative by default is to internally adjust the scales of the priors. How this works (and, importantly, how to turn it off) is explained below, but first we can look at the default priors in action by fitting a basic linear regression model with the `stan_glm` function. For specifying priors, the `stan_glm` function accepts the arguments `prior_intercept`, `prior`, and `prior_aux`. To use the default priors we just leave those arguments at their defaults (i.e., we don't specify them): ```{r, default-prior-1, results="hide"} library("rstanarm") default_prior_test <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1) ``` The `prior_summary` function provides a concise summary of the priors used: ```{r, default-prior-summary} prior_summary(default_prior_test) ``` ```{r, echo=FALSE} priors <- prior_summary(default_prior_test) fr2 <- function(x) format(round(x, 2), nsmall = 2) ``` Starting from the bottom up, we can see that: * __Auxiliary__: `sigma`, the error standard deviation, has a default prior that is $\mathsf{exponential}(1)$. However, as a result of the automatic rescaling, the actual scale used was 6.03. * __Coefficients__: By default the regression coefficients (in this case the coefficients on the `wt` and `am` variables) are treated as a priori independent with normal priors centered at 0 and with scale (standard deviation) $2.5$. 
Like for `sigma`, in order for the default to be weakly informative __rstanarm__ will adjust the scales of the priors on the coefficients. As a result, the prior scales actually used were 15.40 and 30.20. * __Intercept__: For the intercept, the default prior is normal with mean $0$ and standard deviation $2.5$, but in this case the standard deviation was adjusted to 15.07. There is also a note in parentheses informing you that the prior applies to the intercept after all predictors have been centered (a similar note can be found in the documentation of the `prior_intercept` argument). In many cases the value of $y$ when $x=0$ is not meaningful and it is easier to think about the value when $x = \bar{x}$. Therefore placing a prior on the intercept after centering the predictors typically makes it easier to specify a reasonable prior for the intercept. (Note: the user does _not_ need to manually center the predictors.) To disable the centering of the predictors, you need to omit the intercept from the model `formula` and include a column of ones as a predictor (which cannot be named `"(Intercept)"` in the `data.frame`). Then you can specify a prior "coefficient" for the column of ones. The next two subsections describe how the rescaling works and how to easily disable it if desired. ### Default priors and scale adjustments Automatic scale adjustments happen in two cases: 1. When the default priors are used. 2. When the user sets `autoscale=TRUE` when specifying their own prior (e.g., `normal(0, 3, autoscale=TRUE)`). See `help("priors")` for a list of distributions to see which have an `autoscale` argument. Here we describe how the default priors work for the intercept, regression coefficients, and (if applicable) auxiliary parameters. Autoscaling when not using default priors works analogously (if `autoscale=TRUE`). Assume we have outcome $y$ and predictors $x_1,\ldots,x_k$ and our model has linear predictor $$ \alpha + \beta_1 x_1 + \dots + \beta_K x_K. 
$$ #### Regression coefficients The default prior on regression coefficients $\beta_k$ is $$ \beta_k \sim \mathsf{Normal}(0, \, 2.5 \cdot s_y/s_x) $$ where $s_x = \text{sd}(x)$ and $$ s_y = \begin{cases} \text{sd}(y) & \text{if } \:\: {\tt family=gaussian(link)}, \\ 1 & \text{otherwise}. \end{cases} $$ This corresponds to `prior = normal(0, 2.5, autoscale = TRUE)` in **rstanarm** code. #### Intercept The intercept is assigned a prior indirectly. The `prior_intercept` argument refers to the intercept after all predictors have been centered (internally by **rstanarm**). That is, instead of placing the prior on the expected value of $y$ when $x=0$, we place a prior on the expected value of $y$ when $x = \bar{x}$. The default prior for this centered intercept, say $\alpha_c$, is $$ \alpha_c \sim \mathsf{Normal}(m_y, \, 2.5 \cdot s_y) $$ where $$ m_y = \begin{cases} \bar{y} & \text{if } \:\: {\tt family=gaussian(link="identity")}, \\ 0 & \text{otherwise} \end{cases} $$ and $s_y$ is the same as above (either 1 or $\text{sd(y)}$). #### Auxiliary parameters The default prior on the auxiliary parameter (residual standard deviation for Gaussian, shape for gamma, reciprocal dispersion for negative binomial, etc.) is an exponential distribution with rate $1/s_y$ $$ \text{aux} \sim \mathsf{Exponential}(1/s_y) $$ where $s_y$ is the same as above (either 1 or $\text{sd(y)}$). This corresponds to `prior_aux = exponential(1, autoscale=TRUE)` in **rstanarm** code. #### Note on data-based priors Because the scaling is based on the scales of the predictors (and possibly the outcome) these are technically data-dependent priors. However, since these priors are quite wide (and in most cases rather conservative), the amount of information used is weak and mainly takes into account the order of magnitude of the variables. This enables __rstanarm__ to offer defaults that are reasonable for many models. 
### Disabling prior scale adjustments To disable automatic rescaling simply specify a prior other than the default. **rstanarm** versions up to and including version `2.19.3` used to require you to explicitly set the `autoscale` argument to `FALSE`, but now autoscaling only happens by default for the default priors. To use autoscaling with manually specified priors you have to set `autoscale = TRUE`. For example, this prior specification will not include any autoscaling: ```{r, no-autoscale, results="hide"} test_no_autoscale <- update( default_prior_test, prior = normal(0, 5), prior_intercept = student_t(4, 0, 10), prior_aux = cauchy(0, 3) ) ``` We can verify that the prior scales weren't adjusted by checking `prior_summary`: ```{r, no-autoscale-prior-summary} prior_summary(test_no_autoscale) ```
# How to Specify Flat Priors (and why you typically shouldn't) ### Uninformative is usually unwarranted and unrealistic (flat is frequently frivolous and fictional) When "non-informative" or "uninformative" is used in the context of prior distributions, it typically refers to a flat (uniform) distribution or a nearly flat distribution. Sometimes it may also be used to refer to the parameterization-invariant Jeffreys prior. Although __rstanarm__ does not prevent you from using very diffuse or flat priors, unless the data is very strong it is wise to avoid them. Rarely is it appropriate in any applied setting to use a prior that gives the same (or nearly the same) probability mass to values near zero as it gives values bigger than the age of the universe in nanoseconds. Even a much narrower prior than that, e.g., a normal distribution with $\sigma = 500$, will tend to put much more probability mass on unreasonable parameter values than reasonable ones. In fact, using the prior $\theta \sim \mathsf{Normal(\mu = 0, \sigma = 500)}$ implies some strange prior beliefs. For example, you believe a priori that $P(|\theta| < 250) < P(|\theta| > 250)$, which can easily be verified by doing the calculation with the normal CDF ```{r} p <- 1 - 2 * pnorm(-250, mean = 0, sd = 500) print(paste("Pr(-250 < theta < 250) =", round(p, 2))) ``` or via approximation with Monte Carlo draws: ```{r, fig.cap="_There is much more probability mass outside the interval (-250, 250)._"} theta <- rnorm(1e5, mean = 0, sd = 500) p_approx <- mean(abs(theta) < 250) print(paste("Pr(-250 < theta < 250) =", round(p_approx, 2))) d <- data.frame(theta, clr = abs(theta) > 250) library(ggplot2) ggplot(d, aes(x = theta, fill = clr)) + geom_histogram(binwidth = 5, show.legend = FALSE) + scale_y_continuous(name = "", labels = NULL, expand = c(0,0)) + scale_x_continuous(name = expression(theta), breaks = c(-1000, -250, 250, 1000)) ```
This will almost never correspond to the prior beliefs of a researcher about a parameter in a well-specified applied regression model and yet priors like $\theta \sim \mathsf{Normal(\mu = 0, \sigma = 500)}$ (and more extreme) remain quite popular. Even when you know very little, a flat or very wide prior will almost never be the best approximation to your beliefs about the parameters in your model that you can express using __rstanarm__ (or other software). _Some_ amount of prior information will be available. For example, even if there is nothing to suggest a priori that a particular coefficient will be positive or negative, there is almost always enough information to suggest that different orders of magnitude are not equally likely. Making use of this information when setting a prior scale parameter is simple ---one heuristic is to set the scale an order of magnitude bigger than you suspect it to be--- and has the added benefit of helping to stabilize computations. A more in-depth discussion of non-informative vs weakly informative priors is available in the case study [_How the Shape of a Weakly Informative Prior Affects Inferences_](https://mc-stan.org/users/documentation/case-studies/weakly_informative_shapes.html). ### Specifying flat priors __rstanarm__ will use flat priors if `NULL` is specified rather than a distribution. For example, to use a flat prior on regression coefficients you would specify `prior=NULL`: ```{r, flat-prior-1, echo=FALSE, results="hide"} flat_prior_test <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL, iter = 10, chains = 1) ``` ```{r, flat-prior-2, eval=FALSE} flat_prior_test <- stan_glm(mpg ~ wt, data = mtcars, prior = NULL) ``` In this case we let __rstanarm__ use the default priors for the intercept and error standard deviation (we could change that if we wanted), but the coefficient on the `wt` variable will have a flat prior. 
To double check that indeed a flat prior was used for the coefficient on `wt` we can call `prior_summary`: ```{r, flat-prior-summary} prior_summary(flat_prior_test) ```
# Informative Prior Distributions Although the default priors tend to work well, prudent use of more informative priors is encouraged. For example, suppose we have a linear regression model $$y_i \sim \mathsf{Normal}\left(\alpha + \beta_1 x_{1,i} + \beta_2 x_{2,i}, \, \sigma\right)$$ and we have evidence (perhaps from previous research on the same topic) that approximately $\beta_1 \in (-15, -5)$ and $\beta_2 \in (-1, 1)$. An example of an informative prior for $\boldsymbol{\beta} = (\beta_1, \beta_2)'$ could be $$ \boldsymbol{\beta} \sim \mathsf{Normal} \left( \begin{pmatrix} -10 \\ 0 \end{pmatrix}, \begin{pmatrix} 5^2 & 0 \\ 0 & 2^2 \end{pmatrix} \right), $$ which sets the prior means at the midpoints of the intervals and then allows for some wiggle room on either side. If the data are highly informative about the parameter values (enough to overwhelm the prior) then this prior will yield similar results to a non-informative prior. But as the amount of data and/or the signal-to-noise ratio decrease, using a more informative prior becomes increasingly important. If the variables `y`, `x1`, and `x2` are in the data frame `dat` then this model can be specified as ```{r, eval=FALSE} my_prior <- normal(location = c(-10, 0), scale = c(5, 2)) stan_glm(y ~ x1 + x2, data = dat, prior = my_prior) ``` We left the priors for the intercept and error standard deviation at their defaults, but informative priors can be specified for those parameters in an analogous manner. rstanarm/inst/doc/polr.html0000644000176200001440000025544714551551737015523 0ustar liggesusers Estimating Ordinal Regression Models with rstanarm

Estimating Ordinal Regression Models with rstanarm

Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to estimate models for ordinal outcomes using the stan_polr function in the rstanarm package.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data.
  2. Draw from the posterior distribution using Markov Chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”. This vignette focuses on Step 1.

One of the strengths of doing MCMC with Stan — as opposed to a Gibbs sampler — is that reparameterizations are essentially costless, which allows the user to specify priors on parameters that are either more intuitive, numerically stable, or computationally efficient without changing the posterior distribution of the parameters that enter the likelihood. Advantageous parameterizations are already built into the Stan programs used in the rstanarm package, so it is just a matter of using these vignettes to explain how the priors work in the context of these reparameterizations.

Likelihood

Ordinal outcomes fall in one of \(J\) categories. One way to motivate an ordinal model is to introduce a latent variable, \(y^\ast\), that is related to the observed outcomes via an observation mechanism: \[y=\begin{cases} 1 & \mbox{if }y^{\ast}<\zeta_{1}\\ 2 & \mbox{if }\zeta_{1}\leq y^{\ast}<\zeta_{2}\\ \vdots\\ J & \mbox{if }\zeta_{J-1}\leq y^{\ast} \end{cases},\] where \(\boldsymbol{\zeta}\) is a vector of cutpoints of length \(J-1\).

Then \(y^\ast\) is modeled as a linear function of \(K\) predictors \[y^\ast = \mu + \epsilon = \mathbf{x}^\top \boldsymbol{\beta} + \epsilon,\] where \(\epsilon\) has mean zero and unit scale but can be specified as being drawn from one of several distributions. Note that there is no “intercept” in this model since the data cannot distinguish an intercept from the cutpoints. However, if \(J = 2\), then \(\zeta_1\) can be referred to as either the cutpoint or the intercept.

A Bayesian can treat \(y^\ast\) as another unknown parameter, although for computational efficiency the Stan code essentially integrates each \(y^\ast\) out of the posterior distribution, leaving the posterior distribution of \(\boldsymbol{\beta}\) and \(\boldsymbol{\zeta}\). Nevertheless, it is useful to motivate the model theoretically as if \(y^\ast\) were just an unknown parameter with a distribution truncated by the relevant element(s) of \(\boldsymbol{\zeta}\).

Priors

If \(y^\ast\) were observed we would simply have a linear regression model for it, and the description of the priors in the vignette entitled “Estimating Linear Models with the rstanarm Package” would apply directly. Another way to say the same thing is conditional on a realization of \(y^\ast\), we have a linear regression model and the description of the priors in the other vignette does apply (and should be read before continuing with this subsection).

The stan_lm function essentially specifies a prior on \(\boldsymbol{\theta} = \mathbf{R}^{-1} \boldsymbol{\beta}\), where \(\mathbf{R}\) is the upper triangular matrix in the QR decomposition of the design matrix, \(\mathbf{X} = \mathbf{Q} \mathbf{R}\). Furthermore, in stan_lm, \(\sigma_{\epsilon} = \sigma_y \sqrt{1 - R^2}\) where \(R^2\) is the proportion of variance in the outcome that is attributable to the coefficients in a linear model.

The main difference in the context of a model for an ordinal outcome is that the scale of \(y^\ast\) is not identified by the data. Thus, the ordinal model specifies that \(\sigma_{\epsilon} = 1\), which implies that \(\sigma_{y^\ast} = 1 / \sqrt{1 - R^2}\) is an intermediate parameter rather than a primitive parameter.

It is somewhat more difficult to specify a prior value for the \(R^2\) in an ordinal model because \(R^2\) refers to the proportion of variance in the \(y^\ast\) that is attributable to the predictors under a linear model. In general, the \(R^2\) tends to be lower in an ordinal model than in a linear model where the continuous outcome is observed.

The other difference is that an ordinal model does not have a global intercept but rather a vector of \(J-1\) cutpoints. The implied prior on these cutpoints used by the rstanarm package is somewhat novel. The user instead specifies a Dirichlet prior on \(\Pr\left(y=j \, \left.\right| \, \overline{\mathbf{x}} \right)\), which is to say the prior probability of the outcome falling in each of the \(J\) categories given that the predictors are at their sample means. The Dirichlet prior is for a simplex random variable, whose elements are non-negative and sum to \(1\). The Dirichlet PDF can be written as \[f\left(\boldsymbol{\pi}|\boldsymbol{\alpha}\right) \propto \prod_{j=1}^J{\pi_j^{\alpha_j - 1}}, \] where \(\boldsymbol{\pi}\) is a simplex vector such that \(\pi_j = \Pr\left(y=j \, \left.\right| \, \overline{\mathbf{x}} \right)\).

The Dirichlet prior is one of the easiest to specify because the so-called “concentration” hyperparameters \(\boldsymbol{\alpha}\) can be interpreted as prior counts, i.e., prior observations for each of the J categories (although they need not be integers). If \(\alpha_j = 1\) for every \(j\) (the default used by rstanarm) then the Dirichlet prior is jointly uniform over the space of these simplexes. This corresponds to a prior count of one observation falling in each of the \(J\) ordinal categories when the predictors are at their sample means and conveys the reasonable but weak prior information that no category has probability zero. If, for each \(j\), \(\alpha_j = \alpha > 1\) then the prior mode is that the \(J\) categories are equiprobable, with prior probability \(1/J\) of the outcome falling in each of the \(J\) categories. The larger the value of \(\alpha\) the more sharply peaked the distribution is at the mode.

The \(j\)-th cutpoint \(\zeta_j\) is then given by \[\zeta_j = F_{y^\ast}^{-1}\left(\sum_{i=1}^j{\pi_i}\right),\] where \(F_{y^\ast}^{-1}\) is an inverse CDF function, which depends on the assumed distribution of \(y^\ast\). Common choices include the normal and logistic distributions. The scale parameter of this distribution is again \(\sigma_{y^\ast} = 1/\sqrt{1 - R^2}\). In short, by making each \(\zeta_j\) a function of \(\boldsymbol{\pi}\), it allows us to specify a Dirichlet prior on \(\boldsymbol{\pi}\), which is simpler than specifying a prior on \(\boldsymbol{\zeta}\) directly.

Example

In this section, we start with an ordinal model of tobacco consumption as a function of age and alcohol consumption. Frequentist estimates can be obtained using the polr function in the MASS package:

Call:
polr(formula = tobgp ~ agegp + alcgp, data = esoph)

Coefficients:
agegp.L agegp.Q agegp.C agegp^4 agegp^5 alcgp.L alcgp.Q alcgp.C 
  -0.37   -0.38   -0.24    0.04   -0.04   -0.19   -0.02    0.03 

Intercepts:
0-9g/day|10-19    10-19|20-29      20-29|30+ 
          -1.0            0.2            1.3 

Residual Deviance: 241.8195 
AIC: 263.8195 

To obtain Bayesian estimates, we prepend stan_ and specify the priors:

stan_polr
 family:       ordered [logistic]
 formula:      tobgp ~ agegp + alcgp
 observations: 88
------
        Median MAD_SD
agegp.L -0.2    0.4  
agegp.Q -0.2    0.4  
agegp.C -0.1    0.3  
agegp^4  0.0    0.3  
agegp^5  0.0    0.3  
alcgp.L -0.1    0.3  
alcgp.Q  0.0    0.3  
alcgp.C  0.0    0.3  

Cutpoints:
               Median MAD_SD
0-9g/day|10-19 -1.0    0.2  
10-19|20-29     0.2    0.2  
20-29|30+       1.3    0.2  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

The point estimates, represented by the posterior medians, are qualitatively similar to the maximum-likelihood estimates but are somewhat shrunk toward zero due to the regularizing prior on the coefficients. Since these cutpoints are actually known, it would be more appropriate for the model to take that into account, but stan_polr does not currently support that.

Next, we utilize an example from the MASS package where low birthweight is the binary outcome of interest. First, we recode some of the variables:

It is usually a good idea to rescale variables by constants so that all the numbers are in single or double digits. We start by estimating a linear model for birthweight in kilograms, flipping the sign so that positive coefficients are associated with lower birthweights.

stan_lm
 family:       gaussian [identity]
 formula:      -bwt ~ smoke + age + race + ptl + ht + ftv
 observations: 189
 predictors:   8
------
            Median MAD_SD
(Intercept) -3.3    0.2  
smoke        0.4    0.1  
age          0.0    0.0  
raceblack    0.4    0.2  
raceother    0.4    0.1  
ptl          0.2    0.1  
ht           0.4    0.2  
ftv          0.0    0.0  

Auxiliary parameter(s):
              Median MAD_SD
R2            0.2    0.0   
log-fit_ratio 0.0    0.1   
sigma         0.7    0.0   

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Next, we estimate an “ordinal” model for the incidence of low birthweight, which is defined as a birth weight of less than \(2.5\) kilograms. Even though this outcome is binary, a binary variable is a special case of an ordinal variable with \(J=2\) categories and is acceptable to stan_polr. We can think of bwt as something proportional to \(y^\ast\) and pretend that it is not observed, forcing us to estimate an ordinal model.

This prior seems to have worked well in this case because none of the points in the plot are above \(0.5\), which would have indicated that the posterior is very sensitive to those observations. If we compare the estimated coefficients,

            Linear Ordinal Rescaled
(Intercept) -3.254  -0.535   -4.812
smoke        0.361   0.514    0.534
age         -0.003  -0.025   -0.005
raceblack    0.394   0.514    0.582
raceother    0.400   0.530    0.592
ptl          0.154   0.400    0.228
ht           0.368   0.696    0.544
ftv         -0.004  -0.005   -0.006

they have the same signs and similar magnitudes, with the exception of the “Intercept”. In an ordinal model where the outcome only has \(J=2\) categories, this “Intercept” is actually \(\zeta_1\), but it is more conventional to call it the “Intercept” so that it agrees with stan_glm when family = binomial(link = 'probit'). Recall that \(\sigma_{\epsilon} = 1\) in an ordinal model, so if we rescale the coefficients from a linear model by dividing by the posterior median of \(\sigma\), the resulting coefficients are even closer to those of the ordinal model.

This illustrates the fundamental similarity between a linear model for a continuous observed outcome and a linear model for a latent \(y^\ast\) that generates an ordinal observed outcome. The main difference is when the outcome is continuous and observed, we can estimate the scale of the errors meaningfully. When the outcome is ordinal, we can only fix the scale of the latent errors to \(1\) arbitrarily.

Finally, when \(J = 2\), the stan_polr function allows you to specify non-NULL values of the shape and rate arguments, which implies a “scobit” likelihood where the probability of success is given by \(F\left(y^\ast \right)^\alpha\), where \(F\left(\right)\) is the logistic CDF and \(\alpha > 0\) is a skewing parameter that has a gamma prior with a given shape and rate. If \(\alpha \neq 1\), then the relationship between \(y^\ast\) and the probability of success is asymmetric. In principle, it seems appropriate to estimate \(\alpha\) but in practice, a lot of data is needed to estimate \(\alpha\) with adequate precision. In the previous example, if we specify shape = 2 and rate = 2 to reflect the prior beliefs that \(\alpha\) is expected to be \(1\) but has a variance of \(\frac{1}{2}\), then the loo calculation yields many Pareto shape parameters that are excessively large. However, with more than \(189\) observations, such a model may be more fruitful.

Conclusion

The posterior distribution for an ordinal model requires priors on the coefficients and the cutpoints. The priors used by the stan_polr function are unconventional but should work well for a variety of problems. The prior on the coefficients is essentially the same as that used by the stan_lm function but omits a scale parameter because the standard deviation of the latent \(y^\ast\) is not identified by the data. The cutpoints are conditionally deterministic given a simplex vector for the probability of falling in each of the \(J\) ordinal categories given that the predictors are at their sample means. Thus, a Dirichlet prior — which is relatively easy to specify and has a good default of jointly uniform — on this simplex completes the posterior distribution.

This approach provides an alternative to stan_glm with family = binomial() even if the outcome variable has only two categories. The stan_glm function has more options for the prior on the coefficients and the prior on the intercept (which can be interpreted as the first cutpoint when \(J = 2\)). However, it may be more difficult to obtain efficient sampling with those priors.

rstanarm/inst/doc/rstanarm.R0000644000176200001440000001245614551551770015617 0ustar liggesusers## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ----rstanarm-mle, eval = TRUE------------------------------------------------ data("womensrole", package = "HSAUR3") womensrole$total <- womensrole$agree + womensrole$disagree womensrole_glm_1 <- glm(cbind(agree, disagree) ~ education + gender, data = womensrole, family = binomial(link = "logit")) round(coef(summary(womensrole_glm_1)), 3) ## ----rstanarm-mcmc, results="hide", eval = TRUE------------------------------- library(rstanarm) womensrole_bglm_1 <- stan_glm(cbind(agree, disagree) ~ education + gender, data = womensrole, family = binomial(link = "logit"), prior = student_t(df = 7, 0, 5), prior_intercept = student_t(df = 7, 0, 5), cores = 2, seed = 12345) womensrole_bglm_1 ## ---- echo=FALSE, eval = TRUE------------------------------------------------- print(womensrole_bglm_1) ## ----rstanarm-ci, eval = TRUE------------------------------------------------- ci95 <- posterior_interval(womensrole_bglm_1, prob = 0.95, pars = "education") round(ci95, 2) ## ----rstanarm-methods, eval = TRUE-------------------------------------------- cbind(Median = coef(womensrole_bglm_1), MAD_SD = se(womensrole_bglm_1)) summary(residuals(womensrole_bglm_1)) # not deviance residuals cov2cor(vcov(womensrole_bglm_1)) ## ----rstanarm-shinystan, eval = FALSE----------------------------------------- # launch_shinystan(womensrole_bglm_1, ppd = FALSE) ## ----rstanarm-posterior_predict, eval = 
TRUE---------------------------------- y_rep <- posterior_predict(womensrole_bglm_1) dim(y_rep) ## ----rstanarm-criticism-plot, fig.width=8, out.width="90%", fig.cap="Posterior predictive boxplots vs. observed datapoints", eval = TRUE---- par(mfrow = 1:2, mar = c(5,3.7,1,0) + 0.1, las = 3) boxplot(sweep(y_rep[,womensrole$gender == "Male"], 2, STATS = womensrole$total[womensrole$gender == "Male"], FUN = "/"), axes = FALSE, main = "Male", pch = NA, xlab = "Years of Education", ylab = "Proportion of Agrees") with(womensrole, axis(1, at = education[gender == "Male"] + 1, labels = 0:20)) axis(2, las = 1) with(womensrole[womensrole$gender == "Male",], points(education + 1, agree / (agree + disagree), pch = 16, col = "red")) boxplot(sweep(y_rep[,womensrole$gender == "Female"], 2, STATS = womensrole$total[womensrole$gender == "Female"], FUN = "/"), axes = FALSE, main = "Female", pch = NA, xlab = "Years of Education", ylab = "") with(womensrole, axis(1, at = education[gender == "Female"] + 1, labels = 0:20)) with(womensrole[womensrole$gender == "Female",], points(education + 1, agree / (agree + disagree), pch = 16, col = "red")) ## ---- rstanarm-update, results="hide", eval = TRUE---------------------------- (womensrole_bglm_2 <- update(womensrole_bglm_1, formula. = . ~ . 
+ I(education^2))) ## ---- echo=FALSE-------------------------------------------------------------- print(womensrole_bglm_2) ## ----rstanarm-loo, eval = TRUE------------------------------------------------ loo_bglm_1 <- loo(womensrole_bglm_1) loo_bglm_2 <- loo(womensrole_bglm_2) ## ----rstanarm-loo-plot, fig.width=7, out.width="70%", eval = TRUE------------- par(mfrow = 1:2, mar = c(5,3.8,1,0) + 0.1, las = 3) plot(loo_bglm_1, label_points = TRUE) plot(loo_bglm_2, label_points = TRUE) ## ---- rstanarm-loo-compare, eval = TRUE--------------------------------------- loo_compare(loo_bglm_1, loo_bglm_2) ## ---- rstanarm-loo-print, eval = TRUE----------------------------------------- loo_bglm_1 ## ---- rstanarm-posterior_predict-manipulate, eval = TRUE---------------------- # note: in newdata we want agree and disagree to sum to the number of people we # want to predict for. the values of agree and disagree don't matter so long as # their sum is the desired number of trials. we need to explicitly imply the # number of trials like this because our original data are aggregate. if we had # bernoulli data then it would be a given we wanted to predict for single # individuals. 
newdata <- data.frame(agree = c(0,0), disagree = c(100,100), education = c(12,16), gender = factor("Female", levels = c("Male", "Female"))) y_rep <- posterior_predict(womensrole_bglm_2, newdata) summary(apply(y_rep, 1, diff)) ## ---- rstanarm-rhat-fit, results='hide', warning=TRUE, eval = TRUE------------ bad_rhat <- stan_glm(mpg ~ ., data = mtcars, iter = 20, chains = 2, seed = 12345) good_rhat <- update(bad_rhat, iter = 1000, chains = 2, seed = 12345) ## ---- rstasnarm-rhat-bad, eval = TRUE----------------------------------------- rhat <- summary(bad_rhat)[, "Rhat"] rhat[rhat > 1.1] ## ---- rstasnarm-rhat-good, eval = TRUE---------------------------------------- any(summary(good_rhat)[, "Rhat"] > 1.1) rstanarm/inst/doc/betareg.html0000644000176200001440000067440614551550162016147 0ustar liggesusers Modeling Rates/Proportions using Beta Regression with rstanarm

Modeling Rates/Proportions using Beta Regression with rstanarm

Imad Ali, Jonah Gabry and Ben Goodrich

2024-01-16

Introduction

This vignette explains how to model continuous outcomes on the open unit interval using the stan_betareg function in the rstanarm package.

The four steps of a Bayesian analysis are

  1. Specify a joint distribution for the outcome(s) and all the unknowns, which typically takes the form of a marginal prior distribution for the unknowns multiplied by a likelihood for the outcome(s) conditional on the unknowns. This joint distribution is proportional to a posterior distribution of the unknowns conditional on the observed data.
  2. Draw from the posterior distribution using Markov chain Monte Carlo (MCMC).
  3. Evaluate how well the model fits the data and possibly revise the model.
  4. Draw from the posterior predictive distribution of the outcome(s) given interesting values of the predictors in order to visualize how a manipulation of a predictor affects (a function of) the outcome(s).

Steps 3 and 4 are covered in more depth by the vignette entitled “How to Use the rstanarm Package”. This vignette focuses on Step 1 when the likelihood is the product of beta distributions.

Likelihood

Beta regression uses the beta distribution as the likelihood for the data, \[ f(y_i | a, b) = \frac{y_i^{(a-1)}(1-y_i)^{(b-1)}}{B(a,b)} \] where \(B(\cdot)\) is the beta function. The shape parameters for the distribution are \(a\) and \(b\) and enter into the model according to the following transformations, \[ a = \mu\cdot\phi \\ b = (1-\mu)\cdot\phi \]

Let \(g_1(\cdot)\) be some link function. Then, in the specification of the shape parameters above, \(\mu = g_1^{-1}(\mathbf{X}\boldsymbol{\beta})\), where \(\boldsymbol{X}\) is a \(N\times K\) dimensional matrix of predictors, and \(\boldsymbol{\beta}\) is a \(K\) dimensional vector of parameters associated with each predictor.

In the simplest case (with only one set of regressors), \(\phi\) is a scalar parameter. Alternatively, it is possible to model \(\phi\) using a second set of regressors \(\mathbf{Z}\). In this context let \(g_2(\cdot)\) be some link function that is not necessarily identical to \(g_1(\cdot)\). Then \(\phi = g_2^{-1}(\mathbf{Z}\boldsymbol{\gamma})\), where \(\boldsymbol{\gamma}\) is a \(J\) dimensional vector of parameters associated with the \(N\times J\) dimensional matrix of predictors \(\mathbf{Z}\).

After substituting the shape parameter values in, the likelihood used in beta regression takes the following form, \[ f(y_i | \mu, \phi) = \frac{y_i^{(\mu\phi-1)}(1-y_i)^{((1-\mu)\phi-1)}}{B(\mu\phi,(1-\mu)\phi)} \]

Priors

A full Bayesian analysis requires specifying prior distributions \(f(\boldsymbol{\beta})\) and \(f(\phi)\) for the vector of regression coefficients and \(\phi\). When using stan_betareg, these distributions can be set using the prior_intercept, prior, and prior_phi arguments. The stan_betareg function supports a variety of prior distributions, which are explained in the rstanarm documentation (help(priors, package = 'rstanarm')).

When modeling \(\phi\) with a linear predictor a full Bayesian analysis requires specifying the prior distributions \(f(\boldsymbol{\beta})\) and \(f(\boldsymbol{\gamma})\). In stan_betareg the prior distributions on \(\boldsymbol{\gamma}\) can be set using the prior_intercept_z and prior_z arguments.

As an example, suppose we have \(K\) predictors and believe — prior to seeing the data — that \(\beta_1, \dots, \beta_K\) and \(\phi\) are as likely to be positive as they are to be negative, but are highly unlikely to be far from zero. These beliefs can be represented by normal distributions with mean zero and a small scale (standard deviation). To give \(\phi\) and each of the \(\beta\)s this prior (with a scale of 1, say), in the call to stan_betareg we would include the arguments prior_intercept = normal(0,1), prior = normal(0,1), and prior_phi = normal(0,1).

If, on the other hand, we have less a priori confidence that the parameters will be close to zero then we could use a larger scale for the normal distribution and/or a distribution with heavier tails than the normal like the Student t distribution. Step 1 in the “How to Use the rstanarm Package” vignette discusses one such example.

After fitting the model we can use the prior_summary function to print information about the prior distributions used when fitting the model.

Posterior

When using only a single set of regressors, the posterior distribution of \(\boldsymbol{\beta}\) and \(\phi\) is proportional to the product of the likelihood contributions, the \(K\) priors on the \(\beta_k\) parameters, and \(\phi\), \[ f(\boldsymbol{\beta},\phi|\mathbf{y},\mathbf{X}) \propto \prod_{i=1}^N f(y_i | a, b) \times \prod_{k=1}^K f(\beta_k) \times f(\phi) \]

When using two sets of regressors, the posterior distribution of \(\boldsymbol{\beta}\) and \(\boldsymbol{\gamma}\) is proportional to the product of the likelihood contribution, the \(K\) priors on the \(\beta_k\) parameters, and the \(J\) priors on the \(\gamma_j\) parameters,

\[ f(\boldsymbol{\beta},\boldsymbol{\gamma}|\mathbf{y},\mathbf{X}) \propto \prod_{i=1}^N f(y_i | a, b) \times \prod_{k=1}^K f(\beta_k) \times \prod_{j=1}^J f(\gamma_j) \]

An Example Using Simulated Data

In this example the outcome variable \(\mathbf{y}\) is simulated in a way that warrants the use of beta regression. It is worth mentioning that the data generation process is quite convoluted, which is apparent in the identification of the likelihood above.

The data simulated below uses the logistic link function on the first set of regressors and the log link function on the second set of regressors.

The model can be fit by calling stan_betareg, using the appropriate link functions.

      (Intercept)                 x (phi)_(Intercept)           (phi)_z 
             0.93             -0.20              1.84              0.31 
                x (phi)_(Intercept) 
             0.00              1.08 

For clarity we can use prior_summary to print the information about the prior distributions used to fit the models. The priors used in fit1 are provided below.

Priors for model 'fit1' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients
  Specified prior:
    ~ normal(location = 0, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 0, scale = 1.2)

Intercept_z (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients_z
  Specified prior:
    ~ normal(location = 0, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 0, scale = 1.2)
------
See help('prior_summary.stanreg') for more details

The usual posterior analyses are available in rstanarm. The plots below illustrate simulated values of the outcome variable. The incorrect model noticeably fails to capture the top of the distribution consistently in comparison to the true model.

We can also compare models by evaluating the expected log pointwise predictive density (elpd), which can be calculated using the loo method, which provides an interface for rstanarm models to the functionality in the loo package.

     elpd_diff se_diff
fit1   0.0       0.0  
fit2 -79.9      11.8  

The difference in elpd is negative, indicating that the expected predictive accuracy for the first model is higher.

An Example Using Gasoline Data

In some applied contexts it may be necessary to work with an outcome variable that is a proportion. If the proportion is bound on the open unit interval then beta regression can be considered a reasonable estimation method. The betareg package provides a dataset on the proportion of crude oil converted to gasoline after distillation and fractionation. This variable is defined as yield. Below stan_betareg is used to model yield as a function of temperature, pressure, and the batch of conditions.

(Intercept)        temp      batch1      batch2      batch3      batch4 
      -5.16        0.01        1.35        0.95        1.15        0.77 
     batch5      batch6      batch7      batch8      batch9       (phi) 
       0.80        0.72        0.33        0.26        0.15       12.04 
      (Intercept)              temp            batch1            batch2 
            -6.07              0.01              1.69              1.29 
           batch3            batch4            batch5            batch6 
             1.53              1.03              1.10              1.01 
           batch7            batch8            batch9 (phi)_(Intercept) 
             0.52              0.47              0.37              5.36 
   (phi)_pressure 
             0.04 

The plots below illustrate simulated values of gasoline yield. While the first model accounts for variation in batch conditions, its predictions look somewhat uniform rather than resembling the peaked and right-skewed behavior of the true data. The second model does a somewhat better job at capturing the shape of the distribution; however, its location is off, as it is centered around 0.50 rather than 0.20.

         elpd_diff se_diff
gas_fit2   0.0       0.0  
gas_fit1 -33.7       3.2  

Evaluating the expected log pointwise predictive density (elpd) using loo reveals that the second of the two models is preferred.

References

Ferrari, S. L. P., and Cribari-Neto, F. (2004) “Beta Regression for Modelling Rates and Proportions.” Journal of Applied Statistics, Vol. 31, No. 7, pp. 799–815.

rstanarm/inst/include/0000755000176200001440000000000014414044166014505 5ustar liggesusersrstanarm/inst/include/CODOLS.hpp0000644000176200001440000000203514414044166016201 0ustar liggesusers#ifndef RSTANARM__CODOLS_HPP #define RSTANARM__CODOLS_HPP /* * Compute ordinary least squares coefficients, * even in the situation where X is rank deficient * See https://eigen.tuxfamily.org/dox/classEigen_1_1CompleteOrthogonalDecomposition.html */ template inline Eigen::Matrix::type, Eigen::Dynamic, 1> CODOLS(const Eigen::Matrix& X, const Eigen::Matrix& y, std::ostream* pstream__) { typename boost::math::tools::promote_args::type T1__; using namespace Eigen; CompleteOrthogonalDecomposition cod(X); return cod.solve(y); } inline auto CODOLS(const Eigen::Matrix& X, const Eigen::Map, 0, Eigen::Stride<0, 0>>& y, std::ostream* pstream__) { using namespace Eigen; CompleteOrthogonalDecomposition cod(X); return cod.solve(y).eval(); } #endif rstanarm/inst/include/meta_header.hpp0000644000176200001440000000014214370470372017454 0ustar liggesusers#ifndef RSTANARM__META_HEADER_HPP #define RSTANARM__META_HEADER_HPP #include "CODOLS.hpp" #endif rstanarm/inst/include/tests.cpp0000644000176200001440000000032214370470372016353 0ustar liggesusers// [[Rcpp::depends(BH)]] // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::depends(StanHeaders)]] #include #include #include #include "meta_header.hpp" // Need test for CODOLS rstanarm/inst/CITATION0000644000176200001440000000343514414044166014224 0ustar liggesusersyear <- sub("-.*", "", meta$Date) version_note <- sprintf("R package version %s", meta$Version) citHeader("To cite rstanarm in publications please use the first citation entry.", "If you were using the 'stan_jm' modelling function then, where possible,", "please consider including the second citation entry as well.") bibentry(bibtype = "Misc", title = "rstanarm: {Bayesian} applied regression modeling via {Stan}.", author = c(person("Ben", "Goodrich"), person("Jonah", "Gabry"), person("Imad", "Ali"), 
person("Sam", "Brilleman")), note = version_note, year = year, url = "https://mc-stan.org/rstanarm/", textVersion = paste0("Goodrich B, Gabry J, Ali I & Brilleman S. (", year, "). ", title = "rstanarm: Bayesian applied regression modeling via Stan. ", version_note, " https://mc-stan.org/rstanarm.") ) bibentry(bibtype = "Misc", title = "Joint longitudinal and time-to-event models via {Stan}.", author = c(person("SL", "Brilleman"), person("MJ", "Crowther"), person("M", "Moreno-Betancur"), person("J", "Buros Novik"), person("R", "Wolfe")), note = "StanCon 2018. 10-12 Jan 2018. Pacific Grove, CA, USA.", url = "https://github.com/stan-dev/stancon_talks/", year = "2018", textVersion = paste("Brilleman SL, Crowther MJ, Moreno-Betancur M, Buros Novik J & Wolfe R.", title = "Joint longitudinal and time-to-event models via Stan.", "StanCon 2018. 10-12 Jan 2018. Pacific Grove, CA, USA.", "https://github.com/stan-dev/stancon_talks/") ) rstanarm/cleanup0000755000176200001440000000030614551552005013457 0ustar liggesusers#!/bin/sh -e # Note to Windows users: This is not actually platform specific. "${R_HOME}/bin/R" --vanilla --slave -e 'roxygen2::roxygenize(load_code = roxygen2::load_source, clean = TRUE)' exit $?