marginaleffects/ 0000755 0001762 0000144 00000000000 14560154575 013420 5 ustar ligges users marginaleffects/NAMESPACE 0000644 0001762 0000144 00000012544 14560035476 014643 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(coef,comparisons)
S3method(coef,hypotheses)
S3method(coef,marginalmeans)
S3method(coef,predictions)
S3method(coef,slopes)
S3method(get_coef,afex_aov)
S3method(get_coef,betareg)
S3method(get_coef,bracl)
S3method(get_coef,brmsfit)
S3method(get_coef,brmultinom)
S3method(get_coef,data.frame)
S3method(get_coef,default)
S3method(get_coef,gam)
S3method(get_coef,gamlss)
S3method(get_coef,glmmTMB)
S3method(get_coef,lmerMod)
S3method(get_coef,lmerModLmerTest)
S3method(get_coef,mblogit)
S3method(get_coef,merMod)
S3method(get_coef,mlm)
S3method(get_coef,multinom)
S3method(get_coef,nls)
S3method(get_coef,polr)
S3method(get_coef,scam)
S3method(get_coef,selection)
S3method(get_coef,svyolr)
S3method(get_coef,workflow)
S3method(get_group_names,bracl)
S3method(get_group_names,brmsfit)
S3method(get_group_names,clm)
S3method(get_group_names,default)
S3method(get_group_names,hurdle)
S3method(get_group_names,mblogit)
S3method(get_group_names,mlm)
S3method(get_group_names,multinom)
S3method(get_group_names,polr)
S3method(get_group_names,svyolr)
S3method(get_mean_or_mode,character)
S3method(get_mean_or_mode,data.frame)
S3method(get_mean_or_mode,default)
S3method(get_mean_or_mode,factor)
S3method(get_mean_or_mode,logical)
S3method(get_model_matrix,default)
S3method(get_predict,Learner)
S3method(get_predict,MCMCglmm)
S3method(get_predict,afex_aov)
S3method(get_predict,bart)
S3method(get_predict,betareg)
S3method(get_predict,bife)
S3method(get_predict,biglm)
S3method(get_predict,brmsfit)
S3method(get_predict,brmultinom)
S3method(get_predict,clm)
S3method(get_predict,coxph)
S3method(get_predict,crch)
S3method(get_predict,default)
S3method(get_predict,fixest)
S3method(get_predict,gamlss)
S3method(get_predict,glimML)
S3method(get_predict,glm)
S3method(get_predict,glmmPQL)
S3method(get_predict,glmmTMB)
S3method(get_predict,hxlr)
S3method(get_predict,inferences_simulation)
S3method(get_predict,lm)
S3method(get_predict,lmerMod)
S3method(get_predict,lmerModLmerTest)
S3method(get_predict,lrm)
S3method(get_predict,mblogit)
S3method(get_predict,merMod)
S3method(get_predict,mhurdle)
S3method(get_predict,mlogit)
S3method(get_predict,model_fit)
S3method(get_predict,multinom)
S3method(get_predict,ols)
S3method(get_predict,orm)
S3method(get_predict,polr)
S3method(get_predict,rlmerMod)
S3method(get_predict,rms)
S3method(get_predict,rq)
S3method(get_predict,stanreg)
S3method(get_predict,svyolr)
S3method(get_predict,tobit1)
S3method(get_predict,workflow)
S3method(get_vcov,Learner)
S3method(get_vcov,MCMCglmm)
S3method(get_vcov,afex_aov)
S3method(get_vcov,bart)
S3method(get_vcov,biglm)
S3method(get_vcov,brmsfit)
S3method(get_vcov,default)
S3method(get_vcov,gamlss)
S3method(get_vcov,glimML)
S3method(get_vcov,glmmTMB)
S3method(get_vcov,inferences_simulation)
S3method(get_vcov,mhurdle)
S3method(get_vcov,model_fit)
S3method(get_vcov,orm)
S3method(get_vcov,scam)
S3method(get_vcov,workflow)
S3method(glance,comparisons)
S3method(glance,hypotheses)
S3method(glance,marginaleffects_mids)
S3method(glance,marginalmeans)
S3method(glance,predictions)
S3method(glance,slopes)
S3method(plot,comparisons)
S3method(plot,predictions)
S3method(plot,slopes)
S3method(print,comparisons)
S3method(print,hypotheses)
S3method(print,marginaleffects)
S3method(print,predictions)
S3method(print,slopes)
S3method(sanitize_model_specific,bart)
S3method(sanitize_model_specific,brmsfit)
S3method(sanitize_model_specific,inferences_simulation)
S3method(sanitize_model_specific,mblogit)
S3method(sanitize_model_specific,svyglm)
S3method(sanitize_model_specific,svyolr)
S3method(set_coef,afex_aov)
S3method(set_coef,betareg)
S3method(set_coef,crch)
S3method(set_coef,data.frame)
S3method(set_coef,default)
S3method(set_coef,gamlss)
S3method(set_coef,glimML)
S3method(set_coef,glm)
S3method(set_coef,glmmPQL)
S3method(set_coef,glmmTMB)
S3method(set_coef,glmx)
S3method(set_coef,hetprob)
S3method(set_coef,hurdle)
S3method(set_coef,hxlr)
S3method(set_coef,ivpml)
S3method(set_coef,lm)
S3method(set_coef,lme)
S3method(set_coef,lmerMod)
S3method(set_coef,lmerModLmerTest)
S3method(set_coef,merMod)
S3method(set_coef,mlm)
S3method(set_coef,model_fit)
S3method(set_coef,multinom)
S3method(set_coef,nls)
S3method(set_coef,polr)
S3method(set_coef,rlmerMod)
S3method(set_coef,scam)
S3method(set_coef,selection)
S3method(set_coef,svyolr)
S3method(set_coef,workflow)
S3method(set_coef,zeroinfl)
S3method(tidy,comparisons)
S3method(tidy,hypotheses)
S3method(tidy,marginaleffects_mids)
S3method(tidy,marginalmeans)
S3method(tidy,predictions)
S3method(tidy,slopes)
S3method(vcov,comparisons)
S3method(vcov,hypotheses)
S3method(vcov,marginalmeans)
S3method(vcov,predictions)
S3method(vcov,slopes)
export(avg_comparisons)
export(avg_predictions)
export(avg_slopes)
export(comparisons)
export(datagrid)
export(datagridcf)
export(deltamethod)
export(expect_margins)
export(expect_predictions)
export(expect_slopes)
export(get_coef)
export(get_group_names)
export(get_model_matrix)
export(get_predict)
export(get_vcov)
export(glance)
export(hypotheses)
export(inferences)
export(marginal_means)
export(marginaleffects)
export(marginalmeans)
export(meffects)
export(plot_comparisons)
export(plot_predictions)
export(plot_slopes)
export(posterior_draws)
export(posteriordraws)
export(predictions)
export(set_coef)
export(slopes)
export(tidy)
import(data.table)
importFrom(Rcpp,evalCpp)
importFrom(generics,glance)
importFrom(generics,tidy)
useDynLib(marginaleffects)
marginaleffects/README.md 0000644 0001762 0000144 00000014565 14557317544 014715 0 ustar ligges users

The parameters of a statistical model can sometimes be difficult to
interpret substantively, especially when that model includes non-linear
components, interactions, or transformations. Analysts who fit such
complex models often seek to transform raw parameter estimates into
quantities that are easier for domain experts and stakeholders to
understand, such as predictions, contrasts, risk differences, ratios,
odds, lift, slopes, and so on.
Unfortunately, computing these quantities—along with associated standard
errors—can be a tedious and error-prone task. This problem is compounded
by the fact that modeling packages in `R` and `Python` produce objects
with varied structures, which hold different information. This means
that end-users often have to write customized code to interpret the
estimates obtained by fitting Linear, GLM, GAM, Bayesian, Mixed Effects,
and other model types. This can lead to wasted effort, confusion, and
mistakes, and it can hinder the implementation of best practices.
## Marginal Effects Zoo: The Book
[This free online book](https://marginaleffects.com/) introduces a
conceptual framework to clearly define statistical quantities of
interest, and shows how to estimate those quantities using the
`marginaleffects` package for `R` and `Python`. The techniques
introduced herein can enhance the interpretability of [over 100 classes
of statistical and machine learning
models](https://marginaleffects.com/vignettes/supported_models.html),
including linear, GLM, GAM, mixed-effects, bayesian, categorical
outcomes, XGBoost, and more. With a single unified interface, users can
compute and plot many estimands, including:
- Predictions (aka fitted values or adjusted predictions)
- Comparisons such as contrasts, risk differences, risk ratios, odds,
etc.
- Slopes (aka marginal effects or partial derivatives)
- Marginal means
- Linear and non-linear hypothesis tests
- Equivalence tests
- Uncertainty estimates using the delta method, bootstrapping,
simulation, or conformal inference.
- Much more!
[The Marginal Effects Zoo](https://marginaleffects.com/) book includes
over 30 chapters of tutorials, case studies, and technical notes. It
covers a wide range of topics, including how the `marginaleffects`
package can facilitate the analysis of:
- Experiments
- Observational data
- Causal inference with G-Computation
- Machine learning models
- Bayesian modeling
- Multilevel regression with post-stratification (MRP)
- Missing data
- Matching
- Inverse probability weighting
- Conformal prediction
[Get started by clicking
here!](https://marginaleffects.com/vignettes/get_started.html)
## `marginaleffects`: The Package
The `marginaleffects` package for `R` and `Python` offers a single point
of entry to easily interpret the results of [over 100 classes of
models,](https://marginaleffects.com/vignettes/supported_models.html)
using a simple and consistent user interface. Its benefits include:
- *Powerful:* It can compute and plot predictions; comparisons
(contrasts, risk ratios, etc.); slopes; and conduct hypothesis and
equivalence tests for over 100 different classes of models in `R`.
- *Simple:* All functions share a simple and unified interface.
- *Documented*: Each function is thoroughly documented with abundant
examples. The Marginal Effects Zoo website includes 20,000+ words of
vignettes and case studies.
- *Efficient:* [Some
operations](https://marginaleffects.com/vignettes/performance.html)
can be up to 1000 times faster and use 30 times less memory than
with the `margins` package.
- *Valid:* When possible, [numerical results are
checked](https://marginaleffects.com/vignettes/supported_models.html)
against alternative software like `Stata` or other `R` packages.
- *Thin:* The `R` package requires relatively few dependencies.
- *Standards-compliant:* `marginaleffects` follows “tidy” principles
and returns objects that work with standard functions like
`summary()`, `tidy()`, and `glance()`. These objects are easy to
program with and feed to other packages like
[`ggplot2`](https://marginaleffects.com/vignettes/plot.html) or
[`modelsummary`.](https://marginaleffects.com/vignettes/tables.html)
- *Extensible:* Adding support for new models is very easy, often
requiring less than 10 lines of new code. Please submit [feature
requests on
Github.](https://github.com/vincentarelbundock/marginaleffects/issues)
- *Active development*: Bugs are fixed promptly.
## How to help
The `marginaleffects` package and the Marginal Effects Zoo book will
always be free. If you like this project, you can contribute in four
ways:
1. Make a donation to the [Native Women’s Shelter of
Montreal](https://www.nwsm.info/) or to [Give
Directly](https://www.givedirectly.org/), and send me (Vincent) a
quick note. You’ll make my day.
2. Submit bug reports, documentation improvements, or code
contributions to the Github repositories of the [R
version](https://github.com/vincentarelbundock/marginaleffects) or
the [Python
version](https://github.com/vincentarelbundock/pymarginaleffects) of
the package.
3. [Cite the `marginaleffects`
package](https://marginaleffects.com/CITATION.html) in your work and
tell your friends about it.
4. Create a new entry [for the Meme
Gallery!](https://marginaleffects.com/vignettes/meme.html)
marginaleffects/man/ 0000755 0001762 0000144 00000000000 14560042044 014156 5 ustar ligges users marginaleffects/man/get_averages.Rd 0000644 0001762 0000144 00000004412 14554076657 017126 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_averages.R
\name{get_averages}
\alias{get_averages}
\title{Average Estimates (aka "Margins")}
\usage{
get_averages(x, by = TRUE, ...)
}
\arguments{
\item{x}{Object produced by the \code{predictions()}, \code{comparisons()}, or \code{slopes()} functions.}
\item{by}{Character vector of variable names over which to compute group-wise average estimates. When \code{by=NULL}, the global average (per term) is reported.}
\item{...}{All additional arguments are passed to the original fitting
function to override the original call options: \code{conf_level}, \code{transform},
etc. See \code{?predictions}, \code{?comparisons}, \code{?slopes}.}
}
\value{
A \code{data.frame} of estimates and uncertainty estimates
}
\description{
Calculate average estimates by taking the (group-wise) mean of all the unit-level
estimates computed by the \code{predictions()}, \code{comparisons()}, or \code{slopes()} functions.
Warning: It is generally faster and safer to use the \code{by} argument of one of
the three functions listed above. Alternatively, one can call it in one step:
\code{avg_slopes(model)}
\code{slopes(model, by = TRUE)}
Proceeding in two steps by assigning the unit-level estimates is typically
slower, because all estimates must be computed twice.
Note that the \code{tidy()} and \code{summary()} methods are slower wrappers around \verb{avg_*()} functions.
}
\details{
Standard errors are estimated using the delta method. See the \code{marginaleffects} website for details.
In Bayesian models (e.g., \code{brms}), estimates are aggregated applying the
median (or mean) function twice. First, we apply it to all
marginal effects for each posterior draw, thereby estimating one Average (or
Median) Marginal Effect per iteration of the MCMC chain. Second, we
apply the mean and the \code{quantile} function to the results of Step 1 to
obtain the Average Marginal Effect and its associated interval.
}
\examples{
\dontshow{if (interactive() || isTRUE(Sys.getenv("R_DOC_BUILD") == "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\dontshow{\}) # examplesIf}
mod <- lm(mpg ~ factor(gear), data = mtcars)
avg_comparisons(mod, variables = list(gear = "sequential"))
}
\keyword{internal}
marginaleffects/man/complete_levels.Rd 0000644 0001762 0000144 00000001044 14541720224 017630 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/complete_levels.R
\name{complete_levels}
\alias{complete_levels}
\title{Create a data.frame with all factor or character levels}
\usage{
complete_levels(x, character_levels = NULL)
}
\description{
\code{model.matrix} breaks when \code{newdata} includes a factor
variable, but not all levels are present in the data. This is bad for us
because we often want to get predictions with one (or few) rows, where some
factor levels are inevitably missing.
}
\keyword{internal}
marginaleffects/man/hypotheses.Rd 0000644 0001762 0000144 00000031631 14557277362 016667 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hypotheses.R
\name{hypotheses}
\alias{hypotheses}
\title{(Non-)Linear Tests for Null Hypotheses, Joint Hypotheses, Equivalence, Non Superiority, and Non Inferiority}
\usage{
hypotheses(
model,
hypothesis = NULL,
vcov = NULL,
conf_level = 0.95,
df = Inf,
equivalence = NULL,
joint = FALSE,
joint_test = "f",
FUN = NULL,
numderiv = "fdforward",
...
)
}
\arguments{
\item{model}{Model object or object generated by the \code{comparisons()}, \code{slopes()}, or \code{predictions()} functions.}
\item{hypothesis}{specify a hypothesis test or custom contrast using a numeric value, vector, or matrix, a string, or a string formula.
\itemize{
\item Numeric:
\itemize{
\item Single value: the null hypothesis used in the computation of Z and p (before applying \code{transform}).
\item Vector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the \code{hypothesis} argument.
\item Matrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.
}
\item String formula to specify linear or non-linear hypothesis tests. If the \code{term} column uniquely identifies rows, terms can be used in the formula. Otherwise, use \code{b1}, \code{b2}, etc. to identify the position of each parameter. The \verb{b*} wildcard can be used to test hypotheses on all estimates. Examples:
\itemize{
\item \code{hp = drat}
\item \code{hp + drat = 12}
\item \code{b1 + b2 + b3 = 0}
\item \verb{b* / b1 = 1}
}
\item String:
\itemize{
\item "pairwise": pairwise differences between estimates in each row.
\item "reference": differences between the estimates in each row and the estimate in the first row.
\item "sequential": difference between an estimate and the estimate in the next row.
\item "revpairwise", "revreference", "revsequential": inverse of the corresponding hypotheses, as described above.
}
\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html
}}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{conf_level}{numeric value between 0 and 1. Confidence level to use to build a confidence interval.}
\item{df}{Degrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and \code{Inf}. When \code{df} is \code{Inf}, the normal distribution is used. When \code{df} is finite, the \code{t} distribution is used. See \link[insight:get_df]{insight::get_df} for a convenient function to extract degrees of freedom. Ex: \code{slopes(model, df = insight::get_df(model))}}
\item{equivalence}{Numeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.}
\item{joint}{Joint test of statistical significance. The null hypothesis value can be set using the \code{hypothesis} argument.
\itemize{
\item FALSE: Hypotheses are not tested jointly.
\item TRUE: All parameters are tested jointly.
\item String: A regular expression to match parameters to be tested jointly. \code{grep(joint, perl = TRUE)}
\item Character vector of parameter names to be tested. Characters refer to the names of the vector returned by \code{coef(object)}.
\item Integer vector of indices. Which parameters positions to test jointly.
}}
\item{joint_test}{A character string specifying the type of test, either "f" or "chisq". The null hypothesis is set by the \code{hypothesis} argument, with default null equal to 0 for all parameters.}
\item{FUN}{\code{NULL} or function.
\itemize{
\item \code{NULL} (default): hypothesis test on a model's coefficients, or on the quantities estimated by one of the \code{marginaleffects} package functions.
\item Function which accepts a model object and returns a numeric vector or a data.frame with two columns called \code{term} and \code{estimate}. This argument can be useful when users want to conduct a hypothesis test on an arbitrary function of quantities held in a model object. See examples below.
}}
\item{numderiv}{string or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.
\itemize{
\item "fdforward": finite difference method with forward differences (default)
\item "fdcenter": finite difference method with central differences
\item "richardson": Richardson extrapolation method
\item Extra arguments can be specified by passing a list to the \code{numderiv} argument, with the name of the method first and named arguments following, ex: \code{numderiv=list("fdcenter", eps = 1e-5)}. When an unknown argument is used, \code{marginaleffects} prints the list of valid arguments for each method.
}}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
}
\description{
Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. In that sense, \code{\link{hypotheses}} emulates the behavior of the excellent and well-established \link[car:deltaMethod]{car::deltaMethod} and \link[car:linearHypothesis]{car::linearHypothesis} functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors.
To learn more, read the hypothesis tests vignette, visit the
package website, or scroll down this page for a full list of vignettes:
\itemize{
\item \url{https://marginaleffects.com/vignettes/hypothesis.html}
\item \url{https://marginaleffects.com/}
}
Warning #1: Tests are conducted directly on the scale defined by the \code{type} argument. For some models, it can make sense to conduct hypothesis or equivalence tests on the \code{"link"} scale instead of the \code{"response"} scale which is often the default.
Warning #2: For hypothesis tests on objects produced by the \code{marginaleffects} package, it is safer to use the \code{hypothesis} argument of the original function. Using \code{hypotheses()} may not work in certain environments, in lists, or when working programmatically with *apply style functions.
Warning #3: The tests assume that the \code{hypothesis} expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the \code{inferences()} function with \code{method = "boot"}.
}
\section{Joint hypothesis tests}{
The test statistic for the joint Wald test is calculated as (R * theta_hat - r)' * inv(R * V_hat * R') * (R * theta_hat - r) / Q,
where theta_hat is the vector of estimated parameters, V_hat is the estimated covariance matrix, R is a Q x P matrix for testing Q hypotheses on P parameters,
r is a Q x 1 vector for the null hypothesis, and Q is the number of rows in R. If the test is a Chi-squared test, the test statistic is not normalized.
The p-value is then calculated based on either the F-distribution (for F-test) or the Chi-squared distribution (for Chi-squared test).
For the F-test, the degrees of freedom are Q and (n - P), where n is the sample size and P is the number of parameters.
For the Chi-squared test, the degrees of freedom are Q.
}
\section{Equivalence, Inferiority, Superiority}{
\eqn{\theta} is an estimate, \eqn{\sigma_\theta} its estimated standard error, and \eqn{[a, b]} are the bounds of the interval supplied to the \code{equivalence} argument.
Non-inferiority:
\itemize{
\item \eqn{H_0}{H0}: \eqn{\theta \leq a}{\theta <= a}
\item \eqn{H_1}{H1}: \eqn{\theta > a}
\item \eqn{t=(\theta - a)/\sigma_\theta}{t=(\theta - a)/\sigma_\theta}
\item p: Upper-tail probability
}
Non-superiority:
\itemize{
\item \eqn{H_0}{H0}: \eqn{\theta \geq b}{\theta >= b}
\item \eqn{H_1}{H1}: \eqn{\theta < b}
\item \eqn{t=(\theta - b)/\sigma_\theta}{t=(\theta - b)/\sigma_\theta}
\item p: Lower-tail probability
}
Equivalence: Two One-Sided Tests (TOST)
\itemize{
\item p: Maximum of the non-inferiority and non-superiority p values.
}
Thanks to Russell V. Lenth for the excellent \code{emmeans} package and documentation which inspired this feature.
}
\examples{
library(marginaleffects)
mod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars)
# When `FUN` and `hypotheses` are `NULL`, `hypotheses()` returns a data.frame of parameters
hypotheses(mod)
# Test of equality between coefficients
hypotheses(mod, hypothesis = "hp = wt")
# Non-linear function
hypotheses(mod, hypothesis = "exp(hp + wt) = 0.1")
# Robust standard errors
hypotheses(mod, hypothesis = "hp = wt", vcov = "HC3")
# b1, b2, ... shortcuts can be used to identify the position of the
# parameters of interest in the output of FUN
hypotheses(mod, hypothesis = "b2 = b3")
# wildcard
hypotheses(mod, hypothesis = "b* / b2 = 1")
# term names with special characters have to be enclosed in backticks
hypotheses(mod, hypothesis = "`factor(cyl)6` = `factor(cyl)8`")
mod2 <- lm(mpg ~ hp * drat, data = mtcars)
hypotheses(mod2, hypothesis = "`hp:drat` = drat")
# predictions(), comparisons(), and slopes()
mod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)
cmp <- comparisons(mod, newdata = "mean")
hypotheses(cmp, hypothesis = "b1 = b2")
mfx <- slopes(mod, newdata = "mean")
hypotheses(cmp, hypothesis = "b2 = 0.2")
pre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35)))
hypotheses(pre, hypothesis = "b1 = b2")
# The `FUN` argument can be used to compute standard errors for fitted values
mod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)
f <- function(x) predict(x, type = "link", newdata = mtcars)
p <- hypotheses(mod, FUN = f)
head(p)
f <- function(x) predict(x, type = "response", newdata = mtcars)
p <- hypotheses(mod, FUN = f)
head(p)
# Complex aggregation
# Step 1: Collapse predicted probabilities by outcome level, for each individual
# Step 2: Take the mean of the collapsed probabilities by group and `cyl`
library(dplyr)
library(MASS)
library(dplyr)
dat <- transform(mtcars, gear = factor(gear))
mod <- polr(gear ~ factor(cyl) + hp, dat)
aggregation_fun <- function(model) {
predictions(model, vcov = FALSE) |>
mutate(group = ifelse(group \%in\% c("3", "4"), "3 & 4", "5")) |>
summarize(estimate = sum(estimate), .by = c("rowid", "cyl", "group")) |>
summarize(estimate = mean(estimate), .by = c("cyl", "group")) |>
rename(term = cyl)
}
hypotheses(mod, FUN = aggregation_fun)
# Equivalence, non-inferiority, and non-superiority tests
mod <- lm(mpg ~ hp + factor(gear), data = mtcars)
p <- predictions(mod, newdata = "median")
hypotheses(p, equivalence = c(17, 18))
mfx <- avg_slopes(mod, variables = "hp")
hypotheses(mfx, equivalence = c(-.1, .1))
cmp <- avg_comparisons(mod, variables = "gear", hypothesis = "pairwise")
hypotheses(cmp, equivalence = c(0, 10))
# joint hypotheses: character vector
model <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars)
hypotheses(model, joint = c("as.factor(cyl)6:hp", "as.factor(cyl)8:hp"))
# joint hypotheses: regular expression
hypotheses(model, joint = "cyl")
# joint hypotheses: integer indices
hypotheses(model, joint = 2:3)
# joint hypotheses: different null hypotheses
hypotheses(model, joint = 2:3, hypothesis = 1)
hypotheses(model, joint = 2:3, hypothesis = 1:2)
# joint hypotheses: marginaleffects object
cmp <- avg_comparisons(model)
hypotheses(cmp, joint = "cyl")
}
marginaleffects/man/print.marginaleffects.Rd 0000644 0001762 0000144 00000005052 14560035476 020747 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.R
\name{print.marginaleffects}
\alias{print.marginaleffects}
\title{Print \code{marginaleffects} objects}
\usage{
\method{print}{marginaleffects}(
x,
digits = getOption("marginaleffects_print_digits", default = 3),
p_eps = getOption("marginaleffects_print_p_eps", default = 0.001),
topn = getOption("marginaleffects_print_topn", default = 5),
nrows = getOption("marginaleffects_print_nrows", default = 30),
ncols = getOption("marginaleffects_print_ncols", default = 30),
style = getOption("marginaleffects_print_style", default = "summary"),
type = getOption("marginaleffects_print_type", default = TRUE),
column_names = getOption("marginaleffects_print_column_names", default = TRUE),
...
)
}
\arguments{
\item{x}{An object produced by one of the \code{marginaleffects} package functions.}
\item{digits}{The number of digits to display.}
\item{p_eps}{p values smaller than this number are printed in "<0.001" style.}
\item{topn}{The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows.}
\item{nrows}{The number of rows which will be printed before truncation.}
\item{ncols}{The maximum number of column names to display at the bottom of the printed output.}
\item{style}{"summary" or "data.frame"}
\item{type}{boolean: should the type be printed?}
\item{column_names}{boolean: should the column names be printed?}
\item{...}{Other arguments are currently ignored.}
}
\description{
This function controls the text which is printed to the console when one of the core \code{marginaleffects} functions is called and the object is returned: \code{predictions()}, \code{comparisons()}, \code{slopes()}, \code{hypotheses()}, \code{avg_predictions()}, \code{avg_comparisons()}, \code{avg_slopes()}.
All of those functions return standard data frames. Columns can be extracted by name, \code{predictions(model)$estimate}, and all the usual data manipulation functions work out-of-the-box: \code{colnames()}, \code{head()}, \code{subset()}, \code{dplyr::filter()}, \code{dplyr::arrange()}, etc.
Some of the data columns are not printed by default. You can disable pretty printing and print the full results as a standard data frame using the \code{style} argument or by applying \code{as.data.frame()} on the object. See examples below.
}
\examples{
library(marginaleffects)
mod <- lm(mpg ~ hp + am + factor(gear), data = mtcars)
p <- predictions(mod, by = c("am", "gear"))
p
subset(p, am == 1)
print(p, style = "data.frame")
data.frame(p)
}
marginaleffects/man/get_model_matrix.Rd 0000644 0001762 0000144 00000003504 14541720224 017774 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_model_matrix.R
\name{get_model_matrix}
\alias{get_model_matrix}
\alias{get_model_matrix.default}
\title{Get a named model matrix}
\usage{
get_model_matrix(model, newdata)
\method{get_model_matrix}{default}(model, newdata)
}
\arguments{
\item{model}{Model object}
\item{newdata}{Grid of predictor values at which we evaluate the slopes.
\itemize{
\item Warning: Please avoid modifying your dataset between fitting the model and calling a \code{marginaleffects} function. This can sometimes lead to unexpected results.
\item \code{NULL} (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using \code{\link[insight:get_data]{insight::get_data()}}, which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.
\item \code{\link[=datagrid]{datagrid()}} call to specify a custom grid of regressors. For example:
\itemize{
\item \code{newdata = datagrid(cyl = c(4, 6))}: \code{cyl} variable equal to 4 and 6 and other regressors fixed at their means or modes.
\item See the Examples section and the \code{\link[=datagrid]{datagrid()}} documentation.
}
\item string:
\itemize{
\item "mean": Marginal Effects at the Mean. Slopes when each predictor is held at its mean or mode.
\item "median": Marginal Effects at the Median. Slopes when each predictor is held at its median or mode.
\item "marginalmeans": Marginal Effects at Marginal Means. See Details section below.
\item "tukey": Marginal Effects at Tukey's 5 numbers.
\item "grid": Marginal Effects on a grid of representative numbers (Tukey's 5 numbers and unique values of categorical predictors).
}
}}
}
\description{
Get a named model matrix
}
\keyword{internal}
marginaleffects/man/plot_slopes.Rd 0000644 0001762 0000144 00000021260 14557277362 017034 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_slopes.R
\name{plot_slopes}
\alias{plot_slopes}
\title{Plot Conditional or Marginal Slopes}
\usage{
plot_slopes(
model,
variables = NULL,
condition = NULL,
by = NULL,
newdata = NULL,
type = "response",
vcov = NULL,
conf_level = 0.95,
wts = NULL,
slope = "dydx",
rug = FALSE,
gray = FALSE,
draw = TRUE,
...
)
}
\arguments{
\item{model}{Model object}
\item{variables}{Name of the variable whose marginal effect (slope) we want to plot on the y-axis.}
\item{condition}{Conditional slopes
\itemize{
\item Character vector (max length 4): Names of the predictors to display.
\item Named list (max length 4): List names correspond to predictors. List elements can be:
\itemize{
\item Numeric vector
\item Function which returns a numeric vector or a set of unique categorical values
\item Shortcut strings for common reference values: "minmax", "quartile", "threenum"
}
\item 1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).
\item Numeric variables in positions 2 and 3 are summarized by Tukey's five numbers \code{?stats::fivenum}.
}}
\item{by}{Aggregate unit-level estimates (aka, marginalize, average over). Valid inputs:
\itemize{
\item \code{FALSE}: return the original unit-level estimates.
\item \code{TRUE}: aggregate estimates for each term.
\item Character vector of column names in \code{newdata} or in the data frame produced by calling the function without the \code{by} argument.
\item Data frame with a \code{by} column of group labels, and merging columns shared by \code{newdata} or the data frame produced by calling the same function without the \code{by} argument.
\item See examples below.
\item For more complex aggregations, you can use the \code{FUN} argument of the \code{hypotheses()} function. See that function's documentation and the Hypothesis Test vignettes on the \code{marginaleffects} website.
}}
\item{newdata}{When \code{newdata} is \code{NULL}, the grid is determined by the \code{condition} argument. When \code{newdata} is not \code{NULL}, the argument behaves in the same way as in the \code{slopes()} function.}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{conf_level}{numeric value between 0 and 1. Confidence level to use to build a confidence interval.}
\item{wts}{string or numeric: weights to use when computing average contrasts or slopes. These weights only affect the averaging in \verb{avg_*()} or with the \code{by} argument, and not the unit-level estimates themselves. Internally, estimates and weights are passed to the \code{weighted.mean()} function.
\itemize{
\item string: column name of the weights variable in \code{newdata}. When supplying a column name to \code{wts}, it is recommended to supply the original data (including the weights variable) explicitly to \code{newdata}.
\item numeric: vector of length equal to the number of rows in the original data or in \code{newdata} (if supplied).
}}
\item{slope}{string indicates the type of slope or (semi-)elasticity to compute:
\itemize{
\item "dydx": dY/dX
\item "eyex": dY/dX * Y / X
\item "eydx": dY/dX * Y
\item "dyex": dY/dX / X
\item Y is the predicted value of the outcome; X is the observed value of the predictor.
}}
\item{rug}{TRUE displays tick marks on the axes to mark the distribution of raw data.}
\item{gray}{\code{TRUE}: grayscale plot. \code{FALSE} (default): color plot.}
\item{draw}{\code{TRUE} returns a \code{ggplot2} plot. \code{FALSE} returns a \code{data.frame} of the underlying data.}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
}
\value{
A \code{ggplot2} object
}
\description{
Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).
The \code{by} argument is used to plot marginal slopes, that is, slopes made on the original data, but averaged by subgroups. This is analogous to using the \code{by} argument in the \code{slopes()} function.
The \code{condition} argument is used to plot conditional slopes, that is, slopes computed on a user-specified grid. This is analogous to using the \code{newdata} argument and \code{datagrid()} function in a \code{slopes()} call. All variables whose values are not specified explicitly are treated as usual by \code{datagrid()}, that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the \code{condition} argument, or supply model-specific arguments to compute population-level estimates. See details below.
See the "Plots" vignette and website for tutorials and information on how to customize plots:
\itemize{
\item https://marginaleffects.com/vignettes/plot.html
\item https://marginaleffects.com
}
}
\section{Model-Specific Arguments}{
Some model types allow model-specific arguments to modify the nature of
marginal effects, predictions, marginal means, and contrasts. Please report
other package-specific \code{predict()} arguments on Github so we can add them to
the table below.
https://github.com/vincentarelbundock/marginaleffects/issues\tabular{llll}{
Package \tab Class \tab Argument \tab Documentation \cr
\code{brms} \tab \code{brmsfit} \tab \code{ndraws} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\tab \tab \code{re_formula} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\code{lme4} \tab \code{merMod} \tab \code{re.form} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\code{glmmTMB} \tab \code{glmmTMB} \tab \code{re.form} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{allow.new.levels} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{zitype} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\code{mgcv} \tab \code{bam} \tab \code{exclude} \tab \link[mgcv:predict.bam]{mgcv::predict.bam} \cr
\code{robustlmm} \tab \code{rlmerMod} \tab \code{re.form} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\code{MCMCglmm} \tab \code{MCMCglmm} \tab \code{ndraws} \tab \cr
}
}
\examples{
library(marginaleffects)
mod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)
plot_slopes(mod, variables = "hp", condition = "drat")
plot_slopes(mod, variables = "hp", condition = c("drat", "am"))
plot_slopes(mod, variables = "hp", condition = list("am", "drat" = 3:5))
plot_slopes(mod, variables = "am", condition = list("hp", "drat" = range))
plot_slopes(mod, variables = "am", condition = list("hp", "drat" = "threenum"))
}
marginaleffects/man/expect_margins.Rd 0000644 0001762 0000144 00000000514 14541720224 017457 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tinytest.R
\name{expect_margins}
\alias{expect_margins}
\title{\code{tinytest} helper}
\usage{
expect_margins(
results,
margins_object,
se = TRUE,
tolerance = 1e-05,
verbose = FALSE
)
}
\description{
\code{tinytest} helper
}
\keyword{internal}
marginaleffects/man/marginal_means.Rd 0000644 0001762 0000144 00000001006 14560035476 017432 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/deprecated.R
\name{marginal_means}
\alias{marginal_means}
\title{Deprecated function}
\usage{
marginal_means(
model,
variables = NULL,
newdata = NULL,
vcov = TRUE,
conf_level = 0.95,
type = NULL,
transform = NULL,
cross = FALSE,
hypothesis = NULL,
equivalence = NULL,
p_adjust = NULL,
df = Inf,
wts = "equal",
by = NULL,
numderiv = "fdforward",
...
)
}
\description{
Deprecated function
}
\keyword{internal}
marginaleffects/man/expect_slopes.Rd 0000644 0001762 0000144 00000000452 14541720224 017325 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tinytest.R
\name{expect_slopes}
\alias{expect_slopes}
\title{\code{tinytest} helper}
\usage{
expect_slopes(object, n_unique = NULL, pct_na = 5, se = TRUE, ...)
}
\description{
\code{tinytest} helper
}
\keyword{internal}
marginaleffects/man/deltamethod.Rd 0000644 0001762 0000144 00000000360 14560035476 016751 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/deprecated.R
\name{deltamethod}
\alias{deltamethod}
\title{Deprecated function}
\usage{
deltamethod(...)
}
\description{
Deprecated function
}
\keyword{internal}
marginaleffects/man/posterior_draws.Rd 0000644 0001762 0000144 00000001654 14541720224 017703 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/posterior_draws.R
\name{posterior_draws}
\alias{posterior_draws}
\title{Extract Posterior Draws or Bootstrap Resamples from \code{marginaleffects} Objects}
\usage{
posterior_draws(x, shape = "long")
}
\arguments{
\item{x}{An object produced by a \code{marginaleffects} package function, such as \code{predictions()}, \code{avg_slopes()}, \code{hypotheses()}, etc.}
\item{shape}{string indicating the shape of the output format:
\itemize{
\item "long": long format data frame
\item "DxP": Matrix with draws as rows and parameters as columns
\item "PxD": Matrix with parameters as rows and draws as columns
\item "rvar": Random variable datatype (see \code{posterior} package documentation).
}}
}
\value{
A data.frame with \code{drawid} and \code{draw} columns.
}
\description{
Extract Posterior Draws or Bootstrap Resamples from \code{marginaleffects} Objects
}
marginaleffects/man/marginalmeans.Rd 0000644 0001762 0000144 00000001003 14560035476 017270 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/deprecated.R
\name{marginalmeans}
\alias{marginalmeans}
\title{Deprecated function}
\usage{
marginalmeans(
model,
variables = NULL,
newdata = NULL,
vcov = TRUE,
conf_level = 0.95,
type = NULL,
transform = NULL,
cross = FALSE,
hypothesis = NULL,
equivalence = NULL,
p_adjust = NULL,
df = Inf,
wts = "equal",
by = NULL,
numderiv = "fdforward",
...
)
}
\description{
Deprecated function
}
\keyword{internal}
marginaleffects/man/comparisons.Rd 0000644 0001762 0000144 00000071347 14554104515 017023 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/comparisons.R
\name{comparisons}
\alias{comparisons}
\alias{avg_comparisons}
\title{Comparisons Between Predictions Made With Different Regressor Values}
\usage{
comparisons(
model,
newdata = NULL,
variables = NULL,
comparison = "difference",
type = NULL,
vcov = TRUE,
by = FALSE,
conf_level = 0.95,
transform = NULL,
cross = FALSE,
wts = NULL,
hypothesis = NULL,
equivalence = NULL,
p_adjust = NULL,
df = Inf,
eps = NULL,
numderiv = "fdforward",
...
)
avg_comparisons(
model,
newdata = NULL,
variables = NULL,
type = NULL,
vcov = TRUE,
by = TRUE,
conf_level = 0.95,
comparison = "difference",
transform = NULL,
cross = FALSE,
wts = NULL,
hypothesis = NULL,
equivalence = NULL,
p_adjust = NULL,
df = Inf,
eps = NULL,
numderiv = "fdforward",
...
)
}
\arguments{
\item{model}{Model object}
\item{newdata}{Grid of predictor values at which we evaluate the comparisons.
\itemize{
\item Warning: Please avoid modifying your dataset between fitting the model and calling a \code{marginaleffects} function. This can sometimes lead to unexpected results.
\item \code{NULL} (default): Unit-level contrasts for each observed value in the dataset (empirical distribution). The dataset is retrieved using \code{\link[insight:get_data]{insight::get_data()}}, which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.
\item data frame: Unit-level contrasts for each row of the \code{newdata} data frame.
\item string:
\itemize{
\item "mean": Contrasts at the Mean. Contrasts when each predictor is held at its mean or mode.
\item "median": Contrasts at the Median. Contrasts when each predictor is held at its median or mode.
\item "marginalmeans": Contrasts at Marginal Means.
\item "tukey": Contrasts at Tukey's 5 numbers.
\item "grid": Contrasts on a grid of representative numbers (Tukey's 5 numbers and unique values of categorical predictors).
}
\item \code{\link[=datagrid]{datagrid()}} call to specify a custom grid of regressors. For example:
\itemize{
\item \code{newdata = datagrid(cyl = c(4, 6))}: \code{cyl} variable equal to 4 and 6 and other regressors fixed at their means or modes.
\item \code{newdata = datagrid(mpg = fivenum)}: \code{mpg} variable held at Tukey's five numbers (using the \code{fivenum} function), and other regressors fixed at their means or modes.
\item See the Examples section and the \link{datagrid} documentation.
}
}}
\item{variables}{Focal variables
\itemize{
\item \code{NULL}: compute comparisons for all the variables in the model object (can be slow).
\item Character vector: subset of variables (usually faster).
\item Named list: names identify the subset of variables of interest, and values define the type of contrast to compute. Acceptable values depend on the variable type:
\itemize{
\item Factor or character variables:
\itemize{
\item "reference": Each factor level is compared to the factor reference (base) level
\item "all": All combinations of observed levels
\item "sequential": Each factor level is compared to the previous factor level
\item "pairwise": Each factor level is compared to all other levels
\item "minmax": The highest and lowest levels of a factor.
\item "revpairwise", "revreference", "revsequential": inverse of the corresponding hypotheses.
\item Vector of length 2 with the two values to compare.
\item Data frame with the same number of rows as \code{newdata}, with two columns of "lo" and "hi" values to compare.
\item Function that accepts a vector and returns a data frame with two columns of "lo" and "hi" values to compare. See examples below.
}
\item Logical variables:
\itemize{
\item NULL: contrast between TRUE and FALSE
\item Data frame with the same number of rows as \code{newdata}, with two columns of "lo" and "hi" values to compare.
\item Function that accepts a vector and returns a data frame with two columns of "lo" and "hi" values to compare. See examples below.
}
\item Numeric variables:
\itemize{
\item Numeric of length 1: Forward contrast for a gap of \code{x}, computed between the observed value and the observed value plus \code{x}. Users can set a global option to get a "center" or "backward" contrast instead: \code{options(marginaleffects_contrast_direction="center")}
\item Numeric vector of length 2: Contrast between the largest and the smallest elements of the \code{x} vector.
\item Data frame with the same number of rows as \code{newdata}, with two columns of "lo" and "hi" values to compare.
\item Function that accepts a vector and returns a data frame with two columns of "lo" and "hi" values to compare. See examples below.
\item "iqr": Contrast across the interquartile range of the regressor.
\item "sd": Contrast across one standard deviation around the regressor mean.
\item "2sd": Contrast across two standard deviations around the regressor mean.
\item "minmax": Contrast between the maximum and the minimum values of the regressor.
}
\item Examples:
\itemize{
\item \code{variables = list(gear = "pairwise", hp = 10)}
\item \code{variables = list(gear = "sequential", hp = c(100, 120))}
\item \verb{variables = list(hp = \\(x) data.frame(low = x - 5, high = x + 10))}
\item See the Examples section below for more.
}
}
}}
\item{comparison}{How should pairs of predictions be compared? Difference, ratio, odds ratio, or user-defined functions.
\itemize{
\item string: shortcuts to common contrast functions.
\itemize{
\item Supported shortcuts strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, expdydx, expdydxavg, expdydxavgwts
\item See the Comparisons section below for definitions of each transformation.
}
\item function: accept two equal-length numeric vectors of adjusted predictions (\code{hi} and \code{lo}) and returns a vector of contrasts of the same length, or a unique numeric value.
\itemize{
\item See the Transformations section below for examples of valid functions.
}
}}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{by}{Aggregate unit-level estimates (aka, marginalize, average over). Valid inputs:
\itemize{
\item \code{FALSE}: return the original unit-level estimates.
\item \code{TRUE}: aggregate estimates for each term.
\item Character vector of column names in \code{newdata} or in the data frame produced by calling the function without the \code{by} argument.
\item Data frame with a \code{by} column of group labels, and merging columns shared by \code{newdata} or the data frame produced by calling the same function without the \code{by} argument.
\item See examples below.
\item For more complex aggregations, you can use the \code{FUN} argument of the \code{hypotheses()} function. See that function's documentation and the Hypothesis Test vignettes on the \code{marginaleffects} website.
}}
\item{conf_level}{numeric value between 0 and 1. Confidence level to use to build a confidence interval.}
\item{transform}{string or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Support string shortcuts: "exp", "ln"}
\item{cross}{\itemize{
\item \code{FALSE}: Contrasts represent the change in adjusted predictions when one predictor changes and all other variables are held constant.
\item \code{TRUE}: Contrasts represent the changes in adjusted predictions when all the predictors specified in the \code{variables} argument are manipulated simultaneously (a "cross-contrast").
}}
\item{wts}{string or numeric: weights to use when computing average contrasts or slopes. These weights only affect the averaging in \verb{avg_*()} or with the \code{by} argument, and not the unit-level estimates themselves. Internally, estimates and weights are passed to the \code{weighted.mean()} function.
\itemize{
\item string: column name of the weights variable in \code{newdata}. When supplying a column name to \code{wts}, it is recommended to supply the original data (including the weights variable) explicitly to \code{newdata}.
\item numeric: vector of length equal to the number of rows in the original data or in \code{newdata} (if supplied).
}}
\item{hypothesis}{specify a hypothesis test or custom contrast using a numeric value, vector, or matrix, a string, or a string formula.
\itemize{
\item Numeric:
\itemize{
\item Single value: the null hypothesis used in the computation of Z and p (before applying \code{transform}).
\item Vector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the \code{hypothesis} argument.
\item Matrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.
}
\item String formula to specify linear or non-linear hypothesis tests. If the \code{term} column uniquely identifies rows, terms can be used in the formula. Otherwise, use \code{b1}, \code{b2}, etc. to identify the position of each parameter. The \verb{b*} wildcard can be used to test hypotheses on all estimates. Examples:
\itemize{
\item \code{hp = drat}
\item \code{hp + drat = 12}
\item \code{b1 + b2 + b3 = 0}
\item \verb{b* / b1 = 1}
}
\item String:
\itemize{
\item "pairwise": pairwise differences between estimates in each row.
\item "reference": differences between the estimates in each row and the estimate in the first row.
\item "sequential": difference between an estimate and the estimate in the next row.
\item "revpairwise", "revreference", "revsequential": inverse of the corresponding hypotheses, as described above.
}
\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html
}}
\item{equivalence}{Numeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.}
\item{p_adjust}{Adjust p-values for multiple comparisons: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", or "fdr". See \link[stats:p.adjust]{stats::p.adjust}}
\item{df}{Degrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and \code{Inf}. When \code{df} is \code{Inf}, the normal distribution is used. When \code{df} is finite, the \code{t} distribution is used. See \link[insight:get_df]{insight::get_df} for a convenient function to extract degrees of freedom. Ex: \code{slopes(model, df = insight::get_df(model))}}
\item{eps}{NULL or numeric value which determines the step size to use when
calculating numerical derivatives: (f(x+eps)-f(x))/eps. When \code{eps} is
\code{NULL}, the step size is 0.0001 multiplied by the difference between
the maximum and minimum values of the variable with respect to which we
are taking the derivative. Changing \code{eps} may be necessary to avoid
numerical problems in certain models.}
\item{numderiv}{string or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.
\itemize{
\item "fdforward": finite difference method with forward differences (default)
\item "fdcenter": finite difference method with central differences
\item "richardson": Richardson extrapolation method
\item Extra arguments can be specified by passing a list to the \code{numDeriv} argument, with the name of the method first and named arguments following, ex: \code{numderiv=list("fdcenter", eps = 1e-5)}. When an unknown argument is used, \code{marginaleffects} prints the list of valid arguments for each method.
}}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
}
\value{
A \code{data.frame} with one row per observation (per term/group) and several columns:
\itemize{
\item \code{rowid}: row number of the \code{newdata} data frame
\item \code{type}: prediction type, as defined by the \code{type} argument
\item \code{group}: (optional) value of the grouped outcome (e.g., categorical outcome models)
\item \code{term}: the variable whose marginal effect is computed
\item \code{dydx}: slope of the outcome with respect to the term, for a given combination of predictor values
\item \code{std.error}: standard errors computed by via the delta method.
\item \code{p.value}: p value associated to the \code{estimate} column. The null is determined by the \code{hypothesis} argument (0 by default), and p values are computed before applying the \code{transform} argument.
\item \code{s.value}: Shannon information transforms of p values. How many consecutive "heads" tosses would provide the same amount of evidence (or "surprise") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst's intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).
\item \code{conf.low}: lower bound of the confidence interval (or equal-tailed interval for bayesian models)
\item \code{conf.high}: upper bound of the confidence interval (or equal-tailed interval for bayesian models)
}
See \code{?print.marginaleffects} for printing options.
}
\description{
Predict the outcome variable at different regressor values (e.g., college
graduates vs. others), and compare those predictions by computing a difference,
ratio, or some other function. \code{comparisons()} can return many quantities of
interest, such as contrasts, differences, risk ratios, changes in log odds, lift,
slopes, elasticities, etc.
\itemize{
\item \code{comparisons()}: unit-level (conditional) estimates.
\item \code{avg_comparisons()}: average (marginal) estimates.
}
\code{variables} identifies the focal regressors whose "effect" we are interested in. \code{comparison} determines how predictions with different regressor values are compared (difference, ratio, odds, etc.). The \code{newdata} argument and the \code{datagrid()} function control where statistics are evaluated in the predictor space: "at observed values", "at the mean", "at representative values", etc.
See the comparisons vignette and package website for worked examples and case studies:
\itemize{
\item \url{https://marginaleffects.com/vignettes/comparisons.html}
\item \url{https://marginaleffects.com/}
}
}
\section{Functions}{
\itemize{
\item \code{avg_comparisons()}: Average comparisons
}}
\section{Standard errors using the delta method}{
Standard errors for all quantities estimated by \code{marginaleffects} can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to \code{1e-8}, or to \code{1e-4} times the smallest absolute model coefficient, whichever is largest.
\code{marginaleffects} can delegate numeric differentiation to the \code{numDeriv} package, which allows more flexibility. To do this, users can pass arguments to the \code{numDeriv::jacobian} function through a global option. For example:
\itemize{
\item \code{options(marginaleffects_numDeriv = list(method = "simple", method.args = list(eps = 1e-6)))}
\item \code{options(marginaleffects_numDeriv = list(method = "Richardson", method.args = list(eps = 1e-5)))}
\item \code{options(marginaleffects_numDeriv = NULL)}
}
See the "Standard Errors and Confidence Intervals" vignette on the \code{marginaleffects} website for more details on the computation of standard errors:
https://marginaleffects.com/vignettes/uncertainty.html
Note that the \code{inferences()} function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:
https://marginaleffects.com/vignettes/bootstrap.html
}
\section{Model-Specific Arguments}{
Some model types allow model-specific arguments to modify the nature of
marginal effects, predictions, marginal means, and contrasts. Please report
other package-specific \code{predict()} arguments on Github so we can add them to
the table below.
https://github.com/vincentarelbundock/marginaleffects/issues\tabular{llll}{
Package \tab Class \tab Argument \tab Documentation \cr
\code{brms} \tab \code{brmsfit} \tab \code{ndraws} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\tab \tab \code{re_formula} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\code{lme4} \tab \code{merMod} \tab \code{re.form} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\code{glmmTMB} \tab \code{glmmTMB} \tab \code{re.form} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{allow.new.levels} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{zitype} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\code{mgcv} \tab \code{bam} \tab \code{exclude} \tab \link[mgcv:predict.bam]{mgcv::predict.bam} \cr
\code{robustlmm} \tab \code{rlmerMod} \tab \code{re.form} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\code{MCMCglmm} \tab \code{MCMCglmm} \tab \code{ndraws} \tab \cr
}
}
\section{comparison argument functions}{
The following transformations can be applied by supplying one of the shortcut strings to the
\code{comparison} argument.
\code{hi} is a vector of adjusted predictions for the "high" side of the
contrast. \code{lo} is a vector of adjusted predictions for the "low" side of the
contrast. \code{y} is a vector of adjusted predictions for the original data. \code{x}
is the predictor in the original data. \code{eps} is the step size to use to
compute derivatives and elasticities.\tabular{ll}{
Shortcut \tab Function \cr
difference \tab \(hi, lo) hi - lo \cr
differenceavg \tab \(hi, lo) mean(hi - lo) \cr
dydx \tab \(hi, lo, eps) (hi - lo)/eps \cr
eyex \tab \(hi, lo, eps, y, x) (hi - lo)/eps * (x/y) \cr
eydx \tab \(hi, lo, eps, y, x) ((hi - lo)/eps)/y \cr
dyex \tab \(hi, lo, eps, x) ((hi - lo)/eps) * x \cr
dydxavg \tab \(hi, lo, eps) mean((hi - lo)/eps) \cr
eyexavg \tab \(hi, lo, eps, y, x) mean((hi - lo)/eps * (x/y)) \cr
eydxavg \tab \(hi, lo, eps, y, x) mean(((hi - lo)/eps)/y) \cr
dyexavg \tab \(hi, lo, eps, x) mean(((hi - lo)/eps) * x) \cr
ratio \tab \(hi, lo) hi/lo \cr
ratioavg \tab \(hi, lo) mean(hi)/mean(lo) \cr
lnratio \tab \(hi, lo) log(hi/lo) \cr
lnratioavg \tab \(hi, lo) log(mean(hi)/mean(lo)) \cr
lnor \tab \(hi, lo) log((hi/(1 - hi))/(lo/(1 - lo))) \cr
lnoravg \tab \(hi, lo) log((mean(hi)/(1 - mean(hi)))/(mean(lo)/(1 - mean(lo)))) \cr
lift \tab \(hi, lo) (hi - lo)/lo \cr
liftavg \tab \(hi, lo) (mean(hi - lo))/mean(lo) \cr
expdydx \tab \(hi, lo, eps) ((exp(hi) - exp(lo))/exp(eps))/eps \cr
expdydxavg \tab \(hi, lo, eps) mean(((exp(hi) - exp(lo))/exp(eps))/eps) \cr
}
}
\section{Bayesian posterior summaries}{
By default, credible intervals in bayesian models are built as equal-tailed
intervals. This can be changed to a highest density interval by setting a global
option:
\code{options("marginaleffects_posterior_interval" = "eti")}
\code{options("marginaleffects_posterior_interval" = "hdi")}
By default, the center of the posterior distribution in bayesian models is
identified by the median. Users can use a different summary function by setting a
global option:
\code{options("marginaleffects_posterior_center" = "mean")}
\code{options("marginaleffects_posterior_center" = "median")}
When estimates are averaged using the \code{by} argument, the \code{tidy()} function, or
the \code{summary()} function, the posterior distribution is marginalized twice over.
First, we take the average \emph{across} units but \emph{within} each iteration of the
MCMC chain, according to what the user requested in \code{by} argument or
\code{tidy()/summary()} functions. Then, we identify the center of the resulting
posterior using the function supplied to the
\code{"marginaleffects_posterior_center"} option (the median by default).
}
\section{Equivalence, Inferiority, Superiority}{
\eqn{\theta} is an estimate, \eqn{\sigma_\theta} its estimated standard error, and \eqn{[a, b]} are the bounds of the interval supplied to the \code{equivalence} argument.
Non-inferiority:
\itemize{
\item \eqn{H_0}{H0}: \eqn{\theta \leq a}{\theta <= a}
\item \eqn{H_1}{H1}: \eqn{\theta > a}
\item \eqn{t=(\theta - a)/\sigma_\theta}{t=(\theta - a)/\sigma_\theta}
\item p: Upper-tail probability
}
Non-superiority:
\itemize{
\item \eqn{H_0}{H0}: \eqn{\theta \geq b}{\theta >= b}
\item \eqn{H_1}{H1}: \eqn{\theta < b}
\item \eqn{t=(\theta - b)/\sigma_\theta}{t=(\theta - b)/\sigma_\theta}
\item p: Lower-tail probability
}
Equivalence: Two One-Sided Tests (TOST)
\itemize{
\item p: Maximum of the non-inferiority and non-superiority p values.
}
Thanks to Russell V. Lenth for the excellent \code{emmeans} package and documentation which inspired this feature.
}
\section{Prediction types}{
The \code{type} argument determines the scale of the predictions used to compute quantities of interest with functions from the \code{marginaleffects} package. Admissible values for \code{type} depend on the model object. When users specify an incorrect value for \code{type}, \code{marginaleffects} will raise an informative error with a list of valid \code{type} values for the specific model object. The first entry in the list in that error message is the default type.
\code{invlink(link)} is a special type defined by \code{marginaleffects}. It is available for some (but not all) models and functions. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with \code{type="invlink(link)"} will not always be equivalent to the average of estimates with \code{type="response"}.
Some of the most common \code{type} values are:
response, link, E, Ep, average, class, conditional, count, cum.prob, cumprob, density, detection, disp, ev, expected, expvalue, fitted, invlink(link), latent, latent_N, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob
}
\examples{
\dontshow{if (interactive() || isTRUE(Sys.getenv("R_DOC_BUILD") == "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\dontshow{\}) # examplesIf}
library(marginaleffects)
# Linear model
tmp <- mtcars
tmp$am <- as.logical(tmp$am)
mod <- lm(mpg ~ am + factor(cyl), tmp)
avg_comparisons(mod, variables = list(cyl = "reference"))
avg_comparisons(mod, variables = list(cyl = "sequential"))
avg_comparisons(mod, variables = list(cyl = "pairwise"))
# GLM with different scale types
mod <- glm(am ~ factor(gear), data = mtcars)
avg_comparisons(mod, type = "response")
avg_comparisons(mod, type = "link")
# Contrasts at the mean
comparisons(mod, newdata = "mean")
# Contrasts between marginal means
comparisons(mod, newdata = "marginalmeans")
# Contrasts at user-specified values
comparisons(mod, newdata = datagrid(am = 0, gear = tmp$gear))
comparisons(mod, newdata = datagrid(am = unique, gear = max))
m <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)
comparisons(m, variables = "hp", newdata = datagrid(FUN_factor = unique, FUN_numeric = median))
# Numeric contrasts
mod <- lm(mpg ~ hp, data = mtcars)
avg_comparisons(mod, variables = list(hp = 1))
avg_comparisons(mod, variables = list(hp = 5))
avg_comparisons(mod, variables = list(hp = c(90, 100)))
avg_comparisons(mod, variables = list(hp = "iqr"))
avg_comparisons(mod, variables = list(hp = "sd"))
avg_comparisons(mod, variables = list(hp = "minmax"))
# using a function to specify a custom difference in one regressor
dat <- mtcars
dat$new_hp <- 49 * (dat$hp - min(dat$hp)) / (max(dat$hp) - min(dat$hp)) + 1
modlog <- lm(mpg ~ log(new_hp) + factor(cyl), data = dat)
fdiff <- \(x) data.frame(x, x + 10)
avg_comparisons(modlog, variables = list(new_hp = fdiff))
# Adjusted Risk Ratio: see the contrasts vignette
mod <- glm(vs ~ mpg, data = mtcars, family = binomial)
avg_comparisons(mod, comparison = "lnratioavg", transform = exp)
# Adjusted Risk Ratio: Manual specification of the `comparison`
avg_comparisons(
mod,
comparison = function(hi, lo) log(mean(hi) / mean(lo)),
transform = exp)
# cross contrasts
mod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars)
avg_comparisons(mod, variables = c("cyl", "gear"), cross = TRUE)
# variable-specific contrasts
avg_comparisons(mod, variables = list(gear = "sequential", hp = 10))
# hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect
mod <- lm(mpg ~ wt + drat, data = mtcars)
comparisons(
mod,
newdata = "mean",
hypothesis = "wt = drat")
# same hypothesis test using row indices
comparisons(
mod,
newdata = "mean",
hypothesis = "b1 - b2 = 0")
# same hypothesis test using numeric vector of weights
comparisons(
mod,
newdata = "mean",
hypothesis = c(1, -1))
# two custom contrasts using a matrix of weights
lc <- matrix(c(
1, -1,
2, 3),
ncol = 2)
comparisons(
mod,
newdata = "mean",
hypothesis = lc)
# Effect of a 1 group-wise standard deviation change
# First we calculate the SD in each group of `cyl`
# Second, we use that SD as the treatment size in the `variables` argument
library(dplyr)
mod <- lm(mpg ~ hp + factor(cyl), mtcars)
tmp <- mtcars \%>\%
group_by(cyl) \%>\%
mutate(hp_sd = sd(hp))
avg_comparisons(mod,
variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)),
by = "cyl")
# `by` argument
mod <- lm(mpg ~ hp * am * vs, data = mtcars)
comparisons(mod, by = TRUE)
mod <- lm(mpg ~ hp * am * vs, data = mtcars)
avg_comparisons(mod, variables = "hp", by = c("vs", "am"))
library(nnet)
mod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)
by <- data.frame(
group = c("3", "4", "5"),
by = c("3,4", "3,4", "5"))
comparisons(mod, type = "probs", by = by)
}
\references{
\itemize{
\item Greenland S. 2019. "Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values." The American Statistician. 73(S1): 106–114.
\item Cole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. "Surprise!" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136
}
}
marginaleffects/man/get_varcov_args.Rd 0000644 0001762 0000144 00000000636 14541720224 017627 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_vcov.R
\name{get_varcov_args}
\alias{get_varcov_args}
\title{Take a \code{summary()} style \code{vcov} argument and convert it to
\code{insight::get_varcov()}}
\usage{
get_varcov_args(model, vcov)
}
\description{
Take a \code{summary()} style \code{vcov} argument and convert it to
\code{insight::get_varcov()}
}
\keyword{internal}
marginaleffects/man/inferences.Rd 0000644 0001762 0000144 00000012431 14560042044 016567 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/inferences.R
\name{inferences}
\alias{inferences}
\title{(EXPERIMENTAL) Bootstrap, Conformal, and Simulation-Based Inference}
\usage{
inferences(
x,
method,
R = 1000,
conf_type = "perc",
conformal_test = NULL,
conformal_calibration = NULL,
conformal_score = "residual_abs",
...
)
}
\arguments{
\item{x}{Object produced by one of the core \code{marginaleffects} functions.}
\item{method}{String
\itemize{
\item "delta": delta method standard errors
\item "boot" package
\item "fwb": fractional weighted bootstrap
\item "rsample" package
\item "simulation" from a multivariate normal distribution (Krinsky & Robb, 1986)
\item "mi" multiple imputation for missing data
\item "conformal_split": prediction intervals using split conformal prediction (see Angelopoulos & Bates, 2022)
\item "conformal_cv+": prediction intervals using cross-validation+ conformal prediction (see Barber et al., 2020)
}}
\item{R}{Number of resamples, simulations, or cross-validation folds.}
\item{conf_type}{String: type of bootstrap interval to construct.
\itemize{
\item \code{boot}: "perc", "norm", "basic", or "bca"
\item \code{fwb}: "perc", "norm", "basic", "bc", or "bca"
\item \code{rsample}: "perc" or "bca"
\item \code{simulation}: argument ignored.
}}
\item{conformal_test}{Data frame of test data for conformal prediction.}
\item{conformal_calibration}{Data frame of calibration data for split conformal prediction (\verb{method="conformal_split"}).}
\item{conformal_score}{String. Warning: The \code{type} argument in \code{predictions()} must generate predictions which are on the same scale as the outcome variable. Typically, this means that \code{type} must be "response" or "probs".
\itemize{
\item "residual_abs" or "residual_sq" for regression tasks (numeric outcome)
\item "softmax" for classification tasks (when \code{predictions()} returns a \code{group} column, such as in multinomial or ordinal logit models).
}}
\item{...}{\itemize{
\item If \code{method="boot"}, additional arguments are passed to \code{boot::boot()}.
\item If \code{method="fwb"}, additional arguments are passed to \code{fwb::fwb()}.
\item If \code{method="rsample"}, additional arguments are passed to \code{rsample::bootstraps()}.
\item Additional arguments are ignored for all other methods.
}}
}
\value{
A \code{marginaleffects} object with simulation or bootstrap resamples and objects attached.
}
\description{
Warning: This function is experimental. It may be renamed, the user interface may change, or the functionality may migrate to arguments in other \code{marginaleffects} functions.
Apply this function to a \code{marginaleffects} object to change the inferential method used to compute uncertainty estimates.
}
\details{
When \code{method="simulation"}, we conduct simulation-based inference following the method discussed in Krinsky & Robb (1986):
\enumerate{
\item Draw \code{R} sets of simulated coefficients from a multivariate normal distribution with mean equal to the original model's estimated coefficients and variance equal to the model's variance-covariance matrix (classical, "HC3", or other).
\item Use the \code{R} sets of coefficients to compute \code{R} sets of estimands: predictions, comparisons, slopes, or hypotheses.
\item Take quantiles of the resulting distribution of estimands to obtain a confidence interval and the standard deviation of simulated estimates to estimate the standard error.
}
When \code{method="fwb"}, drawn weights are supplied to the model fitting function's \code{weights} argument; if the model doesn't accept non-integer weights, this method should not be used. If weights were included in the original model fit, they are extracted by \code{\link[=weights]{weights()}} and multiplied by the drawn weights. These weights are supplied to the \code{wts} argument of the estimation function (e.g., \code{comparisons()}).
}
\section{References}{
Krinsky, I., and A. L. Robb. 1986. “On Approximating the Statistical Properties of Elasticities.” Review of Economics and Statistics 68 (4): 715–9.
King, Gary, Michael Tomz, and Jason Wittenberg. "Making the most of statistical analyses: Improving interpretation and presentation." American journal of political science (2000): 347-361
Dowd, Bryan E., William H. Greene, and Edward C. Norton. "Computation of standard errors." Health services research 49.2 (2014): 731-750.
Angelopoulos, Anastasios N., and Stephen Bates. 2022. "A Gentle Introduction to Conformal Prediction and Distribution-Free Uncertainty Quantification." arXiv. https://doi.org/10.48550/arXiv.2107.07511.
Barber, Rina Foygel, Emmanuel J. Candes, Aaditya Ramdas, and Ryan J. Tibshirani. 2020. “Predictive Inference with the Jackknife+.” arXiv. http://arxiv.org/abs/1905.02928.
}
\examples{
\dontrun{
library(marginaleffects)
library(magrittr)
set.seed(1024)
mod <- lm(Sepal.Length ~ Sepal.Width * Species, data = iris)
# bootstrap
avg_predictions(mod, by = "Species") \%>\%
inferences(method = "boot")
avg_predictions(mod, by = "Species") \%>\%
inferences(method = "rsample")
# Fractional (bayesian) bootstrap
avg_slopes(mod, by = "Species") \%>\%
inferences(method = "fwb") \%>\%
posterior_draws("rvar") \%>\%
data.frame()
# Simulation-based inference
slopes(mod) \%>\%
inferences(method = "simulation") \%>\%
head()
}
}
marginaleffects/man/get_vcov.Rd 0000644 0001762 0000144 00000007761 14543163156 016305 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_vcov.R, R/methods_MCMCglmm.R,
% R/methods_afex.R, R/methods_aod.R, R/methods_biglm.R, R/methods_brms.R,
% R/methods_dbarts.R, R/methods_gamlss.R, R/methods_glmmTMB.R,
% R/methods_inferences_simulation.R, R/methods_mhurdle.R, R/methods_mlr3.R,
% R/methods_rms.R, R/methods_scam.R, R/methods_tidymodels.R
\name{get_vcov}
\alias{get_vcov}
\alias{get_vcov.default}
\alias{get_vcov.MCMCglmm}
\alias{get_vcov.afex_aov}
\alias{get_vcov.glimML}
\alias{get_vcov.biglm}
\alias{get_vcov.brmsfit}
\alias{get_vcov.bart}
\alias{get_vcov.gamlss}
\alias{get_vcov.glmmTMB}
\alias{get_vcov.inferences_simulation}
\alias{get_vcov.mhurdle}
\alias{get_vcov.Learner}
\alias{get_vcov.orm}
\alias{get_vcov.scam}
\alias{get_vcov.model_fit}
\alias{get_vcov.workflow}
\title{Get a named variance-covariance matrix from a model object (internal function)}
\usage{
get_vcov(model, ...)
\method{get_vcov}{default}(model, vcov = NULL, ...)
\method{get_vcov}{MCMCglmm}(model, vcov = NULL, ...)
\method{get_vcov}{afex_aov}(model, vcov = NULL, ...)
\method{get_vcov}{glimML}(model, vcov = NULL, ...)
\method{get_vcov}{biglm}(model, vcov = NULL, ...)
\method{get_vcov}{brmsfit}(model, vcov = NULL, ...)
\method{get_vcov}{bart}(model, vcov = NULL, ...)
\method{get_vcov}{gamlss}(model, ...)
\method{get_vcov}{glmmTMB}(model, ...)
\method{get_vcov}{inferences_simulation}(model, ...)
\method{get_vcov}{mhurdle}(model, vcov = NULL, ...)
\method{get_vcov}{Learner}(model, ...)
\method{get_vcov}{orm}(model, vcov = NULL, ...)
\method{get_vcov}{scam}(model, vcov = NULL, ...)
\method{get_vcov}{model_fit}(model, type = NULL, ...)
\method{get_vcov}{workflow}(model, type = NULL, ...)
}
\arguments{
\item{model}{Model object}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
}
\value{
A named square matrix of variance and covariances. The names must match the coefficient names.
}
\description{
Get a named variance-covariance matrix from a model object (internal function)
}
\keyword{internal}
marginaleffects/man/plot_comparisons.Rd 0000644 0001762 0000144 00000023143 14557277362 020066 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_comparisons.R
\name{plot_comparisons}
\alias{plot_comparisons}
\title{Plot Conditional or Marginal Comparisons}
\usage{
plot_comparisons(
model,
variables = NULL,
condition = NULL,
by = NULL,
newdata = NULL,
type = "response",
vcov = NULL,
conf_level = 0.95,
wts = NULL,
comparison = "difference",
transform = NULL,
rug = FALSE,
gray = FALSE,
draw = TRUE,
...
)
}
\arguments{
\item{model}{Model object}
\item{variables}{Name of the variable whose contrast we want to plot on the y-axis.}
\item{condition}{Conditional slopes
\itemize{
\item Character vector (max length 4): Names of the predictors to display.
\item Named list (max length 4): List names correspond to predictors. List elements can be:
\itemize{
\item Numeric vector
\item Function which returns a numeric vector or a set of unique categorical values
\item Shortcut strings for common reference values: "minmax", "quartile", "threenum"
}
\item 1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).
\item Numeric variables in positions 2 and 3 are summarized by Tukey's five numbers \code{?stats::fivenum}.
}}
\item{by}{Aggregate unit-level estimates (aka, marginalize, average over). Valid inputs:
\itemize{
\item \code{FALSE}: return the original unit-level estimates.
\item \code{TRUE}: aggregate estimates for each term.
\item Character vector of column names in \code{newdata} or in the data frame produced by calling the function without the \code{by} argument.
\item Data frame with a \code{by} column of group labels, and merging columns shared by \code{newdata} or the data frame produced by calling the same function without the \code{by} argument.
\item See examples below.
\item For more complex aggregations, you can use the \code{FUN} argument of the \code{hypotheses()} function. See that function's documentation and the Hypothesis Test vignettes on the \code{marginaleffects} website.
}}
\item{newdata}{When \code{newdata} is \code{NULL}, the grid is determined by the \code{condition} argument. When \code{newdata} is not \code{NULL}, the argument behaves in the same way as in the \code{comparisons()} function.}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{conf_level}{numeric value between 0 and 1. Confidence level to use to build a confidence interval.}
\item{wts}{string or numeric: weights to use when computing average contrasts or slopes. These weights only affect the averaging in \verb{avg_*()} or with the \code{by} argument, and not the unit-level estimates themselves. Internally, estimates and weights are passed to the \code{weighted.mean()} function.
\itemize{
\item string: column name of the weights variable in \code{newdata}. When supplying a column name to \code{wts}, it is recommended to supply the original data (including the weights variable) explicitly to \code{newdata}.
\item numeric: vector of length equal to the number of rows in the original data or in \code{newdata} (if supplied).
}}
\item{comparison}{How should pairs of predictions be compared? Difference, ratio, odds ratio, or user-defined functions.
\itemize{
\item string: shortcuts to common contrast functions.
\itemize{
\item Supported shortcuts strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, expdydx, expdydxavg, expdydxavgwts
\item See the Comparisons section below for definitions of each transformation.
}
\item function: accept two equal-length numeric vectors of adjusted predictions (\code{hi} and \code{lo}) and returns a vector of contrasts of the same length, or a unique numeric value.
\itemize{
\item See the Transformations section below for examples of valid functions.
}
}}
\item{transform}{string or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Support string shortcuts: "exp", "ln"}
\item{rug}{TRUE displays tick marks on the axes to mark the distribution of raw data.}
\item{gray}{FALSE grayscale or color plot}
\item{draw}{\code{TRUE} returns a \code{ggplot2} plot. \code{FALSE} returns a \code{data.frame} of the underlying data.}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
}
\value{
A \code{ggplot2} object
}
\description{
Plot comparisons on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).
The \code{by} argument is used to plot marginal comparisons, that is, comparisons made on the original data, but averaged by subgroups. This is analogous to using the \code{by} argument in the \code{comparisons()} function.
The \code{condition} argument is used to plot conditional comparisons, that is, comparisons made on a user-specified grid. This is analogous to using the \code{newdata} argument and \code{datagrid()} function in a \code{comparisons()} call. All variables whose values are not specified explicitly are treated as usual by \code{datagrid()}, that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the \code{condition} argument, or supply model-specific arguments to compute population-level estimates. See details below.
See the "Plots" vignette and website for tutorials and information on how to customize plots:
\itemize{
\item https://marginaleffects.com/vignettes/plot.html
\item https://marginaleffects.com
}
}
\section{Model-Specific Arguments}{
Some model types allow model-specific arguments to modify the nature of
marginal effects, predictions, marginal means, and contrasts. Please report
other package-specific \code{predict()} arguments on Github so we can add them to
the table below.
https://github.com/vincentarelbundock/marginaleffects/issues\tabular{llll}{
Package \tab Class \tab Argument \tab Documentation \cr
\code{brms} \tab \code{brmsfit} \tab \code{ndraws} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\tab \tab \code{re_formula} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\code{lme4} \tab \code{merMod} \tab \code{re.form} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\code{glmmTMB} \tab \code{glmmTMB} \tab \code{re.form} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{allow.new.levels} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{zitype} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\code{mgcv} \tab \code{bam} \tab \code{exclude} \tab \link[mgcv:predict.bam]{mgcv::predict.bam} \cr
\code{robustlmm} \tab \code{rlmerMod} \tab \code{re.form} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\code{MCMCglmm} \tab \code{MCMCglmm} \tab \code{ndraws} \tab \cr
}
}
\examples{
mod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)
plot_comparisons(mod, variables = "hp", condition = "drat")
plot_comparisons(mod, variables = "hp", condition = c("drat", "am"))
plot_comparisons(mod, variables = "hp", condition = list("am", "drat" = 3:5))
plot_comparisons(mod, variables = "am", condition = list("hp", "drat" = range))
plot_comparisons(mod, variables = "am", condition = list("hp", "drat" = "threenum"))
}
marginaleffects/man/sanitize_model_specific.Rd 0000644 0001762 0000144 00000013625 14554070071 021333 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods_aod.R, R/methods_betareg.R,
% R/sanity_model.R, R/methods_brms.R, R/methods_dbarts.R, R/methods_glmmTMB.R,
% R/methods_inferences_simulation.R, R/methods_mclogit.R, R/methods_mlogit.R,
% R/methods_ordinal.R, R/methods_plm.R, R/methods_quantreg.R,
% R/methods_survey.R
\name{sanitize_model_specific.glimML}
\alias{sanitize_model_specific.glimML}
\alias{sanitize_model_specific.betareg}
\alias{sanitize_model_specific}
\alias{sanitize_model_specific.default}
\alias{sanitize_model_specific.brmsfit}
\alias{sanitize_model_specific.bart}
\alias{sanitize_model_specific.glmmTMB}
\alias{sanitize_model_specific.inferences_simulation}
\alias{sanitize_model_specific.mblogit}
\alias{sanitize_model_specific.mlogit}
\alias{sanitize_model_specific.clm}
\alias{sanitize_model_specific.plm}
\alias{sanitize_model_specific.rqs}
\alias{sanitize_model_specific.svyolr}
\alias{sanitize_model_specific.svyglm}
\title{Method to raise model-specific warnings and errors}
\usage{
\method{sanitize_model_specific}{glimML}(model, ...)
\method{sanitize_model_specific}{betareg}(model, ...)
sanitize_model_specific(model, ...)
\method{sanitize_model_specific}{default}(
model,
vcov = NULL,
calling_function = "marginaleffects",
...
)
\method{sanitize_model_specific}{brmsfit}(model, ...)
\method{sanitize_model_specific}{bart}(model, ...)
\method{sanitize_model_specific}{glmmTMB}(
model,
vcov = NULL,
calling_function = "marginaleffects",
...
)
\method{sanitize_model_specific}{inferences_simulation}(model, vcov = FALSE, ...)
\method{sanitize_model_specific}{mblogit}(model, calling_function = "marginaleffects", ...)
\method{sanitize_model_specific}{mlogit}(model, newdata, ...)
\method{sanitize_model_specific}{clm}(model, ...)
\method{sanitize_model_specific}{plm}(model, ...)
\method{sanitize_model_specific}{rqs}(model, ...)
\method{sanitize_model_specific}{svyolr}(model, wts = NULL, ...)
\method{sanitize_model_specific}{svyglm}(model, wts = NULL, ...)
}
\arguments{
\item{model}{Model object}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{newdata}{Grid of predictor values at which we evaluate the slopes.
\itemize{
\item Warning: Please avoid modifying your dataset between fitting the model and calling a \code{marginaleffects} function. This can sometimes lead to unexpected results.
\item \code{NULL} (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using \code{\link[insight:get_data]{insight::get_data()}}, which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.
\item \code{\link[=datagrid]{datagrid()}} call to specify a custom grid of regressors. For example:
\itemize{
\item \code{newdata = datagrid(cyl = c(4, 6))}: \code{cyl} variable equal to 4 and 6 and other regressors fixed at their means or modes.
\item See the Examples section and the \code{\link[=datagrid]{datagrid()}} documentation.
}
\item string:
\itemize{
\item "mean": Marginal Effects at the Mean. Slopes when each predictor is held at its mean or mode.
\item "median": Marginal Effects at the Median. Slopes when each predictor is held at its median or mode.
\item "marginalmeans": Marginal Effects at Marginal Means. See Details section below.
\item "tukey": Marginal Effects at Tukey's 5 numbers.
\item "grid": Marginal Effects on a grid of representative numbers (Tukey's 5 numbers and unique values of categorical predictors).
}
}}
\item{wts}{string or numeric: weights to use when computing average contrasts or slopes. These weights only affect the averaging in \verb{avg_*()} or with the \code{by} argument, and not the unit-level estimates themselves. Internally, estimates and weights are passed to the \code{weighted.mean()} function.
\itemize{
\item string: column name of the weights variable in \code{newdata}. When supplying a column name to \code{wts}, it is recommended to supply the original data (including the weights variable) explicitly to \code{newdata}.
\item numeric: vector of length equal to the number of rows in the original data or in \code{newdata} (if supplied).
}}
}
\value{
A warning, an error, or nothing
}
\description{
Method to raise model-specific warnings and errors
}
\keyword{internal}
marginaleffects/man/get_predict.Rd 0000644 0001762 0000144 00000020115 14543163156 016746 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_predict.R, R/methods_MASS.R,
% R/methods_MCMCglmm.R, R/methods_afex.R, R/methods_aod.R,
% R/methods_betareg.R, R/methods_bife.R, R/methods_biglm.R, R/methods_nnet.R,
% R/methods_brglm2.R, R/methods_brms.R, R/methods_crch.R, R/methods_dbarts.R,
% R/methods_fixest.R, R/methods_gamlss.R, R/methods_glmmTMB.R,
% R/methods_inferences_simulation.R, R/methods_lme4.R, R/methods_mclogit.R,
% R/methods_mhurdle.R, R/methods_mlogit.R, R/methods_mlr3.R,
% R/methods_ordinal.R, R/methods_quantreg.R, R/methods_rms.R,
% R/methods_robustlmm.R, R/methods_rstanarm.R, R/methods_stats.R,
% R/methods_survey.R, R/methods_survival.R, R/methods_tidymodels.R,
% R/methods_tobit1.R
\name{get_predict}
\alias{get_predict}
\alias{get_predict.default}
\alias{get_predict.polr}
\alias{get_predict.glmmPQL}
\alias{get_predict.MCMCglmm}
\alias{get_predict.afex_aov}
\alias{get_predict.glimML}
\alias{get_predict.betareg}
\alias{get_predict.bife}
\alias{get_predict.biglm}
\alias{get_predict.multinom}
\alias{get_predict.brmultinom}
\alias{get_predict.brmsfit}
\alias{get_predict.crch}
\alias{get_predict.bart}
\alias{get_predict.fixest}
\alias{get_predict.gamlss}
\alias{get_predict.glmmTMB}
\alias{get_predict.inferences_simulation}
\alias{get_predict.merMod}
\alias{get_predict.lmerModLmerTest}
\alias{get_predict.lmerMod}
\alias{get_predict.mblogit}
\alias{get_predict.mhurdle}
\alias{get_predict.mlogit}
\alias{get_predict.Learner}
\alias{get_predict.clm}
\alias{get_predict.rq}
\alias{get_predict.rms}
\alias{get_predict.orm}
\alias{get_predict.lrm}
\alias{get_predict.ols}
\alias{get_predict.rlmerMod}
\alias{get_predict.stanreg}
\alias{get_predict.lm}
\alias{get_predict.glm}
\alias{get_predict.svyolr}
\alias{get_predict.coxph}
\alias{get_predict.model_fit}
\alias{get_predict.workflow}
\alias{get_predict.tobit1}
\title{Get predicted values from a model object (internal function)}
\usage{
get_predict(model, newdata, type, ...)
\method{get_predict}{default}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{polr}(model, newdata = insight::get_data(model), type = "probs", ...)
\method{get_predict}{glmmPQL}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{MCMCglmm}(model, newdata, type = "response", ndraws = 1000, ...)
\method{get_predict}{afex_aov}(model, newdata = NULL, ...)
\method{get_predict}{glimML}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{betareg}(model, newdata, ...)
\method{get_predict}{bife}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{biglm}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{multinom}(model, newdata = insight::get_data(model), type = "probs", ...)
\method{get_predict}{brmultinom}(model, newdata = insight::get_data(model), type = "probs", ...)
\method{get_predict}{brmsfit}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{crch}(model, newdata = NULL, type = "location", ...)
\method{get_predict}{bart}(model, newdata = NULL, ...)
\method{get_predict}{fixest}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{gamlss}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{glmmTMB}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{inferences_simulation}(model, newdata, ...)
\method{get_predict}{merMod}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{lmerModLmerTest}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{lmerMod}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{mblogit}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{mhurdle}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{mlogit}(model, newdata, ...)
\method{get_predict}{Learner}(model, newdata, type = NULL, ...)
\method{get_predict}{clm}(model, newdata = insight::get_data(model), type = "prob", ...)
\method{get_predict}{rq}(model, newdata = insight::get_data(model), type = NULL, ...)
\method{get_predict}{rms}(model, newdata = insight::get_data(model), type = NULL, ...)
\method{get_predict}{orm}(model, newdata = insight::get_data(model), type = NULL, ...)
\method{get_predict}{lrm}(model, newdata = insight::get_data(model), type = NULL, ...)
\method{get_predict}{ols}(model, newdata = insight::get_data(model), type = NULL, ...)
\method{get_predict}{rlmerMod}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{stanreg}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{lm}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{glm}(model, newdata = insight::get_data(model), type = "response", ...)
\method{get_predict}{svyolr}(model, newdata = insight::get_data(model), type = "probs", ...)
\method{get_predict}{coxph}(model, newdata = insight::get_data(model), type = "lp", ...)
\method{get_predict}{model_fit}(model, newdata, type = NULL, ...)
\method{get_predict}{workflow}(model, newdata, type = NULL, ...)
\method{get_predict}{tobit1}(model, newdata = insight::get_data(model), type = "response", ...)
}
\arguments{
\item{model}{Model object}
\item{newdata}{Grid of predictor values at which we evaluate the slopes.
\itemize{
\item Warning: Please avoid modifying your dataset between fitting the model and calling a \code{marginaleffects} function. This can sometimes lead to unexpected results.
\item \code{NULL} (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using \code{\link[insight:get_data]{insight::get_data()}}, which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.
\item \code{\link[=datagrid]{datagrid()}} call to specify a custom grid of regressors. For example:
\itemize{
\item \code{newdata = datagrid(cyl = c(4, 6))}: \code{cyl} variable equal to 4 and 6 and other regressors fixed at their means or modes.
\item See the Examples section and the \code{\link[=datagrid]{datagrid()}} documentation.
}
\item string:
\itemize{
\item "mean": Marginal Effects at the Mean. Slopes when each predictor is held at its mean or mode.
\item "median": Marginal Effects at the Median. Slopes when each predictor is held at its median or mode.
\item "marginalmeans": Marginal Effects at Marginal Means. See Details section below.
\item "tukey": Marginal Effects at Tukey's 5 numbers.
\item "grid": Marginal Effects on a grid of representative numbers (Tukey's 5 numbers and unique values of categorical predictors).
}
}}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
}
\value{
A data.frame of predicted values with a number of rows equal to the
number of rows in \code{newdata} and columns "rowid" and "estimate". A "group"
column is added for multivariate models or models with categorical outcomes.
}
\description{
Get predicted values from a model object (internal function)
}
\keyword{internal}
marginaleffects/man/slopes.Rd 0000644 0001762 0000144 00000053346 14554076657 016011 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slopes.R
\name{slopes}
\alias{slopes}
\alias{avg_slopes}
\title{Slopes (aka Partial derivatives, Marginal Effects, or Trends)}
\usage{
slopes(
model,
newdata = NULL,
variables = NULL,
type = NULL,
by = FALSE,
vcov = TRUE,
conf_level = 0.95,
slope = "dydx",
wts = NULL,
hypothesis = NULL,
equivalence = NULL,
p_adjust = NULL,
df = Inf,
eps = NULL,
numderiv = "fdforward",
...
)
avg_slopes(
model,
newdata = NULL,
variables = NULL,
type = NULL,
by = TRUE,
vcov = TRUE,
conf_level = 0.95,
slope = "dydx",
wts = NULL,
hypothesis = NULL,
equivalence = NULL,
p_adjust = NULL,
df = Inf,
eps = NULL,
numderiv = "fdforward",
...
)
}
\arguments{
\item{model}{Model object}
\item{newdata}{Grid of predictor values at which we evaluate the slopes.
\itemize{
\item Warning: Please avoid modifying your dataset between fitting the model and calling a \code{marginaleffects} function. This can sometimes lead to unexpected results.
\item \code{NULL} (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using \code{\link[insight:get_data]{insight::get_data()}}, which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.
\item \code{\link[=datagrid]{datagrid()}} call to specify a custom grid of regressors. For example:
\itemize{
\item \code{newdata = datagrid(cyl = c(4, 6))}: \code{cyl} variable equal to 4 and 6 and other regressors fixed at their means or modes.
\item See the Examples section and the \code{\link[=datagrid]{datagrid()}} documentation.
}
\item string:
\itemize{
\item "mean": Marginal Effects at the Mean. Slopes when each predictor is held at its mean or mode.
\item "median": Marginal Effects at the Median. Slopes when each predictor is held at its median or mode.
\item "marginalmeans": Marginal Effects at Marginal Means. See Details section below.
\item "tukey": Marginal Effects at Tukey's 5 numbers.
\item "grid": Marginal Effects on a grid of representative numbers (Tukey's 5 numbers and unique values of categorical predictors).
}
}}
\item{variables}{Focal variables
\itemize{
\item \code{NULL}: compute slopes or comparisons for all the variables in the model object (can be slow).
\item Character vector: subset of variables (usually faster).
}}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
\item{by}{Aggregate unit-level estimates (aka, marginalize, average over). Valid inputs:
\itemize{
\item \code{FALSE}: return the original unit-level estimates.
\item \code{TRUE}: aggregate estimates for each term.
\item Character vector of column names in \code{newdata} or in the data frame produced by calling the function without the \code{by} argument.
\item Data frame with a \code{by} column of group labels, and merging columns shared by \code{newdata} or the data frame produced by calling the same function without the \code{by} argument.
\item See examples below.
\item For more complex aggregations, you can use the \code{FUN} argument of the \code{hypotheses()} function. See that function's documentation and the Hypothesis Test vignettes on the \code{marginaleffects} website.
}}
\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: "satterthwaite", "kenward-roger"
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov(model)})
}}
\item{conf_level}{numeric value between 0 and 1. Confidence level to use to build a confidence interval.}
\item{slope}{string indicates the type of slope or (semi-)elasticity to compute:
\itemize{
\item "dydx": dY/dX
\item "eyex": dY/dX * Y / X
\item "eydx": dY/dX * Y
\item "dyex": dY/dX / X
\item Y is the predicted value of the outcome; X is the observed value of the predictor.
}}
\item{wts}{string or numeric: weights to use when computing average contrasts or slopes. These weights only affect the averaging in \verb{avg_*()} or with the \code{by} argument, and not the unit-level estimates themselves. Internally, estimates and weights are passed to the \code{weighted.mean()} function.
\itemize{
\item string: column name of the weights variable in \code{newdata}. When supplying a column name to \code{wts}, it is recommended to supply the original data (including the weights variable) explicitly to \code{newdata}.
\item numeric: vector of length equal to the number of rows in the original data or in \code{newdata} (if supplied).
}}
\item{hypothesis}{specify a hypothesis test or custom contrast using a numeric value, vector, or matrix, a string, or a string formula.
\itemize{
\item Numeric:
\itemize{
\item Single value: the null hypothesis used in the computation of Z and p (before applying \code{transform}).
\item Vector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the \code{hypothesis} argument.
\item Matrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.
}
\item String formula to specify linear or non-linear hypothesis tests. If the \code{term} column uniquely identifies rows, terms can be used in the formula. Otherwise, use \code{b1}, \code{b2}, etc. to identify the position of each parameter. The \verb{b*} wildcard can be used to test hypotheses on all estimates. Examples:
\itemize{
\item \code{hp = drat}
\item \code{hp + drat = 12}
\item \code{b1 + b2 + b3 = 0}
\item \verb{b* / b1 = 1}
}
\item String:
\itemize{
\item "pairwise": pairwise differences between estimates in each row.
\item "reference": differences between the estimates in each row and the estimate in the first row.
\item "sequential": difference between an estimate and the estimate in the next row.
\item "revpairwise", "revreference", "revsequential": inverse of the corresponding hypotheses, as described above.
}
\item See the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html
}}
\item{equivalence}{Numeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.}
\item{p_adjust}{Adjust p-values for multiple comparisons: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", or "fdr". See \link[stats:p.adjust]{stats::p.adjust}}
\item{df}{Degrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and \code{Inf}. When \code{df} is \code{Inf}, the normal distribution is used. When \code{df} is finite, the \code{t} distribution is used. See \link[insight:get_df]{insight::get_df} for a convenient function to extract degrees of freedom. Ex: \code{slopes(model, df = insight::get_df(model))}}
\item{eps}{NULL or numeric value which determines the step size to use when
calculating numerical derivatives: (f(x+eps)-f(x))/eps. When \code{eps} is
\code{NULL}, the step size is 0.0001 multiplied by the difference between
the maximum and minimum values of the variable with respect to which we
are taking the derivative. Changing \code{eps} may be necessary to avoid
numerical problems in certain models.}
\item{numderiv}{string or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.
\itemize{
\item "fdforward": finite difference method with forward differences (default)
\item "fdcenter": finite difference method with central differences
\item "richardson": Richardson extrapolation method
\item Extra arguments can be specified by passing a list to the \code{numDeriv} argument, with the name of the method first and named arguments following, ex: \code{numderiv=list("fdcenter", eps = 1e-5)}. When an unknown argument is used, \code{marginaleffects} prints the list of valid arguments for each method.
}}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
}
\value{
A \code{data.frame} with one row per observation (per term/group) and several columns:
\itemize{
\item \code{rowid}: row number of the \code{newdata} data frame
\item \code{type}: prediction type, as defined by the \code{type} argument
\item \code{group}: (optional) value of the grouped outcome (e.g., categorical outcome models)
\item \code{term}: the variable whose marginal effect is computed
\item \code{dydx}: slope of the outcome with respect to the term, for a given combination of predictor values
\item \code{std.error}: standard errors computed via the delta method.
\item \code{p.value}: p value associated to the \code{estimate} column. The null is determined by the \code{hypothesis} argument (0 by default), and p values are computed before applying the \code{transform} argument. For models of class \code{feglm}, \code{Gam}, \code{glm} and \code{negbin}, p values are computed on the link scale by default unless the \code{type} argument is specified explicitly.
\item \code{s.value}: Shannon information transforms of p values. How many consecutive "heads" tosses would provide the same amount of evidence (or "surprise") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst's intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).
\item \code{conf.low}: lower bound of the confidence interval (or equal-tailed interval for bayesian models)
\item \code{conf.high}: upper bound of the confidence interval (or equal-tailed interval for bayesian models)
}
See \code{?print.marginaleffects} for printing options.
}
\description{
Partial derivative of the regression equation with respect to a regressor of interest.
\itemize{
\item \code{slopes()}: unit-level (conditional) estimates.
\item \code{avg_slopes()}: average (marginal) estimates.
}
The \code{newdata} argument and the \code{datagrid()} function can be used to control where statistics are evaluated in the predictor space: "at observed values", "at the mean", "at representative values", etc.
See the slopes vignette and package website for worked examples and case studies:
\itemize{
\item \url{https://marginaleffects.com/vignettes/slopes.html}
\item \url{https://marginaleffects.com/}
}
}
\details{
A "slope" or "marginal effect" is the partial derivative of the regression equation
with respect to a variable in the model. This function uses automatic
differentiation to compute slopes for a vast array of models,
including non-linear models with transformations (e.g., polynomials).
Uncertainty estimates are computed using the delta method.
Numerical derivatives for the \code{slopes} function are calculated
using a simple epsilon difference approach: \eqn{\partial Y / \partial X = (f(X + \varepsilon/2) - f(X-\varepsilon/2)) / \varepsilon}{dY/dX = (f(X + e/2) - f(X-e/2)) / e},
where f is the \code{predict()} method associated with the model class, and
\eqn{\varepsilon}{e} is determined by the \code{eps} argument.
}
\section{Functions}{
\itemize{
\item \code{avg_slopes()}: Average slopes
}}
\section{Standard errors using the delta method}{
Standard errors for all quantities estimated by \code{marginaleffects} can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to \code{1e-8}, or to \code{1e-4} times the smallest absolute model coefficient, whichever is largest.
\code{marginaleffects} can delegate numeric differentiation to the \code{numDeriv} package, which allows more flexibility. To do this, users can pass arguments to the \code{numDeriv::jacobian} function through a global option. For example:
\itemize{
\item \code{options(marginaleffects_numDeriv = list(method = "simple", method.args = list(eps = 1e-6)))}
\item \code{options(marginaleffects_numDeriv = list(method = "Richardson", method.args = list(eps = 1e-5)))}
\item \code{options(marginaleffects_numDeriv = NULL)}
}
See the "Standard Errors and Confidence Intervals" vignette on the \code{marginaleffects} website for more details on the computation of standard errors:
https://marginaleffects.com/vignettes/uncertainty.html
Note that the \code{inferences()} function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:
https://marginaleffects.com/vignettes/bootstrap.html
}
\section{Model-Specific Arguments}{
Some model types allow model-specific arguments to modify the nature of
marginal effects, predictions, marginal means, and contrasts. Please report
other package-specific \code{predict()} arguments on Github so we can add them to
the table below.
https://github.com/vincentarelbundock/marginaleffects/issues\tabular{llll}{
Package \tab Class \tab Argument \tab Documentation \cr
\code{brms} \tab \code{brmsfit} \tab \code{ndraws} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\tab \tab \code{re_formula} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
\code{lme4} \tab \code{merMod} \tab \code{re.form} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
\code{glmmTMB} \tab \code{glmmTMB} \tab \code{re.form} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{allow.new.levels} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\tab \tab \code{zitype} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
\code{mgcv} \tab \code{bam} \tab \code{exclude} \tab \link[mgcv:predict.bam]{mgcv::predict.bam} \cr
\code{robustlmm} \tab \code{rlmerMod} \tab \code{re.form} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\tab \tab \code{allow.new.levels} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
\code{MCMCglmm} \tab \code{MCMCglmm} \tab \code{ndraws} \tab \cr
}
}
\section{Bayesian posterior summaries}{
By default, credible intervals in bayesian models are built as equal-tailed
intervals. This can be changed to a highest density interval by setting a global
option:
\code{options("marginaleffects_posterior_interval" = "eti")}
\code{options("marginaleffects_posterior_interval" = "hdi")}
By default, the center of the posterior distribution in bayesian models is
identified by the median. Users can use a different summary function by setting a
global option:
\code{options("marginaleffects_posterior_center" = "mean")}
\code{options("marginaleffects_posterior_center" = "median")}
When estimates are averaged using the \code{by} argument, the \code{tidy()} function, or
the \code{summary()} function, the posterior distribution is marginalized twice over.
First, we take the average \emph{across} units but \emph{within} each iteration of the
MCMC chain, according to what the user requested in \code{by} argument or
\code{tidy()/summary()} functions. Then, we identify the center of the resulting
posterior using the function supplied to the
\code{"marginaleffects_posterior_center"} option (the median by default).
}
\section{Equivalence, Inferiority, Superiority}{
\eqn{\theta} is an estimate, \eqn{\sigma_\theta} its estimated standard error, and \eqn{[a, b]} are the bounds of the interval supplied to the \code{equivalence} argument.
Non-inferiority:
\itemize{
\item \eqn{H_0}{H0}: \eqn{\theta \leq a}{\theta <= a}
\item \eqn{H_1}{H1}: \eqn{\theta > a}
\item \eqn{t=(\theta - a)/\sigma_\theta}{t=(\theta - a)/\sigma_\theta}
\item p: Upper-tail probability
}
Non-superiority:
\itemize{
\item \eqn{H_0}{H0}: \eqn{\theta \geq b}{\theta >= b}
\item \eqn{H_1}{H1}: \eqn{\theta < b}
\item \eqn{t=(\theta - b)/\sigma_\theta}{t=(\theta - b)/\sigma_\theta}
\item p: Lower-tail probability
}
Equivalence: Two One-Sided Tests (TOST)
\itemize{
\item p: Maximum of the non-inferiority and non-superiority p values.
}
Thanks to Russell V. Lenth for the excellent \code{emmeans} package and documentation which inspired this feature.
}
\section{Prediction types}{
The \code{type} argument determines the scale of the predictions used to compute quantities of interest with functions from the \code{marginaleffects} package. Admissible values for \code{type} depend on the model object. When users specify an incorrect value for \code{type}, \code{marginaleffects} will raise an informative error with a list of valid \code{type} values for the specific model object. The first entry in the list in that error message is the default type.
The \code{invlink(link)} is a special type defined by \code{marginaleffects}. It is available for some (but not all) models and functions. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with \code{type="invlink(link)"} will not always be equivalent to the average of estimates with \code{type="response"}.
Some of the most common \code{type} values are:
response, link, E, Ep, average, class, conditional, count, cum.prob, cumprob, density, detection, disp, ev, expected, expvalue, fitted, invlink(link), latent, latent_N, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob
}
\examples{
\dontshow{if (interactive() || isTRUE(Sys.getenv("R_DOC_BUILD") == "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\dontshow{\}) # examplesIf}
# Unit-level (conditional) Marginal Effects
mod <- glm(am ~ hp * wt, data = mtcars, family = binomial)
mfx <- slopes(mod)
head(mfx)
# Average Marginal Effect (AME)
avg_slopes(mod, by = TRUE)
# Marginal Effect at the Mean (MEM)
slopes(mod, newdata = datagrid())
# Marginal Effect at User-Specified Values
# Variables not explicitly included in `datagrid()` are held at their means
slopes(mod, newdata = datagrid(hp = c(100, 110)))
# Group-Average Marginal Effects (G-AME)
# Calculate marginal effects for each observation, and then take the average
# marginal effect within each subset of observations with different observed
# values for the `cyl` variable:
mod2 <- lm(mpg ~ hp * cyl, data = mtcars)
avg_slopes(mod2, variables = "hp", by = "cyl")
# Marginal Effects at User-Specified Values (counterfactual)
# Variables not explicitly included in `datagrid()` are held at their
# original values, and the whole dataset is duplicated once for each
# combination of the values in `datagrid()`
mfx <- slopes(mod,
newdata = datagrid(hp = c(100, 110),
grid_type = "counterfactual"))
head(mfx)
# Heteroskedasticity robust standard errors
mfx <- slopes(mod, vcov = sandwich::vcovHC(mod))
head(mfx)
# hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect
mod <- lm(mpg ~ wt + drat, data = mtcars)
slopes(
mod,
newdata = "mean",
hypothesis = "wt = drat")
# same hypothesis test using row indices
slopes(
mod,
newdata = "mean",
hypothesis = "b1 - b2 = 0")
# same hypothesis test using numeric vector of weights
slopes(
mod,
newdata = "mean",
hypothesis = c(1, -1))
# two custom contrasts using a matrix of weights
lc <- matrix(c(
1, -1,
2, 3),
ncol = 2)
colnames(lc) <- c("Contrast A", "Contrast B")
slopes(
mod,
newdata = "mean",
hypothesis = lc)
}
\references{
\itemize{
\item Greenland S. 2019. "Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values." The American Statistician. 73(S1): 106–114.
\item Cole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. "Surprise!" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136
}
}
marginaleffects/man/get_group_names.Rd 0000644 0001762 0000144 00000004432 14543163156 017637 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_group_names.R, R/methods_MASS.R,
% R/methods_nnet.R, R/methods_brglm2.R, R/methods_brms.R, R/methods_mclogit.R,
% R/methods_mlm.R, R/methods_ordinal.R, R/methods_pscl.R, R/methods_survey.R
\name{get_group_names}
\alias{get_group_names}
\alias{get_group_names.default}
\alias{get_group_names.polr}
\alias{get_group_names.multinom}
\alias{get_group_names.bracl}
\alias{get_group_names.brmsfit}
\alias{get_group_names.mblogit}
\alias{get_group_names.mlm}
\alias{get_group_names.clm}
\alias{get_group_names.hurdle}
\alias{get_group_names.svyolr}
\title{Get levels of the outcome variable in grouped or multivariate models}
\usage{
get_group_names(model, ...)
\method{get_group_names}{default}(model, ...)
\method{get_group_names}{polr}(model, ...)
\method{get_group_names}{multinom}(model, ...)
\method{get_group_names}{bracl}(model, ...)
\method{get_group_names}{brmsfit}(model, ...)
\method{get_group_names}{mblogit}(model, type, ...)
\method{get_group_names}{mlm}(model, ...)
\method{get_group_names}{clm}(model, ...)
\method{get_group_names}{hurdle}(model, type = "count", ...)
\method{get_group_names}{svyolr}(model, ...)
}
\arguments{
\item{model}{Model object}
\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?slopes} documentation for a non-exhaustive list of available
arguments.}
\item{type}{string indicates the type (scale) of the predictions used to
compute contrasts or slopes. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
first entry in the error message is used by default.}
}
\value{
A character vector
}
\description{
Get levels of the outcome variable in grouped or multivariate models
}
\keyword{internal}
marginaleffects/man/figures/ 0000755 0001762 0000144 00000000000 14541720224 015624 5 ustar ligges users marginaleffects/man/figures/zoo_banner.png 0000755 0001762 0000144 00003122432 14541720224 020500 0 ustar ligges users PNG
IHDR ; ~ F,zTXtRaw profile type exif xڥY丮d57QZ5~MUqH`е )?Rs柷=|_~q?o_%5}g-_ߟO7A+֟7j(>o?ouV?ؿ?oX~כ[/1ěcs;x)?Wu^w}^_/mg}K-r++goϿ~}\ѿ}ߪ߹gn[<uGKw[bՓ][fha'z>0S'/X
Sb}|7_ߧ+Zs]x\, ߟEMgL.X{? 0]BYހ%";BwyY٠Υ?};Rz1f>~4?Ry|uH1٬MOy+6SLoJ)jjߜr%^%\J^c}kZk=-VZmgvӝ>;5(6|;̳:Yq+j1.}/}+_?/v-ٵ/,_-寷Ir0H`Nj[A?]>{"{;;<vԝڷo]ns][yv.g\Hy}}oyWgŽ^ƷS_z;㎼2gFXJ+PGfo[w.{Ln\aO(M
c8f3^
ҸcbvnϺ"6wn.*>seyb}{Թ5>9ڭJ zb+wO_~`0Dp'c/i.o|K.;d%~'
~0wzYŪދoY[sD"E7e9:NhG9owau\+oФr9VŴ'?ߔ6pO
|m㍹q+x玹Fnn5Y ~n8 D0ܯֱ˲|M@L՞
L|-,yQ^j
$Xxޡ%J7ǐzXg
hDL,U~>pÎKaג6aΗ{KĂOƢЌxx4׃!
X=_55U *u5n~="+x*s%|o6^߳@{M97X ,?d