performance/NAMESPACE
# Generated by roxygen2: do not edit by hand
S3method(as.data.frame,check_outliers)
S3method(as.numeric,check_outliers)
S3method(check_autocorrelation,default)
S3method(check_collinearity,MixMod)
S3method(check_collinearity,default)
S3method(check_collinearity,glmmTMB)
S3method(check_collinearity,hurdle)
S3method(check_collinearity,zerocount)
S3method(check_collinearity,zeroinfl)
S3method(check_convergence,default)
S3method(check_convergence,merMod)
S3method(check_distribution,default)
S3method(check_distribution,numeric)
S3method(check_heteroscedasticity,default)
S3method(check_homogeneity,default)
S3method(check_model,default)
S3method(check_normality,default)
S3method(check_outliers,data.frame)
S3method(check_outliers,default)
S3method(check_outliers,glmmTMB)
S3method(check_outliers,numeric)
S3method(check_overdispersion,fixest)
S3method(check_overdispersion,glm)
S3method(check_overdispersion,glmmTMB)
S3method(check_overdispersion,glmx)
S3method(check_overdispersion,merMod)
S3method(check_overdispersion,negbin)
S3method(check_singularity,MixMod)
S3method(check_singularity,clmm)
S3method(check_singularity,cpglmm)
S3method(check_singularity,default)
S3method(check_singularity,glmmTMB)
S3method(check_singularity,glmmadmb)
S3method(check_singularity,lme)
S3method(check_singularity,merMod)
S3method(check_singularity,rlmerMod)
S3method(cronbachs_alpha,data.frame)
S3method(cronbachs_alpha,parameters_pca)
S3method(icc,brmsfit)
S3method(icc,default)
S3method(logLik,cpglm)
S3method(logLik,iv_robust)
S3method(logLik,ivreg)
S3method(logLik,plm)
S3method(model_performance,DirichletRegModel)
S3method(model_performance,MixMod)
S3method(model_performance,betareg)
S3method(model_performance,brmsfit)
S3method(model_performance,censReg)
S3method(model_performance,clm)
S3method(model_performance,clm2)
S3method(model_performance,coxph)
S3method(model_performance,felm)
S3method(model_performance,fixest)
S3method(model_performance,flexsurvreg)
S3method(model_performance,glm)
S3method(model_performance,glmmTMB)
S3method(model_performance,glmmadmb)
S3method(model_performance,glmx)
S3method(model_performance,hurdle)
S3method(model_performance,iv_robust)
S3method(model_performance,ivreg)
S3method(model_performance,lavaan)
S3method(model_performance,lm)
S3method(model_performance,lme)
S3method(model_performance,merMod)
S3method(model_performance,mixed)
S3method(model_performance,mixor)
S3method(model_performance,mlogit)
S3method(model_performance,multinom)
S3method(model_performance,plm)
S3method(model_performance,polr)
S3method(model_performance,rlmerMod)
S3method(model_performance,stanreg)
S3method(model_performance,survreg)
S3method(model_performance,svyglm)
S3method(model_performance,truncreg)
S3method(model_performance,vglm)
S3method(performance_aicc,default)
S3method(performance_aicc,vglm)
S3method(performance_logloss,brmsfit)
S3method(performance_logloss,default)
S3method(performance_lrt,default)
S3method(plot,check_collinearity)
S3method(plot,check_distribution)
S3method(plot,check_distribution_numeric)
S3method(plot,check_heteroscedasticity)
S3method(plot,check_homogeneity)
S3method(plot,check_normality)
S3method(plot,check_outliers)
S3method(plot,compare_performance)
S3method(plot,performance_roc)
S3method(print,binned_residuals)
S3method(print,check_collinearity)
S3method(print,check_distribution)
S3method(print,check_distribution_numeric)
S3method(print,check_itemscale)
S3method(print,check_model)
S3method(print,check_outliers)
S3method(print,check_overdisp)
S3method(print,check_zi)
S3method(print,compare_performance)
S3method(print,icc)
S3method(print,icc_decomposed)
S3method(print,item_difficulty)
S3method(print,looic)
S3method(print,perf_pca)
S3method(print,perf_pca_rotate)
S3method(print,performance_accuracy)
S3method(print,performance_hosmer)
S3method(print,performance_lrt)
S3method(print,performance_model)
S3method(print,performance_pcp)
S3method(print,performance_roc)
S3method(print,performance_score)
S3method(print,r2_bayes)
S3method(print,r2_generic)
S3method(print,r2_nakagawa)
S3method(r2,BBreg)
S3method(r2,DirichletRegModel)
S3method(r2,MixMod)
S3method(r2,betareg)
S3method(r2,bigglm)
S3method(r2,biglm)
S3method(r2,bracl)
S3method(r2,brmsfit)
S3method(r2,brmultinom)
S3method(r2,censReg)
S3method(r2,clm)
S3method(r2,clm2)
S3method(r2,clmm)
S3method(r2,complmrob)
S3method(r2,coxph)
S3method(r2,cpglm)
S3method(r2,cpglmm)
S3method(r2,crch)
S3method(r2,default)
S3method(r2,feis)
S3method(r2,felm)
S3method(r2,fixest)
S3method(r2,glm)
S3method(r2,glmmTMB)
S3method(r2,glmmadmb)
S3method(r2,glmx)
S3method(r2,hurdle)
S3method(r2,iv_robust)
S3method(r2,ivreg)
S3method(r2,lm)
S3method(r2,lme)
S3method(r2,lmrob)
S3method(r2,mclogit)
S3method(r2,merMod)
S3method(r2,mixed)
S3method(r2,mlogit)
S3method(r2,mmclogit)
S3method(r2,multinom)
S3method(r2,plm)
S3method(r2,polr)
S3method(r2,rlmerMod)
S3method(r2,stanreg)
S3method(r2,survreg)
S3method(r2,svyglm)
S3method(r2,truncreg)
S3method(r2,vgam)
S3method(r2,vglm)
S3method(r2,wbm)
S3method(r2,zeroinfl)
S3method(r2,zerotrunc)
S3method(r2_coxsnell,BBreg)
S3method(r2_coxsnell,DirichletRegModel)
S3method(r2_coxsnell,censReg)
S3method(r2_coxsnell,clm)
S3method(r2_coxsnell,clm2)
S3method(r2_coxsnell,coxph)
S3method(r2_coxsnell,cpglm)
S3method(r2_coxsnell,crch)
S3method(r2_coxsnell,glm)
S3method(r2_coxsnell,glmx)
S3method(r2_coxsnell,mclogit)
S3method(r2_coxsnell,multinom)
S3method(r2_coxsnell,polr)
S3method(r2_coxsnell,survreg)
S3method(r2_coxsnell,truncreg)
S3method(r2_efron,default)
S3method(r2_mcfadden,bracl)
S3method(r2_mcfadden,brmultinom)
S3method(r2_mcfadden,censReg)
S3method(r2_mcfadden,clm)
S3method(r2_mcfadden,clm2)
S3method(r2_mcfadden,cpglm)
S3method(r2_mcfadden,glm)
S3method(r2_mcfadden,glmx)
S3method(r2_mcfadden,mclogit)
S3method(r2_mcfadden,mlogit)
S3method(r2_mcfadden,multinom)
S3method(r2_mcfadden,polr)
S3method(r2_mcfadden,truncreg)
S3method(r2_mcfadden,vglm)
S3method(r2_mckelvey,default)
S3method(r2_nagelkerke,BBreg)
S3method(r2_nagelkerke,DirichletRegModel)
S3method(r2_nagelkerke,bracl)
S3method(r2_nagelkerke,brmultinom)
S3method(r2_nagelkerke,censReg)
S3method(r2_nagelkerke,clm)
S3method(r2_nagelkerke,clm2)
S3method(r2_nagelkerke,coxph)
S3method(r2_nagelkerke,cpglm)
S3method(r2_nagelkerke,crch)
S3method(r2_nagelkerke,glm)
S3method(r2_nagelkerke,glmx)
S3method(r2_nagelkerke,mclogit)
S3method(r2_nagelkerke,multinom)
S3method(r2_nagelkerke,polr)
S3method(r2_nagelkerke,survreg)
S3method(r2_nagelkerke,truncreg)
S3method(residuals,iv_robust)
export(binned_residuals)
export(check_autocorrelation)
export(check_collinearity)
export(check_convergence)
export(check_distribution)
export(check_heteroscedasticity)
export(check_homogeneity)
export(check_itemscale)
export(check_model)
export(check_normality)
export(check_outliers)
export(check_overdispersion)
export(check_singularity)
export(check_zeroinflation)
export(compare_performance)
export(cronbachs_alpha)
export(icc)
export(item_difficulty)
export(item_intercor)
export(item_reliability)
export(item_split_half)
export(looic)
export(model_performance)
export(mse)
export(performance)
export(performance_accuracy)
export(performance_aic)
export(performance_aicc)
export(performance_hosmer)
export(performance_logloss)
export(performance_lrt)
export(performance_mse)
export(performance_pcp)
export(performance_rmse)
export(performance_roc)
export(performance_rse)
export(performance_score)
export(r2)
export(r2_bayes)
export(r2_coxsnell)
export(r2_efron)
export(r2_kullback)
export(r2_loo)
export(r2_mcfadden)
export(r2_mckelvey)
export(r2_nagelkerke)
export(r2_nakagawa)
export(r2_tjur)
export(r2_xu)
export(r2_zeroinflated)
export(rmse)
importFrom(bayestestR,area_under_curve)
importFrom(bayestestR,bayesfactor_models)
importFrom(bayestestR,ci)
importFrom(bayestestR,estimate_density)
importFrom(bayestestR,hdi)
importFrom(bayestestR,map_estimate)
importFrom(insight,all_models_equal)
importFrom(insight,clean_names)
importFrom(insight,find_algorithm)
importFrom(insight,find_formula)
importFrom(insight,find_parameters)
importFrom(insight,find_predictors)
importFrom(insight,find_response)
importFrom(insight,format_table)
importFrom(insight,get_data)
importFrom(insight,get_predictors)
importFrom(insight,get_response)
importFrom(insight,get_varcov)
importFrom(insight,get_variance)
importFrom(insight,get_variance_residual)
importFrom(insight,has_intercept)
importFrom(insight,is_model)
importFrom(insight,is_model_supported)
importFrom(insight,is_multivariate)
importFrom(insight,model_info)
importFrom(insight,n_obs)
importFrom(insight,print_color)
importFrom(stats,AIC)
importFrom(stats,BIC)
importFrom(stats,IQR)
importFrom(stats,anova)
importFrom(stats,as.formula)
importFrom(stats,bartlett.test)
importFrom(stats,binomial)
importFrom(stats,coef)
importFrom(stats,complete.cases)
importFrom(stats,cooks.distance)
importFrom(stats,cor)
importFrom(stats,cov)
importFrom(stats,cov2cor)
importFrom(stats,dbinom)
importFrom(stats,density)
importFrom(stats,df.residual)
importFrom(stats,dnbinom)
importFrom(stats,dpois)
importFrom(stats,family)
importFrom(stats,fitted)
importFrom(stats,fligner.test)
importFrom(stats,formula)
importFrom(stats,glm)
importFrom(stats,lm)
importFrom(stats,logLik)
importFrom(stats,mad)
importFrom(stats,mahalanobis)
importFrom(stats,median)
importFrom(stats,model.frame)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,nobs)
importFrom(stats,pchisq)
importFrom(stats,pf)
importFrom(stats,pnbinom)
importFrom(stats,ppoints)
importFrom(stats,ppois)
importFrom(stats,predict)
importFrom(stats,predict.glm)
importFrom(stats,qchisq)
importFrom(stats,qf)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,reshape)
importFrom(stats,residuals)
importFrom(stats,rstandard)
importFrom(stats,rstudent)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,shapiro.test)
importFrom(stats,terms)
importFrom(stats,update)
importFrom(stats,var)
importFrom(stats,vcov)
importFrom(stats,weights)
importFrom(stats,xtabs)
importFrom(utils,packageVersion)
performance/README.md
# performance
[CRAN](https://cran.r-project.org/package=performance) [Build Status](https://travis-ci.org/easystats/performance)
***Test if your model is a good model!***
The primary goal of the **performance** package is to provide utilities
for computing **indices of model quality** and **goodness of fit**. This
includes measures like r-squared (R2), root mean squared error (RMSE) or
intraclass correlation coefficient (ICC), but also functions to check
(mixed) models for overdispersion, zero-inflation, convergence or
singularity.
## Installation
Run the following to install the package from CRAN and then load it:
``` r
install.packages("performance")
```
``` r
library("performance")
```
# Examples
[Documentation](https://easystats.github.io/performance/) [Reference](https://easystats.github.io/performance/reference/index.html) [Blog](https://easystats.github.io/blog/posts/)
## Assessing model quality
### R-squared
**performance** has a generic `r2()` function, which computes the
r-squared for many different models, including mixed effects and
Bayesian regression models.
`r2()` returns a list containing values related to the “most
appropriate” r-squared for the given model.
``` r
model <- lm(mpg ~ wt + cyl, data = mtcars)
r2(model)
#> # R2 for Linear Regression
#>
#> R2: 0.830
#> adj. R2: 0.819
model <- glm(am ~ wt + cyl, data = mtcars, family = binomial)
r2(model)
#> $R2_Tjur
#> Tjur's R2
#> 0.7051
library(MASS)
data(housing)
model <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)
r2(model)
#> $R2_Nagelkerke
#> Nagelkerke's R2
#> 0.1084
```
The different r-squared measures can also be accessed directly via
functions like `r2_bayes()`, `r2_coxsnell()` or `r2_nagelkerke()` (see a
full list of functions
[here](https://easystats.github.io/performance/reference/index.html)).
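For example, Cox & Snell's and Nagelkerke's R2 can be requested directly for a logistic regression model like the one above (a minimal sketch; `m_logit` is just an illustrative name):
``` r
m_logit <- glm(am ~ wt + cyl, data = mtcars, family = binomial)
# pseudo-R2 values computed directly
r2_coxsnell(m_logit)
r2_nagelkerke(m_logit)
```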
For mixed models, the *conditional* and *marginal* r-squared are
returned. The *marginal r-squared* considers only the variance of the
fixed effects and indicates how much of the model’s variance is
explained by the fixed effects part only. The *conditional r-squared*
takes both the fixed and random effects into account and indicates how
much of the model’s variance is explained by the “complete” model.
For frequentist mixed models, `r2()` (resp. `r2_nakagawa()`) computes
the *mean* random effect variances, thus `r2()` is also appropriate for
mixed models with more complex random effects structures, like random
slopes or nested random effects (Johnson 2014; Nakagawa, Johnson, and
Schielzeth 2017).
``` r
library(rstanarm)
model <- stan_glmer(Petal.Length ~ Petal.Width + (1 | Species),
data = iris, cores = 4)
r2(model)
#> # Bayesian R2 with Standard Error
#>
#> Conditional R2: 0.953 [0.006]
#> Marginal R2: 0.825 [0.042]
library(lme4)
model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy)
r2(model)
#> # R2 for Mixed Models
#>
#> Conditional R2: 0.799
#> Marginal R2: 0.279
```
### Intraclass Correlation Coefficient (ICC)
Similar to r-squared, the ICC provides information on the explained
variance and can be interpreted as “the proportion of the variance
explained by the grouping structure in the population” (Hox 2010).
`icc()` calculates the ICC for various mixed model objects, including
`stanreg` models.
``` r
library(lme4)
model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy)
icc(model)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.722
#> Conditional ICC: 0.521
```
For models of class `brmsfit`, an ICC based on variance decomposition is
returned (for details, see the
[documentation](https://easystats.github.io/performance/reference/icc.html)).
``` r
library(brms)
set.seed(123)
model <- brm(mpg ~ wt + (1 | cyl) + (1 + wt | gear), data = mtcars)
```
``` r
icc(model)
#> # Random Effect Variances and ICC
#>
#> Conditioned on: all random effects
#>
#> ## Variance Ratio (comparable to ICC)
#> Ratio: 0.39 CI 95%: [-0.55 0.78]
#>
#> ## Variances of Posterior Predicted Distribution
#> Conditioned on fixed effects: 22.69 CI 95%: [ 8.42 58.44]
#> Conditioned on rand. effects: 37.71 CI 95%: [25.06 55.58]
#>
#> ## Difference in Variances
#> Difference: 14.29 CI 95%: [-19.67 35.40]
```
## Model diagnostics
### Check for overdispersion
Overdispersion occurs when the observed variance in the data is higher
than the expected variance from the model assumption (for Poisson models,
the variance is assumed to equal the mean of the outcome).
`check_overdispersion()` checks if a count model (including mixed
models) is overdispersed or not.
``` r
library(glmmTMB)
data(Salamanders)
model <- glm(count ~ spp + mined, family = poisson, data = Salamanders)
check_overdispersion(model)
#> # Overdispersion test
#>
#> dispersion ratio = 2.946
#> Pearson's Chi-Squared = 1873.710
#> p-value = < 0.001
#> Overdispersion detected.
```
Overdispersion can be fixed by either modelling the dispersion parameter
(not possible with all packages), or by choosing a different
distributional family, like quasi-Poisson or negative binomial (see
Gelman and Hill 2007).
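As a hedged sketch of the second remedy, the Poisson model from above could be refitted with a negative binomial family (via `MASS::glm.nb()`) and checked again:
``` r
library(MASS)
# refit with a negative binomial family and re-check the dispersion
model_nb <- glm.nb(count ~ spp + mined, data = Salamanders)
check_overdispersion(model_nb)
```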
### Check for zero-inflation
Zero-inflation (in (Quasi-)Poisson models) is indicated when the number
of observed zeros is larger than the number of predicted zeros, so the
model is *underfitting* zeros. In such cases, it is recommended to use
negative binomial or zero-inflated models.
Use `check_zeroinflation()` to check if zero-inflation is present in the
fitted model.
``` r
model <- glm(count ~ spp + mined, family = poisson, data = Salamanders)
check_zeroinflation(model)
#> # Check for zero-inflation
#>
#> Observed zeros: 387
#> Predicted zeros: 298
#> Ratio: 0.77
#> Model is underfitting zeros (probable zero-inflation).
```
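A sketch of one possible remedy: fit a zero-inflated model with **glmmTMB** (the zero-inflation formula `~ mined` here is an illustrative assumption, not a recommendation) and compare it against the Poisson fit:
``` r
library(glmmTMB)
model_zi <- glmmTMB(count ~ spp + mined, ziformula = ~ mined,
                    family = poisson, data = Salamanders)
compare_performance(model, model_zi)
```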
### Check for singular model fits
A “singular” model fit means that some dimensions of the
variance-covariance matrix have been estimated as exactly zero. This
often occurs for mixed models with overly complex random effects
structures.
`check_singularity()` checks mixed models (of class `lme`, `merMod`,
`glmmTMB` or `MixMod`) for singularity, and returns `TRUE` if the model
fit is singular.
``` r
library(lme4)
data(sleepstudy)
# prepare data
set.seed(123)
sleepstudy$mygrp <- sample(1:5, size = 180, replace = TRUE)
sleepstudy$mysubgrp <- NA
for (i in 1:5) {
filter_group <- sleepstudy$mygrp == i
sleepstudy$mysubgrp[filter_group] <- sample(1:30, size = sum(filter_group),
replace = TRUE)
}
# fit strange model
model <- lmer(Reaction ~ Days + (1 | mygrp/mysubgrp) + (1 | Subject),
data = sleepstudy)
check_singularity(model)
#> [1] TRUE
```
Remedies to cure issues with singular fits can be found
[here](https://easystats.github.io/performance/reference/check_singularity.html).
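One common remedy, sketched below, is to simplify the random effects structure and check again (whether the simplified fit is actually non-singular depends on the data):
``` r
model_simple <- lmer(Reaction ~ Days + (1 | mygrp) + (1 | Subject),
                     data = sleepstudy)
check_singularity(model_simple)
```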
## Comprehensive model check
**performance** provides many functions to check model assumptions, like
`check_collinearity()`, `check_normality()` or
`check_heteroscedasticity()`. To get a comprehensive check, use
`check_model()`.
``` r
model <- lm(mpg ~ wt * cyl + gear, data = mtcars)
check_model(model)
```

## Model performance summaries
`model_performance()` computes indices of model performance for
regression models. Depending on the model object, typical indices might
be r-squared, AIC, BIC, RMSE, ICC or LOOIC.
### Linear model
``` r
m1 <- lm(mpg ~ wt + cyl, data = mtcars)
model_performance(m1)
```
| AIC | BIC | R2 | R2\_adjusted | RMSE |
| --: | ----: | ---: | -----------: | ---: |
| 156 | 161.9 | 0.83 | 0.82 | 2.44 |
### Logistic regression
``` r
m2 <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
model_performance(m2)
```
| AIC | BIC | R2\_Tjur | RMSE | LOGLOSS | SCORE\_LOG | SCORE\_SPHERICAL | PCP |
| ---: | ---: | -------: | ---: | ------: | ---------: | ---------------: | ---: |
| 31.3 | 35.7 | 0.48 | 0.89 | 0.4 | \-14.9 | 0.09 | 0.74 |
### Linear mixed model
``` r
library(lme4)
m3 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy)
model_performance(m3)
```
| AIC | BIC | R2\_conditional | R2\_marginal | ICC | RMSE |
| ---: | ---: | --------------: | -----------: | ---: | ----: |
| 1756 | 1775 | 0.8 | 0.28 | 0.72 | 23.44 |
### Comparing different models
``` r
counts <- c(18, 17, 15, 20, 10, 20, 25, 13, 12)
outcome <- gl(3, 1, 9)
treatment <- gl(3, 3)
m4 <- glm(counts ~ outcome + treatment, family = poisson())
compare_performance(m1, m2, m3, m4)
#> # Comparison of Model Performance Indices
#>
#> Model | Type | AIC | BIC | RMSE | SCORE_LOG | SCORE_SPHERICAL | R2 | R2_adjusted | R2_Tjur | LOGLOSS | PCP | R2_conditional | R2_marginal | ICC | R2_Nagelkerke
#> -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#> m1 | lm | 156.01 | 161.87 | 2.44 | | | 0.83 | 0.82 | | | | | | |
#> m2 | glm | 31.30 | 35.70 | 0.89 | -14.90 | 0.10 | | | 0.48 | 0.40 | 0.74 | | | |
#> m3 | lmerMod | 1755.63 | 1774.79 | 23.44 | | | | | | | | 0.80 | 0.28 | 0.72 |
#> m4 | glm | 56.76 | 57.75 | 0.76 | -2.60 | 0.32 | | | | | | | | | 0.66
```
### Comparing different models, ordered by model performance
``` r
compare_performance(m1, m2, m3, m4, rank = TRUE)
#> # Comparison of Model Performance Indices
#>
#> Model | Type | AIC | BIC | RMSE | Performance_Score
#> ---------------------------------------------------------------
#> m2 | glm | 31.30 | 35.70 | 0.89 | 99.80%
#> m4 | glm | 56.76 | 57.75 | 0.76 | 99.09%
#> m1 | lm | 156.01 | 161.87 | 2.44 | 92.69%
#> m3 | lmerMod | 1755.63 | 1774.79 | 23.44 | 0.00%
#>
#> Model m2 (of class glm) performed best with an overall performance score of 99.80%.
```
``` r
plot(compare_performance(m1, m2, m3, m4, rank = TRUE))
```

# References
Gelman, Andrew, and Jennifer Hill. 2007. *Data Analysis Using Regression
and Multilevel/Hierarchical Models*. Analytical Methods for Social
Research. Cambridge ; New York: Cambridge University Press.
Hox, J. J. 2010. *Multilevel Analysis: Techniques and Applications*. 2nd
ed. Quantitative Methodology Series. New York: Routledge.
Johnson, Paul C. D. 2014. “Extension of Nakagawa & Schielzeth’s R2 GLMM
to Random Slopes Models.” Edited by Robert B. O’Hara. *Methods in
Ecology and Evolution* 5 (9): 944–46.
Nakagawa, Shinichi, Paul C. D. Johnson, and Holger Schielzeth. 2017.
“The Coefficient of Determination R2 and Intra-Class Correlation
Coefficient from Generalized Linear Mixed-Effects Models Revisited and
Expanded.” *Journal of the Royal Society Interface* 14 (134): 20170213.
performance/man/item_reliability.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/item_reliability.R
\name{item_reliability}
\alias{item_reliability}
\title{Reliability Test for Items or Scales}
\usage{
item_reliability(x, standardize = FALSE, digits = 3)
}
\arguments{
\item{x}{A matrix or a data frame.}
\item{standardize}{Logical, if \code{TRUE}, the data frame's vectors will be
standardized. Recommended when the variables have different measures / scales.}
\item{digits}{Amount of digits for returned values.}
}
\value{
A data frame with the corrected item-total correlations (\emph{item
discrimination}, column \code{item_discrimination}) and Cronbach's Alpha
(if item deleted, column \code{alpha_if_deleted}) for each item
of the scale, or \code{NULL} if the data frame has too few columns.
}
\description{
Compute various measures of internal consistencies
for tests or item-scales of questionnaires.
}
\details{
This function calculates the item discriminations (corrected item-total
correlations for each item of \code{x} with the remaining items) and
the Cronbach's alpha for each item, if it was deleted from the scale.
The absolute value of the item discrimination indices should be
above 0.1. An index between 0.1 and 0.3 is considered "fair",
while an index above 0.3 (or below -0.3) is "good". Items with
low discrimination indices are often ambiguously worded and
should be examined. Items with negative indices should be
examined to determine why a negative value was obtained (e.g.
reversed answer categories regarding positive and negative poles).
}
\examples{
data(mtcars)
x <- mtcars[, c("cyl", "gear", "carb", "hp")]
item_reliability(x)
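# standardize items first when they are on different scales
# (uses the documented 'standardize' argument)
item_reliability(x, standardize = TRUE)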
}
performance/man/model_performance.lavaan.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_performance.lavaan.R
\name{model_performance.lavaan}
\alias{model_performance.lavaan}
\title{Performance of lavaan SEM / CFA Models}
\usage{
\method{model_performance}{lavaan}(model, metrics = "all", ...)
}
\arguments{
\item{model}{A \code{lavaan} model.}
\item{metrics}{Can be \code{"all"} or a character vector of metrics to be computed (some of \code{c("Chisq", "Chisq_DoF", "Chisq_p", "Baseline", "Baseline_DoF", "Baseline_p", "CFI", "TLI", "NNFI", "RFI", "NFI", "PNFI", "IFI", "RNI", "Loglikelihood", "AIC", "BIC", "BIC_adjusted", "RMSEA", "SRMR")}).}
\item{...}{Arguments passed to or from other methods.}
}
\value{
A data frame (with one row) and one column per "index" (see \code{metrics}).
}
\description{
Compute indices of model performance for SEM or CFA models from the \code{lavaan} package.
}
\details{
\subsection{Indices of fit}{
\itemize{
\item \strong{Chisq}: The model Chi-squared assesses overall fit and the discrepancy between the sample and fitted covariance matrices. Its p-value should be > .05 (i.e., the hypothesis of a perfect fit cannot be rejected). However, it is quite sensitive to sample size.
\item \strong{GFI/AGFI}: The (Adjusted) Goodness of Fit is the proportion of variance accounted for by the estimated population covariance. Analogous to R2. The GFI and the AGFI should be > .95 and > .90, respectively.
\item \strong{NFI/NNFI/TLI}: The (Non) Normed Fit Index. An NFI of 0.95 indicates that the model of interest improves the fit by 95\% relative to the null model. The NNFI (also called the Tucker Lewis index; TLI) is preferable for smaller samples. They should be > .90 (Byrne, 1994) or > .95 (Schumacker & Lomax, 2004).
\item \strong{CFI}: The Comparative Fit Index is a revised form of NFI. Not very sensitive to sample size (Fan, Thompson, & Wang, 1999). Compares the fit of a target model to the fit of an independent, or null, model. It should be > .90.
\item \strong{RMSEA}: The Root Mean Square Error of Approximation is a parsimony-adjusted index. Values closer to 0 represent a good fit. It should be < .08 or < .05. The p-value printed with it tests the hypothesis that RMSEA is less than or equal to .05 (a cutoff sometimes used for good fit), and thus should be not significant.
\item \strong{RMR/SRMR}: the (Standardized) Root Mean Square Residual represents the square root of the difference between the residuals of the sample covariance matrix and the hypothesized model. As the RMR can sometimes be hard to interpret, the SRMR is preferable. It should be < .08.
\item \strong{RFI}: the Relative Fit Index, also known as RHO1, is not guaranteed to vary from 0 to 1. However, RFI close to 1 indicates a good fit.
\item \strong{IFI}: the Incremental Fit Index (IFI) adjusts the Normed Fit Index (NFI) for sample size and degrees of freedom (Bollen, 1989). Over 0.90 is a good fit, but the index can exceed 1.
\item \strong{PNFI}: the Parsimony-Adjusted Measures Index. There is no commonly agreed-upon cutoff value for an acceptable model for this index. Should be > 0.50.
}
See the documentation for \code{\link[lavaan]{fitmeasures}}.
}
\subsection{What to report}{
Kline (2015) suggests that at a minimum the following indices should be reported: The model \strong{chi-square}, the \strong{RMSEA}, the \strong{CFI} and the \strong{SRMR}.
}
}
\examples{
# Confirmatory Factor Analysis (CFA) ---------
if (require("lavaan")) {
structure <- " visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 "
model <- lavaan::cfa(structure, data = HolzingerSwineford1939)
model_performance(model)
}
}
\references{
\itemize{
\item Byrne, B. M. (1994). Structural equation modeling with EQS and EQS/Windows. Thousand Oaks, CA: Sage Publications.
\item Tucker, L. R., \& Lewis, C. (1973). The reliability coefficient for maximum likelihood factor analysis. Psychometrika, 38, 1-10.
\item Schumacker, R. E., \& Lomax, R. G. (2004). A beginner's guide to structural equation modeling, Second edition. Mahwah, NJ: Lawrence Erlbaum Associates.
\item Fan, X., B. Thompson, \& L. Wang (1999). Effects of sample size, estimation method, and model specification on structural equation modeling fit indexes. Structural Equation Modeling, 6, 56-83.
\item Kline, R. B. (2015). Principles and practice of structural equation modeling. Guilford publications.
}
}
performance/man/r2_tjur.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/r2_tjur.R
\name{r2_tjur}
\alias{r2_tjur}
\title{Tjur's R2 - coefficient of determination (D)}
\usage{
r2_tjur(model)
}
\arguments{
\item{model}{Binomial Model.}
}
\value{
A named vector with the R2 value.
}
\description{
This method calculates the Coefficient of Discrimination \code{D}
(also known as Tjur's R2; \cite{Tjur, 2009}) for generalized linear (mixed) models
for binary outcomes. It is an alternative to other pseudo-R2 values like
Nagelkerke's R2 or Cox-Snell R2. The Coefficient of Discrimination \code{D}
can be read like any other (pseudo-)R2 value.
}
\examples{
model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
r2_tjur(model)
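# by-hand sketch: Tjur's D is the difference between the mean predicted
# probabilities of the two outcome groups
pr <- predict(model, type = "response")
mean(pr[mtcars$vs == 1]) - mean(pr[mtcars$vs == 0])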
}
\references{
Tjur, T. (2009). Coefficients of determination in logistic regression models - A new proposal: The coefficient of discrimination. The American Statistician, 63(4), 366-372.
}
performance/man/r2_bayes.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/r2_bayes.R
\name{r2_bayes}
\alias{r2_bayes}
\title{Bayesian R2}
\usage{
r2_bayes(model, robust = TRUE, ci = 0.89)
}
\arguments{
\item{model}{A Bayesian regression model.}
\item{robust}{Logical, if \code{TRUE}, the median instead of mean is used to
calculate the central tendency of the variances.}
\item{ci}{Value or vector of probability of the CI (between 0 and 1) to be estimated.}
}
\value{
A list with the Bayesian R2 value. For mixed models, a list with the
Bayesian R2 value and the marginal Bayesian R2 value. The standard errors
and credible intervals for the R2 values are saved as attributes.
}
\description{
Compute R2 for Bayesian models. For mixed models (including a random part),
it additionally computes the R2 related to the fixed effects only (marginal R2).
}
\details{
\code{r2_bayes()} returns an "unadjusted" R2 value. See \code{\link{r2_loo}}
to calculate a LOO-adjusted R2, which comes conceptually closer to an
adjusted R2 measure.
\cr \cr
For mixed models, the conditional and marginal R2 are returned. The marginal
R2 considers only the variance of the fixed effects, while the conditional
R2 takes both the fixed and random effects into account.
}
\examples{
library(performance)
if (require("rstanarm")) {
model <- stan_glm(mpg ~ wt + cyl, data = mtcars, chains = 1, iter = 500, refresh = 0)
r2_bayes(model)
model <- stan_lmer(
Petal.Length ~ Petal.Width + (1 | Species),
data = iris,
chains = 1,
iter = 500,
refresh = 0
)
r2_bayes(model)
}
\dontrun{
if (require("brms")) {
model <- brms::brm(mpg ~ wt + cyl, data = mtcars)
r2_bayes(model)
model <- brms::brm(Petal.Length ~ Petal.Width + (1 | Species), data = iris)
r2_bayes(model)
}
}
}
\references{
Gelman, A., Goodrich, B., Gabry, J., & Vehtari, A. (2018). R-squared for Bayesian regression models. The American Statistician, 1–6. \doi{10.1080/00031305.2018.1549100}
}
performance/man/check_singularity.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_singularity.R
\name{check_singularity}
\alias{check_singularity}
\title{Check mixed models for boundary fits}
\usage{
check_singularity(x, tolerance = 1e-05, ...)
}
\arguments{
\item{x}{A mixed model.}
\item{tolerance}{Tolerance for the singularity check: random effect
variances smaller than \code{tolerance} are considered to be zero. The
larger \code{tolerance} is, the stricter the test will be.}
\item{...}{Currently not used.}
}
\value{
\code{TRUE} if the model fit is singular.
}
\description{
Check mixed models for boundary fits.
}
\details{
If a model is "singular", this means that some dimensions of the
variance-covariance matrix have been estimated as exactly zero. This
often occurs for mixed models with complex random effects structures.
\cr \cr
\dQuote{While singular models are statistically well defined (it is theoretically
sensible for the true maximum likelihood estimate to correspond to a
singular fit), there are real concerns that (1) singular fits correspond
to overfitted models that may have poor power; (2) chances of numerical
problems and mis-convergence are higher for singular models (e.g. it
may be computationally difficult to compute profile confidence intervals
for such models); (3) standard inferential procedures such as Wald
statistics and likelihood ratio tests may be inappropriate.}
(\cite{lme4 Reference Manual})
\cr \cr
There is no gold standard on how to deal with singularity and which
random-effects specification to choose. Besides using fully Bayesian methods
(with informative priors), proposals in a frequentist framework are:
\itemize{
\item avoid fitting overly complex models, such that the variance-covariance matrices can be estimated precisely enough (\cite{Matuschek et al. 2017})
\item use some form of model selection to choose a model that balances predictive accuracy and overfitting/type I error (\cite{Bates et al. 2015}, \cite{Matuschek et al. 2017})
\item \dQuote{keep it maximal}, i.e. fit the most complex model consistent with the experimental design, removing only terms required to allow a non-singular fit (\cite{Barr et al. 2013})
}
}
\examples{
if (require("lme4")) {
data(sleepstudy)
set.seed(123)
sleepstudy$mygrp <- sample(1:5, size = 180, replace = TRUE)
sleepstudy$mysubgrp <- NA
for (i in 1:5) {
filter_group <- sleepstudy$mygrp == i
sleepstudy$mysubgrp[filter_group] <-
sample(1:30, size = sum(filter_group), replace = TRUE)
}
model <- lmer(
Reaction ~ Days + (1 | mygrp / mysubgrp) + (1 | Subject),
data = sleepstudy
)
check_singularity(model)
}
}
\references{
\itemize{
\item Bates D, Kliegl R, Vasishth S, Baayen H. Parsimonious Mixed Models. arXiv:1506.04967, June 2015.
\item Barr DJ, Levy R, Scheepers C, Tily HJ. Random effects structure for confirmatory hypothesis testing: Keep it maximal. Journal of Memory and Language, 68(3):255-278, April 2013.
\item Matuschek H, Kliegl R, Vasishth S, Baayen H, Bates D. Balancing type I error and power in linear mixed models. Journal of Memory and Language, 94:305-315, 2017.
\item lme4 Reference Manual, \url{https://cran.r-project.org/package=lme4}
}
}
performance/man/performance_pcp.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_pcp.R
\name{performance_pcp}
\alias{performance_pcp}
\title{Percentage of Correct Predictions}
\usage{
performance_pcp(model, ci = 0.95, method = "Herron", verbose = TRUE)
}
\arguments{
\item{model}{Model with binary outcome.}
\item{ci}{The level of the confidence interval.}
\item{method}{Name of the method to calculate the PCP (see 'Details').
Default is \code{"Herron"}. May be abbreviated.}
\item{verbose}{Toggle off warnings.}
}
\value{
A list with several elements: the percentage of correct predictions
of the full and the null model, their confidence intervals, as well as the
chi-squared and p-value from the Likelihood-Ratio-Test between the full and
null model.
}
\description{
Percentage of correct predictions (PCP) for models
with binary outcome.
}
\details{
\code{method = "Gelman-Hill"} (or \code{"gelman_hill"}) computes the
PCP based on the proposal from \cite{Gelman and Hill 2017, 99}, which is
defined as the proportion of cases for which the deterministic prediction
is wrong, i.e. the proportion where the predicted probability is above 0.5,
although y=0 (and vice versa) (see also \cite{Herron 1999, 90}).
\cr \cr
\code{method = "Herron"} (or \code{"herron"}) computes a modified version
of the PCP (\cite{Herron 1999, 90-92}), which is the sum of predicted probabilities,
where y=1, plus the sum of 1 - predicted probabilities, where y=0, divided
by the number of observations. This approach is said to be more accurate.
\cr \cr
The PCP ranges from 0 to 1, where values closer to 1 mean that the model
predicts the outcome better than models with a PCP closer to 0. In general,
the PCP should be above 0.5 (i.e. 50\%); the closer to 1, the better.
Furthermore, the PCP of the full model should be considerably above
the null model's PCP.
\cr \cr
The likelihood-ratio test indicates whether the model has a significantly
better fit than the null-model (in such cases, p < 0.05).
}
\examples{
data(mtcars)
m <- glm(formula = vs ~ hp + wt, family = binomial, data = mtcars)
performance_pcp(m)
performance_pcp(m, method = "Gelman-Hill")
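# by-hand sketch of Herron's method: sum of predicted probabilities where
# y = 1, plus sum of (1 - predicted probabilities) where y = 0, divided by n
pr <- predict(m, type = "response")
(sum(pr[mtcars$vs == 1]) + sum(1 - pr[mtcars$vs == 0])) / nrow(mtcars)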
}
\references{
\itemize{
\item Herron, M. (1999). Postestimation Uncertainty in Limited Dependent Variable Models. Political Analysis, 8, 83–98.
\item Gelman, A., & Hill, J. (2007). Data analysis using regression and multilevel/hierarchical models. Cambridge; New York: Cambridge University Press, 99
}
}
performance/man/check_homogeneity.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_homogeneity.R
\name{check_homogeneity}
\alias{check_homogeneity}
\title{Check model for homogeneity of variances}
\usage{
check_homogeneity(x, method = c("bartlett", "fligner", "auto"), ...)
}
\arguments{
\item{x}{A linear model or an ANOVA object.}
\item{method}{Name of the method (underlying test) that should be performed
to check the homogeneity of variances. May either be \code{"bartlett"} for
the Bartlett test (assuming normal distributed samples or groups),
\code{"fligner"} for the Fligner-Killeen test (rank-based, non-parametric test),
or \code{"auto"}. In the latter case, Bartlett test is used if the model response
is normal distributed, else Fligner-Killeen test is used.}
\item{...}{Currently not used.}
}
\value{
Invisibly returns the p-value of the test statistics. A p-value
< 0.05 indicates a significant difference in the variance between the groups.
}
\description{
Check model for homogeneity of variances between groups described
by independent variables in a model.
}
\note{
There is also a \href{https://easystats.github.io/see/articles/performance.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
}
\examples{
model <- lm(len ~ supp + dose, data = ToothGrowth)
check_homogeneity(model)
# plot results
result <- check_homogeneity(model)
plot(result)
}
performance/man/check_normality.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_normality.R
\name{check_normality}
\alias{check_normality}
\title{Check model for (non-)normality of residuals.}
\usage{
check_normality(x, ...)
}
\arguments{
\item{x}{A model object.}
\item{...}{Currently not used.}
}
\value{
Invisibly returns the p-value of the test statistics. A p-value
< 0.05 indicates a significant deviation from a normal distribution.
}
\description{
Check model for (non-)normality of residuals.
}
\details{
\code{check_normality()} calls \code{\link[stats]{shapiro.test}}
and checks the standardized residuals for normal distribution. Note that
this formal test almost always yields significant results for the
distribution of residuals; visual inspection (e.g. Q-Q plots) is preferable.
}
\note{
There is also a \href{https://easystats.github.io/see/articles/performance.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
}
\examples{
m <- lm(mpg ~ wt + cyl + gear + disp, data = mtcars)
check_normality(m)
# plot results
x <- check_normality(m)
plot(x)
\dontrun{
# QQ-plot
plot(check_normality(m), type = "qq")
# PP-plot
plot(check_normality(m), type = "pp")
}
}
performance/man/check_collinearity.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_collinearity.R
\name{check_collinearity}
\alias{check_collinearity}
\alias{check_collinearity.glmmTMB}
\title{Check for multicollinearity of model terms}
\usage{
check_collinearity(x, ...)
\method{check_collinearity}{glmmTMB}(
x,
component = c("all", "conditional", "count", "zi", "zero_inflated"),
...
)
}
\arguments{
\item{x}{A model object (that should at least respond to \code{vcov()},
and if possible, also to \code{model.matrix()} - however, it also should
work without \code{model.matrix()}).}
\item{...}{Currently not used.}
\item{component}{For models with zero-inflation component, multicollinearity
can be checked for the conditional model (count component,
\code{component = "conditional"} or \code{component = "count"}),
zero-inflation component (\code{component = "zero_inflated"} or
\code{component = "zi"}) or both components (\code{component = "all"}).
Following model-classes are currently supported: \code{hurdle},
\code{zeroinfl}, \code{zerocount}, \code{MixMod} and \code{glmmTMB}.}
}
\value{
A data frame with three columns: The name of the model term, the
variance inflation factor and the factor by which the standard error
is increased due to possible correlation with other terms.
}
\description{
\code{check_collinearity()} checks regression models for
multicollinearity by calculating the variance inflation factor (VIF).
}
\details{
The variance inflation factor is a measure to analyze the magnitude
of multicollinearity of model terms. A VIF less than 5 indicates
a low correlation of that predictor with other predictors. A value between
5 and 10 indicates a moderate correlation, while VIF values larger than 10
are a sign of high, non-tolerable correlation among model predictors. The
\emph{Increased SE} column in the output indicates how much larger
the standard error is due to the correlation with other predictors.
\cr \cr
An informative blog post about collinearity can be found
\href{https://janhove.github.io/analysis/2019/09/11/collinearity}{here}.
}
\note{
There is also a \href{https://easystats.github.io/see/articles/performance.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
}
\examples{
m <- lm(mpg ~ wt + cyl + gear + disp, data = mtcars)
check_collinearity(m)
# plot results
x <- check_collinearity(m)
plot(x)
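\dontrun{
# check the components of a zero-inflated model separately (a sketch;
# the zero-inflation formula here is illustrative)
if (require("glmmTMB")) {
  data(Salamanders)
  m2 <- glmmTMB(count ~ spp + mined, ziformula = ~ spp + mined,
    family = poisson, data = Salamanders)
  check_collinearity(m2, component = "all")
}
}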
}
\references{
James, G., Witten, D., Hastie, T., & Tibshirani, R. (Hrsg.). (2013). An introduction to statistical learning: with applications in R. New York: Springer.
}
performance/man/performance_mse.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_mse.R
\name{performance_mse}
\alias{performance_mse}
\alias{mse}
\title{Mean Square Error of Linear Models}
\usage{
performance_mse(model, verbose = TRUE)
mse(model, verbose = TRUE)
}
\arguments{
\item{model}{A model.}
\item{verbose}{Toggle off warnings.}
}
\value{
Numeric, the mean square error of \code{model}.
}
\description{
Compute mean square error of linear models.
}
\details{
The mean square error is the mean of the squared residuals, i.e. it
measures the average of the squares of the errors. Less technically
speaking, the mean square error can be considered as the variance of the
residuals, i.e. the variation in the outcome the model doesn't explain.
Lower values (closer to zero) indicate better fit.
}
\examples{
data(mtcars)
m <- lm(mpg ~ hp + gear, data = mtcars)
performance_mse(m)
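# by-hand sketch: the mean of the squared residuals
mean(residuals(m)^2)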
}
performance/man/compare_performance.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare_performance.R
\name{compare_performance}
\alias{compare_performance}
\title{Compare performance of different models}
\usage{
compare_performance(..., metrics = "all", rank = FALSE, verbose = TRUE)
}
\arguments{
\item{...}{Multiple model objects (also of different classes).}
\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of metrics to be computed. See related \link[=model_performance]{documentation} of object's class for details.}
\item{rank}{Logical, if \code{TRUE}, models are ranked according to "best overall
model performance". See 'Details'.}
\item{verbose}{Toggle off warnings.}
}
\value{
A data frame (with one row per model) and one column per "index" (see \code{metrics}).
}
\description{
\code{compare_performance()} computes indices of model performance for
different models at once and hence allows comparison of indices across models.
}
\details{
\subsection{Bayes factor for Model Comparison}{
If all models were fit from the same data, \code{compare_performance()}
returns an additional column named \code{BF}, which shows the Bayes factor
(see \code{\link[bayestestR]{bayesfactor_models}}) for each model against
the denominator model. The \emph{first} model is used as denominator model,
and its Bayes factor is set to \code{NA} to indicate the reference model.
}
\subsection{Ranking Models}{
When \code{rank = TRUE}, a new column \code{Performance_Score} is returned. This
score ranges from 0\% to 100\%, higher values indicating better model performance.
Calculation is based on normalizing all indices (i.e. rescaling them to a
range from 0 to 1), and taking the mean value of all indices for each model.
This is a rather quick heuristic, but might be helpful as an exploratory index.
\cr \cr
In particular when models are of different types (e.g. mixed models, classical
linear models, logistic regression, ...), not all indices will be computed
for each model. In cases where an index cannot be calculated for a specific
model type, that model gets an \code{NA} value. All indices that have any
\code{NA}s are excluded from calculating the performance score.
\cr \cr
There is a \code{plot()}-method for \code{compare_performance()},
which creates a "spiderweb" plot, where the different indices are
normalized and larger values indicate better model performance.
Hence, points closer to the center indicate worse fit indices
(see \href{https://easystats.github.io/see/articles/performance.html}{online-documentation}
for more details).
}
}
\note{
There is also a \href{https://easystats.github.io/see/articles/performance.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
}
\examples{
if (require("lme4")) {
m1 <- lm(mpg ~ wt + cyl, data = mtcars)
m2 <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
m3 <- lmer(Petal.Length ~ Sepal.Length + (1 | Species), data = iris)
compare_performance(m1, m2, m3)
}
data(iris)
lm1 <- lm(Sepal.Length ~ Species, data = iris)
lm2 <- lm(Sepal.Length ~ Species + Petal.Length, data = iris)
lm3 <- lm(Sepal.Length ~ Species * Petal.Length, data = iris)
compare_performance(lm1, lm2, lm3)
compare_performance(lm1, lm2, lm3, rank = TRUE)
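\dontrun{
# "spiderweb" plot of the normalized indices (requires the see package)
plot(compare_performance(lm1, lm2, lm3, rank = TRUE))
}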
}
performance/man/performance_score.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_score.R
\name{performance_score}
\alias{performance_score}
\title{Proper Scoring Rules}
\usage{
performance_score(model, verbose = TRUE)
}
\arguments{
\item{model}{Model with binary or count outcome.}
\item{verbose}{Toggle off warnings.}
}
\value{
A list with three elements, the logarithmic, quadratic/Brier and spherical score.
}
\description{
Calculates the logarithmic, quadratic/Brier and spherical score
from a model with binary or count outcome.
}
\details{
Proper scoring rules can be used to evaluate the quality of model
predictions and model fit. \code{performance_score()} calculates the logarithmic,
quadratic/Brier and spherical scoring rules. The spherical rule takes values
in the interval \code{[0, 1]}, with values closer to 1 indicating a more
accurate model, and the logarithmic rule in the interval \code{[-Inf, 0]},
with values closer to 0 indicating a more accurate model.
\cr \cr
For \code{stan_lmer()} and \code{stan_glmer()} models, the predicted values
are based on \code{posterior_predict()}, instead of \code{predict()}. Thus,
results may differ more than expected from their non-Bayesian counterparts
in \pkg{lme4}.
}
\note{
Code is partially based on \href{https://drizopoulos.github.io/GLMMadaptive/reference/scoring_rules.html}{GLMMadaptive::scoring_rules()}.
}
\examples{
## Dobson (1990) Page 93: Randomized Controlled Trial :
counts <- c(18, 17, 15, 20, 10, 20, 25, 13, 12)
outcome <- gl(3, 1, 9)
treatment <- gl(3, 3)
model <- glm(counts ~ outcome + treatment, family = poisson())
performance_score(model)
\dontrun{
if (require("glmmTMB")) {
data(Salamanders)
model <- glmmTMB(
count ~ spp + mined + (1 | site),
zi = ~ spp + mined,
family = nbinom2(),
data = Salamanders
)
performance_score(model)
}
}
}
\references{
Carvalho, A. (2016). An overview of applications of proper scoring rules. Decision Analysis 13, 223–242. \doi{10.1287/deca.2016.0337}
}
\seealso{
\code{\link[=performance_logloss]{performance_logloss()}}
}
performance/man/item_split_half.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/item_split_half.R
\name{item_split_half}
\alias{item_split_half}
\title{Split-Half Reliability}
\usage{
item_split_half(x, digits = 3)
}
\arguments{
\item{x}{A matrix or a data frame.}
\item{digits}{Amount of digits for returned values.}
}
\value{
A list with two elements: the split-half reliability \code{splithalf} and
the Spearman-Brown corrected split-half reliability \code{spearmanbrown}.
}
\description{
Compute various measures of internal consistencies
for tests or item-scales of questionnaires.
}
\details{
This function calculates the split-half reliability for items in
\code{x}, including the Spearman-Brown adjustment. Splitting is done by
selecting odd versus even columns in \code{x}. A value closer to 1
indicates greater internal consistency.
}
\examples{
data(mtcars)
x <- mtcars[, c("cyl", "gear", "carb", "hp")]
item_split_half(x)
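# by-hand sketch, following the odd/even column split described in 'Details'
odd <- rowSums(x[, seq(1, ncol(x), by = 2), drop = FALSE])
even <- rowSums(x[, seq(2, ncol(x), by = 2), drop = FALSE])
r <- cor(odd, even)
# Spearman-Brown corrected split-half reliability
2 * r / (1 + r)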
}
\references{
Spearman C. 1910. Correlation calculated from faulty data. British Journal of Psychology (3): 271-295. \doi{10.1111/j.2044-8295.1910.tb00206.x}
\cr \cr
Brown W. 1910. Some experimental results in the correlation of mental abilities. British Journal of Psychology (3): 296-322. \doi{10.1111/j.2044-8295.1910.tb00207.x}
}
performance/man/r2_xu.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/r2_xu.R
\name{r2_xu}
\alias{r2_xu}
\title{Xu's R2 (Omega-squared)}
\usage{
r2_xu(model)
}
\arguments{
\item{model}{A linear (mixed) model.}
}
\value{
The R2 value.
}
\description{
Calculates Xu's Omega-squared value, a simple R2 equivalent for linear mixed models.
}
\details{
\code{r2_xu()} is a crude measure for the explained variance from
linear (mixed) effects models, which is originally denoted as
\ifelse{html}{\out{&Omega;<sup>2</sup>}}{\eqn{\Omega^2}}.
}
\examples{
model <- lm(Sepal.Length ~ Petal.Length + Species, data = iris)
r2_xu(model)
}
\references{
Xu, R. (2003). Measuring explained variation in linear mixed effects models. Statistics in Medicine, 22(22), 3527–3541. \doi{10.1002/sim.1572}
}
performance/man/classify_distribution.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_distribution.R
\docType{data}
\name{classify_distribution}
\alias{classify_distribution}
\title{Machine learning model trained to classify distributions}
\format{An object of class \code{randomForest.formula} (inherits from \code{randomForest}) of length 8.}
\usage{
classify_distribution
}
\description{
Mean accuracy and Kappa of 0.86 and 0.85, respectively.
}
\keyword{datasets}
performance/man/item_intercor.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/item_intercor.R
\name{item_intercor}
\alias{item_intercor}
\title{Mean Inter-Item-Correlation}
\usage{
item_intercor(x, method = c("pearson", "spearman", "kendall"))
}
\arguments{
\item{x}{A matrix as returned by the \code{\link{cor}}-function,
or a data frame with items (e.g. from a test or questionnaire).}
\item{method}{Correlation computation method. May be one of
\code{"pearson"} (default), \code{"spearman"} or \code{"kendall"}.
You may use the initial letter only.}
}
\value{
The mean inter-item-correlation value for \code{x}.
}
\description{
Compute various measures of internal consistencies
for tests or item-scales of questionnaires.
}
\details{
This function calculates a mean inter-item-correlation, i.e.
a correlation matrix of \code{x} will be computed (unless
\code{x} is already a matrix as returned by the \code{cor()}-function)
and the mean of all pairwise item correlations is returned.
Requires either a data frame or a computed \code{cor()}-object.
\cr \cr
\dQuote{Ideally, the average inter-item correlation for a set of
items should be between .20 and .40, suggesting that while the
items are reasonably homogenous, they do contain sufficiently
unique variance so as to not be isomorphic with each other.
When values are lower than .20, then the items may not be
representative of the same content domain. If values are higher than
.40, the items may be only capturing a small bandwidth of the construct.}
\cite{(Piedmont 2014)}
}
\examples{
data(mtcars)
x <- mtcars[, c("cyl", "gear", "carb", "hp")]
item_intercor(x)
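# by-hand sketch: mean of the off-diagonal (pairwise) correlations
cc <- cor(x)
mean(cc[lower.tri(cc)])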
}
\references{
Piedmont RL. 2014. Inter-item Correlations. In: Michalos AC (eds) Encyclopedia of Quality of Life and Well-Being Research. Dordrecht: Springer, 3303-3304. \doi{10.1007/978-94-007-0753-5_1493}
}
performance/man/r2_mcfadden.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/r2_mcfadden.R
\name{r2_mcfadden}
\alias{r2_mcfadden}
\title{McFadden's R2}
\usage{
r2_mcfadden(model)
}
\arguments{
\item{model}{Generalized linear or multinomial logit (\code{mlogit}) model.}
}
\value{
For most models, a list with McFadden's R2 and adjusted McFadden's
R2 value. For some models, only McFadden's R2 is available.
}
\description{
Calculates McFadden's pseudo R2.
}
\examples{
if (require("mlogit")) {
data("Fishing", package = "mlogit")
Fish <- mlogit.data(Fishing, varying = c(2:9), shape = "wide", choice = "mode")
model <- mlogit(mode ~ price + catch, data = Fish)
r2_mcfadden(model)
}
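# by-hand sketch for a binomial GLM, using McFadden's formula
# 1 - logLik(model) / logLik(null model)
m <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
m0 <- update(m, . ~ 1)
1 - as.numeric(logLik(m)) / as.numeric(logLik(m0))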
}
\references{
\itemize{
\item McFadden, D. (1987). Regression-based specification tests for the multinomial logit model. Journal of econometrics, 34(1-2), 63-82.
\item McFadden, D. (1973). Conditional logit analysis of qualitative choice behavior.
}
}
performance/man/model_performance.merMod.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_performance.mixed.R
\name{model_performance.merMod}
\alias{model_performance.merMod}
\title{Performance of Mixed Models}
\usage{
\method{model_performance}{merMod}(model, metrics = "all", verbose = TRUE, ...)
}
\arguments{
\item{model}{A mixed effects model.}
\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of metrics to be computed (some of \code{c("AIC", "BIC", "R2", "ICC", "RMSE", "LOGLOSS", "SCORE")}). \code{"common"} will compute AIC, BIC, R2, ICC and RMSE.}
\item{verbose}{Toggle off warnings.}
\item{...}{Arguments passed to or from other methods.}
}
\value{
A data frame (with one row) and one column per "index" (see \code{metrics}).
}
\description{
Compute indices of model performance for mixed models.
}
\details{
This method returns the \emph{adjusted ICC} only, as this is typically
of interest when judging the variance attributed to the random effects part
of the model (see also \code{\link{icc}}).
\cr \cr
Furthermore, see 'Details' in \code{\link{model_performance.lm}} for
more details on returned indices.
}
\examples{
if (require("lme4")) {
model <- lmer(Petal.Length ~ Sepal.Length + (1 | Species), data = iris)
model_performance(model)
}
}
performance/man/check_convergence.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_convergence.R
\name{check_convergence}
\alias{check_convergence}
\title{Convergence test for mixed effects models}
\usage{
check_convergence(x, tolerance = 0.001, ...)
}
\arguments{
\item{x}{A \code{merMod}-object.}
\item{tolerance}{Indicates up to which value the convergence result is
accepted. The smaller \code{tolerance} is, the stricter the test
will be.}
\item{...}{Currently not used.}
}
\value{
\code{TRUE} if convergence is fine and \code{FALSE} if convergence
is suspicious. Additionally, the convergence value is returned as attribute.
}
\description{
\code{check_convergence()} provides an alternative convergence
test for \code{\link[lme4]{merMod}}-objects.
}
\details{
\code{check_convergence()} provides an alternative convergence test for
\code{\link[lme4]{merMod}}-objects, as discussed
\href{https://github.com/lme4/lme4/issues/120}{here}
and suggested by Ben Bolker in
\href{https://github.com/lme4/lme4/issues/120#issuecomment-39920269}{this comment}.
Further details can be found in \code{\link[lme4]{convergence}}.
}
\examples{
if (require("lme4")) {
data(cbpp)
set.seed(1)
cbpp$x <- rnorm(nrow(cbpp))
cbpp$x2 <- runif(nrow(cbpp))
model <- glmer(
cbind(incidence, size - incidence) ~ period + x + x2 + (1 + x | herd),
data = cbpp,
family = binomial()
)
check_convergence(model)
}
}
performance/man/performance_accuracy.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_accuracy.R
\name{performance_accuracy}
\alias{performance_accuracy}
\title{Accuracy of predictions from model fit}
\usage{
performance_accuracy(model, method = c("cv", "boot"), k = 5, n = 1000)
}
\arguments{
\item{model}{Fitted model object of class \code{lm} or \code{glm}, the latter
being a logistic regression model (binary response).}
\item{method}{Character string, indicating whether cross-validation
(\code{method = "cv"}) or bootstrapping (\code{method = "boot"})
is used to compute the accuracy values.}
\item{k}{The number of folds for the k-fold cross-validation.}
\item{n}{Number of bootstrap-samples.}
}
\value{
A list with three values: The \code{Accuracy} of the model predictions, i.e.
the proportion of accurately predicted values from the model, its standard
error, \code{SE}, and the \code{Method} used to compute the accuracy.
}
\description{
This function calculates the predictive accuracy of linear
or logistic regression models.
}
\details{
For linear models, the accuracy is the correlation coefficient
between the actual and the predicted value of the outcome. For
logistic regression models, the accuracy corresponds to the
AUC-value, calculated with the \code{\link[bayestestR]{auc}}-function.
\cr \cr
The accuracy is the mean of multiple correlation or AUC values,
which are computed either with cross-validation or with
nonparametric bootstrapping (see argument \code{method}).
The standard error is the standard deviation of these computed
correlation or AUC values.
}
\examples{
model <- lm(mpg ~ wt + cyl, data = mtcars)
performance_accuracy(model)
model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
performance_accuracy(model)
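# bootstrapping instead of cross-validation; n sets the number of
# bootstrap samples (kept small here so the example runs quickly)
performance_accuracy(model, method = "boot", n = 100)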
}
performance/man/check_model.Rd 0000644 0001762 0000144 00000003237 13611777057 016075 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_model.R
\name{check_model}
\alias{check_model}
\alias{check_model.default}
\title{Visual check of model assumptions}
\usage{
check_model(x, ...)
\method{check_model}{default}(x, dot_size = 2, line_size = 0.8, panel = TRUE, check = "all", ...)
}
\arguments{
\item{x}{A model object.}
\item{...}{Currently not used.}
\item{dot_size}{Size of dot-geoms.}
\item{line_size}{Size of line-geoms.}
\item{panel}{Logical, if \code{TRUE}, plots are arranged as panels; else,
single plots for each diagnostic are returned.}
\item{check}{Character vector, indicating which checks should be performed
and plotted. May be one or more of
\code{"all", "vif", "qq", "normality", "ncv", "homogeneity", "outliers", "reqq"}.
\code{"reqq"} is a QQ-plot for random effects and only available for mixed models.
\code{"ncv"} checks for non-constant variance, i.e. for heteroscedasticity.
By default, all possible checks are performed and plotted.}
}
\value{
The data frame that is used for plotting.
}
\description{
Visual check of various model assumptions (normality of residuals,
normality of random effects, heteroscedasticity, homogeneity of variance,
multicollinearity).
}
\note{
This function just prepares the data for plotting. To create the plots,
\CRANpkg{see} needs to be installed.
}
\examples{
\dontrun{
m <- lm(mpg ~ wt + cyl + gear + disp, data = mtcars)
check_model(m)
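# plot only selected diagnostics, e.g. QQ-plot and homogeneity of variance
check_model(m, check = c("qq", "homogeneity"))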
if (require("lme4")) {
m <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
check_model(m, panel = FALSE)
}
if (require("rstanarm")) {
m <- stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200)
check_model(m)
}
}
}
performance/man/model_performance.stanreg.Rd 0000644 0001762 0000144 00000004504 13611777057 020761 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_performance.bayesian.R
\name{model_performance.stanreg}
\alias{model_performance.stanreg}
\title{Performance of Bayesian Models}
\usage{
\method{model_performance}{stanreg}(model, metrics = "all", verbose = TRUE, ...)
}
\arguments{
\item{model}{Object of class \code{stanreg} or \code{brmsfit}.}
\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of metrics to be computed (some of \code{c("LOOIC", "WAIC", "R2", "R2_adj", "RMSE", "LOGLOSS", "SCORE")}). \code{"common"} will compute LOOIC, WAIC, R2 and RMSE.}
\item{verbose}{Toggle off warnings.}
\item{...}{Arguments passed to or from other methods.}
}
\value{
A data frame (with one row) and one column per "index" (see \code{metrics}).
}
\description{
Compute indices of model performance for Bayesian regression models.
}
\details{
Depending on \code{model}, following indices are computed:
\itemize{
\item{\strong{ELPD}} {expected log predictive density, see \code{\link{looic}}}
\item{\strong{LOOIC}} {leave-one-out cross-validation (LOO) information criterion, see \code{\link{looic}}}
\item{\strong{WAIC}} {widely applicable information criterion, see \code{loo::waic}}
\item{\strong{R2}} {r-squared value, see \code{\link{r2}}}
\item{\strong{R2_LOO_adjusted}} {adjusted r-squared, see \code{\link{r2}}}
\item{\strong{RMSE}} {root mean squared error, see \code{\link{performance_rmse}}}
\item{\strong{LOGLOSS}} {Log-loss, see \code{\link{performance_logloss}}}
\item{\strong{SCORE_LOG}} {score of logarithmic proper scoring rule, see \code{\link{performance_score}}}
\item{\strong{SCORE_SPHERICAL}} {score of spherical proper scoring rule, see \code{\link{performance_score}}}
\item{\strong{PCP}} {percentage of correct predictions, see \code{\link{performance_pcp}}}
}
}
\examples{
if (require("rstanarm")) {
model <- stan_glm(mpg ~ wt + cyl, data = mtcars, chains = 1, iter = 500, refresh = 0)
model_performance(model)
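# compute only a subset of the available metrics
model_performance(model, metrics = c("LOOIC", "RMSE"))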
model <- stan_glmer(
mpg ~ wt + cyl + (1 | gear),
data = mtcars,
chains = 1,
iter = 500,
refresh = 0
)
model_performance(model)
}
}
\references{
Gelman, A., Goodrich, B., Gabry, J., & Vehtari, A. (2018). R-squared for Bayesian regression models. The American Statistician, 1-6.
}
\seealso{
\link{r2_bayes}
}
performance/man/r2_loo.Rd 0000644 0001762 0000144 00000001236 13611777057 015031 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/r2_loo.R
\name{r2_loo}
\alias{r2_loo}
\title{LOO-adjusted R2}
\usage{
r2_loo(model)
}
\arguments{
\item{model}{A Bayesian regression model.}
}
\value{
The LOO-adjusted R2 for \code{model}, as numeric value.
}
\description{
Compute LOO-adjusted R2.
}
\details{
Unlike \code{\link{r2_bayes}}, which returns an "unadjusted" R2 value,
\code{r2_loo()} calculates a LOO-adjusted R2, which comes conceptually
closer to an "adjusted" R2 measure.
}
\examples{
if (require("rstanarm")) {
model <- stan_glm(mpg ~ wt + cyl, data = mtcars, chains = 1, iter = 500, refresh = 0)
r2_loo(model)
}
}
performance/man/check_overdispersion.Rd 0000644 0001762 0000144 00000005176 13616504501 020037 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_overdispersion.R
\name{check_overdispersion}
\alias{check_overdispersion}
\title{Check overdispersion of GL(M)Ms}
\usage{
check_overdispersion(x, ...)
}
\arguments{
\item{x}{Fitted model of class \code{merMod}, \code{glmmTMB}, \code{glm},
or \code{glm.nb} (package \pkg{MASS}).}
\item{...}{Currently not used.}
}
\value{
A list with results from the overdispersion test, like chi-squared
statistics, p-value or dispersion ratio.
}
\description{
\code{check_overdispersion()} checks generalized linear (mixed) models
for overdispersion.
}
\details{
Overdispersion occurs when the observed variance is higher than the
variance expected under the model. For Poisson models, the variance
equals the mean; if the observed variance is much higher, the data
are "overdispersed".
\subsection{Interpretation of the Dispersion Ratio}{
If the dispersion ratio is close to one, a Poisson model fits the
data well. Dispersion ratios larger than one indicate overdispersion,
so a negative binomial model (or a similar model) might fit the data
better. A p-value < .05 indicates overdispersion.
}
\subsection{Overdispersion in Poisson Models}{
For Poisson models, the overdispersion test is based on the code
from \cite{Gelman and Hill (2007), page 115}.
}
\subsection{Overdispersion in Mixed Models}{
For \code{merMod}- and \code{glmmTMB}-objects, \code{check_overdispersion()}
is based on the code in the \href{http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html}{GLMM FAQ},
section \emph{How can I deal with overdispersion in GLMMs?}. Note that
this function only returns an \emph{approximate} estimate of an
overdispersion parameter, and is probably inaccurate for zero-inflated
mixed models (fitted with \code{glmmTMB}).
}
\subsection{How to fix Overdispersion}{
Overdispersion can be fixed by either modeling the dispersion parameter,
or by choosing a different distributional family (like Quasi-Poisson,
or negative binomial, see \cite{Gelman and Hill (2007), pages 115-116}).
}
}
\examples{
if (require("glmmTMB")) {
data(Salamanders)
m <- glm(count ~ spp + mined, family = poisson, data = Salamanders)
check_overdispersion(m)
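# when overdispersion is detected, a negative binomial model often fits
# better; models fitted with MASS::glm.nb are supported as well
if (require("MASS")) {
m_nb <- glm.nb(count ~ spp + mined, data = Salamanders)
check_overdispersion(m_nb)
}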
m <- glmmTMB(
count ~ mined + spp + (1 | site),
family = poisson,
data = Salamanders
)
check_overdispersion(m)
}
}
\references{
\itemize{
\item Bolker B et al. (2017): \href{http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html}{GLMM FAQ.}
\item Gelman, A., & Hill, J. (2007). Data analysis using regression and multilevel/hierarchical models. Cambridge; New York: Cambridge University Press.
}
}
performance/man/model_performance.Rd 0000644 0001762 0000144 00000002204 13611777377 017317 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_performance.R
\name{model_performance}
\alias{model_performance}
\alias{performance}
\title{Model Performance}
\usage{
model_performance(model, ...)
performance(model, ...)
}
\arguments{
\item{model}{Statistical model.}
\item{...}{Arguments passed to or from other methods; for
\code{compare_performance()}, one or multiple model objects (also of
different classes).}
}
\value{
A data frame (with one row) and one column per "index" (see \code{metrics}).
}
\description{
See the documentation for your object's class:
\itemize{
\item \link[=model_performance.lm]{Frequentist Regressions}
\item \link[=model_performance.merMod]{Mixed models}
\item \link[=model_performance.stanreg]{Bayesian models}
\item \link[=model_performance.lavaan]{CFA / SEM lavaan models}
}
}
\examples{
model <- lm(mpg ~ wt + cyl, data = mtcars)
model_performance(model)
model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
model_performance(model)
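# several models can be compared side by side with compare_performance()
m1 <- lm(mpg ~ wt + cyl, data = mtcars)
m2 <- lm(mpg ~ wt + cyl + gear, data = mtcars)
compare_performance(m1, m2)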
}
\seealso{
\code{\link[=compare_performance]{compare_performance()}} to compare performance of many different models.
}
performance/man/performance_hosmer.Rd 0000644 0001762 0000144 00000002106 13565530445 017504 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_hosmer.R
\name{performance_hosmer}
\alias{performance_hosmer}
\title{Hosmer-Lemeshow goodness-of-fit test}
\usage{
performance_hosmer(model, n_bins = 10)
}
\arguments{
\item{model}{A \code{glm}-object with binomial family.}
\item{n_bins}{Numeric, the number of bins to divide the data.}
}
\value{
An object of class \code{hoslem_test} with the following values: \code{chisq},
the Hosmer-Lemeshow chi-squared statistic; \code{df}, the degrees of freedom;
and \code{p.value}, the p-value for the goodness-of-fit test.
}
\description{
Check model quality of logistic regression models.
}
\details{
A well-fitting model shows \emph{no} significant difference between
the model and the observed data, i.e. the reported p-value should be
greater than 0.05.
}
\examples{
model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
performance_hosmer(model)
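# use a different number of bins for grouping the predicted probabilities
performance_hosmer(model, n_bins = 5)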
}
\references{
Hosmer, D. W., & Lemeshow, S. (2000). Applied Logistic Regression. Hoboken, NJ, USA: John Wiley & Sons, Inc. \doi{10.1002/0471722146}
}
performance/man/looic.Rd 0000644 0001762 0000144 00000001172 13611777057 014741 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/looic.R
\name{looic}
\alias{looic}
\title{LOO-related Indices for Bayesian regressions.}
\usage{
looic(model)
}
\arguments{
\item{model}{A Bayesian regression model.}
}
\value{
A list with four elements: the ELPD, the LOOIC, and their standard errors.
}
\description{
Compute LOOIC (leave-one-out cross-validation (LOO) information
criterion) and ELPD (expected log predictive density) for Bayesian regressions.
}
\examples{
if (require("rstanarm")) {
model <- stan_glm(mpg ~ wt + cyl, data = mtcars, chains = 1, iter = 500, refresh = 0)
looic(model)
}
}
performance/man/check_itemscale.Rd 0000644 0001762 0000144 00000004600 13620212404 016712 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_itemscale.R
\name{check_itemscale}
\alias{check_itemscale}
\title{Describe Properties of Item Scales}
\usage{
check_itemscale(x)
}
\arguments{
\item{x}{An object of class \code{parameters_pca}, as returned by \code{\link[parameters]{principal_components}}.}
}
\value{
A list of data frames, with related measures of internal consistencies of each subscale.
}
\description{
Compute various measures of internal consistency for (sub)scales
whose items were extracted using
\code{\link[parameters]{principal_components}}.
}
\details{
\code{check_itemscale()} calculates various measures of internal
consistency, such as Cronbach's alpha, item difficulty or item discrimination,
for subscales that were built from several items. Subscales are retrieved from
the results of \code{\link[parameters]{principal_components}}, i.e. based on
how many components were extracted from the PCA, \code{check_itemscale()}
retrieves those variables that belong to a component and calculates the
above-mentioned measures.
}
\note{
\itemize{
\item \emph{Item difficulty} should range between 0.2 and 0.8. Ideal value is \code{p+(1-p)/2} (which mostly is between 0.5 and 0.8). See \code{\link{item_difficulty}} for details.
\item For \emph{item discrimination}, acceptable values are 0.20 or higher; the closer to 1.00 the better. See \code{\link{item_reliability}} for more details.
\item In case the total \emph{Cronbach's alpha} value is below the acceptable cut-off of 0.7 (mostly if an index has few items), the \emph{mean inter-item-correlation} is an alternative measure to indicate acceptability. Satisfactory range lies between 0.2 and 0.4. See also \code{\link{item_intercor}}.
}
}
\examples{
# data generation from '?prcomp', slightly modified
C <- chol(S <- toeplitz(.9^(0:15)))
set.seed(17)
X <- matrix(rnorm(16000), 100, 16)
Z <- X \%*\% C
if (require("parameters")) {
pca <- principal_components(as.data.frame(Z), rotation = "varimax", n = 3)
pca
check_itemscale(pca)
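# for comparison: Cronbach's alpha for the full (unsplit) item set
cronbachs_alpha(as.data.frame(Z))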
}
}
\references{
\itemize{
\item Briggs SR, Cheek JM (1986) The role of factor analysis in the development and evaluation of personality scales. Journal of Personality, 54(1), 106-148. \doi{10.1111/j.1467-6494.1986.tb00391.x}
\item Trochim WMK (2008) Types of Reliability. (\href{http://www.socialresearchmethods.net/kb/reltypes.php}{web})
}
}
performance/man/item_difficulty.Rd 0000644 0001762 0000144 00000002167 13616504501 017004 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/item_difficulty.R
\name{item_difficulty}
\alias{item_difficulty}
\title{Difficulty of Questionnaire Items}
\usage{
item_difficulty(x)
}
\arguments{
\item{x}{A data frame or matrix with items (e.g. from a test or
questionnaire).}
}
\value{
A data frame with three columns: The name(s) of the item(s), the item
difficulties for each item, and the ideal item difficulty.
}
\description{
Compute the difficulty of items in tests or
item scales of questionnaires.
}
\details{
This function calculates the item difficulty, which should
range between 0.2 and 0.8. Lower values indicate more
difficult items, while higher values close to one
indicate easier items. The ideal value for item difficulty
is \code{p + (1 - p) / 2}, where \code{p = 1 / max(x)}. In most
cases, the ideal item difficulty lies between 0.5 and 0.8.
}
\examples{
data(mtcars)
x <- mtcars[, c("cyl", "gear", "carb", "hp")]
item_difficulty(x)
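# hand-computed ideal difficulty for an item with maximum score 8,
# following the formula from 'Details'
p <- 1 / 8
p + (1 - p) / 2 # 0.5625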
}
performance/man/performance_rse.Rd 0000644 0001762 0000144 00000001146 13565530445 017003 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_rse.R
\name{performance_rse}
\alias{performance_rse}
\title{Residual Standard Error for Linear Models}
\usage{
performance_rse(model)
}
\arguments{
\item{model}{A model.}
}
\value{
Numeric, the residual standard error of \code{model}.
}
\description{
Compute residual standard error of linear models.
}
\details{
The residual standard error is the square root of the residual
sum of squares divided by the residual degrees of freedom.
}
\examples{
data(mtcars)
m <- lm(mpg ~ hp + gear, data = mtcars)
performance_rse(m)
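# equivalent hand computation, following the formula in 'Details'
sqrt(sum(residuals(m)^2) / df.residual(m))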
}
performance/man/check_zeroinflation.Rd 0000644 0001762 0000144 00000002472 13616504501 017643 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_zeroinflation.R
\name{check_zeroinflation}
\alias{check_zeroinflation}
\title{Check for zero-inflation in count models}
\usage{
check_zeroinflation(x, tolerance = 0.05)
}
\arguments{
\item{x}{Fitted model of class \code{merMod}, \code{glmmTMB}, \code{glm},
or \code{glm.nb} (package \pkg{MASS}).}
\item{tolerance}{The tolerance for the ratio of observed to predicted
zeros to be considered as over- or underfitting zeros. A ratio
within 1 +/- \code{tolerance} is considered OK, while a ratio
outside this range indicates over- or underfitting.}
}
\value{
A list with information about the number of predicted and observed
zeros in the outcome, as well as the ratio between these two values.
}
\description{
\code{check_zeroinflation()} checks whether count models are over- or
underfitting zeros in the outcome.
}
\details{
If the number of observed zeros is larger than the number of
predicted zeros, the model is underfitting zeros, which indicates
zero-inflation in the data. In such cases, it is recommended to use
negative binomial or zero-inflated models.
}
\examples{
if (require("glmmTMB")) {
data(Salamanders)
m <- glm(count ~ spp + mined, family = poisson, data = Salamanders)
check_zeroinflation(m)
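# widen the tolerance band around a ratio of 1
check_zeroinflation(m, tolerance = 0.1)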
}
}
performance/man/performance_aicc.Rd 0000644 0001762 0000144 00000002313 13610307766 017105 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/performance_aicc.R
\name{performance_aicc}
\alias{performance_aicc}
\alias{performance_aic}
\title{Compute AIC and second order AIC}
\usage{
performance_aicc(x, ...)
performance_aic(x, ...)
}
\arguments{
\item{x}{A model object.}
\item{...}{Currently not used.}
}
\value{
Numeric, the AIC or AICc value.
}
\description{
Compute the second-order Akaike's information criterion (AICc).
The second-order (or "small sample") AIC is an AIC with a correction for small
sample sizes. \code{performance_aic()} is a small wrapper that returns the AIC. It is
a generic function that also works for some models that don't have an AIC method
(like Tweedie models).
}
\examples{
m <- lm(mpg ~ wt + cyl + gear + disp, data = mtcars)
AIC(m)
performance_aicc(m)
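# the plain AIC via the package's generic wrapper
performance_aic(m)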
}
\references{
\itemize{
\item Akaike, H. (1973) Information theory as an extension of the maximum likelihood principle. In: Second International Symposium on Information Theory, pp. 267–281. Petrov, B.N., Csaki, F., Eds, Akademiai Kiado, Budapest.
\item Hurvich, C. M., Tsai, C.-L. (1991) Bias of the corrected AIC criterion for underfitted regression and time series models. Biometrika 78, 499–509.
}
}
performance/man/figures/ 0000755 0001762 0000144 00000000000 13620205622 014770 5 ustar ligges users performance/man/figures/logo.png 0000644 0001762 0000144 00000075243 13620205622 016451 0 ustar ligges users [binary PNG data: package logo, omitted]