CalibratR/0000755000176200001440000000000013526516525012127 5ustar liggesusersCalibratR/NAMESPACE0000644000176200001440000000303313526505462013343 0ustar liggesusers# Generated by roxygen2: do not edit by hand - MODIFIED export(calibrate) export(predict_calibratR) export(reliability_diagramm) export(statistics_calibratR) export(visualize_calibratR) export(getECE) export(getMCE) export(build_GUESS) export(predict_GUESS) import(foreach) importFrom(doParallel,registerDoParallel) importFrom(fitdistrplus,denscomp) importFrom(ggplot2,aes) importFrom(ggplot2,coord_fixed) importFrom(ggplot2,element_text) importFrom(ggplot2,geom_abline) importFrom(ggplot2,geom_boxplot) importFrom(ggplot2,geom_density) importFrom(ggplot2,geom_histogram) importFrom(ggplot2,geom_hline) importFrom(ggplot2,geom_label) importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_text) importFrom(ggplot2,geom_vline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) importFrom(ggplot2,labs) importFrom(ggplot2,position_dodge) importFrom(ggplot2,scale_color_brewer) importFrom(ggplot2,scale_color_discrete) importFrom(ggplot2,scale_colour_manual) importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,scale_x_discrete) importFrom(ggplot2,stat_bin) importFrom(ggplot2,theme) importFrom(ggplot2,xlab) importFrom(ggplot2,xlim) importFrom(ggplot2,ylab) importFrom(ggplot2,ylim) importFrom(graphics,hist) importFrom(pROC,roc) importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) importFrom(reshape2,melt) importFrom(stats,binom.test) importFrom(stats,median) importFrom(stats,na.omit) importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,t.test) CalibratR/data/0000755000176200001440000000000013330646360013032 5ustar liggesusersCalibratR/data/example.rda0000644000176200001440000001116013330646305015153 0ustar liggesusers7zXZi"6!X1>4])TW"nRʟOqcWK>:w8kYAn:V'[MJarMĨ50nֈLfawRDfj'=nG-W51)U^aRb"8IpZ,)У|or76;SV&Kd`7 8=R)mawMh)4|:o0,]˥idI?E\R0#Pj~uurm/>Dƅx!ZK꽩qwv| epFj @=(9j%;āaX-k,X9Ԟ."bS |N\xtkݱ\22$lLmOUi^/Q ,wvooXeX| ={kk'￯S1b2.q 8*tI8tw||$Dd|XL㾹)Ƥ,/+y(4k=]},lZ(AS mRx -# @(}z!T.((m/]tʣPvzCgO/짪`R.o]`(zujkDt#uU"TS-8`Z}YR6WΎktMhKSz0,'l:k#Bz[2@-_hO*cQx98sIյbx9JƆmHXЂVɦdK.w)Rt 4㭎o*=4)ɰ(C@(͕!M >j_#q}}?wlKXp ىx3 Iv>!}t"~Mg mq0')wx_A@[b*E?4ApEeދ0VG"폟Pgm"e.5^?ft/9t[>L Gg*#822Iz`ղ+pvi-D81rl;hHDn-yo#rxxvxн:n  ͫJ$̟h;Y!x]I$qD{o1 lvu.^v=%B2YLߨe6[p]god7$Fd%m,D HDuˮ|Lށ^\gң' n{Ci~'hUu(K.I*lwzk-sl:D`hlpD?;"E<e-^2gqiz 4#0Wl2Id@`h6 SdG;OhAxdԜz{Pv̜t?3洋W/PU}^T=0NpJF C c s?澦}~KXݏbnrDL"RL.1<(9O|⛱݄&54٥]7wlٜBJ &a$~<D1Ae]]>HFŜr,.&+)F }Q6ogi Z6_\og ޫU<[l=Ù6Zѿ1BM뮖+[mw1~0JϦkQB̏=6*݋A W\aE4G!e:Uݰ[vj\g 7/_^^SHTN`77=!3#̤x[ډ%aPZT, xR mN!`*{2EU8婥6Aw/+Ϟ?K/+O|*'K í_xnVi'#Bf+Xt*̭>Rkc_6>A#;_!7|;Y]A"8a;$#qפI u @Z$n/~Д-p5w ϡpL 6u%ׅ>YkDՓ"طYF!Qo)rSm;Kvү u:oơEz)UQ+ޒ^ e^䅛V.@AšC r~vx=&_8+8D; ƜV,_lC*ˬ[xy3%mD;p?[<` 0&҄n#:&(/gB&}ΫWz'nZ% CddJN-0HJ',Y{w΋+>' ,3+z5*VhoO\i8q;Gy؄z${䈃+H#z2|tŸ'0rj^ <2 |+,6OэՑ?dW<ȕ嵣j5N:پIZ".ӈ!^ Gﶟk=d5aVy(?zȿLCYAB ,oȄ R-W;ܚȕm#/,OqXݡ?rV!P]Yy?ǰ.tw]|Š֊cKRy^ #`n\M$Ry2"CcylD ǎQ|^a^OɂJ k'hо\exIbb0Il!r5ˇ"k ij, Fг-rd R hyƢÓb _S5¹90J ΆY yp4CVq "(6RYfyFBn: f9Ζ<nچnҼ h^?m2pgc)ܽxﻶ%F_:l@CqRDStX{Aq@?y`Y|7 d$S>ɡ U?n ?h#{J絝wRhN##iI| ~G|5{_{ÓNSn?)>SHyy4JUɜX<8<534.'wӌ/uZ%n]vb]} *sF"|C혞.鑙zSx0 YZCalibratR/man/0000755000176200001440000000000013332313604012666 5ustar liggesusersCalibratR/man/get_CLE_comparison.Rd0000644000176200001440000000142113332671430016653 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/CLES_in_comparison.R \name{get_CLE_comparison} \alias{get_CLE_comparison} 
\title{get_CLE_comparison} \usage{ get_CLE_comparison(list_models) } \arguments{ \item{list_models}{list object that contains all error values for all trained calibration models. For the specific format, see the calling function \code{\link{visualize_calibratR}}.} } \value{ ggplot2 } \description{ visualises how class 1 and class 0 classification errors (CLE) differ in each trained calibration model. Comparing class-specific CLE helps to choose a calibration model for applications where classification error is cost-sensitive for one class. See \code{\link{get_CLE_class}} for details on the implementation. } CalibratR/man/calibrate_me.Rd0000644000176200001440000000152013332335337015571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/calibrate_me.R \name{calibrate_me} \alias{calibrate_me} \title{calibrate_me} \usage{ calibrate_me(actual, predicted, model_idx) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{model_idx}{a single number from 1 to 5, indicating which calibration model should be implemented, 1=hist_scaled, 2=hist_transformed, 3=BBQ_scaled, 4=BBQ_transformed, 5=GUESS} } \value{ depending on the value of \code{model_idx}, the respective calibration model is built on the input from \code{actual} and \code{predicted} } \description{ trains calibration models on the training set of \code{predicted}/\code{actual} value pairs. \code{model_idx} specifies which models should be trained. } CalibratR/man/plot_class_distributions.Rd0000644000176200001440000000201613332671430020305 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_class_distributions.R \name{plot_class_distributions} \alias{plot_class_distributions} \title{plot_class_distributions} \usage{ plot_class_distributions(build_guess_object, pred_idx) } \arguments{ \item{build_guess_object}{output from build_GUESS()} \item{pred_idx}{if \code{pred_idx}=1 GUESS_1 is plotted; if \code{pred_idx}=2 GUESS_2 is plotted} } \value{ ggplot object that visualizes the returned calibrated prediction estimates by GUESS_1 or GUESS_2 } \description{ plots the returned conditional class probabilities P(x|C) of GUESS_1 or GUESS_2 models. Which GUESS model is plotted can be specified in \code{pred_idx}. } \seealso{ \code{\link[reshape2]{melt}} \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_vline}},\code{\link[ggplot2]{geom_text}} } CalibratR/man/predict_calibratR.Rd0000644000176200001440000000334713332610110016571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict_calibratR_parallel.R \name{predict_calibratR} \alias{predict_calibratR} \title{predict_calibratR} \usage{ predict_calibratR(calibration_models, new = NULL, nCores = 4) } \arguments{ \item{calibration_models}{list of trained calibration models that were constructed using the \code{\link{calibrate}} method. The list component \code{calibration_models} from the \code{\link{calibrate}} output can be used directly.} \item{new}{vector of new uncalibrated instances. Default: 100 scores from the minimum to the maximum of the original ML scores} \item{nCores}{\code{nCores} how many cores should be used during parallelisation.
Default: 4} } \value{ list object with the following components: \item{predictions}{a list containing the calibrated predictions for each calibration model} \item{significance_test_set}{a list containing the percentage of \code{new} instances for which prediction estimates are statistically significant} \item{pred_per_bin}{a list containing the number of instances in each bin for the binning models} } \description{ maps the uncalibrated predictions \code{new} into calibrated predictions using the passed over \code{calibration models} } \details{ if no \code{new} value is given, the function will evaluate a sequence of numbers ranging from the minimum to the maximum of the original values in the training set } \examples{ ## Loading dataset in environment data(example) test_set <- example$test_set calibration_model <- example$calibration_model ## Predict for test set predictions <- predict_calibratR(calibration_model$calibration_models, new=test_set, nCores = 2) } \author{ Johanna Schwarz } CalibratR/man/rd_multiple_runs.Rd0000644000176200001440000000227713332626754016571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rd_multiple_runs.R \name{rd_multiple_runs} \alias{rd_multiple_runs} \title{rd_multiple_runs} \usage{ rd_multiple_runs(list_models) } \arguments{ \item{list_models}{list object that contains n-times the output from the \code{\link{reliability_diagramm}}. method.} } \value{ a list object that contains a reliability diagram that visualises all reliabilty diagrams that were constructed during n-times repeated m-fold cross-validation. } \description{ This functions plots all n reliability diagrams that were constructed during n-times repeated m-fold cross-validation (CV). During calibration model evaluation, CV is repeated n times, so that eventually n reliability diagrams are obtained. } \seealso{ \code{\link[reshape2]{melt}} \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{geom_abline}},\code{\link[ggplot2]{ylab}},\code{\link[ggplot2]{xlab}},\code{\link[ggplot2]{xlim}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{coord_fixed}},\code{\link[ggplot2]{geom_text}},\code{\link[ggplot2]{scale_color_discrete}},\code{\link[ggplot2]{ggtitle}} } CalibratR/man/uncalibrated_CV.Rd0000644000176200001440000000303013332607510016200 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/no_calibration_CV.R \name{uncalibrated_CV} \alias{uncalibrated_CV} \title{uncalibrated_CV} \usage{ uncalibrated_CV(actual, predicted, n_folds = 10, seed, input) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{n_folds}{number of folds for the cross-validation, Default: 10} \item{seed}{random seed to alternate the split of data set partitions} \item{input}{specify if the input was scaled or transformed, scaled=1, transformed=2} } \value{ list object containing the following components: \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} \item{type}{"uncalibrated"} \item{probs_CV}{vector of input-preprocessed predictions that was used during the CV} \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} } \description{ performs \code{n_folds}-CV but with only input-preprocessing the test set. No calibration model is trained and evaluated in this method. 
The \code{predicted} values are partitioned into n subsets. The training set is constructed on (n-1) subsets; the remaining set is used for testing. Since no calibration model is used in this method, the test set predictions are only input-preprocessed (either scaled or transformed, depending on \code{input}). All test set predictions are merged and used to compute error metrics for the input-preprocessing methods. } CalibratR/man/evaluate_discrimination.Rd0000644000176200001440000000210313332610110020043 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/evaluate_discrimination.R \name{evaluate_discrimination} \alias{evaluate_discrimination} \title{evaluate_discrimination} \usage{ evaluate_discrimination(actual, predicted, cutoff = NULL) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{cutoff}{cut-off to be used for the computation of npv, ppv, sensitivity and specificity, Default: value that maximizes sensitivity and specificity (Youden-Index)} } \value{ list object with the following components: \item{sens}{sensitivity} \item{spec}{specificity} \item{acc}{accuracy} \item{ppv}{positive predictive value} \item{npv}{negative predictive value} \item{cutoff}{cut-off that was used to compute the error values} \item{auc}{AUC value} } \description{ computes various discrimination error values, namely: sensitivity, specificity, accuracy, positive predictive value (ppv), negative predictive value (npv) and AUC } \seealso{ \code{\link[pROC]{roc}} } CalibratR/man/statistics_calibratR.Rd0000644000176200001440000000767413332316416017354 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/statistics_calibratR.R \name{statistics_calibratR} \alias{statistics_calibratR} \title{statistics_calibratR} \usage{ statistics_calibratR(calibrate_object, t.test_partitions = TRUE, significance_models = TRUE) } \arguments{ \item{calibrate_object}{list that is returned from the \code{\link{calibrate}} function. The parameter \code{n_seeds} is available as a list component of the \code{calibrate_object}} \item{t.test_partitions}{Performs a paired two sided t.test over the error values (ECE, CLE1, CLE0, MCE, AUC, sensitivity and specificity) from the random partition splits comparing a possible significant difference in mean among the calibration models. All models and the original, scaled and transformed values are tested against each other. The p_value and the effect size of the t.test are returned to the user. Can only be performed, if the \code{calibrate_object} contains a \code{summary_CV} list object, else, an error is returned. Default: TRUE} \item{significance_models}{returns important characteristics of the implemented calibration models, Default: TRUE} } \value{ An object of class list, with the following components: \item{mean_calibration}{mean of calibration error values (ECE_equal_width, MCE_equal_width, ECE_equal_freq, MCE_equal_freq, RMSE, Class 1 CLE, Class 0 CLE, Brier Score, Class 1 Brier Score, Class 0 Brier Score) over \code{n_seeds} times repeated 10-fold CV. ECE and MCE are computed once using equal-width and once using equal-frequency binning for the construction of the underlying binning scheme. Only returned, if \code{calibrate_object} contains a summary_CV list object.} \item{standard_deviation}{standard deviation of calibration error values over \code{n_seeds} times repeated 10-fold CV. 
Only returned, if \code{calibrate_object} contains a summary_CV list object.} \item{var_coeff_calibration}{variation coefficient of calibration error values over \code{n_seeds} times repeated 10-fold CV. Only returned, if \code{calibrate_object} contains a summary_CV list object.} \item{mean_discrimination}{mean of discrimination error (sensitivity, specificity, AUC, positive predictive value, negative predictive value, accuracy) values over \code{n_seeds} times repeated 10-fold CV. The "cut-off" is the cut-off value that maximizes sensitivity and specificity. Only returned, if \code{calibrate_object} contains a summary_CV list object.} \item{sd_discrimination}{standard deviation of discrimination error values over \code{n_seeds} times repeated 10-fold CV. Only returned, if \code{calibrate_object} contains a summary_CV list object.} \item{var_coeff_discrimination}{variation coefficient of discrimination error values over \code{n_seeds} times repeated 10-fold CV. Only returned, if \code{calibrate_object} contains a summary_CV list object.} \item{t.test_calibration}{=list(p_value=t.test.calibration, effect_size=effect_size_calibration), only returned if t.test=TRUE} \item{t.test_discrimination}{=list(p_value=t.test.discrimination, effect_size=effect_size_discrimination), only returned if t.test=TRUE} \item{significance_models}{only returned if significance_models=TRUE} \item{n_seeds}{number of random data set partitions into training and test set for \code{folds}-times CV} \item{original_values}{list object that consists of the \code{actual} and \code{predicted} values of the original scores} } \description{ this method offers a variety of statistical evaluation methods for the output of the \code{\link{calibrate}} method. All returned error values represent mean error values over the \code{n_seeds} times repeated 10-fold CV. 
} \examples{ ## Loading dataset in environment data(example) calibration_model <- example$calibration_model statistics <- statistics_calibratR(calibration_model) } \seealso{ \code{\link[stats]{t.test}},\code{\link[stats]{friedman.test}} } \author{ Johanna Schwarz } CalibratR/man/predict_BBQ.Rd0000644000176200001440000000202313332610110015264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict_BBQ.R \name{predict_BBQ} \alias{predict_BBQ} \title{predict_BBQ} \usage{ predict_BBQ(bbq, new, option) } \arguments{ \item{bbq}{output from the \code{\link{build_BBQ}} method} \item{new}{vector of uncalibrated probabilities} \item{option}{either 1 or 0; averaging=1, selecting=0} } \value{ a list object containing the following components: \item{predictions}{contains a vector of calibrated predictions} \item{pred_idx}{which option was used (averaging or selecting)} \item{significance_test_set}{the percentage of \code{new} instances that was evaluated using significant prediction estimates} \item{pred_per_bin}{number of instances \code{new} in each bin of the selected model} } \description{ maps the uncalibrated probabilities \code{new} to calibrated prediction estimates using a BBQ calibration model trained with \code{\link{build_BBQ}}. Depending on \code{option}, either the single best-scoring binning model is used (selecting) or the predictions of all binning models are combined, weighted by their Bayesian score (averaging). } \details{ Based on the paper (and matlab code): "Obtaining Well Calibrated Probabilities Using Bayesian Binning" by Naeini, Cooper and Hauskrecht; https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4410090/ } CalibratR/man/visualize_calibratR.Rd0000644000176200001440000001230413332575506017166 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/visualize_calibratR.R \name{visualize_calibratR} \alias{visualize_calibratR} \title{visualize_calibratR} \usage{ visualize_calibratR(calibrate_object, visualize_models = FALSE, plot_distributions = FALSE, rd_partitions = FALSE, training_set_calibrated = FALSE) } \arguments{ \item{calibrate_object}{the list component \code{calibration_models} from the \code{\link{calibrate}} method} \item{visualize_models}{returns the list components \code{plot_calibration_models} and \code{plot_single_models}} \item{plot_distributions}{returns a density distribution plot of the calibrated predictions after CV (External) or without CV (internal)} \item{rd_partitions}{returns a reliability diagram for each model} \item{training_set_calibrated}{returns a list of ggplots. Each plot represents the calibrated predictions by the respective calibration model of the training set. If the list object \code{predictions} in the \code{calibrate_object} is empty, \code{training_set_calibrated} is returned as NULL.} } \value{ An object of class list, with the following components: \item{histogram_distribution}{returns a histogram of the original ML score distribution} \item{density_calibration_internal}{returns a list of density distribution plots for each calibration method, the original and the two input-preprocessing methods scaling and transforming. The plot visualises the density distribution of the calibrated predictions of the training set. In this case, training and test set values are identical, so be careful to evaluate the plots.} \item{density_calibration_external}{returns a list of density distribution plots for each calibration method, the original and the two input-preprocessing methods scaling and transforming. The plot visualises the density distribution of the calibrated predictions that were returned during Cross Validation.
If more than one repetition of CV was performed, run number 1 is evaluated} \item{plot_calibration_models}{ maps the original ML scores to their calibrated prediction estimates for each model. This enables easy model comparison over the range of ML scores See also \code{\link{compare_models_visual}}. } \item{plot_single_models}{returns a list of ggplots for each calibration model, also mapping the original ML scores to their calibrated prediction. Significance values are indicated. See also \code{\link{plot_model}}} \item{rd_plot}{returns a list of reliability diagrams for each of the implemented calibration models and the two input-preprocessing methods "scaled" and "transformed". The returned plot visualises the calibrated predictions that were returned for the test set during each of the n run of the n-times repeated CV. Each grey line represents one of the n runs. The blue line represents the median of all calibrated bin predictions. Insignificant bin estimates are indicated with "ns". If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{rd_plot} is returned as NULL} \item{calibration_error}{returns a list of boxplots for the calibration error metrics ECE, MCE, CLE and RMSE. The n values for each model represent the obtained error values during the n times repeated CV. If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{calibration_error} is returned as NULL} \item{discrimination_error}{returns a list of boxplots for the discrimination error AUC, sensitivity and specificity. The n values for each model represent the obtained error values during the n times repeated CV. If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{discrimination_error} is returned as NULL} \item{cle_class_specific_error}{If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{cle_class_specific_error} is returned as NULL} \item{training_set_calibrated}{returns a list of ggplots. Each plot represents the calibrated predictions by the respective calibration model of the training set. 
If the list object \code{predictions} in the \code{calibrate_object} is empty, \code{training_set_calibrated} is returned as NULL.} \item{GUESS_1_final_model}{plots the returned conditional probability p(x|Class) values of the GUESS_1 model} \item{GUESS_2_final_model}{plots the returned conditional probability p(x|Class) values of the GUESS_2 model} } \description{ this method offers a variety of visualisations to compare implemented calibration models } \examples{ ## Loading dataset in environment data(example) calibration_model <- example$calibration_model visualisation <- visualize_calibratR(calibration_model, plot_distributions=FALSE, rd_partitions=FALSE, training_set_calibrated=FALSE) } \seealso{ \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_density}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{scale_fill_manual}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{geom_hline}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{element_text}} \code{\link[reshape2]{melt}} } \author{ Johanna Schwarz } CalibratR/man/getECE.Rd0000644000176200001440000000125513332316132014253 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getECE.R \name{getECE} \alias{getECE} \title{getECE} \usage{ getECE(actual, predicted, n_bins = 10) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{n_bins}{number of bins of the underlying equal-frequency histogram, Default: 10} } \value{ equal-frequency ECE value } \description{ Expected Calibration Error (ECE); the predictions are divided into 10 equal-frequency bins (default) and the mean of the observed (0/1) vs. mean of predicted is calculated per bin, weighted by the empirical frequency of elements in bin i } CalibratR/man/scale_me.Rd0000644000176200001440000000136213332335337014736 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/format_values.R \name{scale_me} \alias{scale_me} \title{scale_me} \usage{ scale_me(x, min = NULL, max = NULL) } \arguments{ \item{x}{vector of predictions} \item{min}{minimum of \code{x}, Default: NULL} \item{max}{maximum of \code{x}, Default: NULL} } \value{ scaled values of \code{x} } \description{ maps all instances in \code{x} to the [0;1] range using the equation: \cr y = (x-min)/(max-min) \cr If no values for min and max are given, they are by default calculated as min=min(x) and max=max(x) } \details{ if \code{x} is greater (smaller) than \code{max} (\code{min}), its calibrated prediction is set to 1 (0) and a warning is triggered.
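As a small worked example of the mapping above: with \code{min=2} and \code{max=6}, a score of 5 is mapped to (5-2)/(6-2) = 0.75; a score of 7 lies above \code{max} and is therefore capped at 1, with a warning.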
} CalibratR/man/predict_GUESS.Rd0000644000176200001440000000322713332610110015551 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict_GUESS.R \name{predict_GUESS} \alias{predict_GUESS} \title{predict_GUESS} \usage{ predict_GUESS(build_guess_object, new, density_evaluation = 2, return_class_density = FALSE) } \arguments{ \item{build_guess_object}{output from the \code{\link{build_GUESS}} method} \item{new}{vector of uncalibrated probabilities} \item{density_evaluation}{which density evaluation method should be used to calculate probabilities, Default: 2} \item{return_class_density}{if set to TRUE, class densities p(x|class) are returned, Default: FALSE} } \value{ a list object containing the following components: \item{predictions}{contains a vector of calibrated predictions} \item{pred_idx}{which density evaluation method was used} \item{significance_test_set}{the percentage of \code{new} instances that was evaluated using significant prediction estimates} \item{dens_case}{a vector containing the p(x|case) values} \item{dens_control}{a vector containing the p(x|control) values} } \description{ returns calibrated predictions for the instances \code{new} using the trained GUESS calibration model \code{build_guess_object}. Two different evaluation methods are available. Method 1: returns the p-value for the score \code{new} under the distribution that is handed over in the \code{build_guess_object}. Method 2: returns the probability density value for the score \code{new} under the distribution that is handed over in the \code{build_guess_object}. } \details{ \code{dens_case} and \code{dens_control} are only returned when \code{return_class_density} is set to TRUE } CalibratR/man/getMCE.Rd0000644000176200001440000000106313332316132014260 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getMCE.R \name{getMCE} \alias{getMCE} \title{getMCE} \usage{ getMCE(actual, predicted, n_bins = 10) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{n_bins}{number of bins of the underlying equal-frequency histogram, Default: 10} } \value{ equal-frequency MCE value } \description{ Maximum Calibration Error (MCE), returns maximum calibration error for equal-frequency binning model } CalibratR/man/GUESS_CV.Rd0000644000176200001440000000263313332620176014444 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/GUESS_CV.R \name{GUESS_CV} \alias{GUESS_CV} \title{GUESS_CV} \usage{ GUESS_CV(actual, predicted, n_folds = 10, method_of_prediction = 2, seed, input) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{n_folds}{number of folds for the cross-validation, Default: 10} \item{method_of_prediction}{which prediction method should be used for the GUESS model during the CV (see \code{\link{predict_GUESS}}), Default: 2} \item{seed}{random seed to alternate the split of data set partitions} \item{input}{specify if the input was scaled or transformed, scaled=1, transformed=2} } \value{ list object containing the following components: \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} \item{type}{"GUESS"} \item{pred_idx}{which prediction method was used during CV} \item{probs_CV}{vector of calibrated predictions that was used during the CV} \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} } \description{ trains and evaluates the
GUESS calibration model using \code{folds}-Cross-Validation (CV). The \code{predicted} values are partitioned into n subsets. A GUESS model is constructed on (n-1) subsets; the remaining set is used for testing the model. All test set predictions are merged and used to compute error metrics for the model. } CalibratR/man/visualize_error_boxplot.Rd0000644000176200001440000000570113332671430020157 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/visualize_errors.R \name{visualize_error_boxplot} \alias{visualize_error_boxplot} \title{visualize_error_boxplot} \usage{ visualize_error_boxplot(list_models, discrimination = TRUE) } \arguments{ \item{list_models}{list object that contains all error values for all trained calibration models. For the specific format, see the calling function \code{\link{visualize_calibratR}}.} \item{discrimination}{boolean (TRUE or FALSE). If TRUE, discrimination errors are compared between models; if FALSE calibration error is compared, Default: TRUE} } \value{ An object of class list, with the following components: \cr if \code{discrimination}=TRUE \item{sens}{ggplot2 boxplot that compares all evaluated calibration models with regard to sensitivity.} \item{spec}{ggplot2 boxplot that compares all evaluated calibration models with regard to specificity} \item{acc}{ggplot2 boxplot that compares all evaluated calibration models with regard to accuracy} \item{auc}{ggplot2 boxplot that compares all evaluated calibration models with regard to AUC} \item{list_errors}{list object that contains all discrimination error values that were used to construct the boxplots} \cr if \code{discrimination}=FALSE \item{ece}{ggplot2 boxplot that compares all evaluated calibration models with regard to expected calibration error} \item{mce}{ggplot2 boxplot that compares all evaluated calibration models with regard to maximum expected calibration error (MCE)} \item{rmse}{ggplot2 boxplot that compares all evaluated calibration models with regard to root mean square error (RMSE)} \item{cle_0}{ggplot2 boxplot that compares all evaluated calibration models with regard to class 0 classification error (CLE)} \item{cle_1}{ggplot2 boxplot that compares all evaluated calibration models with regard to class 1 classification error (CLE)} \item{list_errors}{list object that contains all calibration error values that were used to construct the boxplots} } \description{ compares error values among different calibration models. A boxplots is created from the n error values that were obtained during the n-times repeated Cross-Validation procedure. 
Different error values are implemented and can be compared: \cr discrimination error = sensitivity, specificity, accuracy, AUC (when \code{discrimination}=TRUE) \cr calibration error = ece, mce, rmse, class 0 cle, class 1 cle (when \code{discrimination}=FALSE) For the calculation of the errors, see the respective methods listed in the "see also" section } \seealso{ \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{ggtitle}},\code{\link[ggplot2]{scale_x_discrete}},\code{\link[ggplot2]{geom_boxplot}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{element_text}} \code{\link[reshape2]{melt}},\code{\link{get_CLE_class}},\code{\link{getECE}},\code{\link{getMCE}},\code{\link{getRMSE}}, \code{\link{evaluate_discrimination}} } CalibratR/man/get_CLE_class.Rd0000644000176200001440000000252113332610110015574 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_CLE.R \name{get_CLE_class} \alias{get_CLE_class} \title{get_CLE_class} \usage{ get_CLE_class(actual, predicted, bins = 10) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{bins}{number of bins for the equal-width binning model, default=10} } \value{ object of class list containing the following components: \item{class_1}{CLE of class 1 instances} \item{class_0}{CLE of class 0 instances} } \description{ calculates the class-specific classification error CLE in the test set. The method computes the deviation of the calibrated predictions of class 1 instances from their true value 1. For class 0 instances, \code{get_CLE_class} computes the deviation from 0. Class 1 CLE is 0 when all class 1 instances have a calibrated prediction of 1 regardless of potential miscalibration of class 0 instances. CLE calculation is helpful when miscalibration and -classification is more cost-sensitive for one class than for the other. } \seealso{ \code{\link[reshape2]{melt}} \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{position_dodge}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{scale_colour_manual}} } CalibratR/man/binom_for_histogram.Rd0000644000176200001440000000103413332326476017215 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/binomtest_for_bin.R \name{binom_for_histogram} \alias{binom_for_histogram} \title{binom_for_histogram} \usage{ binom_for_histogram(n_x) } \arguments{ \item{n_x}{numeric vector of two integers. 
The first one is the number of cases in the bin; the second the number of instances in the bin} } \value{ p-value from stats::binom.test method } \description{ p_values from stats::binom.test for each bin, if bin is empty, a p-value of 2 is returned } CalibratR/man/get_MCE_equal_width.Rd0000644000176200001440000000107013332131040016774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getMCE.R \name{get_MCE_equal_width} \alias{get_MCE_equal_width} \title{get_MCE_equal_width} \usage{ get_MCE_equal_width(actual, predicted, bins = 10) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{bins}{number of bins for the binning model} } \value{ equal-width MCE value } \description{ Maximum Calibration Error (MCE), returns maximum calibration error for equal-width binning model } CalibratR/man/predict_model.Rd0000644000176200001440000000161313332671430015774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict_model.R \name{predict_model} \alias{predict_model} \title{predict_model} \usage{ predict_model(new, calibration_model, min, max, mean, inputtype) } \arguments{ \item{new}{vector of uncalibrated predictions} \item{calibration_model}{calibration model to be used for the calibration. Can be the output of \code{\link{build_BBQ}},\code{\link{build_hist_binning}} or \code{\link{build_GUESS}}.} \item{min}{minimum value of the original data set} \item{max}{maximum value of the original data set} \item{mean}{mean value of the original data set} \item{inputtype}{specify if the model was build on original (=0), scaled(=1) or transformed (=2) data} } \value{ vector of calibrated predictions } \description{ calibrates the uncalibrated predictions \code{new} using \code{calibration_model}. } CalibratR/man/visualize_calibrated_test_set.Rd0000644000176200001440000000206713332620176021266 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/visualize_calibrated_test_set.R \name{visualize_calibrated_test_set} \alias{visualize_calibrated_test_set} \title{visualize_calibrated_test_set} \usage{ visualize_calibrated_test_set(actual, predicted_list, cutoffs) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted_list}{predict_calibratR$predictions object (list of calibrated predictions from calibration models)} \item{cutoffs}{vector of optimal cut-off thresholds for each calibration model} } \value{ ggplot2 element for visual comparison of the evaluated calibration models } \description{ plots a panel for all calibrated predictions from the respective calibration model. 
Allows visual comparison of the models output and their optimal cut off } \seealso{ \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{xlab}},\code{\link[ggplot2]{ylab}},\code{\link[ggplot2]{geom_hline}},\code{\link[ggplot2]{ylim}} } CalibratR/man/build_BBQ.Rd0000644000176200001440000000143613332620423014744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_BBQ.R \name{build_BBQ} \alias{build_BBQ} \title{build_BBQ} \usage{ build_BBQ(actual, predicted) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} } \value{ returns the BBQ model which includes models for all evaluated binning schemes; the prunedmodel contains only a selection of BBQ models with the best Bayesian score } \description{ This method builds a BBQ calibration model using the trainings set provided. } \details{ Based on the paper (and matlab code) : "Obtaining Well Calibrated Probabilities Using Bayesian Binning" by Naeini, Cooper and Hauskrecht: ; https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4410090/ } CalibratR/man/get_ECE_equal_width.Rd0000644000176200001440000000130313332131040016763 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getECE.R \name{get_ECE_equal_width} \alias{get_ECE_equal_width} \title{get_ECE_equal_width} \usage{ get_ECE_equal_width(actual, predicted, bins = 10) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{bins}{number of bins for the equal-width binning model} } \value{ equal-width ECE value } \description{ Expected Calibration Error (ECE); the model is divided into 10 equal-width bins (default) and the mean of the observed (0/1) vs. 
mean of predicted is calculated per bin, weighted by the empirical frequency of elements in bin i } CalibratR/man/plot_model.Rd0000644000176200001440000000174513332575506015325 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_model.R \name{plot_model} \alias{plot_model} \title{plot_model} \usage{ plot_model(calibration_model, seq = NULL) } \arguments{ \item{calibration_model}{output from the \code{\link{calibrate}} method.} \item{seq}{sequence of ML scores over which the mapping function should be evaluated, Default: 100 scores from the minimum to the maximum of the original ML scores} } \value{ ggplot object } \description{ this method visualizes all implemented calibration models as a mapping function between original ML scores (x-axis) and calibrated predictions (y-axis) } \seealso{ \code{\link[reshape2]{melt}} \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_text}},\code{\link[ggplot2]{geom_vline}} } CalibratR/man/visualize_distribution.Rd0000644000176200001440000000163413332610110017763 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/visualize_distribution.R \name{visualize_distribution} \alias{visualize_distribution} \title{visualize_distribution} \usage{ visualize_distribution(actual, predicted) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} } \value{ list object containing the following components: \item{plot_distribution}{ggplot histogram that visualizes the observed class distributions} \item{parameter}{list object that summarizes all relevant parameters (mean, sd, number) of the observed class distributions} } \description{ visualises the distribution of the original ML scores, separately for class 0 and class 1 instances, and summarises the relevant distribution parameters (mean, sd, number of instances) per class } \seealso{ \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_histogram}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{scale_fill_manual}},\code{\link[ggplot2]{labs}} } CalibratR/man/build_hist_binning.Rd0000644000176200001440000000175713332620176017022 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_hist_binning.R \name{build_hist_binning} \alias{build_hist_binning} \title{build_hist_binning} \usage{ build_hist_binning(actual, predicted, bins = NULL) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{bins}{number of bins that should be used to build the binning model, Default: decide_on_break estimates optimal number of bins} } \value{ returns the trained histogram model that can be used to calibrate a test set using the \code{\link{predict_hist_binning}} method } \description{ calculates the estimated probability per bin; input are the predicted and real scores as numeric vectors. Builds a histogram binning model which can be used to calibrate uncalibrated predictions using the \code{\link{predict_hist_binning}} method } \details{ if the training set is smaller than the threshold (15 bins*5 elements=75), the number of bins is decreased } CalibratR/man/getRMSE.Rd0000644000176200001440000000066713332131040014424 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/getRMSE.R \name{getRMSE} \alias{getRMSE} \title{getRMSE} \usage{ getRMSE(actual, predicted) } \arguments{ \item{actual}{vector of
observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} } \value{ RMSE value } \description{ calculates the root mean square error (RMSE) in the test set of calibrated predictions } CalibratR/man/example.Rd0000644000176200001440000000117313330165721014615 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/example.R \docType{data} \name{example} \alias{example} \title{example} \format{\code{predicted}=vector of 200 simulated classifier values; \code{actual}=their respective true class labels (0/1)} \usage{ data(example) } \description{ list object containing 1) the simulated classifier scores for two classes: distributions are simulated from Gaussian distributions, Normal(mean=1.5, sd=0) for class 1 and Normal(mean=0, sd=0) for class 0 instances, with 100 instances per class; and 2) a test set of 100 instances } \keyword{datasets} CalibratR/man/compare_models_visual.Rd0000644000176200001440000000136113330610756017540 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare_models_visual.R \name{compare_models_visual} \alias{compare_models_visual} \title{compare_models_visual} \usage{ compare_models_visual(models, seq = NULL) } \arguments{ \item{models}{list object that contains the trained calibration models to be compared. For the specific format, see the calling function \code{\link{visualize_calibratR}}.} \item{seq}{sequence for which the calibrated predictions should be plotted, Default: NULL} } \value{ ggplot object that maps the ML scores in \code{seq} to the calibrated predictions of each evaluated calibration model } \description{ visualises the calibrated predictions of all trained calibration models over a sequence of ML scores, so that the mapping behaviour of the models can be compared directly (see the list component \code{plot_calibration_models} of \code{\link{visualize_calibratR}}) } \seealso{ \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{scale_color_brewer}} \code{\link[reshape2]{melt}} } CalibratR/man/BBQ_CV.Rd0000644000176200001440000000322113332607510014155 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/BBQ_CV.R \name{BBQ_CV} \alias{BBQ_CV} \title{BBQ_CV} \usage{ BBQ_CV(actual, predicted, method_for_prediction = 0, n_folds = 10, seed, input) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{method_for_prediction}{0=selection, 1=averaging, Default: 0} \item{n_folds}{number of folds in the cross-validation, Default: 10} \item{seed}{random seed to alternate the split of data set partitions} \item{input}{specify if the input was scaled or transformed, scaled=1, transformed=2} } \value{ list object containing the following components: \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} \item{pred_idx}{which BBQ prediction method was used during CV, 0=selection, 1=averaging} \item{type}{"BBQ"} \item{probs_CV}{vector of calibrated predictions that was used during the CV} \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} } \description{ trains and evaluates the BBQ calibration model using \code{folds}-Cross-Validation (CV). The \code{predicted} values are partitioned into n subsets. A BBQ model is constructed on (n-1) subsets; the remaining set is used for testing the model. All test set predictions are merged and used to compute error metrics for the model.
} \examples{ ## Loading dataset in environment data(example) actual <- example$actual predicted <- example$predicted BBQ_model <- CalibratR:::BBQ_CV(actual, predicted, method_for_prediction=0, n_folds=4, 123, 1) } CalibratR/man/hist_binning_CV.Rd0000644000176200001440000000264513332620423016227 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_hist_binning_CV.R \name{hist_binning_CV} \alias{hist_binning_CV} \title{hist_binning_CV} \usage{ hist_binning_CV(actual, predicted, n_bins = 15, n_folds = 10, seed, input) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{n_bins}{number of bins used in the histogram binning scheme, Default: 15} \item{n_folds}{number of folds in the cross-validation, Default: 10} \item{seed}{random seed to alternate the split of data set partitions} \item{input}{specify if the input was scaled or transformed, scaled=1, transformed=2} } \value{ list object containing the following components: \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} \item{type}{"hist"} \item{probs_CV}{vector of calibrated predictions that was used during the CV} \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} } \description{ trains and evaluates the histogram binning calibration model using \code{folds}-Cross-Validation (CV). The \code{predicted} values are partitioned into n subsets. A histogram binning model is constructed on (n-1) subsets; the remaining set is used for testing the model. All test set predictions are merged and used to compute error metrics for the model. } CalibratR/man/build_GUESS.Rd0000644000176200001440000000113713332620423015224 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_GUESS.R \name{build_GUESS} \alias{build_GUESS} \title{build_GUESS} \usage{ build_GUESS(actual, predicted) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} } \value{ returns the trained GUESS model that can be used to calibrate a test set using the \code{\link{predict_GUESS}} method } \description{ This method builds a GUESS calibration model using the training set provided.
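A minimal usage sketch, put together from the documented signatures of this method and \code{\link{predict_GUESS}} and the bundled \code{example} data (shown for illustration only): \preformatted{data(example)
## train a GUESS model on the training scores and their labels
GUESS_model <- build_GUESS(example$actual, example$predicted)
## calibrate new, unseen scores with the trained model
calibrated <- predict_GUESS(GUESS_model, new = example$test_set)}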
} \seealso{ \code{\link[fitdistrplus]{denscomp}} } CalibratR/man/get_Brier_score.Rd0000644000176200001440000000067113330610756016264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_Brier_score.R \name{get_Brier_score} \alias{get_Brier_score} \title{get_Brier_score} \usage{ get_Brier_score(actual, predicted) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} } \value{ Brier score of the evaluated predictions } \description{ computes the Brier score of the predictions \code{predicted} with respect to the observed class labels \code{actual}, i.e. the mean squared difference between the predicted probabilities and the observed outcomes (0/1); lower values indicate better calibrated predictions } CalibratR/man/calibrate_me_CV_errors.Rd0000644000176200001440000000263513332336753017560 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/calibrate_me_CV_errors_parallel.R \name{calibrate_me_CV_errors} \alias{calibrate_me_CV_errors} \title{calibrate_me_CV_errors} \usage{ calibrate_me_CV_errors(actual, predicted, model_idx, folds = 10, n_seeds, nCores) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{model_idx}{which calibration models should be implemented, 1=hist_scaled, 2=hist_transformed, 3=BBQ_scaled, 4=BBQ_transformed, 5=GUESS} \item{folds}{number of folds in the cross-validation, Default: 10} \item{n_seeds}{\code{n_seeds} determines how often random data set partition is repeated with varying seed} \item{nCores}{\code{nCores} how many cores should be used during parallelisation. Default: 4} } \value{ returns all trained calibration models that were built during the \code{n_seeds}-times repeated \code{folds}-CV. \cr Error values for each of the \code{n_seeds} CV runs are given. } \description{ trains and evaluates calibration models using \code{n_seeds}-times repeated \code{folds}-Cross-Validation (CV). \code{model_idx} specifies which models should be trained. \cr Model training and evaluation is repeated \code{n_seeds}-times with a different training/test set partition scheme for the CV each time. } \details{ parallelised execution over \code{n_seeds} } CalibratR/man/format_values.Rd0000644000176200001440000000245113332610110016022 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/format_values.R \name{format_values} \alias{format_values} \title{format_values} \usage{ format_values(cases, control, input, min = NULL, max = NULL, mean = NULL) } \arguments{ \item{cases}{instances from class 1} \item{control}{instances from class 0} \item{input}{single integer (0, 1 or 2). specify if the input should be formatted (=0), formatted and scaled (=1) or formatted and transformed (=2)} \item{min}{min value of the original data set, default=calculated on input} \item{max}{max value of the original data set, default=calculated on input} \item{mean}{mean value of the original data set, default=calculated on input} } \value{ list object with the following components: \item{formated_values}{formatted input. If \code{input} is set to 1 (2), the input is additionally scaled (transformed) using the method \code{\link{scale_me}} (\code{\link{transform_me}})} \item{min}{minimum value among all instances} \item{max}{maximum value among all instances} \item{mean}{mean value among all instances} } \description{ returns formatted input.
If specified, the uncalibrated input is mapped to the [0;1] range using scaling (\code{\link{scale_me}}) or transforming (\code{\link{transform_me}}) } CalibratR/man/predict_hist_binning.Rd0000644000176200001440000000151313332607510017344 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict_hist_binning.R \name{predict_hist_binning} \alias{predict_hist_binning} \title{predict_hist_binning} \usage{ predict_hist_binning(histogram, new) } \arguments{ \item{histogram}{the output of \code{\link{build_hist_binning}}} \item{new}{vector of uncalibrated probabilities} } \value{ a list object containing the following components \item{predictions}{contains a vector of calibrated predictions} \item{significance_test_set}{the percentage of \code{new} instances that was evaluated using significant prediction estimates} \item{pred_per_bin}{a table containing the number of instances from \code{new} for each bin of the final binning scheme of \code{histogram}} } \description{ predict for a new element using histogram binning } CalibratR/man/reliability_diagramm.Rd0000644000176200001440000000254013332503050017330 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reliability_diagram.R \name{reliability_diagramm} \alias{reliability_diagramm} \title{reliability_diagramm} \usage{ reliability_diagramm(actual, predicted, bins = 10, plot_rd = TRUE) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{bins}{number of bins in the reliability diagram, Default: 10} \item{plot_rd}{should the reliability diagram be plotted, Default: TRUE} } \value{ a list object containing the following elements \item{calibration_error}{calibration error values computed from the binned predictions} \item{discrimination_error}{discrimination error values computed for \code{predicted}} \item{rd_breaks}{bin breaks used for the reliability diagram} \item{histogram_plot}{ggplot histogram of the predictions} \item{diagram_plot}{ggplot reliability diagram} \item{mean_pred_per_bin}{mean predicted value per bin} \item{accuracy_per_bin}{observed accuracy (fraction of class 1 instances) per bin} \item{freq_per_bin}{number of instances per bin} \item{sign}{significance of the per-bin estimates} } \description{ Reliability curves allow checking if the predicted probabilities of a binary classifier are well calibrated, i.e. if the mean prediction in each bin matches the observed fraction of class 1 instances in that bin. } \seealso{ \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{stat_bin}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_fill_manual}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{xlim}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{geom_abline}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{geom_text}},\code{\link[ggplot2]{geom_label}},\code{\link[ggplot2]{coord_fixed}} } CalibratR/man/transform_me.Rd0000644000176200001440000000120213332332006015651 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/format_values.R \name{transform_me} \alias{transform_me} \title{transform_me} \usage{ transform_me(x_unscaled, mean) } \arguments{ \item{x_unscaled}{vector of predictions} \item{mean}{mean of \code{x}} } \value{ transformed values of \code{x_unscaled} } \description{ maps all instances in \code{x_unscaled} to the [0;1] range using the equation: \cr y=exp(x)/(1+exp(x)) } \details{ values greater than exp(700) or smaller than exp(-700) are returned as "Inf". To avoid NaN values, these "Inf" values are turned into min(y) or max(y).
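As a brief numerical illustration of the logistic equation above: x = 0 is mapped to exp(0)/(1+exp(0)) = 0.5, while increasingly large positive (negative) values of x approach 1 (0), which is why the extreme cases described above have to be capped.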
} CalibratR/man/calibrate.Rd0000644000176200001440000000624313332610110015100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/calibratR.R \name{calibrate} \alias{calibrate} \title{calibrate} \usage{ calibrate(actual, predicted, model_idx = c(1, 2, 3, 4, 5), evaluate_no_CV_error = TRUE, evaluate_CV_error = TRUE, folds = 10, n_seeds = 30, nCores = 4) } \arguments{ \item{actual}{vector of observed class labels (0/1)} \item{predicted}{vector of uncalibrated predictions} \item{model_idx}{which calibration models should be implemented, 1=hist_scaled, 2=hist_transformed, 3=BBQ_scaled, 4=BBQ_transformed, 5=GUESS, Default: c(1, 2, 3, 4, 5)} \item{evaluate_no_CV_error}{computes internal errors for calibration models that were trained on all available \code{actual}/\code{predicted} tuples. Testing is performed with the same set. Be careful to interpret those error values, as they are not cross-validated. Default: TRUE} \item{evaluate_CV_error}{computes cross-validation error. \code{folds} times cross validation is repeated \code{n_seeds} times with changing seeds. The trained models and the their calibration and discrimination errors are returned. Evaluation of CV errors can take some time to compute, depending on the number of repetitions specified in \code{n_seeds}, Default: TRUE} \item{folds}{number of folds in the cross-validation of the calibration model. If \code{folds} is set to 1, no CV is performed and \code{summary_CV} can be calculated. Default: 10} \item{n_seeds}{\code{n_seeds} determines how often random data set partition is repeated with varying seed. If \code{folds} is 1, \code{n_seeds} should be set to 1, too. Default: 30} \item{nCores}{\code{nCores} how many cores should be used during parallelisation. Default: 4} } \value{ A list object with the following components: \item{calibration_models}{a list of all trained calibration models, which can be used in the \code{\link{predict_calibratR}} method.} \item{summary_CV}{a list containing information on the CV errors of the implemented models} \item{summary_no_CV}{a list containing information on the internal errors of the implemented models} \item{predictions}{calibrated predictions for the original \code{predicted} values} \item{n_seeds}{number of random data set partitions into training and test set for \code{folds}-times CV} } \description{ Builds selected calibration models on the supplied trainings values \code{actual} and \code{predicted} and returns them to the user. New test instances can be calibrated using the \code{\link{predict_calibratR}} function. Returns cross-validated calibration and discrimination error values for the models if \code{evaluate_CV_error} is set to TRUE. Repeated cross-Validation can be time-consuming. 
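The typical workflow is to train the calibration models once and then calibrate new scores with \code{\link{predict_calibratR}}; a minimal end-to-end sketch, assembled from the examples of both methods (the exact arguments are illustrative): \preformatted{data(example)
## train calibration models without the (slow) repeated CV evaluation
calibration_model <- calibrate(example$actual, example$predicted, model_idx = c(1, 2),
                               FALSE, FALSE, folds = 10, n_seeds = 1, nCores = 2)
## map new, uncalibrated scores to calibrated prediction estimates
calibrated_test_set <- predict_calibratR(calibration_model$calibration_models,
                                         new = example$test_set, nCores = 2)}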
} \details{ parallised execution of random data set splits for the Cross-Validation procedure over \code{n_seeds} } \examples{ ## Loading dataset in environment data(example) actual <- example$actual predicted <- example$predicted ## Create calibration models calibration_model <- calibrate(actual, predicted, model_idx = c(1,2), FALSE, FALSE, folds = 10, n_seeds = 1, nCores = 2) } \author{ Johanna Schwarz } CalibratR/DESCRIPTION0000644000176200001440000000151113526516525013633 0ustar liggesusersPackage: CalibratR Type: Package Title: Mapping ML Scores to Calibrated Predictions Version: 0.1.2 Author: Johanna Schwarz, Dominik Heider Maintainer: Dominik Heider Description: Transforms your uncalibrated Machine Learning scores to well-calibrated prediction estimates that can be interpreted as probability estimates. The implemented BBQ (Bayes Binning in Quantiles) model is taken from Naeini (2015, ISBN:0-262-51129-0). Please cite this paper: Schwarz J and Heider D, Bioinformatics 2019, 35(14):2458-2465. License: LGPL-3 Encoding: UTF-8 LazyData: true Depends: R (>= 2.10.0) Imports: ggplot2, pROC, reshape2, parallel, foreach, stats, fitdistrplus, doParallel NeedsCompilation: no Packaged: 2019-08-19 12:29:09 UTC; dominikheider Repository: CRAN Date/Publication: 2019-08-19 13:00:05 UTC CalibratR/R/0000755000176200001440000000000013526505445012327 5ustar liggesusersCalibratR/R/visualize_calibratR.R0000644000176200001440000007026613332575374016466 0ustar liggesusers#visualize #' @title visualize_calibratR #' @description this method offers a variety of visualisations to compare implemented calibration models #' @author Johanna Schwarz #' @param calibrate_object the list component \code{calibration_models} from the \code{\link{calibrate}} method #' @param plot_distributions returns a density distribution plot of the calibrated predictions after CV (External) or without CV (internal) #' @param rd_partitions returns a reliability diagram for each model #' @param training_set_calibrated returns a list of ggplots. Each plot represents the calibrated predictions by the respective calibration model of the training set. #' If the list object \code{predictions} in the \code{calibrate_object} is empty, \code{training_set_calibrated} is returned as NULL. #' @param visualize_models returns the list components \code{plot_calibration_models} and \code{plot_single_models} #' @return An object of class list, with the following components: #' \item{histogram_distribution}{returns a histogram of the original ML score distribution} #' \item{density_calibration_internal}{returns a list of density distribution plots for each calibration method, the original #' and the two input-preprocessing methods scaling and transforming. The plot visualises the density distribution of the calibrated predictions of the training set. In this case, training and test set values are identical, so be careful to evaluate the plots.} #' \item{density_calibration_external}{returns a list of density distribution plots for each calibration method, the original #' and the two input-preprocessing methods scaling and transforming. The plot visualises the density distribution of the calibrated predictions, that were returned during Cross Validation. If more than one repetition of CV was performed, #' run number 1 is evaluated} #' \item{plot_calibration_models}{ maps the original ML scores to their calibrated prediction estimates for each model. 
#' This enables easy model comparison over the range of ML scores. See also \code{\link{compare_models_visual}}. } #' \item{plot_single_models}{returns a list of ggplots for each calibration model, also mapping the original ML scores to their calibrated prediction. Significance values are indicated. #' See also \code{\link{plot_model}}} #' \item{rd_plot}{returns a list of reliability diagrams for each of the implemented calibration models and the two input-preprocessing methods "scaled" and "transformed". The returned plot visualises the calibrated predictions that #' were returned for the test set during each of the n runs of the n-times repeated CV. Each grey line represents one of the n runs. The blue line represents the median of all calibrated bin predictions. #' Insignificant bin estimates are indicated with "ns". If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{rd_plot} is returned as NULL} #' \item{calibration_error}{returns a list of boxplots for the calibration error metrics ECE, MCE, CLE and RMSE. The n values for each model represent the obtained error values during the #' n-times repeated CV. If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{calibration_error} is returned as NULL} #' \item{discrimination_error}{returns a list of boxplots for the discrimination error metrics AUC, sensitivity and specificity. The n values for each model represent the obtained error values during the #' n-times repeated CV. If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{discrimination_error} is returned as NULL} #' \item{cle_class_specific_error}{returns a plot comparing class 0 and class 1 classification error (CLE) across the trained calibration models. If no CV was performed during calibration model building using the \code{\link{calibrate}} method, \code{cle_class_specific_error} is returned as NULL} #' \item{training_set_calibrated}{returns a list of ggplots. Each plot represents the calibrated predictions by the respective calibration model of the training set. 
#' If the list object \code{predictions} in the \code{calibrate_object} is empty, \code{training_set_calibrated} is returned as NULL.} #' \item{GUESS_1_final_model}{plots the the returned conditional probability p(x|Class) values of the GUESS_1 model} #' \item{GUESS_2_final_model}{plots the the returned conditional probability p(x|Class) values of the GUESS_2 model} #' @examples #' ## Loading dataset in environment #' data(example) #' calibration_model <- example$calibration_model #' #' visualisation <- visualize_calibratR(calibration_model, plot_distributions=FALSE, #' rd_partitions=FALSE, training_set_calibrated=FALSE) #' @seealso #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_density}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{scale_fill_manual}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{geom_hline}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{element_text}} #' \code{\link[reshape2]{melt}} #' @rdname visualize_calibratR #' @export #' @importFrom ggplot2 ggplot geom_density aes scale_colour_manual scale_fill_manual labs geom_point geom_hline theme element_text #' @importFrom reshape2 melt visualize_calibratR <- function(calibrate_object, visualize_models=FALSE, plot_distributions=FALSE, rd_partitions=FALSE, training_set_calibrated=FALSE){ visualize_distributions <- TRUE visualize_errors_CV <- TRUE visualize_cle_class_error <- TRUE if(is.null(calibrate_object$calibration_models$models_final$GUESS)){ visualize_guess <- FALSE } else{ visualize_guess <- TRUE } training_set_calibrated <- TRUE if(length(calibrate_object$calibration_models$models_final)!=5){ warning("Not all calibration models were trained. Certain visualisations may not be available. ") } if(is.null(calibrate_object$summary_CV$models$calibrated)){ visualize_errors_CV <- FALSE plot_distributions <- FALSE rd_partitions <- FALSE visualize_cle_class_error <- FALSE warning("The list object summary_CV of the calibrate_object is empty. Certain visualisations may not be available. ") } if(is.null(calibrate_object$predictions)){ plot_distributions <- FALSE training_set_calibrated <- FALSE warning("The list object predictions of the calibrate_object is empty. Certain visualisations may not be available.") } if(is.null(calibrate_object$summary_no_CV$discrimination_error)){ training_set_calibrated <- FALSE warning("The list object summary_no_CV of the calibrate_object is empty. 
Certain visualisations may not be available.") } if(visualize_models){ plot_models <- compare_models_visual(calibrate_object$calibration_models) plot_single_models <- plot_model(calibrate_object$calibration_models) } else{ plot_models <- NULL plot_single_models <- NULL } if(plot_distributions){ p0 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$original[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$original[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="Uncalibrated", x = "ML score") p1 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$scaled[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$scaled[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, scaled", x = "uncalibrated prediction") p2 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$transformed[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$transformed[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, transformed", x = "uncalibrated prediction") p3 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$hist_scal[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$hist_scal[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, hist_scal", x = "calibrated prediction") p4 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$hist_trans[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ 
ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$hist_trans[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, hist_trans", x = "calibrated prediction") p5 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_scaled_sel[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_scaled_sel[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, BBQ_scaled_sel", x = "calibrated prediction") p6 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_scaled_avg[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_scaled_avg[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, BBQ_scaled_avg", x = "calibrated prediction") p7 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_transformed_sel[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_transformed_sel[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, BBQ_transformed_sel", x = "calibrated prediction") p8 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_transformed_avg[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$BBQ_transformed_avg[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, BBQ_transformed_avg", x = "calibrated prediction") p9 <- ggplot2::ggplot()+ 
ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$GUESS_1[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$GUESS_1[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, GUESS_1", x = "calibrated prediction") p10 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$GUESS_2[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$predictions$GUESS_2[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, internal, GUESS_2", x = "calibrated prediction") distribution_list <- list(original=p0,scaled=p1,transformed=p2,hist_scaled=p3,hist_transformed=p4,BBQ_scaled_sel=p5, BBQ_scaled_avg=p6,BBQ_transformed_sel=p7,BBQ_transformed_avg=p8,GUESS_1=p9,GUESS_2=p10) p0 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$calibration_models$original_values$predicted[calibrate_object$calibration_models$original_values$actual==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$calibration_models$original_values$predicted[calibrate_object$calibration_models$original_values$actual==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="Uncalibrated", x = "ML score") p1 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$uncalibrated$scaled[[1]]$probs_CV[calibrate_object$summary_CV$models$uncalibrated$scaled[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$uncalibrated$scaled[[1]]$probs_CV[calibrate_object$summary_CV$models$uncalibrated$scaled[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, scaled", x = "uncalibrated prediction") p2 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$uncalibrated$transformed[[1]]$probs_CV[calibrate_object$summary_CV$models$uncalibrated$transformed[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ 
ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$uncalibrated$transformed[[1]]$probs_CV[calibrate_object$summary_CV$models$uncalibrated$transformed[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, transformed", x = "uncalibrated prediction") p3 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$hist_scaled[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$hist_scaled[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$hist_scaled[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$hist_scaled[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, hist_scal", x = "calibrated prediction") p4 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$hist_transformed[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$hist_transformed[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$hist_transformed[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$hist_transformed[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, hist_trans", x = "calibrated prediction") p5 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_scaled_sel[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_scaled_sel[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_scaled_sel[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_scaled_sel[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, BBQ_scaled_sel", x = "calibrated prediction") p6 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_scaled_avg[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_scaled_avg[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_scaled_avg[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_scaled_avg[[1]]$actual_CV==1], 
colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, BBQ_scaled_avg", x = "calibrated prediction") p7 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_transformed_sel[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_transformed_sel[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_transformed_sel[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_transformed_sel[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, BBQ_transformed_sel", x = "calibrated prediction") p8 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_transformed_avg[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_transformed_avg[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$BBQ_transformed_avg[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$BBQ_transformed_avg[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, BBQ_transformed_avg", x = "calibrated prediction") p9 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$GUESS_1[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$GUESS_1[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$GUESS_1[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$GUESS_1[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, GUESS_1", x = "calibrated prediction") p10 <- ggplot2::ggplot()+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$GUESS_2[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$GUESS_2[[1]]$actual_CV==0], colour="darkolivegreen4", fill="darkolivegreen4"),alpha=0.2)+ ggplot2::geom_density(ggplot2::aes(x=calibrate_object$summary_CV$models$calibrated$GUESS_2[[1]]$probs_CV[calibrate_object$summary_CV$models$calibrated$GUESS_2[[1]]$actual_CV==1], colour="firebrick3", fill="firebrick3"),alpha=0.2)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ 
ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="calibrated, external, GUESS_2", x = "calibrated prediction") distribution_list_external <- list(original=p0,scaled=p1,transformed=p2,hist_scaled=p3,hist_transformed=p4,BBQ_scaled_sel=p5, BBQ_scaled_avg=p6,BBQ_transformed_sel=p7,BBQ_transformed_avg=p8,GUESS_1=p9,GUESS_2=p10) } else{ distribution_list <- NULL distribution_list_external <- NULL } if (visualize_distributions){ plot_distribution <- visualize_distribution(calibrate_object$calibration_models$original_values$actual, calibrate_object$calibration_models$original_values$predicted) } else{ plot_distribution <- NULL } if(rd_partitions){ rd_plot <- list() rd_plot[["scaled"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$uncalibrated$scaled) rd_plot[["transformed"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$uncalibrated$transformed) rd_plot[["hist_scaled"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$hist_scaled) rd_plot[["hist_transformed"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$hist_transformed) rd_plot[["BBQ_scaled_sel"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$BBQ_scaled_sel) rd_plot[["BBQ_scaled_avg"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$BBQ_scaled_avg) rd_plot[["BBQ_transformed_sel"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$BBQ_transformed_sel) rd_plot[["BBQ_transformed_avg"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$BBQ_transformed_avg) rd_plot[["GUESS_1"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$GUESS_1) rd_plot[["GUESS_2"]] <- rd_multiple_runs(calibrate_object$summary_CV$models$calibrated$GUESS_2) } else{ rd_plot <- NULL } if(visualize_errors_CV){ plots_calibration <- visualize_error_boxplot(calibrate_object$summary_CV$error_models$calibration, discrimination = FALSE) plots_discrimination <- visualize_error_boxplot(calibrate_object$summary_CV$error_models$discrimination, discrimination = TRUE) } else{ plots_calibration <- NULL plots_discrimination <- NULL } if(visualize_cle_class_error){ cle_class_specific <- get_CLE_comparison(calibrate_object$summary_CV$error_models$calibration) } else{ cle_class_specific <- NULL } if(visualize_guess){ guess1 <- plot_class_distributions(calibrate_object$calibration_models$models_final$GUESS, 1) guess2 <- plot_class_distributions(calibrate_object$calibration_models$models_final$GUESS, 2) } else{ guess1 <- NULL guess2 <- NULL } if(training_set_calibrated){ plot1 <- visualize_calibrated_test_set(calibrate_object$calibration_models$original_values$actual, calibrate_object$predictions, calibrate_object$summary_no_CV$discrimination_error[,"cutoff"]) } else{ plot1 <- NULL } return(list(histogram_distribution=plot_distribution, density_calibration_internal=distribution_list, density_calibration_external=distribution_list_external, plot_calibration_models=plot_models, plot_single_models=plot_single_models, rd_plot=rd_plot, calibration_error=plots_calibration,discrimination_error=plots_discrimination, cle_class_specific_error=cle_class_specific, training_set_calibrated=plot1, GUESS_1_final_model=guess1, GUESS_2_final_model=guess2)) } CalibratR/R/predict_GUESS.R0000644000176200001440000001246613526505445015063 0ustar liggesusers#' @title predict_GUESS #' @description returns calibrated predictions for the instances \code{new} using the trained GUESS calibration model 
\code{build_guess_object}. #' Two different evaluation methods are available. #' Method 1: returns the p-value for the score \code{new} under the distribution that is handed over in the \code{build_guess_object} #' Method 2: returns the probability density value for the score \code{new} under the distribution that is handed over in the \code{build_guess_object} #' @param build_guess_object output from the \code{\link{build_GUESS}} method #' @param new vector of uncalibrated probabilities #' @param density_evaluation which density evaluation method should be used to infer calculate probabilities, Default: 2 #' @param return_class_density if set to TRUE, class densities p(x|class) are returned, Default: FALSE #' @return a list object containing the following components: #' \item{predictions}{contains a vector of calibrated predictions} #' \item{pred_idx}{which density evaluation method was used} #' \item{significance_test_set}{the percentage of \code{new} instances that was evaluated using significant prediction estimates} #' \item{dens_case}{a vector containing the p(x|case) values} #' \item{dens_control}{a vector containing the p(x|control) values} #' @details \code{dens_case} and \code{dens_control} are only returned when \code{return_class_density} is set to TRUE #' @rdname predict_GUESS #' @export predict_GUESS <- function(build_guess_object, new, density_evaluation=2, return_class_density=FALSE){ ###local function#### evaluate_density_1 <- function(distr, new){ pdistname <- distr$pdistname #use pdistname estimate <- distr$estimate list_estimate <- list() for(i in 1:length(estimate)){ list_estimate[i] <- estimate[i] } p_case <- do.call(match.fun(pdistname), c(new,list_estimate)) return(p_case) } evaluate_density_2 <- function(distr, new){ ddistname <- distr$ddistname estimate <- distr$estimate list_estimate <- list() for(i in 1:length(estimate)){ list_estimate[i] <- estimate[i] } density <- do.call(match.fun(ddistname), c(new,list_estimate)) return(density) } new_1 <- new new_2 <- new out <- rep(0, length(new)) dens_cases <- rep(0, length(new)) dens_controls <- rep(0, length(new)) best_fit_cases <- build_guess_object$best_fit_cases best_fit_controls <- build_guess_object$best_fit_controls class_probs <- build_guess_object$class_probs #scale input first if data was z-scaled for t distribution if(best_fit_cases$distname=="t"){ new_1 <- scale(new, center = build_guess_object$mean_case, scale=F) } if(best_fit_controls$distname=="t"){ new_2 <- scale(new, center = build_guess_object$mean_control, scale=F) } #calculate P(C) class_prob_case <- class_probs$cases class_prob_control <- class_probs$controls #for rebalancing set both priors to 0.5 #class_prob_case <- 0.5 #class_prob_control <- 0.5 #which evaluation method should be used to determine p-value method <- switch(density_evaluation, "1"= evaluate_density_1, "2"= evaluate_density_2) for (i in 1:length(new)){ #for evaluation of P(x|Case) dens_case <- method(best_fit_cases, new_1[i]) if(density_evaluation==1){ #for evaluation of P(x|Control) for GUESS1: 1-pnorm = probability, for x to be a control. 
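      # With density_evaluation == 1, "method" is the fitted cumulative distribution function
      # (pdistname): dens_case is P(X <= x | case) and dens_control below is 1 - P(X <= x | control).
      # With density_evaluation == 2, "method" is the fitted density (ddistname), so both values
      # are class-conditional densities p(x | class); they are then weighted by the class priors
      # and normalised by the evidence further down.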
dens_control <- (1-method(best_fit_controls, new_2[i])) } else{ dens_control <- method(best_fit_controls, new_2[i]) } #P(x|C)*P(C) path_prob_case <- dens_case*class_prob_case path_prob_control <- dens_control*class_prob_control #p(x) evidence <- path_prob_case+path_prob_control if(evidence==0){ #to avoid dividing by 0 prob_case <- path_prob_case #if evidence is 0: both path_probs are set to 0 by default prob_control <- path_prob_control } else{ #path_prob/p(x) prob_case <- path_prob_case/evidence prob_control <- path_prob_control/evidence } class_guess <- max(prob_case, prob_control) class <- switch(which.max(c(prob_case, prob_control)), "1"=1, "2"=0) out[i] <- prob_case dens_cases[i] <- dens_case dens_controls[i] <- dens_control } #significance estimate_case <- best_fit_cases$estimate list_estimate_case <- list() for(i in 1:length(estimate_case)){ list_estimate_case[i] <- estimate_case[i] } estimate_control <- best_fit_controls$estimate list_estimate_control <- list() for(i in 1:length(estimate_control)){ list_estimate_control[i] <- estimate_control[i] } #significant results in test set sign_test_set <- sum(new>build_guess_object$t_crit[[1]]& new=0 if(sum(data<0) == 0){ data <- data[!data==0] #fitting works better if data does not include 0 value (?) data_fit_exp <- fitdistrplus::fitdist(data, "exp") #exponential distribution data_fit_w <- fitdistrplus::fitdist(data, "weibull", start=list(shape=5, scale=0.5)) #weibull distribution data_fit_g <- fitdistrplus::fitdist(data, "gamma") #gamma distribution data_fit_ln <- fitdistrplus::fitdist(data, "lnorm") #lognormal distribution names <- c("exponential", "Weibull","gamma", "lognormal") bool=1 } else{ names <- c() distributions <- c() } if(bool==1){ summary <- list(summary(data_fit_norm),summary(data_fit_log), summary(data_fit_exp), summary(data_fit_w), summary(data_fit_g), summary(data_fit_ln), summary(data_fit_t_z_trans)) best_fit <- get_LL(summary) return(list(distributions=summary, best_fit=best_fit$best_fit)) } else{ summary <- list(summary(data_fit_norm),summary(data_fit_log), summary(data_fit_t_z_trans)) best_fit <- get_LL(summary) return(list(distributions=summary, best_fit=best_fit$best_fit)) } } all <- data.frame(cbind(actual, predicted)) controls <- subset(all[,2],all[,1]==0) cases <- subset(all[,2],all[,1]==1) #function has to return min/max for scaling and mean and sd for Z-transformation for t-distribution fitting min <- min(predicted) max <- max(predicted) mean_cases <- mean(cases) sd_cases <- sd(cases) mean_controls <- mean(controls) sd_controls <- sd(controls) #evaluate possible distributions for the two classes distr_cases <- distribution_test(cases) distr_controls <- distribution_test(controls) #save distribution with max logLL in best_fit_Xx best_fit_cases <- distr_cases$best_fit best_fit_controls <- distr_controls$best_fit dist_name_cases <- best_fit_cases$qdistname dist_name_controls <- best_fit_controls$qdistname #plot best distributions for cases and controls fit_cases <- fitdistrplus::denscomp(best_fit_cases, main="Best fit: Cases", legendtext=best_fit_cases$distname, demp=TRUE, datacol="firebrick3", plotstyle="ggplot") fit_controls <- fitdistrplus::denscomp(best_fit_controls, main="Best fit: Controls", legendtext=best_fit_controls$distname, demp=TRUE, datacol="darkolivegreen4", plotstyle="ggplot") #get class probabiliy P(C) class_probs <- get_class_prob(cases, controls) #significance testing estimate_case <- best_fit_cases$estimate list_estimate_case <- list() for(i in 1:length(estimate_case)){ list_estimate_case[i] 
<- estimate_case[i] } estimate_control <- best_fit_controls$estimate list_estimate_control <- list() for(i in 1:length(estimate_control)){ list_estimate_control[i] <- estimate_control[i] } #define critical values (5%, 95%) for both distributions t_crit_cases_l <- do.call(match.fun(dist_name_cases), c(0.05,list_estimate_case)) t_crit_cases_u <- do.call(match.fun(dist_name_cases), c(0.95,list_estimate_case)) t_crit_controls_l <- do.call(match.fun(dist_name_controls), c(0.05,list_estimate_control)) t_crit_controls_u <- do.call(match.fun(dist_name_controls), c(0.95,list_estimate_control)) #define critical values where both distributions are in their 5% most extreme values t_crit <- c(crit_case_l=t_crit_cases_l, crit_control_l=t_crit_controls_l, crit_case_u=t_crit_cases_u, crit_control_u=t_crit_controls_u) #significant results in test set (lower than lower bound of controls and higher than upper bound of cases) sign_train_set <- sum(predicted>t_crit[[2]]& predictedbuild_guess_object$t_crit[[4]]){ plot1 <- ggplot2::ggplot()+ ggplot2::geom_line(data=data.frame(df),mapping=ggplot2::aes(x, y=value, colour=Var2))+ ggplot2::scale_colour_manual(values=c("darkolivegreen4","firebrick3"), name="Group",labels=c("Control","Case"))+ ggplot2::theme(legend.position = "bottom")+ ggplot2::labs(subtitle="Controls vs. Cases", x = "original ML score", y = "calibrated prediction")+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=build_guess_object$t_crit[[2]]), colour="grey", size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=build_guess_object$t_crit[[3]]), colour="grey", size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=build_guess_object$t_crit[[1]]), colour="grey", size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=build_guess_object$t_crit[[4]]), colour="grey", size=1, linetype=2)+ ggplot2::geom_text(ggplot2::aes(x=build_guess_object$t_crit[[2]], y=max(df$value),label=("significance\n boundaries")), nudge_x=0.05,vjust = "inward", hjust = "inward", size=3) } else plot1 <- ggplot2::ggplot()+ ggplot2::geom_line(data=data.frame(df),mapping=ggplot2::aes(x, y=value, colour=Var2))+ ggplot2::scale_colour_manual(values=c("darkolivegreen4","firebrick3"), name="Group",labels=c("Control","Case"))+ ggplot2::theme(legend.position = "bottom")+ ggplot2::labs(subtitle="Controls vs. Cases", x = "original ML score", y = "calibrated prediction")+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=build_guess_object$t_crit[[2]]), colour="grey", size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=build_guess_object$t_crit[[3]]), colour="grey", size=1, linetype=2)+ ggplot2::geom_text(ggplot2::aes(x=build_guess_object$t_crit[[2]], y=max(df$value),label=("significance\n boundaries")), nudge_x=0.05,vjust = "inward", hjust = "inward", size=3) return(plot1) } CalibratR/R/predict_model.R0000644000176200001440000000361713332671367015275 0ustar liggesusers#' @title predict_model #' @description calibrates the uncalibrated predictions \code{new} using \code{calibration_model}. #' @param new vector of uncalibrated predictions #' @param calibration_model calibration model to be used for the calibration. Can be the output of \code{\link{build_BBQ}},\code{\link{build_hist_binning}} or \code{\link{build_GUESS}}. 
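#'   The \code{type} field of the model ("hist", "BBQ" or "GUESS") determines which prediction
#'   routine (\code{predict_hist_binning}, \code{predict_BBQ} or \code{predict_GUESS}) is dispatched.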
#' @param min minimum value of the original data set #' @param max maximum value of the original data set #' @param mean mean value of the original data set #' @param inputtype specify if the model was build on original (=0), scaled(=1) or transformed (=2) data #' @return vector of calibrated predictions #' @rdname predict_model predict_model <- function(new, calibration_model, min, max, mean, inputtype){ ###locale function### prepare_input <- function(new, min, max, mean, inputtype){ if (inputtype==0){ #model uses original scores output <- new } else if (inputtype==1){ #model uses scaled scores output <- scale_me(new, min, max) } else if (inputtype==2){ #model uses transformed scores output <- transform_me(new, mean) } return(output=output) } predict <- switch(calibration_model$type, "hist"= predict_hist_binning, "BBQ"= predict_BBQ, "GUESS"= predict_GUESS ) new <- prepare_input(new, min, max, mean, inputtype) if(calibration_model$type=="BBQ"){ x_sel <- predict(calibration_model, new, 0) x_avg <- predict(calibration_model, new, 1) return(list(BBQ_sel=x_sel, BBQ_avg=x_avg)) } else if (calibration_model$type=="hist"){ x <- predict(calibration_model, new) return(case=x) } else if(calibration_model$type=="GUESS"){ x_1 <- predict(calibration_model, new, 1) x_2 <- predict(calibration_model, new, 2) return(list(GUESS_1=x_1, GUESS_2=x_2)) } } CalibratR/R/get_CLE.R0000644000176200001440000001101613332610104013673 0ustar liggesusers#' @title get_CLE_class #' @description calculates the class-specific classification error CLE in the test set. #' The method computes the deviation of the calibrated predictions of class 1 instances from their true value 1. #' For class 0 instances, \code{get_CLE_class} computes the deviation from 0. #' Class 1 CLE is 0 when all class 1 instances have a calibrated prediction of 1 regardless of potential miscalibration of class 0 instances. #' CLE calculation is helpful when miscalibration and -classification is more cost-sensitive for one class than for the other. 
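#' A rough usage sketch (illustrative only; the simulated scores below are hypothetical and not
#' part of the package data, and \code{get_CLE_class} is an internal helper of the package):
#' @examples
#' \dontrun{
#' set.seed(1)
#' actual <- rbinom(100, 1, 0.5)
#' predicted <- runif(100)
#' get_CLE_class(actual, predicted, bins = 10)
#' }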
#' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param bins number of bins for the equal-width binning model, default=10 #' @return object of class list containing the following components: #' \item{class_1}{CLE of class 1 instances} #' \item{class_0}{CLE of class 0 instances} #' @seealso #' \code{\link[reshape2]{melt}} #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{position_dodge}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{scale_colour_manual}} #' @rdname get_CLE_class #' @importFrom reshape2 melt #' @importFrom ggplot2 ggplot geom_line aes position_dodge labs scale_colour_manual #' @importFrom graphics hist get_CLE_class <- function(actual, predicted, bins=10){ #equal width bins pred_actual <- cbind(predicted, actual) if(all(predicted<=1) && all(predicted>=0)){ hist_x <- hist(pred_actual[,1], breaks=seq(0,1,1/bins), plot=F) } else{ hist_x <- hist(pred_actual[,1], breaks=bins, plot=F) } breaks_y <- hist_x$breaks y_true <- hist(subset(pred_actual[,1], pred_actual[,2]=="1"), breaks=breaks_y, plot=F) divided <- cut(pred_actual[,1], breaks=c(hist_x$breaks), label = seq(1,length(y_true$mids)), include.lowest = T) divided_0 <- cut(pred_actual[,1][pred_actual[,2]==0], breaks=c(hist_x$breaks), label = seq(1,length(y_true$mids)), include.lowest = T) divided_1 <- cut(pred_actual[,1][pred_actual[,2]==1], breaks=c(hist_x$breaks), label = seq(1,length(y_true$mids)), include.lowest = T) prediction_in_bin <- list() expected <- c() prediction_in_bin_0 <- list() expected_0 <- c() prediction_in_bin_1 <- list() expected_1 <- c() for (i in as.numeric(levels(divided))){ prediction_in_bin[[i]] <- pred_actual[which(divided==i),1] expected[i] <- mean(prediction_in_bin[[i]]) #mean prediction in that bin prediction_in_bin_0[[i]] <- subset(pred_actual,pred_actual[,2]==0)[which(divided_0==i),1] expected_0[i] <- mean(prediction_in_bin_0[[i]]) #mean prediction in that bin prediction_in_bin_1[[i]] <- subset(pred_actual,pred_actual[,2]==1)[which(divided_1==i),1] expected_1[i] <- mean(prediction_in_bin_1[[i]]) #mean prediction in that bin } counts_all <- hist_x$counts counts_true <- y_true$counts zeros <- which(counts_all==0) prevalence <- counts_true/counts_all prevalence[zeros] <- 0 #set prevalence to 0 when no observations are in the bin expected[zeros] <- hist_x$mids[zeros] #set expectation to the mid bin point, when no elements are in bin S_2 <- abs(prevalence-expected) W_2 <- counts_all/(length(predicted)) expected_0[!is.finite(expected_0)] <- 0 expected_1[!is.finite(expected_1)] <- 0 S2_1 <- abs(1-expected_1) S2_0 <- abs(0-expected_0) #weighing adapted for class 1 W_2_1_all <- counts_true/(sum(pred_actual[,2]=="1")) #add up to 1 #weighing adapted for class 0 W_2_0_all <- (counts_all-counts_true)/(sum(pred_actual[,2]=="0")) #add up to 1 ECE_per_bin <- (S_2*W_2) ECE <- sum(ECE_per_bin) CLE_per_bin <- (S2_1*W_2_1_all)+(S2_0*W_2_0_all) CLE <- sum(CLE_per_bin) CLE_per_bin_1 <- (S2_1*W_2_1_all) CLE_1 <- sum(CLE_per_bin_1) CLE_per_bin_0 <- (S2_0*W_2_0_all) CLE_0 <- sum(CLE_per_bin_0) #Visualisation of CLE class errors bins_1 <- S2_1*W_2_1_all bins_0 <- S2_0*W_2_0_all # df <- reshape2::melt(cbind(CLE_class1=bins_1,CLE_class0=bins_0, prop_0=W_2_0_all, prop_1=W_2_1_all, ECE_all=ECE_per_bin)) # plot1 <- ggplot2::ggplot()+ # ggplot2::geom_line(ggplot2::aes(x=df$Var1, y=(df$value), colour=df$Var2), position = ggplot2::position_dodge(width = 0.2))+ # ggplot2::labs(x="bin number", y="CLE")+ # 
ggplot2::scale_colour_manual(values=c("firebrick3", "darkolivegreen4", "cyan3", "grey", "black"), name = NULL) #show(plot1) return(list(class_1=as.numeric(t(S2_1)%*%W_2_1_all), class_0=as.numeric(t(S2_0)%*%W_2_0_all))) } CalibratR/R/visualize_errors.R0000644000176200001440000001756013332671367016074 0ustar liggesusers#' @title visualize_error_boxplot #' @description compares error values among different calibration models. A boxplots is created from the n error values that were obtained during the n-times repeated Cross-Validation procedure. #' Different error values are implemented and can be compared: #' \cr discrimination error = sensitivity, specificity, accuracy, AUC (when \code{discrimination}=TRUE) #' \cr calibration error = ece, mce, rmse, class 0 cle, class 1 cle (when \code{discrimination}=FALSE) #' For the calculation of the errors, see the respective methods listed in the "see also" section #' @param list_models list object that contains all error values for all trained calibration models. For the specific format, see the calling function \code{\link{visualize_calibratR}}. #' @param discrimination boolean (TRUE or FALSE). If TRUE, discrimination errors are compared between models; if FALSE calibration error is compared, Default: TRUE #' @return An object of class list, with the following components: #' \cr if \code{discrimination}=TRUE #' \item{sens}{ggplot2 boxplot that compares all evaluated calibration models with regard to sensitivity.} #' \item{spec}{ggplot2 boxplot that compares all evaluated calibration models with regard to specificity} #' \item{acc}{ggplot2 boxplot that compares all evaluated calibration models with regard to accuracy} #' \item{auc}{ggplot2 boxplot that compares all evaluated calibration models with regard to AUC} #' \item{list_errors}{list object that contains all discrimination error values that were used to construct the boxplots} #' \cr if \code{discrimination}=FALSE #' \item{ece}{ggplot2 boxplot that compares all evaluated calibration models with regard to expected calibration error} #' \item{mce}{ggplot2 boxplot that compares all evaluated calibration models with regard to maximum expected calibration error (MCE)} #' \item{rmse}{ggplot2 boxplot that compares all evaluated calibration models with regard to root mean square error (RMSE)} #' \item{cle_0}{ggplot2 boxplot that compares all evaluated calibration models with regard to class 0 classification error (CLE)} #' \item{cle_1}{ggplot2 boxplot that compares all evaluated calibration models with regard to class 1 classification error (CLE)} #' \item{list_errors}{list object that contains all calibration error values that were used to construct the boxplots} #' @seealso #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{ggtitle}},\code{\link[ggplot2]{scale_x_discrete}},\code{\link[ggplot2]{geom_boxplot}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{element_text}} #' \code{\link[reshape2]{melt}},\code{\link{get_CLE_class}},\code{\link{getECE}},\code{\link{getMCE}},\code{\link{getRMSE}}, \code{\link{evaluate_discrimination}} #' @rdname visualize_error_boxplot #' @importFrom ggplot2 ggplot aes ggtitle scale_x_discrete geom_boxplot theme element_text #' @importFrom reshape2 melt visualize_error_boxplot <- function(list_models, discrimination=TRUE){ idx <- 1 list_errors <- list() if(discrimination){ list_errors[["sensitivity"]] <- list() list_errors[["specificity"]] <- list() list_errors[["accuracy"]] <- list() list_errors[["auc"]] <- list() for (j in list_models){ 
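    # Collect the n per-run error values of every calibration model into one list per metric;
    # the model names from names(list_models) become the group labels of the boxplots below.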
list_errors[["sensitivity"]][[names(list_models)[[idx]]]] <- j$sens list_errors[["specificity"]][[names(list_models)[[idx]]]] <- j$spec list_errors[["accuracy"]][[names(list_models)[[idx]]]] <- j$acc list_errors[["auc"]][[names(list_models)[[idx]]]] <- j$auc idx <- idx+1 } df_sens <- data.frame(list_errors$sensitivity) variable <- NULL value <- NULL p1 <- ggplot2::ggplot(reshape2::melt(df_sens, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("Sensitivity") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_spec <- data.frame(list_errors$specificity) p2 <- ggplot2::ggplot(reshape2::melt(df_spec, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("Specificity") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_acc <- data.frame(list_errors$accuracy) p3 <- ggplot2::ggplot(reshape2::melt(df_acc, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("Accuracy") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_auc <- data.frame(list_errors$auc) p4 <- ggplot2::ggplot(reshape2::melt(df_auc, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("AUC") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) return(list(sens=p1, spec=p2, acc=p3, auc=p4, list_errors=list_errors)) } else{ list_models$original <- NULL list_errors[["ece"]] <- list() list_errors[["mce"]] <- list() list_errors[["rmse"]] <- list() list_errors[["cle_class1"]] <- list() list_errors[["cle_class0"]] <- list() for (j in list_models){ list_errors[["ece"]][[names(list_models)[[idx]]]] <- j$ECE_equal_width list_errors[["mce"]][[names(list_models)[[idx]]]] <- j$MCE_equal_width list_errors[["rmse"]][[names(list_models)[[idx]]]] <- j$RMSE list_errors[["cle_class1"]][[names(list_models)[[idx]]]] <- j$CLE_class_1 list_errors[["cle_class0"]][[names(list_models)[[idx]]]] <- j$CLE_class_0 idx <- idx+1 } df_ece <- data.frame(list_errors$ece) variable <- NULL value <- NULL p1 <- ggplot2::ggplot(reshape2::melt(df_ece, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("ECE") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_mce <- data.frame(list_errors$mce) p2 <- ggplot2::ggplot(reshape2::melt(df_mce, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("MCE") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_rmse <- data.frame(list_errors$rmse) p3 <- ggplot2::ggplot(reshape2::melt(df_rmse, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("RMSE") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_cle_0 <- data.frame(list_errors$cle_class0) p4 <- ggplot2::ggplot(reshape2::melt(df_cle_0, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("CLE class 0") + 
ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) df_cle_1 <- data.frame(list_errors$cle_class1) p5 <- ggplot2::ggplot(reshape2::melt(df_cle_1, id.vars=NULL), ggplot2::aes(x=variable, y=value)) + ggplot2::ggtitle("CLE class 1") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot(fill="#4271AE", alpha=0.7) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) return(list(ece=p1, mce=p2, rmse=p3, cle_0=p4, cle_1=p5, list_errors=list_errors)) } } CalibratR/R/calibrate_me_CV_errors_parallel.R0000644000176200001440000001040113332336747020717 0ustar liggesusers #' @title calibrate_me_CV_errors #' @description trains and evaluates calibration models using \code{n_seeds}-times repeated \code{folds}-Cross-Validation (CV).\code{model_idx} specifies which models should be trained. #' \cr Model training and evaluation is repeated \code{n_seeds}-times with a different training/test set partition scheme for the CV each time. #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param model_idx which calibration models should be implemented, 1=hist_scaled, 2=hist_transformed, 3=BBQ_scaled, 4=BBQ_transformed, 5=GUESS #' @param folds number of folds in the cross-validation, Default: 10 #' @param n_seeds \code{n_seeds} determines how often random data set partition is repeated with varying seed #' @param nCores \code{nCores} how many cores should be used during parallelisation. Default: 4 #' @return returns all trained calibration models that were built during the \code{n_seeds}-times repeated \code{folds}-CV. #' \cr Error values for each of the \code{n_seeds} CV runs are given. #' @details parallised execution over \code{n_seeds} #' @rdname calibrate_me_CV_errors #' @importFrom parallel makeCluster stopCluster #' @import foreach #' @importFrom stats setNames #' @importFrom doParallel registerDoParallel calibrate_me_CV_errors <- function(actual, predicted, model_idx, folds=10, n_seeds, nCores){ #parallize the foreach loop NumberOfCluster <- nCores # how many jobs you want the computer to run at the same time cl <- parallel::makeCluster(NumberOfCluster) # Make clusters registerDoSNOW(cl) # use the above cluster # your parallel programming code code code stopCluster(cl) # close clusters doParallel::registerDoParallel(cl) `%dopar%` <- foreach::`%dopar%` #how many list() do I expect in my output idx <- 0 names_model <- c() if(any(model_idx==1)){ idx <- idx +1 names_model <- c(names_model,"hist_scaled") } if(any(model_idx==2)){ idx <- idx +1 names_model <- c(names_model,"hist_transformed") } if(any(model_idx==3)){ idx <- idx +2 names_model <- c(names_model,"BBQ_scaled_sel", "BBQ_scaled_avg") } if(any(model_idx==4)){ idx <- idx +2 names_model <- c(names_model,"BBQ_transformed_sel", "BBQ_transformed_avg") } if(any(model_idx==5)){ idx <- idx +2 names_model <- c(names_model,"GUESS_1", "GUESS_2") } comb <- function(x, ...) 
{ lapply(seq_along(x), function(i) c(x[[i]], lapply(list(...), function(y) y[[i]]))) } n <- length(seq(1,n_seeds,1)) i <- NULL hist_scaled <- NULL hist_transformed <- NULL BBQ_scaled_sel <- NULL BBQ_scaled_avg <- NULL BBQ_transformed_sel <- NULL BBQ_transformed_avg <- NULL GUESS_1 <- NULL GUESS_2 <- NULL parallized_results <- foreach::foreach(i=seq(1,n_seeds,1), .packages = "CalibratR", .combine='comb', .multicombine=TRUE, .init=rep(list(list()), idx), .final = function(x) setNames(x, names_model)) %dopar% { if(any(model_idx==1)){ hist_scaled <- hist_binning_CV(actual, predicted, n_folds=folds, seed=i, input=1) } if(any(model_idx==2)){ hist_transformed <- hist_binning_CV(actual, predicted, n_folds=folds, seed=i, input=2) } if(any(model_idx==3)){ BBQ_scaled_sel <- BBQ_CV(actual, predicted,0, n_folds=folds, seed=i, input=1) BBQ_scaled_avg <- BBQ_CV(actual, predicted,1, n_folds=folds, seed=i, input=1) } if(any(model_idx==4)){ BBQ_transformed_sel <- BBQ_CV(actual, predicted,0, n_folds=folds, seed=i, input=2) BBQ_transformed_avg <- BBQ_CV(actual, predicted,1, n_folds=folds, seed=i, input=2) } if(any(model_idx==5)){ GUESS_1 <- GUESS_CV(actual, predicted, n_folds=folds,1, seed=i, input=0) GUESS_2 <- GUESS_CV(actual, predicted, n_folds=folds,2, seed=i, input=0) } list(hist_scaled=hist_scaled, hist_transformed=hist_transformed, BBQ_scaled_sel=BBQ_scaled_sel, BBQ_scaled_avg=BBQ_scaled_avg, BBQ_transformed_sel=BBQ_transformed_sel, BBQ_transformed_avg=BBQ_transformed_avg, GUESS_1=GUESS_1, GUESS_2=GUESS_2) } parallel::stopCluster(cl) return(error=parallized_results) } CalibratR/R/getRMSE.R0000644000176200001440000000065513332352644013722 0ustar liggesusers#' @title getRMSE #' @description calculates the root of mean square error (RMSE) in the test set of calibrated predictions #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @return RMSE value #' @rdname getRMSE getRMSE <- function(actual, predicted){ res <- (((actual-predicted)%*%(actual-predicted)/length(actual))^0.5) return(as.numeric(res)) } CalibratR/R/calibrate_me.R0000644000176200001440000000447113332335327015062 0ustar liggesusers#' @title calibrate_me #' @description trains calibration models on the training set of \code{predicted}/\code{actual} value pairs.\code{model_idx} specifies which models should be trained. #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param model_idx a single number from 1 to 5, indicating which calibration model should be implemented, 1=hist_scaled, 2=hist_transformed, 3=BBQ_scaled, 4=BBQ_transformed, 5=GUESS #' @return depending on the value of \code{model_idx}, the respective calibration model is build on the input from \code{actual} and \code{predicted} #' @rdname calibrate_me calibrate_me <- function(actual, predicted, model_idx){ if (length(predicted)<=75){ #If input set is too small, bin no. in rd is decreased for all (!) 
models breaks_rd <- floor(length(predicted)/6) } else breaks_rd <- NULL all <- data.frame(cbind(actual,unname(predicted))) cases_all <- data.frame(subset(all, all[,1]==1)) control_all <- data.frame(subset(all, all[,1]==0)) x_original <- format_values(cases_all, control_all, 0) x_scaled <- format_values(cases_all, control_all, 1) x_transformed <- format_values(cases_all, control_all, 2) switch(model_idx, "1"= { model <- build_hist_binning(x_scaled$formated_values[,1],x_scaled$formated_values[,2]) model$inputtype <- 1 model$model_idx <- model_idx return(list(hist_scaled=model)) }, "2"= { model <- build_hist_binning(x_transformed$formated_values[,1],x_transformed$formated_values[,2]) model$inputtype <- 2 model$model_idx <- model_idx return(list(hist_transformed=model)) }, "3"= { model <- build_BBQ(x_scaled$formated_values[,1],x_scaled$formated_values[,2]) model$inputtype <- 1 model$model_idx <- model_idx return(list(BBQ_scaled=model)) }, "4"= { model <- build_BBQ(x_transformed$formated_values[,1],x_transformed$formated_values[,2]) model$inputtype <- 2 model$model_idx <- model_idx return(list(BBQ_transformed=model)) }, "5"={ model <- build_GUESS(x_original$formated_values[,1],x_original$formated_values[,2]) model$inputtype <- 0 model$model_idx <- model_idx return(list(GUESS=model)) })} CalibratR/R/getECE.R0000644000176200001440000000761713526505133013553 0ustar liggesusers#' @title getECE #' @description Expected Calibration Error (ECE); the model is divided into 10 equal-width bins (default) and the mean of the observed (0/1) vs. mean of predicted is calculated per bin, weighted by empirical frequency of elements in bin i #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param n_bins number of bins of the underlying equal-frequency histogram, Default: 10 #' @return equal-frequency ECE value #' @rdname getECE #' @export #' @importFrom graphics hist getECE <- function(actual, predicted, n_bins=10){ #equal frequency bins predicted <- predicted labels <- actual idx <- order(predicted) pred_actual <- (cbind(predicted[idx], labels[idx])) N <- nrow(pred_actual) rest <- N%%n_bins S <- 0 W <- c() B <- min(N,n_bins) #if less then n_bins elements in data set, then use that number of bins groups <- list() for (i in 1:B){ #i von 1 bis B if (i <= rest){ #put rest elements into each bin group_pred <- (pred_actual[(((i-1)*ceiling(N/n_bins)+1) : (i*ceiling(N/n_bins))),1]) group_actual <- (pred_actual[(((i-1)*ceiling(N/n_bins)+1) : (i*ceiling(N/n_bins))),2]) } else { group_pred <- (pred_actual[((rest+(i-1)*floor(N/n_bins)+1) : (rest+i*floor(N/n_bins))),1])#group size=N/B group_actual <- (pred_actual[((rest+(i-1)*floor(N/n_bins)+1) : (rest+i*floor(N/n_bins))),2]) } n_ <- length(group_pred) expected <- mean(group_pred) #mean of predictions in bin b observed <- mean(group_actual) #true fraction of pos.instances = prevalence in bin b S[i] <- abs(observed-expected) #absolut difference of observed value-predicted value in bin W[i] <- n_/N #empirical frequence of all instances that fall into bin i, should be equal when using equal freq binning approach groups[[i]] <- group_pred } mean_prediction <- lapply(groups, mean) min_group <- lapply(groups, min) max_group <- lapply(groups, max) res <- t(S)%*%W return(as.numeric(res)) } #' @title get_ECE_equal_width #' @description Expected Calibration Error (ECE); the model is divided into 10 equal-width bins (default) and the mean of the observed (0/1) vs. 
mean of predicted is calculated per bin, weighted by emperical frequency of elements in bin i #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param bins number of bins for the equal-width binning model #' @return equal-width ECE value #' @rdname get_ECE_equal_width #' @importFrom graphics hist get_ECE_equal_width <- function(actual, predicted, bins=10){ #equal width bins pred_actual <- cbind(predicted, actual) if(all(predicted<=1) && all(predicted>=0)){ hist_x <- hist(pred_actual[,1], breaks=seq(0,1,1/bins), plot=F) } else{ hist_x <- hist(pred_actual[,1], breaks=bins, plot=F) } breaks_y <- hist_x$breaks y_true <- hist(subset(pred_actual[,1], pred_actual[,2]=="1"), breaks=breaks_y, plot=F) divided <- cut(pred_actual[,1], breaks=c(hist_x$breaks), label = seq(1,length(y_true$mids)), include.lowest = T) prediction_in_bin <- list() expected <- c() for (i in as.numeric(levels(divided))){ prediction_in_bin[[i]] <- pred_actual[which(divided==i),1] expected[i] <- mean(prediction_in_bin[[i]]) #mean prediction in that bin #expected[i] <- hist_x$mids[i] #hist mids as mean prediction in that bin } counts_all <- hist_x$counts counts_true <- y_true$counts zeros <- which(counts_all==0) prevalence <- counts_true/counts_all prevalence[zeros] <- 0 #set prevalence to 0 when no observations are in the bin expected[zeros] <- hist_x$mids[zeros] #set expectation to the mid bin point, when no elements are in bin S_2 <- abs(prevalence-expected) W_2 <- counts_all/(length(predicted)) return(as.numeric(t(S_2)%*%W_2)) } CalibratR/R/binomtest_for_bin.R0000644000176200001440000000117213332326470016147 0ustar liggesusers#' @title binom_for_histogram #' @description p_values from stats::binom.test for each bin, if bin is empty, a p-value of 2 is returned #' @param n_x numeric vector of two integers. The first one is the number of cases in the bin; the second the number of instances in the bin #' @return p-value from stats::binom.test method #' @rdname binom_for_histogram #' @importFrom stats binom.test binom_for_histogram <- function(n_x){ success <- n_x[1] all <- n_x[2] if(!(success==0 && all==0)){ return(as.numeric(stats::binom.test(success,all)$p.value)) } else #if bin is empty -> p-value of 2 return(2) } CalibratR/R/build_BBQ.R0000644000176200001440000003757013332620416014240 0ustar liggesusers#' @title build_BBQ #' @description This method builds a BBQ calibration model using the trainings set provided. 
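#' A minimal usage sketch (illustrative only: the simulated scores below are an
#' assumption, and \code{build_BBQ}/\code{predict_BBQ} are internal functions, hence
#' the \code{CalibratR:::} prefix):
#' @examples
#' \dontrun{
#' set.seed(1)
#' actual <- c(rep(1, 100), rep(0, 100))
#' predicted <- c(rnorm(100, mean = 1.5), rnorm(100, mean = 0))    # raw classifier scores
#' scaled <- CalibratR:::scale_me(predicted)                       # map scores onto [0;1]
#' bbq <- CalibratR:::build_BBQ(actual, scaled)                    # fit all candidate binning schemes
#' bbq$binnning_scheme                                             # per-bin prevalence and p-values
#' calibrated <- CalibratR:::predict_BBQ(bbq, scaled, option = 0)  # option=0: best-scoring model is selected
#' head(calibrated$predictions)
#' }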
#' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @return returns the BBQ model which includes models for all evaluated binning schemes; the prunedmodel contains only a selection of BBQ models with the best Bayesian score #' @details Based on the paper (and matlab code) : "Obtaining Well Calibrated Probabilities Using Bayesian Binning" by Naeini, Cooper and Hauskrecht: ; https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4410090/ #' @rdname build_BBQ build_BBQ <- function(actual, predicted){ ####local functions### initlnfact_local <- function(n){ lnfact <- rep(0,n+1) for (w in 2:(n+1)){ lnfact[w] <- lnfact[w-1] + log(w-1) } return(lnfact) } get_BDeu_Score2_local <- function(opt){ histModel <- opt$histModel B <- length(histModel) N_0 <- 2*B #default in paper C <- N_0/B # =2 score <- B*lgamma(C) for (j in 1:B){ nj <- histModel[[j]]$n #number of elements in bin B nj0 <- histModel[[j]]$n0 #number of y==0 elements in bin B nj1 <- histModel[[j]]$n1 #number of y==1 elements in bin B pj <- (histModel[[j]]$min + histModel[[j]]$max)/2 #midpoint of bin B, naive estimate of probability pj <- min(pj,1-5*10^-3) pj <- max(pj, 5*10^-3) score <- score + (lgamma(nj1+C*pj)) + (lgamma(nj0+C*(1-pj))) - (lgamma(nj+C)) - (lgamma(C*pj)) - (lgamma(C*(1-pj)))#log(marginal LL)test score(Pr?fgr??e) } score <- -2*score return (score) } buildFuncOpt_local <- function(opt, histModel, cutIdx, cutPoints, logLikelihood){ N <- length(opt$PTR) #number of predicted values in the whole histogram K <- length(histModel) #how many bins where evaluated funcOpt <- list(histModel=histModel, cutIdx=cutIdx, cutPoints=cutPoints, logLikelihood=logLikelihood, K=K, N=N, PTR=opt$PTR, lnfact=opt$ lnfact, N_0=opt$N_0) return(funcOpt) } hist_calibration_freq_local <- function(predicted, actual, b){ #get the log of the likelihood P(D|M) for the observed data distribution in the bin, given theta as P #P is circa the prevalence of positives in the bin, but is smoothed and also depends by midpoint of the bin and mean predicted value for a positive case N <- length(actual) logLikelihood <- 0 cutIdx <- c() #b==1 in this case -> no cutIdx cutPoints <- c() histModel_all <- list() #smoothing function for p if (b==1){ #if binning model contains only 1 bin min <- 0 max <- 1 m_0 <- (min+max)/2 #mid point of bin idx <- which(actual==1) #TRUE values predicted_1 <- predicted[idx] #values predicted to be 1 p_0 <- (sum(predicted_1)+m_0)/(length(predicted_1)+1) #intuitive prior for beta smoothing n <- length(actual) #how many elements in the bin n1 <- sum(actual) #how many actual y==1 in the bin n0 <- n-n1 #how many actual y==0 in the bin P <- (n1+p_0)/(n+1) if(n1>0){ logLikelihood <- logLikelihood+n1*log(P) } if(n0>0){ logLikelihood<-logLikelihood+n0*log(1-P) } histModel_all[[b]] <- list(min=min, max=max, n=n, n1=n1, n0=n0, P=P, midpoint=m_0) } else { Yhat <- predicted Y <- actual c <- floor(length(Y)/b) #number of elements per bin b to get equal frequency bins rest <- length(Y)-(c*b) #how many elements are not counted due to the floor() method i <- 1 idx <- 1 Tt <- list() idx2 <- 0 while(i < b){ #i=Laufindex for bin number from 1 to last bin in the model #idx1 <- (i-1)*c+1 #lower idx of instance for bin i idx1 <- idx2+1 if (i <= rest){ #rest elements are distributed evenly among the first idx2 <- idx1+c #upper idx of instance } else idx2 <- idx1+c-1 j <- i+1 while (j <= b){ #is the next bin j already the last bin b of the model? if (j < b){ #no! 
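#(the scores are sorted, so if the score at the end of the candidate extension equals the score at the start of the current bin, the whole stretch in between is tied; the bin is then extended so that identical predictions are never split across a bin boundary)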
#Jidx2 <- j*c #lower idx of instance for bin j if (j <= rest){ #is bin j still supposed to contain one element more to avoid too full bin Jidx2 <- idx2+c #upper idx of instance } else{ Jidx2 <- idx2+c-1 } if (predicted[Jidx2]==predicted[idx1]){ idx2 <- Jidx2 j <- j+1 } else break } else{ Jidx2 <- N if (predicted[Jidx2]==predicted[idx1]){ idx2 <- Jidx2 j <- j+1 } else break } } if (idx2 0){ logLikelihood <- logLikelihood + histModel_all[[1]]$n1*log(histModel_all[[1]]$P) } if (histModel_all[[1]]$n0 > 0){ logLikelihood <- logLikelihood + histModel_all[[1]]$n0*log(1-histModel_all[[1]]$P) } #for second till second to last bin for (i in 2:(b-1)){ if((b-1)<2){ break } else{ histModel_all[[i]] <- list() histModel_all[[i]]$min <- (Tt[[i]]$Yhat[1]+Tt[[i-1]]$Yhat[length(Tt[[i-1]]$Yhat)])/2 #min value in bin i histModel_all[[i]]$max <- (Tt[[i]]$Yhat[length(Tt[[i]]$Yhat)]+Tt[[i+1]]$Yhat[1])/2 #max value in bin i cutPoints[i] <- histModel_all[[i]]$max #intuitive prior for beta distribution smoothing m_0 <- (histModel_all[[i]]$min+histModel_all[[i]]$max)/2 #midpoint idx <- which(Tt[[i]]$Y==1) #Y==1 in bin i PTR1 <- Tt[[i]]$PTR[idx] #which PTR have y==1 (in bin i) p_0 <- (sum(PTR1)+m_0)/(length(PTR1)+1) #intuitive prior for beta smoothing histModel_all[[i]]$n <- length(Tt[[i]]$Y) #how many elements in bin i histModel_all[[i]]$n1 <- sum(Tt[[i]]$Y) #how many y==1 values in bin i histModel_all[[i]]$n0 <- histModel_all[[i]]$n-histModel_all[[i]]$n1 #how many y==0 values in bin i histModel_all[[i]]$P <- (histModel_all[[i]]$n1+p_0)/(histModel_all[[i]]$n+1) #intuitive prior for beta smoothing histModel_all[[i]]$P_observed <- (histModel_all[[i]]$n1)/(histModel_all[[i]]$n) histModel_all[[i]]$midpoint <- m_0 if (histModel_all[[i]]$n1 > 0){ logLikelihood <- logLikelihood + histModel_all[[i]]$n1*log(histModel_all[[i]]$P) } if (histModel_all[[i]]$n0 > 0){ logLikelihood <- logLikelihood + histModel_all[[i]]$n0*log(1-histModel_all[[i]]$P) } } } #for last bin b of histModel_all histModel_all[[b]] <- list() histModel_all[[b]]$min <- (Tt[[b]]$Yhat[1]+Tt[[b-1]]$Yhat[length(Tt[[b-1]]$Yhat)])/2 histModel_all[[b]]$max <- 1 m_0 <- (histModel_all[[b]]$min + histModel_all[[b]]$max)/2 #midpoint idx <- which(Tt[[b]]$Y==1) PTR1 <- Tt[[b]]$PTR[idx] p_0 <- (sum(PTR1)+m_0)/(length(PTR1)+1) #intuitive prior for beta smoothing histModel_all[[b]]$n <- length(Tt[[b]]$Y) histModel_all[[b]]$n1 <- sum(Tt[[b]]$Y) histModel_all[[b]]$n0 <- histModel_all[[b]]$n - histModel_all[[b]]$n1 histModel_all[[b]]$P <- (histModel_all[[b]]$n1+p_0)/(histModel_all[[b]]$n+1) histModel_all[[b]]$P_observed <- (histModel_all[[b]]$n1)/(histModel_all[[b]]$n) histModel_all[[b]]$midpoint <- m_0 if (histModel_all[[b]]$n1 > 0){ logLikelihood <- logLikelihood + histModel_all[[b]]$n1*log(histModel_all[[b]]$P) } if (histModel_all[[b]]$n0 > 0){ logLikelihood <- logLikelihood + histModel_all[[b]]$n0*log(1-histModel_all[[b]]$P) } } return(list(histModel_all=histModel_all, cutIdx=cutIdx, cutPoints=cutPoints, logLL=logLikelihood)) } elbow <- function(scores, alpha){ #Assume R is the sorted Bayesian scores of histogram models #in a decreasing order. We fix a small number a > 0 (a = 0.001 in our experiments) #and pick the first ka associated binning models as the refined set of models, #where ka is a defined index in the sorted sequence where and o2=sigma2 is the empirical variance of the Bayesian scores. 
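#(in the code below, k first skips over exact score ties and then keeps advancing while the drop between consecutive sorted scores, measured relative to sigma2, still exceeds alpha; the models ranked before the first negligible drop form the refined set)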
b <- length(scores) sigma2 <- (sqrt(mean(scores ^ 2) - mean(scores)^2))^2 #sd of the scores is computed (denominator=N, not N-1) and then squared k <- 1 #running index for scanning the scores idxs <- order(scores, decreasing=TRUE) R <- scores[idxs] #highest SV = rank 1, highest SV = lowest score while (R[k]==R[k+1]){ #scores do not differ k <- k+1 } while (k < b && ((R[k]-R[k+1]))/sigma2 > alpha){ #difference between SV[n] and SV[n+1] is large enough for the model to be included in the refined set k <- k+1 } #for the first k elements of the sorted SV set (lowest score first) the alpha criterion holds #those k elements form the refined SV set if (k > 1){ res <- idxs[1:k-1] } else #k==1, include only highest Bayesian Scores in Averaging procedure res <- idxs[1] return(res) } processModel_local <- function(inModel, idxs){ outModel <- list() for (i in 1:length(idxs)){ outModel[[i]] <- inModel[[idxs[i]]] } outModel[[1]]$minScoreIdx <- 1 #best model is [[1]] outModel[[1]]$SV <- inModel[[1]]$SV[idxs] return(outModel=outModel) } #default options from paper are set as fixed values all <- data.frame(cbind(actual, predicted)) N_0 <- 2 #default alpha <- 0.001 #default runSort <- 1 #default if (runSort==1){ x <- order(predicted) predicted <- predicted[x] actual <- actual[x] } N <- length(predicted) lnfact <- initlnfact_local(N+1) #output: array with length+2 elements maxbinno <- min(ceiling(N/5), ceiling(10*N^(1/3))) #max. number of bins I can have in the model, max=5 minbinno <- max(1,floor(N^(1/3)/10)) #have at least 1 bin in binning model, min. number of bins, min=1 MNM <- maxbinno-minbinno+1 #maximum number of possible binning models model <- list() model[[1]] <- list() model[[1]]$scoringFunc <- "BDeu2" opt1 <- list(PTR=predicted, lnfact=lnfact, N_0=N_0) for (b in 1:MNM){ #a binning model for each possible #bin b is created and evaluated using its BDeu2 score output_hist_calibration <- hist_calibration_freq_local(predicted, actual, b+minbinno-1) funcOpt <- buildFuncOpt_local(opt1, output_hist_calibration$histModel_all, output_hist_calibration$cutIdx, output_hist_calibration$cutPoints, output_hist_calibration$logLL) score <- get_BDeu_Score2_local(funcOpt) #should use the respective get_BDeu_Score(or BDeu2 (default)) method model[[b]] <- list(binNo=output_hist_calibration$histModel, cutIdx=output_hist_calibration$cutIdx, cutPoints=output_hist_calibration$cutPoints, score=score, logLL=output_hist_calibration$logLL) } score <- c() logLL <- c() #relationship between score and logLL.. for (i in 1:MNM){ score[i]<-model[[i]]$score logLL[i]<-model[[i]]$logLL } #which binning model has the best BDeu2 score?
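#(a single pass over all MNM candidate models records the largest and smallest BDeu2 score; the scores are then rescaled into SV values in (0,1], with the lowest, i.e. best, score mapped to 1, before the elbow() pruning step)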
maxScore <- -Inf maxScoreIdx <- 0 minScore <- Inf minScoreIdx <- 0 SV <- rep(0,MNM) #SV vector contains all scores for all evaluated models for (b in 1:MNM){ SV[b] <- model[[b]]$score if(model[[b]]$score > maxScore){ maxScoreIdx <- b maxScore <- model[[b]]$score } if (model[[b]]$score < minScore){ minScoreIdx <- b minScore <- model[[b]]$score } } #SV becomes 1 for min(SV), SV becomes smallest for largest score, model with the logLL closest to 0 SV <- exp((min(SV)-SV)/2)#SV=whole set of BDeu2 scores for each possible MNM model[[1]]$maxScoreIdx <- maxScoreIdx #first binning model (b=1) stores min and max ScoreIdxs model[[1]]$minScoreIdx <- minScoreIdx model[[1]]$SV <- SV #select only a number of models for averaging over the models idxs <- elbow(SV, alpha = alpha) #include the indexed SV scores in the refined/pruned model model2 <- processModel_local(model, idxs = idxs) #refined model p_observed <- c() p_calculated <- c() midpoint <- c() n <- c() n_1 <- c() for (i in 1:length(model2[[1]]$binNo)){ p_observed[i]<- model2[[1]]$binNo[[i]]$P_observed p_calculated[i] <- model2[[1]]$binNo[[i]]$P #smoothed prevalence value midpoint[i]<- model2[[1]]$binNo[[i]]$midpoint n[i] <- model2[[1]]$binNo[[i]]$n n_1[i] <-model2[[1]]$binNo[[i]]$n1 } bin_no <- seq(1,length(midpoint)) #significance testing p_values_binom <- unlist(apply(cbind(n_1, n),1,binom_for_histogram)) #pvalues for single bins, binom.test binning_scheme <- data.frame(cbind(bin_no,midpoint,cases=n_1,all=n, prob_case=p_calculated,p_value=p_values_binom)) for (i in 1:nrow(binning_scheme)){ if(is.nan(binning_scheme[i,6])){ binning_scheme[i,7] <- "no value" } else if(binning_scheme[i,6]<0.001){ binning_scheme[i,7] <- "***" } else if(binning_scheme[i,6]<0.01){ binning_scheme[i,7] <- "**" } else if(binning_scheme[i,6]<0.05){ binning_scheme[i,7] <- "*" } else binning_scheme[i,7] <- "ns" } colnames(binning_scheme)[7] <- c("significance") #function has to return min/max for scaling min <- min(predicted) max <- max(predicted) #quality markers calibration model calibration_points <- binning_scheme$prob_case calibration_points_sign <- binning_scheme$p_value <0.05 calibration_points_number <- length((binning_scheme$prob_case)) calibration_points_number_sign <- length((subset(binning_scheme$prob_case, binning_scheme$p_value<0.05))) calibration_range <- range(binning_scheme$prob_case) if(sum(calibration_points_sign) != 0){ calibration_range_sign <- range(subset(binning_scheme$prob_case, binning_scheme$p_value<0.05)) } else{ calibration_range_sign <- 0 } return(bbq=list(type="BBQ", model=model, prunedmodel=model2, binnning_scheme=binning_scheme, min=min, max=max, calibration_points=list(calibration_points=calibration_points,calibration_points_sign=calibration_points_sign), calibration_range=list(calibration_range=calibration_range, calibration_range_sign=calibration_range_sign), calibration_points_number=list(calibration_points_number=calibration_points_number, calibration_points_number_sign=calibration_points_number_sign))) } CalibratR/R/visualize_calibrated_test_set.R0000644000176200001440000000473413332620101020537 0ustar liggesusers#' @title visualize_calibrated_test_set #' @description plots a panel for all calibrated predictions from the respective calibration model. 
Allows visual comparison of the models output and their optimal cut off #' @param actual vector of observed class labels (0/1) #' @param predicted_list predict_calibratR$predictions object (list of calibrated predictions from calibration models) #' @param cutoffs vector of optimal cut-off thresholds for each calibration model #' @return ggplot2 element for visual comparison of the evaluated calibration models #' @seealso #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{xlab}},\code{\link[ggplot2]{ylab}},\code{\link[ggplot2]{geom_hline}},\code{\link[ggplot2]{ylim}} #' @rdname visualize_calibrated_test_set #' @importFrom ggplot2 ggplot geom_point scale_colour_manual xlab ylab geom_hline ylim visualize_calibrated_test_set <- function(actual, predicted_list, cutoffs){ plots <- list() d <- data.frame(predicted_list) d$original <- NULL plot1 <- ggplot2::ggplot()+ ggplot2::geom_point(ggplot2::aes(x=seq(1, length(actual)),y=predicted_list$original, colour=as.factor(actual)), show.legend = FALSE)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4","firebrick3"),name="Group",labels=c("Control","Case"))+ ggplot2::xlab(label="idx") + ggplot2::ylab(label="original")+ ggplot2::geom_hline(yintercept = 0.6, colour="black", linetype=3, size=0.7)+ ggplot2::geom_hline(yintercept = 0.4, colour="black", linetype=3, size=0.7)+ ggplot2::geom_hline(yintercept = cutoffs[[1]], linetype=4, colour="red") plots$original <- plot1 for (i in names(d)){ plot <- ggplot2::ggplot(data = d)+ ggplot2::geom_point(ggplot2::aes_string(x=seq(1, length(actual)),y=i, colour=as.factor(actual)), show.legend = FALSE)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4","firebrick3"),name="Group",labels=c("Control","Case"))+ ggplot2::xlab(label="idx") + ggplot2::ylim(c(0,1))+ ggplot2::ylab(label=i)+ ggplot2::geom_hline(yintercept = 0.6, colour="black", linetype=3, size=0.7)+ ggplot2::geom_hline(yintercept = 0.4, colour="black", linetype=3, size=0.7)+ ggplot2::geom_hline(yintercept = cutoffs[[i]], linetype=4, colour="red") plots[[i]] <- plot } if (any(sapply(plots, is.null))){ plots <- plots[-which(sapply(plots, is.null))] } return(plots) } CalibratR/R/get_Brier_score.R0000644000176200001440000000130613330610746015541 0ustar liggesusers#' @title get_Brier_score #' @description FUNCTION_DESCRIPTION #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @return OUTPUT_DESCRIPTION #' @details DETAILS #' @rdname get_Brier_score get_Brier_score <- function(actual, predicted){ n <- length(actual) n_1 <- length(actual==1) n_0 <- length(actual==0) sum <- 0 sum_0 <- 0 sum_1 <- 0 for (i in seq(1,n,1)){ diff <- abs((predicted[i]-actual[i]))^2 sum <- sum+diff if(actual[i]==0){ sum_0 <- sum_0+diff } else if(actual[i]==1){ sum_1 <- sum_1+diff } } return(list(brier=sum/n, brier_1=sum_1/n_1, brier_0=sum_0/n_0)) } CalibratR/R/calibratR.R0000644000176200001440000002176013332610104014343 0ustar liggesusers #' @title calibrate #' @description Builds selected calibration models on the supplied trainings values \code{actual} and \code{predicted} and returns them #' to the user. New test instances can be calibrated using the \code{\link{predict_calibratR}} function. #' Returns cross-validated calibration and discrimination error values for the models if \code{evaluate_CV_error} is set to TRUE. Repeated cross-Validation can be time-consuming. 
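#' A typical workflow (sketched here under the assumption that class labels and raw classifier
#' scores are already available) is to call \code{calibrate} once, pass its \code{calibration_models}
#' component to \code{\link{predict_calibratR}} for new scores, and summarise the repeated-CV errors
#' by handing the full return value to \code{\link{statistics_calibratR}}; see the example below.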
#' @author Johanna Schwarz #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param model_idx which calibration models should be implemented, 1=hist_scaled, 2=hist_transformed, 3=BBQ_scaled, 4=BBQ_transformed, 5=GUESS, Default: c(1, 2, 3, 4, 5) #' @param evaluate_no_CV_error computes internal errors for calibration models that were trained on all available \code{actual}/\code{predicted} tuples. Testing is performed with the same set. Be careful to interpret those error values, as they are not cross-validated. Default: TRUE #' @param evaluate_CV_error computes cross-validation error. \code{folds} times cross validation is repeated \code{n_seeds} times with changing seeds. The trained models and the their calibration and discrimination errors are returned. #' Evaluation of CV errors can take some time to compute, depending on the number of repetitions specified in \code{n_seeds}, Default: TRUE #' @param folds number of folds in the cross-validation of the calibration model. If \code{folds} is set to 1, no CV is performed and \code{summary_CV} can be calculated. Default: 10 #' @param n_seeds \code{n_seeds} determines how often random data set partition is repeated with varying seed. If \code{folds} is 1, \code{n_seeds} should be set to 1, too. Default: 30 #' @param nCores \code{nCores} how many cores should be used during parallelisation. Default: 4 #' @return A list object with the following components: #' \item{calibration_models}{a list of all trained calibration models, which can be used in the \code{\link{predict_calibratR}} method.} #' \item{summary_CV}{a list containing information on the CV errors of the implemented models} #' \item{summary_no_CV}{a list containing information on the internal errors of the implemented models} #' \item{predictions}{calibrated predictions for the original \code{predicted} values} #' \item{n_seeds}{number of random data set partitions into training and test set for \code{folds}-times CV} #' @details parallised execution of random data set splits for the Cross-Validation procedure over \code{n_seeds} #' @examples #' ## Loading dataset in environment #' data(example) #' actual <- example$actual #' predicted <- example$predicted #' #' ## Create calibration models #' calibration_model <- calibrate(actual, predicted, #' model_idx = c(1,2), #' FALSE, FALSE, folds = 10, n_seeds = 1, nCores = 2) #' @rdname calibrate #' @export #' @importFrom parallel makeCluster stopCluster #' @import foreach #' @importFrom doParallel registerDoParallel calibrate <- function(actual, predicted, model_idx=c(1,2,3,4,5), evaluate_no_CV_error=TRUE, evaluate_CV_error=TRUE, folds=10, n_seeds=30, nCores = 4 ){ set.seed(123) if (length(actual) != length(predicted)){ stop("Please make sure, that the parameters actual and predicted are of the same length.") } if (any((unique(actual)!=1) & (unique(actual)!=0))){ stop("The parameter actual contains values other than 1 or 0. Please code your class labels accordingly.") } if(evaluate_CV_error==FALSE & (!is.null(folds)|| !is.null(n_seeds))){ warning("No Cross-Validation is performed, but parameters folds or n_seeds are specified. 
If you want to perform CV, please set evaluate_CV_error TRUE.") } predicted <- unname(predicted) #original values original_values <- list(actual=actual, predicted=predicted) #build selected models on all data and predict for all data (=no CV) models_final <- list() cal_models_final <- list() for (i in model_idx){ models_final <- c(models_final, calibrate_me(actual=actual, predicted=predicted, model_idx=i)) } cal_models_final <- list(original_values=original_values, models_final=models_final) predictions <- predict_calibratR(cal_models_final, predicted, nCores) #performs x-fold CV and returns error values if(evaluate_CV_error){ error <- c() t0_error <- list() t0_error[["calibration"]] <- list() t0_error[["discrimination"]] <- list() y <- 1 #uncalibrated #parallize the foreach loop NumberOfCluster <- nCores # how many jobs you want the computer to run at the same time cl <- parallel::makeCluster(NumberOfCluster) # use the above cluster # your parallel programming code code code stopCluster(cl) # close clusters doParallel::registerDoParallel(cl) `%dopar%` <- foreach::`%dopar%` comb <- function(x, ...) { lapply(seq_along(x), function(i) c(x[[i]], lapply(list(...), function(y) y[[i]]))) } error <- foreach::foreach(i=seq(1,n_seeds,1), .packages = "CalibratR", .combine='comb', .multicombine=TRUE, .init=list(list(), list(), list()), .final = function(x) setNames(x, c("original", "scaled", "transformed"))) %dopar% { original <- uncalibrated_CV(actual, predicted, n_folds=folds, seed=i, input=0) scaled <- uncalibrated_CV(actual, predicted, n_folds=folds, seed=i, input=1) transformed <- uncalibrated_CV(actual, predicted, n_folds=folds, seed=i, input=2) return(list(original=original, scaled=scaled, transformed=transformed)) } parallel::stopCluster(cl) ##uncalibrated predictions for (model in error){ calibration_df <- data.frame() discrimination_df <- data.frame() for(i in seq(1,length(model),1)){ calibration_df <- rbind(calibration_df,unlist(model[[i]]$error$calibration_error)) discrimination_df <- rbind(discrimination_df,unlist(model[[i]]$error$discrimination_error)) } colnames(discrimination_df) <- names(model[[1]]$error$discrimination_error) colnames(calibration_df) <- names(model[[1]]$error$calibration_error) t0_error$calibration[[names(error)[y]]] <- calibration_df t0_error$discrimination[[names(error)[y]]] <- discrimination_df y <- y+1 } #build calibration models y <- 1 error_calibrated <- calibrate_me_CV_errors(actual, predicted, model_idx, folds, n_seeds,nCores) for(model in error_calibrated){ if(!length(model)==0){ calibration_df <- data.frame() discrimination_df <- data.frame() for(i in seq(1,length(model),1)){ calibration_df <- rbind(calibration_df,unlist(model[[i]]$error$calibration_error)) discrimination_df <- rbind(discrimination_df,unlist(model[[i]]$error$discrimination_error)) } colnames(discrimination_df) <- names(model[[1]]$error$discrimination_error) colnames(calibration_df) <- names(model[[1]]$error$calibration_error) t0_error$calibration[[names(error_calibrated)[y]]] <- calibration_df t0_error$discrimination[[names(error_calibrated)[y]]] <- discrimination_df y <- y+1 } else{ y <- y+1 } }} else{ error <- NULL error_calibrated <- NULL t0_error <- NULL } #calculates error values on training set if(evaluate_no_CV_error){ training_error <- c() y <- 1 error_values_no_CV <- list() error_values_no_CV[["calibration"]] <- list() error_values_no_CV[["discrimination"]] <- list() for (i in predictions){ training_error <- c(training_error, list(reliability_diagramm(actual, i))) } 
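#each entry of training_error holds the reliability_diagramm() output for one prediction vector evaluated against the full training labels; these are apparent (non-cross-validated) errors and should be interpreted with care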
names(training_error) <- names(predictions) for (i in training_error){ error_values_no_CV$calibration[[names(training_error)[y]]] <- unlist(i$calibration_error) error_values_no_CV$discrimination[[names(training_error)[y]]] <- unlist(i$discrimination_error) y <- y+1 } df_calibration_no_CV <- t(data.frame(error_values_no_CV$calibration)) df_discrimination_no_CV <- t(data.frame(error_values_no_CV$discrimination)) } else{ df_calibration_no_CV <- NULL df_discrimination_no_CV <- NULL training_error <- NULL } res <- list(calibration_models=cal_models_final, summary_CV=list(models=list(uncalibrated=error, calibrated=error_calibrated),error_models=t0_error, folds=folds), summary_no_CV=list(calibration_error=df_calibration_no_CV, discrimination_error=df_discrimination_no_CV, list_errors=training_error), predictions=predictions, n_seeds=n_seeds) return(res) } CalibratR/R/format_values.R0000644000176200001440000000755713332610103015316 0ustar liggesusers#' @title format_values #' @description returns formatted input. #' If specified, the uncalibrated input is mapped to the [0;1] range using scaling (\code{\link{scale_me}}) or transforming (\code{\link{transform_me}}) #' @param cases instances from class 1 #' @param control instances from class 0 #' @param input single integer (0, 1 or 2). specify if the input should be formatted (=0), formatted and scaled (=1) #' or formatted and transformed (=2) #' @param min min value of the original data set, default=calculated on input #' @param max max value of the original data set, default=calculated on input #' @param mean mean value of the original data set, default=calculated on input #' @return list object with the following components: #' \item{formated_values}{formatted input. If \code{input} is set to 1 (2), the input is additionally scaled (transformed) using the #' method \code{\link{scale_me}} (\code{\link{transform_me}})} #' \item{min}{minimum value among all instances} #' \item{max}{maximum value among all instances} #' \item{mean}{mean value among all instances} #' @rdname format_values format_values <- function(cases, control, input, min=NULL, max=NULL, mean=NULL){ simulation <- c(cases[,2], control[,2]) simulation_real <- c(cases[,1], control[,1]) #return min/max for scaling if (is.null(max) || is.null(min)|| is.null(mean)){ min <- min(min(cases), min(control), na.rm=TRUE) max <- max(max(cases), max(control), na.rm=TRUE) mean <- mean(simulation, na.rm = TRUE) } if (input==0){ output <- cbind(simulation_real, simulation) } else if (input==1){ output <- cbind(simulation_real, scale_me(simulation, min=min, max=max)) } else if (input==2){ output <- cbind(simulation_real, transform_me(simulation, mean=mean)) } return(list(formated_values=output, min=min, max=max, mean=mean)) } #' @title transform_me #' @description maps all instances in \code{x_unscaled} to the [0;1] range using the equation: #' \cr y=exp(x)/(1+exp(x)) #' @param x_unscaled vector of predictions #' @param mean mean of \code{x} #' @return transformed values of \code{x_unscaled} #' @details values greater then exp(700)/ or smaller then exp(-700) are returned as "Inf". To avoid NaN values, these "Inf." values are turned into min(y) or max(y). 
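#' A small worked sketch (the numbers are illustrative only; both functions are internal, hence \code{CalibratR:::}):
#' @examples
#' \dontrun{
#' x <- c(-2, 0, 1, 3)
#' CalibratR:::transform_me(x, mean = mean(x))  # centred logistic map: approx. 0.076 0.378 0.622 0.924
#' CalibratR:::scale_me(x)                      # min/max scaling: 0.0 0.4 0.6 1.0
#' }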
#' @rdname transform_me transform_me <- function(x_unscaled, mean){ #center first, subtract mean of x_unscaled from all x_unscaled values to center around 0 x <- scale(x_unscaled, center=mean, scale=FALSE)[,] #transform x y <- exp(x)/(1+exp(x)) for (i in 1:length(y)){ if (is.nan(y[i]) && x[i]>0){ y[i] <- max(y, na.rm=TRUE) } if (is.nan(y[i]) && x[i]<0){ y[i] <- min(y, na.rm=TRUE) } } return(y) } #' @title scale_me #' @description maps all instances in \code{x} to the [0;1] range using the equation: #' \cr y = (x-min)/(max-min) #' \cr If no values for min and max are given, they are calculated per default as min=min(x) and max=max(x) #' @param x vector of predictions #' @param min minimum of \code{x}, Default: NULL #' @param max maximum of \code{x}, Default: NULL #' @return scaled values of \code{x} #' @details if \code{x} is greater (smaller) than \code{max} (\code{min}), its calibrated prediction is set to 1 (0) and warning is triggered. #' @rdname scale_me scale_me <- function(x, min=NULL, max=NULL){ if (is.null(max) || is.null(min)){ max <- max(x, na.rm=TRUE) min <- min(x, na.rm=TRUE) } y <- (x-min)/(max-min) if(any(y<0)){ y[(y<0)] <- 0 warning("A new instance exceeded the min value of the calibration training model and was set to 0 value") } if(any(y>1)){ y[(y>1)] <- 1 warning("A new instance exceeded the max value of the calibration training model and was set to 1 value") } return(y) } CalibratR/R/statistics_calibratR.R0000644000176200001440000003454413332316412016626 0ustar liggesusers#' @title statistics_calibratR #' @description this method offers a variety of statistical evaluation methods for the output of the \code{\link{calibrate}} method. #' All returned error values represent mean error values over the \code{n_seeds} times repeated 10-fold CV. #' @author Johanna Schwarz #' @param calibrate_object list that is returned from the \code{\link{calibrate}} function. The parameter \code{n_seeds} is available as a list component of the \code{calibrate_object} #' @param t.test_partitions Performs a paired two sided t.test over the error values (ECE, CLE1, CLE0, MCE, AUC, sensitivity and specificity) from the #' random partition splits comparing a possible significant difference in mean among the calibration models. All models and the original, scaled and transformed values are tested against each other. #' The p_value and the effect size of the t.test are returned to the user. Can only be performed, if the \code{calibrate_object} contains a \code{summary_CV} list object, else, an error is returned. Default: TRUE #' @param significance_models returns important characteristics of the implemented calibration models, Default: TRUE #' @return An object of class list, with the following components: #' \item{mean_calibration}{mean of calibration error values (ECE_equal_width, MCE_equal_width, ECE_equal_freq, MCE_equal_freq, RMSE, Class 1 CLE, Class 0 CLE, Brier Score, Class 1 Brier Score, Class 0 Brier Score) over \code{n_seeds} times repeated 10-fold CV. #' ECE and MCE are computed once using equal-width and once using equal-frequency binning for the construction of the underlying binning scheme. #' Only returned, if \code{calibrate_object} contains a summary_CV list object.} #' \item{standard_deviation}{standard deviation of calibration error values over \code{n_seeds} times repeated 10-fold CV. 
Only returned, if \code{calibrate_object} contains a summary_CV list object.} #' \item{var_coeff_calibration}{variation coefficient of calibration error values over \code{n_seeds} times repeated 10-fold CV. Only returned, if \code{calibrate_object} contains a summary_CV list object.} #' \item{mean_discrimination}{mean of discrimination error (sensitivity, specificity, AUC, positive predictive value, negative predictive value, accuracy) values over \code{n_seeds} times repeated 10-fold CV. The "cut-off" is #' the cut-off value that maximizes sensitivity and specificity. Only returned, if \code{calibrate_object} contains a summary_CV list object.} #' \item{sd_discrimination}{standard deviation of discrimination error values over \code{n_seeds} times repeated 10-fold CV. Only returned, if \code{calibrate_object} contains a summary_CV list object.} #' \item{var_coeff_discrimination}{variation coefficient of discrimination error values over \code{n_seeds} times repeated 10-fold CV. Only returned, if \code{calibrate_object} contains a summary_CV list object.} #' \item{t.test_calibration}{=list(p_value=t.test.calibration, effect_size=effect_size_calibration), only returned if t.test=TRUE} #' \item{t.test_discrimination}{=list(p_value=t.test.discrimination, effect_size=effect_size_discrimination), only returned if t.test=TRUE} #' \item{significance_models}{only returned if significance_models=TRUE} #' \item{n_seeds}{number of random data set partitions into training and test set for \code{folds}-times CV} #' \item{original_values}{list object that consists of the \code{actual} and \code{predicted} values of the original scores} #' @details DETAILS #' @examples #' ## Loading dataset in environment #' data(example) #' calibration_model <- example$calibration_model #' #' statistics <- statistics_calibratR(calibration_model) #' @seealso #' \code{\link[stats]{t.test}},\code{\link[stats]{friedman.test}} #' @rdname statistics_calibratR #' @export #' @importFrom stats t.test sd statistics_calibratR <- function(calibrate_object, t.test_partitions=TRUE, significance_models=TRUE){ if(!is.null(calibrate_object$summary_CV$models$calibrated)){ ##data preparation means_calibration <- data.frame() sd_calibration <- data.frame() var_coeff_calibration <- data.frame() compare_ece <- data.frame() compare_mce <- data.frame() compare_auc <- data.frame() compare_rmse <- data.frame() compare_cle1 <- data.frame() compare_cle0 <- data.frame() compare_sens <- data.frame() compare_spec <- data.frame() for(i in calibrate_object$summary_CV$error_models$calibration){ compare_ece <- rbind(compare_ece,i$ECE_equal_width) compare_cle1 <- rbind(compare_cle1,i$CLE_class_1) compare_cle0 <- rbind(compare_cle0,i$CLE_class_0) compare_mce <- rbind(compare_mce, i$MCE_equal_width) compare_rmse <- rbind(compare_rmse, i$RMSE) means_calibration <- rbind(means_calibration, apply(i,2, mean)) sd_calibration <- rbind(sd_calibration, apply(i,2, sd)) } model_names <- names(calibrate_object$summary_CV$error_models$calibration) n_runs <- seq(1,calibrate_object$n_seeds,1) names_calibration_errors <- names(calibrate_object$summary_CV$error_models$calibration[[1]]) names_discrimination_errors <- names(calibrate_object$summary_CV$error_models$discrimination[[1]]) rownames(compare_ece) <- model_names colnames(compare_ece) <- n_runs rownames(compare_rmse) <- model_names colnames(compare_rmse) <- n_runs rownames(compare_cle1) <- model_names colnames(compare_cle1) <- n_runs rownames(compare_cle0) <- model_names colnames(compare_cle0) <- n_runs 
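#each compare_* data frame holds one row per calibration model and one column per random CV partition (seed); these matrices are the raw input for the paired t-tests over partitions computed further below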
rownames(compare_mce) <- model_names colnames(compare_mce) <- n_runs rownames(means_calibration) <- model_names colnames(means_calibration) <- names_calibration_errors rownames(sd_calibration) <- model_names colnames(sd_calibration) <- names_calibration_errors var_coeff_calibration <- sd_calibration/means_calibration means_discrimination <- data.frame() sd_discrimination <- data.frame() var_coeff_discrimination <- data.frame() for(i in calibrate_object$summary_CV$error_models$discrimination){ means_discrimination <- rbind(means_discrimination, apply(i,2, mean)) sd_discrimination <- rbind(sd_discrimination, apply(i,2, sd)) compare_auc <- rbind(compare_auc,i$auc) compare_sens <- rbind(compare_sens,i$sens) compare_spec <-rbind(compare_spec,i$spec) } rownames(compare_auc) <- model_names colnames(compare_auc) <- n_runs rownames(compare_sens) <- model_names colnames(compare_sens) <- n_runs rownames(compare_spec) <- model_names colnames(compare_spec) <- n_runs rownames(means_discrimination) <- model_names colnames(means_discrimination) <- names_discrimination_errors rownames(sd_discrimination) <- model_names colnames(sd_discrimination) <- names_discrimination_errors var_coeff_discrimination <- sd_discrimination/means_discrimination all_calibration_errors <- list(ece=compare_ece, cle0=compare_cle0, cle1=compare_cle1, mce=compare_mce, rmse=compare_rmse) all_discrimination_errors <- list(auc=compare_auc, sens=compare_sens, spec=compare_spec) ## perform paired t.test for all models if(t.test_partitions){ t.test_partitions_cal <- list() t.test_partitions_dis <- list() t.test.calibration <- list() t.test.discrimination <- list() effect_size_calibration <- list() effect_size_discrimination <- list() z <- 1 a <- 1 for (i in all_calibration_errors){ t.test.all <- c() effect_size_all <- c() for (w in seq(1, nrow(i),1)){ t.test <- c() effect_size <-c() mean <- c() sd <- c() for (y in seq(1, nrow(i),1)){ #if observation for i[y,] are unique, do not perform t test or else it will crash if(length(unique(as.numeric(i[y,])))==1){ t.test <- c(t.test, NA) effect_size <- c(effect_size, NA) } else{ t.test_result <- stats::t.test(as.numeric(i[w,]), as.numeric(i[y,]), paired=TRUE) t.test <- c(t.test, round(t.test_result$p.value, 4)) effect_size <- c(effect_size, round(t.test_result$estimate,4)) } mean <- c(mean, mean(as.numeric(i[y,]))) sd <- c(sd, sd(as.numeric(i[y,]))) } t.test.all <- cbind(t.test.all, t.test) effect_size_all <- cbind(effect_size_all, effect_size) t.test_partitions_cal[[z]] <- cbind(i, mean=mean, sd=sd, rank_mean=rank(mean)) z <- z+1 } row.names(t.test.all) <- model_names colnames(t.test.all) <- model_names row.names(effect_size_all) <- model_names colnames(effect_size_all) <- model_names t.test.calibration[[a]] <- cbind(t.test.all, mean=round(mean,4), rank_mean=rank(mean)) effect_size_calibration[[a]] <- effect_size_all a <- a+1 } names(t.test.calibration) <- c("ece", "cle0", "cle1", "mce", "rmse") names(t.test_partitions_cal) <- c("ece", "cle0", "cle1", "mce", "rmse") names(effect_size_calibration) <- c("ece", "cle0", "cle1", "mce", "rmse") z <- 1 a <- 1 for (i in all_discrimination_errors){ t.test.all <- c() effect_size_all <- c() for (w in seq(1, nrow(i),1)){ t.test <- c() effect_size <- c() mean <- c() sd <- c() for (y in seq(1, nrow(i),1)){ #if observation for i[y,] are unique, do not perform t test or else it will crash if(length(unique(as.numeric(i[y,])))==1){ t.test <- c(t.test, NA) effect_size <- c(effect_size, NA) } else{ t.test_result <- stats::t.test(as.numeric(i[w,]), 
as.numeric(i[y,]), paired=TRUE) t.test <- c(t.test, round(t.test_result$p.value, 4)) effect_size <- c(effect_size, round(t.test_result$estimate,4)) } mean <- c(mean, mean(as.numeric(i[y,]))) sd <- c(sd, sd(as.numeric(i[y,]))) } t.test.all <- cbind(t.test.all, t.test) effect_size_all <- cbind(effect_size_all, effect_size) t.test_partitions_dis[[z]] <- cbind(i, mean=mean, sd=sd, rank_mean=rank(-mean)) z <- z+1 } row.names(t.test.all) <- model_names colnames(t.test.all) <- model_names row.names(effect_size_all) <- model_names colnames(effect_size_all) <- model_names t.test.discrimination[[a]] <- cbind(t.test.all, mean=round(mean,4), rank_mean=rank(-mean)) effect_size_discrimination[[a]] <- effect_size_all a <- a+1 } names(t.test.discrimination) <- c("auc", "sens","spec") names(t.test_partitions_dis) <- c("auc", "sens","spec") names(effect_size_discrimination) <- c("auc", "sens","spec") } else { t.test_partitions_cal <- NULL t.test_partitions_dis <- NULL t.test.calibration <- NULL t.test.discrimination <- NULL effect_size_calibration <- NULL effect_size_discrimination <- NULL } } else{ if(t.test_partitions==TRUE){ warning("No error values from repeated CVs are available in the trained calibration models. No t-test can be performed. Please make sure that the calibrate_object containes a summary_CV list object.") } t.test_partitions_cal <- NULL t.test_partitions_dis <- NULL t.test.calibration <- NULL t.test.discrimination <- NULL effect_size_calibration <- NULL effect_size_discrimination <- NULL means_calibration <- NULL sd_calibration <- NULL means_discrimination <- NULL sd_discrimination <- NULL var_coeff_calibration <- NULL var_coeff_discrimination <- NULL } if(significance_models){ sign_model <- list() sign_model[["hist_scaled"]] <- list() sign_model[["hist_scaled"]] <- c(calibrate_object$calibration_models$models_final$hist_scaled$calibration_points, calibrate_object$calibration_models$models_final$hist_scaled$calibration_points_number, calibrate_object$calibration_models$models_final$hist_scaled$calibration_range) sign_model[["hist_transformed"]] <- list() sign_model[["hist_transformed"]] <- c(calibrate_object$calibration_models$models_final$hist_transformed$calibration_points, calibrate_object$calibration_models$models_final$hist_transformed$calibration_points_number, calibrate_object$calibration_models$models_final$hist_transformed$calibration_range) sign_model[["BBQ_scaled"]] <- list() sign_model[["BBQ_scaled"]] <- c(calibrate_object$calibration_models$models_final$BBQ_scaled$calibration_points, calibrate_object$calibration_models$models_final$BBQ_scaled$calibration_points_number, calibrate_object$calibration_models$models_final$BBQ_scaled$calibration_range) sign_model[["BBQ_transformed"]] <- list() sign_model[["BBQ_transformed"]] <- c(calibrate_object$calibration_models$models_final$BBQ_transformed$calibration_points, calibrate_object$calibration_models$models_final$BBQ_transformed$calibration_points_number, calibrate_object$calibration_models$models_final$BBQ_transformed$calibration_range) sign_model[["GUESS"]] <- list() sign_model[["GUESS"]] <- list(crit_boundaries=calibrate_object$calibration_models$models_final$GUESS$t_crit, sign_train_set=calibrate_object$calibration_models$models_final$GUESS$sign_train_set) } else { sign_model <- NULL } return(list(mean_calibration=means_calibration, sd_calibration=sd_calibration, var_coeff_calibration=var_coeff_calibration, mean_discrimination=means_discrimination, sd_discrimination=sd_discrimination, 
var_coeff_discrimination=var_coeff_discrimination, t.test_calibration=list(p_value=t.test.calibration, effect_size=effect_size_calibration), t.test_discrimination=list(p_value=t.test.discrimination, effect_size=effect_size_discrimination), significance_models=sign_model, n_seeds=calibrate_object$n_seeds, original_values=calibrate_object$calibration_models$original_values)) } CalibratR/R/evaluate_discrimination.R0000644000176200001440000000606513332610104017343 0ustar liggesusers#' @title evaluate_discrimination #' @description computes various discrimination error values, namely: sensitivity, specificity, accuracy, positive predictive value (ppv), negative predictive value (npv) and AUC #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param cutoff cut-off to be used for the computation of npv, ppv, sensitivity and specificity, Default: value that maximizes sensitivity and specificity (Youden-Index) #' @return list object with the following components: #' \item{sens}{sensitivity} #' \item{spec}{specificity} #' \item{acc}{accuracy} #' \item{ppv}{positive predictive value} #' \item{npv}{negative predictive value} #' \item{cutoff}{cut-off that was used to compute the error values} #' \item{auc}{AUC value} #' @seealso #' \code{\link[pROC]{roc}} #' @rdname evaluate_discrimination #' @importFrom pROC roc #' evaluate_discrimination <- function(actual, predicted, cutoff=NULL){ ###local functions### getAUC <- function(actual, predicted){ if (length(unique(actual))!=2||max(unique(actual))!=1){ #actual hast to be 0 or 1 warning("strange input") } nTarget <- length(which(actual==1)) #how many y==1 nBackground <- length(which(actual!=1)) #how many y==0 #Rank data R <- rank(predicted, ties.method = "average") #Calculate AUC using Wilcoxon Signed Rank Test AUC <- (sum(R[which(actual==1)])-(nTarget^2+nTarget)/2) / (nTarget*nBackground) #Ranksum AUC <- max (AUC, 1-AUC) } discriminate <- function(i, cutoff){ #decides on i's 1/0 class membership by checking, if i is greater than the threshold value cutoff if (i>cutoff){ class <- 1 } else class <- 0 return(class) } if(is.null(cutoff)){ roc <- pROC::roc(actual, predicted) youden <- which.max(roc$sensitivities + roc$specificities-1) #calculate maximum of Youden Index cutoff <- roc$thresholds[youden] } else{ youden <- cutoff } output_class <- sapply(predicted, discriminate, cutoff=cutoff) true_positives <- which(actual==1) true_negatives <- which(actual==0) #sensitivity, specificity sens <- sum(output_class[true_positives]==1)/length(true_positives) spec <- sum(output_class[true_negatives]==0)/length(true_negatives) false_positive <- sum(output_class[true_negatives]==1)/length(true_negatives) false_negative <- sum(output_class[true_positives]==0)/length(true_positives) ppv <- sum(output_class[true_positives]==1)/(sum(output_class[true_positives]==1)+sum(output_class[true_negatives]==1)) npv <- sum(output_class[true_negatives]==0)/(sum(output_class[true_positives]==0)+sum(output_class[true_negatives]==0)) all <- length(actual) #AUC auc <- getAUC(actual, predicted) #accuracy acc <- (sum(output_class[true_positives]==1)+sum(output_class[true_negatives]==0))/all error_list <- list(sens=sens, spec=spec, acc=acc, ppv=ppv, npv=npv, cutoff=cutoff, auc=auc) rounded_list <- lapply(error_list,FUN=round,3) return(rounded_list) } CalibratR/R/example.R0000644000176200001440000000104313330165710014071 0ustar liggesusers#' @title example #' @description list object containing 1) the simulated classifiers for two 
classes. Distributions are simulated from Gaussian distributions with #' Normal(mean=1.5, sd=0) for class 1 and Normal(mean=0, sd=0) for class 0 instances. Each class consists of 100 instances. #' and 2) A test set of 100 instances #' @name example #' @format \code{predicted}=vector of 200 simulated classifier values; \code{actual}=their respective true class labels (0/1) #' @docType data #' @usage data(example) #' @keywords datasets NULL CalibratR/R/getMCE.R0000644000176200001440000000642013526505204013551 0ustar liggesusers#' @title getMCE #' @description Maximum Calibration Error (MCE), returns maximum calibration error for equal-frequency binning model #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param n_bins number of bins of the underlying equal-frequency histogram, Default: 10 #' @return equal-frequency MCE value #' @rdname getMCE #' @export #' @importFrom graphics hist getMCE <- function(actual, predicted, n_bins=10){ predicted <- predicted labels <- actual idx <- order(predicted) pred_actual <- (cbind(predicted[idx], actual[idx])) N <- nrow(pred_actual) rest <- N%%n_bins B <- min(N,n_bins) S <- 0 W <- c() for (i in 1:B){ #i von 1 bis B if (i <= rest){ #put rest elements into each bin group_pred <- (pred_actual[(((i-1)*ceiling(N/n_bins)+1) : (i*ceiling(N/n_bins))),1]) group_actual <- (pred_actual[(((i-1)*ceiling(N/n_bins)+1) : (i*ceiling(N/n_bins))),2]) } else { group_pred <- (pred_actual[((rest+(i-1)*floor(N/n_bins)+1) : (rest+i*floor(N/n_bins))),1])#group size=N/B group_actual <- (pred_actual[((rest+(i-1)*floor(N/n_bins)+1) : (rest+i*floor(N/n_bins))),2]) } n <- length(group_pred) expected <- mean(group_pred) #mean of predictions in bin b observed <- mean(group_actual) #true fraction of pos.instances = prevalence in bin b S[i] <- abs(observed-expected) #absolut difference of observed value-predicted value in bin W[i] <- n/N #empirical frequence of all instances that fall into bin i, should be pretty much the same among all bins } res <- max(S*W) return(res) } #' @title get_MCE_equal_width #' @description Maximum Calibration Error (MCE), returns maximum calibration error for equal-width binning model #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param bins number of bins for the binning model #' @return equal-width MCE value #' @rdname get_MCE_equal_width #' @importFrom graphics hist get_MCE_equal_width <- function(actual, predicted, bins=10){ #equal width bins predicted <- predicted labels <- actual idx <- order(predicted) pred_actual <- (cbind(predicted[idx], labels[idx])) hist_x <- hist(pred_actual[,1],breaks=bins, plot=F) breaks_y <- hist_x$breaks y_true <- hist(subset(pred_actual[,1], pred_actual[,2]=="1"),breaks=breaks_y, plot=F) divided <- cut(pred_actual[,1], breaks=c(hist_x$breaks),label = seq(1,length(y_true$mids)),include.lowest = T) prediction_in_bin <- list() expected <- c() for (i in as.numeric(levels(divided))){ prediction_in_bin[[i]] <- pred_actual[which(divided==i),1] #expected[i] <- hist_x$mids[i] #mean prediction in that bin expected[i] <- mean(pred_actual[which(divided==i),1]) #mean prediction in that bin } counts_all <- hist_x$counts counts_true <- y_true$counts zeros <- which(counts_all==0) prevalence <- counts_true/counts_all prevalence[zeros] <- 0 #set prevalence to 0 when no observations are in the bin expected[zeros] <- hist_x$mids[zeros] #set expectation to the mid bin point, when no elements are in bin S_2 <- 
abs(prevalence-expected) W_2 <- counts_all/(length(predicted)) return(max(S_2*W_2)) } CalibratR/R/predict_calibratR_parallel.R0000644000176200001440000001020313332610103017716 0ustar liggesusers#' @title predict_calibratR #' @description maps the uncalibrated predictions \code{new} into calibrated predictions using the passed over \code{calibration models} #' @author Johanna Schwarz #' @param calibration_models list of trained calibration models that were constructed using the \code{\link{calibrate}} method. #' The list components \code{calibration_models} from the \code{\link{calibrate}} output can be used directly. #' @param new vector of new uncalibrated instances. Default: 100 scores from the minimum to the maximum of the original ML scores #' @param nCores \code{nCores} how many cores should be used during parallelisation. Default: 4 #' @return list object with the following components: #' \item{predictions}{a list containing the calibrated predictions for each calibration model} #' \item{significance_test_set}{a list containing the percentage of \code{new} instances for which prediction estimates are statistically significant} #' \item{pred_per_bin}{a list containing the number of instances in each bin for the binning models} #' @details if no \code{new} value is given, the function will evaluate a sequence of numbers ranging from the minimum to the maximum of the original values in the training set #' @examples #' ## Loading dataset in environment #' data(example) #' test_set <- example$test_set #' calibration_model <- example$calibration_model #' #' ## Predict for test set #' predictions <- predict_calibratR(calibration_model$calibration_models, new=test_set, nCores = 2) #' #' @rdname predict_calibratR #' @export #' @importFrom parallel makeCluster stopCluster #' @import foreach #' @importFrom doParallel registerDoParallel predict_calibratR <- function(calibration_models, new=NULL, nCores=4){ min <- min(calibration_models$original_values$predicted) max <- max(calibration_models$original_values$predicted) mean <- mean(calibration_models$original_values$predicted) #default: if no seq is given, evaluate from min to max value of original input score if(is.null(new)){ step_size <- (max-min)/100 #evaluate 100 scores new <- seq(min, max, step_size) } #calibrated predictions, inputtype 1=scaled, 2=transformed, 0=original NumberOfCluster <- nCores # how many jobs you want the computer to run at the same time cl <- parallel::makeCluster(NumberOfCluster) # use the above cluster # your parallel programming code code code stopCluster(cl) # close clusters doParallel::registerDoParallel(cl) `%dopar%` <- foreach::`%dopar%` i <- NULL predictions_calibrated <- foreach::foreach(i=seq(1, length(calibration_models$models),1), .packages = "CalibratR") %dopar% { pred <- predict_model(new, calibration_models$models[[i]], min, max, mean, calibration_models$models[[i]]$inputtype) return(pred) } parallel::stopCluster(cl) names(predictions_calibrated) <- names(calibration_models$models) #restructure predictions_calibrated predictions_calibrated[["hist_scaled"]] <- predictions_calibrated$hist_scaled$predictions predictions_calibrated[["hist_transformed"]] <- predictions_calibrated$hist_transformed$predictions predictions_calibrated[["BBQ_scaled_sel"]] <- predictions_calibrated$BBQ_scaled$BBQ_sel$predictions predictions_calibrated[["BBQ_scaled_avg"]] <- predictions_calibrated$BBQ_scaled$BBQ_avg$predictions predictions_calibrated$BBQ_scaled <- NULL predictions_calibrated[["BBQ_transformed_sel"]] <- 
predictions_calibrated$BBQ_transformed$BBQ_sel$predictions predictions_calibrated[["BBQ_transformed_avg"]] <- predictions_calibrated$BBQ_transformed$BBQ_avg$predictions predictions_calibrated$BBQ_transformed <- NULL predictions_calibrated[["GUESS_1"]] <- predictions_calibrated$GUESS$GUESS_1$predictions predictions_calibrated[["GUESS_2"]] <- predictions_calibrated$GUESS$GUESS_2$predictions predictions_calibrated$GUESS <- NULL #uncalibrated predictions predictions <- list() predictions[["original"]] <- new predictions[["scaled"]] <- scale_me(new, min, max) predictions[["transformed"]] <- transform_me(new, mean) predictions <- c(predictions, predictions_calibrated) return(predictions) } CalibratR/R/predict_BBQ.R0000644000176200001440000000712213332610103014551 0ustar liggesusers#' @title predict_BBQ #' @description FUNCTION_DESCRIPTION #' @param bbq output from the \code{\link{build_BBQ}} method #' @param new vector of uncalibrated probabilities #' @param option either 1 or 0; averaging=1, selecting=0 #' @return a list object containing the following components: #' \item{predictions}{contains a vector of calibrated predictions} #' \item{pred_idx}{which option was used (averaging or selecting)} #' \item{significance_test_set}{the percentage of \code{new} instances that was evaluated using significant prediction estimates} #' \item{pred_per_bin}{number of instances \code{new} in each bin of the selected model} #' @details Based on the paper (and matlab code) : "Obtaining Well Calibrated Probabilities Using Bayesian Binning" by Naeini, Cooper and Hauskrecht: ; https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4410090/ #' @rdname predict_BBQ predict_BBQ <- function(bbq, new, option){ ###local functions### getHistPr <- function(histModel, cutPoints, new){ N <- length(new) #new elements to be predicted B <- length(histModel) #how many bins are in the model cutPoints <- c(0,cutPoints,1) res <- rep(0,N) for (i in 1:N){ #for each new element N x <- new[i] minIdx <- 1 maxIdx <- B+1 #in which bin does my element belong? while ((maxIdx - minIdx)>1){ midIdx <- floor((minIdx+maxIdx)/2) #I start looking in the middle if(x>cutPoints[midIdx]){ minIdx <- midIdx } else if(x < cutPoints[midIdx]){ maxIdx <- midIdx } else{ minIdx <- midIdx break } } idx <- minIdx res[i] <- histModel[[idx]]$P #assign class prob P for bin idx to new resultat for element i #handling odd cases, not really relevant according to paper? 
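#(bins whose min/max boundaries coincide with those of the selected bin also contribute their P; the accumulated value is averaged over cnt directly below)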
cnt <- 1 k <- idx -1 while (k>=1){ if (histModel[[k]]$min==histModel[[idx]]$min && histModel[[k]]$max==histModel[[idx]]$max){ res[i] <- res[i] + histModel[[k]]$P k <- k+1 cnt <- cnt+1 } else break } res[i] <- res[i]/cnt } return(res) } getMA <- function(BBQ_Model, x){ #get Model average N <- length(BBQ_Model) #how many models p <- rep(0,N) SV <- BBQ_Model[[1]]$SV #all the scores for all models for(i in 1:N){ #get the probs for the new prediction from all evaluated models p[i] <- getHistPr(BBQ_Model[[i]]$binNo, BBQ_Model[[i]]$cutPoints, x) } #output average p res <- (t(SV)%*%p)/sum(SV) #transpose and matrix multiplication } out <- rep(0, length(new)) BBQ_Model <- bbq$prunedmodel if (option==1){#option for averaging for (i in 1:length(new)){ out[i] <- getMA(BBQ_Model, new[i]) } #percentage of significant predictions for test set if best model is used sign_test_set <- NULL new_bin <- NULL } if(option==0){#option for selection for (i in 1:length(new)){ out[i] <- getHistPr(BBQ_Model[[1]]$binNo, BBQ_Model[[1]]$cutPoints, new[i]) } #percentage of significant predictions for test set if best model is used significant_bins <- subset(bbq$binnning_scheme$bin_no, bbq$binnning_scheme$p_value<0.05) new_bin <- cut(new, c(0,BBQ_Model[[1]]$cutPoints,1),labels = seq(1,length(bbq$binnning_scheme$midpoint)),include.lowest = T) sign_test_set <- sum(table(new_bin)[significant_bins])/(sum(table(new_bin))) } return(list(predictions=out, pred_idx=option, significance_test_set=sign_test_set, pred_per_bin=table(new_bin))) } CalibratR/R/no_calibration_CV.R0000644000176200001440000000626713332607460016033 0ustar liggesusers#' @title uncalibrated_CV #' @description performs \code{n_folds}-CV but with only input-preprocessing the test set. No calibration model is trained and evaluated in this method. #' The \code{predicted} values are partitioned into n subsets. The training set is constructed on (n-1) subsets; the remaining set is used #' for testing. Since no calibration model is used in this method, the test set predictions are only input-preprocessed (either scaled or transformed, depending on \code{input}). #' All test set predictions are merged and used to compute error metrics for the input-preprocessing methods. 
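#' A minimal sketch of a single run (using the bundled \code{example} data; the function is internal, hence \code{CalibratR:::}):
#' @examples
#' \dontrun{
#' data(example)
#' res <- CalibratR:::uncalibrated_CV(example$actual, example$predicted,
#'                                    n_folds = 10, seed = 123, input = 1)  # input=1: scaled
#' str(res$error$calibration_error)
#' str(res$error$discrimination_error)
#' }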
#' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param n_folds number of folds for the cross-validation, Default: 10 #' @param seed random seed to alternate the split of data set partitions #' @param input specify if the input was scaled or transformed, scaled=1, transformed=2 #' @return list object containing the following components: #' \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} #' \item{type}{"uncalibrated"} #' \item{probs_CV}{vector of input-preprocessed predictions that was used during the CV} #' \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} #' @rdname uncalibrated_CV uncalibrated_CV <- function(actual, predicted, n_folds=10, seed, input){ set.seed(seed) x <- data.frame(cbind(actual, predicted)) x_cases <- subset(x, x[,1]==1) x_controls <- subset(x, x[,1]==0) fold_cases <- sample(cut(seq(1,nrow(x_cases)),breaks=n_folds,label=FALSE)) fold_controls <- sample(cut(seq(1,nrow(x_controls)),breaks=n_folds,label=FALSE)) uncalibrated_models_rd <- list() y_cal <- list() y_dis <- list() list_probs <- c() list_actual <- c() for(i in 1:n_folds){ trainIndexes_cases <- which(fold_cases!=i, arr.ind = TRUE) trainIndexes_controls <- which(fold_controls!=i,arr.ind=TRUE) trainData <- rbind(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,]) x_train <- format_values(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,], input=input) testIndexes_cases <- which(fold_cases==i,arr.ind=TRUE) testIndexes_controls <- which(fold_controls==i,arr.ind=TRUE) testData <- rbind(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,]) x_test <- format_values(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,], input=input, min=x_train$min, max=x_train$max, mean=x_train$mean) list_probs <- c(list_probs, x_test$formated_values[,2]) list_actual <- c(list_actual, x_test$formated_values[,1]) } y <- reliability_diagramm(list_actual, list_probs) y_cal <- y$calibration_error y_dis <- y$discrimination_error error_summary_CV <- list(calibration_error=y_cal, discrimination_error=y_dis, mean_pred_per_bin=y$mean_pred_per_bin, accuracy_per_bin=y$accuracy_per_bin, sign=y$sign) return(list(error=error_summary_CV, type="uncalibrated", probs_CV=list_probs, actual_CV=list_actual))#, models_rd=uncalibrated_models_rd)) } CalibratR/R/BBQ_CV.R0000644000176200001440000000746713332607461013460 0ustar liggesusers#' @title BBQ_CV #' @description trains and evaluates the BBQ calibration model using \code{folds}-Cross-Validation (CV). #' The \code{predicted} values are partitioned into n subsets. A BBQ model is constructed on (n-1) subsets; the remaining set is used #' for testing the model. All test set predictions are merged and used to compute error metrics for the model. 
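#' Folds are sampled separately for cases and controls, so that each fold approximately preserves the class ratio of the full data set.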
#' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param method_for_prediction 0=selection, 1=averaging, Default: 0 #' @param n_folds number of folds in the cross-validation, Default: 10 #' @param seed random seed to alternate the split of data set partitions #' @param input specify if the input was scaled or transformed, scaled=1, transformed=2 #' @return list object containing the following components: #' \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} #' \item{pred_idx}{which BBQ prediction method was used during CV, 0=selection, 1=averaging} #' \item{type}{"BBQ"} #' \item{probs_CV}{vector of calibrated predictions that was used during the CV} #' \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} #' @examples #' ## Loading dataset in environment #' data(example) #' actual <- example$actual #' predicted <- example$predicted #' BBQ_model <- CalibratR:::BBQ_CV(actual, predicted, method_for_prediction=0, n_folds=4, 123, 1) #' @rdname BBQ_CV BBQ_CV <- function(actual, predicted, method_for_prediction=0, n_folds=10, seed, input){ set.seed(seed) if (!(method_for_prediction==0|method_for_prediction==1)){ print("Please set a valid method_for_prediction. Choose 0 for selection and 1 for averaging") } x <- data.frame(cbind(actual, predicted)) x_cases <- subset(x, x[,1]==1) x_controls <- subset(x, x[,1]==0) fold_cases <- sample(cut(seq(1,nrow(x_cases)),breaks=n_folds,label=FALSE)) fold_controls <- sample(cut(seq(1,nrow(x_controls)),breaks=n_folds,label=FALSE)) y_cal <- list() y_dis <- list() list_probs <- c() list_actual <- c() bbq_models <- list() bbq_models_rd <- list() for(i in 1:n_folds){ trainIndexes_cases <- which(fold_cases!=i, arr.ind = TRUE) trainIndexes_controls <- which(fold_controls!=i,arr.ind=TRUE) trainData <- rbind(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,]) x_train <- format_values(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,], input=input) testIndexes_cases <- which(fold_cases==i,arr.ind=TRUE) testIndexes_controls <- which(fold_controls==i,arr.ind=TRUE) testData <- rbind(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,]) x_test <- format_values(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,], input=input, min=x_train$min, max=x_train$max, x_train$mean) BBQ_model <- build_BBQ(x_train$formated_values[,1], x_train$formated_values[,2]) #build calibration model on training set calibrated_probs <- predict_BBQ(BBQ_model, x_test$formated_values[,2], method_for_prediction) #calibrate with test set und evaluate ECE etc list_probs <- c(list_probs, calibrated_probs$predictions) list_actual <- c(list_actual,x_test$formated_values[,1]) bbq_models[[i]] <- BBQ_model } y <- reliability_diagramm(list_actual, list_probs) y_cal <- y$calibration_error y_dis <- y$discrimination_error error_summary_CV <- list(calibration_error=y_cal, discrimination_error=y_dis, mean_pred_per_bin=y$mean_pred_per_bin, accuracy_per_bin=y$accuracy_per_bin, sign=y$sign) return(list(error=error_summary_CV, pred_idx=method_for_prediction, type="BBQ", probs_CV=list_probs, actual_CV=list_actual)) } CalibratR/R/visualize_distribution.R0000644000176200001440000000505013332610103017243 0ustar liggesusers#' @title visualize_distribution #' @description FUNCTION_DESCRIPTION #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @return list object 
containing the following components: #' \item{plot_distribution}{ggplot histogram that visualizes the observed class distributions} #' \item{parameter}{list object that summarizes all relevant parameters (mean, sd, number) of the observed class distributions} #' @seealso #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_histogram}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{scale_fill_manual}},\code{\link[ggplot2]{labs}} #' @rdname visualize_distribution #' @importFrom ggplot2 ggplot geom_histogram aes scale_colour_manual scale_fill_manual labs #' @importFrom stats sd visualize_distribution <- function(actual, predicted){ all <- data.frame(cbind(actual, predicted)) n <- nrow(all) case <- subset(all, all$actual==1) control <- subset(all, all$actual==0) mean_cases <- mean(case$predicted) mean_control <- mean(control$predicted) sd_case <- sd(case$predicted) sd_control <- sd(control$predicted) n_cases <- nrow(case) n_control <- nrow(control) total <- n_cases+n_control plot_distribution <- ggplot2::ggplot()+ ggplot2::geom_histogram(ggplot2::aes(x=control$predicted, colour="darkolivegreen4", fill="darkolivegreen4"),bins=10,alpha=0.4)+ ggplot2::geom_histogram(ggplot2::aes(x=case$predicted, colour="firebrick3", fill="firebrick3"),bins=10,alpha=0.4)+ ggplot2::scale_colour_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control", "case"), name="Group")+ ggplot2::labs(title="Controls vs. Cases density, Test Set",y="Frequency", subtitle=paste("no. of cases", n_cases,"\n", "no. of controls", n_control), x = "original ML score") parameters <- c(mean_prediction_cases=mean_cases, mean_prediction_controls=mean_control, sd_prediction_cases=sd_case, sd_prediction_controls=sd_control, number_cases=n_cases, number_controls=n_control, total=total) return(list(plot_distribution=plot_distribution, parameter=parameters)) } CalibratR/R/CLES_in_comparison.R0000644000176200001440000000264313332671367016127 0ustar liggesusers #' @title get_CLE_comparison #' @description visualises how class 1 and class 0 classification error (CLE) differs in each trained calibration model. #' Comparing class-specific CLE helps to choose a calibration model for applications were classification error is cost-sensitive for one class. #' See \code{\link{get_CLE_class}} for details on the implementation. #' @param list_models list object that contains all error values for all trained calibration models. For the specific format, see the calling function \code{\link{visualize_calibratR}}. 
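#' @examples
#' ## sketch (not run): 'error_list' is a placeholder for the per-model error list
#' ## that \code{visualize_calibratR} assembles internally before calling this helper
#' \dontrun{
#' get_CLE_comparison(error_list)
#' }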
#' @return ggplot2 #' @rdname get_CLE_comparison get_CLE_comparison <- function(list_models){ list_models$original <- NULL list_errors_0 <- list() list_errors_1 <- list() idx <- 1 for (j in list_models){ list_errors_1[[names(list_models)[[idx]]]] <- j$CLE_class_1 list_errors_0[[names(list_models)[[idx]]]] <- j$CLE_class_0 idx <- idx+1 } df_cle_0 <- cbind(reshape2::melt(list_errors_0), Class="CLE class 0") df_cle_1 <- cbind(reshape2::melt(list_errors_1), Class="CLE class 1") df <- rbind(df_cle_0, df_cle_1) Class <- NULL value <- NULL L1 <- NULL ggplot2::ggplot(df, ggplot2::aes(x=L1, y=value, colour=Class)) + ggplot2::ggtitle("Class-specific CLE") + ggplot2::scale_x_discrete(name = NULL) + ggplot2::geom_boxplot() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1)) } CalibratR/R/GUESS_CV.R0000644000176200001440000000663713332620101013722 0ustar liggesusers#' @title GUESS_CV #' @description trains and evaluates the GUESS calibration model using \code{folds}-Cross-Validation (CV). #' The \code{predicted} values are partitioned into n subsets. A GUESS model is constructed on (n-1) subsets; the remaining set is used #' for testing the model. All test set predictions are merged and used to compute error metrics for the model. #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param n_folds number of folds for the cross-validation, Default: 10 #' @param method_of_prediction PARAM_DESCRIPTION, Default: 2 #' @param seed random seed to alternate the split of data set partitions #' @param input specify if the input was scaled or transformed, scaled=1, transformed=2 #' @return list object containing the following components: #' \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} #' \item{type}{"GUESS"} #' \item{pred_idx}{which prediction method was used during CV} #' \item{probs_CV}{vector of calibrated predictions that was used during the CV} #' \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} #' @rdname GUESS_CV GUESS_CV <- function(actual, predicted, n_folds=10, method_of_prediction=2, seed, input){ set.seed(seed) x <- data.frame(cbind(actual, predicted)) x_cases <- subset(x, x[,1]==1) x_controls <- subset(x, x[,1]==0) fold_cases <- sample(cut(seq(1,nrow(x_cases)),breaks=n_folds,label=FALSE)) fold_controls <- sample(cut(seq(1,nrow(x_controls)),breaks=n_folds,label=FALSE)) y_cal_1 <- list() y_dis_1 <- list() list_probs <- c() list_actual <- c() GUESS_models <- list() GUESS_models_rd <- list() for(i in 1:n_folds){ trainIndexes_cases <- which(fold_cases!=i, arr.ind = TRUE) trainIndexes_controls <- which(fold_controls!=i,arr.ind=TRUE) trainData <- rbind(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,]) x_train <- format_values(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,], input=input) testIndexes_cases <- which(fold_cases==i,arr.ind=TRUE) testIndexes_controls <- which(fold_controls==i,arr.ind=TRUE) testData <- rbind(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,]) x_test <- format_values(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,], input=input, min=x_train$min, max=x_train$max, x_train$mean) GUESS_model <- build_GUESS(x_train$formated_values[,1], x_train$formated_values[,2]) #build calibration model on train set calibrated_probs <- predict_GUESS(GUESS_model, x_test$formated_values[,2], method_of_prediction) #calibrate with test set und evaluate ECE etc 
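    #pool this fold's calibrated test-set predictions and their labels; the error
    #metrics are computed once on the pooled vectors after the loop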
list_probs <- c(list_probs, calibrated_probs$predictions) list_actual <- c(list_actual, x_test$formated_values[,1]) GUESS_models[[i]] <- GUESS_model } y <- reliability_diagramm(list_actual, list_probs) y_cal <- y$calibration_error y_dis <- y$discrimination_error error_summary_CV <- list(calibration_error=y_cal, discrimination_error=y_dis, mean_pred_per_bin=y$mean_pred_per_bin, accuracy_per_bin=y$accuracy_per_bin, sign=y$sign) return(list(error=error_summary_CV, type="GUESS", pred_idx=method_of_prediction, probs_CV=list_probs, actual_CV=list_actual)) } CalibratR/R/plot_model.R0000644000176200001440000001322113332575105014602 0ustar liggesusers#' @title plot_model #' @description this methods visualizes all implemented calibration models as a mapping function between original ML scores (x-axis) and #' calibrated predictions (y-axis) #' @param calibration_model output from the \code{\link{calibrate}} method. #' @param seq sequence of ML scores over which the mapping function should be evaluated, Default: 100 scores from the minimum to the maximum of the original ML scores #' @return ggplot object #' @seealso #' \code{\link[reshape2]{melt}} #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{scale_colour_manual}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_text}},\code{\link[ggplot2]{geom_vline}} #' @rdname plot_model #' @importFrom reshape2 melt #' @importFrom ggplot2 ggplot geom_line aes ylim scale_colour_manual theme labs geom_text geom_vline plot_model <- function(calibration_model, seq=NULL){ ###local function### back_to_ML_scores <- function(midpoints, seq, score){ idx <- c() for (i in 1:length(midpoints)){ idx[i] <- which.min(abs(midpoints[i]-score)) } return(seq[idx]) } max <- max(calibration_model$original_values$predicted) min <- min(calibration_model$original_values$predicted) #default: if no seq is given, evaluate from min to max value of original input score if(is.null(seq)){ step_size <- (max-min)/100 #evaluate 100 scores seq <- seq(min, max, step_size) } prediction_all <- predict_calibratR(calibration_model, seq, nCores=1) scaled <- scale_me(seq) transformed <- transform_me(seq, mean=mean(seq)) idx <- 1 plot.list <- list() Var2 <- NULL value <- NULL for(i in prediction_all){ control <- 1-i df <- cbind(seq,reshape2::melt(cbind(i, control), measure.vars=c("i","control"))) plot_ori <- ggplot2::ggplot()+ ggplot2::geom_line(data=data.frame(df),mapping=ggplot2::aes(x=seq, y=value, colour=Var2))+ ggplot2::ylim(0, 1.05)+ ggplot2::scale_colour_manual(values=c("firebrick3", "darkolivegreen4"), name="Group",labels=c("Case","Control") )+ ggplot2::theme(legend.position = "right")+ ggplot2::labs(title=names(prediction_all$predictions)[idx], x = "original ML score", y = "calibrated prediction") #add model specifc significance values model_pred <- names(prediction_all)[idx] if(model_pred=="BBQ_scaled_sel"|model_pred=="BBQ_scaled_avg"){ z <- back_to_ML_scores(calibration_model$models$BBQ_scaled$binnning_scheme$midpoint, seq, scaled) plot.list[[model_pred]] <- plot_ori + ggplot2::geom_text(mapping=ggplot2::aes(x=as.numeric(z), y=as.numeric(calibration_model$models$BBQ_scaled$binnning_scheme$prob_case)+0.03, label=(calibration_model$models$BBQ_scaled$binnning_scheme$significance))) } else if(model_pred=="BBQ_transformed_sel"|model_pred=="BBQ_transformed_avg"){ z <- back_to_ML_scores(calibration_model$models$BBQ_transformed$binnning_scheme$midpoint, seq, transformed) 
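      #z maps the transformed-scale bin midpoints back onto the original ML score
      #axis, so the significance labels are drawn at the correct x positions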
plot.list[[model_pred]] <- plot_ori + ggplot2::geom_text(mapping=ggplot2::aes(x=as.numeric(z), y=as.numeric(calibration_model$models$BBQ_transformed$binnning_scheme$prob_case)+0.03, label=calibration_model$models$BBQ_transformed$binnning_scheme$significance)) } else if (model_pred=="hist_scal"){ z <- back_to_ML_scores(calibration_model$models$hist_scaled$histogram$mids, seq, scaled) plot.list[[model_pred]] <- plot_ori + ggplot2::geom_text(mapping=ggplot2::aes(x=as.numeric(z), y=as.numeric(calibration_model$models$hist_scaled$probs_per_bin)+0.03, label=calibration_model$models$hist_scaled$binnning_scheme$significance)) } else if (model_pred=="hist_trans"){ z <- back_to_ML_scores(calibration_model$models$hist_transformed$histogram$mids, seq, transformed) plot.list[[model_pred]] <- plot_ori + ggplot2::geom_text(mapping=ggplot2::aes(x=as.numeric(z), y=as.numeric(calibration_model$models$hist_transformed$probs_per_bin)+0.03, label=calibration_model$models$hist_transformed$binnning_scheme$significance)) } else if (model_pred=="GUESS_1"|model_pred=="GUESS_2"){ if (calibration_model$models$GUESS$t_crit[1]>calibration_model$models$GUESS$t_crit[4]){ plot.list[[model_pred]] <- plot_ori + ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=calibration_model$models$GUESS$t_crit[2]), size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=calibration_model$models$GUESS$t_crit[3]), size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=calibration_model$models$GUESS$t_crit[1]), size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=calibration_model$models$GUESS$t_crit[4]), size=1, linetype=2)+ ggplot2::geom_text(ggplot2::aes(x=calibration_model$models$GUESS$t_crit[2], y=0.9,label=("significance\n boundaries")), cex=0.6) } else plot.list[[model_pred]] <- plot_ori + ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=calibration_model$models$GUESS$t_crit[2]), size=1, linetype=2)+ ggplot2::geom_vline(mapping=ggplot2::aes(xintercept=calibration_model$models$GUESS$t_crit[3]), size=1, linetype=2)+ ggplot2::geom_text(ggplot2::aes(x=calibration_model$models$GUESS$t_crit[2], y=0.9,label=("significance\n boundaries")), cex=0.6) } else plot.list[[model_pred]] <- plot_ori idx <- idx+1 } return(plot.list) } CalibratR/R/compare_models_visual.R0000644000176200001440000000305013330610744017014 0ustar liggesusers#' @title compare_models_visual #' @description FUNCTION_DESCRIPTION #' @param models PARAM_DESCRIPTION #' @param seq sequence for which the calibrated predictions should be plotted, Default: NULL #' @return OUTPUT_DESCRIPTION #' @details DETAILS #' @seealso #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{scale_color_brewer}} #' \code{\link[reshape2]{melt}} #' @rdname compare_models_visual #' @importFrom ggplot2 ggplot geom_line aes ylim theme labs scale_color_brewer #' @importFrom reshape2 melt compare_models_visual <- function(models, seq=NULL){ max <- max(models$original_values$predicted) min <- min(models$original_values$predicted) #default: if no seq is given, evaluate from min to max value of original input score if(is.null(seq)){ step_size <- (max-min)/100 #evaluate 100 scores seq <- seq(min, max, step_size) } predictions <- predict_calibratR(models, seq, nCores=1) predictions$original <- NULL L1 <- NULL value <- NULL plot1 <- ggplot2::ggplot(cbind(seq,reshape2::melt(predictions)))+ 
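    #melt() stacks the list of calibrated predictions into long format: 'value'
    #holds the calibrated estimate and 'L1' the model name, one coloured line per model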
ggplot2::geom_line(ggplot2::aes(x=seq, y=value, colour=L1), size=1)+ ggplot2::ylim(0, 1)+ ggplot2::theme(legend.position = "bottom")+ ggplot2::labs(title="Comparison of Calibration models", x = "original ML score", y = "calibrated prediction")+ ggplot2::scale_color_brewer(palette = "Paired", name=NULL) return(plot1) } CalibratR/R/reliability_diagram.R0000644000176200001440000001775113332574240016454 0ustar liggesusers#' @title reliability_diagramm #' @description Reliability curves allow checking if the predicted probabilities of a # binary classifier are well calibrated. This function returns two arrays # which encode a mapping from predicted probability to empirical probability. # For this, the predicted probabilities are partitioned into equally sized # bins and the mean predicted probability and the mean empirical probabilties # in the bins are computed. For perfectly calibrated predictions, both # quantities whould be approximately equal (for sufficiently many test samples). # Note: this implementation is restricted to binary classification. # breaks default value = 10 #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param bins number of bins in the reliability diagram, Default: 10 #' @param plot_rd should the reliability diagram be plotted, Default: TRUE #' @return a list object containing the following elements #' \item{calibration_error}{} #' \item{discrimination_error}{} #' \item{rd_breaks}{} #' \item{histogram_plot}{} #' \item{diagram_plot}{} #' \item{mean_pred_per_bin}{} #' \item{accuracy_per_bin}{} #' \item{freq_per_bin}{} #' \item{sign}{} #' @seealso #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{stat_bin}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{scale_fill_manual}},\code{\link[ggplot2]{theme}},\code{\link[ggplot2]{labs}},\code{\link[ggplot2]{geom_point}},\code{\link[ggplot2]{xlim}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{geom_abline}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{geom_text}},\code{\link[ggplot2]{geom_label}},\code{\link[ggplot2]{coord_fixed}} #' @rdname reliability_diagramm #' @importFrom ggplot2 ggplot stat_bin aes scale_fill_manual theme labs geom_point xlim ylim geom_abline geom_line geom_text geom_label coord_fixed #' @importFrom graphics hist #' @export reliability_diagramm <- function(actual, predicted, bins=10, plot_rd=TRUE){ plot1 <- NULL plot2 <- NULL mean_pred_per_bin_ <- NULL accuracy_per_bin_ <- NULL freq_per_bin <- NULL #error values ece <- getECE(actual, predicted, bins) mce <- getMCE(actual, predicted, bins) rmse <- getRMSE(actual, predicted) ece_ <- get_ECE_equal_width(actual, predicted, bins) mce_ <- get_MCE_equal_width(actual, predicted, bins) cle <- get_CLE_class(actual, predicted, bins) brier <- get_Brier_score(actual, predicted) discrimination_error <- evaluate_discrimination(actual, predicted) #only plot reliability diagram if all(predicted) is between 0 and 1, komischer Rundungsfehler.... 
deshalb 1.0001 if(all(predicted<=1.00001) && all(predicted>=0)){ all <- data.frame(cbind(actual,predicted)) histogram <- hist(all[,2], breaks=seq(0,1,1/bins),plot=FALSE) accuracy_per_bin <- rep(0,length(histogram$mids)) mean_pred_per_bin <- rep(0,length(histogram$mids)) #sort predicted x <- order(predicted) predicted <- predicted[x] actual <- actual[x] for(i in 1:length(predicted)){ for (j in 1:(length(histogram$breaks)-1)){ if (predicted[i]==histogram$breaks[1]){ #values with prob = 0 are put in bin 1 accuracy_per_bin[j] <- accuracy_per_bin[j] + actual[i] mean_pred_per_bin[j] <- mean_pred_per_bin[j] + predicted[i] break } if (histogram$breaks[j] < predicted[i] && predicted[i]<= histogram$breaks[j+1]){ accuracy_per_bin[j] <- accuracy_per_bin[j] + actual[i] mean_pred_per_bin[j] <- mean_pred_per_bin[j] + predicted[i] break } }} mean_pred_per_bin_ <- mean_pred_per_bin/histogram$counts #mean prediction in bin mean_pred_per_bin_[is.nan(mean_pred_per_bin_)] <- 0 accuracy_per_bin_ <- accuracy_per_bin/histogram$counts #no. of cases per bin accuracy_per_bin_[is.nan(accuracy_per_bin_)] <- 0 pvalue_per_bin <- unlist(apply(cbind(success=accuracy_per_bin, all= histogram$counts),1,binom_for_histogram)) freq_per_bin <- histogram$counts/sum(histogram$counts) sign <- c() for (i in (1: length(pvalue_per_bin))){ if (pvalue_per_bin[i]<0.05){ sign[i] <- "*" } else if (pvalue_per_bin[i]==2){ #empty bins are indicated with pvalue of 2 sign[i] <- "x" } else sign[i] <- "ns" } idx <- sign=="x" if(plot_rd){ ..count.. <- NULL plot1 <- ggplot2::ggplot(data=all)+ ggplot2::stat_bin(mapping=ggplot2::aes(x=predicted, fill=factor(actual)),color="white",alpha=0.6,breaks=seq(0,1,1/bins), position="identity")+ ggplot2::scale_fill_manual(values=c("darkolivegreen4", "firebrick3"),labels=c("control","case"), name="Group")+ ggplot2::theme(legend.position = "top")+ ggplot2::stat_bin(data=subset(all,actual==0), ggplot2::aes(x=predicted,label=..count..), breaks=seq(0,1,1/bins), geom="text", position="identity", size=4)+ ggplot2::stat_bin(data=subset(all,actual==1), ggplot2::aes(x=predicted,label=..count..), breaks=seq(0,1,1/bins), geom="text", position="identity", size=4)+ ggplot2::labs(title="Constructed Histogram for Reliability Diagram", subtitle=paste("bins:",bins), x = "prediction", y = "observed frequency") plot2 <- ggplot2::ggplot(data=data.frame(cbind(mean_pred_per_bin_,accuracy_per_bin_)),ggplot2::aes(mean_pred_per_bin_, accuracy_per_bin_))+ ggplot2::geom_point(shape=18,color="black", size=3)+ ggplot2::xlim(0, 1) + ggplot2::ylim(0, 1.05) + ggplot2::geom_abline(slope=1, color="#999999", size=1, linetype=2)+ ggplot2::geom_line(data=data.frame(cbind(mean_pred_per_bin_=mean_pred_per_bin_[!idx],accuracy_per_bin_=accuracy_per_bin_[!idx])), color="#0072B2", size=2)+ ggplot2::geom_text(mapping=ggplot2::aes(mean_pred_per_bin_, accuracy_per_bin_+0.04,label=sign))+ ggplot2::geom_label(mapping=ggplot2::aes(0.2,0.9, label=paste(paste("n:",length(predicted)),"\n", paste("ECE:",round(ece_,4)),"\n", "ns = not significant\n", "x = empty bin")), size=2)+ ggplot2::coord_fixed(ratio=1)+ ggplot2::labs(title ="Reliability Diagram", subtitle=paste("bins:",bins), x = "mean prediction in bin", y = "observed frequency") # plot2 <- ggplot(data=data.frame(cbind(histogram$mids,accuracy_per_bin_)),aes(V1, accuracy_per_bin_))+ # geom_point(shape=18,color="black", size=3)+ # xlim(0, 1) + ylim(0, 1.05) + # geom_abline(slope=1, color="#999999", size=1, linetype=2)+ # 
geom_line(data=data.frame(cbind(histogram$mids[!idx],accuracy_per_bin_=accuracy_per_bin_[!idx])), # color="#0072B2", size=2)+ # geom_text(mapping=aes(V1, accuracy_per_bin_+0.04,label=sign))+ # geom_label(mapping=aes(0.2,0.9, label=paste(paste("n:",length(predicted)),"\n", # paste("ECE:",round(ece_,4)),"\n", # "ns = not significant\n", # "x = empty bin")), size=2)+ # coord_fixed(ratio=1)+ # labs(title ="Reliability Diagram", subtitle=paste("bins:",breaks), x = "bin midpoint", y = "observed frequency") # }} error_list <- list(ECE_equal_width=ece_, MCE_equal_width=mce_, ECE_equal_freq=ece, MCE_equal_freq=mce, RMSE=rmse, CLE_class_1=cle$class_1, CLE_class_0=cle$class_0, brier=brier$brier, brier_class_1=brier$brier_1, brier_class_0=brier$brier_0) rounded_list <- lapply(error_list,round,5) return(list(calibration_error=rounded_list, discrimination_error=discrimination_error, rd_breaks=bins, histogram_plot=plot1, diagram_plot=plot2, mean_pred_per_bin=mean_pred_per_bin_, accuracy_per_bin=accuracy_per_bin_, freq_per_bin=freq_per_bin, sign=sign)) } CalibratR/R/build_hist_binning_CV.R0000644000176200001440000000650413332620416016670 0ustar liggesusers#' @title hist_binning_CV #' @description trains and evaluates the histogram binning calibration model repeated \code{folds}-Cross-Validation (CV). #' The \code{predicted} values are partitioned into n subsets. A histogram binning model is constructed on (n-1) subsets; the remaining set is used #' for testing the model. All test set predictions are merged and used to compute error metrics for the model. #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param n_bins number of bins used in the histogram binning scheme, Default: 15 #' @param n_folds number of folds in the cross-validation, Default: 10 #' @param seed random seed to alternate the split of data set partitions #' @param input specify if the input was scaled or transformed, scaled=1, transformed=2 #' @return list object containing the following components: #' \item{error}{list object that summarizes discrimination and calibration errors obtained during the CV} #' \item{type}{"hist"} #' \item{probs_CV}{vector of calibrated predictions that was used during the CV} #' \item{actual_CV}{respective vector of true values (0 or 1) that was used during the CV} #' @rdname hist_binning_CV hist_binning_CV <- function(actual, predicted, n_bins=15, n_folds=10, seed, input){ set.seed(seed) x <- data.frame(cbind(actual, predicted)) x_cases <- subset(x, x[,1]==1) x_controls <- subset(x, x[,1]==0) fold_cases <- sample(cut(seq(1,nrow(x_cases)),breaks=n_folds,label=FALSE)) fold_controls <- sample(cut(seq(1,nrow(x_controls)),breaks=n_folds,label=FALSE)) y_cal <- list() y_dis <- list() list_calibrated_probs <- c() list_actual <- c() error_fold <- c() hist_models <- list() hist_models_rd <- list() for(i in 1:n_folds){ trainIndexes_cases <- which(fold_cases!=i, arr.ind = TRUE) trainIndexes_controls <- which(fold_controls!=i,arr.ind=TRUE) trainData <- rbind(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,]) x_train <- format_values(x_cases[trainIndexes_cases, ], x_controls[trainIndexes_controls,], input=input) testIndexes_cases <- which(fold_cases==i,arr.ind=TRUE) testIndexes_controls <- which(fold_controls==i,arr.ind=TRUE) testData <- rbind(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,]) x_test <- format_values(x_cases[testIndexes_cases, ], x_controls[testIndexes_controls,], input=input, min=x_train$min, max=x_train$max, 
mean=x_train$mean) hist <- build_hist_binning(x_train$formated_values[,1], x_train$formated_values[,2], n_bins) calibrated_probs <- predict_hist_binning(hist, x_test$formated_values[,2]) list_calibrated_probs <- c(list_calibrated_probs, calibrated_probs$predictions) list_actual <- c(list_actual,x_test$formated_values[,1]) hist_models[[i]] <- hist } y <- reliability_diagramm(list_actual, list_calibrated_probs) y_cal <- y$calibration_error y_dis <- y$discrimination_error error_summary_CV <- list(calibration_error=y_cal, discrimination_error=y_dis, mean_pred_per_bin=y$mean_pred_per_bin, accuracy_per_bin=y$accuracy_per_bin, sign=y$sign) return(list(error=error_summary_CV, type="hist", probs_CV=list_calibrated_probs, actual_CV=list_actual)) } CalibratR/R/predict_hist_binning.R0000644000176200001440000000273113332607460016635 0ustar liggesusers#' @title predict_hist_binning #' @description predict for a new element using histogram binning #' @param histogram the output of \code{\link{build_hist_binning}} #' @param new vector of uncalibrated probabilities #' @return a list object containing the following components #' \item{predictions}{contains a vector of calibrated predictions} #' \item{significance_test_set}{the percentage of \code{new} instances that was evaluated using significant prediction estimates} #' \item{pred_per_bin}{a table containing the number of instances from \code{new} for each bin of the final binning scheme of \code{histogram}} #' @rdname predict_hist_binning predict_hist_binning <- function(histogram, new){ breaks <- histogram$histogram$breaks bin_probs <- histogram$probs_per_bin out <- c() #percentage of significant predictions significant_bins <- subset(histogram$binnning_scheme$`no bin`, histogram$binnning_scheme$p_value<0.05) no_per_bin <- cut(new, breaks, labels = histogram$binnning_scheme$`no bin`,include.lowest = T) sign_test_set <- sum(table(no_per_bin)[significant_bins])/(sum(table(no_per_bin))) for(i in 1:length(new)){ for (j in 1:(length(breaks)-1)){ if (new[i]==breaks[1]){ out[i] <- bin_probs[1] } if (breaks[j] < new[i] && new[i]<= breaks[j+1]){ out[i] <- bin_probs[j] } } } return(list(predictions=out,significance_test_set=sign_test_set,pred_per_bin=table(no_per_bin))) } CalibratR/R/rd_multiple_runs.R0000644000176200001440000000603313332626747016047 0ustar liggesusers#' @title rd_multiple_runs #' @description This functions plots all n reliability diagrams that were constructed during n-times repeated m-fold cross-validation (CV). #' During calibration model evaluation, CV is repeated n times, so that eventually n reliability diagrams are obtained. #' @param list_models list object that contains n-times the output from the \code{\link{reliability_diagramm}}. method. #' @return a list object that contains a reliability diagram that visualises all reliabilty diagrams that were constructed during n-times repeated m-fold cross-validation. 
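#' @examples
#' ## sketch (not run): the internal CV helpers return an 'error' component in the
#' ## format read here, so repeated runs with different seeds can be visualised directly
#' \dontrun{
#' data(example)
#' runs <- lapply(1:5, function(s) CalibratR:::BBQ_CV(example$actual, example$predicted,
#'                                                    n_folds=10, seed=s, input=1))
#' CalibratR:::rd_multiple_runs(runs)
#' }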
#' @seealso #' \code{\link[reshape2]{melt}} #' \code{\link[ggplot2]{ggplot}},\code{\link[ggplot2]{geom_line}},\code{\link[ggplot2]{aes}},\code{\link[ggplot2]{geom_abline}},\code{\link[ggplot2]{ylab}},\code{\link[ggplot2]{xlab}},\code{\link[ggplot2]{xlim}},\code{\link[ggplot2]{ylim}},\code{\link[ggplot2]{coord_fixed}},\code{\link[ggplot2]{geom_text}},\code{\link[ggplot2]{scale_color_discrete}},\code{\link[ggplot2]{ggtitle}} #' @rdname rd_multiple_runs #' @importFrom reshape2 melt #' @importFrom ggplot2 ggplot geom_line aes geom_abline ylab xlab xlim ylim coord_fixed geom_text scale_color_discrete ggtitle #' @importFrom stats median rd_multiple_runs <- function(list_models){ list_bins <- list() list_bins[["mean_prediction"]] <- list() list_bins[["accuracy"]] <- list() list_bins[["significance"]] <- list() for (j in list_models){ for (i in seq(1,10,1)){ list_bins[["mean_prediction"]][[as.character(i)]] <- c(list_bins[["mean_prediction"]][[as.character(i)]],j$error$mean_pred_per_bin[[i]]) list_bins[["accuracy"]][[as.character(i)]] <- c(list_bins[["accuracy"]][[as.character(i)]],j$error$accuracy_per_bin[[i]]) list_bins[["significance"]][[as.character(i)]] <- c(list_bins[["significance"]][[as.character(i)]],j$error$sign[[i]]) } } mean_pred <- data.frame(list_bins$mean_prediction) accuracy <- data.frame(list_bins$accuracy) significance <- data.frame(list_bins$significance) x <- reshape2::melt(t(mean_pred)) y <- reshape2::melt(t(accuracy)) df <- cbind(x,acc=y[,3]) plot1 <- ggplot2::ggplot()+ ggplot2::geom_line(data=df, ggplot2::aes(x=df$value, y=df$acc, group=df$Var2), colour="grey70",alpha=0.3, size=1)+ ggplot2::geom_line(ggplot2::aes(apply(mean_pred,2, FUN=median), apply(accuracy,2, FUN=median)), colour="#0072B2", size=2)+ ggplot2::geom_abline(slope=1, color="#999999", size=1, linetype=2)+ ggplot2::ylab("observed frequency")+ ggplot2::xlab("mean prediction per bin")+ ggplot2::xlim(0, 1) + ggplot2::ylim(0, 1.01) + ggplot2::coord_fixed(ratio=1)+ ggplot2::geom_text(ggplot2::aes(x=unlist(mean_pred), y=unlist(accuracy), label=unlist(significance)), size=3.5, alpha=0.5)+ ggplot2::scale_color_discrete(guide=FALSE)+ ggplot2::ggtitle(paste("Reliability Diagrams from", nrow(accuracy),"partitions")) return(plot1) } CalibratR/R/build_hist_binning.R0000644000176200001440000001222713332620171016275 0ustar liggesusers#' @title build_hist_binning #' @description calculate estimated probability per bin, input predicted and real score as numeric vector; builds a histogram binning model which can be used to calibrate uncalibrated predictions using the predict_histogramm_binning method #' @param actual vector of observed class labels (0/1) #' @param predicted vector of uncalibrated predictions #' @param bins number of bins that should be used to build the binning model, Default: decide_on_break estimates optimal number of bins #' @return returns the trained histogram model that can be used to calibrate a test set using the \code{\link{predict_hist_binning}} method #' @rdname build_hist_binning #' @details if trainings set is smaller then threshold (15 bins*5 elements=75), number of bins is decreased #' @importFrom graphics hist #' @importFrom stats na.omit build_hist_binning <- function(actual, predicted, bins=NULL){ ##local functions### decide_on_break <- function(predicted, breaks=15){ #if trainingsset is smaller then threshold (15 bins*5 elements=75), decrease #bins depending on training set size if (length(predicted)<=75){ breaks <- floor(length(predicted)/6) } #only use 0,1,by breaks, if input is between 0 and 
1 if(all(predicted<=1) && all(predicted>=0)){ histogram <- hist(predicted, breaks=seq(0,1,(1/breaks)), plot=FALSE) #if there are more than 3 bins with insignificant (less then 18 elements) -> number of breaks is decreased while(sum(histogram$counts>0 & histogram$counts<18) > 3 && !breaks <= 7){ breaks <- breaks-1 histogram <- hist(predicted, breaks=seq(0,1,(1/breaks)), plot=FALSE) }} else { histogram <- hist(predicted, breaks=breaks, plot=FALSE) #if there are more than 3 bins with insignificant (less then 18 elements) -> number of breaks is decreased while(sum(histogram$counts>0 & histogram$counts<18) > 3 && !breaks <= 7){ breaks <- breaks-1 histogram <- hist(predicted, breaks=breaks, plot=FALSE) }} return(suggested_break_number=breaks) } if (is.null(bins)){ n_bins <- decide_on_break(predicted) } else n_bins <- bins predicted_real <- data.frame() predicted_real[1:length(predicted),1] <- predicted predicted_real[,2] <- actual predicted_real <- na.omit(predicted_real) histogram <- hist(predicted_real[,1], breaks=seq(0,1,(1/n_bins)), plot=FALSE, include.lowest = T) true_bin <- data.frame() true_bin[c(1:length(histogram$count)),1] <- seq(1,length(histogram$count)) true_bin[,2] <- 0 #true positiv true_bin[,3] <- 0 #no. of data in bin true_bin[,4] <- 0 #correct y=1 diagnosis colnames(predicted_real) <- c("ML score", "real score") for(i in 1:nrow(predicted_real)){ for (j in 1:(length(histogram$breaks)-1)){ if (predicted_real[i,1]==histogram$breaks[1]){ #values with prob = 0 are put in bin 1 true_bin[1,3] <- true_bin[1,3] + 1 true_bin[1,2] <- true_bin[1,2] + predicted_real[i,2] break } if (histogram$breaks[j] < predicted_real[i,1] && predicted_real[i,1]<= histogram$breaks[j+1]){ true_bin[j,3] <- true_bin[j,3] + 1 true_bin[j,2] <- true_bin[j,2] + predicted_real[i,2] break } } } true_bin[,4] <- true_bin[,2]/true_bin[,3] #Probability for correct y=1 diagnosis true_bin[,4][is.na(true_bin[,4])] <- 0 #significance testing p_values_binom <- unlist(apply(cbind(true_bin[,2], true_bin[,3]),1,binom_for_histogram)) #pvalues for single bins, binom.test true_bin[,5] <- p_values_binom for (i in 1:nrow(true_bin)){ if(is.nan(true_bin[i,4])){ true_bin[i,6] <- "no value" } else if(true_bin[i,5]<0.001){ true_bin[i,6] <- "***" } else if(true_bin[i,5]<0.01){ true_bin[i,6] <- "**" } else if(true_bin[i,5]<0.05){ true_bin[i,6] <- "*" } else true_bin[i,6] <- "ns" } colnames(true_bin) <- c("no bin", "true cases", "all", "prob_case", "p_value", "significance") colnames(predicted_real) <- c("ML score", "real score") min <- min(predicted) max <- max(predicted) #quality markers calibration model calibration_points <- true_bin[,4] calibration_points_sign <- true_bin[,5]<0.05 calibration_points_number <- length((true_bin[,4])) calibration_points_number_sign <- length((subset(true_bin[,4], true_bin[,5]<0.05))) calibration_range <- range(true_bin[,4]) if(sum(calibration_points_sign) != 0){ calibration_range_sign <- range(true_bin[,4][true_bin[,5]<0.05]) } else{ calibration_range_sign <- 0 } return(list(type="hist", histogram=histogram,probs_per_bin=true_bin[,4], binnning_scheme=true_bin, min=min, max=max, calibration_points=list(calibration_points=calibration_points,calibration_points_sign=calibration_points_sign), calibration_range=list(calibration_range=calibration_range, calibration_range_sign=calibration_range_sign), calibration_points_number=list(calibration_points_number=calibration_points_number, calibration_points_number_sign=calibration_points_number_sign))) } CalibratR/MD50000644000176200001440000001034013526516525012435 
0ustar liggesusers8c99e5980afca07bdb91e5b1f7a8a9d9 *DESCRIPTION 201067bdc8907e37f704bdf6ea1e7c8b *NAMESPACE 03a4770f7df33b8612290470292f1cb9 *R/BBQ_CV.R 59de9f5f2027cd9139ccb5f9649e62c8 *R/CLES_in_comparison.R 55fdd0ff222e5975dfe07aed623b8de9 *R/GUESS_CV.R 0423efbbcb998f4001727b8a5c350fe0 *R/binomtest_for_bin.R 60e79d779389658f9fab3ebf7e90f6ab *R/build_BBQ.R 5f7c5351c60db45723be293e764e36bf *R/build_GUESS.R a8776b2722a158b40a3ee04b9cad5f14 *R/build_hist_binning.R 535d8e8a556cc3146e23f8f7e08d5cc4 *R/build_hist_binning_CV.R c220a34df2c5d2d9b3b4aa1188d3ab72 *R/calibratR.R ea56213178436d311c567e75936ec6b1 *R/calibrate_me.R 3e065ee3cf983809b9812d71b167cb5d *R/calibrate_me_CV_errors_parallel.R 65c72c29f4cb2987601add62611444f5 *R/compare_models_visual.R b05d86cf1348978c45d7ab83b8888a61 *R/evaluate_discrimination.R a2bc6649e43e007a11893ae68d37dfdf *R/example.R 88a6f8d024d1362cb6d93648e934fb2e *R/format_values.R cd85b9b7150b16fe3fbb9cd5745cb730 *R/getECE.R 4a174a72b42f4b638a54d6a71c3db48e *R/getMCE.R 78a807472f60fe8974885de63ea9fd6d *R/getRMSE.R 3ced6f33695c9ea8e2ebd15d5be78520 *R/get_Brier_score.R 5e1e2a9c7a707b0ca559e798bc8027c5 *R/get_CLE.R 842faa9ba99b2172b3e401cb58649912 *R/no_calibration_CV.R fec8de218cbcc5cf0df4e869d0da7d86 *R/plot_class_distributions.R 9382c05bfd29aac831e96d3441f42695 *R/plot_model.R 9f947716db2551d42f82c5a690eaa2bd *R/predict_BBQ.R bfcc236a01f77abfea82fb484b5ad70c *R/predict_GUESS.R 5e6c8f933fdb87e12baedcc828dddaba *R/predict_calibratR_parallel.R 53c8cc585038997093b70f00395518ca *R/predict_hist_binning.R 564e0a77ac10dd20285740456c6cc1d1 *R/predict_model.R 161239c1b9f643862930425703763b73 *R/rd_multiple_runs.R c3fe25ccfcc8d5f3b0142949a647e9e5 *R/reliability_diagram.R 02866406be97e6e665f95d628c3f7bdd *R/statistics_calibratR.R 182c225158895ddaf4d560785b2dfec5 *R/visualize_calibratR.R 2835e481201ad2b9aedc995836724ef5 *R/visualize_calibrated_test_set.R bb6b7f02c05c4da1db2ff519b2e8447e *R/visualize_distribution.R a6f3acfba1a73f76b62437f664993bfe *R/visualize_errors.R 2c0d7f95972ba93b23cc6689cfba2d32 *data/example.rda 07d3df984bf364a64c78a3951bde52e2 *inst/CITATION 53a27e69fcbfa5dc9d3e21b0b04d29b9 *man/BBQ_CV.Rd 629607d1dae1d38fde3151be62bd20fa *man/GUESS_CV.Rd 77755eec7adb34b83d13a6036e94177b *man/binom_for_histogram.Rd 2c8603bf07ef711ad7aa4f4b7cc73905 *man/build_BBQ.Rd b968a3819849ca3374b94d75c7123299 *man/build_GUESS.Rd 5a95429639f24f0c9aa907f1dc57a850 *man/build_hist_binning.Rd b4761ef5156ea855dd71138aa09282d7 *man/calibrate.Rd 8952466f487663250ec468ff9c90f2ab *man/calibrate_me.Rd e8ce7fb0846252bd9e9442e0285021b7 *man/calibrate_me_CV_errors.Rd 114d3213734065f8473aa0f53a42d66e *man/compare_models_visual.Rd b6d94d644e8ddce08c4b14eab7678595 *man/evaluate_discrimination.Rd 3d309e42080749952a37b34a18de3e02 *man/example.Rd f4c942e1bd6d42980f076e083737c286 *man/format_values.Rd 68e053ed4490647c0c5c0cd8a286d06a *man/getECE.Rd 44ececacb5f257fb26ec83a29e87819f *man/getMCE.Rd 953bf79a07f33f91da4eb302791dff39 *man/getRMSE.Rd 2b794d41fc1d58a5e73bdb36f24ca86c *man/get_Brier_score.Rd cbb547f80a6a909d531468d8de569b61 *man/get_CLE_class.Rd fbf30984d71cc0032dba81ae9fb18c9f *man/get_CLE_comparison.Rd 49d1724bb7a99d220db8718f40cce4f7 *man/get_ECE_equal_width.Rd d5957995b38506c2ab1a55c61c48044d *man/get_MCE_equal_width.Rd 09c4a925fb44b47e991f052436394e2d *man/hist_binning_CV.Rd f68c1e3678e3f7b29b453e4d59a0d358 *man/plot_class_distributions.Rd 0d6fdfcb72a83852fbc24694fa5f9b38 *man/plot_model.Rd b79bcd62053c973bf2f34dd174db64f7 *man/predict_BBQ.Rd 1e3eac069d4fd3c54194bfd07d818fd2 
*man/predict_GUESS.Rd cee47e63f4777e5ae3cddfb231940757 *man/predict_calibratR.Rd 4fdc0b9f40da110df556c7ad77e11cac *man/predict_hist_binning.Rd 5ff7f19cb89c72ca9e1bd03a002ef763 *man/predict_model.Rd 0c4e7dd9ef3797ed8969c21c7fba878c *man/rd_multiple_runs.Rd bfa7a46a0566248f37dbe48fb8c1a00b *man/reliability_diagramm.Rd c5c8056f89aebea84fe37e6b0b90728f *man/scale_me.Rd 2145e886eb2d1d948a29ded7243ac138 *man/statistics_calibratR.Rd 972b70ca063cd2a4cda627c1468f1f13 *man/transform_me.Rd e39d7a2100d25537039f587fbbbe24fe *man/uncalibrated_CV.Rd 5adea79a6095d07f252a98375b855a39 *man/visualize_calibratR.Rd a9dae801fc467004a66ebd87d0893afd *man/visualize_calibrated_test_set.Rd f1dd01a46404827ff904c7960b7fdaed *man/visualize_distribution.Rd 1ada2367803fdb3d1bb0320116099872 *man/visualize_error_boxplot.Rd CalibratR/inst/0000755000176200001440000000000013526513020013067 5ustar liggesusersCalibratR/inst/CITATION0000644000176200001440000000056613526513020014233 0ustar liggesusersbibentry(bibtype = "article", title = "{GUESS}: Projecting Machine Learning Scores to Well-Calibrated Probability Estimates for Clinical Decision Making", author = c(person("Johanna", "Schwarz"), person("Dominik", "Heider")), journal = "Bioinformatics", year = "2019", volume = "35", number = "14", pages = "2458-2465")