cmstatr/0000755000176200001440000000000014574604752011745 5ustar liggesuserscmstatr/NAMESPACE0000644000176200001440000000523714125331440013152 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(augment,mnr) S3method(glance,adk) S3method(glance,anderson_darling) S3method(glance,basis) S3method(glance,equiv_change_mean) S3method(glance,equiv_mean_extremum) S3method(glance,levene) S3method(glance,mnr) S3method(print,adk) S3method(print,anderson_darling) S3method(print,basis) S3method(print,equiv_change_mean) S3method(print,equiv_mean_extremum) S3method(print,levene) S3method(print,mnr) export(ad_ksample) export(anderson_darling_lognormal) export(anderson_darling_normal) export(anderson_darling_weibull) export(augment) export(basis_anova) export(basis_hk_ext) export(basis_lognormal) export(basis_nonpara_large_sample) export(basis_normal) export(basis_pooled_cv) export(basis_pooled_sd) export(basis_weibull) export(calc_cv_star) export(cv) export(equiv_change_mean) export(equiv_mean_extremum) export(glance) export(hk_ext_z) export(hk_ext_z_j_opt) export(k_equiv) export(k_factor_normal) export(levene_test) export(maximum_normed_residual) export(nested_data_plot) export(nonpara_binomial_rank) export(normalize_group_mean) export(normalize_ply_thickness) export(stat_esf) export(stat_normal_surv_func) export(tidy) export(transform_mod_cv) export(transform_mod_cv_ad) importFrom(MASS,fitdistr) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(dplyr,ungroup) importFrom(generics,augment) importFrom(generics,glance) importFrom(generics,tidy) importFrom(ggplot2,Stat) importFrom(ggplot2,aes) importFrom(ggplot2,element_blank) importFrom(ggplot2,element_line) importFrom(ggplot2,expansion) importFrom(ggplot2,geom_hline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggproto) importFrom(ggplot2,labs) importFrom(ggplot2,layer) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) importFrom(ggplot2,xlab) importFrom(ggplot2,ylab) 
importFrom(kSamples,ad.test) importFrom(purrr,map_dfr) importFrom(rlang,.data) importFrom(rlang,abort) importFrom(rlang,call2) importFrom(rlang,call_args) importFrom(rlang,call_args_names) importFrom(rlang,call_name) importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,ensym) importFrom(rlang,eval_tidy) importFrom(rlang,exec) importFrom(rlang,fn_fmls_names) importFrom(rlang,inform) importFrom(rlang,list2) importFrom(rlang,quo_get_expr) importFrom(rlang,quo_text) importFrom(rlang,warn) importFrom(stats,dbeta) importFrom(stats,dnorm) importFrom(stats,integrate) importFrom(stats,median) importFrom(stats,pbeta) importFrom(stats,pchisq) importFrom(stats,pf) importFrom(stats,pnorm) importFrom(stats,pweibull) importFrom(stats,qnorm) importFrom(stats,qt) importFrom(stats,qweibull) importFrom(stats,sd) importFrom(stats,uniroot) importFrom(stats,var.test) importFrom(tibble,tibble) cmstatr/README.md0000644000176200001440000001306314477217401013220 0ustar liggesusers # cmstatr [![R-CMD-check](https://github.com/cmstatr/cmstatr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/cmstatr/cmstatr/actions/workflows/R-CMD-check.yaml) [![`Codecov` test coverage](https://codecov.io/gh/cmstatr/cmstatr/branch/master/graph/badge.svg)](https://app.codecov.io/gh/cmstatr/cmstatr?branch=master) [![DOI](https://joss.theoj.org/papers/10.21105/joss.02265/status.svg)](https://doi.org/10.21105/joss.02265) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/cmstatr)](https://cran.r-project.org/package=cmstatr) [![](https://cranlogs.r-pkg.org/badges/cmstatr)](https://cran.r-project.org/package=cmstatr) # What It Does The `cmstatr` package provides functions for performing statistical analysis of composite material data. The statistical methods implemented are those described in [CMH-17-1G](https://www.cmh17.org/). 
This package focuses on calculating basis values (lower tolerance bounds) for material strength properties, as well as performing the associated diagnostic tests. Functions are also provided for testing for equivalency between alternate samples and the “qualification” or “baseline” samples. Additional details about the package are available in the paper by Kloppenborg (2020, <https://doi.org/10.21105/joss.02265>). There is a companion package `cmstatrExt` which provides statistical methods that are not included in CMH-17, but which may be of use to practitioners. For more information, please see the [`cmstatrExt` Website](https://cmstatrext.cmstatr.net). # Installation To install `cmstatr` from CRAN, simply run: ``` r install.packages("cmstatr") ``` If you want the latest development version, you can install it from `github` using `devtools`. This will also install the dependencies required to build the vignettes. Optionally, change the value of the argument `ref` to install `cmstatr` from a different branch of the repository. 
``` r install.packages(c("devtools", "rmarkdown", "dplyr", "tidyr")) devtools::install_github("cmstatr/cmstatr", build_vignettes = TRUE, ref = "master", build_opts = c("--no-resave-data", "--no-manual")) ``` # Usage To compute a B-Basis value from an example data set packaged with `cmstatr` you can do the following: ``` r library(dplyr) library(cmstatr) carbon.fabric.2 %>% filter(test == "FC") %>% filter(condition == "RTD") %>% basis_normal(strength, batch) #> #> Call: #> basis_normal(data = ., x = strength, batch = batch) #> #> Distribution: Normal ( n = 18 ) #> B-Basis: ( p = 0.9 , conf = 0.95 ) #> 76.88082 ``` For more examples of usage of the `cmstatr` package, see the tutorial vignette, which can be [viewed online](https://www.cmstatr.net/articles/cmstatr_Tutorial.html), or can be loaded as follows, once the package is installed: ``` r vignette("cmstatr_Tutorial") ``` There is also a vignette showing some examples of the types of graphs that are typically produced when analyzing composite materials. You can view this [vignette online](https://www.cmstatr.net/articles/cmstatr_Graphing.html), or you can load this vignette with: ``` r vignette("cmstatr_Graphing") ``` # Philosophical Notes This package expects [`tidy data`](https://doi.org/10.18637/jss.v059.i10). That is, individual observations should be in rows and variables in columns. Where possible, this package uses general solutions. Look-up tables are avoided wherever possible. # Issues If you’ve found a bug, please open an issue in this repository and describe the bug. Please include a [reproducible example](https://reprex.tidyverse.org/) of the bug. If you’re able to fix the bug, you can do so by submitting a pull request. If your bug is related to a particular data set, sharing that data set will help to fix the bug. If you cannot share the data set, please strip any identifying information and optionally scale the data by an unspecified factor so that the bug can be reproduced and diagnosed. 
# Contributing Contributions to `cmstatr` are always welcomed. For small changes (fixing typos or improving the documentation), go ahead and submit a pull request. For more significant changes, such as new features, please discuss the proposed change in an issue first. ## Contribution Guidelines - Please create a git branch for each pull request (PR) - Before submitting a pull request, please make sure that `R CMD check` passes with no errors, warnings or notes - New and modified code should follow the style guide enforced by the [`lintr`](https://cran.r-project.org/package=lintr) package - Document all exported functions using [`roxygen2`](https://cran.r-project.org/package=roxygen2) - Write tests using [`testthat`](https://cran.r-project.org/package=testthat). If your contribution fixes a bug, then the test(s) that you add should fail before your bug-fix patch is applied and should pass after the code is patched. - For changes that affect the user, add a bullet at the top of `NEWS.md` below the current development version ## Development Testing is performed using `testthat`. Edition 3 of that package is used and parallel processing enabled. If you wish to use more than two CPUs, set the environment variable `TESTTHAT_CPUS` to the number of CPUs that you want to use. One way of doing this is to create the file `.Rprofile` with the following contents. This file is ignored both by `git` and also in `.Rbuildignore`. 
``` r Sys.setenv(TESTTHAT_CPUS = 8) ``` cmstatr/data/0000755000176200001440000000000014065516021012640 5ustar liggesuserscmstatr/data/carbon.fabric.rda0000644000176200001440000000514013507422437016030 0ustar liggesusersBZh91AY&SYR TA{n\SM)z4fAC'6h?Td4hѵbib4a4=dj~Dڞ2I䞚 @ё@ai=M`OPdؚd4jIa==FOSM 2=OH2=OM!2zd4zG24JhOSzHyO$i ~=@z'f2zQPff嚽E4Rd^,\q$dG5 / B@2sͼ抿*LFVN,Y*DpL?Y\z$>x<;K&$jZ;|Y<IdH@ ejGKZÀLƞ2oByX2DT;#Ke|>ߞgnXN^ӆ}UkǶ\VğXqк$rN15 Y_$ py>3'zQJ[=͍/ Ȑ:- `H4̒.FmȨv).JCL{ ̓ >-v*,p)^ª!Ģ3NՑnh2}hc l{*.n3H*{b_ Zu=JxZtU5`7 ;y!إ@%,mY>:ힻΒY3M%lp@*.VPuLgOuD$M ?$Sj2},Ƥ~{Mu_w6N2.3z͸jz$g,tFJU &D?&xzgN魑uO`f[{ jb;GGOѬjcau*xWSta׺Oʘo? ɂz]> %^N\:$oȸ3D^KZ18?ng ~،HST"&M|3:ͲhRO4g$ܑN$/ATcmstatr/data/carbon.fabric.2.rda0000644000176200001440000000473214065516021016167 0ustar liggesusers[ pT^BJL!E { tdı"c#YK콛G MLC@BCR BK:T-A(}huP(swnr%ݙ?A ˕JĿ;s25urؽܬFp}uy 5|Z,egږ'$>jTnJRI2S;^\+,N&Gd*'_K֮Wy+: [N;F\{QeQ:_*⊏ ӕޤI /.@K'8]w.ޞxOGP.֮n˫n=ޭ˧?Fy>?~i+s8m'km'?4њo9'yvο񽒏5}dQ[}G>NzkO1>ּ/~~߬Q?i]S曺vvxF?syަn/#'>Gxwۼm2?pĭv6ٶGdUt_ӡ[;/`l,{ƾ34O[i+CbzpgN8*~j݉]>v=,zZ9kg'+vZ(93:֣[F+^zhk Ϻ5ߋ>+v|>Gy 2>{FpY_dG| d/.(Bf!]d24k #{%@,q+yṙeQxAdX@F)\)K 6d(Wgd PW2Il#clZd,-kdʏ{*Q3x[4dn>8 Px &{'9R%>d~Nl wcQxaPYU19d0.Re܏œzDY_:^0$si)TȽF2('s|03\:[$ƑSH&&̑>"szMF2IpY|Ȃ]g? ʛ*mט\<).'+4eS'"`TV#z U|Xz̏=Gy8ݵ&uau?ݗARIXMk}ɦ_K>\ Tt v Ca_ưQA v}8puMB>eW ~eQ#_N! 
ӀGOɻ0B7Q?RyYިۢ iTT,A~ex̖?geqb>=dCR]bkQl 0ٟ؋0bg)͂>Ig?]wఎO"d{7fT7+]%8T<]g6>V+ Sn}_c?jQ/>ċ$z ~Pg./+=ʟjEgc^Y݈eXo xvg>P~ͬ /Im$s (I_w:IG^kq?#ݫ'jk\{g/ _3=g0 $M6V8m3mor7)>| r\uנU?=j.jo|Ȝx!tL4~V\-dx ݔgl_rvl|kvr=N D ѹtER>{&5<ߚMz9Z&ˉGq2;/K(JNXHg9h%q=80K!oȶ[8ݐPZ|!QL|F"E$ҪtI^PZ;_3*\eʆF :k#ՍյDl,jԙkkʆ5%RWS]i]X_vu6mFm̚r*ff}eOWڵ,1,-Ɍ ̪ضβ *O;wn,Ί翃ڻQ4cmstatr/man/0000755000176200001440000000000014477217401012511 5ustar liggesuserscmstatr/man/glance.basis.Rd0000644000176200001440000000534714021472437015337 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/basis.R \name{glance.basis} \alias{glance.basis} \title{Glance at a basis object} \usage{ \method{glance}{basis}(x, include_diagnostics = FALSE, ...) } \arguments{ \item{x}{a basis object} \item{include_diagnostics}{a logical value indicating whether to include columns for diagnostic tests. Default FALSE.} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{p} the the content of the tolerance bound. Normally 0.90 or 0.99 \item \code{conf} the confidence level. Normally 0.95 \item \code{distribution} a string representing the distribution assumed when calculating the basis value \item \code{modcv} a logical value indicating whether the modified CV approach was used. Only applicable to pooling methods. \item \code{n} the sample size \item \code{r} the number of groups used in the calculation. This will be \code{NA} for single-point basis values \item \code{basis} the basis value } } \description{ Glance accepts an object of type basis and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries for each basis value. Glance does not do any calculations: it just gathers the results in a tibble. 
} \details{ For the pooled basis methods (\code{basis_pooled_cv} and \code{basis_pooled_sd}), the \code{\link[tibble:tibble]{tibble::tibble()}} returned by \code{glance} will have one row for each group included in the pooling. For all other basis methods, the resulting \code{tibble} will have a single row. If \code{include_diagnostics=TRUE}, there will be additional columns corresponding with the diagnostic tests performed. These column(s) will be of type character and will contain a "P" if the diagnostic test passed, a "F" if the diagnostic test failed, an "O" if the diagnostic test was overridden or \code{NA} if the test was not run (typically because an optional argument was not passed to the function that computed the basis value). } \examples{ set.seed(10) x <- rnorm(20, 100, 5) b <- basis_normal(x = x) glance(b) ## # A tibble: 1 x 7 ## p conf distribution modcv n r basis ## ## 1 0.9 0.95 Normal FALSE 20 NA 92.0 glance(b, include_diagnostics = TRUE) ## # A tibble: 1 x 11 ## p conf distribution modcv n r basis outliers_within… ## ## 1 0.9 0.95 Normal FALSE 20 NA 92.0 NA ## # … with 3 more variables: between_batch_variability , ## # outliers , anderson_darling_normal } \seealso{ \code{\link[=basis]{basis()}} } cmstatr/man/maximum_normed_residual.Rd0000644000176200001440000000604114015477557017722 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mnr.R \name{maximum_normed_residual} \alias{maximum_normed_residual} \title{Detect outliers using the maximum normed residual method} \usage{ maximum_normed_residual(data = NULL, x, alpha = 0.05) } \arguments{ \item{data}{a data.frame} \item{x}{the variable in the data.frame for which to find the MNR or a vector if \code{data=NULL}} \item{alpha}{the significance level for the test. 
Defaults to 0.05} } \value{ an object of class \code{mnr} This object has the following fields: \itemize{ \item \code{call} the expression used to call this function \item \code{data} the original data used to compute the MNR \item \code{alpha} the value of alpha given by the user \item \code{mnr} the computed MNR test statistic \item \code{crit} the critical value given the sample size and the significance level \item \code{outliers} a data.frame containing the \code{index} and \code{value} of each of the identified outliers \item \code{n_outliers} the number of outliers found } } \description{ This function detects outliers using the maximum normed residual method described in CMH-17-1G. This method identifies a value as an outlier if the absolute difference between the value and the sample mean divided by the sample standard deviation exceeds a critical value. } \details{ \code{data} is an optional argument. If \code{data} is given, it should be a \code{data.frame} (or similar object). When \code{data} is specified, the value of \code{x} is expected to be a variable within \code{data}. If \code{data} is not specified, \code{x} must be a vector. The maximum normed residual test is a test for outliers. The test statistic is given in CMH-17-1G. Outliers are identified in the returned object. The maximum normed residual test statistic is defined as: \deqn{MNR = max \frac{\left| x_i - \bar{x} \right|}{s} }{ MNR = max | x_i- x_bar | / s } When the value of the MNR test statistic exceeds the critical value defined in Section 8.3.3.1 of CMH-17-1G, the corresponding value is identified as an outlier. It is then removed from the sample, and the test statistic is computed again and compared with the critical value corresponding with the new sample. This process is repeated until no values are identified as outliers. 
} \examples{ library(dplyr) carbon.fabric.2 \%>\% filter(test=="FC" & condition=="ETW2" & batch=="A") \%>\% maximum_normed_residual(strength) ## Call: ## maximum_normed_residual(data = ., x = strength) ## ## MNR = 1.958797 ( critical value = 1.887145 ) ## ## Outliers ( alpha = 0.05 ): ## Index Value ## 6 44.26 carbon.fabric.2 \%>\% filter(test=="FC" & condition=="ETW2" & batch=="B") \%>\% maximum_normed_residual(strength) ## Call: ## maximum_normed_residual(data = ., x = strength) ## ## MNR = 1.469517 ( critical value = 1.887145 ) ## ## No outliers detected ( alpha = 0.05 ) } \references{ “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. } cmstatr/man/k_equiv.Rd0000644000176200001440000000651114015477557014456 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equiv.R \name{k_equiv} \alias{k_equiv} \title{k-factors for determining acceptance based on sample mean and an extremum} \usage{ k_equiv(alpha, n) } \arguments{ \item{alpha}{the acceptable probability of a type I error} \item{n}{the number of observations in the sample to test} } \value{ a vector with elements c(k1, k2). k1 is for testing the sample extremum. k2 is for testing the sample mean } \description{ k-factors for determining acceptance based on sample mean and an extremum } \details{ The k-factors returned by this function are used for determining whether to accept a new dataset. This function is used as part of the procedure for determining acceptance limits for a sample mean and sample minimum. These acceptance limits are often used to set acceptance limits for material strength for each lot of material, or each new manufacturing site. When a sample meets the criteria that its mean and its minimum are both greater than these limits, then one may accept the lot of material or the new manufacturing site. 
This procedure is used to ensure that the strength of material processed at a second site, or made with a new batch of material are not degraded relative to the data originally used to determine basis values for the material. For more information about the use of this procedure, see CMH-17-1G or PS-ACE 100-2002-006. According to Vangel (2002), the use of mean and extremum for this purpose is more powerful than the use of mean and standard deviation. The results of this function match those published by Vangel within 0.05\\% for \eqn{n > 2} and \eqn{\alpha \le 0.25}. Those results published by Vangel are identical to those published in CMH-17-1G. This function uses numerical integration and numerical optimization to find values of the factors \eqn{k_1} and \eqn{k_2} based on Vangel's saddle point approximation. The value \eqn{n} refers to the number of observations in the sample being compared with the original population (the qualification sample is usually assumed to be equal to the population statistics). The value of \eqn{alpha} is the acceptable probability of a type I error. Normally, this is set to 0.05 for material or process equivalency and 0.01 when setting lot acceptance limits. Though, in principle, this parameter can be set to any number between 0 and 1. This function, however, has only been validated in the range of \eqn{1e-5 \le alpha \le 0.5}. } \examples{ qual_mean <- 100 qual_sd <- 3.5 k <- k_equiv(0.01, 5) print("Minimum Individual Acceptance Limit:") print(qual_mean - qual_sd * k[1]) print("Minimum Average Acceptance Limit:") print(qual_mean - qual_sd * k[2]) ## [1] "Minimum Individual Acceptance Limit:" ## [1] 89.24981 ## [1] "Minimum Average Acceptance Limit:" ## [1] 96.00123 } \references{ M. G. Vangel. Lot Acceptance and Compliance Testing Using the Sample Mean and an Extremum, Technometrics, vol. 44, no. 3. pp. 242–249. 2002. “Composite Materials Handbook, Volume 1. 
Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. Federal Aviation Administration, “Material Qualification and Equivalency for Polymer Matrix Composite Material Systems,” PS-ACE 100-2002-006, Sep. 2003. } \seealso{ \code{\link[=equiv_mean_extremum]{equiv_mean_extremum()}} } cmstatr/man/ad_ksample.Rd0000644000176200001440000000624314015477557015115 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/adk.R \name{ad_ksample} \alias{ad_ksample} \title{Anderson--Darling K-Sample Test} \usage{ ad_ksample(data = NULL, x, groups, alpha = 0.025) } \arguments{ \item{data}{a data.frame} \item{x}{the variable in the data.frame on which to perform the Anderson--Darling k-Sample test (usually strength)} \item{groups}{a variable in the data.frame that defines the groups} \item{alpha}{the significance level (default 0.025)} } \value{ Returns an object of class \code{adk}. This object has the following fields: \itemize{ \item \code{call} the expression used to call this function \item \code{data} the original data used to compute the ADK \item \code{groups} a vector of the groups used in the computation \item \code{alpha} the value of alpha specified \item \code{n} the total number of observations \item \code{k} the number of groups \item \code{sigma} the computed standard deviation of the test statistic \item \code{ad} the value of the Anderson--Darling k-Sample test statistic \item \code{p} the computed p-value \item \code{reject_same_dist} a boolean value indicating whether the null hypothesis that all samples come from the same distribution is rejected \item \code{raw} the original results returned from \link[kSamples:ad.test]{ad.test} } } \description{ This function performs an Anderson--Darling k-sample test. This is used to determine if several samples (groups) share a common (unspecified) distribution. 
} \details{ This function is a wrapper for the \link[kSamples:ad.test]{ad.test} function from the package \code{kSamples}. The method "exact" is specified in the call to \code{ad.test}. Refer to that package's documentation for details. There is a minor difference in the formulation of the Anderson--Darling k-Sample test in CMH-17-1G, compared with that in the Scholz and Stephens (1987). This difference affects the test statistic and the critical value in the same proportion, and therefore the conclusion of the test is unaffected. When comparing the test statistic generated by this function to that generated by software that uses the CMH-17-1G formulation (such as ASAP, CMH17-STATS, etc.), the test statistic reported by this function will be greater by a factor of \eqn{(k - 1)}, with a corresponding change in the critical value. For more information about the difference between this function and the formulation in CMH-17-1G, see the vignette on the subject, which can be accessed by running \code{vignette("adktest")} } \examples{ library(dplyr) carbon.fabric \%>\% filter(test == "WT") \%>\% filter(condition == "RTD") \%>\% ad_ksample(strength, batch) ## ## Call: ## ad_ksample(data = ., x = strength, groups = batch) ## ## N = 18 k = 3 ## ADK = 0.912 p-value = 0.95989 ## Conclusion: Samples come from the same distribution ( alpha = 0.025 ) } \references{ F. W. Scholz and M. Stephens, “K-Sample Anderson--Darling Tests,” Journal of the American Statistical Association, vol. 82, no. 399. pp. 918–924, Sep-1987. “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. 
} cmstatr/man/carbon.fabric.Rd0000644000176200001440000000124514065516021015464 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{carbon.fabric} \alias{carbon.fabric} \alias{carbon.fabric.2} \title{Sample data for a generic carbon fabric} \format{ An object of class \code{data.frame} with 216 rows and 5 columns. An object of class \code{data.frame} with 177 rows and 9 columns. } \usage{ carbon.fabric carbon.fabric.2 } \description{ Datasets containing sample data that is typical of a generic carbon fabric prepreg. This data is used in several examples within the \code{cmstatr} package. This data is fictional and should only be used for learning how to use this package. } \keyword{datasets} cmstatr/man/basis.Rd0000644000176200001440000004406714065516021014105 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/basis.R \name{basis} \alias{basis} \alias{basis_normal} \alias{basis_lognormal} \alias{basis_weibull} \alias{basis_pooled_cv} \alias{basis_pooled_sd} \alias{basis_hk_ext} \alias{basis_nonpara_large_sample} \alias{basis_anova} \title{Calculate basis values} \usage{ basis_normal( data = NULL, x, batch = NULL, p = 0.9, conf = 0.95, override = c() ) basis_lognormal( data = NULL, x, batch = NULL, p = 0.9, conf = 0.95, override = c() ) basis_weibull( data = NULL, x, batch = NULL, p = 0.9, conf = 0.95, override = c() ) basis_pooled_cv( data = NULL, x, groups, batch = NULL, p = 0.9, conf = 0.95, modcv = FALSE, override = c() ) basis_pooled_sd( data = NULL, x, groups, batch = NULL, p = 0.9, conf = 0.95, modcv = FALSE, override = c() ) basis_hk_ext( data = NULL, x, batch = NULL, p = 0.9, conf = 0.95, method = c("optimum-order", "woodward-frawley"), override = c() ) basis_nonpara_large_sample( data = NULL, x, batch = NULL, p = 0.9, conf = 0.95, override = c() ) basis_anova(data = NULL, x, groups, p = 0.9, conf = 0.95, override = c()) } \arguments{ 
\item{data}{a data.frame} \item{x}{the variable in the data.frame for which to find the basis value} \item{batch}{the variable in the data.frame that contains the batches.} \item{p}{the content of the tolerance bound. Should be 0.90 for B-Basis and 0.99 for A-Basis} \item{conf}{confidence level Should be 0.95 for both A- and B-Basis} \item{override}{a list of names of diagnostic tests to override, if desired. Specifying "all" will override all diagnostic tests applicable to the current method.} \item{groups}{the variable in the data.frame representing the groups} \item{modcv}{a logical value indicating whether the modified CV approach should be used. Only applicable to pooling methods.} \item{method}{the method for Hanson--Koopmans nonparametric basis values. should be "optimum-order" for B-Basis and "woodward-frawley" for A-Basis.} } \value{ an object of class \code{basis} This object has the following fields: \itemize{ \item \code{call} the expression used to call this function \item \code{distribution} the distribution used (normal, etc.) \item \code{p} the value of \eqn{p} supplied \item \code{conf} the value of \eqn{conf} supplied \item \code{modcv} a logical value indicating whether the modified CV approach was used. Only applicable to pooling methods. \item \code{data} a copy of the data used in the calculation \item \code{groups} a copy of the groups variable. Only used for pooling and ANOVA methods. \item \code{batch} a copy of the batch data used for diagnostic tests \item \code{modcv_transformed_data} the data after the modified CV transformation \item \code{override} a vector of the names of diagnostic tests that were overridden. \code{NULL} if none were overridden \item \code{diagnostic_results} a named character vector containing the results of all the diagnostic tests. 
See the Details section for additional information \item \code{diagnostic_failures} a vector containing any diagnostic tests that produced failures \item \code{n} the number of observations \item \code{r} the number of groups, if a pooling method was used. Otherwise it is NULL. \item \code{basis} the basis value computed. This is a number except when pooling methods are used, in which case it is a data.frame. } } \description{ Calculate the basis value for a given data set. There are various functions to calculate the basis values for different distributions. The basis value is the lower one-sided tolerance bound of a certain proportion of the population. For more information on tolerance bounds, see Meeker, et. al. (2017). For B-Basis, set the content of tolerance bound to \eqn{p=0.90} and the confidence level to \eqn{conf=0.95}; for A-Basis, set \eqn{p=0.99} and \eqn{conf=0.95}. While other tolerance bound contents and confidence levels may be computed, they are infrequently needed in practice. These functions also perform some automated diagnostic tests of the data prior to calculating the basis values. These diagnostic tests can be overridden if needed. } \details{ \code{data} is an optional argument. If \code{data} is given, it should be a \code{data.frame} (or similar object). When \code{data} is specified, the value of \code{x} is expected to be a variable within \code{data}. If \code{data} is not specified, \code{x} must be a vector. When \code{modcv=TRUE} is set, which is only applicable to the pooling methods, the data is first modified according to the modified coefficient of variation (CV) rules. This modified data is then used when both calculating the basis values and also when performing the diagnostic tests. The modified CV approach is a way of adding extra variance to datasets with unexpectedly low variance. \code{basis_normal} calculate the basis value by subtracting \eqn{k} times the standard deviation from the mean. 
\eqn{k} is given by the function \code{\link[=k_factor_normal]{k_factor_normal()}}. The equations in Krishnamoorthy and Mathew (2008) are used. \code{basis_normal} also performs a diagnostic test for outliers (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for normality (using \code{\link[=anderson_darling_normal]{anderson_darling_normal()}}). If the argument \code{batch} is given, this function also performs a diagnostic test for outliers within each batch (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for between batch variability (using \code{\link[=ad_ksample]{ad_ksample()}}). The argument \code{batch} is only used for these diagnostic tests. \code{basis_lognormal} calculates the basis value in the same way that \code{basis_normal} does, except that the natural logarithm of the data is taken. \code{basis_lognormal} function also performs a diagnostic test for outliers (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for normality (using \code{\link[=anderson_darling_lognormal]{anderson_darling_lognormal()}}). If the argument \code{batch} is given, this function also performs a diagnostic test for outliers within each batch (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for between batch variability (using \code{\link[=ad_ksample]{ad_ksample()}}). The argument \code{batch} is only used for these diagnostic tests. \code{basis_weibull} calculates the basis value for data distributed according to a Weibull distribution. The confidence level for the content requested is calculated using the conditional method, as described in Lawless (1982) Section 4.1.2b. This has good agreement with tables published in CMH-17-1G. Results differ between this function and STAT17 by approximately 0.5\\%. 
\code{basis_weibull} function also performs a diagnostic test for outliers (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for normality (using \code{\link[=anderson_darling_weibull]{anderson_darling_weibull()}}). If the argument \code{batch} is given, this function also performs a diagnostic test for outliers within each batch (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for between batch variability (using \code{\link[=ad_ksample]{ad_ksample()}}). The argument \code{batch} is only used for these diagnostic tests. \code{basis_hk_ext} calculates the basis value using the Extended Hanson--Koopmans method, as described in CMH-17-1G and Vangel (1994). For nonparametric distributions, this function should be used for samples up to n=28 for B-Basis and up to \eqn{n=299} for A-Basis. This method uses a pair of order statistics to determine the basis value. CMH-17-1G suggests that for A-Basis, the first and last order statistic is used: this is called the "woodward-frawley" method in this package, after the paper in which this approach is described (as referenced by Vangel (1994)). For B-Basis, another approach is used whereby the first and \code{j-th} order statistic are used to calculate the basis value. In this approach, the \code{j-th} order statistic is selected to minimize the difference between the tolerance limit (assuming that the order statistics are equal to the expected values from a standard normal distribution) and the population quantile for a standard normal distribution. This approach is described in Vangel (1994). This second method (for use when calculating B-Basis values) is called "optimum-order" in this package. The results of \code{basis_hk_ext} have been verified against example results from the program STAT-17. Agreement is typically well within 0.2\%. Note that the implementation of \code{hk_ext_z_j_opt} changed after \code{cmstatr} version 0.8.0. 
This function is used internally by \code{basis_hk_ext} when \code{method = "optimum-order"}. This implementation change may mean that basis values computed using this method may change slightly after version 0.8.0. However, both implementations seem to be equally valid. See the included vignette for a discussion of the differences between the implementation before and after version 0.8.0, as well as the factors given in CMH-17-1G. To access this vignette, run: \code{vignette("hk_ext", package = "cmstatr")} \code{basis_hk_ext} also performs a diagnostic test for outliers (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and performs a pair of tests that the sample size and method selected follow the guidance described above. If the argument \code{batch} is given, this function also performs a diagnostic test for outliers within each batch (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for between batch variability (using \code{\link[=ad_ksample]{ad_ksample()}}). The argument \code{batch} is only used for these diagnostic tests. \code{basis_nonpara_large_sample} calculates the basis value using the large sample method described in CMH-17-1G. This method uses a sum of binomials to determine the rank of the ordered statistic corresponding with the desired tolerance limit (basis value). Results of this function have been verified against results of the STAT-17 program. \code{basis_nonpara_large_sample} also performs a diagnostic test for outliers (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and performs a test that the sample size is sufficiently large. If the argument \code{batch} is given, this function also performs a diagnostic test for outliers within each batch (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a diagnostic test for between batch variability (using \code{\link[=ad_ksample]{ad_ksample()}}). 
The argument \code{batch} is only used for these diagnostic tests. \code{basis_anova} calculates basis values using the ANOVA method. \code{x} specifies the data (normally strength) and \code{groups} indicates the group corresponding to each observation. This method is described in CMH-17-1G, but when the ratio of between-batch mean square to the within-batch mean square is less than or equal to one, the tolerance factor is calculated based on pooling the data from all groups. This approach is recommended by Vangel (1992) and by Krishnamoorthy and Mathew (2008), and is also implemented by the software CMH17-STATS and STAT-17. This function automatically performs a diagnostic test for outliers within each group (using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a test for between group variability (using \code{\link[=ad_ksample]{ad_ksample()}}) as well as checking that the data contains at least 5 groups. This function has been verified against the results of the STAT-17 program. \code{basis_pooled_sd} calculates basis values by pooling the data from several groups together. \code{x} specifies the data (normally strength) and \code{group} indicates the group corresponding to each observation. This method is described in CMH-17-1G and matches the pooling method implemented in ASAP 2008. \code{basis_pooled_cv} calculates basis values by pooling the data from several groups together. \code{x} specifies the data (normally strength) and \code{group} indicates the group corresponding to each observation. This method is described in CMH-17-1G. \code{basis_pooled_sd} and \code{basis_pooled_cv} both automatically perform a number of diagnostic tests. Using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}, they check that there are no outliers within each group and batch (provided that \code{batch} is specified). They check the between batch variability using \code{\link[=ad_ksample]{ad_ksample()}}. 
They check that there are no outliers within each group (pooling all batches) using \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}. They check for the normality of the pooled data using \code{\link[=anderson_darling_normal]{anderson_darling_normal()}}. \code{basis_pooled_sd} checks for equality of variance of all data using \code{\link[=levene_test]{levene_test()}} and \code{basis_pooled_cv} checks for equality of variances of all data after transforming it using \code{\link[=normalize_group_mean]{normalize_group_mean()}} using \code{\link[=levene_test]{levene_test()}}. The object returned by these functions includes the named vector \code{diagnostic_results}. This contains all of the diagnostic tests performed. The name of each element of the vector corresponds with the name of the diagnostic test. The contents of each element will be "P" if the diagnostic test passed, "F" if the diagnostic test failed, "O" if the diagnostic test was overridden and \code{NA} if the diagnostic test was skipped (typically because an optional argument was not supplied). The following list summarizes the diagnostic tests automatically performed by each function. 
\itemize{ \item \code{basis_normal} \itemize{ \item \code{outliers_within_batch} \item \code{between_batch_variability} \item \code{outliers} \item \code{anderson_darling_normal} } \item \code{basis_lognormal} \itemize{ \item \code{outliers_within_batch} \item \code{between_batch_variability} \item \code{outliers} \item \code{anderson_darling_lognormal} } \item \code{basis_weibull} \itemize{ \item \code{outliers_within_batch} \item \code{between_batch_variability} \item \code{outliers} \item \code{anderson_darling_weibull} } \item \code{basis_pooled_cv} \itemize{ \item \code{outliers_within_batch} \item \code{between_group_variability} \item \code{outliers_within_group} \item \code{pooled_data_normal} \item \code{normalized_variance_equal} } \item \code{basis_pooled_sd} \itemize{ \item \code{outliers_within_batch} \item \code{between_group_variability} \item \code{outliers_within_group} \item \code{pooled_data_normal} \item \code{pooled_variance_equal} } \item \code{basis_hk_ext} \itemize{ \item \code{outliers_within_batch} \item \code{between_batch_variability} \item \code{outliers} \item \code{sample_size} } \item \code{basis_nonpara_large_sample} \itemize{ \item \code{outliers_within_batch} \item \code{between_batch_variability} \item \code{outliers} \item \code{sample_size} } \item \code{basis_anova} \itemize{ \item \code{outliers_within_group} \item \code{equality_of_variance} \item \code{number_of_groups} } } } \examples{ library(dplyr) # A single-point basis value can be calculated as follows # in this example, three failed diagnostic tests are # overridden. 
carbon.fabric \%>\% filter(test == "FC") \%>\% filter(condition == "RTD") \%>\% basis_normal(strength, batch, override = c("outliers", "outliers_within_batch", "anderson_darling_normal")) ## Call: ## basis_normal(data = ., x = strength, batch = batch, ## override = c("outliers", "outliers_within_batch", ## "anderson_darling_normal")) ## ## Distribution: Normal ( n = 18 ) ## The following diagnostic tests were overridden: ## `outliers`, ## `outliers_within_batch`, ## `anderson_darling_normal` ## B-Basis: ( p = 0.9 , conf = 0.95 ) ## 76.94656 # A set of pooled basis values can also be calculated # using the pooled standard deviation method, as follows. # In this example, one failed diagnostic test is overridden. carbon.fabric \%>\% filter(test == "WT") \%>\% basis_pooled_sd(strength, condition, batch, override = c("outliers_within_batch")) ## Call: ## basis_pooled_sd(data = ., x = strength, groups = condition, ## batch = batch, override = c("outliers_within_batch")) ## ## Distribution: Normal - Pooled Standard Deviation ( n = 54, r = 3 ) ## The following diagnostic tests were overridden: ## `outliers_within_batch` ## B-Basis: ( p = 0.9 , conf = 0.95 ) ## CTD 127.6914 ## ETW 125.0698 ## RTD 132.1457 } \references{ J. F. Lawless, Statistical Models and Methods for Lifetime Data. New York: John Wiley & Sons, 1982. “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. M. Vangel, “One-Sided Nonparametric Tolerance Limits,” Communications in Statistics - Simulation and Computation, vol. 23, no. 4. pp. 1137–1154, 1994. K. Krishnamoorthy and T. Mathew, Statistical Tolerance Regions: Theory, Applications, and Computation. Hoboken: John Wiley & Sons, 2008. W. Meeker, G. Hahn, and L. Escobar, Statistical Intervals: A Guide for Practitioners and Researchers, Second Edition. Hoboken: John Wiley & Sons, 2017. M. 
Vangel, “New Methods for One-Sided Tolerance Limits for a One-Way Balanced Random-Effects ANOVA Model,” Technometrics, vol. 34, no. 2. Taylor & Francis, pp. 176–185, 1992. } \seealso{ \code{\link[=hk_ext_z_j_opt]{hk_ext_z_j_opt()}} \code{\link[=k_factor_normal]{k_factor_normal()}} \code{\link[=transform_mod_cv]{transform_mod_cv()}} \code{\link[=maximum_normed_residual]{maximum_normed_residual()}} \code{\link[=anderson_darling_normal]{anderson_darling_normal()}} \code{\link[=anderson_darling_lognormal]{anderson_darling_lognormal()}} \code{\link[=anderson_darling_weibull]{anderson_darling_weibull()}} \code{\link[=ad_ksample]{ad_ksample()}} \code{\link[=normalize_group_mean]{normalize_group_mean()}} } cmstatr/man/k_factor_normal.Rd0000644000176200001440000000604314015477557016153 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/basis.R \name{k_factor_normal} \alias{k_factor_normal} \title{Calculate k factor for basis values (\eqn{kB}, \eqn{kA}) with normal distribution} \usage{ k_factor_normal(n, p = 0.9, conf = 0.95) } \arguments{ \item{n}{the number of observations (i.e. coupons)} \item{p}{the desired content of the tolerance bound. Should be 0.90 for B-Basis and 0.99 for A-Basis} \item{conf}{confidence level. Should be 0.95 for both A- and B-Basis} } \value{ the calculated factor } \description{ The factors returned by this function are used when calculating basis values (one-sided confidence bounds) when the data are normally distributed. The basis value will be equal to \eqn{\bar{x} - k s}{x_bar - k s}, where \eqn{\bar{x}}{x_bar} is the sample mean, \eqn{s} is the sample standard deviation and \eqn{k} is the result of this function. This function is internally used by \code{\link[=basis_normal]{basis_normal()}} when computing basis values. } \details{ This function calculates the k factors used when determining A- and B-Basis values for normally distributed data. 
To get \eqn{kB}, set the content of the tolerance bound to \code{p = 0.90} and the confidence level to \code{conf = 0.95}. To get \eqn{kA}, set \code{p = 0.99} and \code{conf = 0.95}. While other tolerance bound contents and confidence levels may be computed, they are infrequently needed in practice. The k-factor is calculated using equation 2.2.3 of Krishnamoorthy and Mathew (2008). This function has been validated against the \eqn{kB} tables in CMH-17-1G for each value of \eqn{n} from \eqn{n = 2} to \eqn{n = 95}. It has been validated against the \eqn{kA} tables in CMH-17-1G for each value of \eqn{n} from \eqn{n = 2} to \eqn{n = 75}. Larger values of \eqn{n} also match the tables in CMH-17-1G, but R emits warnings that "full precision may not have been achieved." When validating the results of this function against the tables in CMH-17-1G, the maximum allowable difference between the two is 0.002. The tables in CMH-17-1G give values to three decimal places. For more information about tolerance bounds in general, see Meeker, et. al. (2017). } \examples{ kb <- k_factor_normal(n = 10, p = 0.9, conf = 0.95) print(kb) ## [1] 2.35464 # This can be used to caclulate the B-Basis if # the sample mean and sample standard deviation # is known, and data is assumed to be normally # distributed sample_mean <- 90 sample_sd <- 5.2 print("B-Basis:") print(sample_mean - sample_sd * kb) ## [1] B-Basis: ## [1] 77.75587 } \references{ K. Krishnamoorthy and T. Mathew, Statistical Tolerance Regions: Theory, Applications, and Computation. Hoboken: John Wiley & Sons, 2008. W. Meeker, G. Hahn, and L. Escobar, Statistical Intervals: A Guide for Practitioners and Researchers, Second Edition. Hoboken: John Wiley & Sons, 2017. “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. 
} \seealso{ \code{\link[=basis_normal]{basis_normal()}} } cmstatr/man/glance.levene.Rd0000644000176200001440000000312414015477557015516 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/levene.R \name{glance.levene} \alias{glance.levene} \title{Glance at a \code{levene} object} \usage{ \method{glance}{levene}(x, ...) } \arguments{ \item{x}{a \code{levene} object returned from \code{\link[=levene_test]{levene_test()}}} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A one-row \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{alpha} the value of alpha specified \item \code{modcv} a logical value indicating whether the modified CV approach was used. \item \code{n} the total number of observations \item \code{k} the number of groups \item \code{f} the value of the F test statistic \item \code{p} the computed p-value \item \code{reject_equal_variance} a boolean value indicating whether the null hypothesis that all samples have the same variance is rejected } } \description{ Glance accepts an object of type \code{levene} and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries. Glance does not do any calculations: it just gathers the results in a tibble. 
} \examples{ df <- data.frame( groups = c(rep("A", 5), rep("B", 6)), strength = c(rnorm(5, 100, 6), rnorm(6, 105, 7)) ) levene_result <- levene_test(df, strength, groups) glance(levene_result) ## # A tibble: 1 x 7 ## alpha modcv n k f p reject_equal_variance ## ## 1 0.05 FALSE 11 2 0.0191 0.893 FALSE } \seealso{ \code{\link[=levene_test]{levene_test()}} } cmstatr/man/stat_normal_surv_func.Rd0000644000176200001440000000253014015477557017425 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R \name{stat_normal_surv_func} \alias{stat_normal_surv_func} \title{Normal Survival Function} \usage{ stat_normal_surv_func( mapping = NULL, data = NULL, geom = "smooth", position = "identity", show.legend = NA, inherit.aes = TRUE, n = 100, pad = FALSE, ... ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{aes()}.} \item{data}{The data to be displayed in this layer. This has the same usage as a \code{ggplot2} \code{stat} function.} \item{geom}{The geometric object to use to display the data.} \item{position}{Position argument} \item{show.legend}{Should this layer be included in the legends?} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetic, rather than combining with them.} \item{n}{If \code{NULL}, do not interpolated. Otherwise, the number of points to interpolate.} \item{pad}{If \code{TRUE}, pad the ESF with additional points \verb{(-Inf, 0)} and \verb{(0, Inf)}.} \item{...}{Other arguments to pass on to \code{layer}.} } \description{ The Normal survival function provides a visualization of a distribution. A normal curve is fit based on the mean and standard deviation of the data, and the survival function of this normal curve is plotted. The survival function is simply one minus the CDF. 
} cmstatr/man/hk_ext.Rd0000644000176200001440000000666014015477557014302 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/basis.R \name{hk_ext} \alias{hk_ext} \alias{hk_ext_z} \alias{hk_ext_z_j_opt} \title{Calculate values related to Extended Hanson--Koopmans tolerance bounds} \usage{ hk_ext_z(n, i, j, p, conf) hk_ext_z_j_opt(n, p, conf) } \arguments{ \item{n}{the sample size} \item{i}{the first order statistic (1 <= i < j)} \item{j}{the second order statistic (i < j <= n)} \item{p}{the content of the tolerance bound (normally 0.90 or 0.99)} \item{conf}{the confidence level (normally 0.95)} } \value{ For \code{hk_ext_z}, the return value is a numeric value representing the parameter z (denoted as k in CMH-17-1G). For \code{hk_ext_z_j_opt}, the return value is named list containing \code{z} and \code{k}. The former is the value of z, as defined by Vangel (1994), and the latter is the corresponding order statistic. } \description{ Calculates values related to Extended Hanson--Koopmans tolerance bounds as described by Vangel (1994). } \details{ Hanson (1964) presents a nonparametric method for determining tolerance bounds based on consecutive order statistics. Vangel (1994) extends this method using non-consecutive order statistics. The extended Hanson--Koopmans method calculates a tolerance bound (basis value) based on two order statistics and a weighting value \code{z}. The value of \code{z} is based on the sample size, which order statistics are selected, the desired content of the tolerance bond and the desired confidence level. The function \code{hk_ext_z} calculates the weighting variable \code{z} based on selected order statistics \code{i} and \code{j}. Based on this value \code{z}, the tolerance bound can be calculated as: \deqn{S = z X_{(i)} + (1 - z) X_{(j)}}{S = z X(i) + (1 - z) X(j)} Where \eqn{X_{(i)}}{X(i)} and \eqn{X_{(j)}}{X(j)} are the \code{i-th} and \code{j-th} ordered observation. 
The function \code{hk_ext_z_j_opt} determines the value of \code{j} and the corresponding value of \code{z}, assuming \code{i=1}. The value of \code{j} is selected such that the computed tolerance limit is nearest to the desired population quantile for a standard normal distribution when the order statistics are equal to the expected value of the order statistics for the standard normal distribution. } \examples{ # The factors from Table 1 of Vangel (1994) can be recreated # using the hk_ext_z function. For the sample size n=21, # the median is the 11th ordered observation. The factor # required for calculating the tolerance bound with a content # of 0.9 and a confidence level of 0.95 based on the median # and first ordered observation can be calculated as follows. hk_ext_z(n = 21, i = 1, j = 11, p = 0.9, conf = 0.95) ## [1] 1.204806 # The hk_ext_z_j_opt function can be used to refine this value # of z by finding an optimum value of j, rather than simply # using the median. Here, we find that the optimal observation # to use is the 10th, not the 11th (which is the median). hk_ext_z_j_opt(n = 21, p = 0.9, conf = 0.95) ## $z ## [1] 1.217717 ## ## $j ## [1] 10 } \references{ M. Vangel, “One-Sided Nonparametric Tolerance Limits,” Communications in Statistics - Simulation and Computation, vol. 23, no. 4. pp. 1137–1154, 1994. D. L. Hanson and L. H. Koopmans, “Tolerance Limits for the Class of Distributions with Increasing Hazard Rates,” The Annals of Mathematical Statistics, vol. 35, no. 4. pp. 1561–1570, 1964. 
} \seealso{ \code{\link[=basis_hk_ext]{basis_hk_ext()}} } cmstatr/man/normalize_group_mean.Rd0000644000176200001440000000275313676373651017235 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/norm.R \name{normalize_group_mean} \alias{normalize_group_mean} \title{Normalize values to group means} \usage{ normalize_group_mean(x, group) } \arguments{ \item{x}{the variable containing the data to normalized} \item{group}{the variable containing the groups} } \value{ Returns a vector of normalized values } \description{ This function computes the mean of each group, then divides each observation by its corresponding group mean. This is commonly done when pooling data across environments. } \details{ Computes the mean for each group, then divides each value by the mean for the corresponding group. } \examples{ library(dplyr) carbon.fabric.2 \%>\% filter(test == "WT") \%>\% select(condition, strength) \%>\% mutate(condition_norm = normalize_group_mean(strength, condition)) \%>\% head(10) ## condition strength condition_norm ## 1 CTD 142.817 1.0542187 ## 2 CTD 135.901 1.0031675 ## 3 CTD 132.511 0.9781438 ## 4 CTD 135.586 1.0008423 ## 5 CTD 125.145 0.9237709 ## 6 CTD 135.203 0.9980151 ## 7 CTD 128.547 0.9488832 ## 8 CTD 127.709 0.9426974 ## 9 CTD 127.074 0.9380101 ## 10 CTD 126.879 0.9365706 } \references{ “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. } cmstatr/man/augment.mnr.Rd0000644000176200001440000000427214015477557015250 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mnr.R \name{augment.mnr} \alias{augment.mnr} \title{Augment data with information from an \code{mnr} object} \usage{ \method{augment}{mnr}(x, data = x$data, ...) 
} \arguments{ \item{x}{an \code{mnr} object created by \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}} \item{data}{a \code{data.frame} or \code{\link[tibble:tibble]{tibble::tibble()}} containing the original data that was passed to \code{maximum_normed_residual}} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ When \code{data} is supplied, \code{augment} returns \code{data}, but with one column appended. When \code{data} is not supplied, \code{augment} returns a new \code{\link[tibble:tibble]{tibble::tibble()}} with the column \code{values} containing the original values used by \code{maximum_normed_residaul} plus one additional column. The additional column is: \itemize{ \item \code{.outler} a logical value indicating whether the observation is an outlier } } \description{ Augment accepts an \code{mnr} object (returned from the function \code{\link[=maximum_normed_residual]{maximum_normed_residual()}}) and a dataset and adds the column \code{.outlier} to the dataset. The column \code{.outlier} is a logical vector indicating whether each observation is an outlier. When passing data into \code{augment} using the \code{data} argument, the data must be exactly the data that was passed to \code{maximum_normed_residual}. 
} \examples{ data <- data.frame(strength = c(80, 98, 96, 97, 98, 120)) m <- maximum_normed_residual(data, strength) # augment can be called with the original data augment(m, data) ## strength .outlier ## 1 80 FALSE ## 2 98 FALSE ## 3 96 FALSE ## 4 97 FALSE ## 5 98 FALSE ## 6 120 FALSE # or augment can be called without the orignal data and it will be # reconstructed augment(m) ## # A tibble: 6 x 2 ## values .outlier ## ## 1 80 FALSE ## 2 98 FALSE ## 3 96 FALSE ## 4 97 FALSE ## 5 98 FALSE ## 6 120 FALSE } \seealso{ \code{\link[=maximum_normed_residual]{maximum_normed_residual()}} } cmstatr/man/nonpara_binomial_rank.Rd0000644000176200001440000000602614015477557017337 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/basis.R \name{nonpara_binomial_rank} \alias{nonpara_binomial_rank} \title{Rank for distribution-free tolerance bound} \usage{ nonpara_binomial_rank(n, p, conf) } \arguments{ \item{n}{the sample size} \item{p}{the desired content for the tolerance bound} \item{conf}{the confidence level for the desired tolerance bound} } \value{ The rank corresponding with the desired tolerance bound } \description{ Calculates the rank order for finding distribution-free tolerance bounds for large samples. This function should only be used for computing B-Basis for samples larger than 28 or A-Basis for samples larger than 298. This function is used by \code{\link[=basis_nonpara_large_sample]{basis_nonpara_large_sample()}}. } \details{ This function uses the sum of binomial terms to determine the rank of the ordered statistic that corresponds with the desired tolerance limit. This approach does not assume any particular distribution. This approach is described by Guenther (1969) and by CMH-17-1G. 
The results of this function have been verified against the tables in CMH-17-1G and agreement was found for all sample sizes published in CMH-17-1G for both A- and B-Basis, as well as the sample sizes \code{n+1} and \code{n-1}, where \code{n} is the sample size published in CMH-17-1G. The tables in CMH-17-1G purportedly list the smallest sample sizes for which a particular rank can be used. That is, for a sample size one less than the \code{n} published in the table, the next lowest rank would be used. In some cases, the results of this function disagree by a rank of one for sample sizes one less than the \code{n} published in the table. This indicates a disagreement in that sample size at which the rank should change. This is likely due to numerical differences in this function and the procedure used to generate the tables. However, the disagreement is limited to sample 6500 for A-Basis; no discrepancies have been identified for B-Basis. Since these sample sizes are uncommon for composite materials testing, and the difference between subsequent order statistics will be very small for samples this large, this difference will have no practical effect on computed tolerance bounds. } \examples{ nonpara_binomial_rank(n = 1693, p = 0.99, conf = 0.95) ## [1] 11 # The above example indicates that for a sample of 1693 observations, # the A-Basis is best approximated as the 11th ordered observation. # In the example below, the same ordered observation would also be used # for a sample of size 1702. nonpara_binomial_rank(n = 1702, p = 0.99, conf = 0.95) ## [1] 11 } \references{ W. Guenther, “Determination of Sample Size for Distribution-Free Tolerance Limits,” Jan. 1969. Available online: \url{https://www.duo.uio.no/handle/10852/48686} “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. 
} \seealso{ \code{\link[=basis_nonpara_large_sample]{basis_nonpara_large_sample()}} } cmstatr/man/glance.mnr.Rd0000644000176200001440000000231014015477557015030 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mnr.R \name{glance.mnr} \alias{glance.mnr} \title{Glance at a \code{mnr} (maximum normed residual) object} \usage{ \method{glance}{mnr}(x, ...) } \arguments{ \item{x}{An \code{mnr} object} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A one-row \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{mnr} the computed MNR test statistic \item \code{alpha} the value of alpha used for the test \item \code{crit} the critical value given the sample size and the significance level \item \code{n_outliers} the number of outliers found } } \description{ Glance accepts an object of type \code{mnr} and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries. Glance does not do any calculations: it just gathers the results in a tibble. } \examples{ x <- c(rnorm(20, 100, 5), 10) m <- maximum_normed_residual(x = x) glance(m) ## # A tibble: 1 x 4 ## mnr alpha crit n_outliers ## ## 1 4.23 0.05 2.73 1 } \seealso{ \code{\link[=maximum_normed_residual]{maximum_normed_residual()}} } cmstatr/man/equiv_mean_extremum.Rd0000644000176200001440000001707414015477557017100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equiv.R \name{equiv_mean_extremum} \alias{equiv_mean_extremum} \title{Test for decrease in mean or minimum individual} \usage{ equiv_mean_extremum( df_qual = NULL, data_qual = NULL, mean_qual = NULL, sd_qual = NULL, data_sample = NULL, n_sample = NULL, alpha, modcv = FALSE ) } \arguments{ \item{df_qual}{(optional) a data.frame containing the qualification data. 
Defaults to NULL.} \item{data_qual}{(optional) a vector of observations from the "qualification" data to which equivalency is being tested. Or the column of \code{df_qual} that contains this data. Defaults to NULL} \item{mean_qual}{(optional) the mean from the "qualification" data to which equivalency is being tested. Defaults to NULL} \item{sd_qual}{(optional) the standard deviation from the "qualification" data to which equivalency is being tested. Defaults to NULL} \item{data_sample}{(optional) a vector of observations from the sample for which equivalency is being tested. Defaults to NULL} \item{n_sample}{(optional) the number of observations in the sample for which equivalency will be tested. Defaults to NULL} \item{alpha}{the acceptable probability of a type I error} \item{modcv}{(optional) a boolean value indicating whether a modified CV should be used. Defaults to FALSE, in which case the standard deviation supplied (or calculated from \code{data_qual}) will be used directly.} } \value{ Returns an object of class \code{equiv_mean_extremum}. This object is a list with the following named elements: \itemize{ \item \code{call} the expression used to call this function \item \code{alpha} the value of alpha passed to this function \item \code{n_sample} the number of observations in the sample for which equivalency is being checked. This is either the value \code{n_sample} passed to this function or the length of the vector \code{data_sample}. \item \code{k1} the factor used to calculate the minimum individual threshold. The minimum individual threshold is calculated as \eqn{W_{min} = qual\,mean - k_1 \cdot qual\,sd}{ Wmin = qual_mean - k1 * qual_sd} \item \code{k2} the factor used to calculate the threshold for mean. 
The threshold for mean is calculated as \eqn{W_{mean} = qual\,mean - k_2 \cdot qual\,sd}{ Wmean = qual_mean - k2 * qual_sd} \item \code{modcv} logical value indicating whether the acceptance thresholds are calculated using the modified CV approach \item \code{cv} the coefficient of variation of the qualification data. This value is not modified, even if \code{modcv=TRUE} \item \code{cv_star} The modified coefficient of variation. If \code{modcv=FALSE}, this will be \code{NULL} \item \code{threshold_min_indiv} The calculated threshold value for minimum individual \item \code{threshold_mean} The calculated threshold value for mean \item \code{result_min_indiv} a character vector of either "PASS" or "FAIL" indicating whether the data from \code{data_sample} passes the test for minimum individual. If \code{data_sample} was not supplied, this value will be \code{NULL} \item \code{result_mean} a character vector of either "PASS" or "FAIL" indicating whether the data from \code{data_sample} passes the test for mean. If \code{data_sample} was not supplied, this value will be \code{NULL} \item \code{min_sample} The minimum value from the vector \code{data_sample}. if \code{data_sample} was not supplied, this will have a value of \code{NULL} \item \code{mean_sample} The mean value from the vector \code{data_sample}. If \code{data_sample} was not supplied, this will have a value of \code{NULL} } } \description{ This test is used when determining if a new process or manufacturing location produces material properties that are "equivalent" to an existing dataset, and hence the existing basis values are applicable to the new dataset. This test is also sometimes used for determining if a new batch of material is acceptable. This function determines thresholds based on both minimum individual and mean, and optionally evaluates a sample against those thresholds. The joint distribution between the sample mean and sample minimum is used to generate these thresholds. 
When there is no true difference between the existing ("qualification") and the new population from which the sample is obtained, there is a probability of \eqn{\alpha} of falsely concluding that there is a difference in mean or variance. It is assumed that both the original and new populations are normally distributed. According to Vangel (2002), this test provides improved power compared with a test of mean and standard deviation. } \details{ This function is used to determine acceptance limits for a sample mean and sample minimum. These acceptance limits are often used to set acceptance limits for material strength for each lot of material, or each new manufacturing site. When a sample meets the criteria that its mean and its minimum are both greater than these limits, then one may accept the lot of material or the new manufacturing site. This procedure is used to ensure that the strength of material processed at a second site, or made with a new batch of material are not degraded relative to the data originally used to determine basis values for the material. For more information about the use of this procedure, see CMH-17-1G or PS-ACE 100-2002-006. There are several optional arguments to this function. However, you can't omit all of the optional arguments. You must supply either \code{data_sample} or \code{n_sample}, but not both. You must also supply either \code{data_qual} (and \code{df_qual} if \code{data_qual} is a variable name and not a vector) or both \code{mean_qual} and \code{sd_qual}, but if you supply \code{data_qual} (and possibly \code{df_qual}) you should not supply either \code{mean_qual} or \code{sd_qual} (and visa-versa). This function will issue a warning or error if you violate any of these rules. If \code{modcv} is TRUE, the standard deviation used to calculate the thresholds will be replaced with a standard deviation calculated using the Modified Coefficient of Variation (CV) approach. 
The Modified CV approach is a way of adding extra variance to the qualification data in the case that the qualification data has less variance than expected, which sometimes occurs when qualification testing is performed in a short period of time. Using the Modified CV approach, the standard deviation is calculated by multiplying \code{CV_star * mean_qual} where \code{mean_qual} is either the value supplied or the value calculated by \code{mean(data_qual)} and \eqn{CV*} is the value computed by \code{\link[=calc_cv_star]{calc_cv_star()}}. } \examples{ equiv_mean_extremum(alpha = 0.01, n_sample = 6, mean_qual = 100, sd_qual = 5.5, modcv = TRUE) ## ## Call: ## equiv_mean_extremum(mean_qual = 100, sd_qual = 5.5, n_sample = 6, ## alpha = 0.01, modcv = TRUE) ## ## Modified CV used: CV* = 0.0675 ( CV = 0.055 ) ## ## For alpha = 0.01 and n = 6 ## ( k1 = 3.128346 and k2 = 1.044342 ) ## Min Individual Sample Mean ## Thresholds: 78.88367 92.95069 } \references{ M. G. Vangel. Lot Acceptance and Compliance Testing Using the Sample Mean and an Extremum, Technometrics, vol. 44, no. 3. pp. 242–249. 2002. “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. Federal Aviation Administration, “Material Qualification and Equivalency for Polymer Matrix Composite Material Systems,” PS-ACE 100-2002-006, Sep. 2003. } \seealso{ \code{\link[=k_equiv]{k_equiv()}} \code{\link[=calc_cv_star]{calc_cv_star()}} } cmstatr/man/figures/0000755000176200001440000000000014052741370014151 5ustar liggesuserscmstatr/man/figures/logo.png0000644000176200001440000006633513660767610015645 0ustar liggesusersPNG  IHDRX?sBIT|d pHYs[[htEXtSoftwarewww.inkscape.org< IDATxw|TUڀ;HH轓PPWņU?WWWZ@ׂ*H!M@dfL Fɽs$}y{@j%X y,>EV~i\b`!$$$H[sQ%$$HV4%$Z8J`G 5HHPpj7\EY>D b$'nrEp43|ȉ|0 =b /)ZY. p'ч_\v>! p'`'OLrr2K,h%1k#zl"BaWhs n\"ޞу5kְj*:uT/j07 "BQ{uy#4 7$DӲVBBBXx1IIIL8;. 
Pt'3ȍ#N#M}q)H# p1؇SqƑҥK kA\j`}_*D`zuy9݇>$O a.]Xbׯo߾Xޑ" XJ]C5X)Ui$?:xafh\#od*eC ̫;1-s R5z# pR;A;IMMe…Ս]MZF N;ez|i49Z%# p݈"?zc,_6m.{)5pq 4,Mh:W#q|ui%$0DS/a۶mYlwfP`#7\&VCw%4pP*$̞=&lb.]˄vF/Ui$xi1w9rE+!Q(1~6!,vJ+OVJ@jAt~FLYj;z\d(P/坋@Bj $IxbèQͫbFWQ)cj $5k0T*7oiii̟?z( j5FʩwRh<q2ٿ?K,!0koTMJِ\r—j ] p4)qgZڵ+Vbݺuu(]92Q k0`ZWG4j%((EW6+  j RX@\"VGb|&IlH,=uBOUi 9 cI!Cc/_NW6[TW 펊j mBШ=R5Okbf\֥J3wCt:dRyuP*z.@sWi2  ^!A!+,ܵ F ((0a0ssALhhSШ&Pl~gqL52xx40-=m bɓygf=޵\leQj>Z%,ZpFɣC_xϘ7o6A%~ՍÙN)J>G܇#7>&jRLL K,aΝ;Ǟ={ذa bZ@``[ P7aԨ]Ƀ u{8Mն||4߀HH㎪81^܁P}Yy:gP{^~[4z}BCȨ#9sCmBuuᣏV0gΟ2QD_\}FI oms@aRX x$T*< AURRΝ;Yr !??@t"0 h&Ojcп@&鳹rPTE\^]\E("^dzdz)**b˖|r{죰̬X,٨T::铹뮛СC/~;f4Z?Wv La~Dzk㎪a O|(Iڽ{w|M&M򸨨eڍ,_#)ǒ)(($ ";aa̚5{|*nu_РR)Wlv\#Up%xqǥ*<<裨Tclf ,{̬#J""ӦMky^f͚ Y_? 7OAW /4\Q\E pkw4g{/2o_VRQɞ={Xk6oF^^)GB€}x^V^ԩ0h"r̥L{x As8tpcǎeuJpa&^C;Tl"""zҾ}^z ƍ[~&z 6ȯD Lض|3|QU[*e$b}FrS)eͲ&VsBj c\24")x@ӱ`?ٳj|J6mx'HIΊK>| uGЫp֮ӽ 9  r Q74 STB֒ 0c Yp!/el 7܎ؚ]~ ıc<^/Qͺ]sR5VYI߫S#r౤Aغu++Vb\.zbX ƎBc5\{4 XV) QK&tˣ˫lbCCT{%#""Xt)wfM2F^ظ;}zFaA>o Vr % ]ԪE|XL֗Jfʕ 6o4':vlܹ)3;8r?+7Fjueee'rEU_1hf}@.1$NM%:O 'Oիӟtŕilr9ƍd g%eJ;VPp:#Ѫo|AF}=:L4TO͍1I>_c]K٬I:YdB7*eCv k9x(+L\W޿էǤ$fhp+Bׯ/qbdPZVdB75X>,<-ӣ˫D,BY٠ pG;GF1rdU\X|Zәumnv  iVuKPĴF[;xjСRӶp睏Z5u:B$ڿ(] ZEx: U*Ҩ*67Edd$˖-c̟0wLlfܸ),b6kA@7tڻ= @u~ ^]Qi'{`%h{"ؾ};gϾɮ]r7c6+ټ'bb9p٭|h"Ni=:i{w#j]%z"77>ij&$$0˿ÑIddoMJTT+rZ P(⤹\.' @1% `xӽ0N'U)ͷJ˦!˞  `ɒ%5A7p>VŭT*d2hJFF)}_ZBT*r \Rkd( :-ADDC(:wKtԉHMM|)$J˾ 0 N;xȮ|O"nV+!!!gʢ̂̌bfwRldpj Ng@*cƌr>))gV9~PTǚ5_3h >̊f- ',, -mkמL<{ﻹ^fqN<ɑ#i:x'NGiR?p:7aHp{`F2Q^Dyr6l2v%@7 $IuCyþ4NÜn׮]y}ʠw4oo!\sLp;Z'vҫpLŅ]ָWaa!7[NtXe.t5C&1"֗JYY&! [~GJJ SPAQ{1n~1/~$`Z$x+&Sltf{ju P`` >,Y [ؼftft9ApT*q8dZW!*FCYTJK6ܞGA1h z>@Ua׮]fmAn^YYg9uj? y7ڥ;>znФ+6oSOʩ? څ\ޖ%*P`Eg)6ZRcqÇgʕu`c/jVft:Ap9dZw X,9#jhWK5Veر+zŬ^+} i'O߿c=`޼9FӶm;xr,"h'FC֧J\adAg`6ڢd葄FQd2gO. L]֭[1c{`Qe Һ5DߝO6~FΝk%1 ̚u MJvÇMh<3=_HiiH{F [n~_=G "NRޫ:-n)h ð0z-gTl8vp dZ6G^^rб'vbS)2vYyXFP@zNvQtMڷ &&ѧT*Sԩ7p)-z_]Ϲ'xyefͺT^ %$2"Gl| vd. 
\kח.]rl,V Ndddp(;\Ovv.BdDDbv4I&D6n- \Ȥëر4D@Idd;#:=]Eѥһws MW)((_/<_?cʏ뼹xb~ERRq#KkF΋ǭSeo?bXY͆Z-7!#3 Lz 9hf-zӧ#F#Gfw^֭K`s0$&d,tCdHڷGt $..z?;xE?>pgO"6!2s6ZCۈj݊n:гWk W^6m½ӧr9ՄuA!8q(C &<\ &M=o뾥{m,\GRџ347M={͛ٽ;>}'QP 8E F% x߁D^X.3KJJK q8`jK '{V{"iرg~-AJ %vA5&h42.dpXryt5X$$.ۡͅ+O>cjۃն̣)89]! uq <$i[+F^@RvţNg.{(ѨֿD~9I& E&k4;u%$ܻ~=w ?$g_Mhzx,?:w+.O8UGZ Q~ne ƪcX7Zp7vWU‘G:?>_ G{LB ]W/ڹ3 V!$D bӐrBz5àcGh îpć6Aq4jɁ`v?vt zⳗ3pIsu"MD:up顤 - &eykTz `2 b }9 5`H/ `o尿|Ϗ̓oWJ̅ZWMguy '^zV~sq}jnoϫ{e>}/*#5Caʍb}p`Vx-eK^c9ܸp&k7zlh٥L:q=ԼJ]]y`A,_ ÿV9 z|%(}c<4_ vڰpXk>&6LC@x1^Z{{A}(#xq %&Lݘi ~ byw⛾:5" _Pm'U oAdfWҏڑ.*V+;+?"~ 8tPx՚rʂ\QȍFUwRwc2 A&h'*O>4cuNj/޲2(6ApkUCI窾M; pj{zoFu/ۋE"6(q-GO4 < KKuBŸƍ^}  6o_nN'p)de&*Ǝ: ?} +mf;q=Rw}*_$8%Ð0Qvin]_jT ~tn:a#CG[zrђ?@4}wf?Rm[xU7-Οw''QW%5’7EKo &޶-jWH;k6ZDqp)7U}oy~sgOOΞ6Gɰov.ZTTZƦBxS[!;yjn5_ <,*˃Y.ձ`溍 oe} O]YmDkk oeIe_W{nݪViuk_.q/ }~lPlTSx_9U# DaK9vM.BϞp+or2.` 68W/ͅ37̀~hq$]=Zg~޷kTС-Uezq`W wv5Ca%?/|T >/IrsPYnAhEqj[cs dK6D[UwQ?ڢ0Wsr-~rtɮac` ~[~C#lX 1>&\5 %4&'jI/~]ߜR)e7~ž]ԭ/Wi#OIYo]1o-S<ۯu{ 6^Y{ٟ%fQZp*eUe\h01_~?ݧZ:}1/y솈zxXܷ #RsQJ|o TVͿضƏ1׊&ꈑbJ;Z^݃s`WX8^U={>'jOR ol>QomkjF6t+LClfw09JWO ']'56>\5Kioo&/~'Uͩz6ӱr-?ρ5W Fhc⤪cc:76~.|4p)AǕ)j;fU=?E&WZ5C6 FrQDkEP;.wT쨯YV\߯2֦ŖԩNުosZ̍NqqsmAn]^Յ?,};w uXCCv}S7Xkfl2s [>ZU;Z,awT/ީUՊ:}蓼̪gm)͉7 _!. @.O?i7V!xشM\~o(bE%q}S9V|Ϡ`w%WGDTP!4T\\=*UGíeIFؼ9@|>x?RZ Kޅ=)eebV.+GZʽ47 Fq|CBĈnl8[9nޮ[6tnv}K*ֿf}aՂq:qI涭bV ;uᾹ}q+{B\vwT'%>n\<쯞/:O'˫jXӒ0n[K7U}OeVxyxhJϖWDic:H:QMs5hMmI$$. 
K/+-C:m"W%M(!qyy`!qu"}ѿD]hf>?+rHUBKw_7|Q ï>?M٦Ih\7ĀL=r:IWJf7wqaŠq5_5+ްySMA@hh(m۶0:|d_r8PeԀa!he2ΔYS$ p6Oܑ\Q.gBH0n ҿbS\rAΌpqw$3_Զ_LhZ< (v8Q*3ܶRnFXH(2m#įy;>8y-E\uu-]3\Rhw7$|o&5˥tWrgB%YWNZy=&^"N b5;D3xposf4<˥VX򃛚[b[ v'JŹ>F+wZ<Oek߽&Nu8k4vR?ܿ]RG bBG/}Dq+BJ[V~bBG!պo8<]V z% wZ ݒtKĔD'K,?L6-;k5+3G!*+kIԑ 7@']r?Ʀ\ejŢUNɑX!™:ChBb>2\Lby+6G)?xl;hH9̧\/L:r!-.)wԚ'9pf9lGQ0ˣ  ;[2tilVT&rThhpe4da>2y;Q+x4v2ЩoEyf}JT><SYz#5lS2Mf[CDKxDS1QF"؋S|0{Gl7v:).}8Jf`MqނFoe<>  a)MϥV5'ZUFDb*:*ed ǝCWR85I5% b?Y@@@$r-#Faܸk:t(GQmα,8]-OhJNDcʩ1*7b>bE,v|k}{[cPGpC}`a*}@1>x3nSZT* j@F5f:-c&0+jThR^&f[>c d@ELϬ4ދH\Kfa~;$c?,8(m+A]AZ u*j'M开Ngdټ ,< 0w}9X+\M0 Q)ch@r춞npsq7Ք@.:-K1s:y`ʀh(=Ij-C)P R7|`,y-?ڋ=w^{h4zuOv\( fΜ̙38s |9~Rl(?m8e~Vxt{ ח`Gp"=DmR3{MR|4!uV!=A4Ko#q*JI]IJc*}gQ*TLqbb3m;Zk> Їߟ~a$%%}v~[Mֈ#h;3k׎ f;wCk];_2 V0?=d4E*c5>i;FwTi /PˠDPj bYmjKH Hܿt=6[oe|u8<:us3l޼ca9))p|.\j6m2#U h15Ev)%*{> z<\';n8b @va+Kfq" LC%i .pgѤ>%OKD3YS!RĐ_yFb4ڏFppT"s낰ZX9$駳Z3˅]@!@1>xr]؊"td2%%%&^z_y瞻˯B 5"T*'ޤԚC3C;s™t_铕Rkh.*prV!CM,;S33no@A[^\pV[m B)c.f%]v^;#F,~7|Ixx8\6Z=Zm(#au6l}^^hq\9>*:Z ={2n0kLrr_ZҥK(+r+ l,#\y䲚\YؾZYt*5=6*1NR Z(G*3TgaYFEeUYxD3WJ=Z \r%}1aF 1NףhԩpQ")\ {H測X˕ :ԩ5j${F&^~9yZ8q)Sf 9m.Ox1_^D􋺭yG?TjF"u-q2H~>c)5h9:ؿ5=bgh95Wsj2m @a=\(Rˁ=^xJÇLf"{?Y|ݻ`/v\撑qQh+VProK|nZFIvjR䮻_?bpZ.l>J`T0B,[w_ػY(fF_ ||!0 ! ȐF> o?3:$u]K9}VP!{PB@zW,2wR4b .**ĄLdƌizL's9r  'Ɠyܹs^Tt)4i<Ç'8TO?@,,Rbv6Iw:s(1Bq1vׇe.XȀX^06x=#otf'8S 7Ÿ  :I`-kSS۹+;&rAws ? &SaP^]FRM[IsVCjCPǀ8zt<8|)Wj.xI^^P(\ЩcoҷQ)N3ѵrB`㋰;-DkAǭ$ IDATvl'# A=^R(:6@xSo]|]]QZөKy7mEaƳ,_캉I\>;!"kt餴p~+;BPhvBpU{&.ڷyNU(f!\b4W_}Lttt}k׮oV~(: XM23QXmuˬYS2z]> Nu|jRR՜7<3odȑ{ΜG0Ѩ6Fj5w7w76mf7HJJssGTg!u-w庘}M-xEoZ_>yT(y!?9/(BߖJD@.gWT}.#(jDH="N7ToBWu.cx&N&׵ᰲ{vz/ˡCXfǎ%Նwp!k  pp:Oc4a2fDnm 'N$44c2Xbccy(,,d۶mDEEѿyL :wKUzǎرcHKKcceȄLozf\I%:2zk Le{z<.oy,JwE֡{f™⿣A:%dYmRyvwx/vMc_^EGn(~ PK `:e9p{~;WԽԿ%VF4PWF P ^Hqq.O<'NUVq&,-FN;({P% @DE\}]u]~e׺+E) *MA;L2}q2) IH`epμe2{yy7ٱg\.L.t(il4mښG3Kb)dŊL0!VD||=/2#ZC8,QhPYx,Mz )6KLB+.BC( V! 
,f~A%]TـصkFTT 4S$'7E$~m3=zW^GQ=ʴi8p 6{gYecI(r*&C4̓$%2wK/=m?uSbȐ!՚P{ٴ7dMzzWF=L4ݻ7~JF Nlls,|WZ֚-\W5+'f"Jݲ ewG߄ H րu踦1 c!2 t&ҧ@Y^uED73? Sc*, m߾ B׮$7QFqn}=qf~k)Rѽ6nNiI,&c? jxdf<yҮ]sh /̇j"&&gV8={䛕Yh&qYYfU8-&&{}}Z à_%G wXe`h) :;6CȊ ų0MF{W|MU+ҪA jXB ~ib'""2T0U0 &&&s;# #I7&>vz]YN|FRlt Xj'N$)wmuve$NUf,[9䮽Z>Sx'ʪ\BI"B/8N=5{yǠ(v8rcF=!|W't۝6;9x13(mlxӡ#O?~$pzmЧT;Kt:4Mcu./λx3f 0lմkJm=q oדg-!/85ÇVnM9U~j=Mll2K|ʠAy?rH ,3U+.<ؿ,3t:ؽ{&~zqV^~3"'O2i1F?Yi 6lXÈ#<'O&<<9scǎ߷l;4"ҽ<5zOtXC(ex|iW]]V>˰ί=0}+AU>-ǷH|t! pdJP U{@۷o'22z 2M@\rol>Ixx3f[˭6v-LK,Z(~SgBر#cƌ!..z+xT%"NMA 1 ./"8Bt a`w\9npRFj >+<)n7h6RF (w RT gǎ\Ntts'`0. I!61|7o^K2'N^U4]=Ka1{a S}ʹفxꩧ0L,^7bWL4`3dO ]f>psPLcP#kՆMW.Ehx蒒ȳ?*1c$/VE{ kN]oa.{7oމÑYRiA^nɺuHHhZxNӶmkx‡BD5ǒ%Kއi֬ ]dצr>\R \M| m=j& KZZȲٳ_c ΄TV~ 4r$cu#N]+; 2F\d]ђQG4Vu%Z)Q#'-/jVFl˳E'BK۷Pt5&K/ei6v SZjGJId]f[h" |IIMhӺW>d&P Ijj'~iΊݻw^?nvZhU]]vyd[t* 0`?'dt7075Evz!->]JLAH,^j6XC;~U匆3bLz9i&\:W_[<(("#eC dΝ;V$` .=,X˔'N V+NrT>99yf{MDTMf-@V[xxa:0:dpqR"kc㏓)ke~-,,|K̾}rzס/Y(r W?nn̔|]d>, Q-I~8/V~)aG߾ƛnG^4'`4uzhw .fcĿ/U*4`D24l鈍iOv;ȲJeqLDEn66l U1 eE0#PU MȲ MpHM;=1#0M:pe>1}qQBSLƍ9|0[l<YhIHV)W0vyGq^ni-+o\r{XYl>3#ˠϳc7!*( yWWrv4mnm!y5>{{ s(WES:ЅeBiBFUkȑ#??NN0F^` GQ$dIh Co40U$Id4jNޠG xvprݙ̓[F"3քt3_T]W5HfZV +m&C$˜GtN'h%(6 ]X4ZT䷢}K}W|$igUXN(N6oΝӹƫ喬g uXK 7fee1l a2I$%5g͚oԩb=$˅b`h_y$\s3o^Fd2P34idb۶m~bS0!!9'itvie~6S%+ 0-+EfZ.$Rsqo9i{3(~AZ8SXXθ-H{@r}ACJTT?VIr̞3~|#))I (j.QQM+L_ܹs'#Fb6c $sK4hDDDj:d} >>UUo*|}Nn>0$!TKQX@.!ߚAjhRU4eۯ$ㅢ7MX<<7C]F*q#_KBHǑL094_|&=VLtt"O=-[Pfh! 77UU+th)E^5˲h:hr':=B(NSEZb*q[#^RPiw9]Jϖd$ &ދ{|a?Of yqW^ <nL=oI*l/˙ť!R].O^LlL2|.߫|s!*fkOr{tqmIJt'F1/զn10>>qU72OW8M%96W.ntNӭwbPYEjꪊ5Ž'ine`xϽ%SԲ>Sj"߲rMIKKԩSafeLpDžaDvXAAz\v b 3Dsk% >MIj $uδ,Ϸ%3s0c ]pcE? 
݆q q?Ds\@nQ%wyALY3 <tZ)6QƓ1cN" ))MزG  󉎎aÆeiR~=y} z\{J-ƛE$;TJNN( Dtn>}Kzju: ît$0PG:CxՏd!Hr #-0`_8_'QPp0SKƏCvѾ}`1e9]|~ҿ ܔ1+pp>ui:߯~v.>MСcl E} t JJ>/jl"9Et̟PԐ S>$B0c ֭[GTTDw` 0ht].ŴiѪUKtD"m[o=8)*R w6msimQ4;_*4^gNQ EZZ Q/dN@*f͓z0sS`4KE\9F#.7A;y ĂV~՚{m܂1g29c ؋X |}>iΗ_f/07))~(,ˌ;L:t('ïN9J}ޠ]ʵU$2)X&Ē9ʢud=ߛ-'.6U)6K#]qm'?FRH{lUA1 .Q$մj!%S\G5O(R|Yq_4UUy̚5Ç3hp"#}{FC4MB1znF]5t6lf^G8T ̇J۞ h"mAMw?>NlvViI %!%ݯbQ ,'>BQ=4Oh({@w:j΀E{Y[\'|J\r'b,! Gjxؽ{?S}X2{/dYfYKN;w?,Y9Njü~s;U m~ ՁtlŢB:] N'%nkhۉS\+X˖cWNO|P32k~ @=lhxhј@rr fΜߏ,l{߱EG/7xEae0鹒e yyF3k|o>ƍSYhtБ5kV'wF`^8bBl*$vuWP^mZ1:9eɲ"p$L$DuDkw!:+8fq$\WѯCUX:ANE+{,W110"k,X;ڶc=/?YJkygp8DEEѯ-[V% ԣATcѺ 컪'Zm,/`eNnUeÉ"V l{EV]4PdPoY$5$HBjC\#okoÌ35ƍ/r)sNa=f#:: [V$^_Cn qM$S,-m\^@ʋ۳q$o폗ՈrRs]EWJJzݤɧΝ;]<=Okn/%""-q/בUa;ԋ\YU@E8j6`/ۀ=4le-_O`ʔɗTa'xշX3<MsŽop p8bڴitŗ&fLMsPjc2իx 膮jˀ,GTU ?ǷUC̛7E>_̣yFۂ$zj&O~͛7":<3 4)S^={\. M/kjӫr4.F,9|JZ*oOό+={:: 4 6IDATdoێ^͖Cdd<9g}RvPlJm/asDL~9 <,+Išt"4x z Vl]{r]w曯PIp!;,۷`2&SjDӈ ITOѸq}!~y9Sj)x̅Ź`pV d9""$Y?o>?`!l6vZ;VZ-&St"bc?Cп B\|X#+~K޽ ԙ.uŀx@v.xgy0g· ZMjli`ͬ[ 7wXJ4aaQDEر 2n5?GDaV,MF:M@T UiG \Qǀ5xUO 0ӧ~O@x<8}عs?+;vndAUlh^o$> .֭;rͣ5:22j@c…<p41'֥Sj }փОzjNN {!H}hHR 7{txz6r8rclݺ={vw؆Ғ\Ulq &:-#F\8p #D]CE9T0`o @:2u݀AZzpM|2.]¤Iwӯ_=Ξ=8rNa/!::M3a[e˙cXVhg }\E޽/-.ڲ.QXfz%v-ߖ E-Fq̙??ӎZ9cpIDFiTuKOtӻNf;ׇJlpc>|6/D-~%YMD=6^ū2 o_A2ni'c(j%+Gh)Dk̯pP?p8l<ڴǻ~} Q4'v ˁd ΪAx}5Ea^giݪ?PmC4.@^NnQ%g4" X[߸ ؋W *)J!'3quwү \Ng5yE=XG,@s.W7{ 'Hr!{W +'jlbHKA]0{-+K 5Y Dx.'B\)1`,oV222qZ.KGhDp(P;OzDȀ}T{!Iv>t6M\ԩ(5Rg``29EA?!.` j  in'x)-[*QS)O !>불_{ !Hoo@XQeq 7ѻ`4qH/,Du!,/]#v~e#;vGjtɀU1-m4< 8KȀ+7GSxl|04iɬY\և%t%9Nϴ]#dUFj ̃>F6iUN4 pؽ,1`3@B—')!pUo,r~T:8A ,~ B|f L pNj `ժlَ $I S3wMX"8ηҠj n^xXw@\\ªb7h_5V YA|6uEI}%d5ùj ?vƍH}ٷ4!!knzrzR\rPѫQ&0D pͲ H.TΝ[޽&drI䛇 ; ?*!.< Pѐ$+ͥI;e&$$*ůiJl/SԾ,/6=v%X+ݘĥK 5Dk 4nĂ0`l1 w-pc=Ԅv9x .! 
\"+%de!>Ղ#m%x E%c[&2Z'4_\&  fXQs(9 4N#D2AOa4"0Z$y?Ӑ9ާT-&UĞ7E$du@LHJgpc>m(pNpd4"顕a:#&&6"؞^m,eE!jžr!fE} s:!IU͔؞^dP( 4Q%!JTNk\kU#iqjz}C(*Ꝇq s=F '9c h&c` WG "D5G,g `:]>6X ջ 2%b)jBA{Dh* ֍jCq`w-\i!*Ą-f"rCQGi_^õ#".e2~Fs^k QC?+9b IENDB`cmstatr/man/figures/logo-wbg-240x278.png0000644000176200001440000007101713660767610017347 0ustar liggesusersPNG  IHDRX?sBIT|d pHYs[[htEXtSoftwarewww.inkscape.org< IDATxw|TUoZf&3J$ދ  ,]\QAUqm*X@ 5 5I2%$$@2yw=s@#99 r.]-D= H |ka6yW1 ={Gpp%K$X~=ܹs_ã$8vcǎ.ԩ'Og$=YYYDEE9s)S0vX?^J7 X,|,XR駟^ǧ$h($ؼy3< 'ON?0;mȢExG$fAR$^y֮]R@=&`% E_۷o_-[oH^ARFNAA/d29m닯o{Νu3*sȼCj4i|ڵ%Wj8VZEHH .t2|SNѮ]⣶ Ԫ14G 6н{w^z%oQ$I!111 h˗5kFѣ.>DC )p#h4ZիW;t̟?ӧOY G<*E_"gNIZZK_$v3ׯ/k.W L>'O`juJ\,py4i/vvիWӥK,Y|zJ~M1bLŸm~?Xf mڴqVVbk[ xt6''^z^zq%Q7H dffEDDvrڶe˖Xd ]rykWwJE/mO:ĉ}z%13T{%rY mmFXXQQQ4ׂӭ[j >HRR ,hJyUFq) 8-g.vر#N%*GR˰aØ2e .\p6""={j*ZpnkXUA[hν,͛GDDwn o $v ߿?ӶZbʕ8pA2k݂Ri۸8ɓ9|x#)jhxIHH 22%l`]˅jT  nJ>}7o^&Mĉ,^VuBQ\a.teH@Iq)ƍWm־}￳~zڷo@:G!}A)wPrH \Lvvv_i[-[FLLL+*/j|<*eT i \>Y Rܹs9{,QQQUBq)p eT^iUqwTh cˬY};8vq'%cHKU9Sm5뽥^ I*Kd;viN:~zlBΧAJ_7VjK[&,XΝ;zjm}||Xx1L4$q%&Uj $pvZBCCYp!EEEU- 11_|[^:tsЕz5 mR@ny>t]taƌ\xiے Vy $aݢ](?yуj k -חu9sӶJeΜ'JBWRѕ@ߍ.W2d-_ ۝l6kU;rUP*hC7}a Կz̍KgW!:Sp8?&7U ⃸_DqK)7oKI&toCdddRTd!++B`5sF⣏.7^krHHofnHn NLLg~mXX :/_&&&mb8x0lL& ig1ŒޭD?:ÇSU>x/G}4fK ن(̖ǽcƌ!::ZXp!}V󨫿?*O?t3Ξ=ˁؼiG';+C&ӳ~-f̘1AYk=/BB^&"6qR'_oԉ9qS*j/N/}ni'˕uE<#̜9N8~W6v޺ n X/j ⋍rzTd4k֬$9vV; +xj Oڼ/2e&:͓xաv{&yc(enݚEDRu4J!**ʥTQFl2zrraضmNjrkAAmێ7x1cF7ogਖ਼nI=H~k`Ēcdq^`ȑDGG7ʿFJLddKy\۷7/:r96͛_xХ`,;odƌc(|}AFY1!VR5;wҷo_"##hhF,YB׮]@2|N>MdddJYhт _ 1qk֬`Ȑɨ=|8qb/yn݆i6P9b@tBttty ]ş鴝 s=7M^pI'1ҹS(_~.]Ty޽{=jJT|47?6%rNA t,]owWr}6 |F)SU~{n֬Y$[nlذ-[˨SQȵۿѣ^B-B<H+pVVQQQ׏]v9mĊ+8pC i ݺucۏ LnN6k)}ԩSr % ]{h0X,|tҥ]T*s%119s\@eԨ=%!acFͪUkʴy }sPj ]vmj [no߾s 3f`ܸq8-ѫɓ's…gffқe˖\{Bxhrr.0fuKTD<=u !w#Uci~2yQ 6Э[7,XhtNbMǎM/ Cdddi-[oyJv9|[O:MPO;-Nˬeaa! 
.sάZ{W5uV8yd79q}g}_~ڛӇhND;8tW~Y m9„#QHI=TPm(JE.WrT*2A@*5r RFۗ-h׮5;Ҟ:R7IIIq4;nq$⠰激|[R#0/^ҥK\kZZb={v_9{k ̎\.gtΟO@Pa Nh4-jSXhѢ ٌL&b6p0P6ɌbĚEAAvGarZ+dxjxzzڭ+&,'t:ޘY0?WO)UlhxA^ pT^uȆ ?>O=Ti`_5/iiiNT*<,ZN6̙Ʉ]h23z #G1tf?_LRFRROpXH˖-wfΜZd~~>CAAr|.p%(*2a,2b6X0.9~~juh40z0ƍLxxx/- pK 8\"IԀTڵ+K.e„ Frرڋnv}vZ>>qfdnzI^^7~G~8~8k l!33KP(4jJNL4GVbpEΝ;ljg9v Ϝ!++#F#boB?߶ ПĐ!CPn8p8 <77uo[$ZRdV\xmnv9vԉEAճ/w^+s.66 Gp>˦Mk۷o{sU(DAHA 0ǟС EEE?~$&&Knn699b_x{=05˛~+ORR( Vܿ???v%3+K?;?ߣSHya&1AWTܹ^Z?sHMۏ\ފ$%PAq7oRcG 6短sde;C@@3}Νg{y79gi釐ֽ^72?^Yy1[bʜD޽{j#ߚ<]S`Yq[\Qݻ>}6ݻ e^ђ Ӽ@[՗غu;/_E1s=l{X# ^ǹsq,\wzts-b$F8p C䞻_̼=ZEIyoa^h=.spQ~XL%Iłf  jGbmdeeuܓV-Ѯ}(V|C!WS8z,ǻ/ݺ MtLH'mzVA*)S`ʔ;8<om3,_6g̻n#G쳯p)G˃/S3[r*1pm !^~}o&HIIfSb9{,\z Aԍ>}Fq}Sy{J. l]JAA!׳w-#h߾ҹ5!!Dmv'K[p(O=9{5}Qccxx(Z!q(~*UZpQlYL-3ssZ,x٤cINv K/0mڴ H^ψ#1bDL:Ė-ٷ FbcqN[#HZjI۶5NDDӺuZ?geNjr ~ر'_ѫWjw_ĉIIMD!חh5#и$=eM6/9Ѫe=z0v(KΜ9è3)(,ohx{ɧ?~KO EvAL&f~dr =ZO/i.#0|x_틯o'''s` 7 !0YHKE,uc07C6tOXxTC +1JkWdo,ZGhݪ~ۮ]KK8{6DnxyV=gM\L&\<@pp ߿5>> 8{eȐf'edd}=10̳h4D&SQP`$==f2e 222HO ==C,-r />x{ӪU03v\aa}]?$&&c8{*idggqJ\iF=hԉf͛ѹs;B5gpuV:=c.yt.!{Vߏ3a  _z&N|wg`A4'O'-=7^GFD#F ,; (́x >ޝ @R)(,hbSh%'26#Ȭ}w$>L^xCLsra- dddJrULRv*/ށԂCq4s%v$ds9Ν;FNn`3=j//os1RRkr&veloKZ AAA9Ν[+p!.r8v+۷fe?3)H$SXy+.Ҷ(3TjNV% ' s# IDATt-Q(x(UJF3EEf22jU<<|oCFz>J2 IMM%%% 22KjZ׌-ZӪU #F3vlBCC] `]m'9KR$!q#VkVszjߔV۟-Kb02|}ۣyY("̸Jv9TAx{ýI4n "72ǪV`?¢o0ZKkr.ԊzеKgBmۖӾ}Rfq)c'77SQ_]wnjQlHNN.cv;۫AAhղm37cy^;ou ѪDJ%3k,&̖-ڮb竨UcjM*888qҥ2'Jz̖WRh=oѲUKfLnwy˗/-t:?.,+ҙdd[iAE2=u7#гgOv gti2Y3ti=BCP h2ܰpa4Ѵ"\r[tK%,<0t*ӄ݅ZU \R:NKK#55|!$+JEgЪwZTBL EUUQAV=z`l9Ѵ \=o*ZѨiּ#Ga~Ջ͛á T*JTeǎX6 dBkrs!'' VsHH4<+ fbQ)Q)f6EL-Hl%a>hw *%JUUP7jlfv.: .čRry6V,-1-1bKntY5%wuqI7'UR7٥GL2soNP*zVu[7J zH2?2Y3eDC7{n=w [: w*IK^\v@ FǎϷ}= .q\{ْW'NW~n8R۷C=6n<|@cH{<+pmXpcyB=@3fBB06}1A#96 }m²[7W@ZP( b6ub C×?/wP»4'ۍV Vuy^yU|ӗE3Dk22|qYɁܶ<=a0zdf * /j KQ?;*Z g,ZRQyii)*^;CCR殺P `1O_<ÏCkY+㟋**oQrn'NV o>իe n2X&YK"/onsb? zP7M+>pf)+['~\6Dvns߂ NQ*=R+cuk{} ܘR)*yۖK~pWJ(@:$]!?_L(l{X,wĀʚovcZ/bu%;HWpo.]ZYF! 
Y}Z.]˞=WTխ cqnMq\Z-u+[D={Mt:xs <7}] nQ(H h+T4S"IuAcJ.7\}޶wUPPM9KHKH_~m i?PGTͿYxmZ{T?_J̢&4Tʲ˸250>_~?|7+O1tth"ZŠV \}˽]0hp猒yK-wr7q#ahE:LLɬ zDllط >*.!LI*^pT|V)7Ո|룯P1 )b 4nN .F˧Po'*o/< o{jI33ʞul8b?fCnpﴊ+:=D=SrLXV=Q%u.?=r,{`&NscWFjŵ%M*V}ֹHnŽEM#UQDcEPz>[Qv]|eGe?gf~-ޮP>TY&[R<ʾ=m b*];ȑ p*+dgh WKmZvِq,?|$ ]Uh?lAú:0zműʖLL0IsJ_덁Z_&;3CL^i$Rw K/ﲣcMM)JڸFDpddMC{ ]F;̚@똟~@a8f66ĨjCih^кXR!SsrWcJ%c^_qeURMD}+gczZJ9\!=aV46 y-ovQ --J9vp#ɽq`gobŸ o؈q;atᆴo"iժŸ- y`w/M#U` ="SH7BTn,xٹ3q(FF ,Q9G xeZzȰXwk3RHX=`/qW?]IM&կ6㥐IrVv턇L9c xܛ&ĺ '}[=<R4ɍv/l1DjܿV8ڬYE+`;V h\fP(($j[!7$\m.-á0qz`5YWFA+WLfb V_l`7c^%|p tw/XS`iT1DwPUbxS!jL8t %}&-6nip]u  P V+IRB[ic`ps@qk@ / <6ԩ\5RdVʅvJ/7K:i \kGk*X&B7el&dF%)a9ѱR ˍu \7YMwQ㫨]O' A b|QQ]'QCr¦k7JްZZ_\F$*rmS#._TqlmUrV%~p1vh0?ώu!4M[9.@f\rvZ_O&ݐN݂ CXN5ÏgINy=8G:7/9wq%͟ԬdOOjOz_/,[@MA,Gnxj6x9RrQ?@XIBuY-{ :.facu$DyA |g濄ݞWK %jM+ɘ14h{FQn1//ƺ!lMo ^u<Sþ(e6-TR3L 5gr܇ wueuIyO`EoA3N۶m>ߍyBWXmTߏd2 7vk`H*^Bj+xcg0ӄY|Õ.{Ns=6}Ƣh~# Q 5;-:A|rOF)Q`wЂ RP_$+qᯨUШA6"¿( c*ح`+M3>@hMg aV'22%K^G_ei9seذ|a*:HVr(4{i@ k X-Yw2ĕ)}WU,oӅFb WLP0|k8O#P+> !bR5\R`نM?ߍᇋҥ jVS 33ct.^ȗ_~gFv ~ʯF'Y+nHQ2u%mwpi8k0}W6BtC| pP~B4CxPEͻ9ɻ!RRS\bY1WQ*TL9ddd3u\| :ODDw ]7j%>>={yv4ڴi0+̙3/>eo\ܣx(Mmu'5H pk'O ;ZЭe&Aq8+eAJh;M*XfK,ن1[3b6odv1b)".n7qq{@* о+͙Ņٹs/NElSPpTl^l-Zt/GL j# ɚ֓0O9ĤgP<5?Lf/,#`&g[зTT򊞀.`-4թb1k5d2_T0s7cB@wK:_{q@#;;Of9üJ:fsj (^(24=ziWqR-1ėLoͻ.孷^B.5 ,RUs} yvdPǹ7r87zmYr=47`8czґC':WL1+B !w7\k|_-Jl9 gRhTQ#5zCeX|3v[611 $99?O` ఠx3tH6ӳgO^u^:{ANN\r+W1)REQΘ13ȸsߎfŊhj" ͙MZ Ѕe 9R| ǕKhU,+T}`6~$}9WjTٗ`%֗;oU1s cA 7BAPyAnMJz`1!C]͹?b'j:p$.͈%E~<#,Z4B2ǍPQa`6W!+ <)S0|0wL&&-Z~:f3g3ytd2PոϻO/dͣWwzyX^3<|h".58b-hZ<w֍`d;-nUqjzZ .p+RF˗(8e?@`_RSLGqf֮]E.]X,ŗ6v8dffR% Pp8bqh F6mR䩧`֬Y->L& 6 \71.X`CQ2 1D˲۱~ Cbf$O㋫UQ@X7ˆs|W'E3#x58}v=AC(R@ ^jUܴTU˼1+ Kۖ(p^^d2%ӧO{fԿɉGo/7&LKRRs)˗K^nw:I'2q8 oͳt:?0FcI% IDAT_}5EEli~Hj2ngP`>~׆ E&Ȁp'蝧aiۏЖw];a1cK'@bx]N":f^lI_ ж(ׁ\)؉bIa׆Z9}v ىp;(o-'ާNKرch4~xxx֋>}pa~9֮]ʒ޻a2o#nU (Jо;ĉ1`eLfv?~&//C #DAyh<&wV[FOM?c2$8jDy g @ū󌗉=9Pl}KbF#/?~OP(PΌDֳO;fyYa7, yPFgm_5L|)>,obAv$N=z"#*@^h4|GL8f=F ۫5 瓒f@ѣGfL¸kX'997®]Ш}Kj-Bd Ov8Ha9 yPƣVFt`Z'>2 
EE"[pr%RDV=ޭN]ch;N'M&,_|yPQFFֱKZPaO@6ͺ[ͿCCBs%00s5ǞF:^fw.oq>-OXV0]KHH<޻q|C ӧO#+P* nj{0nܸ2`9x ? $'_D.`0\LE^@ pÀJCG&3j8Əf㧟gv FAPR j;͞\+ŏ1c'p}w/Sd23ܧt7\MN&#r ϠUOc}k(Umm(J/:_*bG>jzo 88V򀘨i&֭֮Thd6 ͐Wj٢s̜9ɓ{eO`ۉco[X $&C6}4f̸aÆ>OcF`;ssbǎ̟~M~ǨLaq8JsVaw{U{jIO $R ("˷X"(D\vAQ(.Ȣ 4%(4HJZz>sPL2@8uqwp{N)XvU݇|)3բyH9'ACv.MMGЅ빉muAǽ/ ̈́hJ~"w5ʝO } ~EKN|4E+ o]]].|4o/Rw?ذ'֬Iԩ$N$T*/(WlD:t(NT* ""wޝF^^vaÆ=HKKcժo#͛wyE~;wORFvxro|H219Dt*%-9x Om[S'AAx͊) R~n48i4nSG|@A6s( BGwMyu2 _b@L8CM;/wejwsz+gϞO`uZIrHh=<-o?CSNg9skM& <ԪՀ}r,|4(^8WPk$$?L6lXɘ1c\oq3lذbW$^BcG͛7g~ Ib[EZvWو.#||PB;̄DЪ0?8 4]xv_fiy!AFG޹( CO#)H_H8oP(?-:aa$Ԛ/ontҝߟܹs,\>}Sv}~˗/#+zFIaFsyi:Wϱ`<_?[ 65DEE;62mHnn:/rrWwz5]\ Ȫ+q8m@=|$%+&R6)^p/?@61%1J o]uqx7G~gR=SfY+@bɥcG$aÆq1"?Fc[F:t܃O{]~d2F 8_0CLn-18DiI0Q4l橱Nˮ_/z?AOD`W)>D^8D;}†OxG4 JApNNFRK;BsuyjfIIQ+ի+Nb֬woE.]"*j<?ƅ g%=5fέ <ܞ3w\<<<ȑ#nkw`I(9zy·#ee@xД8";eJ[Rup!ĔHן@=h((bK% M$ή\GWʹ(^ 11/ |}^JVF 6 ;8#J?J4<"A8Vi 9あCzGvtBq)kը}EP0RN6B`/>KL-F3c~LG[{dJl+JQd횒 AlWhѢoQCD9w} /B!^ŋ'XP# >.]*FYH C\ߚ#q')i ac F=1m;8WfyH)k.LKZh]ؘSOw~ k$emJ"gy{OOĔW-/jjVoIGKwU $&H(C[ǧ5߭Ge0<=}ء+cǼB~Q'kߕ68rrr8tk֬aIPP}7k&_}kpQ(̈́eԩ;Ȇ k޽{GQFlڴe-AL^k:4'o92z05#kk3Jw- l; ʓ+9HEh<*lPR~r 68-8JփT/JxiݱY$--RD˗wٛp8n<8ju:<pQΟ/(RwMf)//^˩\i^z%$ <=% >!yx*hZgF#H"+G}I&b)7W8PPpcyq ~{>̙ v-zd'j{?fܐr]o1_OIiUa<5([E=r4šq^OB|~J\}-I* |}`Zѳt_A ,ÊŪGBUsQJI XP8D Q$ R ؐ$; ݌$ݾ=TQ=QԨ5 s_ ܇={qO#88zqW1LI΋:/6FoO'h~?^ViM92rFO??xhj1;ƈ\Zh=?$x-'%2ST / ׃UEAQ5mV6y5%HϞX|IB$& -:Ʃh4~ё_-zh8׮FfuffVZ@R{pjJB@B4j5dG$yjPT)m:AiC鏏OA4kڌ.mi߾ [uwTa2 ̙3>}K7խ @ʥ.{ g_gyՒl]cN|Z˂R#jufd95*/}Fs:7G@y0)?;_BEV_3Yj \Y.R]b:vbWl:af_i.{?LYo*秣Px.{U $+3 NPPCnmCX,L&rss17 EX+&|*~h4>\Ϛ-G[{N:?D1 QޥrSK%ku6d`"bW;v!#8q{'KpT?X8Ǿj9͸>`&3+r W$L 7(/OunM,^8>^zIAAуJUNR|ѣ <ܜ4Z/ڷϛGZJ|'*#"?SOMGj6dfsx5NzAoSpʝߏ-׬6~Zei3q=;C;~$B@=$ 1E3٢v? 
9D@ Rض]8^bBrȑNo$IV|^}* bǎh./*D:{aР޳i),nEV&|8u΢ǧsOrEbww}U*k`7ϫt{~ٴoyRveoq@~h|aCUŋمd2r&#A rڵl߾ݥ,s9~B,)(Zz$<"C6ށL2uVq147(no>c]Qھ9'e8IenOב_edqzg9>Ex瑐sYT*4c_@9B]ǮE UcOKaiٝX7:mDll,7oρGbb"qqqԮ]v{{l8_RKD,!X,'/`'g>s9p@aJï]:0kn˰3^%Sn}U gX`I}#rJqEGXH.֯X-f!8IRV3~xN:Ett4jCTũS8q A@^6$seziuL}ܿ*iiidddT*T~L6 rZ7Oʫ{eA\+&rJi:qpV#%k7kA5WD=i>,!Lmi~%5+gqiN8D/^|;)00瓔СC]vB1 6~? ApPn}޽{Gsgk]m4Iϛ;v`:Qzm@dU-~\ ;xrfQ%gErǟd AC<j c ;}q8EzN -ZzjlBEwKTiӆ7~z5+f \xh5M5ENV\q ,Zk׮ݖ!/_.'oHBȈ崮WsBzW\r]K%dм΃<_(~_DnOQv@DzlUd"V-qs̙39vF**&&&C[n̈́ Uw.r&."8HN>l=ʌݍ̒%Kؾ};:Id,6lnaTn{xŖ@zt..u8KgܸС[k Gl6SvmnCB^~|X>*dbۮd \92ɥSܻ_OiIObɯ=||6WʎѾ9U{ҩ<LrѤewjm c޽QT;4]veŹ|S?~]{5n7CPYp/ڨԭCxĩ;J7Z}ym`m?( <=nʯfc"=@j兄i>37-]qxyyoLTTT܊BSOF#~!sj- @Cܹy՗_ #ƾMfЩS'o0MrjOhfyA3Jg! BHߪ#7"$fغߢANMǦoϛjZ׋CQ;KzWϽY8peH,X@ӦwR.̙3L< 6lVO|r5 L&ŁG۳ eΝԪUUV[E_lÔZMnwy[հ"^DkRCAjޔAr'Mfef.SR{(4 H!HSC@W[ޒJ9 F<^9D1i.z'Hc`f̘kV3r12h Yh3g$/d~A0hIDATufZhGtt4fMKj%I?gڴiL&t:zfݺuE=JPu]ՓEj]{7 Dp L`cVgIX T͠ ^z]hYA~*dP0af͚u#ŵ<"AhƎˬYpP(I6l6} K,aٲ0bsѣGyW8w oo_&Mz3gV-Xׇ>|Լ)6Qel@ `,"X~'a\ 4"%k8tԻwo YzZ+?^]g={Vzg$_ڒR!tRƭxʕ+VZI}ĞL !HJR+EWpVJWٹs' \T 9ztaa1zbc֨/co$ɂ^?>}KիWYpa%f"\K}%DHRL6I&QBrvKd>`ܹ.k*xxhY> - ZHĖ-[;S݅ (ٲ;D[Yj&A}+"1<=scYOP(<жB*#*Zfgw$3$)ɲ\V*3?usϝS\˗ywY|˶*AAA,_!CTBʇTV\'h'': Ο:p\ËĪU0a&ccZ!}V(nk[x)D;^!4%+uM=o1~_`6>>y8 h=.[$=KqnԨl۶n 22R9=pԩJed%t:2c\R.l˃ݿ)xT<9/e6lؐ8o~ς*h,ߟDVX)S(]гootγ>LJ& *ӧOϛYr=ШקPHzP^(_/<$Ɇ9 J΋t:NoWf =`2|pbbbXhQd&-_3+/U,3''m۶~f6mڈjG5Q(TxGg„H1[OBpw622O?&MTBJGB @&Lo(1[`w3c3p`9݂$I$''w^o]HKG-r"I6Z:?~~<ƍE+hӭW=J!U$z#B*9X Qso`5󫤻=* L4FMQѣG/DxWl69qG_p^4DQp IP*bͥY6

!C3cƌˊ"akv:wȑ7}BB[,Ξ=ˉ'HJ:ΡC8v,+FUe)V >ށxxxҸq >@>} O|0gxQtڲPHRը#\viӦb \uY Buzuܹ ;vgryL<$ c.p3\ ;=ZExЃ=ѽ{{Z=۶*I.4 ,6\ȁ8q"{uVوCsDLg-qcnݺ51?\DUz ]]veϞ=,_ztEQhDHF  D2ZNZѪU+ZlIhhhUy)$7~4ǩQ:o6oF)%[VPȑ#K"br"-39zeݔ e9yqN[͛G:ըd៨&J7111$%%鴭(q^yUÇs̹J꥛f?@ZUox(>>Z/T. _~q{i]$1q;aĈwqSqW΋"-;6N6jԈRw 5" )tu~`2cݺ4o֋@d`7w$7zf[Ut:3gDEoz_]Q 7@Ν;ĉK̬;4kڕ_u{$ufP1 f'0 I*I 5'OS#j\ٿK Ñɥ :^=#Kى_c@A Nw}ڵիWӸqJaScp~7V^Rn\i߮7/0PI,lrѤf݇Ӷg۷^ӃjRnNN8̙3]LD+}%Mtd/s)KVĉINNfIJ5>e:N>͸q㜶%ye[v,+H,-eu!W? Qr^G)22'O2*].oR\HY@B&))|pSWE#I֚8CVޓd<](8ܬaಆuM/Q$Vk[$4-STE IH5@ bHRDll,III :zX5K0t9u'Nt0[rSo^鵏kZhzf 9.jƏOrr2n:AHRRKK''*cǾH]IJrVԴݫ@|i2`>ŋ]ΖJOiӆM6~z!'ѓc_$''zYq8{bsڶE^[ұcJa%0|pu/a8ôXn#\`.׳\9s&ǎcԨQj;j&99q9u$;VoL^&ޟ,]X@FP0n8Ξ=KLLL(Ͼ}ѣӶHfU"#Gҽ{?.\p.+ qݷ@xiۮ]{n٥e  ٿmtWpbWzSbhҲatڶAűo>zYI=;={3g9 P~-ꮂ:L"ɓDEEw!>>>w,9R4͛cY$IPs1iH?Μ9sguwɭn ڵsVp$} Gޙ, paNvܙ_tn\rbРA9rTn `bӦ4iɿvTϐ 2Q%0NX?9m[(C7{d"8z '?߈6lCg!+I۽{wC||;x"˖-cdddhBF`x|C R}Æ ={$7;ټ{,ZAp^5^DjB@i JKt:^uOO<͝*Drr2&M*U*Vӕ@h5γ\a&[dd$ .$44pp$!!R"*e[څdr X7lNll,})3T<f,Xٳ]"*w C1i|̌3x *;8W^;+VpV$c<ˎ0g(:0h4L0Yfu~KDo,ض]p9h ϟ2MQ˻If\޻]v|' <pp5$77Yf駟:$a„ h4rꡛ՘ӧO3yd~2_[(7ouԩ޹ \HHH7̙3j߿bcc]J9T}Z9~8Nk6jԈRT#p #33>F*NcԩL6E:;k( ::-Z0gBBBuT|Y':IENDB`cmstatr/man/figures/logo-wbg-1280x640.png0000644000176200001440000020555713660767610017435 0ustar liggesusersPNG  IHDRաsBIT|d pHYs)u)u{EtEXtSoftwarewww.inkscape.org< IDATxyxYS9@#HAIkd%9u{{jJ nr|_jwE7 2) ! (2gZYL IV*Yk=[W˶m[PAb@ ( F1 @ QAb@ ( F1 @ QAb@ ( F1 @ QAb@ ( F1 @ QAb@ ( F1 @ QAb@ ( F1 @ QAbZMmmztjm[˖-S~ԻwoʶeS9yy^=>n8im42:'f0O>ٳ5bĈ?IZf >| F@ @ץY_B555WtMll#(\2i߾}t}>}3<2pK\;vӯ={hڴizw^#gPbǎzjc_}ԩSrK\߯?zuȑVWrr CzQZVR^^{z!CDF 0%֤Inx'I[lј1c>` @9yz)#I?>^O<~t XPLYY̙>`CR@.JNW75| c#HOOWQQc:[ꮻӍݻw\꫖+.3AI [.FRYY{WYYYڿ%$ =*׫ÇkݺuF2=̙* p$*R\ *Eϑ22rOF7QQA󩸸X)))*))QK˕λLUVVXQQQr:S^ s9*VRBB=cCC.\4;B+W'OLOO׫2NSǙ7dt%U%].giTnnFM6Ȟ={ɓ'k׮]F2UTT {3N,Z_ZXHv*&r,+$i5jrrrg' F2].f͚ݻwt^uCsײR,+LsXЙ:uZdҴpB577_@(,TFK'j۶mz畘u:g x^&ǔ[Fy#dFY @{FtPo\>|Hf޽tR\RkN/63Bpuߠr:lG@sh„ zwdFDDh޼yЌ3s4Qr^EG̓eI=s`թSdОPD}} ԧO-YH̙3U]]\}v! 
HXVb" PL#>O%%%JKKӢEPmZlUXXF#Ç׆ TZZnݮ}Y֜%c%nܮF2:ٳg (vl˖-;vGɼ馛xbmܸQ#G^NSmA Ꙥn U$c$5 @jhaaa;wsβV3eɭp>G3ϝyYPP`l%7 Ў477^tڵk,XH#g׭{s8Unc$^ӧJKKdp#QDYYӕg4kڵ*++n$B#7x x܃5~bH'H&7 ƪ4uTM>]3"m޼Ycǎ5UN?fRx %%Rtf.F2+++5eeeeiF20ݫlHjдiӌd^ )= 9݊ #e$\S~~jjjdp(VTWW_˖-3p84sL޽[sΕ1{! p)6XI  L1Р *==]mЊ(V`۶JKKիW/HuVk׮F2m~m3#\ @*''G#F[oe$3<<\Ӟ={c$Ӵ`\|)QTWɉU)ɺ@ %KW^*((06#+A\s|[d=,Ҍ3TYYi%ttW|Lo=Hfmm 5`F2 @:?uȐ!Zn.][oHfk:][u%qS7XNG7#{Qff&O]vP`۶m7nOLNN?vϸ| YQrb#ɲB\Rɓ'dp! @*=zT^WÆ ڵkdz<͙3GUUU5kt*8 %bY,PRBB2d|>(%%Eji mth#>OJIIQII&33S*..Vttp)W2u[!̣G*//OÇd QrJ 4HyyyƖii*++SϞ=d gK LRm*Ê1uVu]G}d$йQ_aϞ=ɓsN#***REELb$9 <$Yr+*ܫjEϑtٶe˖)==]jll:- @'NP~~ r#.KfRUU^\.ж 8 *V q1Y__BGF2 @ Ri…jjj2{wk֭zեK#Igx!{ƯSbrf$w߭;vt5khȐ!g}f$W^ZtVZ #ॅd))a#ɲBd^ZC ٳ? @tjrrr4am߾HfDD͛{O3f0ٞYVb" "BgZhRSSU\\,7QSWAA%Kȶδ,K3g޽{UPP#m ?Fx!fǔkj]d?~\yyyЫj$(ѩ9]o߾*,,TCCaÆiÆ *--URR̎%H*!JJئr8YUUS*++K3 .4nݪKdtMZx6mڤ;HfGs`f~="Bs[Qs$9\z1 zٳ5|p_HfXXΝJȲ,#\#NQJJPgf(--MF:> @-ϧbiѢEji1SLeffjΝZ`dvdd Uq%nc.O>D1b6nh$qQ"()==]yyy:uꔑAi͚5*++S=dfJFɲoF}gF2 ݻ5m4M>]F2TTT~[wu`rQ̲< *9J3%]r۶d@MMM?P@BpqkzWdn͙3Gz%Ru$2uuu*,,TFFˍd: @th@@JMM… l$wҤIڶmc$3Xx-~cH2+++K'OΝ;d7 @tXW֠A?Hf>}TVV+V_~F2K[%'UtO%%%JIIQqq  ::O>ZdLá3gJsUHU Ko,bK ]ff=zTyyy>|֯_o$~Pݳm[˖-Szz h$wܸqڲeJKKխ[7#ELRm*2O֭[5vXeee?4 h{h~m3F:p̛oY/ի5p@# Œ[Q^%'V+*|$3'Uo߾*((0V ڥO>Dgȑ#o ׼ygȲ,#KFs8Un o+=Hf}} ջwo @+*..VZZ-Z@ pݙeiƌڵk f`8tWl3 u_ؿH\M0ء;FYYҔ#C ڵktRvBs8Ιh3d))a#ɲo }F27 \eeLӧkF2kӦM3fL|5܏f%'QDLI׿=hɒ%JMMUqq~^(f;&׫ kF2n̙*͚5ӡ58_Y1ܮ;d?~\yyyЫj$z(q|>-ZH7|JJJb0Teem$W%_g*>fF24uTeeei߾}F2QZjٳgHfjj^~e)%%H&3; "BsP船,rۿ` n_ٚ4i{=#CܡVDDC:dSL%PhT#*))QZZJKKe۶\l~JC+o~-X@MMM9|FYC=zȑC5lP :TVxx|:zSIRDLǔp5t&Osx by2׆"_OS}g7.W,Kt:էO?=\Ç_~r\>$)<m<"\-nVmt SF2-w]=JJJ2 z0nzڸqHu]~zHuu5,KOd۾^v*`V~4nh 2DC Q߾}eYv6ݻ߮C>$b#µj |SmxARHfDD~SHHL1c=_|_g|ʶm۷O۶m֭[u6V=zX&\;L~l;1\cܩcj, K\-I c#j5s{Xf޽5|͘1X&(qNO=jkkdZo~o[n岯OeڵK;[h*ٶ-;\--- ~B-ˣ:n 4TǏرc5zh%$$yoG4}nIRXJ[fتk\5sXITTT~|5 
@\2͙3G|aÆHFÇk֭ڼy6nܤܤ'p*44V'tk=|m[CǏј1c4zhʋsӾ}$Ia!YJ{&vN=m3n}דO>#K5ٺun:cݻw׼yCp=￯͛7kӦMZ~**o&v!!1KdM2IYYY8q[eA>АJ߲W'ke2_B?яt:DrQ=z箔?zJQQQF2TSSnݪM6iݺZf=,#3L'z'R>_,ҀC4mdMOoyɯ/ŮFffJJJԣGckڰa֮]7X?e9D锤+cW7^SN}ݧ޽{ȷp ޖ$z&K6Z-jMj O{vm2e\R^Wv2g}V{'hݺuZ~VXݻwpQc IK^t|zk=@4~x\&Z٠A}bC<5n@N9]nO+G}TsUXXL(vޭzs'}}z׵j**<\Y[>W!DN ̼OӧgiԩoÆޥw2=F] ٿM'Ny3{7k9s,2  .rq-\P>dn}{󕘘h$طoV\ukVȑCrd۾K^vkPhhN￿ÞzȻqjI=Rj44D[>09~x;0  z裏r'Mg}V7ܹٞSʴy[,KNg|K S ԩ;233;1c&kÆy-~smɶUS;{J'k_)á|;o~]΂իW+//O;v0ٻwo͟?_3f0>|X˗/劈u IDAThB2;|pkȑ2;;wk׻x\/YC.Gݢ@?՟5n(YCOkkӟ_z뭷nX]n,[NXH+U$ef;CѣG+''G~L(TYYӕSNGgf^rO5 `;yf5JOdr-Zx^u 0H&:D=ڹBFd8}ߤ~g5 cvqŹnKuEm|xF+)x2Y__ *==]m,(۩*''G#Gƍdk޼yڳgrrrdcJMMJK+..Fnwf :\(t:X}?Aj]5nc.v#rss5aF2P3 ZpҴd#O,Ҍ3TYY)::˲4sL[H0I I?6tr| Ӷ97^XHv)&r,+H5k4x`F2FlGԷo_O :TׯҥKu뭷DpߋMQQ,Nl;7;A -u @D۰0EGUrb"BgJL6@@K,Qjj.\(@m4n8M> zi&5H&<;՘wpLq7iǎ >J%u:sbDr:nR|Lƭ5H'W^1 7`:z^ kx<3g4k,9u]+W_IYЧi1:SR,aݻwkڴiRuuLhMCm󩸸X)))*))QKRTqqd,K?ϴl2] :uV&LReeyלx 3ў8݊+I-//Wzz^N:e$Z rJ 4Hyyy:y̴4-_\eeeٳLxccvGHzi-Ww\_fSrX\ g$󩤤DiiiZhNP {QVV&O;wɌWQQ***4e#F͛RBB<ӳJ?qJwjN>NX./K ]F2:ٳgkĈz7d)Mˍd\.͚5Kw2 \JϞ=z EF튒$|uڻ}Igfqb3_g%ب"9,3&;3fu#p([I PaaRSSpB555ɝ8qnݪ^F2IKKӪU)$!+L5?z%VTWIU %,ڶe˖)==]jllulk֬9ÇdKK.ʕ+a$7:aۖG}t,Ft$+.yu߬(#uuu*,,TFF-[f$Arrr4aٳHfDD͛{O3f0 \ &g?{LNGm s,FqQJY*V#{Uvv&M{H&\ ˶m駟O?#e~i%%%L=zoPss$SdD?$))BnW&plNQMBٶe.K>ϟ6nf^3=y< '>`3E 1K%n.3{;vLyyy:t֮]k$ KҡCTPP^xA@HfXX̙犌4 ?X))|k#QIRM򸇷VWmuM~;~2 f^!ϧbiѢE?r;7߬x%2V.ERrnEϑW\}U~~jkkdWLөSfv@ 7Z

_g?|HF 0[Ǚ&,$KI ;@ef;5khȐ!Çd(h"D~YL&MҶmT\\I~mذ^ݺv+l-YF?Кz!# LÊi1m1WɉU)ɺ@ %K(55U .Tss@C_]ٳӧ˵b wwlw%é3?,MڵJ+bd,@kd\gfT7a$ĉWFF.jVvv&N C;vhڴi_ʄJ p$|uUff’JO>Od{ſr:ܳgMɓ'H&i :Zlt>BB.q8%8пo%+ Hlo~0~>3_ccZVh hwQRb#|^+Ww!ktzmTzRaa6RRR勯:pZ;C--Mxjii4h6mdd)|zIEVb"()Ba!_?3J|>(--M-R 0 Xmm=ߖ[oe,oշo_1R/fB9NOW3f̸⬍7;_lвֵzT=ԣz=z{,h~ӟy57./ľАm<*ci ҩ6 2ވ'?Q~~BCٗ삶,++e2DEEE3f%UPH: رxyܹS?ﱗ_~Y'NTRM:qer:tޢ_j+T,9/J3ZZXleȾ+z劐Wv)7߮^zwׯӕV^ccn="/]4DWM 2SRR_f>AR<^{5c*((C=K7n~hӦM{>.h`jj> ;^d3wTK>QOr8tJ>{GeYgO<~$ɓ:yjjjTW[S5u:zU__S;u%+DG--˾N~|ڿ߫5kΊ뢌 WW߾}o^xAOt7-<)qQuoND--]wfuuuwHF ǎSaa{9|u9u5n뗿/z߯X56IBBbӟѓO>yEۻwz}c[lյKw}~KgO-z:xp\PI._R'\Mͧӳ4e_Ԥ:8qBuuuWMMN]?9b,733SEEEJII&NWo^$ رfͪ+J=VQQKd95m4I+V{V\ړ DN_dY1bx SӦMS~^@ Ç:x>|(ܻwևSsxNH-$]Y발r<t:Giȑ9rEE+Σ>>&2l6|ZZdk\b,3>>^O<~qIto߾4׿iĉt~+Ϩ !!a:u<eЀ{Jދ2~6nܨW^yEee˵sr8\r8<.y_rrk7߮TffƏyUUUk_^8yz/ˌC\+8 ߬@E?.wM2ErK+[oiػik˲”WNGWDM|]2{NF2 }T p&i޼yxɽԆ .:$dӦM>|eݶm|cS=.*WXI&]6Z|^~yV 튒_+ҿO{G?QN:]vw۵mUءZYАX|-#VKO}zjʔ)3fLΝ;5ad?~Ro<8=I L$,-]Fم)V)BBg $qrlY%\.9,ҝ}dy+LBW=NIDRS~i1o޼KDDDDD.D[ڵkoNx/s,Yĉc+,,wtؕ_,Xt( x闩?23{&)+<7\é`o%qFV\ɪUXb Vm~Fx*b̙w9֭[K]F_s7OreM׎omgBDvSZqe&ePDDDDnik3gdʕMIx8q2aN #|„ xvy)Un`p{ >NGBUog-c(..N-p0h N;4n^}nĮ]xWU3iR>^o&O6Gl@h7+NϞ>hwߵum۶\ɡޅh^MrS(iYG':<@x=[{8"""""bR8h {9y睴48j{C[;p_}p{UU_27q0\D"_ffG:wܹѱc&t҅N8o?.\LmNzqz ~ȏ|%ݻN`޼yWݻ8p0> ᰗj"8m IDATLn`՜~i;ϴikfՍ>/~`춬&HdVEE)ze%Λa}ilذN8Ikkn7SLPR~+~'3:0 λ̙szg۶mu)iQ\\ 7@{&-'{,];LCGw^C^FNRmK:HY>LˤIꫯh 0i$Nw}; 4@C("ٿΓaMgOqL GLeҥ0LĉMe>}pWsWn:q}+` RJ`9G}4ӧw[sqp]QU]M0jN2"[UXD+[aڴisϽ5+JXr%5\AeVʋF;_}~UU xH  Ɔ~#HBmmܷ۟'ޡ|:Jeq8+8q'O&??*sw?W_}ߟdz+9=>xu9qmӧ5?ORUU7<8\p@Ey4WDDDDDj|VM6 1F@I$b2hp8|``5DX`O>/{ Όj Ë!,cAB "0^oPhs#T?c7ߖ=J$"ۗ˄|&O'?? 
&гg_4 p\tItIZ{⦅gym%y/ /p-7qUWrǭz;;զו˸op98Eelt€{qSQQ<P$汎f͚5x73g3SYYF4!0\fGpױqFV./VSZZ|iͦ_5_33;vuI E߭ *+Jy瞇8餓իݻ攓O;dތ5'ˆ _qErxO+2vDV\c,//;o߃)))&/3 /ٙңz:>ODDDDDDM`kT9unA p?`0wr7PUX0d۶m1jÇ^o~)} <@9Çsp3`89k+..fٲe,]%KxR( 22: GcW*ر&`%`Ø:5J#G6ZXI>}xppM73w\lR J$b_?#?XZZC=]wKyyފS Gv{Z""ASEDDDDd?KZMcͭ_YY˙8qbc_ܹʩi:gO׮ 4; cǮ\p\tE 6,qwЁ3g2s̚vҥKYl/eɒٳ pTƮUTp4a/vfzB!?#2ePCIzmE߾}yGO~O~s>d!&  UW_K~]~IR>x~*++ dg^DZ"""""""6a0bӦM& 0 Mp:݌=+P:繜}(!.]ӟ'ҵkה]Kn89k۴iSMEK)(XFEE);A@^ gٲ%|<ПDdgw`ĉLb%OnRv-aر|<_PaVywϟ[oFSrݻwpP]]w~GG3FNq8:"b}ZPDDDDh[/#p$a>gx4Խ{v Dp:8 ̟N>Wǟ*LpoHv h(U›8axp8 aAf1L0c2f̘uF|,[RpTWWtq9xk7ٗ)S&L;v,ip8/L5jTwlٲ/~soC3uwd̘1M>~QQw< `OBvOѡkٲhIq<-0"iM (#++3grqg)Ī#8.v؆go{+t%5\~?~}<Ç",!ӁaÆ?qې!C|-Yv-hQV-#D Bƫ3].8N=d(SL>|xJn۶3͇כر6m'OfҤI#w}WC(\2x5*ܹ{3P`:fѝy9Kz"bm[v͈G=X:D"8wlAqìY1c>?7twE{eQZ)ph  yA*I&1|p*++ٱc۷o]vD|~NF&.PH$DVfGdҤq 6Ci7$ n: X gˉFddtHD"^o&Geʔ`}MO?>sEYYe*N^)QzOaҤL4c&:}r锖V re'|̡1o]w_p`*fٛ_# #3vDf0F++q8L0cͬY0aBR)..fƍlܸ_|{pجsa8xǓM PNFܹezz BQTTT VUU_} <\\.7."^oCdҤ15ӇGP0 f/_fu%rQRZx@9z|D:tĉ4\Kp„ D[馛1 wx\N/ 0j8MXnA}_QGfmnw}`1:?Mw}<_D?YWyE}K4<3ygZ`D"""""Қl ܮ8] D Í!eꔩ~MFٲe l޼B6m ٴi#{p8xogWD"Eq'w#c1$'-\J`o ~B8#s .Dtt  8r}ב=Ic{I4<묳x[`D"""""ҚKy)~V kbt @ PI.]SQM%_V=zӓ>}ЫW/G~߿kb V\IA -[֭:snw\.ݻw{5UVVL_%!.$L!Ao ǖټgEt؍ӧ3{L>C: qǹ(,˟4|@g2':a9 Q5'C+FQDk.µrf{2y۬}ٮ>Ձw)U׀'@!CF2mD=XfΜIvvv.b>ӚҥUdf z92NV$a5a;(+kڤ6Ng.d"Bq8LvfMeڴi5*xP(/̟w>}\×y>9Yllo*.$@` [AdxfDؼ3hmU؃P(۝X1q GPQ8U4+0`0v"'x"&Mp4y駹D&#m!OrU3lN8]df( ۶pxXDv֕dg^aIISD"EPR~ #[H(+BuTV=EQrfЩAsL9f6f͢gϞ _C"x =v &0jԨ#RQ%0B*:6A~Qݻx# q8YuQL2Ν;7k4:]tK.5sΚ[QQHιh@ @4op3/:"Nαk&, L9}EDDDDDD$840283DKI?T_'-޻W@9_~Mr`0pf0״:u*Lj P(澅6&;;碢Pn"} J**k+]Zo}?º'DDDDD$l>%+cYsxjTa~QX~-~K$"7#ǍgBXƍǸqja8NuFnj/^yf& J~u9%w06`kbn2<3k5#;>UG;B Bt {/!(#2r(Ǝȑ#5jÇQXX˕GVa{8蠃joݺK8" f3wpD0 g:4ci 8&"""""mFVdeNG"C.$p!~~EE)-byj‘ 0 zcp9d綿Jj_c$!l'v#=aKODDDDDDD2k8npȼ pd; Sy!*7 8p:>oQYsκ"M=DDZK"I֝tt'{ Sj !ZA ohpa=ώh1NGor/\'"""""""$KV rOǢQ?z 7 !`h h}}eGv業= vGk؃0]p<^[iTM_PEDDDDDDDJ-S-c[e @;\HwgUdVsEDDDDDDD `""bZPDDDD,/iv >M*"""""""TjbiWrr:չݐ uCcShU(Ҟ6_ރslXɧ7-7S+{+":=X:}I) M(zdH˱?H<U g 
@I(""""b]S;TSh DDDDDDDD$y6 i $J'DDDDDd?K ¬`6]D$}Ă=X:}IZ{ S[.s̗ i!4YYPRPBv6B06GN.|kVZ0ھ 3~?TW?gz0N1'$F+>g 4ƌA^ХkVV ՗:XQ˖B вcϡal4l^G]۶c^%3jÀa`(6b af(X/_杷c=ƌaa0 RZ[/֭>n&l c'p#ɚ`X!TUحsg>?=n3 Cc^pԅ]sσNA$,>¼SY ,w2CVqޱo۵ƍ|? u ?WOwwvđpfX'㏒ODh @{th'*n1àdBx`QWO9ԧ/y7L;iȀ^~u`&bp׽ЫwΛ >_Ӵ4&'~}-s.ܩ=gӎo&hi#FNOlaÿ00HiGkoe Tt6W$x5kgwc; ^CIBXB?[̅Ó_.YN9m9 o~9ڋ2y {95!]tr/ =-;( 9*g0 tVrr quλ\G/rrG)5`(m mɧiQ#7^SEDDDD$Z2\7])N9;w]K~fC*09]Cn7:wm80N_o4|f?1iG8XD.Ĝܐrdl * wç'֤a sJ蝷5oC ì(i|}7f*i#7|`6|tտ.dϞ WUWeSJ.s5wߙR()1_=zL WN?&?-"1_YimMݜl?̵$PTd6)/3_͜M5/iȀGXHz U;t3Sf Q72Ør(fxFSOy6P~ ãC0X>L=lp4ߋ/'MS{|㝐oG=k*f͆YUby eKùqt] 2;E~\s``7ɝz~iNWoȏ.U+;O]s]k-Fw͠>.geIۋįw7e=~cXI@q1%3P3u:vj\pM0HxW_%U5̐ᎻN۲ ?>||>|œ]xο(w߁GoZ>gρ#᏷ÎM;V[C|=~|7mzzq8f}33͎vq9>f57.yn(u"gn"""""@h)zxbA9XsS#/4CT;;.:ϼx\} @U>CCaN>>x/s_6qVӦ4vdNsng᲋4{C p;nkDWc᝷Oͱw4q'UvPpqϩH:Y:[ft=f\υBp.n1Qsjaf[slfϔ7^^wiN nɧ86uDDDDD( };&G^-\f [ ܗc-ZhNLGaْԜ?/>lxԩm [Q'omxAvk~]>n|Mǟ3DIp5_SsN0~#?fnz$5 &TZOZ{"""""b7ESr‰ln`Uw7^=՟6o֭-w^!7/@Hz oxzA,(""""bzlf4rGϊ'TK]g}L[o`vna@nouGLx^|0ZMy9ؑ@P`}~ kpҢ MMCx@NNrozJ>E֫wˍ(h h0Os6|#F?Z4qo7y W ޫkv%h[ox2v.w^խ=+Kb0Q5Xi "۶-s deƗ #F@vv_)u< =,+vDVߞ?1[ڵ-\pnf2cJ!ocslKkH,Kb/i _U9jy~k}&OiB~oؐީҶ $) Sm|.OO>ZlNnnUzhDxq,=9+=ݧ*M'ilZo?"I a9c7-##c%#^zMrùTy=23{~*Tؐ_}0uLˌEگoJxd Q1;;=A?nmUm"""""KZ2!Cp*y  ٿwopw>lDjM1ڇ""""""@˰WX/ ޑ-:mlۚ6iB/QEoj}[ | D" o?$'"""""(z)\π_2㑖1{r60tC֭`0c,G'wD-[AN9NIͱSQ|#gRDDDDD!;aT__{4 ֬Ċ痿n3EC9gŒR{\;OGץ w&K.M~]D ƩVɅj~Xv[k֥+unˍui @e^Tؘ++`KfUWs‡GSӳ'FX\`8v\BO1 I؛<S&woqp=Tdy|^',^rrp4ͦ$zypmxx-l9=4;g;yfh^y.q ê溁PU۷5py32?K oaV/ ͩfԿ5+,~wCYY۹vz,"""""ZV9Bpťpؐ:p‰mnX((ZYaJyyl1b$ü%b EDDDD,&Y=޻8T׿>٩Pb0w;֜mWQ("""""uh @З sN7_oH{T\  7bB 2>ܜveAee˜ODDDDD Z+̬к:sJH*,/|>Y+3QkVsip9 YN9_DDDDDDZ+QG LjٵӜʩuZMכ4᷿1oNa31 tZ ΆSwK_3ט5qW`KU EZ" ?Yrܾ }IЧoYTd6FxuxfS5}|)-m8ilSB _+$_7ƌGqfx ) u_|}'z77 8 Æ h3oyppBg믺 vNT(yɼuFakڽ7Ij& ̩""""""b=nb5XDD"""""X=FM@DDDDDAeعP_PEDDDDDDDJe+_)"""""""6vPDDDDD$@[:M""bZPDDDDZ5EDDDDDDD ZB0""""""""aЮk/{'"Ғ7BDDDDDc^Hji @{Ph&""""""""ShA,""""""""t LDDDDDDDD$>Kc >EDa"""""RE~!Hz&lU®-"""""""|V%fv 4@iU""^ZPDDDDDb)V 
HS(l!R{/I}&"""""""dW%f)""8(""""",ڏ]EDDDDDDDO((l=i_v{oEDRGkH,Kc @i:bJ ^HZ{-H.|_rU0DUU݁@uHd~Sa:!ײkצHͪe0څ:,ڏIߟ[{""""""@KQ4M7İaZ{"x6U=\Cbr@D ED` 14X^0O؅䌃R!""""""RC(+2s>)-[vDDDDDDDKONN{T(Rםn >,)eB*VWDDDDDDDK /WTr˦TgFBb&OkT>o.QnnpT>i"1UDDDD,ڏ}*E D,I]:}WTK"""""KصRî-}XRBy>gpVfLsP3!DjאU9J]oՙ;C.Y^ ;X:_H)b_osNzu\WYjMth;]ݮ-lX{ fe2*_rJCNC˙qYpCU5+5 XDDDDDDlF`{5${b'Ɏy= -""""""~X:_<`VةËK˚|,th? ^]L4HƤ J5EDDDD$@KQKԡY? Srsco(qDO F{b;Sk; PDDDDDD7KIUµiNeek;LˉuEDDDDDth? >ީӡw|n6]fwRnlH3Q("""""u(_'vmu#> LcIYle^ʽ#˹W?ayyEJ-""""""Y:L =gvyl\NvJ6 eKH{jH2Cu/,-m`O`Si7K7!'53%77Vu6 "i*A((mʑ n731i@i,=W>1FUh3EIh.]nOhRo]ešP}06; Ӏ_TZi];<"-OkH,K/jѨ@u~B|6"Ly&΢JT(""""""[Oh Gzn IDAT L΋]WpyyH$-iM6 Axft p{x$aKhptL_Hq!""""""bM0i(+cMm}܆:"5vAkH,Kc/jvnYU; p0—M.G -ŮS. Fg:kn @ilZh4vn$?MSȷ~i=HKth*8UJn=<fxc4`io,ڏ}*bN\ìyl|NzVw' D"""""K(p7dpffϛ_'d,(;<2{NKozФ ?l`PW׵]WŲ v\]QT^B -IzL23wRdywy-L!B!nGK!x&[ťks66 "] |lEiw$Jr)Q!Bq!u:X`Z}}vof^uye/({ !B!5Nx??QRM7)i>$X3=7F-DB!B- %vV}}GFRE362Eh;B!BKH= ^ AK9Q&L&L5v~:B!~MTG[)mi2f-B!B!.u:XH L\j-fRBrDB!B`R?3ťoOͽ5hupfld !B!DԛॱRxQ_wpb%jӌ+_zHj !BQ?`˂?_߿m*8FFcoF (B!PO}\V[z|5I!!B!DEHN@nī)[Z$[e-B!B!DU2\`}}o`ߋPH g-3n Mj8PSË>!B!tпmBn6;,|6k-'9K3a[89ߏBnot#:&eBTi"B!D [&.5MFZjt 9gx]<2 ;kUTRs!-26R B!BKuJ} }mѿRYnIdC25eo|j]B!B( )g ˎ܋W/)K ;?=qHsN)úEGXPs8Vo߆QXkB!B!*CjbYl}EShb/␭ϗf5xw,r6Gf}Xmd81CY\>3=Ts!9{77 V"RZB!BN/%g*VlXm[ڶpydȉ. @5/3xe;Ʃ:4LAKa?fdǷs @Gj9fmcok,.*Uj !B!|H"P-XlA?^V-&*0E@ېj9%.D3/"L٦4ǒ-דgIs?[n`ְ}=!B!N5 w߰XcoAUJ'D(:TYK5$XVޔ,uo񄎏2+(Ew wrv.Q[b5??x^ !B!W`вBjl=@y;j$:iѢ={wՋ?UVݣ>$R6G~cR:|$em(2* _{ $#{|ݡi3ͮg"DՒB!Bu:X(,b];Z^GP`:vƀ:t(C aȐ!4iDs_K؞ 94c<]"&8F6gTv1*:Թ0k:#̝6ީjT~A$B! 
$;/,,?=*}v:v :={b(%țn/q!|C}#'pl= Oz+&3=͜[fIdO귐%[wڲl5z3AB!B VJ~gdLyL;#(ǻ~~;X+'/{8LS54;Ʒ;fkBMLyM:k;|guȆ7c鹇vf B!B!F:Gv!2PhkTGq`xxÆ cĈ9To$,d67`зUHPU\ᴱd<<Ȑ^'qk@|M*gso=ӵ3Z?V/D͐B!Bu:XTՊ9 rAU->:0a< vʨvqsJv_1<]]:]>+tY;Fv_ @xә5cC;nGa4lؐ?~<= .dÆ dggWيv;[l7dƌWRqU{7̙ӼjqՊ5{5ˮ\E{bqu&&|-9d'V `{KZAi'0č)= mtBӇ}Ҷm ~JOOgӦMlܸ׳e|%2tر^ؓOĤS]Sٕ=4 X3 "=sf{)٥AӁ ǗAʖoy|BN*ZOd'8ήi#_B+qB!Bqʲ֓s76^NMp|WիAN:ͩSX|{4**!C0qDvMؼy37n$11\ev>3g/2'NUaSF0u%"Wトfd@ ܜ(gN,$u'Ǖ\g3( %/'j8ܯ5)z {o!=p[:B!rU$'٢&YH aׁ~yyt^ )Wvv+Wdʕ+deeRO~|p9IMM;wdҤI5_~!C9lg|##sٯ9Y36ı} }vnzg&;JvPp2yzmOѩ]dX#6+}sLjuZbH,t N3kV=ZO66B!BQMjDvS8i1}; ,+p޵|~zEAU턇Hng ݰ8uou(e} D^l6& _eذa\y啼Kt֭9ݪ:TiX\ r:8vnO4ZuTpQ 2IYd{ouXioO^;s|up :5rUX09+ծZBF~f !BQ??` Sbp'vLn+}c(DC$~ ^wB!B!.u:XuJ:vȰa>|8Çk׮Obo gjz'x]Q3ƾ}عs'˖-믿. O3$$3tP СC5^ۃm6={֯ ?t܉_~EW5zN… Yx <{f٬50RN|^3&vkt/>FMO-~ٙ|Ӵx{~v+]Xe&2j)={9DBz;tSrz]׺}C^RApChNB!B\4g4Ç'>>D؞={pGw5w˖Rڀz`ܸq;x d մ@~z^zϰaÈ'.UGӧO7eyRU{Cʼn(Ý{n <@Xm3gpy^H dX&$$8-Xls%111:WԜf e}+bw)8>VP\<Ps%uaTV(!!)/ܕXH5;-u< š{M( ԫq53jMbI>fa_U/kkt}64 B!B!:5\À*}}i;v^_j ƒ>ڵkyiذas:ئX IMM%%%Rmӵk*2:u*Ǐ_祗^"//ϫQH \ݓ벓R vXշ{4{k2W>oIW>Jy wc&Eyn-K\ӡ0}>ؾYsVF' XIԢۙ)l ~Q%N{e) 'c"j5(B!ЪW@k׮cW_}5W_}5wu7YY7q6nG}ȸqJ\<-~dt:z#>|8 hѢV qfΜ?ҥK^.z} *DnmLuo$ϒ>)z.v?Už-Ҩ1nAiW"<% JH^xۻxM8:v_-#q?[ ^|!qfrL8 4B!BQԫ`A3w.1֫W/Z-š5kJt=w,&LnW^q״X,~e dIII+g48p{+o||n"6s9ؿǖ%f-#\rOHm\n >u;F.U8Sq} lQF~p&-CP^RrN2[[:Lj !BQ?`M7(틒?z ..UV1o<DZZ-h" Hff&f" 2tP1bDTB֮]ϊ+HLD5E Xג5QlP#wPhwXm0Mi4a! ?ſzuo#q6OG^.O}ՂE841\}+t~L?ӌ)f*sFiԘfsF6 OsͶ1C?fGj wxpfkrt9S!B!:i] ޵zTs2~x:WÐl!v֮][tv^y|vdJ+e1-X6wڦռ\lt uvhcuF:Sŧ)`dEYgQ|CfJjԫKK}tſoP֑B!B- A+Y0lx QlYC@ټy3_x-mp8xRvS@~7fРA3vX[I;wl2/_es5K~d ?!L6M$$h"FCo*g IDATT)H[ -?TKE_(J:%3h,r [>ACf4 E {=՞<I'Ui׮#GdĈ$$$о} _[Yr%^z^7GE AU q־]uuMP|j*6,ֵXk})A 6M 4.W`˷Xlk}-B` XXסS%4ZC) Nmg0 2ڲ٘f̕WJ wKc0qJ&~2(<>#w߾}Zon=z{;oUر+V߳yf,1ME/Jm8J7p&W!y:LƾdKi$va ,),bҷyoVcʔ)$%%b YNɓ>W_o{͊ZtS. \@qa! 
\ڋxP%mHG댥gYSrۯҤA)[ 㼱a&>)W;Yw-N}64c0LZOrɛ 544w !B!DW5͟ @_\r맪:1Lr5 ߝOLL)e6Yj+V`Ŋ;~}GQ\i@q`22d$]vC{;vիWf6n܈ͦͮӉն mE BU}_C-ښ(Я_NԩS۷/зo׍?0}YMoŋs\,?2?@huޅPr7p,6}S3֯%6lҨ 9xO?h2L6^v'PtٜKBT_etg]M,@ǯPn 3@`ϻ`%d !B!j? M@*Xmr.j|;wE[C;oX[fPU3jGU(zԢ@^z1qD.2K͚5c<3n:w@pϞ=^ߗ[J<3f W\qSLE38Ν;q}츶q+L4}N=Xho9 .d۶mETsY& Cf|3mwp0/_36$ҿ@܆o1IO!Ͱ{ۯ?| Nۊ=yg* 6 Hk}s؆ھr9 G ^AQB8rqˀ !B!bCE2N;;޽{@n֬@Q1-O=III넅`v6n4AKEU EQBp:3'Ng&ZkoAFFԩSۛcYO}ŷ~ s{Ipf-čhjQH @!B!: 5}}roߡ@Q\knРVǂ Xz5-bԨQRRRXr%˗/gʕzzF棪>Wt?vҼys&O̔)S;vle;6i҄Yf1k,[laŊl۶-[qWpW2rH lx ^_~,Y 7t&j&`>1 R5iUaS߈-oH||'q9ܽC3DDb' }nJ۴cQHҾd** 4 H3} ؆˵bKt]B˱s v|#*SB!BQ`MH`ul*Vw@8(t1ƌٳ-psN-[پ};Z^6STP΢8`A2e SLqիNz!C0dy*$''E!,,ӧWx0nVnVOնn̏:d ,C*li2؏?&M_pUs?6{8v ĤS:]\<gTsf38:xj9>]hY{ a:yϳYu‘%O`xER !B!3䇊dg[kzݴ_YuU`Ú5k7@zz:~)_=7fy|@JP&))1@l6+^oӱwȷzc>vށi/20v7Jf/Vjc@X  uoӮ"ҳŵ? |Df'!DwM+B!JUZ`E;Gח:>o<5s;k=ʸq[|]l߾^zCҴiSf͚ŗ_~IFo}>%z} @LHBU;k;mۖ+W_nv6mךkʵ^Kvv; GQt0w܀gl۶M6q뭷u}bf]>}:VƱBR. 00brlfw˙Ąy(şjt%r{WϮlmar6"*Js.-t^& v^H/L\bB!Cv*Uwnԩ߿9s攑 㣏>SN,X ??իWЪU+ϓO>ƍq8S1SS8g4y7u >W^y{r1z-ƏOPPu͛hK4 |gx׋GO*55oJ]kqBC5+tG_>댌$jJ2ݦp5U.6,I i &w FC%~;5\Ç~ȹsX~==Xݑk'x(}۵kW{jott4sȑ#,ZmTQO0^`ƍնҬF..L|y8:O3֯mĄsePxӏ~hBѯjuRã"1x}vvUit'o=φķ0N]vp5 ,:}?*g[AO͆Q3f!NZOiڱ ڐձ AX{I3(Ps1<ɉs+tϨlv,k{B!B^u T_(l˗ӼY5e L9z =v6hhw$8uQNlwT߿?<;v %%?Sy;wnQ6?xڸ͖-[xꩧӧ_ϯ.s=<Ӛ?PPU'7pC4ɓ'_(O?U} Yk՞>6CH(U+pޡ9Wi㍷Aly?4)C(2%:~E_[5c}j YfdRSSXls̡y~JKKc„ {?2qD>DJnTUU̙CAAAw 3e!#P2AK%"-s k;d#$DEj~xl oYFv~<\6$j$*q0^7Kp`tjZjĵ [A*$UXT (B!ЪW@3J :N]5_RWU=g@>m4KBBBиq#f̘QTp8л*ݻw'`ӦMN> DK.1}zF+7n8Z^Aɓ'7o̽ѨQ#^bߧo߾@1ro6'N`׮ 4L 55YfqW P2m4[M-::{ UUA_~j3O`Q\~R19ŝ>/ؽjN* kgkզ+p5m&7ۀŠv[r~X􃴅 Ռ?Y}@VV]ۦՓ(B!HNMɴkqxZ*Q$$$f^Zn:2믿NǎOQc#kcxZ+vӪU+l6 /\Y7<* >,}bfl[鵩NH [53a3'kV4nvҌm=1cU3u9y<fP mЎ̐ !5B!tU{}g 4 /Zm^QtƢvfcѢEtܙ~ٌw?aɒ%+6;288g}Բ\`W~NɮSjƆGo >tW0Ҏ5JĸGDGjzZI,( dy=ic'='gގҸ:-8~YU6>=}2BJO)B!hE5 4cveBn+rq6PU'?0ݻwh޺,''7|Ν;swpq@WM=?~<{fڴiotM7ѵkWfꫯV=s̚. 
~?k:Uqstmczm ?圀3kca!nLkj2)TCL /p&sI!o %,\sKtQG)mޒ~E.&B!Bz Ty68ѧOW5e|iӦ O=T몋lO˖-yݗ]QqVdÆ Y`?#-Z8 ^G`0駟Vi-Yؼ>'#V40v(ZseRb:#t FlF ņVc>9GiMwh_/UŶSI'*u_}t.]i#Ki+:!B!C֖YYY>}OP!,,Wٽzl.((8 w!44K֑#Gxܹ3 l;z}z9spfϞ]Ix4o),,dU6Qx>-wj7@h(.既V\!QQ@ `tyF3v"}-GR~Ԍڴ8c6cnBԬJ;elɂT8c狺 B!BTNڵ1O=zTs=8j)4x:6m' :x ctԉg}+c;#utx޽[ AAAw}G}Te5 cb8v)9{4cG}nk5jКmڱݡ񀀦`d6xa!Ǫ @V78RE}oA׻15c9ۇbԽ[vڱ%__!B!K  @Yj޽{Ē C{H"#7o^HIIaҥy睴iӆ]2w\6oLqϥki28x ~)]vuW;30zD֭[)V{5ce59 sV9-}lZt;v,(_дn-:|L^g\5cٻu]^kz,O?4R\ >I¥eXIj !B!*_T*/WU=V4/qfرc8pCqaCSP;;:Sˮ<6cn?́I wq0S:7oޜYfqmHQ0yd,Y[d #G h5YC ]}d?)8>VT{2}5!a#As)z8e_篫5IgKye~4J <{ĺb:'(gĖ3{y0F"UpsϘ=| nB!BQn[.Nݻw @ח͛7si:9pǎ+#3PWjQFqWz^^^{aǎر۷w^,%7)ʌ~(`LM7ĤIrӧdt:N˗+9tWF_na2f4sma─p[(Ў Uϝ(VܒB F>&;Z?ï0F 7DaTݠ+4)[O>d)gۉSPh-ZTkȱc4MLL$99 z~ dbر\wu\yDDDTs.ru!Oy IDAT{7z?k'v-W8Wi-wb[6جqQ"׽uFx}^}OBaYBj !B!U::zn(t) ٽz_z>3g`0N#&&F3WHH.y}}f(;ȩ1^Rll,5#GһwZX4@WSY9B fݎ;5jCl! ?;%2 wAOw ?pd x7-g hrMo n1sb%5j1m V=ݧW%׵jY>BCU-cD$62 >srX]{-B!U0P,M (KrejŃgZq 8SldffoINh B~_1@0`dt]~߿?` SW6,qκ/jޯOSbTjqKW[I&/hPdH*ʂtmN灸N}J=ѩ$%0u=zc[ٰ}8{QZ1yj$]qOG!B!D5(X%i֭e^j3=Y{%XSŜEWq5hCBBwu;ܹ rwңG O>zwGkcC6fiZ素Gfw3oCix߃9I;x Rk|_/*יϿ5?r Y~8`AEy!-/ȶڱ{~O%B! 
@yo7o;%2+'- S\Y{j)ocFg*}O%88:СCڷo{i֬^`FΝظ>OOC~q6opd]BC**߭]ԝ3: L9~3upIݮH  ]6Q,o>>#;3kաMzΤOcKV3)=j88y2?ռ\l PVΕ]ze`& !&bQaN +|D!BN+c 83gMVڒBDDxjfEGǰtƎOOO4$++Kn$;;ANN{c_eu:QQnAAAz긅c4z:DGG( JLL DEEKƍiҤ 5iӦŹOTYwSm-x6:pz0&A@cp;!:]jTQSFi;s_ TO: PU'v;GmBK^(݀jܵ=dc[+Ƕlc8t"j1] pdEڶi~m^B!w@p>n5[oou m+{8:cFn)[Q- """"MXYQo m۶U|PJ7NOg)/j-eL0ǎ{hGT_ ˗=:ތOKZ =*ve+d tz8 @\i^O_'up~!nUpFy =;r7APC2a dݣg?L@׉ xm,x%2}P߂Z}qh7.7of̘G>d|oeuʕ+;v,;,[+`߾}ԩト];)Y4}a]X.hqrgR/< FU>`.JGYOm*ڵC {YovXv¡b_TU*~E60n@˭8z:V\uY0_iY FAѻ:p QТ @vލÇw<^y<Xۖs5xQ7)))Wl]͛7hi2Q]׽8"3 J;r n}緗<1cu~d3pB>@߹NϫP<98ZN%W rNB<tg?)Yy3?&զ-9r$"""Tl`Æ ӧ/QV`b%]}$AP߿/1mtdeU}C_ϧAT#n;[RN;s ʼnuoaT<1MHҵ~+ ^vf=Bj?ў `A!͵3/3T-Խe9?:GX}wFC`kSGPK _ǝ!v tpp@>}dQ .ם4 1k,AM%%%Xb/ @e0)_~pu}taXtR0ʪ<޽[w:tկ|*DɱV2][\:%Ya9vڇlGۥSFol=\"e_(`L&Nl^feq4$HxnT<m ~u<~֘EP=\._qgUn@C@s +FnDF 0W{OM={<"""""jZtx/[l߾j-8}t 2JegX̙bܹXz=g='Oȑ#zwS5-+1H`7Rm9}߮@e @ON_B$ /t=[s6tf1MM8t¯GRÊ$|fK53zI ?傶py}Z a@^BX,bNCRR_Qs* ̙V9ܠEݾ}׿.N1[*kk ITǎl̴}n>2*i'uE|3$IGoW([3+-U[~}M^bG7oG ~];vrGM7[a0₨(hZ$""""֡M ::$[ѣ Ie~h`Aq!?^k7([7vK1ɰOn.u[nH^W:[OuQ&3 us )F ͵2S!k5Px~bH{diW0md]>\bT+I˗/#::Z/hS}G J-v,[ $'ȲÇDZc+`0`˖-6#ˑeõMz_fPCgfw܌@瀹gNK:6>o!n#О}}__NB#%yvwPR*5\J2֤, wrh)dHHf@@9xy"""""jdxߴipӳj@2TʡR >&,!33򽥚]v} @-7(\X)a~cǎX0a<]}j=c1p S+j8vul1_߄ >cs&6Yk!B"(ʾƿm[ 鄍2O#3w2d&$[nlj'8զ @P*GZZ"""j'ZuL&~ib &O)rrr`x0_ZX~w?W%;ߢkON2z 7c^0-ݻmgO^)MwhBCH?cuGg܍/j$I8#!yT%,| đ67Œ|wsǠxHH,YTU|`Xcǎh41yd!2{LfƢ_h45J% CЗ*aJRdp Pz#^oXqoK[̶#P\1͵m_ 윰`.z\7[ ǗqҚg)zz FT}i=)֍EaDЕ g~pp0Ο? $""""g8poߎ>}J)yyyĸqp1AD5g2l#Уt]>KL܋ d u_Uvpjڬ EYbJKM6&{;4' ) ƼG] W |1fMo؃9ÑE.9p@|HJJB߾}dQ̙ . 
&&R~GL0aaa}Ra0K/Aח/nW:lu1_0wO9Qv/m<CXk0\5lr pVBRqlNoj7t 8:B9YQnvL4dFVޓ0.TC tjxꩧdQ1**t%Yea0 ²*{qÒ$AeTy,aaY+ɒ5=q赋n6)3>nκ߀;n-ݼ\BS;Ng"ԋ(.˪Ys?@*]{nrK NP˾RUB%""""` t 8vƎ+(: K.m6AD|裏P(e3g~;f7&oz?lRI?n;LXH;(u ]t|<&]"Z7WBWG*io~O6l/,=ݛPvbVm>,(' #ū!b~$tz""""""0vX=zܹTk!se̛7fŋeS[w,XlN֭*j=tC d;oq?Bzl$c1p pvp ߇Mms ?()8 _ E{+]ǚ`i⩀;!by Z,װn7 @Ib(-\a?@nX,B2ݻwcB2k=B@XXҰdWSIعs'୷Baaa=™3g__I`gg?:urO Z|r 9c*i_g>qSgw>0n=d^ +bK\ÔZ\7 dύJ@oΩG JspWQa͝|a̛霐LWWWDEEA>tI>,CVŬYZKшk QM:u ӦMG^ރ\n:L2J?^tg{ۥ bb{p9w߯|k \ӿCv,6emQ}«e}~V׷2PQPT( ˗ V&5)If$Ğ={cǎ1f>|gAs[x1֮][~2PY6'wD/$u@Ƣ D|nxOne`q5UX},wճ;&o3V)g1G(IzF/ 6c_5}t|!^2J_!h1Rǎ F#pOY"""""c(hg}{O]I/c͚5A?$&&b@7 /? N+ pmE'!{IDDDDDT[,ШQp!|WXx1_LMM̙31w\|0`lji݋0ܾ}^g[j|G6~B\n+j/@L|C5fe5q`88є?WXXkݡCܹz:Ԁ @i?ԉ{Rw{ѣNү7¼:#nvuYͧXrSN TT@zz:j5Zׅ.ndGbb"ۇHh?p*k t-ذa֯_#G ȦΝ;Xt)ʋʇ}/~cT{pr=hBɈ\ &r&3ʟgzpbEQ`tiPTEbFA $"""""`6mN>͛7wAv7GA@@ϟkעK.² lذtBHKߊ"""t6J*%#iEhJ닏>fIDDDDD )J#-- BY,2l_h4*b`֭FdddJ F2r !+/HX;w,;B@⩧j!VXXH 6 vM b/?BCCO_'IqYL:)Jlr1 q'%mB2 BCC%K^H.QCfdZW ˾pfΜٳg?ld2᫯ʕ+q{W%J/iHv[z2okԗRu+X"M6R.c,fdgُTK+Y═e13%I?>zIDDDDDxH 1f=zҥT̞=>,._,4d Þ={p϶{K.O>i%+-KA((Z*=z4>DDDDDDԪlA Otpp.oAVQPP 0mu4 &N!C`͚5{7qY6W}˗/#..%Q%dNFv\ׄdv qqq8qd5',[: &&ΝٳZW&_>_ jIEvm|駘6mzH9r| n޼5kְ#bAN9(3ioo"<<\:DDDDDD`>>>رcv-VkAp=?ի1a-!RgY'Ik aDЕ HIIFzQslPٳ{U8QΞ=SÚ5kЯ_?a-hÇsNܹ}p , зoF3QK7AN 邰́bݺuxꩧe5wJT*j#""vvvTcaРXlt:ʕ+شiyxzzbڴiXvQ!!!ؽ{7\hD`2_Dv~0Vuj, VaaaP/̵2bbbi&,_-\468pكݻwիIg.}p]Rb7oy.3$E·8ueLRW_}VL"""""`+5j(:t_}/^ׯ H]وĖ-[' nZiii8z(>G"553{\y?9::`ƌx0g7ЉZ [#,^CӧOGll,eD,[1Iٳgc͚5Xz5ztO&Mr;b(3<]$#$$DX&QK&ɲfZ7nw-[Z2GG',^,YYd[VVVSNԩS8y$RSSa6?l}+H64O?4fΜI&F8)SRCģlQߊ՜ /^e˖I""""" ڿ?j5ZE@ҥ }]CR ̯޵kנjjq9;w.]dʳ$ 5\\\0~xgm1,x t%!bf$K_~k֬L"""""ք`e2yf,_YYY UV!$$$UC ɲ_~iiipRRRVQ}ge;4ix 7 W((#L_3ƍIDDDDDڰlrssb |g)WWJ^SNUBii).]K.}HMMEqq#^SF͖V-]]]1n8L0'Nɓaoo_1D,1?!_FQĩVݻwᅬPd """""jX 55صkL *0f̘˗cҤIgff۸r .]˗/sG*&?vvv8p @ 2  @jHfK QT 5}<'''DDD`jQ""""" HJJZիWeJd ^ ={Զ0xr>}`̘1ߨQX #J>Ca{ȅrO>2eS9s ((ׯǪUi-$HRSS]C}*=srrA0tPcС6l:uTπj, 7`2 1b4 b""""""9Gu-[[<{{{? 
CkapHM/z2=<>]t}YL<s/"4`زr W##g [pI˗/#::(5)I0#** Bw?^ܱlJ˒p'g Bdȑ#HHHL""""""jz,pqqAtt4Z-fzX>>>HHHM$x?&ݻwG||\H&N,MΝ;}vOhٳgO`Μ9vlQ`2[n#p!ELgggDEEŋ $IBrbH`Μ9HIIAll,܄fر~~tR ͮ/ C Еh퇢ҍ,ƅ  '''!DDDDDD$jB!ۣ W/M\њ%N@"!:!#GġC޽{ $""""" Q%ݺuC\\? JmoƂ 0n88qBPvݱhJAVLdυ|EH'bccqIL8QH&=,! GA||>Xz5 lj &nDF/t%FAvdQ\\\ VمXt) oVhvu8n􆽸3y ad ŷ~ݻwcB2DD޽j-.^`<䓸pႠj^`̗S0YyA0d#&&ΝìYdUpƢ}BكaÆAVPhve,kFPP;9CP&$SP 44iiiXd UT*j5!Ɍףo~h40/W XPO@F, I:u*Μ9tIH&ѣ$'h4:u &Mj-rss@?~\PvWaHI͝܂0[ĜܳgOc߾}:tL""""""$dĈ8x oߎ޽{ >y$1o<\~]XnC,l̖[-q0Ŕ...ŋ&$X 6g\pQQQprr+2m__?DGGC;\C~ ¬I . ::(Q-$jΈŋ IezX>HHHW @Ҳ$"!GÇ^z $"""""" D GHHH}0l0A֙i7o` IDAT 0m4;wNIm4O#3w2d&$[nÉ'($>X5)SӈGΝf߿#G… U{-бKuj2!!@jj*áP+5|JH %K^XlƍFT @FJ4]zb>`@MH&(,;bbbp9<ӂR˂ !C`׮]Ֆ @a G.@H#)) IDDDDDD$ @&۷oXYT̜9s+WP'a4]ٱcGB⩧IDDDDDDPX59s %%h׮;vj:㭹((Z;9CPZLRpAVNH.QCbH CV#%%EF ׯ@,)Ja|XPO@F/ WC BRO3g ..B2 @f[nÉ'0~xAx饗0vX?~ԲV*3;9#[KLooo$&&bϞ=IDDDDDDԘX5CGƑ#G///A"ԩS DXXЭXl܂0dMLWWWDEEA"$$DH&QS)..ڵk2Ann?^^ZAdk+Y Y T(?>֮].]$""""""jJH̹ ::gY{NkWe6&%mʿ1cȑ#HHH`GDDDDDDg0{Edd$ZgExDXf#44ԲCUD-qiCP&/0)ْ…;VX%K 55aaa,U @,77+V~ ,$SGsYIrYl@Q\(,7881#,9bH 9sjig]W1MuJpiYu09r$bcc1i$aDDDDDDD @V$)) v횰L)pwJ9LXfuTހLXh5w,Z|Xr%*8>̪,\3&!* o? 
۷IDDDDDDԒ$jn޼~osnK$^H EQPKԠ h4 4HX&QK;pj5~gaJ/]zg Ѥ02+|G={L"""""" Q`XuVDff\'`tp@iׯ̗QP6JۄCXt)x ۋHDDDDDDr$jCu`0ɔ${:vBjWek+, Bsۣ%bH7w}',N \( -(-rGN26N)&""""""jIXaIIIx7.,^5na_~`<<1aӳgO\aaa2Z#Dm\YY֭[UVHP.N {J>DKb~Ըbٲex7($5cHD DGGcӦMX,M=*$I?>zj,ǡVqɦJcB`رM=""""""a;Q6n8?~ٳgk׮ѣGYg#cڵXz5z}=^uFvuZ#DT˗/ƶmhЯ_-"""""" ޽{d~~~XnfΜ)<-DTcӧOӧn:tAH>S$''#""""""jHDu+VO?lJ*V\N:5`HDtYj|88۷O>}{#""""""j$HD”bXr%ʯ`XlpDDDDDDDm @"֭[Xl>s̟?k֬WSMbHD &++|51DDDDDDDDD!""""""""jXb,Z1DDD۱A #`L1c @0&`L1c @0&`L1c @0&`L1c @0&`L1c @0&`LzIENDB`cmstatr/man/reexports.Rd0000644000176200001440000000121514015477557015042 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R \docType{import} \name{reexports} \alias{reexports} \alias{augment} \alias{tidy} \alias{glance} \title{Objects exported from other packages} \seealso{ \code{\link[generics:augment]{generics::augment()}} \code{\link[generics:tidy]{generics::tidy()}} \code{\link[generics:glance]{generics::glance()}} } \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{glance}}, \code{\link[generics]{tidy}}} }} cmstatr/man/stat_esf.Rd0000644000176200001440000000241714015477557014624 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotting.R \name{stat_esf} \alias{stat_esf} \title{Empirical Survival Function} \usage{ stat_esf( mapping = NULL, data = NULL, geom = "point", position = "identity", show.legend = NA, inherit.aes = TRUE, n = NULL, pad = FALSE, ... ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{aes()}.} \item{data}{The data to be displayed in this layer. 
This has the same usage as a \code{ggplot2} \code{stat} function.} \item{geom}{The geometric object to use to display the data.} \item{position}{Position argument} \item{show.legend}{Should this layer be included in the legends?} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetic, rather than combining with them.} \item{n}{If \code{NULL}, do not interpolated. Otherwise, the number of points to interpolate.} \item{pad}{If \code{TRUE}, pad the ESF with additional points \verb{(-Inf, 0)} and \verb{(0, Inf)}.} \item{...}{Other arguments to pass on to \code{layer}.} } \description{ The empirical survival function (ESF) provides a visualization of a distribution. This is closely related to the empirical cumulative distribution function (ECDF). The empirical survival function is simply ESF = 1 - ECDF. } cmstatr/man/calc_cv_star.Rd0000644000176200001440000000177114015477557015441 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/norm.R \name{calc_cv_star} \alias{calc_cv_star} \title{Calculate the modified CV from the CV} \usage{ calc_cv_star(cv) } \arguments{ \item{cv}{The CV to modify} } \value{ The value of the modified CV } \description{ This function calculates the modified coefficient of variation (CV) based on a (unmodified) CV. The modified CV is calculated based on the rules in CMH-17-1G. Those rules are: \itemize{ \item For CV < 4\\\%, CV* = 6\\\% \item For 4\\\% <= CV < 8\\\%, CV* = CV / 2 + 4\\\% \item For CV > 8\\\%, CV* = CV } } \examples{ # The modified CV for values of CV smaller than 4\% is 6\% calc_cv_star(0.01) ## [1] 0.06 # The modified CV for values of CV larger than 8\% is unchanged calc_cv_star(0.09) ## [1] 0.09 } \references{ "Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials," SAE International, CMH-17-1G, Mar. 2012. 
} \seealso{ \code{\link[=cv]{cv()}} } cmstatr/man/normalize_ply_thickness.Rd0000644000176200001440000000472214016050422017727 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/norm.R \name{normalize_ply_thickness} \alias{normalize_ply_thickness} \title{Normalizes strength values to ply thickness} \usage{ normalize_ply_thickness(strength, measured_thk, nom_thk) } \arguments{ \item{strength}{the strength to be normalized. Either a vector or a numeric} \item{measured_thk}{the measured thickness of the samples. Must be the same length as strength} \item{nom_thk}{the nominal thickness. Must be a single numeric value.} } \value{ The normalized strength values } \description{ This function takes a vector of strength values and a vector of measured thicknesses, and a nominal thickness and returns the normalized strength. } \details{ It is often necessary to normalize strength values so that variation in specimen thickness does not unnecessarily increase variation in strength. See CMH-17-1G, or other references, for information about the cases where normalization is appropriate. Either cured ply thickness or laminate thickness may be used for \code{measured_thk} and \code{nom_thk}, as long as the same decision made for both values. The formula applied is: \deqn{normalized\,value = test\,value \frac{t_{measured}}{t_{nominal}}}{ normalized value = test value * t_measured / t_nominal} If you need to normalize based on fiber volume fraction (or another method), you will first need to calculate the nominal cured ply thickness (or laminate thickness). Those calculations are outside the scope of this documentation. 
} \examples{ library(dplyr) carbon.fabric.2 \%>\% select(thickness, strength) \%>\% mutate(normalized_strength = normalize_ply_thickness(strength, thickness, 0.105)) \%>\% head(10) ## thickness strength normalized_strength ## 1 0.112 142.817 152.3381 ## 2 0.113 135.901 146.2554 ## 3 0.113 132.511 142.6071 ## 4 0.112 135.586 144.6251 ## 5 0.113 125.145 134.6799 ## 6 0.113 135.203 145.5042 ## 7 0.113 128.547 138.3411 ## 8 0.113 127.709 137.4392 ## 9 0.113 127.074 136.7558 ## 10 0.114 126.879 137.7543 } \references{ “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. } cmstatr/man/glance.equiv_change_mean.Rd0000644000176200001440000000675014015477557017706 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equiv.R \name{glance.equiv_change_mean} \alias{glance.equiv_change_mean} \title{Glance at a \code{equiv_change_mean} object} \usage{ \method{glance}{equiv_change_mean}(x, ...) } \arguments{ \item{x}{a \code{equiv_change_mean} object returned from \code{\link[=equiv_change_mean]{equiv_change_mean()}}} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A one-row \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{alpha} the value of alpha passed to this function \item \code{n_sample} the number of observations in the sample for which equivalency is being checked. This is either the value \code{n_sample} passed to this function or the length of the vector \code{data_sample}. \item \code{mean_sample} the mean of the observations in the sample for which equivalency is being checked. This is either the value \code{mean_sample} passed to this function or the mean of the vector \code{data-sample}. \item \code{sd_sample} the standard deviation of the observations in the sample for which equivalency is being checked. 
This is either the value \code{mean_sample} passed to this function or the standard deviation of the vector \code{data-sample}. \item \code{n_qual} the number of observations in the qualification data to which the sample is being compared for equivalency. This is either the value \code{n_qual} passed to this function or the length of the vector \code{data_qual}. \item \code{mean_qual} the mean of the qualification data to which the sample is being compared for equivalency. This is either the value \code{mean_qual} passed to this function or the mean of the vector \code{data_qual}. \item \code{sd_qual} the standard deviation of the qualification data to which the sample is being compared for equivalency. This is either the value \code{mean_qual} passed to this function or the standard deviation of the vector \code{data_qual}. \item \code{modcv} logical value indicating whether the equivalency calculations were performed using the modified CV approach \item \code{sp} the value of the pooled standard deviation. If \code{modecv = TRUE}, this pooled standard deviation includes the modification to the qualification CV. \item \code{t0} the test statistic \item \code{t_req} the t-value for \eqn{\alpha / 2} and \eqn{df = n1 + n2 -2} \item \code{threshold_min} the minimum value of the sample mean that would result in a pass \item \code{threshold_max} the maximum value of the sample mean that would result in a pass \item \code{result} a character vector of either "PASS" or "FAIL" indicating the result of the test for change in mean } } \description{ Glance accepts an object of type \code{equiv_change_mean} and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries. Glance does not do any calculations: it just gathers the results in a tibble. 
} \examples{ x0 <- rnorm(30, 100, 4) x1 <- rnorm(5, 91, 7) eq <- equiv_change_mean(data_qual = x0, data_sample = x1, alpha = 0.01) glance(eq) ## # A tibble: 1 x 14 ## alpha n_sample mean_sample sd_sample n_qual mean_qual sd_qual modcv ## ## 1 0.01 5 85.8 9.93 30 100. 3.90 FALSE ## # ... with 6 more variables: sp , t0 , t_req , ## # threshold_min , threshold_max , result } \seealso{ \code{\link[=equiv_change_mean]{equiv_change_mean()}} } cmstatr/man/equiv_change_mean.Rd0000644000176200001440000001464114015477557016454 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equiv.R \name{equiv_change_mean} \alias{equiv_change_mean} \title{Equivalency based on change in mean value} \usage{ equiv_change_mean( df_qual = NULL, data_qual = NULL, n_qual = NULL, mean_qual = NULL, sd_qual = NULL, data_sample = NULL, n_sample = NULL, mean_sample = NULL, sd_sample = NULL, alpha, modcv = FALSE ) } \arguments{ \item{df_qual}{(optional) a data.frame containing the qualification data. Defaults to NULL.} \item{data_qual}{(optional) a vector of observations from the "qualification" data to which equivalency is being tested. Or the column of \code{df_qual} that contains this data. 
Defaults to NULL} \item{n_qual}{the number of observations in the qualification data to which the sample is being compared for equivalency} \item{mean_qual}{the mean from the qualification data to which the sample is being compared for equivalency} \item{sd_qual}{the standard deviation from the qualification data to which the sample is being compared for equivalency} \item{data_sample}{a vector of observations from the sample being compared for equivalency} \item{n_sample}{the number of observations in the sample being compared for equivalency} \item{mean_sample}{the mean of the sample being compared for equivalency} \item{sd_sample}{the standard deviation of the sample being compared for equivalency} \item{alpha}{the acceptable probability of a Type I error} \item{modcv}{a logical value indicating whether the modified CV approach should be used. Defaults to \code{FALSE}} } \value{ \itemize{ \item \code{call} the expression used to call this function \item \code{alpha} the value of alpha passed to this function \item \code{n_sample} the number of observations in the sample for which equivalency is being checked. This is either the value \code{n_sample} passed to this function or the length of the vector \code{data_sample}. \item \code{mean_sample} the mean of the observations in the sample for which equivalency is being checked. This is either the value \code{mean_sample} passed to this function or the mean of the vector \code{data-sample}. \item \code{sd_sample} the standard deviation of the observations in the sample for which equivalency is being checked. This is either the value \code{mean_sample} passed to this function or the standard deviation of the vector \code{data-sample}. \item \code{n_qual} the number of observations in the qualification data to which the sample is being compared for equivalency. This is either the value \code{n_qual} passed to this function or the length of the vector \code{data_qual}. 
\item \code{mean_qual} the mean of the qualification data to which the sample is being compared for equivalency. This is either the value \code{mean_qual} passed to this function or the mean of the vector \code{data_qual}. \item \code{sd_qual} the standard deviation of the qualification data to which the sample is being compared for equivalency. This is either the value \code{mean_qual} passed to this function or the standard deviation of the vector \code{data_qual}. \item \code{modcv} logical value indicating whether the equivalency calculations were performed using the modified CV approach \item \code{sp} the value of the pooled standard deviation. If \code{modecv = TRUE}, this pooled standard deviation includes the modification to the qualification CV. \item \code{t0} the test statistic \item \code{t_req} the t-value for \eqn{\alpha / 2} and \eqn{df = n1 + n2 -2} \item \code{threshold} a vector with two elements corresponding to the minimum and maximum values of the sample mean that would result in a pass \item \code{result} a character vector of either "PASS" or "FAIL" indicating the result of the test for change in mean } } \description{ Checks for change in the mean value between a qualification data set and a sample. This is normally used to check for properties such as modulus. This function is a wrapper for a two-sample t--test. } \details{ There are several optional arguments to this function. Either (but not both) \code{data_sample} or all of \code{n_sample}, \code{mean_sample} and \code{sd_sample} must be supplied. And, either (but not both) \code{data_qual} (and also \code{df_qual} if \code{data_qual} is a column name and not a vector) or all of \code{n_qual}, \code{mean_qual} and \code{sd_qual} must be supplied. If these requirements are violated, warning(s) or error(s) will be issued. This function uses a two-sample t-test to determine if there is a difference in the mean value of the qualification data and the sample. 
A pooled standard deviation is used in the t-test. The procedure is per CMH-17-1G. If \code{modcv} is TRUE, the standard deviation used to calculate the thresholds will be replaced with a standard deviation calculated using the Modified Coefficient of Variation (CV) approach. The Modified CV approach is a way of adding extra variance to the qualification data in the case that the qualification data has less variance than expected, which sometimes occurs when qualification testing is performed in a short period of time. Using the Modified CV approach, the standard deviation is calculated by multiplying \code{CV_star * mean_qual} where \code{mean_qual} is either the value supplied or the value calculated by \code{mean(data_qual)} and \eqn{CV*} is determined using \code{\link[=calc_cv_star]{calc_cv_star()}}. Note that the modified CV option should only be used if that data passes the Anderson--Darling test. } \examples{ equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, modcv = TRUE) ## Call: ## equiv_change_mean(n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, ## n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, ## alpha = 0.05,modcv = TRUE) ## ## For alpha = 0.05 ## Modified CV used ## Qualification Sample ## Number 28 9 ## Mean 9.24 9.02 ## SD 0.162 0.15785 ## Result PASS ## Passing Range 8.856695 to 9.623305 } \references{ “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. 
} \seealso{ \code{\link[=calc_cv_star]{calc_cv_star()}} \code{\link[stats:t.test]{stats::t.test()}} } cmstatr/man/glance.equiv_mean_extremum.Rd0000644000176200001440000000520314015477557020317 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equiv.R \name{glance.equiv_mean_extremum} \alias{glance.equiv_mean_extremum} \title{Glance at an \code{equiv_mean_extremum} object} \usage{ \method{glance}{equiv_mean_extremum}(x, ...) } \arguments{ \item{x}{an equiv_mean_extremum object returned from \code{\link[=equiv_mean_extremum]{equiv_mean_extremum()}}} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A one-row \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{alpha} the value of alpha passed to this function \item \code{n_sample} the number of observations in the sample for which equivalency is being checked. This is either the value \code{n_sample} passed to this function or the length of the vector \code{data_sample}. \item \code{modcv} logical value indicating whether the acceptance thresholds are calculated using the modified CV approach \item \code{threshold_min_indiv} The calculated threshold value for minimum individual \item \code{threshold_mean} The calculated threshold value for mean \item \code{result_min_indiv} a character vector of either "PASS" or "FAIL" indicating whether the data from \code{data_sample} passes the test for minimum individual. If \code{data_sample} was not supplied, this value will be \code{NULL} \item \code{result_mean} a character vector of either "PASS" or "FAIL" indicating whether the data from \code{data_sample} passes the test for mean. If \code{data_sample} was not supplied, this value will be \code{NULL} \item \code{min_sample} The minimum value from the vector \code{data_sample}. 
if \code{data_sample} was not supplied, this will have a value of \code{NULL} \item \code{mean_sample} The mean value from the vector \code{data_sample}. If \code{data_sample} was not supplied, this will have a value of \code{NULL} } } \description{ Glance accepts an object of type \code{equiv_mean_extremum} and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries. Glance does not do any calculations: it just gathers the results in a tibble. } \examples{ x0 <- rnorm(30, 100, 4) x1 <- rnorm(5, 91, 7) eq <- equiv_mean_extremum(data_qual = x0, data_sample = x1, alpha = 0.01) glance(eq) ## # A tibble: 1 x 9 ## alpha n_sample modcv threshold_min_indiv threshold_mean ## ## 1 0.01 5 FALSE 86.2 94.9 ## # ... with 4 more variables: result_min_indiv , result_mean , ## # min_sample , mean_sample } \seealso{ \code{\link[=equiv_mean_extremum]{equiv_mean_extremum()}} } cmstatr/man/glance.anderson_darling.Rd0000644000176200001440000000311114015477557017545 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/adtest.R \name{glance.anderson_darling} \alias{glance.anderson_darling} \title{Glance at an \code{anderson_darling} object} \usage{ \method{glance}{anderson_darling}(x, ...) } \arguments{ \item{x}{an \code{anderson_darling} object} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A one-row \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{dist} the distribution used \item \code{n} the number of observations in the sample \item \code{A} the Anderson--Darling test statistic \item \code{osl} the observed significance level (p-value), assuming the parameters of the distribution are estimated from the data \item \code{alpha} the required significance level for the test. This value is given by the user. 
\item \code{reject_distribution} a logical value indicating whether the hypothesis that the data is drawn from the specified distribution should be rejected } } \description{ Glance accepts an object of type \code{anderson_darling} and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries. Glance does not do any calculations: it just gathers the results in a tibble. } \examples{ x <- rnorm(100, 100, 4) ad <- anderson_darling_weibull(x = x) glance(ad) ## # A tibble: 1 x 6 ## dist n A osl alpha reject_distribution ## ## 1 Weibull 100 2.62 0.00000207 0.05 TRUE } \seealso{ \code{\link[=anderson_darling]{anderson_darling()}} } cmstatr/man/cv.Rd0000644000176200001440000000171514016050422013377 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cv.R \name{cv} \alias{cv} \title{Calculate the coefficient of variation} \usage{ cv(x, na.rm = FALSE) } \arguments{ \item{x}{a vector} \item{na.rm}{logical. Should missing values be removed?} } \value{ The calculated CV } \description{ The coefficient of variation (CV) is the ratio of the standard deviation to the mean of a sample. This function takes a vector of data and calculates the CV. } \examples{ set.seed(15) # make this example reproducible x <- rnorm(100, mean = 100, sd = 5) cv(x) ## [1] 0.04944505 # the cv function can also be used within a call to dplyr::summarise library(dplyr) carbon.fabric \%>\% filter(test == "WT") \%>\% group_by(condition) \%>\% summarise(mean = mean(strength), cv = cv(strength)) ## # A tibble: 3 x 3 ## condition mean cv ## ## 1 CTD 137. 0.0417 ## 2 ETW 135. 0.0310 ## 3 RTD 142. 
0.0451 } cmstatr/man/anderson_darling.Rd0000644000176200001440000001077014015477557016326 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/adtest.R \name{anderson_darling} \alias{anderson_darling} \alias{anderson_darling_normal} \alias{anderson_darling_lognormal} \alias{anderson_darling_weibull} \title{Anderson--Darling test for goodness of fit} \usage{ anderson_darling_normal(data = NULL, x, alpha = 0.05) anderson_darling_lognormal(data = NULL, x, alpha = 0.05) anderson_darling_weibull(data = NULL, x, alpha = 0.05) } \arguments{ \item{data}{a data.frame-like object (optional)} \item{x}{a numeric vector or a variable in the data.frame} \item{alpha}{the required significance level of the test. Defaults to 0.05.} } \value{ an object of class \code{anderson_darling}. This object has the following fields. \itemize{ \item \code{call} the expression used to call this function \item \code{dist} the distribution used \item \code{data} a copy of the data analyzed \item \code{n} the number of observations in the sample \item \code{A} the Anderson--Darling test statistic \item \code{osl} the observed significance level (p-value), assuming the parameters of the distribution are estimated from the data \item \code{alpha} the required significance level for the test. This value is given by the user. \item \code{reject_distribution} a logical value indicating whether the hypothesis that the data is drawn from the specified distribution should be rejected } } \description{ Calculates the Anderson--Darling test statistic for a sample given a particular distribution, and determines whether to reject the hypothesis that a sample is drawn from that distribution. } \details{ The Anderson--Darling test statistic is calculated for the distribution given by the user. The observed significance level (OSL), or p-value, is calculated assuming that the parameters of the distribution are unknown; these parameters are estimate from the data. 
The function \code{anderson_darling_normal} computes the Anderson--Darling test statistic given a normal distribution with mean and standard deviation equal to the sample mean and standard deviation. The function \code{anderson_darling_lognormal} is the same as \code{anderson_darling_normal} except that the data is log transformed first. The function \code{anderson_darling_weibull} computes the Anderson--Darling test statistic given a Weibull distribution with shape and scale parameters estimated from the data using a maximum likelihood estimate. The test statistic, \code{A}, is modified to account for the fact that the parameters of the population are not known, but are instead estimated from the sample. This modification is a function of the sample size only, and is different for each distribution (normal/lognormal or Weibull). Several such modifications have been proposed. This function uses the modification published in Stephens (1974), Lawless (1982) and CMH-17-1G. Some other implementations of the Anderson-Darling test, such as the implementation in the \code{nortest} package, use other modifications, such as the one published in D'Agostino and Stephens (1986). As such, the p-value reported by this function may differ from the p-value reported by implementations of the Anderson--Darling test that use different modifiers. Only the unmodified test statistic is reported in the result of this function, but the modified test statistic is used to compute the OSL (p-value). This function uses the formulae for observed significance level (OSL) published in CMH-17-1G. These formulae depend on the particular distribution used. The results of this function have been validated against published values in Lawless (1982). 
} \examples{ library(dplyr) carbon.fabric \%>\% filter(test == "FC") \%>\% filter(condition == "RTD") \%>\% anderson_darling_normal(strength) ## Call: ## anderson_darling_normal(data = ., x = strength) ## ## Distribution: Normal ( n = 18 ) ## Test statistic: A = 0.9224776 ## OSL (p-value): 0.01212193 (assuming unknown parameters) ## Conclusion: Sample is not drawn from a Normal distribution (alpha = 0.05) } \references{ J. F. Lawless, \emph{Statistical models and methods for lifetime data}. New York: Wiley, 1982. "Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials," SAE International, CMH-17-1G, Mar. 2012. M. A. Stephens, “EDF Statistics for Goodness of Fit and Some Comparisons,” Journal of the American Statistical Association, vol. 69, no. 347. pp. 730–737, 1974. R. D’Agostino and M. Stephens, Goodness-of-Fit Techniques. New York: Marcel Dekker, 1986. } cmstatr/man/levene_test.Rd0000644000176200001440000000501414015477557015325 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/levene.R \name{levene_test} \alias{levene_test} \title{Levene's Test for Equality of Variance} \usage{ levene_test(data = NULL, x, groups, alpha = 0.05, modcv = FALSE) } \arguments{ \item{data}{a data.frame} \item{x}{the variable in the data.frame or a vector on which to perform the Levene's test (usually strength)} \item{groups}{a variable in the data.frame that defines the groups} \item{alpha}{the significance level (default 0.05)} \item{modcv}{a logical value indicating whether the modified CV approach should be used.} } \value{ Returns an object of class \code{adk}. 
This object has the following fields: \itemize{ \item \code{call} the expression used to call this function \item \code{data} the original data supplied by the user \item \code{groups} a vector of the groups used in the computation \item \code{alpha} the value of alpha specified \item \code{modcv} a logical value indicating whether the modified CV approach was used. \item \code{n} the total number of observations \item \code{k} the number of groups \item \code{f} the value of the F test statistic \item \code{p} the computed p-value \item \code{reject_equal_variance} a boolean value indicating whether the null hypothesis that all samples have the same variance is rejected \item \code{modcv_transformed_data} the data after the modified CV transformation } } \description{ This function performs the Levene's test for equality of variance. } \details{ This function performs the Levene's test for equality of variance. The data is transformed as follows: \deqn{w_{ij} = \left| x_{ij} - m_i \right|}{wij = | xij - mi |} Where \eqn{m_i}{mi} is median of the \eqn{ith} group. An F-Test is then performed on the transformed data. When \code{modcv=TRUE}, the data from each group is first transformed according to the modified coefficient of variation (CV) rules before performing Levene's test. } \examples{ library(dplyr) carbon.fabric.2 \%>\% filter(test == "FC") \%>\% levene_test(strength, condition) ## ## Call: ## levene_test(data = ., x = strength, groups = condition) ## ## n = 91 k = 5 ## F = 3.883818 p-value = 0.00600518 ## Conclusion: Samples have unequal variance ( alpha = 0.05 ) } \references{ “Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012. 
} \seealso{ \code{\link[=calc_cv_star]{calc_cv_star()}} \code{\link[=transform_mod_cv]{transform_mod_cv()}} } cmstatr/man/nested_data_plot.Rd0000644000176200001440000000542314065516021016306 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot-nested.R \name{nested_data_plot} \alias{nested_data_plot} \title{Create a plot of nested sources of variation} \usage{ nested_data_plot( dat, x, groups = c(), stat = "mean", ..., y_gap = 1, divider_color = "grey50", point_args = list(), dline_args = list(), vline_args = list(), hline_args = list(), label_args = list(), connector_args = list() ) } \arguments{ \item{dat}{a \code{data.frame} or similar object} \item{x}{the variable within \code{dat} to plot. Most often this would be a strength or modulus variable.} \item{groups}{a vector of variables to group the data by} \item{stat}{a function for computing the central location for each group. This is normally "mean" but could be "median" or another function.} \item{...}{extra options. See Details.} \item{y_gap}{the vertical gap between grouping variables} \item{divider_color}{the color of the lines between grouping variables. 
Or \code{NULL} to omit these lines.} \item{point_args}{arguments to pass to \link[ggplot2:geom_point]{ggplot2::geom_point} when plotting individual data points.} \item{dline_args}{arguments to pass to \link[ggplot2:geom_segment]{ggplot2::geom_segment} when plotting the horizontal lines between data points.} \item{vline_args}{arguments to pass to \link[ggplot2:geom_segment]{ggplot2::geom_segment} when plotting vertical lines} \item{hline_args}{arguments to pass to \link[ggplot2:geom_segment]{ggplot2::geom_segment} when plotting horizontal lines connecting levels in groups} \item{label_args}{arguments to pass to \link[ggplot2:geom_text]{ggplot2::geom_label} when plotting labels} \item{connector_args}{arguments to pass to \link[ggplot2:geom_point]{ggplot2::geom_point} when plotting the connection between the vertical lines and the horizontal lines connecting levels in groups} } \description{ Creates a plot showing the breakdown of variation within a sample. This function uses \link{ggplot2} internally. } \details{ Extra options can be included to control aesthetic options. The following options are supported. Any (or all) can be set to a single variable in the data set. \itemize{ \item \code{color}: Controls the color of the data points. \item \code{fill}: Controls the fill color of the labels. When a particular label is associated with data points with more than one level of the supplied variable, the fill is omitted. 
} } \examples{ library(dplyr) carbon.fabric.2 \%>\% filter(test == "WT" & condition == "RTD") \%>\% nested_data_plot(strength, groups = c(batch, panel)) # Labels can be filled too carbon.fabric.2 \%>\% filter(test == "WT" & condition == "RTD") \%>\% nested_data_plot(strength, groups = c(batch, panel), fill = batch) } cmstatr/man/transform_mod_cv.Rd0000644000176200001440000001047014015477557016354 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/norm.R \name{transform_mod_cv} \alias{transform_mod_cv} \alias{transform_mod_cv_ad} \title{Transforms data according to the modified CV rule} \usage{ transform_mod_cv_ad(x, condition, batch) transform_mod_cv(x) } \arguments{ \item{x}{a vector of data to transform} \item{condition}{a vector indicating the condition to which each observation belongs} \item{batch}{a vector indicating the batch to which each observation belongs} } \value{ A vector of transformed data } \description{ Transforms data according to the modified coefficient of variation (CV) rule. This is used to add additional variance to datasets with unexpectedly low variance, which is sometimes encountered during testing of new materials over short periods of time. Two versions of this transformation are implemented. The first version, \code{transform_mod_cv()}, transforms the data in a single group (with no other structure) according to the modified CV rules. The second version, \code{transform_mod_cv_ad()}, transforms data that is structured according to both condition and batch, as is commonly done for the Anderson--Darling k-Sample and Anderson-Darling tests when pooling across environments. 
} \details{ The modified CV transformation takes the general form: \deqn{\frac{S_i^*}{S_i} (x_{ij} - \bar{x_i}) + \bar{x_i}}{ Si*/Si (xij-x_bar_i) + x_bar_i } Where \eqn{S_i^*}{Si*} is the modified standard deviation (mod CV times mean) for the \eqn{ith} group; \eqn{S_i}{Si} is the standard deviation for the \eqn{ith} group, \eqn{\bar{x_i}}{x_bar_i} is the group mean and \eqn{x_{ij}}{xij} is the observation. \code{transform_mod_cv()} takes a vector containing the observations and transforms the data. The equation above is used, and all observations are considered to be from the same group. \code{transform_mod_cv_ad()} takes a vector containing the observations plus a vector containing the corresponding conditions and a vector containing the batches. This function first calculates the modified CV value from the data from each condition (independently). Then, within each condition, the transformation above is applied to produce the transformed data \eqn{x'}. This transformed data is further transformed using the following equation. \deqn{x_{ij}'' = C (x'_{ij} - \bar{x_i}) + \bar{x_i}}{ x_ij'' = C (x'_ij - x_bar_i) + x_bar_i} Where: \deqn{C = \sqrt{\frac{SSE^*}{SSE'}}}{C = sqrt(SSE* / SSE')} \deqn{SSE^* = (n-1) (CV^* \bar{x})^2 - \sum(n_i(\bar{x_i}-\bar{x})^2)}{ SSE* = (n-1) (CV* x_bar)^2 - sum(n_i(x_bar_i-x_bar)^2)} \deqn{SSE' = \sum(x'_{ij} - \bar{x_i})^2}{SSE' = sum(x'_ij - x_bar_i)^2} } \examples{ # Transform data according to the modified CV transformation # and report the original and modified CV for each condition library(dplyr) carbon.fabric \%>\% filter(test == "FT") \%>\% group_by(condition) \%>\% mutate(trans_strength = transform_mod_cv(strength)) \%>\% head(10) ## # A tibble: 10 x 6 ## # Groups: condition [1] ## id test condition batch strength trans_strength ## ## 1 FT-RTD-1-1 FT RTD 1 126. 126. ## 2 FT-RTD-1-2 FT RTD 1 139. 141. ## 3 FT-RTD-1-3 FT RTD 1 116. 115. ## 4 FT-RTD-1-4 FT RTD 1 132. 133. ## 5 FT-RTD-1-5 FT RTD 1 129. 129. 
## 6 FT-RTD-1-6 FT RTD 1 130. 130. ## 7 FT-RTD-2-1 FT RTD 2 131. 131. ## 8 FT-RTD-2-2 FT RTD 2 124. 124. ## 9 FT-RTD-2-3 FT RTD 2 125. 125. ## 10 FT-RTD-2-4 FT RTD 2 120. 119. # The CV of this transformed data can be computed to verify # that the resulting CV follows the rules for modified CV carbon.fabric \%>\% filter(test == "FT") \%>\% group_by(condition) \%>\% mutate(trans_strength = transform_mod_cv(strength)) \%>\% summarize(cv = sd(strength) / mean(strength), mod_cv = sd(trans_strength) / mean(trans_strength)) ## # A tibble: 3 x 3 ## condition cv mod_cv ## ## 1 CTD 0.0423 0.0612 ## 2 ETW 0.0369 0.0600 ## 3 RTD 0.0621 0.0711 } \seealso{ \code{\link[=calc_cv_star]{calc_cv_star()}} \code{\link[=cv]{cv()}} } cmstatr/man/glance.adk.Rd0000644000176200001440000000265114015477557015003 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/adk.R \name{glance.adk} \alias{glance.adk} \title{Glance at a \code{adk} (Anderson--Darling k-Sample) object} \usage{ \method{glance}{adk}(x, ...) } \arguments{ \item{x}{an \code{adk} object} \item{...}{Additional arguments. Not used. Included only to match generic signature.} } \value{ A one-row \code{\link[tibble:tibble]{tibble::tibble()}} with the following columns: \itemize{ \item \code{alpha} the significance level for the test \item \code{n} the sample size for the test \item \code{k} the number of samples \item \code{sigma} the computed standard deviation of the test statistic \item \code{ad} the test statistic \item \code{p} the p-value of the test \item \code{reject_same_dist} whether the test concludes that the samples are drawn from different populations } } \description{ Glance accepts an object of type \code{adk} and returns a \code{\link[tibble:tibble]{tibble::tibble()}} with one row of summaries. Glance does not do any calculations: it just gathers the results in a tibble. 
} \examples{ x <- c(rnorm(20, 100, 5), rnorm(20, 105, 6)) k <- c(rep(1, 20), rep(2, 20)) a <- ad_ksample(x = x, groups = k) glance(a) ## A tibble: 1 x 7 ## alpha n k sigma ad p reject_same_dist ## ## 1 0.025 40 2 0.727 4.37 0.00487 TRUE } \seealso{ \code{\link[=ad_ksample]{ad_ksample()}} } cmstatr/man/cmstatr-package.Rd0000644000176200001440000000170314477217401016047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cmstatr.R \docType{package} \name{cmstatr-package} \alias{cmstatr} \alias{cmstatr-package} \title{cmstatr: Statistical Methods for Composite Material Data} \description{ To learn more about \code{cmstatr}, start with the vignettes: \code{browseVignettes(package = "cmstatr")} } \seealso{ Useful links: \itemize{ \item \url{https://www.cmstatr.net/} \item \url{https://github.com/cmstatr/cmstatr} \item Report bugs at \url{https://github.com/cmstatr/cmstatr/issues} } } \author{ \strong{Maintainer}: Stefan Kloppenborg \email{stefan@kloppenborg.ca} (\href{https://orcid.org/0000-0002-1908-5214}{ORCID}) Other contributors: \itemize{ \item Billy Cheng \email{bcheng@comtekadvanced.com} [contributor] \item Ally Fraser \email{ally.fraser25@gmail.com} [contributor] \item Jeffrey Borlik [contributor] \item Comtek Advanced Structures, Ltd. 
[funder] } } \keyword{internal} cmstatr/DESCRIPTION0000644000176200001440000000450714574604752013461 0ustar liggesusersPackage: cmstatr Type: Package Title: Statistical Methods for Composite Material Data Version: 0.9.3 Date: 2024-03-13 Depends: R (>= 3.3) Authors@R: c( person("Stefan", "Kloppenborg", email = "stefan@kloppenborg.ca", comment = c(ORCID = "0000-0002-1908-5214"), role = c("aut", "cre")), person("Billy", "Cheng", email = "bcheng@comtekadvanced.com", role = c("ctb")), person("Ally", "Fraser", email = "ally.fraser25@gmail.com", role = c("ctb")), person("Jeffrey", "Borlik", role = c("ctb")), person("Comtek Advanced Structures, Ltd.", role = c("fnd")) ) Description: An implementation of the statistical methods commonly used for advanced composite materials in aerospace applications. This package focuses on calculating basis values (lower tolerance bounds) for material strength properties, as well as performing the associated diagnostic tests. This package provides functions for calculating basis values assuming several different distributions, as well as providing functions for non-parametric methods of computing basis values. Functions are also provided for testing the hypothesis that there is no difference between strength and modulus data from an alternate sample and that from a "qualification" or "baseline" sample. For a discussion of these statistical methods and their use, see the Composite Materials Handbook, Volume 1 (2012, ISBN: 978-0-7680-7811-4). Additional details about this package are available in the paper by Kloppenborg (2020, ). 
URL: https://www.cmstatr.net/, https://github.com/cmstatr/cmstatr BugReports: https://github.com/cmstatr/cmstatr/issues License: AGPL-3 Encoding: UTF-8 LazyData: true Imports: dplyr, generics, ggplot2, kSamples, MASS, purrr, rlang, stats, tibble Suggests: knitr, lintr, rmarkdown, spelling, testthat, tidyr, vdiffr RoxygenNote: 7.3.1 VignetteBuilder: knitr Language: en-US Config/testthat/parallel: true Config/testthat/edition: 3 NeedsCompilation: no Packaged: 2024-03-14 13:30:55 UTC; stefan Author: Stefan Kloppenborg [aut, cre] (), Billy Cheng [ctb], Ally Fraser [ctb], Jeffrey Borlik [ctb], Comtek Advanced Structures, Ltd. [fnd] Maintainer: Stefan Kloppenborg Repository: CRAN Date/Publication: 2024-03-14 14:30:02 UTC cmstatr/build/0000755000176200001440000000000014574576017013046 5ustar liggesuserscmstatr/build/vignette.rds0000644000176200001440000000054714574576017015413 0ustar liggesusersS]O0-l&Dz2M Qb|#7JC.5DAsӗ&c `Հ C[VVAg  {:vӉЎ,SFhU= Jƀ$9bͳ+:Lҍ*T >"eASIP"xTpO|}! $!2Ƥ’#o/  pٵ*hlFVv\A?*mWK@ vþT7$n`]X"3Xw{8-˯+&WKSWe_cmstatr/tests/0000755000176200001440000000000014065516021013071 5ustar liggesuserscmstatr/tests/spelling.R0000644000176200001440000000016413774367346015056 0ustar liggesusersif (requireNamespace("spelling", quietly = TRUE)) { spelling::spell_check_test(vignettes = TRUE, error = FALSE) } cmstatr/tests/testthat/0000755000176200001440000000000014574604752014747 5ustar liggesuserscmstatr/tests/testthat/test-plotting.R0000644000176200001440000000252414065516021017674 0ustar liggesuserssuppressMessages(library(dplyr)) suppressMessages(library(ggplot2)) test_that("stat_esf", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("stat_esf", { carbon.fabric.2 %>% filter(test == "WT" & condition == "RTD") %>% group_by(batch) %>% ggplot(aes(x = strength, color = batch)) + stat_esf(pad = TRUE) + ggtitle("Distribution of Data For Each Batch") }) }) test_that("stat_normal_surv_func", { skip_if_not_installed("vdiffr") 
vdiffr::expect_doppelganger("stat_normal_surv_func", { carbon.fabric.2 %>% filter(test == "WT" & condition == "RTD") %>% group_by(batch) %>% ggplot(aes(x = strength, color = batch)) + stat_normal_surv_func() + ggtitle("Distribution of Data For Each Batch") }) }) test_that("stat_normal_surv_func and stat_esf", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("stat_normal_surv_func and stat_esf", { set.seed(100) data.frame( strength = c(rnorm(400, 100, 10), rnorm(400, 120, 10), rnorm(400, 140, 10)), batch = c(rep("A", 400), rep("B", 400), rep("C", 400)) ) %>% group_by(batch) %>% ggplot(aes(x = strength, color = batch)) + stat_esf(pad = TRUE) + stat_normal_surv_func() + ggtitle("Distribution of Data For Each Batch") }) }) cmstatr/tests/testthat/test-cv.R0000644000176200001440000000152414065516021016443 0ustar liggesuserssuppressMessages(library(dplyr)) test_that("CV produces expected results", { x <- rnorm(30, 100, 8) res <- cv(x) expect_equal(res, sd(x) / mean(x)) }) test_that("CV with na.rm=TRUE works", { x <- rnorm(30, 100, 8) x <- c(x, NA, NA, NA) res <- cv(x, na.rm = TRUE) expect_equal(res, sd(x[1:30]) / mean(x[1:30])) }) test_that("cv works inside dplyr::summarize", { data.frame( x = rnorm(30, 100, 8) ) %>% summarize(cv = cv(x), test = expect_equal(cv, sd(x) / mean(x))) }) test_that("cv works inside dplyr::summarize with na.rm=TRUE", { data.frame( x = rnorm(30, 100, 8) ) %>% bind_rows( data.frame(x = c(NA, NA, NA)) ) %>% summarize(cv = cv(x, na.rm = TRUE), test = expect_equal(cv, sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE))) }) cmstatr/tests/testthat/test-plot-nested.R0000644000176200001440000000370314065516021020272 0ustar liggesuserssuppressMessages(library(dplyr)) suppressMessages(library(ggplot2)) test_that("nested_data_plot-no_grouping", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("nested_data_plot-no_grouping", { carbon.fabric.2 %>% filter(test == "WT") %>% nested_data_plot(strength) }) }) 
test_that("nested_data_plot-single_grouping", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("nested_data_plot-single_grouping", { carbon.fabric.2 %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch)) }) }) test_that("nested_data_plot-two_groupings", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("nested_data_plot-two_groupings", { carbon.fabric.2 %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch, panel)) }) }) test_that("nested_data_plot-single_obs_per_group", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("nested_data_plot-single_obs_per_group", { carbon.fabric.2 %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch, panel, strength)) }) }) test_that("nested_data_plot-color", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("nested_data_plot-color", { carbon.fabric.2 %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch, panel), color = batch) }) }) test_that("nested_data_plot-color-and-fill", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("nested_data_plot-color-and-fill", { carbon.fabric.2 %>% filter(test == "WT") %>% nested_data_plot(strength, groups = c(condition, batch, panel), color = batch, fill = condition) }) }) cmstatr/tests/testthat/test-lintr.R0000644000176200001440000000116214573746051017175 0ustar liggesusersif (requireNamespace("lintr", quietly = TRUE)) { test_that("Package Style", { lintr::expect_lint_free( path = getwd(), relative_path = FALSE, exclusions = list( "R/RcppExports.R", # Ignore the auto-generated R files "vignettes/adktest.R", "vignettes/cmstatr_Graphing.R", "vignettes/cmstatr_Tutorial.R", "vignettes/cmstatr_Validation.R", "vignettes/hk_ext.R" ), linters = lintr::linters_with_defaults( line_length_linter = NULL, trailing_whitespace_linter = NULL, indentation_linter = NULL ) ) }) } 
cmstatr/tests/testthat/test-adk.R0000644000176200001440000001420014065516021016565 0ustar liggesuserssuppressMessages(library(dplyr)) suppressMessages(library(kSamples)) # nolint test_that("kSamples package gives results that match published example", { # Reproduce the example from: # F. W. Scholz and M. Stephens, “K-Sample Anderson-Darling Tests,” Journal # of the American Statistical Association, vol. 82, no. 399. # pp. 918–924, Sep-1987. df <- data.frame( smoothness = c( 38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0, 39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8, 34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0, 34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8 ), lab = c(rep("A", 8), rep("B", 8), rep("C", 8), rep("D", 8)) ) res <- ad.test(smoothness ~ lab, data = df) expect_equal(res[["sig"]], 1.2038, tolerance = 1e-4) ad <- res[["ad"]] expect_equal(ad["version 1:", "AD"], 8.3559, tolerance = 1e-3) expect_equal(ad["version 1:", " asympt. P-value"], 0.0023, tolerance = 1e-3 / 0.0023) expect_equal(ad["version 2:", "AD"], 8.3926, tolerance = 1e-3) expect_equal(ad["version 2:", " asympt. 
P-value"], 0.0022, tolerance = 1e-3 / 0.0022) }) test_that("ADK test match ASAP", { res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.456, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.604, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "CTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.778, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "FT") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.777, tolerance = 0.003) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "FT") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.355, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "FT") %>% filter(condition == "CTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.432, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "WC") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.384, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "WC") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.723, tolerance = 0.003) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "WC") %>% filter(condition == "CTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.145, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "FC") %>% filter(condition == 
"RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.865, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "FC") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.934, tolerance = 0.002) expect_false(res$reject_same_dist) res <- carbon.fabric %>% filter(test == "FC") %>% filter(condition == "CTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.501, tolerance = 0.002) expect_false(res$reject_same_dist) }) test_that("ADK test matches example from CMH-17-1G", { # Reference the example in section 8.3.11.1.1 etw <- tribble( ~batch, ~strength, 1, 106.357525, 1, 105.898733, 1, 88.4640082, 1, 103.901744, 1, 80.2058219, 1, 109.199597, 1, 61.0139431, 2, 99.3207107, 2, 115.86177, 2, 82.6133082, 2, 85.3690411, 2, 115.801622, 2, 44.3217741, 2, 117.328077, 2, 88.6782903, 3, 107.676986, 3, 108.960241, 3, 116.12264, 3, 80.2334815, 3, 106.14557, 3, 104.667866, 3, 104.234953 ) # ETW: # ADK = 0.793 / same distribution res <- etw %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.793, tolerance = 0.003) expect_false(res$reject_same_dist) expect_output(print(res), ".*N.*22") expect_output(print(res), ".*k.*3") expect_output(print(res), "Conclusion: Samples come") etw2 <- tribble( ~batch, ~strength, 1, 99.0239966, 1, 103.341238, 1, 100.30213, 1, 98.4634133, 1, 92.264728, 1, 103.487693, 1, 113.734763, 2, 108.172659, 2, 108.426732, 2, 116.260375, 2, 121.04961, 2, 111.223082, 2, 104.574843, 2, 103.222552, 3, 99.3918538, 3, 87.3421658, 3, 102.730741, 3, 96.3694916, 3, 99.5946088, 3, 97.0712407 ) # ETW2: # ADK = 3.024 / different distribution res <- etw2 %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 3.024, tolerance = 0.003) expect_true(res$reject_same_dist) expect_output(print(res), ".*N.*20") expect_output(print(res), ".*k.*3") expect_output(print(res), "Conclusion: Samples do not come") }) test_that("glance.adk 
produces expected output", { res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) glance_res <- glance(res) expect_equal(glance_res[["alpha"]][1], 0.025) expect_equal(glance_res[["n"]][1], 18) expect_equal(glance_res[["k"]][1], 3) expect_equal(glance_res[["sigma"]], 0.944, tolerance = 0.001) expect_equal(glance_res[["ad"]][1], 0.456 * (res$k - 1)) expect_equal(glance_res[["p"]][1], 0.96, tolerance = 0.001) expect_equal(glance_res[["reject_same_dist"]], FALSE) }) test_that("ad_ksample checks that input vectors are equal length", { expect_error({ ad_ksample(x = c(1, 2, 3), groups = c("A", "B")) }) }) cmstatr/tests/testthat/test-levene.R0000644000176200001440000001207414065516021017313 0ustar liggesuserssuppressMessages(library(dplyr)) # From CMH-17-1G Section 8.3.11.1.1 df <- tribble( ~condition, ~batch, ~strength, "CTD", 1, 118.3774604, "CTD", 1, 123.6035612, "CTD", 1, 115.2238092, "CTD", 1, 112.6379744, "CTD", 1, 116.5564277, "CTD", 1, 123.1649896, "CTD", 2, 128.5589027, "CTD", 2, 113.1462103, "CTD", 2, 121.4248107, "CTD", 2, 134.3241906, "CTD", 2, 129.6405117, "CTD", 2, 117.9818658, "CTD", 3, 115.4505226, "CTD", 3, 120.0369467, "CTD", 3, 117.1631088, "CTD", 3, 112.9302797, "CTD", 3, 117.9114501, "CTD", 3, 120.1900159, "CTD", 3, 110.7295966, "RTD", 1, 84.951364, "RTD", 1, 92.4891822, "RTD", 1, 96.8212659, "RTD", 1, 109.030325, "RTD", 1, 97.8918182, "RTD", 1, 100.921517, "RTD", 1, 103.699444, "RTD", 2, 93.790812, "RTD", 2, 107.526709, "RTD", 2, 94.5769704, "RTD", 2, 93.8831373, "RTD", 2, 98.2296605, "RTD", 2, 111.34659, "RTD", 2, 100.817538, "RTD", 3, 100.382203, "RTD", 3, 91.5037811, "RTD", 3, 100.083233, "RTD", 3, 95.6393615, "RTD", 3, 109.304779, "RTD", 3, 999.1205847, "RTD", 3, 100.078562, "ETD", 1, 83.7436035, "ETD", 1, 84.3831677, "ETD", 1, 94.8030433, "ETD", 1, 94.3931537, "ETD", 1, 101.702222, "ETD", 1, 86.5372121, "ETD", 1, 92.3772684, "ETD", 2, 89.2084024, "ETD", 2, 100.686001, "ETD", 2, 81.0444192, 
"ETD", 2, 91.339807, "ETD", 2, 93.1441939, "ETD", 2, 85.8204168, "ETD", 3, 94.8966273, "ETD", 3, 95.806852, "ETD", 3, 86.7842252, "ETD", 3, 94.4011973, "ETD", 3, 96.7231171, "ETD", 3, 89.9010384, "ETD", 3, 89.32672306, "ETW", 1, 106.357525, "ETW", 1, 105.898733, "ETW", 1, 88.4640082, "ETW", 1, 103.901744, "ETW", 1, 80.2058219, "ETW", 1, 109.199597, "ETW", 1, 61.0139431, "ETW", 2, 99.3207107, "ETW", 2, 115.86177, "ETW", 2, 82.6133082, "ETW", 2, 85.3690411, "ETW", 2, 115.801622, "ETW", 2, 44.3217741, "ETW", 2, 117.328077, "ETW", 2, 88.6782903, "ETW", 3, 107.676986, "ETW", 3, 108.960241, "ETW", 3, 116.12264, "ETW", 3, 80.2334815, "ETW", 3, 106.14557, "ETW", 3, 104.667866, "ETW", 3, 104.234953, "ETW2", 1, 99.0239966, "ETW2", 1, 103.341238, "ETW2", 1, 100.30213, "ETW2", 1, 98.4634133, "ETW2", 1, 92.264728, "ETW2", 1, 103.487693, "ETW2", 1, 113.734763, "ETW2", 2, 108.172659, "ETW2", 2, 108.426732, "ETW2", 2, 116.260375, "ETW2", 2, 121.04961, "ETW2", 2, 111.223082, "ETW2", 2, 104.574843, "ETW2", 2, 103.222552, "ETW2", 3, 99.3918538, "ETW2", 3, 87.3421658, "ETW2", 3, 102.730741, "ETW2", 3, 96.3694916, "ETW2", 3, 99.5946088, "ETW2", 3, 97.0712407 ) test_that("Levene's test matches ASAP", { res <- df %>% levene_test(strength, condition) expect_equal(res$f, 0.896, tolerance = 0.005) expect_false(res$reject_equal_variance) expect_output(print(res), ".*n.*102") expect_output(print(res), ".*k.*5") expect_output(print(res), "Conclusion: Samples have equal variance") }) test_that("Levene's test matches results from STAT17", { res <- df %>% filter(condition == "CTD") %>% levene_test(strength, batch) expect_equal(res$f, 3.850, tolerance = 0.005) res <- df %>% filter(condition == "RTD") %>% levene_test(strength, batch) expect_equal(res$f, 0.973, tolerance = 0.005) res <- df %>% filter(condition == "ETD") %>% levene_test(strength, batch) expect_equal(res$f, 0.723, tolerance = 0.005) res <- df %>% filter(condition == "ETW2") %>% levene_test(strength, batch) expect_equal(res$f, 0.123, 
tolerance = 0.005) }) test_that("glance produces expected results", { res <- df %>% levene_test(strength, condition, alpha = 0.05) glance_res <- glance(res) expect_equal(glance_res$alpha[1], 0.05) expect_equal(glance_res$n[1], 102) expect_equal(glance_res$k[1], 5) expect_equal(glance_res$f[1], 0.896, tolerance = 0.005) expect_equal(glance_res$p[1], 0.469, tolerance = 0.005) expect_false(glance_res$reject_equal_variance[1]) }) test_that("printing works correctly for equal variance", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% ungroup() %>% levene_test(trans_strength, condition, alpha = 0.02) expect_equal(res$f, 2.862, tolerance = 0.075) expect_equal(res$p, 0.03, tolerance = 0.01 / 0.03) expect_output( print(res), "2\\.[78]" ) expect_output( print(res), "equal variance" ) }) test_that("printing works correctly for unequal variance", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% ungroup() %>% levene_test(trans_strength, condition, alpha = 0.05) expect_equal(res$f, 2.862, tolerance = 0.075) expect_equal(res$p, 0.03, tolerance = 0.01 / 0.03) expect_output( print(res), "2\\.[78]" ) expect_output( print(res), "unequal variance" ) }) test_that("printing works correctly for modCV", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% levene_test(strength, condition, modcv = TRUE) expect_output( print(res), "Modified CV" ) }) cmstatr/tests/testthat/test-normalize.R0000644000176200001440000001141614065516021020034 0ustar liggesuserssuppressMessages(library(dplyr)) test_that("normalization to ply thickness takes correct argument lengths", { expect_error(normalize_ply_thickness(c(1, 2), 1, 1)) expect_error(normalize_ply_thickness(1, c(1, 2), 1)) expect_error(normalize_ply_thickness(c(1, 2), c(1, 2), c(1, 2))) }) test_that("normalization to ply thickness produces expected numeric results", { 
expect_equal(normalize_ply_thickness(1, 1, 0.25), 4) expect_equal(normalize_ply_thickness(c(1, 2), c(0.5, 0.25), 0.25), c(2, 2) ) }) test_that("normalize_group_mean correctly handles errors and edge cases", { expect_error(normalize_group_mean(x = 1:10, group = 1:8), "length") expect_equal(normalize_group_mean(x = numeric(0L), group = numeric(0L)), numeric(0L)) }) test_that("normalize to group mean produces means of one for all groups", { # since each group is normalized to its own mean, the mean of the normalized # values within each group should be equal to one. res <- carbon.fabric %>% filter(test == "WT") %>% mutate(norm_str = normalize_group_mean(strength, condition)) %>% group_by(condition) %>% summarise(mean_norm_str = mean(norm_str)) %>% rowwise() %>% mutate(test = expect_equal(mean_norm_str, 1)) }) # data from CMH-17-1G Section 8.3.11.1.2 poolable_data <- tribble( ~batch, ~strength, ~condition, 1, 79.04517, "CTD", 1, 102.6014, "CTD", 1, 97.79372, "CTD", 1, 92.86423, "CTD", 1, 117.218, "CTD", 1, 108.7168, "CTD", 1, 112.2773, "CTD", 1, 114.0129, "CTD", 2, 106.8452, "CTD", 2, 112.3911, "CTD", 2, 115.5658, "CTD", 2, 87.40657, "CTD", 2, 102.2785, "CTD", 2, 110.6073, "CTD", 3, 105.2762, "CTD", 3, 110.8924, "CTD", 3, 108.7638, "CTD", 3, 110.9833, "CTD", 3, 101.3417, "CTD", 3, 100.0251, "CTD", 1, 103.2006, "RTD", 1, 105.1034, "RTD", 1, 105.1893, "RTD", 1, 100.4189, "RTD", 2, 85.32319, "RTD", 2, 92.69923, "RTD", 2, 98.45242, "RTD", 2, 104.1014, "RTD", 2, 91.51841, "RTD", 2, 101.3746, "RTD", 2, 101.5828, "RTD", 2, 99.57384, "RTD", 2, 88.84826, "RTD", 3, 92.18703, "RTD", 3, 101.8234, "RTD", 3, 97.68909, "RTD", 3, 101.5172, "RTD", 3, 100.0481, "RTD", 3, 102.0544, "RTD", 1, 63.22764, "ETW", 1, 70.84454, "ETW", 1, 66.43223, "ETW", 1, 75.37771, "ETW", 1, 72.43773, "ETW", 1, 68.43073, "ETW", 1, 69.72524, "ETW", 2, 66.20343, "ETW", 2, 60.51251, "ETW", 2, 65.69334, "ETW", 2, 62.73595, "ETW", 2, 59.00798, "ETW", 2, 62.37761, "ETW", 3, 64.3947, "ETW", 3, 72.8491, "ETW", 3, 
66.56226, "ETW", 3, 66.56779, "ETW", 3, 66.00123, "ETW", 3, 59.62108, "ETW", 3, 60.61167, "ETW", 3, 57.65487, "ETW", 3, 66.51241, "ETW", 3, 64.89347, "ETW", 3, 57.73054, "ETW", 3, 68.94086, "ETW", 3, 61.63177, "ETW", 1, 54.09806, "ETW2", 1, 58.87615, "ETW2", 1, 61.60167, "ETW2", 1, 60.23973, "ETW2", 1, 61.4808, "ETW2", 1, 64.55832, "ETW2", 2, 57.76131, "ETW2", 2, 49.91463, "ETW2", 2, 61.49271, "ETW2", 2, 57.7281, "ETW2", 2, 62.11653, "ETW2", 2, 62.69353, "ETW2", 3, 61.38523, "ETW2", 3, 60.39053, "ETW2", 3, 59.17616, "ETW2", 3, 60.17616, "ETW2", 3, 46.47396, "ETW2", 3, 51.16616, "ETW2" ) test_that("Modified CV transform produces values that match CMH17-STATS", { res <- poolable_data %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% summarise(cv = sd(strength) / mean(strength), mod_cv = sd(trans_strength) / mean(trans_strength)) expect_equal(res$cv[res$condition == "CTD"], 0.0933, tolerance = 1e-4 / 0.1) expect_equal(res$mod_cv[res$condition == "CTD"], 0.0933, tolerance = 1e-4 / 0.1) expect_equal(res$cv[res$condition == "RTD"], 0.0580, tolerance = 1e-4 / 0.058) expect_equal(res$mod_cv[res$condition == "RTD"], 0.0690, tolerance = 1e-4 / 0.0690) expect_equal(res$cv[res$condition == "ETW"], 0.0723, tolerance = 1e-4 / 0.0723) expect_equal(res$mod_cv[res$condition == "ETW"], 0.0761, tolerance = 1e-4 / 0.0761) expect_equal(res$cv[res$condition == "ETW2"], 0.0836, tolerance = 1e-4 / 0.0836) expect_equal(res$mod_cv[res$condition == "ETW2"], 0.0836, tolerance = 1e-4 / 0.0836) }) test_that("mod CV transforms handles errors and edge cases correctly", { expect_error(transform_mod_cv_2_within_condition(x = 1:9, batch = 1:4, 0.5), "length") expect_error(transform_mod_cv_ad(1:10, 1:9, 1:10), "length") expect_error(transform_mod_cv_ad(1:10, 1:10, 1:9), "length") expect_equal( transform_mod_cv_ad(1:10, rep("A", 10), rep("A", 10)), transform_mod_cv_ad(1:10, NULL, NULL) ) expect_error(transform_mod_cv_grouped(1:10, 1:9), "length") }) 
cmstatr/tests/testthat/test-basis.R0000644000176200001440000020104214125365511017134 0ustar liggesuserssuppressMessages(library(dplyr)) test_that("kB factors are correct for normal distribution", { cmh17_factors <- matrix( c(2, 20.581, 36, 1.725, 70, 1.582, 104, 1.522, 3, 6.157, 37, 1.718, 71, 1.579, 105, 1.521, 4, 4.163, 38, 1.711, 72, 1.577, 106, 1.519, 5, 3.408, 39, 1.704, 73, 1.575, 107, 1.518, 6, 3.007, 40, 1.698, 74, 1.572, 108, 1.517, 7, 2.756, 41, 1.692, 75, 1.570, 109, 1.516, 8, 2.583, 42, 1.686, 76, 1.568, 110, 1.515, 9, 2.454, 43, 1.680, 77, 1.566, 111, 1.513, 10, 2.355, 44, 1.675, 78, 1.564, 112, 1.512, 11, 2.276, 45, 1.669, 79, 1.562, 113, 1.511, 12, 2.211, 46, 1.664, 80, 1.560, 114, 1.510, 13, 2.156, 47, 1.660, 81, 1.558, 115, 1.509, 14, 2.109, 48, 1.655, 82, 1.556, 116, 1.508, 15, 2.069, 49, 1.650, 83, 1.554, 117, 1.507, 16, 2.034, 50, 1.646, 84, 1.552, 118, 1.506, 17, 2.002, 51, 1.642, 85, 1.551, 119, 1.505, 18, 1.974, 52, 1.638, 86, 1.549, 120, 1.504, 19, 1.949, 53, 1.634, 87, 1.547, 121, 1.503, 20, 1.927, 54, 1.630, 88, 1.545, 122, 1.502, 21, 1.906, 55, 1.626, 89, 1.544, 123, 1.501, 22, 1.887, 56, 1.623, 90, 1.542, 124, 1.500, 23, 1.870, 57, 1.619, 91, 1.540, 125, 1.499, 24, 1.854, 58, 1.616, 92, 1.539, 126, 1.498, 25, 1.839, 59, 1.613, 93, 1.537, 127, 1.497, 26, 1.825, 60, 1.609, 94, 1.536, 128, 1.496, 27, 1.812, 61, 1.606, 95, 1.534, 129, 1.495, 28, 1.800, 62, 1.603, 96, 1.533, 130, 1.494, 29, 1.789, 63, 1.600, 97, 1.531, 131, 1.493, 30, 1.778, 64, 1.597, 98, 1.530, 132, 1.492, 31, 1.768, 65, 1.595, 99, 1.529, 133, 1.492, 32, 1.758, 66, 1.592, 100, 1.527, 134, 1.491, 33, 1.749, 67, 1.589, 101, 1.526, 135, 1.490, 34, 1.741, 68, 1.587, 102, 1.525, 136, 1.489, 35, 1.733, 69, 1.584, 103, 1.523, 137, 1.488), ncol = 2, byrow = TRUE ) %>% as.data.frame() %>% rename(n = V1) %>% rename(kb = V2) %>% filter(n <= 95) %>% rowwise() %>% mutate(calc_kb = k_factor_normal(n, p = 0.90, conf = 0.95)) %>% mutate(check = expect_lte(abs(calc_kb - kb), expected = 
0.002, label = paste0("Validation failure for ", n, ".", "CMH-17 gives kB=", kb, ",", "library gives kB=", calc_kb))) }) test_that("kA factors are correct for normal distribution", { cmh17_factors <- matrix( c(2, 37.094, 36, 2.983, 70, 2.765, 104, 2.676, 3, 10.553, 37, 2.972, 71, 2.762, 105, 2.674, 4, 7.042, 38, 2.961, 72, 2.758, 106, 2.672, 5, 5.741, 39, 2.951, 73, 2.755, 107, 2.671, 6, 5.062, 40, 2.941, 74, 2.751, 108, 2.669, 7, 4.642, 41, 2.932, 75, 2.748, 109, 2.667, 8, 4.354, 42, 2.923, 76, 2.745, 110, 2.665, 9, 4.143, 43, 2.914, 77, 2.742, 111, 2.663, 10, 3.981, 44, 2.906, 78, 2.739, 112, 2.662, 11, 3.852, 45, 2.898, 79, 2.736, 113, 2.660, 12, 3.747, 46, 2.890, 80, 2.733, 114, 2.658, 13, 3.659, 47, 2.883, 81, 2.730, 115, 2.657, 14, 3.585, 48, 2.876, 82, 2.727, 116, 2.655, 15, 3.520, 49, 2.869, 83, 2.724, 117, 2.654, 16, 3.464, 50, 2.862, 84, 2.721, 118, 2.652, 17, 3.414, 51, 2.856, 85, 2.719, 119, 2.651, 18, 3.370, 52, 2.850, 86, 2.716, 120, 2.649, 19, 3.331, 53, 2.844, 87, 2.714, 121, 2.648, 20, 3.295, 54, 2.838, 88, 2.711, 122, 2.646, 21, 3.263, 55, 2.833, 89, 2.709, 123, 2.645, 22, 3.233, 56, 2.827, 90, 2.706, 124, 2.643, 23, 3.206, 57, 2.822, 91, 2.704, 125, 2.642, 24, 3.181, 58, 2.817, 92, 2.701, 126, 2.640, 25, 3.158, 59, 2.812, 93, 2.699, 127, 2.639, 26, 3.136, 60, 2.807, 94, 2.697, 128, 2.638, 27, 3.116, 61, 2.802, 95, 2.695, 129, 2.636, 28, 3.098, 62, 2.798, 96, 2.692, 130, 2.635, 29, 3.080, 63, 2.793, 97, 2.690, 131, 2.634, 30, 3.064, 64, 2.789, 98, 2.688, 132, 2.632, 31, 3.048, 65, 2.785, 99, 2.686, 133, 2.631, 32, 3.034, 66, 2.781, 100, 2.684, 134, 2.630, 33, 3.020, 67, 2.777, 101, 2.682, 135, 2.628, 34, 3.007, 68, 2.773, 102, 2.680, 136, 2.627, 35, 2.995, 69, 2.769, 103, 2.678, 137, 2.626), ncol = 2, byrow = TRUE ) %>% as.data.frame() %>% rename(n = V1) %>% rename(ka = V2) %>% filter(n <= 75) %>% rowwise() %>% mutate(calc_ka = k_factor_normal(n, p = 0.99, conf = 0.95)) %>% mutate(check = expect_lte(abs(calc_ka - ka), expected = 0.002, label = 
paste0("Validation failure for ", n, ".", "CMH-17 gives kA=", ka, ",", "library gives kA=", calc_ka))) }) test_that("(normal) basis value equals mean when sd = 0", { expect_equal( (data.frame(x = rep(100, 10)) %>% basis_normal(x, p = 0.9, conf = 0.95, override = c("anderson_darling_normal", "outliers_within_batch", "between_batch_variability")))$basis, 100 ) }) test_that("(normal) basis value approx equals percentile for large samples", { m <- 100 s <- 5 set.seed(100) # make sure that this doesn't fail by pure chance q <- qnorm(0.10, m, s, lower.tail = TRUE) basis <- (data.frame(x = rnorm(50, m, s)) %>% basis_normal(x, p = 0.90, conf = 0.95, override = c("outliers_within_batch", "between_batch_variability")))$basis expect_lt(abs(basis - q), 0.1) }) test_that("printing of basis objects works as expected", { set.seed(100) x <- c(runif(25)) expect_output( print(basis_normal(x = x, p = 0.9, conf = 0.95, override = c("outliers_within_batch", "between_batch_variability"))), "B-Basis" ) expect_output( print(basis_normal(x = x, p = 0.99, conf = 0.95, override = c("outliers_within_batch", "between_batch_variability"))), "A-Basis" ) expect_output( print(basis_normal(x = x, p = 0.9, conf = 0.9, override = c("outliers_within_batch", "between_batch_variability"))), "[^AB-]Basis" ) expect_error( print.basis(x) # give it the wrong type ) }) test_that("normal basis value matches STAT17/ASAP result", { data <- c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) res <- basis_normal(x = data, p = 0.9, conf = 0.95, override = c("outliers_within_batch", "between_batch_variability")) expect_equal(res$basis, 129.287, tolerance = 0.0005) expect_output(print(res), "b-basis.*129\\.2", ignore.case = TRUE) expect_output(print(res), "normal", ignore.case = TRUE) res <- basis_normal(x = data, p = 0.99, conf = 0.95, override = c("outliers_within_batch", 
"between_batch_variability")) expect_equal(res$basis, 120.336, tolerance = 0.0005) expect_output(print(res), "a-basis.*120\\.3", ignore.case = TRUE) expect_output(print(res), "normal", ignore.case = TRUE) expect_match(res$distribution, "normal", ignore.case = TRUE) }) test_that("normal basis values produce expected diagnostic failures", { set.seed(100) x <- c(runif(25), runif(25, max = 2), 200) batch <- c(rep("A", 25), rep("B", 26)) expect_warning( expect_warning( expect_warning( expect_warning( res <- basis_normal(x = x, batch = batch), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "anderson_darling_normal" ) # Check that res$... contains the correct value expect_equal(res$batch, batch) expect_equal(res$diagnostic_failures, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_normal")) expect_length(res$override, 0) expect_output(print(res), regexp = paste("failed.+", "outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_normal", sep = ".+")) output <- capture_output(print(res)) expect_false(grepl("overridden", output, ignore.case = TRUE)) # overriding the diagnostics should eliminate the warnings res <- basis_normal(x = x, batch = batch, override = c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_normal")) expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_normal")) expect_length(res$diagnostic_failures, 0) expect_output(print(res), regexp = paste("overridden.+", "outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_normal", sep = ".+")) output <- capture_output(print(res)) expect_false(grepl("failed", output, ignore.case = TRUE)) # overriding the diagnostics using "all" should do the same thing res <- basis_normal(x = x, batch = batch, override = "all") expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", 
"anderson_darling_normal")) expect_length(res$diagnostic_failures, 0) # call basis_normal without batch expect_warning( expect_warning( expect_message( expect_message( res <- basis_normal(x = x), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "anderson_darling_normal" ) # Check that res$... contains the correct value expect_equal(res$diagnostic_failures, c("outliers", "anderson_darling_normal")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_normal(x = x, override = c("outliers", "anderson_darling_normal", "outliers_within_batch", "between_batch_variability")) expect_equal(res$override, c("outliers", "anderson_darling_normal", "outliers_within_batch", "between_batch_variability")) expect_length(res$diagnostic_failures, 0) }) test_that("log-normal basis value matches STAT17 result", { data <- c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) res <- basis_lognormal(x = data, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.664, tolerance = 0.0005) expect_output(print(res), "b-basis.*129.6", ignore.case = TRUE) expect_output(print(res), "normal", ignore.case = TRUE) expect_output(print(res), "log", ignore.case = TRUE) res <- basis_lognormal(x = data, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 121.710, tolerance = 0.0005) expect_output(print(res), "a-basis.*121.7", ignore.case = TRUE) expect_output(print(res), "normal", ignore.case = TRUE) expect_output(print(res), "log", ignore.case = TRUE) expect_match(res$distribution, "log", ignore.case = TRUE) expect_match(res$distribution, "normal", ignore.case = TRUE) }) test_that("lognormal basis values produce expected diagnostic failures", { set.seed(100) x <- c(runif(25), runif(25, max = 2), 200) batch <- c(rep("A", 25), rep("B", 26)) expect_warning( expect_warning( 
expect_warning( expect_warning( res <- basis_lognormal(x = x, batch = batch), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "anderson_darling_lognormal" ) # Check that res$... contains the correct value expect_equal(res$batch, batch) expect_equal(res$diagnostic_failures, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_lognormal")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_lognormal(x = x, batch = batch, override = c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_lognormal")) expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_lognormal")) expect_length(res$diagnostic_failures, 0) # overriding the diagnostics with "all" should do the same thing res <- basis_lognormal(x = x, batch = batch, override = "all") expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_lognormal")) expect_length(res$diagnostic_failures, 0) # call basis_normal without batch expect_warning( expect_warning( expect_message( expect_message( res <- basis_lognormal(x = x), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "anderson_darling_lognormal" ) # Check that res$... 
contains the correct value expect_equal(res$diagnostic_failures, c("outliers", "anderson_darling_lognormal")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_lognormal(x = x, override = c("outliers", "anderson_darling_lognormal", "outliers_within_batch", "between_batch_variability")) expect_equal(res$override, c("outliers", "anderson_darling_lognormal", "outliers_within_batch", "between_batch_variability")) expect_length(res$diagnostic_failures, 0) }) test_that("Weibull basis value matches STAT17 result", { data <- c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) # stat17 B-Basis: 125.441 # stat17 A-Basis: 109.150 res <- basis_weibull(x = data, p = 0.9, conf = 0.95, override = c("outliers_within_batch", "between_batch_variability")) expect_equal(res$basis, 125.441, tolerance = 0.3) expect_output(print(res), "b-basis.*125", ignore.case = TRUE) expect_output(print(res), "weibull", ignore.case = TRUE) res <- basis_weibull(x = data, p = 0.99, conf = 0.95, override = c("outliers_within_batch", "between_batch_variability")) expect_equal(res$basis, 109.150, tolerance = 0.6) expect_output(print(res), "a-basis.*109", ignore.case = TRUE) expect_output(print(res), "weibull", ignore.case = TRUE) expect_match(res$distribution, "weibull", ignore.case = TRUE) }) test_that("weibull basis values produce expected diagnostic failures", { set.seed(100) x <- c(rnorm(10, 100, 2), rnorm(10, 103, 2), 120) batch <- c(rep("A", 10), rep("B", 11)) expect_warning( expect_warning( expect_warning( expect_warning( res <- basis_weibull(x = x, batch = batch), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "anderson_darling_weibull" ) # Check that res$... 
contains the correct value expect_equal(res$batch, batch) expect_equal(res$diagnostic_failures, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_weibull")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_weibull(x = x, batch = batch, override = c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_weibull")) expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_weibull")) expect_length(res$diagnostic_failures, 0) # overriding the diagnostics with "all" should do the same thing res <- basis_weibull(x = x, batch = batch, override = "all") expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "anderson_darling_weibull")) expect_length(res$diagnostic_failures, 0) # call basis_normal without batch expect_warning( expect_warning( expect_message( expect_message( res <- basis_weibull(x = x), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "anderson_darling_weibull" ) # Check that res$... 
contains the correct value expect_equal(res$diagnostic_failures, c("outliers", "anderson_darling_weibull")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_weibull(x = x, override = c("outliers", "anderson_darling_weibull", "outliers_within_batch", "between_batch_variability")) expect_equal(res$override, c("outliers", "anderson_darling_weibull", "outliers_within_batch", "between_batch_variability")) expect_length(res$diagnostic_failures, 0) }) test_that("Non-parametric (small sample) basis value matches STAT17 result", { data <- c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) res <- basis_hk_ext(x = data, p = 0.9, conf = 0.95, method = "optimum-order", override = c("outliers_within_batch", "between_batch_variability")) expect_equal(res$basis, 124.156, tolerance = 0.002) expect_output(print(res), "b-basis.*124", ignore.case = TRUE) expect_output(print(res), "nonparametric", ignore.case = TRUE) expect_match(res$distribution, "nonparametric.*optimum", ignore.case = TRUE) res <- basis_hk_ext(x = data, p = 0.99, conf = 0.95, method = "woodward-frawley", override = c("outliers_within_batch", "between_batch_variability")) expect_equal(res$basis, 99.651, tolerance = 0.002) expect_output(print(res), "a-basis.*99", ignore.case = TRUE) expect_output(print(res), "nonparametric", ignore.case = TRUE) expect_match(res$distribution, "nonparametric.*Woodward-Frawley", ignore.case = TRUE) expect_error(basis_hk_ext(x = data, method = "something invalid", override = c("outliers_within_batch", "between_batch_variability"))) }) test_that("non-para (small) basis values produce expected diag failures", { set.seed(100) x_small <- c(rnorm(10, 100, 2), rnorm(10, 103, 2), 120) batch_small <- c(rep("A", 10), rep("B", 11)) x_large <- c(rnorm(200, 100, 2), rnorm(100, 103, 2), 120) batch_large <- c(rep("A", 200), 
rep("B", 101)) # woodward-frawley is only for A-Basis. Should fail if we calculate B-Basis expect_warning( expect_warning( expect_warning( expect_warning( expect_warning( res <- basis_hk_ext( x = x_large, batch = batch_large, method = "woodward-frawley"), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "correct_method_used" ), "sample_size" ) # Check that res$... contains the correct value expect_equal(res$batch, batch_large) expect_equal(res$diagnostic_failures, c("outliers_within_batch", "between_batch_variability", "outliers", "correct_method_used", "sample_size")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_hk_ext(x = x_large, batch = batch_large, method = "woodward-frawley", override = c("outliers_within_batch", "between_batch_variability", "outliers", "correct_method_used", "sample_size")) expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "correct_method_used", "sample_size")) expect_length(res$diagnostic_failures, 0) # overriding the diagnostics with "all" should do the same thing res <- basis_hk_ext(x = x_large, batch = batch_large, method = "woodward-frawley", override = "all") expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "correct_method_used", "sample_size")) expect_length(res$diagnostic_failures, 0) # optimum-order is only for B-Basis. 
Should fail if we calculate A-Basis expect_warning( expect_warning( expect_warning( expect_warning( expect_warning( res <- basis_hk_ext( x = x_large, batch = batch_large, method = "optimum-order", p = 0.99, conf = 0.95), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ), "correct_method_used" ), "sample_size" ) # call basis_normal without batch expect_warning( expect_message( expect_message( res <- basis_hk_ext(x = x_small, method = "optimum-order"), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ) # Check that res$... contains the correct value expect_equal(res$diagnostic_failures, c("outliers")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_hk_ext(x = x_small, method = "optimum-order", override = c("outliers", "outliers_within_batch", "between_batch_variability")) expect_equal(res$override, c("outliers", "outliers_within_batch", "between_batch_variability")) expect_length(res$diagnostic_failures, 0) }) test_that("Non-parametric (large sample) basis value matches STAT17 result", { data <- c( 137.3603, 135.6665, 136.6914, 154.7919, 159.2037, 137.3277, 128.821, 138.6304, 138.9004, 147.4598, 148.6622, 144.4948, 131.0851, 149.0203, 131.8232, 146.4471, 123.8124, 126.3105, 140.7609, 134.4875, 128.7508, 117.1854, 129.3088, 141.6789, 138.4073, 136.0295, 128.4164, 141.7733, 134.455, 122.7383, 136.9171, 136.9232, 138.8402, 152.8294, 135.0633, 121.052, 131.035, 138.3248, 131.1379, 147.3771, 130.0681, 132.7467, 137.1444, 141.662, 146.9363, 160.7448, 138.5511, 129.1628, 140.2939, 144.8167, 156.5918, 132.0099, 129.3551, 136.6066, 134.5095, 128.2081, 144.0896, 141.8029, 130.0149, 140.8813, 137.7864 ) res <- basis_nonpara_large_sample(x = data, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 122.738297, tolerance = 0.005) expect_output(print(res), "b-basis.*122", ignore.case = TRUE) expect_output(print(res), "nonparametric", ignore.case = TRUE) 
expect_match(res$distribution, "nonparametric.*large", ignore.case = TRUE) }) test_that("non-para (large) basis values produce expected diag failures", { set.seed(100) x_small <- c(rnorm(13, 100, 2), rnorm(13, 103, 2), 120) batch_small <- c(rep("A", 13), rep("B", 14)) x_large <- c(rnorm(200, 100, 2), rnorm(100, 103, 2), 120) batch_large <- c(rep("A", 200), rep("B", 101)) expect_warning( expect_warning( expect_warning( res <- basis_nonpara_large_sample( x = x_large, batch = batch_large), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ) # Check that res$... contains the correct value expect_equal(res$batch, batch_large) expect_equal(res$diagnostic_failures, c("outliers_within_batch", "between_batch_variability", "outliers")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_nonpara_large_sample(x = x_large, batch = batch_large, override = c("outliers_within_batch", "between_batch_variability", "outliers")) expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers")) expect_length(res$diagnostic_failures, 0) # overriding the diagnostics with "all" should do the same thing res <- basis_nonpara_large_sample(x = x_large, batch = batch_large, override = "all") expect_equal(res$override, c("outliers_within_batch", "between_batch_variability", "outliers", "sample_size")) expect_length(res$diagnostic_failures, 0) expect_warning( expect_warning( expect_warning( res <- basis_nonpara_large_sample( x = x_large, batch = batch_large, p = 0.99, conf = 0.95), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ) # call basis_normal without batch expect_warning( expect_message( expect_message( res <- basis_nonpara_large_sample(x = x_large), "outliers_within_batch" ), "between_batch_variability" ), "outliers" ) # Check that res$... 
contains the correct value expect_equal(res$diagnostic_failures, c("outliers")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_nonpara_large_sample(x = x_large, override = c("outliers", "outliers_within_batch", "between_batch_variability")) expect_equal(res$override, c("outliers", "outliers_within_batch", "between_batch_variability")) expect_length(res$diagnostic_failures, 0) }) # data from CMH-17-1G Section 8.3.11.1.1 cmh_17_8_3_11_1_1 <- tribble( ~batch, ~strength, ~condition, 1, 118.3774604, "CTD", 1, 84.9581364, "RTD", 1, 83.7436035, "ETD", 1, 123.6035612, "CTD", 1, 92.4891822, "RTD", 1, 84.3831677, "ETD", 1, 115.2238092, "CTD", 1, 96.8212659, "RTD", 1, 94.8030433, "ETD", 1, 112.6379744, "CTD", 1, 109.030325, "RTD", 1, 94.3931537, "ETD", 1, 116.5564277, "CTD", 1, 97.8212659, "RTD", 1, 101.702222, "ETD", 1, 123.1649896, "CTD", 1, 100.921519, "RTD", 1, 86.5372121, "ETD", 2, 128.5589027, "CTD", 1, 103.699444, "RTD", 1, 92.3772684, "ETD", 2, 113.1462103, "CTD", 2, 93.7908212, "RTD", 2, 89.2084024, "ETD", 2, 121.4248107, "CTD", 2, 107.526709, "RTD", 2, 100.686001, "ETD", 2, 134.3241906, "CTD", 2, 94.5769704, "RTD", 2, 81.0444192, "ETD", 2, 129.6405117, "CTD", 2, 93.8831373, "RTD", 2, 91.3398070, "ETD", 2, 117.9818658, "CTD", 2, 98.2296605, "RTD", 2, 93.1441939, "ETD", 3, 115.4505226, "CTD", 2, 111.346590, "RTD", 2, 85.8204168, "ETD", 3, 120.0369467, "CTD", 2, 100.817538, "RTD", 3, 94.8966273, "ETD", 3, 117.1631088, "CTD", 3, 100.382203, "RTD", 3, 95.8068520, "ETD", 3, 112.9302797, "CTD", 3, 91.5037811, "RTD", 3, 86.7842252, "ETD", 3, 117.9114501, "CTD", 3, 100.083233, "RTD", 3, 94.4011973, "ETD", 3, 120.1900159, "CTD", 3, 95.6393615, "RTD", 3, 96.7231171, "ETD", 3, 110.7295966, "CTD", 3, 109.304779, "RTD", 3, 89.9010384, "ETD", 3, 100.078562, "RTD", 3, 99.1205847, "RTD", 3, 89.3672306, "ETD", 1, 106.357525, "ETW", 1, 99.0239966, "ETW2", 1, 105.898733, "ETW", 1, 103.341238, "ETW2", 1, 88.4640082, "ETW", 1, 
100.302130, "ETW2", 1, 103.901744, "ETW", 1, 98.4634133, "ETW2", 1, 80.2058219, "ETW", 1, 92.2647280, "ETW2", 1, 109.199597, "ETW", 1, 103.487693, "ETW2", 1, 61.0139431, "ETW", 1, 113.734763, "ETW2", 2, 99.3207107, "ETW", 2, 108.172659, "ETW2", 2, 115.861770, "ETW", 2, 108.426732, "ETW2", 2, 82.6133082, "ETW", 2, 116.260375, "ETW2", 2, 85.3690411, "ETW", 2, 121.049610, "ETW2", 2, 115.801622, "ETW", 2, 111.223082, "ETW2", 2, 44.3217741, "ETW", 2, 104.574843, "ETW2", 2, 117.328077, "ETW", 2, 103.222552, "ETW2", 2, 88.6782903, "ETW", 3, 99.3918538, "ETW2", 3, 107.676986, "ETW", 3, 87.3421658, "ETW2", 3, 108.960241, "ETW", 3, 102.730741, "ETW2", 3, 116.122640, "ETW", 3, 96.3694916, "ETW2", 3, 80.2334815, "ETW", 3, 99.5946088, "ETW2", 3, 106.145570, "ETW", 3, 97.0712407, "ETW2", 3, 104.667866, "ETW", 3, 104.234953, "ETW" ) test_that("expected diagnostic failures are noted for pooling methods", { # This test follows CMH-17-1G Section # This section in CMH-17-1G shows the removal of one condition # before running Levene's test on the pooled data, so this test # will be skipped in this test. 
expect_warning( expect_warning( expect_warning( expect_warning( expect_warning( res <- basis_pooled_sd(cmh_17_8_3_11_1_1, strength, condition, batch), "outliers_within_batch" ), "between_group_variability" ), "outliers_within_group" ), "pooled_data_normal" ), "pooled_variance_equal" ) expect_warning( expect_warning( expect_warning( expect_warning( res <- cmh_17_8_3_11_1_1 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, batch), "outliers_within_batch" ), "outliers_within_group" ), "pooled_data_normal" ), "pooled_variance_equal" ) # removing both ETW and ETW2 should remove all diagnostic failures res <- cmh_17_8_3_11_1_1 %>% filter(condition != "ETW2" & condition != "ETW") %>% basis_pooled_sd(strength, condition, batch) expect_equal(res$basis$value[res$basis$group == "CTD"], 108.70, tolerance = 0.02) expect_equal(res$basis$value[res$basis$group == "RTD"], 88.52, tolerance = 0.02) expect_equal(res$basis$value[res$basis$group == "ETD"], 80.68, tolerance = 0.02) expect_warning( expect_warning( expect_warning( expect_warning( expect_warning( res <- basis_pooled_cv(cmh_17_8_3_11_1_1, strength, condition, batch), "outliers_within_batch" ), "between_group_variability" ), "outliers_within_group" ), "pooled_data_normal" ), "normalized_variance_equal" ) expect_warning( expect_warning( expect_warning( expect_warning( res <- cmh_17_8_3_11_1_1 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, batch), "outliers_within_batch" ), "outliers_within_group" ), "pooled_data_normal" ), "normalized_variance_equal" ) # removing both ETW and ETW2 should remove all diagnostic failures res <- cmh_17_8_3_11_1_1 %>% filter(condition != "ETW2" & condition != "ETW") %>% basis_pooled_cv(strength, condition, batch) }) # data from CMH-17-1G Section 8.3.11.1.2 poolable_data <- tribble( ~batch, ~strength, ~condition, 1, 79.04517, "CTD", 1, 102.6014, "CTD", 1, 97.79372, "CTD", 1, 92.86423, "CTD", 1, 117.218, "CTD", 1, 108.7168, "CTD", 1, 112.2773, "CTD", 
1, 114.0129, "CTD", 2, 106.8452, "CTD", 2, 112.3911, "CTD", 2, 115.5658, "CTD", 2, 87.40657, "CTD", 2, 102.2785, "CTD", 2, 110.6073, "CTD", 3, 105.2762, "CTD", 3, 110.8924, "CTD", 3, 108.7638, "CTD", 3, 110.9833, "CTD", 3, 101.3417, "CTD", 3, 100.0251, "CTD", 1, 103.2006, "RTD", 1, 105.1034, "RTD", 1, 105.1893, "RTD", 1, 100.4189, "RTD", 2, 85.32319, "RTD", 2, 92.69923, "RTD", 2, 98.45242, "RTD", 2, 104.1014, "RTD", 2, 91.51841, "RTD", 2, 101.3746, "RTD", 2, 101.5828, "RTD", 2, 99.57384, "RTD", 2, 88.84826, "RTD", 3, 92.18703, "RTD", 3, 101.8234, "RTD", 3, 97.68909, "RTD", 3, 101.5172, "RTD", 3, 100.0481, "RTD", 3, 102.0544, "RTD", 1, 63.22764, "ETW", 1, 70.84454, "ETW", 1, 66.43223, "ETW", 1, 75.37771, "ETW", 1, 72.43773, "ETW", 1, 68.43073, "ETW", 1, 69.72524, "ETW", 2, 66.20343, "ETW", 2, 60.51251, "ETW", 2, 65.69334, "ETW", 2, 62.73595, "ETW", 2, 59.00798, "ETW", 2, 62.37761, "ETW", 3, 64.3947, "ETW", 3, 72.8491, "ETW", 3, 66.56226, "ETW", 3, 66.56779, "ETW", 3, 66.00123, "ETW", 3, 59.62108, "ETW", 3, 60.61167, "ETW", 3, 57.65487, "ETW", 3, 66.51241, "ETW", 3, 64.89347, "ETW", 3, 57.73054, "ETW", 3, 68.94086, "ETW", 3, 61.63177, "ETW", 1, 54.09806, "ETW2", 1, 58.87615, "ETW2", 1, 61.60167, "ETW2", 1, 60.23973, "ETW2", 1, 61.4808, "ETW2", 1, 64.55832, "ETW2", 2, 57.76131, "ETW2", 2, 49.91463, "ETW2", 2, 61.49271, "ETW2", 2, 57.7281, "ETW2", 2, 62.11653, "ETW2", 2, 62.69353, "ETW2", 3, 61.38523, "ETW2", 3, 60.39053, "ETW2", 3, 59.17616, "ETW2", 3, 60.17616, "ETW2", 3, 46.47396, "ETW2", 3, 51.16616, "ETW2" ) test_that("Pooled SD results match ASAP results", { # This data fails the anderson-darling test for normality for the # transformed data expect_warning( expect_message( expect_message( res_b <- basis_pooled_sd(poolable_data, strength, condition, override = c("pooled_variance_equal")), "outliers_within_batch" ), "between_group_variability" ), "pooled_data_normal" ) expect_equal(res_b$basis$value[res_b$basis$group == "CTD"], 93.64, tolerance = 0.01) 
expect_equal(res_b$basis$value[res_b$basis$group == "RTD"], 87.30, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "ETW"], 54.33, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "ETW2"], 47.12, tolerance = 0.01) expect_equal(res_b$n, 83) expect_equal(res_b$r, 4) expect_output(print(res_b), "b-basis", ignore.case = TRUE) expect_output(print(res_b), "pooled standard deviation", ignore.case = TRUE) expect_output(print(res_b), "CTD.*93\\.6", ignore.case = TRUE) expect_output(print(res_b), "RTD.*87\\.29", ignore.case = TRUE) expect_output(print(res_b), "ETW.*54\\.3", ignore.case = TRUE) expect_output(print(res_b), "ETW2.*47\\.07", ignore.case = TRUE) res_a <- basis_pooled_sd(poolable_data, strength, condition, p = 0.99, conf = 0.95, override = c("pooled_data_normal", "pooled_variance_equal", "outliers_within_batch", "between_group_variability")) expect_equal(res_a$basis$value[res_a$basis$group == "CTD"], 86.19, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "RTD"], 79.86, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "ETW"], 46.84, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "ETW2"], 39.69, tolerance = 0.01) }) test_that("Pooled CV results match CMH17STATS", { # This data fails the anderson-darling test for normality for the # transformed data expect_warning( expect_message( expect_message( res_b <- basis_pooled_cv(poolable_data, strength, condition), "outliers_within_batch" ), "between_group_variability" ), "pooled_data_normal" ) expect_equal(res_b$basis$value[res_b$basis$group == "CTD"], 90.89, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "RTD"], 85.37, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "ETW"], 56.79, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "ETW2"], 50.55, tolerance = 0.01) expect_equal(res_b$n, 83) expect_equal(res_b$r, 4) expect_output(print(res_b), "b-basis", 
ignore.case = TRUE) expect_output(print(res_b), "pooled CV", ignore.case = TRUE) expect_output(print(res_b), "CTD.*90\\.8", ignore.case = TRUE) expect_output(print(res_b), "RTD.*85\\.3", ignore.case = TRUE) expect_output(print(res_b), "ETW.*56\\.7", ignore.case = TRUE) expect_output(print(res_b), "ETW2.*50\\.5", ignore.case = TRUE) res_a <- basis_pooled_cv(poolable_data, strength, condition, p = 0.99, conf = 0.95, override = c("pooled_data_normal", "outliers_within_batch", "between_group_variability")) expect_equal(res_a$basis$value[res_a$basis$group == "CTD"], 81.62, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "RTD"], 76.67, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "ETW"], 50.98, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "ETW2"], 45.40, tolerance = 0.01) expect_output(print(res_a), "a-basis", ignore.case = TRUE) }) test_that("Pooled data matches CMH17-STATS with mod CV, SD pooling", { # pooled SD modified CV results # pooled data fails Levene's test after mod CV transform # based on `poolable_data` dataset with ETW2 removed data <- filter(poolable_data, condition != "ETW2") res_b <- basis_pooled_sd(data, strength, condition, modcv = TRUE, override = c("pooled_variance_equal", "outliers_within_batch", "between_group_variability")) expect_equal(res_b$basis$value[res_b$basis$group == "CTD"], 92.25, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "RTD"], 85.91, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "ETW"], 52.97, tolerance = 0.01) expect_output(print(res_b), "Modified CV") res_a <- basis_pooled_sd(data, strength, condition, p = 0.99, conf = 0.95, modcv = TRUE, override = c("pooled_variance_equal", "outliers_within_batch", "between_group_variability")) expect_equal(res_a$basis$value[res_a$basis$group == "CTD"], 83.81, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "RTD"], 77.48, tolerance = 0.01) 
expect_equal(res_a$basis$value[res_a$basis$group == "ETW"], 44.47, tolerance = 0.01) }) test_that("Pooled data matches CMH17-STATS with mod CV, CV pooling", { # pooled CV modified CV results # pooled data passes Levene's test after mod CV transform # based on `poolable_data` dataset with ETW2 removed data <- filter(poolable_data, condition != "ETW2") res_b <- basis_pooled_cv(data, strength, condition, modcv = TRUE, override = c("outliers_within_batch", "between_group_variability")) expect_equal(res_b$basis$value[res_b$basis$group == "CTD"], 90.31, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "RTD"], 84.83, tolerance = 0.01) expect_equal(res_b$basis$value[res_b$basis$group == "ETW"], 56.43, tolerance = 0.01) expect_output(print(res_b), "Modified CV") res_a <- basis_pooled_cv(data, strength, condition, p = 0.99, conf = 0.95, modcv = TRUE, override = c("outliers_within_batch", "between_group_variability")) expect_equal(res_a$basis$value[res_a$basis$group == "CTD"], 80.57, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "RTD"], 75.69, tolerance = 0.01) expect_equal(res_a$basis$value[res_a$basis$group == "ETW"], 50.33, tolerance = 0.01) }) vangel1994 <- tribble( ~n, ~z, ~p, ~conf, 3, 13.976451, 0.90, 0.90, 5, 4.1011886, 0.90, 0.90, 7, 2.5440993, 0.90, 0.90, 9, 1.9368858, 0.90, 0.90, 11, 1.6127559, 0.90, 0.90, 13, 1.4096961, 0.90, 0.90, 15, 1.2695586, 0.90, 0.90, 17, 1.1663923, 0.90, 0.90, 19, 1.0868640, 0.90, 0.90, 21, 1.0234110, 0.90, 0.90, 3, 28.820048, 0.90, 0.95, 5, 6.1981307, 0.90, 0.95, 7, 3.4780112, 0.90, 0.95, 9, 2.5168762, 0.90, 0.95, 11, 2.0312134, 0.90, 0.95, 13, 1.7377374, 0.90, 0.95, 15, 1.5403989, 0.90, 0.95, 17, 1.3979806, 0.90, 0.95, 19, 1.2899172, 0.90, 0.95, 21, 1.2048089, 0.90, 0.95, 23, 1.1358259, 0.90, 0.95, 25, 1.0786237, 0.90, 0.95, 27, 1.0303046, 0.90, 0.95, 3, 147.51275, 0.90, 0.99, 5, 14.993461, 0.90, 0.99, 7, 6.6442464, 0.90, 0.99, 9, 4.2798170, 0.90, 0.99, 11, 3.2197376, 0.90, 0.99, 13, 
2.6267547, 0.90, 0.99, 15, 2.2493289, 0.90, 0.99, 17, 1.9880239, 0.90, 0.99, 19, 1.7961467, 0.90, 0.99, 21, 1.6490109, 0.90, 0.99, 23, 1.5323809, 0.90, 0.99, 25, 1.4374854, 0.90, 0.99, 27, 1.3586292, 0.90, 0.99, 29, 1.2919549, 0.90, 0.99, 31, 1.2347570, 0.90, 0.99, 33, 1.1850813, 0.90, 0.99, 35, 1.1414809, 0.90, 0.99, 37, 1.1028613, 0.90, 0.99, 39, 1.0683787, 0.90, 0.99, 41, 1.0373720, 0.90, 0.99, 43, 1.0093159, 0.90, 0.99, 3, 20.478521, 0.95, 0.90, 5, 5.8872014, 0.95, 0.90, 7, 3.6322326, 0.95, 0.90, 9, 2.7593956, 0.95, 0.90, 11, 2.2953853, 0.95, 0.90, 13, 2.0054547, 0.95, 0.90, 15, 1.8057261, 0.95, 0.90, 17, 1.6588820, 0.95, 0.90, 19, 1.5457939, 0.95, 0.90, 21, 1.4556317, 0.95, 0.90, 23, 1.3817937, 0.95, 0.90, 25, 1.3200198, 0.95, 0.90, 27, 1.2674334, 0.95, 0.90, 29, 1.2220187, 0.95, 0.90, 31, 1.1823195, 0.95, 0.90, 33, 1.1472560, 0.95, 0.90, 35, 1.1160097, 0.95, 0.90, 37, 1.0879479, 0.95, 0.90, 39, 1.0625739, 0.95, 0.90, 41, 1.0394913, 0.95, 0.90, 43, 1.0183802, 0.95, 0.90, 3, 42.149579, 0.95, 0.95, 5, 8.8719351, 0.95, 0.95, 7, 4.9501721, 0.95, 0.95, 9, 3.5743714, 0.95, 0.95, 11, 2.8819079, 0.95, 0.95, 13, 2.4645176, 0.95, 0.95, 15, 2.1843450, 0.95, 0.95, 17, 1.9824011, 0.95, 0.95, 19, 1.8293163, 0.95, 0.95, 21, 1.7088376, 0.95, 0.95, 23, 1.6112408, 0.95, 0.95, 25, 1.5303474, 0.95, 0.95, 27, 1.4620403, 0.95, 0.95, 29, 1.4034674, 0.95, 0.95, 31, 1.3525889, 0.95, 0.95, 33, 1.3079057, 0.95, 0.95, 35, 1.2682903, 0.95, 0.95, 37, 1.2328780, 0.95, 0.95, 39, 1.2009936, 0.95, 0.95, 41, 1.1721022, 0.95, 0.95, 43, 1.1457739, 0.95, 0.95, 45, 1.1216591, 0.95, 0.95, 47, 1.0994706, 0.95, 0.95, 49, 1.0789699, 0.95, 0.95, 51, 1.0599573, 0.95, 0.95, 53, 1.0422642, 0.95, 0.95, 55, 1.0257472, 0.95, 0.95, 57, 1.0102836, 0.95, 0.95 ) test_that("Extended Hanson-Koopman matches median results from Vangel 1994", { # Vangel (1994) provides extensive tables of z for the case where i=1 and # j is the median observation. 
This test checks the results of this # package's function against those tables. Only the odd values of n # are checked so that the median is a single observation. vangel1994 %>% rowwise() %>% mutate( z_calc = hk_ext_z(n, 1, ceiling(n / 2), p, conf) ) %>% mutate(expect_equal(z, z_calc, tolerance = 0.00005, label = paste0("Mismatch in `z` for n=", n, ", p=", p, " conf=", conf, ".\n", "z_vangel=", z, ", z_calc=", z_calc, "\n"))) }) cmh_17_1g_8_5_14 <- tribble( ~n, ~r, ~k, 2, 2, 35.177, 3, 3, 7.859, 4, 4, 4.505, 5, 4, 4.101, 6, 5, 3.064, 7, 5, 2.858, 8, 6, 2.382, 9, 6, 2.253, 10, 6, 2.137, 11, 7, 1.897, 12, 7, 1.814, 13, 7, 1.738, 14, 8, 1.599, 15, 8, 1.540, 16, 8, 1.485, 17, 8, 1.434, 18, 9, 1.354, 19, 9, 1.311, 20, 10, 1.253, 21, 10, 1.218, 22, 10, 1.184, 23, 11, 1.143, 24, 11, 1.114, 25, 11, 1.087, 26, 11, 1.060, 27, 11, 1.035, 28, 12, 1.010 ) test_that("Extended HK matches CMH-17-1G Table 8.5.14", { # CMH-17-1G uses the optimal order statistic approach suggested by # Vangel (1994) for computing B-Basis values. There are a few values # of n where this package's implementation finds a different optimum order # statistic than CMH-17-1G uses. In these cases, the order statistic that # this package and CMH-17-1G are both very nearly optimal. These differences # are ignored in this test. 
cmh_17_1g_8_5_14 %>% rowwise() %>% mutate(z = hk_ext_z_j_opt(n, 0.90, 0.95)$z) %>% mutate(j = hk_ext_z_j_opt(n, 0.90, 0.95)$j) %>% filter( n != 17 & n != 20 & n != 23 & n != 24 & n != 28 ) %>% mutate(expect_equal(j, r, label = paste0("Mismatch in `j`/`r` for n=", n, ", ", "r_B_cmh=", r, ", j=", j, "\n"))) %>% mutate(expect_equal(z, k, tolerance = 0.005, label = paste0("Mismatch in `k`/`z` for n=", n, ", ", "k_B_cmh=", k, ", z_calc=", z, "\n"))) }) test_that("Hanson-Koopman results match STAT17 for several values of n", { data <- c( 139.6734, 143.0032, 130.4757, 144.8327, 138.7818, 136.7693, 148.636, 131.0095, 131.4933, 142.8856, 158.0198, 145.2271, 137.5991, 139.8298, 140.8557, 137.6148, 131.3614, 152.7795, 145.8792, 152.9207, 160.0989, 145.192, 128.6383, 141.5992, 122.5297, 159.8209, 151.672, 159.0156 ) res <- basis_hk_ext(x = head(data, 28), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 122.36798, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 27), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 121.96939, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 26), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 121.57073, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 23), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 127.11286, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 22), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 128.82397, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 21), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 128.52107, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 20), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 128.20999, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 19), p = 0.9, conf = 0.95, 
method = "optimum-order", override = "all") expect_equal(res$basis, 127.74060, tolerance = 0.002) res <- basis_hk_ext(x = head(data, 18), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 127.36697, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 17), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 127.02732, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 16), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 126.23545, tolerance = 0.002) res <- basis_hk_ext(x = head(data, 15), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 125.68740, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 14), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 125.17500, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 13), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 124.07851, tolerance = 0.002) res <- basis_hk_ext(x = head(data, 12), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 121.17418, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 11), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 120.26382, tolerance = 0.001) res <- basis_hk_ext(x = head(data, 10), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 120.75149, tolerance = 0.002) res <- basis_hk_ext(x = head(data, 9), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 119.80108, tolerance = 0.001) }) cmh_17_1g_8_5_15 <- tribble( ~n, ~k, 2, 80.0038, 3, 16.9122, 4, 9.49579, 5, 6.89049, 6, 5.57681, 7, 4.78352, 8, 4.25011, 9, 3.86502, 10, 3.57267, 11, 3.34227, 12, 3.1554, 13, 3.00033, 14, 2.86924, 15, 2.75672, 16, 2.65889, 17, 2.5729, 18, 2.4966, 19, 2.42833, 20, 2.36683, 21, 2.31106, 22, 2.2602, 
23, 2.21359, 24, 2.17067, 25, 2.131, 26, 2.09419, 27, 2.05991, 28, 2.0279, 29, 1.99791, 30, 1.96975, 31, 1.94324, 32, 1.91822, 33, 1.89457, 34, 1.87215, 35, 1.85088, 36, 1.83065, 37, 1.81139, 38, 1.79301, 39, 1.77546, 40, 1.75868, 41, 1.7426, 42, 1.72718, 43, 1.71239, 44, 1.69817, 45, 1.68449, 46, 1.67132, 47, 1.65862, 48, 1.64638, 49, 1.63456, 50, 1.62313, 52, 1.60139, 54, 1.58101, 56, 1.56184, 58, 1.54377, 60, 1.5267, 62, 1.51053, 64, 1.4952, 66, 1.48063, 68, 1.46675, 70, 1.45352, 72, 1.44089, 74, 1.42881, 76, 1.41724, 78, 1.40614, 80, 1.39549, 82, 1.38525, 84, 1.37541, 86, 1.36592, 88, 1.35678, 90, 1.34796, 92, 1.33944, 94, 1.3312, 96, 1.32324, 98, 1.31553, 100, 1.30806, 105, 1.29036, 110, 1.27392, 115, 1.25859, 120, 1.24425, 125, 1.2308, 130, 1.21814, 135, 1.2062, 140, 1.19491, 145, 1.18421, 150, 1.17406, 155, 1.1644, 160, 1.15519, 165, 1.1464, 170, 1.13801, 175, 1.12997, 180, 1.12226, 185, 1.11486, 190, 1.10776, 195, 1.10092, 200, 1.09434, 205, 1.08799, 210, 1.08187, 215, 1.07595, 220, 1.07024, 225, 1.06471, 230, 1.05935, 235, 1.05417, 240, 1.04914, 245, 1.04426, 250, 1.03952, 275, 1.01773 ) test_that("Extended Hanson-Koopman matches CMH-17-1G Table 8.5.15", { # for A-Basis, CMH-17-1G uses the order statistics for 1 and n # to compute the tolerance limits. 
This test verifies that the code # in this package computes the same values of z (k, as CMH-17 calls it) cmh_17_1g_8_5_15 %>% rowwise() %>% mutate(z = hk_ext_z(n, 1, n, 0.99, 0.95)) %>% mutate(expect_equal(z, k, tolerance = 0.00005, label = paste0("Mismatch in `k`/`z` for n=", n, ", ", "k_A_cmh=", k, ", z_calc=", z, "\n"))) }) cmh_17_1g_8_5_13 <- tribble( ~n, ~ra, 299, 1, 473, 2, 628, 3, 773, 4, 913, 5, 1049, 6, 1182, 7, 1312, 8, 1441, 9, 1568, 10, 1693, 11, 1818, 12, 1941, 13, 2064, 14, 2185, 15, 2306, 16, 2426, 17, 2546, 18, 2665, 19, 2784, 20, 2902, 21, 3020, 22, 3137, 23, 3254, 24, 3371, 25, 3487, 26, 3603, 27, 3719, 28, 3834, 29, 3949, 30, 4064, 31, 4179, 32, 4293, 33, 4407, 34, 4521, 35, 4635, 36, 4749, 37, 4862, 38, 4975, 39, 5088, 40, 5201, 41, 5314, 42, 5427, 43, 5539, 44, 5651, 45, 5764, 46, 5876, 47, 5988, 48, 6099, 49, 6211, 50, 6323, 51, 6434, 52, 6545, 53, 6657, 54, 6769, 55, 6879, 56, 6990, 57, 7100, 58, 7211, 59, 7322, 60, 7432, 61, 7543, 62, 7653, 63, 7763, 64, 7874, 65, 7984, 66, 8094, 67, 8204, 68, 8314, 69, 8423, 70, 8533, 71, 8643, 72, 8753, 73, 8862, 74, 8972, 75, 9081, 76, 9190, 77, 9300, 78, 9409, 79, 9518, 80, 9627, 81, 9736, 82, 9854, 83, 9954, 84, 10063, 85, 10172, 86, 10281, 87, 10390, 88, 10498, 89, 10607, 90, 10716, 91, 10824, 92, 10933, 93, 11041, 94, 11150, 95, 11258, 96, 11366, 97, 11475, 98, 11583, 99, 11691, 100 ) test_that("Non-parametric ranks for A-Basis match CMH-17-1G Table 8.5.13", { skip_on_cran() # this test is a long-running test cmh_17_1g_8_5_13 %>% mutate(ra_lag = lag(ra)) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.99, 0.95)) %>% mutate(expect_equal(ra, r_calc, label = paste0( "Mismatch in r for n=", n, ". rA=", ra, ", r_calc=", r_calc ))) %>% filter(n > 299 & n < 6500) %>% # the rank for one sample larger should be the same mutate(r_calc_plus = nonpara_binomial_rank(n + 1, 0.99, 0.95)) %>% mutate(expect_equal(ra, r_calc_plus, label = paste0( "Mismatch in r for n=", n + 1, ". 
rA=", ra, ", ", "r_calc=", r_calc_plus ))) %>% # the rank for one sample smaller should be the previous one mutate(r_calc_minus = nonpara_binomial_rank(n - 1, 0.99, 0.95)) %>% mutate(expect_equal(ra_lag, r_calc_minus, label = paste0( "Mismatch in r for n=", n - 1, ". rA=", ra_lag, ", ", "r_calc=", r_calc_minus ))) }) test_that("nonpara_binomial_rank raises and error when sample too small", { expect_error(nonpara_binomial_rank(298, 0.99, 0.95), "p.*0\\.99.*conf.*0\\.95") }) test_that("nonpara_binomial_rank raises an error when it can't converge", { expect_error(nonpara_binomial_rank(4000, 0.00001, 0.01), "p.*1e-05.*conf.*0\\.01") }) cmh_17_1g_8_5_12 <- tribble( ~n, ~rb, 29, 1, 46, 2, 61, 3, 76, 4, 89, 5, 103, 6, 116, 7, 129, 8, 142, 9, 154, 10, 167, 11, 179, 12, 191, 13, 203, 14, 215, 15, 227, 16, 239, 17, 251, 18, 263, 19, 275, 20, 298, 22, 321, 24, 345, 26, 368, 28, 391, 30, 413, 32, 436, 34, 459, 36, 481, 38, 504, 40, 526, 42, 549, 44, 571, 46, 593, 48, 615, 50, 638, 52, 660, 54, 682, 56, 704, 58, 726, 60, 781, 65, 836, 70, 890, 75, 945, 80, 999, 85, 1053, 90, 1107, 95, 1161, 100, 1269, 110, 1376, 120, 1483, 130, 1590, 140, 1696, 150, 1803, 160, 1909, 170, 2015, 180, 2120, 190, 2226, 200, 2331, 210, 2437, 220, 2542, 230, 2647, 240, 2752, 250, 2857, 260, 2962, 270, 3066, 280, 3171, 290, 3276, 300, 3380, 310, 3484, 320, 3589, 330, 3693, 340, 3797, 350, 3901, 360, 4005, 370, 4109, 380, 4213, 390, 4317, 400, 4421, 410, 4525, 420, 4629, 430, 4733, 440, 4836, 450, 4940, 460, 5044, 470, 5147, 480, 5251, 490, 5354, 500, 5613, 525, 5871, 550, 6130, 575, 6388, 600, 6645, 625, 6903, 650, 7161, 675, 7418, 700, 7727, 730, 8036, 760, 8344, 790, 8652, 820, 8960, 850, 9268, 880, 9576, 910, 9884, 940, 10191, 970, 10499, 1000 ) test_that("Non-parametric ranks for BA-Basis match CMH-17-1G Table 8.5.12", { skip_on_cran() # this test is a long-running test cmh_17_1g_8_5_12 %>% mutate(rb_lag = lag(rb)) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.9, 0.95)) %>% 
mutate(expect_equal(rb, r_calc, label = paste0( "Mismatch in r for n=", n, ". rB=", rb, ", r_calc=", r_calc ))) %>% # the rank for one sample larger should be the same mutate(r_calc_plus = nonpara_binomial_rank(n + 1, 0.9, 0.95)) %>% mutate(expect_equal(rb, r_calc_plus, label = paste0( "Mismatch in r for n=", n + 1, ". rB=", rb, ", ", "r_calc=", r_calc_plus ))) %>% filter(n > 29 & n <= 275) %>% # the rank for one sample smaller should be the previous one # above n=275, Table 8.5.12 does not have consecutive ranks, so we can't # use the lag trick below to check sample sizes of n-1 mutate(r_calc_minus = nonpara_binomial_rank(n - 1, 0.9, 0.95)) %>% mutate(expect_equal(rb_lag, r_calc_minus, label = paste0( "Mismatch in r for n=", n - 1, ". rB=", rb_lag, ", ", "r_calc=", r_calc_minus ))) }) cmh_17_1g_8_3_11_1_1_etw2 <- tribble( ~batch, ~strength, 1, 99.0239966, 1, 103.341238, 1, 100.30213, 1, 98.4634133, 1, 92.264728, 1, 103.487693, 1, 113.734763, 2, 108.172659, 2, 108.426732, 2, 116.260375, 2, 121.04961, 2, 111.223082, 2, 104.574843, 2, 103.222552, 3, 99.3918538, 3, 87.3421658, 3, 102.730741, 3, 96.3694916, 3, 99.5946088, 3, 97.0712407 ) test_that("ANOVA results match STAT17 for sample data", { # Sample data from CMH-17-1G Section 8.3.11.2.2 res <- cmh_17_1g_8_3_11_1_1_etw2 %>% basis_anova(strength, batch, override = c("equality_of_variance", "number_of_groups")) expect_equal(res$basis, 63.2, tolerance = 0.05) expect_output(print(res), "b-basis.*63\\.2", ignore.case = TRUE) expect_output(print(res), "ANOVA", ignore.case = TRUE) expect_match(res$distribution, "ANOVA", ignore.case = TRUE) res <- cmh_17_1g_8_3_11_1_1_etw2 %>% basis_anova(strength, batch, p = 0.99, conf = 0.95, override = c("equality_of_variance", "number_of_groups")) expect_equal(res$basis, 34.6, tolerance = 0.05) expect_output(print(res), "a-basis.*34\\.", ignore.case = TRUE) expect_output(print(res), "ANOVA", ignore.case = TRUE) expect_match(res$distribution, "ANOVA", ignore.case = TRUE) }) 
test_that("ANOVA produces an error when there is only one group", { strength <- rep(10, 1) batch <- rep(10, 1) expect_error( basis_anova(x = strength, group = batch), "fewer than 2" ) }) test_that("anova basis values produce expected diagnostic failures", { set.seed(100) x <- c(rnorm(30, 100, 1), rnorm(30, 100, 10), 80) batch <- c(rep("A", 30), rep("B", 30), "A") expect_warning( expect_warning( expect_warning( res <- basis_anova(x = x, group = batch), "outliers_within_group" ), "equality_of_variance" ), "number_of_groups" ) # Check that res$... contains the correct value expect_equal(res$group, batch) expect_equal(res$diagnostic_failures, c("outliers_within_group", "equality_of_variance", "number_of_groups")) expect_length(res$override, 0) # overriding the diagnostics should eliminate the warnings res <- basis_anova(x = x, group = batch, override = c("outliers_within_group", "equality_of_variance", "number_of_groups")) expect_equal(res$override, c("outliers_within_group", "equality_of_variance", "number_of_groups")) expect_length(res$diagnostic_failures, 0) # overriding the diagnostics with "all" should do the same thing res <- basis_anova(x = x, group = batch, override = "all") expect_equal(res$override, c("outliers_within_group", "equality_of_variance", "number_of_groups")) expect_length(res$diagnostic_failures, 0) }) test_that("ANOVA method matches STAT17 when between-batch var. 
is small", { data <- tribble( ~x, ~batch, 105.04953017290813, 1, 105.74515635546253, 1, 99.7549396676824, 1, 107.44219439303261, 1, 100.17657481474124, 1, 106.601810738431, 1, 101.15202811896768, 2, 90.63466521331704, 2, 106.93692070778634, 2, 116.14555531325212, 2, 100.20555336225114, 2, 103.89002397699194, 2, 110.50367678215923, 3, 95.34690617376182, 3, 105.03624331633935, 3, 105.83852344481843, 3, 105.8785931848096, 3, 103.97623814685818, 3, 94.92344509669459, 4, 89.35739844589054, 4, 110.45073142288507, 4, 108.32807015574465, 4, 104.35498641239826, 4, 109.39785860273314, 4, 102.88966425996772, 5, 105.08208381529616, 5, 109.82310733067601, 5, 108.64289487358796, 5, 99.87084985403291, 5, 96.7651412720645, 5 ) res <- basis_anova(data, x, batch, override = "all") expect_equal(res$basis, 93.2, tolerance = 0.05) }) test_that("glance.basis produces expected value", { # Sample data from CMH-17-1G Section 8.3.11.2.2 res <- cmh_17_1g_8_3_11_1_1_etw2 %>% basis_anova(strength, batch, override = c("number_of_groups")) glance_res <- glance(res) expect_equal(glance_res[["p"]][1], 0.9) expect_equal(glance_res[["conf"]][1], 0.95) expect_equal(glance_res[["distribution"]][1], "ANOVA") expect_equal(glance_res[["n"]][1], nrow(cmh_17_1g_8_3_11_1_1_etw2)) expect_equal(glance_res[["r"]][1], 3) expect_equal(glance_res[["basis"]][1], 63.2, tolerance = 0.05) glance_res_2 <- glance(res, TRUE) for (gn in names(glance_res)) { expect_equal(glance_res[[gn]], glance_res_2[[gn]]) } expect_equal(glance_res_2[["outliers_within_group"]], "P") expect_equal(glance_res_2[["equality_of_variance"]], "P") expect_equal(glance_res_2[["number_of_groups"]], "O") expect_warning({ glance_res_3 <- cmh_17_1g_8_3_11_1_1_etw2 %>% basis_anova(strength, batch) %>% glance(TRUE) }) expect_equal(glance_res_3[["outliers_within_group"]], "P") expect_equal(glance_res_3[["equality_of_variance"]], "P") expect_equal(glance_res_3[["number_of_groups"]], "F") }) test_that("glance for pooled methods works", { res <- 
carbon.fabric %>% filter(test == "WT") %>% basis_pooled_sd(strength, condition, batch, override = c("outliers_within_batch")) %>% glance(TRUE) # 3 conditions should produce 3 basis values and hence 3 rows expect_equal(nrow(res), 3) }) test_that("pooled methods process override='all'", { res <- basis_pooled_sd(poolable_data, strength, condition, modcv = TRUE, override = "all") expect_equal(res$override, c("outliers_within_batch", "between_group_variability", "outliers_within_group", "pooled_data_normal", "pooled_variance_equal")) expect_length(res$diagnostic_failures, 0) res <- basis_pooled_cv(poolable_data, strength, condition, modcv = TRUE, override = "all") expect_equal(res$override, c("outliers_within_batch", "between_group_variability", "outliers_within_group", "pooled_data_normal", "normalized_variance_equal")) expect_length(res$diagnostic_failures, 0) }) cmstatr/tests/testthat/test-workflow.R0000644000176200001440000005647614125365511017730 0ustar liggesuserssuppressMessages(library(dplyr)) # In this test case, the "steps" refer to the flowchart in CMH-17-1G # Figure 8.3.1.1(a) test_that("carbon.fabric.2 MNR test matches CMH17-STATS (step 7)", { # Test for outliers within each batch and condition conditions <- unique(carbon.fabric.2$condition) batches <- unique(carbon.fabric.2$batch) sapply(conditions, function(c) { n_outliers <- sum( sapply(batches, function(b) { dat <- carbon.fabric.2 %>% filter(test == "FC") %>% filter(condition == c) %>% filter(batch == b) mnr <- maximum_normed_residual(data = dat, strength) glance(mnr)$n_outliers }) ) expected_outliers <- case_when(c == "ETW2" ~ 1, TRUE ~ 0) expect_equal(n_outliers, expected_outliers) }) }) test_that("carbon.fabric.2 ADK matches CMH17-STATS (step 10)", { # Test for between-batch variability within each condition conditions <- unique(carbon.fabric.2$condition) sapply(conditions, function(c) { dat <- carbon.fabric.2 %>% filter(test == "FC") %>% filter(condition == c) adk <- ad_ksample(dat, strength, batch) 
ad <- glance(adk)$ad k <- glance(adk)$k reject_same_dist <- glance(adk)$reject_same_dist adk_expected <- case_when(c == "CTD" ~ 0.351, c == "RTD" ~ 0.448, c == "ETD" ~ 0.453, c == "ETW" ~ 1.627, c == "ETW2" ~ 0.784, TRUE ~ 0) # There is a difference between the way that the test statistic is defined # in cmstatr and CMH17-STATS and a corresponding difference in the # critical value. See documentation for ad_ksample() adk_expected <- adk_expected * (k - 1) expect_equal(ad, adk_expected, tolerance = 0.005) expect_false(reject_same_dist) }) }) test_that("carbon.fabric.2 MNR test matches CMH17-STATS (step 18)", { # Test for outliers within each condition conditions <- unique(carbon.fabric.2$condition) sapply(conditions, function(c) { dat <- carbon.fabric.2 %>% filter(test == "FC") %>% filter(condition == c) mnr <- maximum_normed_residual(dat, strength) n_outliers <- glance(mnr)$n_outliers expect_equal(n_outliers, 0) }) }) test_that("carbon.fabric.2 Normality of pooled norm data matches (step 23)", { # Check normality of pooled set of normalized data using Anderson Darling # test res <- carbon.fabric.2 %>% filter(test == "FC") %>% mutate(norm_strength = normalize_group_mean(strength, condition)) %>% anderson_darling_normal(norm_strength) expect_equal(res$osl, 0.5765, tolerance = 1e-4) }) test_that("carbon.fabric.2 equality of variances matches (step 28)", { # Test for equality of variances among condition groups res <- carbon.fabric.2 %>% filter(test == "FC") %>% levene_test(strength, condition) expect_equal(res$f, 3.884, tolerance = 0.001) }) test_that("carbon.fabric.2 pooled sd basis values match (step 30)", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% basis_pooled_sd(strength, condition, override = c("pooled_variance_equal", "outliers_within_batch", "between_group_variability")) res$basis %>% mutate(expected = case_when(group == "CTD" ~ 89.22, group == "RTD" ~ 80.06, group == "ETW" ~ 48.53, group == "ETW2" ~ 42.27, group == "ETD" ~ 66.56, TRUE ~ 0)) %>% 
mutate(expect_equal(value, expected, tolerance = 0.005)) }) test_that("carbon.fabric.2 equality of normalized variance (step 31)", { # Test for equality of normalized variances among condition groups res <- carbon.fabric.2 %>% filter(test == "FC") %>% mutate(norm_strength = normalize_group_mean(strength, condition)) %>% levene_test(norm_strength, condition) expect_equal(res$f, 1.065, tolerance = 0.0005) }) test_that("carbon.fabric.2 pooled CV basis values match (step 39-41)", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% basis_pooled_cv(strength, condition, batch, override = c("outliers_within_batch")) res$basis %>% mutate(expected = case_when(group == "CTD" ~ 86.91, group == "RTD" ~ 78.83, group == "ETW" ~ 51.04, group == "ETW2" ~ 45.51, group == "ETD" ~ 66.93, TRUE ~ 0)) %>% mutate(expect_equal(value, expected, tolerance = 0.005)) }) test_that("carbon.fabric.2 ADK matches CMH17-STATS - modCV (step 10)", { # Test for between-batch variability within each condition (modified CV) conditions <- unique(carbon.fabric.2$condition) sapply(conditions, function(c) { dat <- carbon.fabric.2 %>% filter(test == "FC") %>% mutate(trans_strength = transform_mod_cv_ad( strength, condition, batch)) %>% filter(condition == c) adk <- ad_ksample(dat, trans_strength, batch) ad <- glance(adk)$ad k <- glance(adk)$k reject_same_dist <- glance(adk)$reject_same_dist adk_expected <- case_when(c == "CTD" ~ 0.351, c == "RTD" ~ 0.401, c == "ETD" ~ 0.495, c == "ETW" ~ 1.051, c == "ETW2" ~ 0.670, TRUE ~ 0) # There is a difference between the way that the test statistic is defined # in cmstatr and CMH17-STATS and a corresponding difference in the # critical value. 
See documentation for ad_ksample() adk_expected <- adk_expected * (k - 1) expect_equal(ad, adk_expected, tolerance = 0.005) expect_false(reject_same_dist) }) }) test_that("carbon.fabric.2 Normality of pooled norm data - modCV (step 23)", { # Check normality of pooled set of normalized data using Anderson Darling # test (modified CV) res <- carbon.fabric.2 %>% filter(test == "FC") %>% mutate(trans_strength = transform_mod_cv_ad(strength, condition, batch)) %>% mutate(norm_strength = normalize_group_mean(trans_strength, condition)) %>% anderson_darling_normal(norm_strength) expect_equal(res$osl, 0.6036, tolerance = 1e-4) }) test_that("carbon.fabric.2 equality of variances - modCV (step 28)", { # Test for equality of variances among condition groups res <- carbon.fabric.2 %>% filter(test == "FC") %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% ungroup() %>% levene_test(trans_strength, condition) expect_equal(res$f, 2.862, tolerance = 0.075) # Test again, using the modcv argument res <- carbon.fabric.2 %>% filter(test == "FC") %>% levene_test(strength, condition, modcv = TRUE) expect_equal(res$f, 2.862, tolerance = 0.075) }) test_that("carbon.fabric.2 pooled sd basis values - modCV (step 30)", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% ungroup() %>% basis_pooled_sd(trans_strength, condition, override = c("pooled_variance_equal", "outliers_within_batch", "between_group_variability")) res$basis %>% mutate(expected = case_when(group == "CTD" ~ 88.64, group == "RTD" ~ 79.48, group == "ETW" ~ 47.95, group == "ETW2" ~ 41.68, group == "ETD" ~ 65.97, TRUE ~ 0)) %>% mutate(expect_equal(value, expected, tolerance = 0.005)) # Test again, using the modcv argument res <- carbon.fabric.2 %>% filter(test == "FC") %>% basis_pooled_sd(strength, condition, modcv = TRUE, override = c("pooled_variance_equal", "outliers_within_batch", "between_group_variability")) 
res$basis %>% mutate(expected = case_when(group == "CTD" ~ 88.64, group == "RTD" ~ 79.48, group == "ETW" ~ 47.95, group == "ETW2" ~ 41.68, group == "ETD" ~ 65.97, TRUE ~ 0)) %>% mutate(expect_equal(value, expected, tolerance = 0.005)) }) test_that("carbon.fabric.2 equality of norm variance - modCV (step 31)", { # Test for equality of normalized variacnes among condition groups res <- carbon.fabric.2 %>% filter(test == "FC") %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% ungroup() %>% mutate(norm_strength = normalize_group_mean(trans_strength, condition)) %>% levene_test(norm_strength, condition) expect_equal(res$f, 0.226, tolerance = 0.05 / 0.226) # Test again, using the modcv argument res <- carbon.fabric.2 %>% filter(test == "FC") %>% mutate(norm_strength = normalize_group_mean(strength, condition)) %>% levene_test(norm_strength, condition, modcv = TRUE) expect_equal(res$f, 0.226, tolerance = 0.05 / 0.226) }) test_that("carbon.fabric.2 pooled CV basis values - modCV (step 39-41)", { res <- carbon.fabric.2 %>% filter(test == "FC") %>% group_by(condition) %>% mutate(trans_strength = transform_mod_cv(strength)) %>% ungroup() %>% basis_pooled_cv(trans_strength, condition, override = c("outliers_within_batch", "between_group_variability")) res$basis %>% mutate(expected = case_when(group == "CTD" ~ 85.98, group == "RTD" ~ 77.99, group == "ETW" ~ 50.50, group == "ETW2" ~ 45.02, group == "ETD" ~ 66.22, TRUE ~ 0)) %>% mutate(expect_equal(value, expected, tolerance = 0.005)) # Test again, using the modcv argument res <- carbon.fabric.2 %>% filter(test == "FC") %>% basis_pooled_cv(strength, condition, modcv = TRUE, override = c("outliers_within_batch", "between_group_variability")) res$basis %>% mutate(expected = case_when(group == "CTD" ~ 85.98, group == "RTD" ~ 77.99, group == "ETW" ~ 50.50, group == "ETW2" ~ 45.02, group == "ETD" ~ 66.22, TRUE ~ 0)) %>% mutate(expect_equal(value, expected, tolerance = 0.005)) }) test_dat <- tribble( 
~cond, ~batch, ~strength, 1, "A", 97.89583, 1, "A", 109.76291, 1, "A", 107.66701, 1, "A", 112.69444, 1, "A", 99.71637, 1, "A", 108.68871, 1, "B", 106.37708, 1, "B", 110.96946, 1, "B", 102.78598, 1, "D", 100.83316, 1, "D", 105.66699, 1, "D", 103.14165, 1, "D", 105.36496, 1, "D", 101.76552, 1, "D", 103.81940, 1, "E", 105.59004, 1, "E", 107.74373, 1, "E", 108.88401, 1, "E", 103.72913, 1, "E", 107.07630, 1, "E", 104.94092, 1, "C", 103.99478, 1, "C", 104.57191, 1, "C", 104.65751, 1, "C", 106.26021, 1, "C", 108.80221, 1, "C", 104.45781, 1, "C", 105.71117, 1, "C", 104.68779, 1, "C", 96.90000, 2, "A", 108.89755, 2, "A", 112.26424, 2, "A", 114.14414, 2, "A", 109.56092, 2, "A", 112.81857, 2, "A", 110.04231, 2, "B", 105.92060, 2, "B", 104.76159, 2, "B", 105.39842, 2, "D", 97.83402, 2, "D", 103.60583, 2, "D", 101.81152, 2, "D", 100.55827, 2, "D", 104.10650, 2, "D", 103.56471, 2, "E", 105.18761, 2, "E", 103.81970, 2, "E", 104.31939, 2, "E", 105.21502, 2, "E", 109.61211, 2, "E", 108.43762, 2, "C", 104.08681, 2, "C", 109.60930, 2, "C", 107.17149, 2, "C", 107.10460, 2, "C", 102.88207, 2, "C", 106.91041, 2, "C", 104.40203, 2, "C", 105.43848, 2, "C", 109.50752, 3, "B", 93.22953, 3, "B", 97.09608, 3, "B", 97.05682, 3, "A", 102.57570, 3, "A", 98.16441, 3, "A", 97.67216, 3, "A", 103.10081, 3, "A", 106.05421, 3, "A", 100.43507, 3, "B", 101.64746, 3, "B", 103.24892, 3, "B", 101.74833, 3, "D", 93.48767, 3, "D", 89.77051, 3, "D", 95.24123, 3, "D", 96.81138, 3, "D", 86.88577, 3, "D", 89.46870, 3, "E", 96.80020, 3, "E", 97.87896, 3, "E", 97.91697, 3, "E", 98.73682, 3, "E", 96.20074, 3, "E", 99.22644, 3, "C", 102.06662, 3, "C", 102.04738, 3, "C", 96.40601, 3, "C", 99.34420, 3, "C", 99.20519, 3, "C", 101.22471, 3, "C", 99.60168, 3, "C", 106.73832, 3, "C", 110.09276 ) test_that("MNR within each batch and condition matches CMH17-STATS", { expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), 
"between_group_variability" ) expect_true("outliers_within_batch" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_true("outliers_within_batch" %in% res_modcv$diagnostic_failures) expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_true("outliers_within_batch" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_true("outliers_within_batch" %in% res_modcv$diagnostic_failures) conditions <- unique(test_dat$cond) batches <- unique(test_dat$batch) sapply(conditions, function(c) { sapply(batches, function(b) { dat <- test_dat %>% filter(cond == c & batch == b) mnr <- maximum_normed_residual(dat, strength) n_outliers <- glance(mnr)$n_outliers if (c == 1 & b == "C") { expect_gt(n_outliers, 0) } else { expect_equal(n_outliers, 0) } }) }) }) test_that("ADK for between-batch var. 
within each cond matches CMH17-STATS", { expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_true("between_group_variability" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("between_group_variability" %in% res_modcv$diagnostic_failures) expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_true("between_group_variability" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("between_group_variability" %in% res_modcv$diagnostic_failures) conditions <- unique(test_dat$cond) lapply(conditions, function(c) { dat <- test_dat %>% filter(cond == c) adk <- ad_ksample(dat, strength, batch) ad <- glance(adk)$ad k <- glance(adk)$k expected <- case_when(c == 1 ~ 1.243, c == 2 ~ 3.376, c == 3 ~ 2.793) expect_equal(ad / (k - 1), expected, tolerance = 7e-3) }) lapply(conditions, function(c) { dat <- test_dat %>% mutate(strength = transform_mod_cv_ad(strength, cond, batch)) %>% filter(cond == c) adk <- ad_ksample(dat, strength, batch) ad <- glance(adk)$ad k <- glance(adk)$k expected <- case_when(c == 1 ~ 0.504, c == 2 ~ 1.106, c == 3 ~ 1.508) expect_equal(ad / (k - 1), expected, tolerance = 2e-3) }) }) test_that("MNR within each condition matches CMH17-STATS", { expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_false("outliers_within_group" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_sd(strength, 
cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("outliers_within_group" %in% res_modcv$diagnostic_failures) expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_false("outliers_within_group" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("outliers_within_group" %in% res_modcv$diagnostic_failures) conditions <- unique(test_dat$cond) lapply(conditions, function(c) { dat <- test_dat %>% filter(cond == c) mnr <- maximum_normed_residual(dat, strength) n_outliers <- glance(mnr)$n_outliers expect_equal(n_outliers, 0) }) }) test_that("OSL of data from each batch CMH17-STATS", { conditions <- unique(test_dat$cond) lapply(conditions, function(c) { dat <- test_dat %>% filter(cond == c) osl <- anderson_darling_normal(dat, strength) osl <- glance(osl)$osl expected <- case_when(c == 1 ~ 0.485, c == 2 ~ 0.306, c == 3 ~ 0.287) expect_equal(osl, expected, tolerance = 1e-3) }) lapply(conditions, function(c) { dat <- test_dat %>% mutate(strength = transform_mod_cv_ad(strength, cond, batch)) %>% filter(cond == c) osl <- anderson_darling_normal(dat, strength) osl <- glance(osl)$osl expected <- case_when(c == 1 ~ 0.494, c == 2 ~ 0.467, c == 3 ~ 0.718) expect_equal(osl, expected, tolerance = 1e-3) }) }) test_that("OSL of pooled data after norm to group mean matches CMH17-STATS", { expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_false("pooled_data_normal" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("pooled_data_normal" %in% res_modcv$diagnostic_failures) 
expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_false("pooled_data_normal" %in% res_unmodified$diagnostic_failures) expect_warning( res_modcv <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("pooled_data_normal" %in% res_modcv$diagnostic_failures) res <- test_dat %>% mutate(normalized_strength = normalize_group_mean(strength, cond)) %>% anderson_darling_normal(normalized_strength) expect_equal(res$osl, 0.0798, tolerance = 1e-3) res <- test_dat %>% mutate(strength = transform_mod_cv_ad(strength, cond, batch)) %>% mutate(normalized_strength = normalize_group_mean(strength, cond)) %>% anderson_darling_normal(normalized_strength) expect_equal(res$osl, 0.7045, tolerance = 1e-4) }) test_that("Levene's test among condition groups matches CMH17-STATS", { expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) expect_false("pooled_variance_equal" %in% res_unmodified$diagnostic_failures) lev <- test_dat %>% levene_test(strength, groups = cond) expect_equal(lev$f, 1.051, tolerance = 1e-3) expect_warning( res_modcv <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) expect_false("pooled_variance_equal" %in% res_modcv$diagnostic_failures) lev <- test_dat %>% group_by(cond) %>% mutate(strength = transform_mod_cv(strength)) %>% ungroup() %>% levene_test(strength, groups = cond) expect_equal(lev$f, 0.005, tolerance = 0.05) expect_warning( expect_warning( res_unmodified <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) lev <- test_dat %>% mutate(strength = normalize_group_mean(strength, cond)) %>% levene_test(strength, groups = cond) expect_equal(lev$f, 
1.631, tolerance = 0.05) expect_warning( res_modcv <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) lev <- test_dat %>% group_by(cond) %>% mutate(strength = transform_mod_cv(strength)) %>% ungroup() %>% mutate(strength = normalize_group_mean(strength, cond)) %>% levene_test(strength, groups = cond) expect_equal(lev$f, 0.064, tolerance = 0.05 / 0.064) }) test_that("Basis values match CMH17-STATS", { expect_warning( expect_warning( res <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) res$basis %>% mutate(expected = case_when(group == 1 ~ 98.21, group == 2 ~ 99.27, group == 3 ~ 92.22)) %>% mutate(expect_equal(value, expected, tolerance = 1e-2)) expect_warning( res <- test_dat %>% basis_pooled_cv(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) res$basis %>% mutate(expected = case_when(group == 1 ~ 94.49, group == 2 ~ 95.51, group == 3 ~ 88.76)) %>% mutate(expect_equal(value, expected, tolerance = 1e-2)) expect_warning( expect_warning( res <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = FALSE), "outliers_within_batch" ), "between_group_variability" ) res$basis %>% mutate(expected = case_when(group == 1 ~ 98.42, group == 2 ~ 99.55, group == 3 ~ 92.00)) %>% mutate(expect_equal(value, expected, tolerance = 1e-2)) expect_warning( res <- test_dat %>% basis_pooled_sd(strength, cond, batch, modcv = TRUE), "outliers_within_batch" ) res$basis %>% mutate(expected = case_when(group == 1 ~ 94.71, group == 2 ~ 95.84, group == 3 ~ 88.32)) %>% mutate(expect_equal(value, expected, tolerance = 1e-2)) }) cmstatr/tests/testthat/test-verifytidy.R0000644000176200001440000000175214065516021020234 0ustar liggesuserssuppressMessages(library(rlang)) # A mock function that uses process_tidy_vector_input mock_fcn <- function(data = NULL, x) { verify_tidy_input( df = data, x = x, c = match.call(), arg_name = "x") vec <- eval_tidy(enquo(x), 
data) return(vec) } test_that("A vector is returned", { res <- mock_fcn(carbon.fabric, strength) expect_true(is.vector(res)) res <- mock_fcn(carbon.fabric, "strength") expect_true(is.vector(res)) res <- mock_fcn(NULL, c(1, 2, 3)) expect_true(is.vector(res)) }) test_that("Correct warning messages are given", { # Should give the name of the data argument in mock_fcn expect_error( mock_fcn(c(1, 2, 3)), regexp = ".*`data`" ) expect_error( mock_fcn(data = c(1, 2, 3)), regexp = ".*`data`" ) expect_error( mock_fcn(c(1, 2, 3)), regexp = ".*mock_fcn\\(x = c\\(1, 2, 3\\)\\)" ) expect_error( mock_fcn(carbon.fabric$strength), regexp = ".*mock_fcn\\(x = carbon\\.fabric\\$strength\\)" ) }) cmstatr/tests/testthat/test-adtest.R0000644000176200001440000001604314065516021017321 0ustar liggesuserstest_that("AD test gives same results for a data frame and a vector", { data <- data.frame( strength = c( 79.9109621761937, 77.9447436346388, 79.717168019752, 87.3547460860547, 76.2404769192413, 75.7026911300246, 79.5952709280298, 76.7833784980155, 77.5791472067831, 78.4164523339268, 79.2819398818745, 77.6346481930964, 81.2182937743241, 81.1826431730731, 86.0561762593461, 82.1837784884495, 80.7564920650884, 79.3614980225488 )) res_vec1 <- anderson_darling_normal( x = data$strength) # value from STAT17 (0.0840) expect_equal(res_vec1$osl, 0.0840, tolerance = 0.002) res_df <- anderson_darling_normal( data, strength) expect_equal(res_vec1$osl, res_df$osl) }) test_that("AD test matches results from STAT17 (normal)", { data <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 )) res_vec <- anderson_darling_normal(x = data$strength) expect_equal(res_vec$osl, 0.465, tolerance = 0.002) # Do it again but pass in a data.frame res_df <- anderson_darling_normal(data, strength) expect_equal(res_vec$osl, res_df$osl) }) test_that("AD test matches results from 
STAT17 (lognormal)", { data <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 )) res_vec <- anderson_darling_lognormal(x = data$strength) expect_equal(res_vec$osl, 0.480, tolerance = 0.002) # Do it again but pass in a data.frame res_df <- anderson_darling_lognormal(data, strength) expect_equal(res_vec$osl, res_df$osl) }) test_that("AD test matches results from STAT17 (weibull)", { data <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) # OSL: 0.179 res_vec <- anderson_darling_weibull(x = data$strength) expect_equal(res_vec$osl, 0.179, tolerance = 0.002) # Do it again but pass in a data.frame res_df <- anderson_darling_weibull(data, strength) expect_equal(res_vec$osl, res_df$osl) }) test_that("ad_p_unknown_parameters matches normal results from Lawless", { # Comparison with the results from: # J. F. Lawless, \emph{Statistical models and methods for lifetime data}. # New York: Wiley, 1982. 
# See page 458 fcn <- function(a, n) { ad_p_normal_unknown_param(a / (1 + 4 / n - 25 / n ^ 2), n) } n <- 5 expect_equal(fcn(0.576, n), 0.15, tolerance = 0.002 / 0.15) expect_equal(fcn(0.656, n), 0.10, tolerance = 0.002 / 0.10) expect_equal(fcn(0.787, n), 0.05, tolerance = 0.002 / 0.05) expect_equal(fcn(0.918, n), 0.025, tolerance = 0.002 / 0.025) expect_equal(fcn(1.092, n), 0.01, tolerance = 0.002 / 0.01) n <- 10 expect_equal(fcn(0.576, n), 0.15, tolerance = 0.002 / 0.15) expect_equal(fcn(0.656, n), 0.10, tolerance = 0.002 / 0.10) expect_equal(fcn(0.787, n), 0.05, tolerance = 0.002 / 0.05) expect_equal(fcn(0.918, n), 0.025, tolerance = 0.002 / 0.025) expect_equal(fcn(1.092, n), 0.01, tolerance = 0.002 / 0.01) n <- 20 expect_equal(fcn(0.576, n), 0.15, tolerance = 0.002 / 0.15) expect_equal(fcn(0.656, n), 0.10, tolerance = 0.002 / 0.10) expect_equal(fcn(0.787, n), 0.05, tolerance = 0.002 / 0.05) expect_equal(fcn(0.918, n), 0.025, tolerance = 0.002 / 0.025) expect_equal(fcn(1.092, n), 0.01, tolerance = 0.002 / 0.01) }) test_that("ad_p_unknown_parameters matches weibull results from Lawless", { # Comparison with the results from: # J. F. Lawless, \emph{Statistical models and methods for lifetime data}. # New York: Wiley, 1982. # See p. 
455 fcn <- function(a, n) { ad_p_weibull_unknown_param(a / (1 + 0.2 / sqrt(n)), n) } n <- 5 expect_equal(fcn(0.474, n), 0.25, tolerance = 0.002 / 0.25) expect_equal(fcn(0.637, n), 0.10, tolerance = 0.002 / 0.10) expect_equal(fcn(0.757, n), 0.05, tolerance = 0.002 / 0.05) expect_equal(fcn(0.877, n), 0.025, tolerance = 0.002 / 0.025) expect_equal(fcn(1.038, n), 0.01, tolerance = 0.002 / 0.01) n <- 10 expect_equal(fcn(0.474, n), 0.25, tolerance = 0.002 / 0.25) expect_equal(fcn(0.637, n), 0.10, tolerance = 0.002 / 0.10) expect_equal(fcn(0.757, n), 0.05, tolerance = 0.002 / 0.05) expect_equal(fcn(0.877, n), 0.025, tolerance = 0.002 / 0.025) expect_equal(fcn(1.038, n), 0.01, tolerance = 0.002 / 0.01) n <- 20 expect_equal(fcn(0.474, n), 0.25, tolerance = 0.002 / 0.25) expect_equal(fcn(0.637, n), 0.10, tolerance = 0.002 / 0.10) expect_equal(fcn(0.757, n), 0.05, tolerance = 0.002 / 0.05) expect_equal(fcn(0.877, n), 0.025, tolerance = 0.002 / 0.025) expect_equal(fcn(1.038, n), 0.01, tolerance = 0.002 / 0.01) }) test_that("print.anderson_darling contains expected values", { data <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 )) res_vec <- anderson_darling_normal(x = data$strength) # should include the distribution expect_output(print(res_vec), "distribution.*normal", ignore.case = TRUE) # should include the signficance for known parameters expect_output(print(res_vec), "OSL.*0.464.*unknown", ignore.case = TRUE) # conclusion should be printed expect_output(print(res_vec), "conclusion.*is drawn.*alpha.*0.05", ignore.case = TRUE) expect_false(res_vec$reject_distribution) # if alpha is adjusted to be above OSL, the conclusion should be reversed res_vec <- anderson_darling_normal(x = data$strength, alpha = 0.470) expect_output(print(res_vec), "conclusion.*is not drawn.*alpha.*0.47", ignore.case = TRUE) 
expect_true(res_vec$reject_distribution) }) test_that("glance method produces expected results", { x <- c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) res <- anderson_darling_lognormal(x = x) glance_res <- glance(res) expect_equal(glance_res$osl[1], 0.480, tolerance = 0.002) expect_equal(glance_res$dist[1], "Lognormal") expect_equal(glance_res$n[1], 18) expect_equal(glance_res$A[1], 0.277, tolerance = 0.005) expect_equal(glance_res$alpha, 0.05) expect_equal(glance_res$reject_distribution, FALSE) }) cmstatr/tests/testthat/test-equiv.R0000644000176200001440000003736114125365511017177 0ustar liggesuserssuppressMessages(library(dplyr)) test_that("k-factor warnings and errors are raised", { expect_error(k_equiv(-0.01, 3)) expect_error(k_equiv(1.01, 3)) expect_warning(k_equiv(1e-9, 3)) expect_warning(k_equiv(0.75, 3)) expect_error(k_equiv(0.05, 1)) }) test_that("k-factors match those published in literature", { if (requireNamespace("tidyr", quietly = TRUE)) { # the files k1.vangel.csv and k2.vangel.csv contain the k factors # published in Vangel's 2002 paper. 
k1_vangel <- read.csv(system.file("extdata", "k1.vangel.csv", package = "cmstatr")) %>% tidyr::gather(n, k1, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))) k2_vangel <- read.csv(system.file("extdata", "k2.vangel.csv", package = "cmstatr")) %>% tidyr::gather(n, k2, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))) # This test is super slow, so we will only run a sampling of the values # that # would normally be used in the validation, unless the flag full_test is # set to TRUE full_test <- FALSE diff_df <- inner_join(k1_vangel, k2_vangel, by = c("alpha", "n")) %>% mutate(error_threshold = case_when(n == 2 & alpha > 0.25 ~ 0.10, n == 2 & alpha <= 0.25 ~ 0.02, n > 2 & alpha > 0.25 ~ 0.005, TRUE ~ 5e-4)) diff_df <- diff_df %>% sample_n(ifelse(full_test, length(diff_df$k1), 5)) %>% tidyr::gather(fct, vangel, k1:k2) %>% group_by(alpha, n) %>% mutate(calc = k_equiv(first(alpha), first(n))) %>% mutate(diff = (vangel - calc) / vangel) %>% ungroup() %>% rowwise() %>% mutate(check = expect_lte(abs(diff), error_threshold, label = paste0("Validation failure for ", "alpha=", alpha, ", n=", n, " computed ", fct, "=", calc, " but validation ", fct, "=", vangel, ".\n"))) } }) test_that("check equiv_mean_extremum against HYTEQ using some example data", { data_sample <- c(145.055, 148.329, 142.667, 141.795, 144.139, 135.923, 136.177, 133.523, 134.350) res <- equiv_mean_extremum(alpha = 0.05, data_sample = data_sample, mean_qual = 141.310, sd_qual = 6.415) expect_equal(res$alpha, 0.05) expect_equal(res$n_sample, 9) expect_equal(res$modcv, FALSE) expect_equal(res$cv, 6.415 / 141.310, tolerance = 1e-8) expect_equal(res$threshold_min_indiv, 123.725, tolerance = 1e-2) expect_equal(res$threshold_mean, 137.197, tolerance = 1e-2) expect_equal(res$result_min_indiv, "PASS") expect_equal(res$result_mean, "PASS") expect_output(print(res), "alpha\\W*=\\W*0.05") expect_output(print(res), "n\\W*=\\W*9") expect_output(print(res), "Sample[s:]*\\W*133.5\\d*\\W*140.2\\d*") 
expect_output(print(res), "Threshold[s:]*\\W*123.7\\d*\\W*137.[12]") expect_output(print(res), "Equiv\\w*\\W*PASS\\W*PASS") }) test_that("check equiv_mean_extremum against HYTEQ using an example (modCV)", { data_sample <- c(145.055, 148.329, 142.667, 141.795, 144.139, 135.923, 136.177, 133.523, 134.350) res <- equiv_mean_extremum(alpha = 0.05, data_sample = data_sample, mean_qual = 141.310, sd_qual = 6.415, modcv = TRUE) expect_equal(res$alpha, 0.05) expect_equal(res$n_sample, 9) expect_equal(res$modcv, TRUE) expect_equal(res$cv, 6.415 / 141.310, tolerance = 1e-8) expect_equal(res$cv_star, 0.0627, tolerance = 1e-3) expect_equal(res$threshold_min_indiv, 117.024, tolerance = 1e-2) expect_equal(res$threshold_mean, 135.630, tolerance = 1e-2) expect_equal(res$result_min_indiv, "PASS") expect_equal(res$result_mean, "PASS") #ensure print indicates it's modCV expect_output(print(res), "[Mm]od\\w*\\WCV") expect_output(print(res), "alpha\\W*=\\W*0.05") expect_output(print(res), "n\\W*=\\W*9") expect_output(print(res), "Sample[s:]*\\W*133.5\\d*\\W*140.2\\d*") expect_output(print(res), "Threshold[s:]*\\W*117.0\\d*\\W*135.6") expect_output(print(res), "Equiv\\w*\\W*PASS\\W*PASS") }) test_that("check three ways of specifying qual data are same (mean_ext)", { data_qual <- c(145.055, 148.329, 142.667, 141.795, 144.139, 135.923, 136.177, 133.523, 134.350) data_qual_df <- data.frame(strength = data_qual) res1 <- equiv_mean_extremum(alpha = 0.05, data_qual = data_qual, n_sample = 5) res2 <- equiv_mean_extremum(alpha = 0.05, df_qual = data_qual_df, data_qual = strength, n_sample = 5) res3 <- equiv_mean_extremum(alpha = 0.05, mean_qual = mean(data_qual), sd_qual = sd(data_qual), n_sample = 5) # the calls will be different, and that's okay res1$call <- NULL res2$call <- NULL res3$call <- NULL expect_equal(res1, res2) expect_equal(res1, res3) }) test_that("check that glance.equiv_mean_extremum produces expected results", { data_sample <- c(145.055, 148.329, 142.667, 141.795, 144.139, 
135.923, 136.177, 133.523, 134.350) res <- equiv_mean_extremum(alpha = 0.05, data_sample = data_sample, mean_qual = 141.310, sd_qual = 6.415, modcv = TRUE) res <- glance(res) expect_equal(res$alpha[1], 0.05) expect_equal(res$n_sample[1], 9) expect_equal(res$modcv[1], TRUE) expect_equal(res$threshold_min_indiv[1], 117.024, tolerance = 1e-2) expect_equal(res$threshold_mean[1], 135.630, tolerance = 1e-2) expect_equal(res$result_min_indiv[1], "PASS") expect_equal(res$result_mean[1], "PASS") expect_equal(res$min_sample[1], min(data_sample)) expect_equal(res$mean_sample[1], mean(data_sample)) res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9, modcv = TRUE) res <- glance(res) expect_equal(ncol(res), 5) expect_equal(res$alpha[1], 0.05) expect_equal(res$n_sample[1], 9) expect_equal(res$modcv[1], TRUE) expect_equal(res$threshold_min_indiv[1], 117.024, tolerance = 1e-2) expect_equal(res$threshold_mean[1], 135.630, tolerance = 1e-2) }) test_that("check equiv_change_mean against HYTEQ using some example data", { res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162) expect_equal(res$alpha, 0.05) expect_equal(res$n_sample, 9) expect_equal(res$mean_sample, 9.02) expect_equal(res$sd_sample, 0.15785) expect_equal(res$n_qual, 28) expect_equal(res$mean_qual, 9.24) expect_equal(res$sd_qual, 0.162) expect_equal(res$sp, 0.1608, tolerance = 5e-4 / 0.1608) expect_equal(res$t0, -3.570, tolerance = 5e-3) expect_equal(res$t_req, 2.030, tolerance = 5e-3) expect_equal(res$threshold, c(9.115, 9.365), tolerance = 5e-3) expect_equal(res$modcv, FALSE) expect_equal(res$result, "FAIL") expect_output(print(res), "alpha\\W*=\\W*0.05") expect_output(print(res), "Number\\W*28\\W*9") expect_output(print(res), "Mean\\W*9.24\\d*\\W*9.02\\d*") expect_output(print(res), "Result\\W*FAIL") expect_output(print(res), "Range\\W*9.11[45]\\d*\\W*\\w*\\W*9.365\\d*") }) test_that("check 
equiv_change_mean against HYTEQ using an example (modCV)", { res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, modcv = TRUE) expect_equal(res$alpha, 0.05) expect_equal(res$n_sample, 9) expect_equal(res$mean_sample, 9.02) expect_equal(res$sd_sample, 0.15785) expect_equal(res$n_qual, 28) expect_equal(res$mean_qual, 9.24) expect_equal(res$sd_qual, 0.162) expect_equal(res$sp, 0.4927, tolerance = 5e-4) expect_equal(res$t0, -1.165, tolerance = 5e-3) expect_equal(res$t_req, 2.03, tolerance = 5e-3) expect_equal(res$threshold, c(8.857, 9.623), tolerance = 5e-3) expect_equal(res$modcv, TRUE) expect_equal(res$result, "PASS") #ensure print indicates it's modCV expect_output(print(res), "[Mm]od\\w*\\WCV") expect_output(print(res), "alpha\\W*=\\W*0.05") expect_output(print(res), "Number\\W*28\\W*9") expect_output(print(res), "Mean\\W*9.24\\d*\\W*9.02\\d*") expect_output(print(res), "Result\\W*PASS") expect_output(print(res), "Range\\W*8.85[67]\\d*\\W*\\w*\\W*9.623\\d*") }) test_that("check four ways of specifying qual data are same (chg in mean)", { data_qual <- c(145.055, 148.329, 142.667, 141.795, 144.139, 135.923, 136.177, 133.523, 134.350) data_qual_df <- data.frame(strength = data_qual) data_sample <- c(145.055, 148.329, 142.667, 141.795, 144.139) res1 <- equiv_change_mean(alpha = 0.05, data_qual = data_qual, n_sample = length(data_sample), mean_sample = mean(data_sample), sd_sample = sd(data_sample)) res2 <- equiv_change_mean(alpha = 0.05, df_qual = data_qual_df, data_qual = strength, n_sample = length(data_sample), mean_sample = mean(data_sample), sd_sample = sd(data_sample)) res3 <- equiv_change_mean(alpha = 0.05, mean_qual = mean(data_qual), sd_qual = sd(data_qual), n_qual = length(data_qual), n_sample = length(data_sample), mean_sample = mean(data_sample), sd_sample = sd(data_sample)) res4 <- equiv_change_mean(alpha = 0.05, data_qual = data_qual, data_sample = data_sample) # the 
calls will be different, and that's okay res1$call <- NULL res2$call <- NULL res3$call <- NULL res4$call <- NULL expect_equal(res1, res2) expect_equal(res1, res3) expect_equal(res1, res4) }) test_that("glance.equiv_change_mean produces expected results", { res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162) res <- glance(res) expect_equal(res$alpha[1], 0.05) expect_equal(res$n_sample[1], 9) expect_equal(res$mean_sample[1], 9.02) expect_equal(res$sd_sample[1], 0.15785) expect_equal(res$n_qual[1], 28) expect_equal(res$mean_qual[1], 9.24) expect_equal(res$sd_qual[1], 0.162) expect_equal(res$sp[1], 0.1608, tolerance = 5e-4 / 0.1608) expect_equal(res$t0[1], -3.570, tolerance = 5e-3) expect_equal(res$t_req[1], 2.030, tolerance = 5e-3) expect_equal(res$threshold_min[1], 9.115, tolerance = 5e-3) expect_equal(res$threshold_max[1], 9.365, tolerance = 5e-3) expect_equal(res$modcv[1], FALSE) expect_equal(res$result[1], "FAIL") }) test_that("equiv_mean_extremum produces expected errors and warnings", { expect_error( equiv_mean_extremum( alpha = -0.05, n_sample = 9, mean_qual = 9.24, sd_qual = 0.162), "alpha" ) expect_error( equiv_mean_extremum( alpha = 1.05, n_sample = 9, mean_qual = 9.24, sd_qual = 0.162), "alpha" ) expect_warning( equiv_mean_extremum( alpha = 0.05, data_sample = runif(9), n_sample = 9, mean_qual = 9.24, sd_qual = 0.162), "n_sample" ) set.seed(100) expect_warning( expect_warning( equiv_mean_extremum( alpha = 0.05, n_sample = 9, data_qual = runif(28), mean_qual = 9.24, sd_qual = 0.162), "Both data_qual and mean_qual" ), "Both data_qual and sd_qual" ) set.seed(101) expect_warning( expect_warning( equiv_mean_extremum( alpha = 0.05, n_sample = 9, data_qual = runif(28), mean_qual = 9.24, sd_qual = 0.162), "Both data_qual and mean_qual were supplied. mean_qual ignored." ), "Both data_qual and sd_qual were supplied. sd_qual ignored." 
) expect_error( equiv_mean_extremum( alpha = 0.05, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "sample" ) expect_error( equiv_mean_extremum( alpha = 0.05, n_sample = 9, sd_qual = 0.162), "mean_qual" ) expect_error( equiv_mean_extremum( alpha = 0.05, n_sample = 9, mean_qual = 9.24), "sd_qual" ) expect_error( equiv_mean_extremum( alpha = 0.05, mean_qual = 9.24, sd_qual = 0.162), "n_sample" ) }) test_that("equiv_change_mean produces expected errors and warnings", { set.seed(100) expect_error( equiv_change_mean( alpha = -0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "alpha" ) expect_error( equiv_change_mean( alpha = 1.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "alpha" ) set.seed(156) expect_warning( expect_warning( expect_warning( equiv_change_mean( alpha = 0.05, data_sample = runif(9), n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "Both data_sample and n_sample supplied. n_sample ignored." ), "Both data_sample and mean_sample supplied. mean_sample ignored" ), "Both data_sample and sd_sample supplied. sd_sample ignored" ) expect_error( equiv_change_mean( alpha = 0.05, n_sample = 9, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "mean_sample" ) expect_error( equiv_change_mean( alpha = 0.05, n_sample = 9, mean_sample = 9.02, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "sd_sample" ) expect_error( equiv_change_mean( alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, mean_qual = 9.24, sd_qual = 0.162), "n_qual" ) set.seed(109) expect_warning( expect_warning( expect_warning( equiv_change_mean( alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, data_qual = runif(28), n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "Both data_qual and n_qual supplied. n_qual ignored." 
), "Both data_qual and mean_qual supplied. mean_qual ignored" ), "Both data_qual and sd_qual supplied. sd_qual ignored" ) set.seed(110) expect_warning( expect_warning( expect_warning( equiv_change_mean( alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, data_qual = runif(28), n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "Both data_qual and n_qual supplied. n_qual ignored." ), "Both data_qual and mean_qual supplied. mean_qual ignored" ), "Both data_qual and sd_qual supplied. sd_qual ignored" ) expect_error( equiv_change_mean( alpha = 0.05, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162), "sample" ) expect_error( equiv_change_mean( alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, sd_qual = 0.162), "mean_qual" ) expect_error( equiv_change_mean( alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24), "sd_qual" ) }) cmstatr/tests/testthat/_snaps/0000755000176200001440000000000014125365511016217 5ustar liggesuserscmstatr/tests/testthat/_snaps/plotting/0000755000176200001440000000000014573744701020070 5ustar liggesuserscmstatr/tests/testthat/_snaps/plotting/stat-normal-surv-func.svg0000644000176200001440000002330114573744650025002 0ustar liggesusers 0.25 0.50 0.75 130 135 140 145 150 strength batch A B C Distribution of Data For Each Batch cmstatr/tests/testthat/_snaps/plotting/stat-normal-surv-func-and-stat-esf.svg0000644000176200001440000041041114573744650027270 0ustar liggesusers 0.00 0.25 0.50 0.75 1.00 90 120 150 strength batch A B C Distribution of Data For Each Batch cmstatr/tests/testthat/_snaps/plotting/stat-esf.svg0000644000176200001440000002247414573744647022361 0ustar liggesusers 0.00 0.25 0.50 0.75 1.00 130 135 140 145 150 strength batch A B C Distribution of Data For Each Batch cmstatr/tests/testthat/_snaps/plot-nested/0000755000176200001440000000000014573744637020476 5ustar 
liggesuserscmstatr/tests/testthat/_snaps/plot-nested/nested-data-plot-no-grouping.svg0000644000176200001440000003002314065516021026600 0ustar liggesusers 130 140 150 s t r e n g t h nested_data_plot-no_grouping cmstatr/tests/testthat/_snaps/plot-nested/nested-data-plot-color.svg0000644000176200001440000005701214573744420025473 0ustar liggesusers A B C 1 2 1 2 1 2 panel batch 130 135 140 145 150 s t r e n g t h b a t c h A B C nested_data_plot-color cmstatr/tests/testthat/_snaps/plot-nested/nested-data-plot-two-groupings.svg0000644000176200001440000005235514573744416027213 0ustar liggesusers A B C 1 2 1 2 1 2 panel batch 130 135 140 145 150 s t r e n g t h nested_data_plot-two_groupings cmstatr/tests/testthat/_snaps/plot-nested/nested-data-plot-single-grouping.svg0000644000176200001440000003071714573744415027475 0ustar liggesusers A B C batch 130 135 140 145 150 s t r e n g t h nested_data_plot-single_grouping cmstatr/tests/testthat/_snaps/plot-nested/nested-data-plot-single-obs-per-group.svg0000644000176200001440000020017414573744417030342 0ustar liggesusers A B C 1 2 1 2 1 2 129.224 137.194 139.728 144.702 127.286 129.261 130.031 132.104 132.88 140.038 134.912 137.618 139.217 141.558 135.686 136.075 145.001 147.053 150.242 135.435 139.078 143.715 143.738 146.285 146.825 147.981 148.235 148.418 strength panel batch 130 135 140 145 150 s t r e n g t h nested_data_plot-single_obs_per_group cmstatr/tests/testthat/_snaps/plot-nested/nested-data-plot-color-and-fill.svg0000644000176200001440000023471014573744421027162 0ustar liggesusers CTD ETW ETW2 RTD A B C A B C A B C A B C 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 panel batch condition 130 140 150 s t r e n g t h c o n d i t i o n a a a a CTD ETW ETW2 RTD b a t c h A B C nested_data_plot-color-and-fill cmstatr/tests/testthat/test-checks.R0000644000176200001440000001025314125365511017275 0ustar liggesuserstest_that("perform_checks produces warnings unless overriden", { sample_rules <- list( positive = 
function(pos, ...) { ifelse(pos > 0, "", "Not positive") }, negative = function(neg, ...) { ifelse(neg < 0, "", "Not negative") }, zero = function(z, ...) { ifelse(z == 0, "", "Not zero") } ) expect_warning(res <- perform_checks(sample_rules, pos = -1, neg = -1, z = 0), regexp = "positive") names(res) <- NULL expect_equal(res, c("F", "P", "P")) perform_checks(sample_rules, pos = -1, neg = -1, z = 0, override = "positive") expect_warning(res <- perform_checks(sample_rules, pos = 1, neg = 1, z = 0), regexp = "negative") names(res) <- NULL expect_equal(res, c("P", "F", "P")) perform_checks(sample_rules, pos = 1, neg = 1, z = 0, override = "negative") expect_warning(res <- perform_checks(sample_rules, pos = 1, neg = -1, z = 1), regexp = "zero") names(res) <- NULL expect_equal(res, c("P", "P", "F")) perform_checks(sample_rules, pos = 1, neg = -1, z = 1, override = "zero") # should produce no warnings perform_checks(sample_rules, pos = 1, neg = -1, z = 0) }) test_that("Messages are created for missing parameters", { sample_rules <- list( positive = function(pos, ...) { ifelse(pos > 0, "", "Not positive") }, negative = function(neg, ...) { ifelse(neg < 0, "", "Not negative") }, zero = function(z, ...) { ifelse(z == 0, "", "Not zero") }, neg_or_zero = function(neg, z, ...) { "" } ) expect_message(perform_checks(sample_rules, pos = NULL, neg = -1, z = 0), regexp = "`positive`.+`pos`", all = TRUE) expect_message( expect_message( perform_checks(sample_rules, pos = 1, neg = NULL, z = 0), "negative"), "neg_or_zero" ) # two parameters missing expect_message( expect_message( expect_message( perform_checks(sample_rules, pos = 1, neg = NULL, z = NULL), "negative" ), "zero" ), "neg_or_zero" ) }) test_that("User can specify `all` for overrides", { sample_rules <- list( positive = function(pos, ...) { ifelse(pos > 0, "", "Not positive") }, negative = function(neg, ...) { ifelse(neg < 0, "", "Not negative") }, zero = function(z, ...) 
{ ifelse(z == 0, "", "Not zero") } ) expect_equal( process_overrides("all", sample_rules), c("positive", "negative", "zero") ) expect_equal( process_overrides(c("all"), sample_rules), c("positive", "negative", "zero") ) expect_equal( process_overrides(c("all", "positive"), sample_rules), c("positive", "negative", "zero") ) expect_equal( process_overrides(c("positive", "all"), sample_rules), c("positive", "negative", "zero") ) }) test_that("Invalid overrides produce warnings", { sample_rules <- list( positive = function(pos, ...) { ifelse(pos > 0, "", "Not positive") }, negative = function(neg, ...) { ifelse(neg < 0, "", "Not negative") }, zero = function(z, ...) { ifelse(z == 0, "", "Not zero") } ) expect_warning(process_overrides("invalid", sample_rules), "diagnostic") }) test_that("Errors raised are wrapped with source of warning", { sample_rules <- list( positive = function(pos, ...) { ifelse(pos > 0, "", "Not positive") }, negative = function(neg, ...) { ifelse(neg < 0, "", "Not negative") }, zero = function(z, ...) { ifelse(z == 0, "", "Not zero") }, always_error = function(...) 
{ stop("This is an error") "" } ) # The "always_error" diagnostic test should result in an error containing # the name of the diagnostic test expect_error( perform_checks(sample_rules, pos = +1, neg = -1, z = 0), "always_error" ) # The behavior should be the same, even if some tests fail # In this case, the warnings should still be raised, at least # if they occur before the error expect_error( expect_warning( perform_checks(sample_rules, pos = +1, neg = +1, z = 0), "negative"), "always_error" ) }) cmstatr/tests/testthat/test-mnr.R0000644000176200001440000002264414065516021016635 0ustar liggesuserssuppressMessages(library(dplyr)) cmh_17_cv <- tribble( ~n, ~c, 3, 1.154, 4, 1.481, 5, 1.715, 6, 1.887, 7, 2.02, 8, 2.127, 9, 2.215, 10, 2.29, 11, 2.355, 12, 2.412, 13, 2.462, 14, 2.507, 15, 2.548, 16, 2.586, 17, 2.62, 18, 2.652, 19, 2.681, 20, 2.708, 21, 2.734, 22, 2.758, 23, 2.78, 24, 2.802, 25, 2.822, 26, 2.841, 27, 2.859, 28, 2.876, 29, 2.893, 30, 2.908, 31, 2.924, 32, 2.938, 33, 2.952, 34, 2.965, 35, 2.978, 36, 2.991, 37, 3.003, 38, 3.014, 39, 3.025, 40, 3.036, 41, 3.047, 42, 3.057, 43, 3.067, 44, 3.076, 45, 3.085, 46, 3.094, 47, 3.103, 48, 3.112, 49, 3.12, 50, 3.128, 51, 3.136, 52, 3.144, 53, 3.151, 54, 3.159, 55, 3.166, 56, 3.173, 57, 3.18, 58, 3.187, 59, 3.193, 60, 3.2, 61, 3.206, 62, 3.212, 63, 3.218, 64, 3.224, 65, 3.23, 66, 3.236, 67, 3.241, 68, 3.247, 69, 3.252, 70, 3.258, 71, 3.263, 72, 3.268, 73, 3.273, 74, 3.278, 75, 3.283, 76, 3.288, 77, 3.292, 78, 3.297, 79, 3.302, 80, 3.306, 81, 3.311, 82, 3.315, 83, 3.319, 84, 3.323, 85, 3.328, 86, 3.332, 87, 3.336, 88, 3.34, 89, 3.344, 90, 3.348, 91, 3.352, 92, 3.355, 93, 3.359, 94, 3.363, 95, 3.366, 96, 3.37, 97, 3.374, 98, 3.377, 99, 3.381, 100, 3.384, 101, 3.387, 102, 3.391, 103, 3.394, 104, 3.397, 105, 3.401, 106, 3.404, 107, 3.407, 108, 3.41, 109, 3.413, 110, 3.416, 111, 3.419, 112, 3.422, 113, 3.425, 114, 3.428, 115, 3.431, 116, 3.434, 117, 3.437, 118, 3.44, 119, 3.442, 120, 3.445, 121, 3.448, 122, 3.451, 123, 
3.453, 124, 3.456, 125, 3.459, 126, 3.461, 127, 3.464, 128, 3.466, 129, 3.469, 130, 3.471, 131, 3.474, 132, 3.476, 133, 3.479, 134, 3.481, 135, 3.483, 136, 3.486, 137, 3.488, 138, 3.491, 139, 3.493, 140, 3.495, 141, 3.497, 142, 3.5, 143, 3.502, 144, 3.504, 145, 3.506, 146, 3.508, 147, 3.511, 148, 3.513, 149, 3.515, 150, 3.517, 151, 3.519, 152, 3.521, 153, 3.523, 154, 3.525, 155, 3.527, 156, 3.529, 157, 3.531, 158, 3.533, 159, 3.535, 160, 3.537, 161, 3.539, 162, 3.541, 163, 3.543, 164, 3.545, 165, 3.547, 166, 3.549, 167, 3.551, 168, 3.552, 169, 3.554, 170, 3.556, 171, 3.558, 172, 3.56, 173, 3.561, 174, 3.563, 175, 3.565, 176, 3.567, 177, 3.568, 178, 3.57, 179, 3.572, 180, 3.574, 181, 3.575, 182, 3.577, 183, 3.579, 184, 3.58, 185, 3.582, 186, 3.584, 187, 3.585, 188, 3.587, 189, 3.588, 190, 3.59, 191, 3.592, 192, 3.593, 193, 3.595, 194, 3.596, 195, 3.598, 196, 3.599, 197, 3.601, 198, 3.603, 199, 3.604, 200, 3.606 ) test_that( "Critical MNR values match those published in CMH-17-1G, Table 8.5.7", { cmh_17_cv %>% rowwise() %>% mutate(calc_mnr_crit = maximum_normed_residual_crit(n, 0.05)) %>% mutate(check = expect_lte(abs(calc_mnr_crit - c), 0.001)) }) test_that( "MNR calculation matches example in CMH-17-1G Section 8.3.11.1.1", { # What follows is part of the ETW data from Section 8.3.11.1.1 df <- tribble( ~batch, ~strength, 2, 99.3207107, 2, 115.86177, 2, 82.6133082, 2, 85.3690411, 2, 115.801622, 2, 44.3217741, 2, 117.328077, 2, 88.6782903, 3, 107.676986, 3, 108.960241, 3, 116.12264, 3, 80.2334815, 3, 106.14557, 3, 104.667866, 3, 104.234953 ) # first do some checks to ensure that the above data has been typed # correctly df %>% filter(batch == 2) %>% summarise(check_n = expect_equal(n(), 8), check_mean = expect_lte(abs(mean(strength) - 93.662), 0.001), check_sd = expect_lte(abs(sd(strength) - 24.568), 0.001)) # Check that the MNR test results match res <- df %>% filter(batch == 2) %>% maximum_normed_residual(strength, alpha = 0.05) expect_lte(abs(res$mnr - 2.008), 
0.001) expect_lte(abs(res$crit - 2.127), 0.001) expect_equal(nrow(res$outliers), 0) # no outliers for this batch expect_equal(res$n_outliers, 0) # check the print function expect_output(print(res), "no outliers", ignore.case = TRUE) expect_output(print(res), "MNR.*2.008", ignore.case = TRUE) expect_output(print(res), ".*crit.*2\\.12", ignore.case = TRUE) expect_output(print(res), ".*alpha.*0\\.05", ignore.case = TRUE) # check for typographical errors in the data above df %>% filter(batch == 3) %>% summarise(check = expect_equal(n(), 7), check_mean = expect_lte(abs(mean(strength) - 104.006), 0.001), check_sd = expect_lte(abs(sd(strength) - 11.218), 0.001)) # Check that the MNR test results match res <- df %>% filter(batch == 3) %>% maximum_normed_residual(strength, alpha = 0.05) expect_lte(abs(res$mnr - 2.119), 0.001) expect_lte(abs(res$crit - 2.02), 0.001) expect_equal(nrow(res$outliers), 1) # one outlier for this batch expect_equal(res$n_outliers, 1) # check the print function expect_output(print(res), "outliers", ignore.case = TRUE) expect_output(print(res), "MNR.*2.119", ignore.case = TRUE) expect_output(print(res), ".*crit.*2\\.01", ignore.case = TRUE) expect_output(print(res), ".*alpha.*0\\.05", ignore.case = TRUE) # check that the outlier was shown in the print statement expect_output(print(res), "4.*80\\.23348", ignore.case = TRUE) }) test_that("Datasets with repeated value non-outliers work", { # It was found that for small data sets where there are three # outliers and two duplicate values, the MNR calculation # would fail. This is a regression test against this. 
# This has always worked d1 <- c(673, 621, 690, 689, 689.5) res <- maximum_normed_residual(x = d1) expect_equal(res$n_outliers, 2) # This would previously fail d2 <- c(673, 621, 690, 689, 689) res <- maximum_normed_residual(x = d2) expect_equal(res$n_outliers, 3) }) test_that("Vectors with no variance produce reasonable results", { x <- rep(100, 10) res <- maximum_normed_residual(x = x) expect_equal(res$n_outliers, 0) }) test_that("Both vectors and data.frames can be passed to the MNR function", { df <- tribble( ~batch, ~strength, 3, 107.676986, 3, 108.960241, 3, 116.12264, 3, 80.2334815, 3, 106.14557, 3, 104.667866, 3, 104.234953 ) # check that passing a data.frame works res1 <- df %>% maximum_normed_residual(strength, alpha = 0.05) expect_lte(abs(res1$mnr - 2.119), 0.001) expect_lte(abs(res1$crit - 2.02), 0.001) expect_equal(nrow(res1$outliers), 1) # one outlier for this batch expect_equal(res1$n_outliers, 1) # check that passing a vector works res2 <- maximum_normed_residual(x = df$strength, alpha = 0.05) expect_lte(abs(res2$mnr - 2.119), 0.001) expect_lte(abs(res2$crit - 2.02), 0.001) expect_equal(nrow(res2$outliers), 1) # one outlier for this batch expect_equal(res2$n_outliers, 1) }) test_that("Glance returns correct indicies for multiple outliers", { x <- c(129.18348, 131.07326, 122.68332, 123.06133, 126.82286, 133.25628, 123.55778, 125.46771, 134.88326, 137.98354, 128.62570, 125.07538, 130.88193, 128.33410, 130.94715, 135.19539, 136.42983, 135.83982, 130.80670, 126.02690, 138.88892, 124.68059, 131.11826, 120.19239, 125.54239, 124.95325, 139.76737, 136.47803, 134.99572, 84.27198, 86.77162, 86.48636 ) res <- maximum_normed_residual(x = x) expect_equal(res$n_outliers, 3) expect_equal(res$outliers$index, c(30, 32, 31)) }) test_that("Glance works with repeated values", { x <- c(129.18348, 131.07326, 122.68332, 123.06133, 126.82286, 133.25628, 123.55778, 125.46771, 134.88326, 137.98354, 128.62570, 125.07538, 130.88193, 128.33410, 130.94715, 135.19539, 136.42983, 
135.83982, 130.80670, 126.02690, 138.88892, 124.68059, 131.11826, 120.19239, 125.54239, 124.95325, 139.76737, 136.47803, 134.99572, 64.27198, 64.27198, 64.27198 ) res <- maximum_normed_residual(x = x) expect_equal(res$n_outliers, 3) expect_equal(res$outliers$index, c(30, 31, 32)) }) test_that("glance method returns expected results", { df <- tribble( ~batch, ~strength, 3, 107.676986, 3, 108.960241, 3, 116.12264, 3, 80.2334815, 3, 106.14557, 3, 104.667866, 3, 104.234953 ) mnr_res <- df %>% maximum_normed_residual(strength, alpha = 0.05) glance_res <- glance(mnr_res) expect_equal(glance_res$mnr, 2.119, tolerance = 0.001) expect_equal(glance_res$crit, 2.02, tolerance = 0.001) expect_equal(glance_res$alpha, 0.05, tolerance = 0.00001) expect_equal(glance_res$n_outliers, 1) }) test_that("augment method returns expected results", { df <- tribble( ~batch, ~strength, 3, 107.676986, 3, 108.960241, 3, 116.12264, 3, 80.2334815, 3, 106.14557, 3, 104.667866, 3, 204.234953 ) mnr_res <- df %>% maximum_normed_residual(strength, alpha = 0.05) # not passing along the original data augment_res <- augment(mnr_res) expect_equal(df$strength, augment_res$values) expect_equal(augment_res$.outlier, c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE)) # passing along the original data.frame augment_res <- augment(mnr_res, df) expect_equal(df$strength, augment_res$strength) expect_equal(df$batch, augment_res$batch) expect_equal(augment_res$.outlier, c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE)) }) cmstatr/tests/testthat.R0000644000176200001440000000007213570437351015063 0ustar liggesuserslibrary(testthat) library(cmstatr) test_check("cmstatr") cmstatr/vignettes/0000755000176200001440000000000014574576017013757 5ustar liggesuserscmstatr/vignettes/cmstatr_Validation.Rmd0000644000176200001440000013131314067354422020243 0ustar liggesusers--- title: "cmstatr Validation" author: "Stefan Kloppenborg" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 2 vignette: > 
%\VignetteIndexEntry{cmstatr Validation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} csl: ieee.csl references: - id: "ASAP2008" type: "report" number: "ASAP-2008" author: - given: K.S. family: Raju - given: J.S. family: Tomblin title: "AGATE Statistical Analysis Program" issued: year: 2008 publisher: Wichita State University - id: Stephens1987 type: article author: - given: F.W. family: Scholz - given: M.A, family: Stephens title: K-Sample Anderson-Darling Tests container-title: Journal of the American Statistical Association volume: "82" issue: "399" page: 918-924 issued: year: "1987" month: "09" DOI: 10.1080/01621459.1987.10478517 URL: https://doi.org/10.1080/01621459.1987.10478517 - id: "CMH-17-1G" type: report number: "CMH-17-1G" title: Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials publisher: SAE International issued: year: "2012" month: "03" - id: "STAT-17" type: report number: "STAT-17 Rev 5" author: - literal: Materials Sciences Corporation title: CMH-17 Statistical Analysis for B-Basis and A-Basis Values publisher: Materials Sciences Corporation publisher-place: Horsham, PA issued: year: "2008" month: "01" day: "08" - id: Vangel1994 type: article author: - given: Mark family: Vangel title: One-Sided Nonparametric Tolerance Limits container-title: Communications in Statistics - Simulation and Computation volume: "23" issue: "4" page: 1137-1154 issued: year: "1994" DOI: 10.1080/03610919408813222 - id: vangel_lot_2002 type: article author: - given: Mark family: Vangel title: Lot Acceptance and Compliance Testing Using the Sample Mean and an Extremum container-title: Technometrics volume: "44" issue: "3" page: 242--249 issued: year: 2002 month: 8 DOI: 10.1198/004017002188618428 --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` # 1. 
Introduction This vignette is intended to contain the same validation that is included in the test suite within the `cmstatr` package, but in a format that is easier for a human to read. The intent is that this vignette will include only those validations that are included in the test suite, but that the test suite may include more tests than are shown in this vignette. The following packages will be used in this validation. The version of each package used is listed at the end of this vignette. ```{r message=FALSE, warning=FALSE} library(cmstatr) library(dplyr) library(purrr) library(tidyr) library(testthat) ``` Throughout this vignette, the `testthat` package will be used. Expressions such as `expect_equal` are used to ensure that the two values are equal (within some tolerance). If this expectation is not true, the vignette will fail to build. The tolerance is a relative tolerance: a tolerance of 0.01 means that the two values must be within $1\%$ of each other. As an example, the following expression checks that the value `10` is equal to `10.1` within a tolerance of `0.01`. Such an expectation should be satisfied. ```{r} expect_equal(10, 10.1, tolerance = 0.01) ``` The `basis_...` functions automatically perform certain diagnostic tests. When those diagnostic tests are not relevant to the validation, the diagnostic tests are overridden by passing the argument `override = "all"`. # 2. Validation Table The following table provides a cross-reference between the various functions of the `cmstatr` package and the tests shown within this vignette. The sections in this vignette are organized by data set. Not all checks are performed on all data sets. 
Function | Tests --------------------------------|--------------------------- `ad_ksample()` | [Section 3.1](#cf-ad), [Section 4.1.2](#c11-adk), [Section 6.1](#cl-adk) `anderson_darling_normal()` | [Section 4.1.3](#c11-ad), [Section 5.1](#ods-adn) `anderson_darling_lognormal()` | [Section 4.1.3](#c11-ad), [Section 5.2](#ods-adl) `anderson_darling_weibull()` | [Section 4.1.3](#c11-ad), [Section 5.3](#ods-adw) `basis_normal()` | [Section 5.4](#ods-nb) `basis_lognormal()` | [Section 5.5](#ods-lb) `basis_weibull()` | [Section 5.6](#ods-wb) `basis_pooled_cv()` | [Section 4.2.3](#c12-pcv), [Section 4.2.4](#c12-pcvmcv), `basis_pooled_sd()` | [Section 4.2.1](#c12-psd), [Section 4.2.2](#c12-psdmcv) `basis_hk_ext()` | [Section 4.1.6](#c11-hk-wf), [Section 5.7](#ods-hkb), [Section 5.8](#ods-hkb2) `basis_nonpara_large_sample()` | [Section 5.9](#ods-lsnb) `basis_anova()` | [Section 4.1.7](#c11-anova) `calc_cv_star()` | `cv()` | `equiv_change_mean()` | [Section 5.11](#ods-ecm) `equiv_mean_extremum()` | [Section 5.10](#ods-eme) `hk_ext_z()` | [Section 7.3](#pf-hk), [Section 7.4](#pf-hk2) `hk_ext_z_j_opt()` | [Section 7.5](#pf-hk-opt) `k_equiv()` | [Section 7.8](#pf-equiv) `k_factor_normal()` | [Section 7.1](#pf-kb), [Section 7.2](#pf-ka) `levene_test()` | [Section 4.1.4](#c11-lbc), [Section 4.1.5](#c11-lbb) `maximum_normed_residual()` | [Section 4.1.1](#c11-mnr) `nonpara_binomial_rank()` | [Section 7.6](#pf-npbinom), [Section 7.7](#pf-npbinom2) `normalize_group_mean()` | `normalize_ply_thickness()` | `transform_mod_cv_ad()` | `transform_mod_cv()` | # 3. `carbon.fabric` Data Set This data set is example data that is provided with `cmstatr`. The first few rows of this data are shown below. ```{r} head(carbon.fabric) ``` ## 3.1. Anderson--Darling k-Sample Test {#cf-ad} This data was entered into ASAP 2008 [@ASAP2008] and the reported Anderson--Darling k--Sample test statistics were recorded, as were the conclusions. 
The value of the test statistic reported by `cmstatr` and that reported by ASAP 2008 differ by a factor of $k - 1$, as do the critical values used. As such, the conclusion of the tests are identical. This is described in more detail in the [Anderson--Darling k--Sample Vignette](adktest.html). When the RTD warp-tension data from this data set is entered into ASAP 2008, it reports a test statistic of 0.456 and fails to reject the null hypothesis that the batches are drawn from the same distribution. Adjusting for the different definition of the test statistic, the results given by `cmstatr` are very similar. ```{r} res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.456, tolerance = 0.002) expect_false(res$reject_same_dist) res ``` When the ETW warp-tension data from this data set are entered into ASAP 2008, the reported test statistic is 1.604 and it fails to reject the null hypothesis that the batches are drawn from the same distribution. Adjusting for the different definition of the test statistic, `cmstatr` gives nearly identical results. ```{r} res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.604, tolerance = 0.002) expect_false(res$reject_same_dist) res ``` # 4. Comparison with Examples from CMH-17-1G ## 4.1 Dataset From Section 8.3.11.1.1 CMH-17-1G [@CMH-17-1G] provides an example data set and results from ASAP [@ASAP2008] and STAT17 [@STAT-17]. 
This example data set is duplicated below: ```{r} dat_8_3_11_1_1 <- tribble( ~batch, ~strength, ~condition, 1, 118.3774604, "CTD", 1, 84.9581364, "RTD", 1, 83.7436035, "ETD", 1, 123.6035612, "CTD", 1, 92.4891822, "RTD", 1, 84.3831677, "ETD", 1, 115.2238092, "CTD", 1, 96.8212659, "RTD", 1, 94.8030433, "ETD", 1, 112.6379744, "CTD", 1, 109.030325, "RTD", 1, 94.3931537, "ETD", 1, 116.5564277, "CTD", 1, 97.8212659, "RTD", 1, 101.702222, "ETD", 1, 123.1649896, "CTD", 1, 100.921519, "RTD", 1, 86.5372121, "ETD", 2, 128.5589027, "CTD", 1, 103.699444, "RTD", 1, 92.3772684, "ETD", 2, 113.1462103, "CTD", 2, 93.7908212, "RTD", 2, 89.2084024, "ETD", 2, 121.4248107, "CTD", 2, 107.526709, "RTD", 2, 100.686001, "ETD", 2, 134.3241906, "CTD", 2, 94.5769704, "RTD", 2, 81.0444192, "ETD", 2, 129.6405117, "CTD", 2, 93.8831373, "RTD", 2, 91.3398070, "ETD", 2, 117.9818658, "CTD", 2, 98.2296605, "RTD", 2, 93.1441939, "ETD", 3, 115.4505226, "CTD", 2, 111.346590, "RTD", 2, 85.8204168, "ETD", 3, 120.0369467, "CTD", 2, 100.817538, "RTD", 3, 94.8966273, "ETD", 3, 117.1631088, "CTD", 3, 100.382203, "RTD", 3, 95.8068520, "ETD", 3, 112.9302797, "CTD", 3, 91.5037811, "RTD", 3, 86.7842252, "ETD", 3, 117.9114501, "CTD", 3, 100.083233, "RTD", 3, 94.4011973, "ETD", 3, 120.1900159, "CTD", 3, 95.6393615, "RTD", 3, 96.7231171, "ETD", 3, 110.7295966, "CTD", 3, 109.304779, "RTD", 3, 89.9010384, "ETD", 3, 100.078562, "RTD", 3, 99.1205847, "RTD", 3, 89.3672306, "ETD", 1, 106.357525, "ETW", 1, 99.0239966, "ETW2", 1, 105.898733, "ETW", 1, 103.341238, "ETW2", 1, 88.4640082, "ETW", 1, 100.302130, "ETW2", 1, 103.901744, "ETW", 1, 98.4634133, "ETW2", 1, 80.2058219, "ETW", 1, 92.2647280, "ETW2", 1, 109.199597, "ETW", 1, 103.487693, "ETW2", 1, 61.0139431, "ETW", 1, 113.734763, "ETW2", 2, 99.3207107, "ETW", 2, 108.172659, "ETW2", 2, 115.861770, "ETW", 2, 108.426732, "ETW2", 2, 82.6133082, "ETW", 2, 116.260375, "ETW2", 2, 85.3690411, "ETW", 2, 121.049610, "ETW2", 2, 115.801622, "ETW", 2, 111.223082, "ETW2", 2, 
44.3217741, "ETW", 2, 104.574843, "ETW2", 2, 117.328077, "ETW", 2, 103.222552, "ETW2", 2, 88.6782903, "ETW", 3, 99.3918538, "ETW2", 3, 107.676986, "ETW", 3, 87.3421658, "ETW2", 3, 108.960241, "ETW", 3, 102.730741, "ETW2", 3, 116.122640, "ETW", 3, 96.3694916, "ETW2", 3, 80.2334815, "ETW", 3, 99.5946088, "ETW2", 3, 106.145570, "ETW", 3, 97.0712407, "ETW2", 3, 104.667866, "ETW", 3, 104.234953, "ETW" ) dat_8_3_11_1_1 ``` ### 4.1.1 Maximum Normed Residual Test {#c11-mnr} CMH-17-1G Table 8.3.11.1.1(a) provides results of the MNR test from ASAP for this data set. Batches 2 and 3 of the ETW data is considered here and the results of `cmstatr` are compared with those published in CMH-17-1G. For Batch 2 of the ETW data, the results match those published in the handbook within a small tolerance. The published test statistic is 2.008. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW" & batch == 2) %>% maximum_normed_residual(strength, alpha = 0.05) expect_equal(res$mnr, 2.008, tolerance = 0.001) expect_equal(res$crit, 2.127, tolerance = 0.001) expect_equal(res$n_outliers, 0) res ``` Similarly, for Batch 3 of the ETW data, the results of `cmstatr` match the results published in the handbook within a small tolerance. The published test statistic is 2.119 ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW" & batch == 3) %>% maximum_normed_residual(strength, alpha = 0.05) expect_equal(res$mnr, 2.119, tolerance = 0.001) expect_equal(res$crit, 2.020, tolerance = 0.001) expect_equal(res$n_outliers, 1) res ``` ### 4.1.2 Anderson--Darling k--Sample Test {#c11-adk} For the ETW condition, the ADK test statistic given in [@CMH-17-1G] is $ADK = 0.793$ and the test concludes that the samples come from the same distribution. 
Noting that `cmstatr` uses the definition of the test statistic given in [@Stephens1987], so the test statistic given by `cmstatr` differs from that given by ASAP by a factor of $k - 1$, as described in the [Anderson--Darling k--Sample Vignette](adktest.html). ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.793, tolerance = 0.003) expect_false(res$reject_same_dist) res ``` Similarly, for the ETW2 condition, the test statistic given in [@CMH-17-1G] is $ADK = 3.024$ and the test concludes that the samples come from different distributions. This matches `cmstatr` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 3.024, tolerance = 0.001) expect_true(res$reject_same_dist) res ``` ### 4.1.3 Anderson--Darling Tests for Distribution {#c11-ad} CMH-17-1G Section 8.3.11.2.1 contains results from STAT17 for the "observed significance level" from the Anderson--Darling test for various distributions. In this section, the ETW condition from the present data set is used. The published results are given in the following table. The results from `cmstatr` are below and are very similar to those from STAT17. 
Distribution | OSL -------------|------------ Normal | 0.006051 Lognormal | 0.000307 Weibull | 0.219 ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_normal(strength) expect_equal(res$osl, 0.006051, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_lognormal(strength) expect_equal(res$osl, 0.000307, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_weibull(strength) expect_equal(res$osl, 0.0219, tolerance = 0.002) res ``` ### 4.1.4 Levene's Test (Between Conditions) {#c11-lbc} CMH-17-1G Section 8.3.11.1.1 provides results from ASAP for Levene's test for equality of variance between conditions after the ETW and ETW2 conditions are removed. The handbook shows an F statistic of 0.58, however if this data is entered into ASAP directly, ASAP gives an F statistic of 0.058, which matches the result of `cmstatr`. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition != "ETW" & condition != "ETW2") %>% levene_test(strength, condition) expect_equal(res$f, 0.058, tolerance = 0.01) res ``` ### 4.1.5 Levene's Test (Between Batches) {#c11-lbb} CMH-17-1G Section 8.3.11.2.2 provides output from STAT17. The ETW2 condition from the present data set was analyzed by STAT17 and that software reported an F statistic of 0.123 from Levene's test when comparing the variance of the batches within this condition. The result from `cmstatr` is similar. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% levene_test(strength, batch) expect_equal(res$f, 0.123, tolerance = 0.005) res ``` Similarly, the published value of the F statistic for the CTD condition is $3.850$. `cmstatr` produces very similar results. 
```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "CTD") %>% levene_test(strength, batch) expect_equal(res$f, 3.850, tolerance = 0.005) res ``` ### 4.1.6 Nonparametric Basis Values {#c11-hk-wf} CMH-17-1G Section 8.3.11.2.1 provides STAT17 outputs for the ETW condition of the present data set. The nonparametric Basis values are listed. In this case, the Hanson--Koopmans method is used. The published A-Basis value is 13.0 and the B-Basis is 37.9. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% basis_hk_ext(strength, method = "woodward-frawley", p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 13.0, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% basis_hk_ext(strength, method = "optimum-order", p = 0.90, conf = 0.95, override = "all") expect_equal(res$basis, 37.9, tolerance = 0.001) res ``` ### 4.1.7 Single-Point ANOVA Basis Value {#c11-anova} CMH-17-1G Section 8.3.11.2.2 provides output from STAT17 for the ETW2 condition from the present data set. STAT17 reports A- and B-Basis values based on the ANOVA method of 34.6 and 63.2, respectively. The results from `cmstatr` are similar. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% basis_anova(strength, batch, override = "number_of_groups", p = 0.99, conf = 0.95) expect_equal(res$basis, 34.6, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% basis_anova(strength, batch, override = "number_of_groups") expect_equal(res$basis, 63.2, tolerance = 0.001) res ``` ## 4.2 Dataset From Section 8.3.11.1.2 [@CMH-17-1G] provides an example data set and results from ASAP [@ASAP2008]. 
This example data set is duplicated below: ```{r} dat_8_3_11_1_2 <- tribble( ~batch, ~strength, ~condition, 1, 79.04517, "CTD", 1, 103.2006, "RTD", 1, 63.22764, "ETW", 1, 54.09806, "ETW2", 1, 102.6014, "CTD", 1, 105.1034, "RTD", 1, 70.84454, "ETW", 1, 58.87615, "ETW2", 1, 97.79372, "CTD", 1, 105.1893, "RTD", 1, 66.43223, "ETW", 1, 61.60167, "ETW2", 1, 92.86423, "CTD", 1, 100.4189, "RTD", 1, 75.37771, "ETW", 1, 60.23973, "ETW2", 1, 117.218, "CTD", 2, 85.32319, "RTD", 1, 72.43773, "ETW", 1, 61.4808, "ETW2", 1, 108.7168, "CTD", 2, 92.69923, "RTD", 1, 68.43073, "ETW", 1, 64.55832, "ETW2", 1, 112.2773, "CTD", 2, 98.45242, "RTD", 1, 69.72524, "ETW", 2, 57.76131, "ETW2", 1, 114.0129, "CTD", 2, 104.1014, "RTD", 2, 66.20343, "ETW", 2, 49.91463, "ETW2", 2, 106.8452, "CTD", 2, 91.51841, "RTD", 2, 60.51251, "ETW", 2, 61.49271, "ETW2", 2, 112.3911, "CTD", 2, 101.3746, "RTD", 2, 65.69334, "ETW", 2, 57.7281, "ETW2", 2, 115.5658, "CTD", 2, 101.5828, "RTD", 2, 62.73595, "ETW", 2, 62.11653, "ETW2", 2, 87.40657, "CTD", 2, 99.57384, "RTD", 2, 59.00798, "ETW", 2, 62.69353, "ETW2", 2, 102.2785, "CTD", 2, 88.84826, "RTD", 2, 62.37761, "ETW", 3, 61.38523, "ETW2", 2, 110.6073, "CTD", 3, 92.18703, "RTD", 3, 64.3947, "ETW", 3, 60.39053, "ETW2", 3, 105.2762, "CTD", 3, 101.8234, "RTD", 3, 72.8491, "ETW", 3, 59.17616, "ETW2", 3, 110.8924, "CTD", 3, 97.68909, "RTD", 3, 66.56226, "ETW", 3, 60.17616, "ETW2", 3, 108.7638, "CTD", 3, 101.5172, "RTD", 3, 66.56779, "ETW", 3, 46.47396, "ETW2", 3, 110.9833, "CTD", 3, 100.0481, "RTD", 3, 66.00123, "ETW", 3, 51.16616, "ETW2", 3, 101.3417, "CTD", 3, 102.0544, "RTD", 3, 59.62108, "ETW", 3, 100.0251, "CTD", 3, 60.61167, "ETW", 3, 57.65487, "ETW", 3, 66.51241, "ETW", 3, 64.89347, "ETW", 3, 57.73054, "ETW", 3, 68.94086, "ETW", 3, 61.63177, "ETW" ) ``` ### 4.2.1 Pooled SD A- and B-Basis {#c12-psd} CMH-17-1G Table 8.3.11.2(k) provides outputs from ASAP for the data set above. ASAP uses the pooled SD method. 
ASAP produces the following results, which are quite similar to those produced by `cmstatr`. Condition | CTD | RTD | ETW | ETW2 ----------|-------|-------|-------|------ B-Basis | 93.64 | 87.30 | 54.33 | 47.12 A-Basis | 89.19 | 79.86 | 46.84 | 39.69 ```{r} res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 93.64, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 87.30, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 54.33, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 47.12, tolerance = 0.001) res ``` ```{r} res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 86.19, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 79.86, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 46.84, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 39.69, tolerance = 0.001) res ``` ### 4.2.2 Pooled SD A- and B-Basis (Mod CV) {#c12-psdmcv} After removal of the ETW2 condition, CMH17-STATS reports the pooled A- and B-Basis (mod CV) shown in the following table. `cmstatr` computes very similar values. 
Condition | CTD | RTD | ETW ----------|-------|-------|------ B-Basis | 92.25 | 85.91 | 52.97 A-Basis | 83.81 | 77.48 | 44.47 ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 92.25, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 85.91, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 52.97, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, p = 0.99, conf = 0.95, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 83.81, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 77.48, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 44.47, tolerance = 0.001) res ``` ### 4.2.3 Pooled CV A- and B-Basis {#c12-pcv} This data set was input into CMH17-STATS and the Pooled CV method was selected. The results from CMH17-STATS were as follows. `cmstatr` produces very similar results. 
Condition | CTD | RTD | ETW | ETW2 ----------|-------|-------|-------|------ B-Basis | 90.89 | 85.37 | 56.79 | 50.55 A-Basis | 81.62 | 76.67 | 50.98 | 45.40 ```{r} res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 90.89, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 85.37, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 56.79, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 50.55, tolerance = 0.001) res ``` ```{r} res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 81.62, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 76.67, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 50.98, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 45.40, tolerance = 0.001) res ``` ### 4.2.4 Pooled CV A- and B-Basis (Mod CV) {#c12-pcvmcv} This data set was input into CMH17-STATS and the Pooled CV method was selected with the modified CV transform. Additionally, the ETW2 condition was removed. The results from CMH17-STATS were as follows. `cmstatr` produces very similar results. 
Condition | CTD | RTD | ETW ----------|-------|-------|------- B-Basis | 90.31 | 84.83 | 56.43 A-Basis | 80.57 | 75.69 | 50.33 ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 90.31, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 84.83, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 56.43, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, modcv = TRUE, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 80.57, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 75.69, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 50.33, tolerance = 0.001) res ``` # 5. Other Data Sets This section contains various small data sets. In most cases, these data sets were generated randomly for the purpose of comparing `cmstatr` to other software. ## 5.1 Anderson--Darling Test (Normal) {#ods-adn} The following data set was randomly generated. When this is entered into STAT17 [@STAT-17], that software gives the value $OSL = 0.465$, which matches the result of `cmstatr` within a small margin. ```{r} dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_normal(dat, strength) expect_equal(res$osl, 0.465, tolerance = 0.001) res ``` ## 5.2 Anderson--Darling Test (Lognormal) {#ods-adl} The following data set was randomly generated. When this is entered into STAT17 [@STAT-17], that software gives the value $OSL = 0.480$, which matches the result of `cmstatr` within a small margin. 
```{r} dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_lognormal(dat, strength) expect_equal(res$osl, 0.480, tolerance = 0.001) res ``` ## 5.3 Anderson--Darling Test (Weibull) {#ods-adw} The following data set was randomly generated. When this is entered into STAT17 [@STAT-17], that software gives the value $OSL = 0.179$, which matches the result of `cmstatr` within a small margin. ```{r} dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_weibull(dat, strength) expect_equal(res$osl, 0.179, tolerance = 0.002) res ``` ## 5.4 Normal A- and B-Basis {#ods-nb} The following data was input into STAT17 and the A- and B-Basis values were computed assuming normally distributed data. The results were 120.336 and 129.287, respectively. `cmstatr` reports very similar values. ```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_normal(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 120.336, tolerance = 0.0005) res ``` ```{r} res <- basis_normal(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.287, tolerance = 0.0005) res ``` ## 5.5 Lognormal A- and B-Basis {#ods-lb} The following data was input into STAT17 and the A- and B-Basis values were computed assuming distributed according to a lognormal distribution. The results were 121.710 and 129.664, respectively. `cmstatr` reports very similar values. 
```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_lognormal(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 121.710, tolerance = 0.0005) res ``` ```{r} res <- basis_lognormal(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.664, tolerance = 0.0005) res ``` ## 5.6 Weibull A- and B-Basis {#ods-wb} The following data was input into STAT17 and the A- and B-Basis values were computed assuming data following the Weibull distribution. The results were 109.150 and 125.441, respectively. `cmstatr` reports very similar values. ```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_weibull(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 109.150, tolerance = 0.005) res ``` ```{r} res <- basis_weibull(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 125.441, tolerance = 0.005) res ``` ## 5.7 Extended Hanson--Koopmans A- and B-Basis {#ods-hkb} The following data was input into STAT17 and the A- and B-Basis values were computed using the nonparametric (small sample) method. The results were 99.651 and 124.156, respectively. `cmstatr` reports very similar values. 
```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_hk_ext(x = dat, p = 0.99, conf = 0.95, method = "woodward-frawley", override = "all") expect_equal(res$basis, 99.651, tolerance = 0.005) res ``` ```{r} res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 124.156, tolerance = 0.005) res ``` ## 5.8 Extended Hanson--Koopmans B-Basis {#ods-hkb2} The following random numbers were generated. ```{r} dat <- c( 139.6734, 143.0032, 130.4757, 144.8327, 138.7818, 136.7693, 148.636, 131.0095, 131.4933, 142.8856, 158.0198, 145.2271, 137.5991, 139.8298, 140.8557, 137.6148, 131.3614, 152.7795, 145.8792, 152.9207, 160.0989, 145.1920, 128.6383, 141.5992, 122.5297, 159.8209, 151.6720, 159.0156 ) ``` All of the numbers above were input into STAT17 and the reported B-Basis value using the Optimum Order nonparametric method was 122.36798. This result matches the results of `cmstatr` within a small margin. ```{r} res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 122.36798, tolerance = 0.001) res ``` The last two observations from the above data set were discarded, leaving 26 observations. This smaller data set was input into STAT17 and that software calculated a B-Basis value of 121.57073 using the Optimum Order nonparametric method. `cmstatr` reports a very similar number. ```{r} res <- basis_hk_ext(x = head(dat, 26), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 121.57073, tolerance = 0.001) res ``` The same data set was further reduced such that only the first 22 observations were included. This smaller data set was input into STAT17 and that software calculated a B-Basis value of 128.82397 using the Optimum Order nonparametric method. 
`cmstatr` reports a very similar number. ```{r} res <- basis_hk_ext(x = head(dat, 22), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 128.82397, tolerance = 0.001) res ``` ## 5.9 Large Sample Nonparametric B-Basis {#ods-lsnb} The following data was input into STAT17 and the B-Basis value was computed using the nonparametric (large sample) method. The results was 122.738297. `cmstatr` reports very similar values. ```{r} dat <- c( 137.3603, 135.6665, 136.6914, 154.7919, 159.2037, 137.3277, 128.821, 138.6304, 138.9004, 147.4598, 148.6622, 144.4948, 131.0851, 149.0203, 131.8232, 146.4471, 123.8124, 126.3105, 140.7609, 134.4875, 128.7508, 117.1854, 129.3088, 141.6789, 138.4073, 136.0295, 128.4164, 141.7733, 134.455, 122.7383, 136.9171, 136.9232, 138.8402, 152.8294, 135.0633, 121.052, 131.035, 138.3248, 131.1379, 147.3771, 130.0681, 132.7467, 137.1444, 141.662, 146.9363, 160.7448, 138.5511, 129.1628, 140.2939, 144.8167, 156.5918, 132.0099, 129.3551, 136.6066, 134.5095, 128.2081, 144.0896, 141.8029, 130.0149, 140.8813, 137.7864 ) res <- basis_nonpara_large_sample(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 122.738297, tolerance = 0.005) res ``` ## 5.10 Acceptance Limits Based on Mean and Extremum {#ods-eme} Results from `cmstatr`'s `equiv_mean_extremum` function were compared with results from HYTEQ. The summary statistics for the qualification data were set as `mean = 141.310` and `sd=6.415`. For a value of `alpha=0.05` and `n = 9`, HYTEQ reported thresholds of 123.725 and 137.197 for minimum individual and mean, respectively. `cmstatr` produces very similar results. 
```{r} res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9) expect_equal(res$threshold_min_indiv, 123.725, tolerance = 0.001) expect_equal(res$threshold_mean, 137.197, tolerance = 0.001) res ``` Using the same parameters, but using the modified CV method, HYTEQ produces thresholds of 117.024 and 135.630 for minimum individual and mean, respectively. `cmstatr` produces very similar results. ```{r} res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9, modcv = TRUE) expect_equal(res$threshold_min_indiv, 117.024, tolerance = 0.001) expect_equal(res$threshold_mean, 135.630, tolerance = 0.001) res ``` ## 5.11 Acceptance Based on Change in Mean {#ods-ecm} Results from `cmstatr`'s `equiv_change_mean` function were compared with results from HYTEQ. The following parameters were used. A value of `alpha = 0.05` was selected. Parameter | Qualification | Sample ----------|---------------|--------- Mean | 9.24 | 9.02 SD | 0.162 | 0.15785 n | 28 | 9 HYTEQ gives an acceptance range of 9.115 to 9.365. `cmstatr` produces similar results. ```{r} res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162) expect_equal(res$threshold, c(9.115, 9.365), tolerance = 0.001) res ``` After selecting the modified CV method, HYTEQ gives an acceptance range of 8.857 to 9.623. `cmstatr` produces similar results. ```{r} res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, modcv = TRUE) expect_equal(res$threshold, c(8.857, 9.623), tolerance = 0.001) res ``` # 6. Comparison with Literature In this section, results from `cmstatr` are compared with values published in literature. ## 6.1 Anderson--Darling K--Sample Test {#cl-adk} [@Stephens1987] provides example data that compares measurements obtained in four labs. 
Their paper gives values of the ADK test statistic as well as p-values. The data in [@Stephens1987] is as follows: ```{r} dat_ss1987 <- data.frame( smoothness = c( 38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0, 39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8, 34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0, 34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8 ), lab = c(rep("A", 8), rep("B", 8), rep("C", 8), rep("D", 8)) ) dat_ss1987 ``` [@Stephens1987] lists the corresponding test statistics $A_{akN}^2 = 8.3926$ and $\sigma_N = 1.2038$ with the p-value $p = 0.0022$. These match the result of `cmstatr` within a small margin. ```{r} res <- ad_ksample(dat_ss1987, smoothness, lab) expect_equal(res$ad, 8.3926, tolerance = 0.001) expect_equal(res$sigma, 1.2038, tolerance = 0.001) expect_equal(res$p, 0.00226, tolerance = 0.01) res ``` # 7. Comparison with Published Factors Various factors, such as tolerance limit factors, are published in various publications. This section compares those published factors with those computed by `cmstatr`. ## 7.1 Normal kB Factors {#pf-kb} B-Basis tolerance limit factors assuming a normal distribution are published in CMH-17-1G. Those factors are reproduced below and are compared with the results of `cmstatr`. The published factors and those computed by `cmstatr` are quite similar. 
```{r} tribble( ~n, ~kB_published, 2, 20.581, 36, 1.725, 70, 1.582, 104, 1.522, 3, 6.157, 37, 1.718, 71, 1.579, 105, 1.521, 4, 4.163, 38, 1.711, 72, 1.577, 106, 1.519, 5, 3.408, 39, 1.704, 73, 1.575, 107, 1.518, 6, 3.007, 40, 1.698, 74, 1.572, 108, 1.517, 7, 2.756, 41, 1.692, 75, 1.570, 109, 1.516, 8, 2.583, 42, 1.686, 76, 1.568, 110, 1.515, 9, 2.454, 43, 1.680, 77, 1.566, 111, 1.513, 10, 2.355, 44, 1.675, 78, 1.564, 112, 1.512, 11, 2.276, 45, 1.669, 79, 1.562, 113, 1.511, 12, 2.211, 46, 1.664, 80, 1.560, 114, 1.510, 13, 2.156, 47, 1.660, 81, 1.558, 115, 1.509, 14, 2.109, 48, 1.655, 82, 1.556, 116, 1.508, 15, 2.069, 49, 1.650, 83, 1.554, 117, 1.507, 16, 2.034, 50, 1.646, 84, 1.552, 118, 1.506, 17, 2.002, 51, 1.642, 85, 1.551, 119, 1.505, 18, 1.974, 52, 1.638, 86, 1.549, 120, 1.504, 19, 1.949, 53, 1.634, 87, 1.547, 121, 1.503, 20, 1.927, 54, 1.630, 88, 1.545, 122, 1.502, 21, 1.906, 55, 1.626, 89, 1.544, 123, 1.501, 22, 1.887, 56, 1.623, 90, 1.542, 124, 1.500, 23, 1.870, 57, 1.619, 91, 1.540, 125, 1.499, 24, 1.854, 58, 1.616, 92, 1.539, 126, 1.498, 25, 1.839, 59, 1.613, 93, 1.537, 127, 1.497, 26, 1.825, 60, 1.609, 94, 1.536, 128, 1.496, 27, 1.812, 61, 1.606, 95, 1.534, 129, 1.495, 28, 1.800, 62, 1.603, 96, 1.533, 130, 1.494, 29, 1.789, 63, 1.600, 97, 1.531, 131, 1.493, 30, 1.778, 64, 1.597, 98, 1.530, 132, 1.492, 31, 1.768, 65, 1.595, 99, 1.529, 133, 1.492, 32, 1.758, 66, 1.592, 100, 1.527, 134, 1.491, 33, 1.749, 67, 1.589, 101, 1.526, 135, 1.490, 34, 1.741, 68, 1.587, 102, 1.525, 136, 1.489, 35, 1.733, 69, 1.584, 103, 1.523, 137, 1.488 ) %>% arrange(n) %>% mutate(kB_cmstatr = k_factor_normal(n, p = 0.9, conf = 0.95)) %>% rowwise() %>% mutate(diff = expect_equal(kB_published, kB_cmstatr, tolerance = 0.001)) %>% select(-c(diff)) ``` ## 7.2 Normal kA Factors {#pf-ka} A-Basis tolerance limit factors assuming a normal distribution are published in CMH-17-1G. Those factors are reproduced below and are compared with the results of `cmstatr`. 
The published factors and those computed by `cmstatr` are quite similar. ```{r} tribble( ~n, ~kA_published, 2, 37.094, 36, 2.983, 70, 2.765, 104, 2.676, 3, 10.553, 37, 2.972, 71, 2.762, 105, 2.674, 4, 7.042, 38, 2.961, 72, 2.758, 106, 2.672, 5, 5.741, 39, 2.951, 73, 2.755, 107, 2.671, 6, 5.062, 40, 2.941, 74, 2.751, 108, 2.669, 7, 4.642, 41, 2.932, 75, 2.748, 109, 2.667, 8, 4.354, 42, 2.923, 76, 2.745, 110, 2.665, 9, 4.143, 43, 2.914, 77, 2.742, 111, 2.663, 10, 3.981, 44, 2.906, 78, 2.739, 112, 2.662, 11, 3.852, 45, 2.898, 79, 2.736, 113, 2.660, 12, 3.747, 46, 2.890, 80, 2.733, 114, 2.658, 13, 3.659, 47, 2.883, 81, 2.730, 115, 2.657, 14, 3.585, 48, 2.876, 82, 2.727, 116, 2.655, 15, 3.520, 49, 2.869, 83, 2.724, 117, 2.654, 16, 3.464, 50, 2.862, 84, 2.721, 118, 2.652, 17, 3.414, 51, 2.856, 85, 2.719, 119, 2.651, 18, 3.370, 52, 2.850, 86, 2.716, 120, 2.649, 19, 3.331, 53, 2.844, 87, 2.714, 121, 2.648, 20, 3.295, 54, 2.838, 88, 2.711, 122, 2.646, 21, 3.263, 55, 2.833, 89, 2.709, 123, 2.645, 22, 3.233, 56, 2.827, 90, 2.706, 124, 2.643, 23, 3.206, 57, 2.822, 91, 2.704, 125, 2.642, 24, 3.181, 58, 2.817, 92, 2.701, 126, 2.640, 25, 3.158, 59, 2.812, 93, 2.699, 127, 2.639, 26, 3.136, 60, 2.807, 94, 2.697, 128, 2.638, 27, 3.116, 61, 2.802, 95, 2.695, 129, 2.636, 28, 3.098, 62, 2.798, 96, 2.692, 130, 2.635, 29, 3.080, 63, 2.793, 97, 2.690, 131, 2.634, 30, 3.064, 64, 2.789, 98, 2.688, 132, 2.632, 31, 3.048, 65, 2.785, 99, 2.686, 133, 2.631, 32, 3.034, 66, 2.781, 100, 2.684, 134, 2.630, 33, 3.020, 67, 2.777, 101, 2.682, 135, 2.628, 34, 3.007, 68, 2.773, 102, 2.680, 136, 2.627, 35, 2.995, 69, 2.769, 103, 2.678, 137, 2.626 ) %>% arrange(n) %>% mutate(kA_cmstatr = k_factor_normal(n, p = 0.99, conf = 0.95)) %>% rowwise() %>% mutate(diff = expect_equal(kA_published, kA_cmstatr, tolerance = 0.001)) %>% select(-c(diff)) ``` ## 7.3 Nonparametric B-Basis Extended Hanson--Koopmans {#pf-hk} Vangel [@Vangel1994] provides extensive tables of $z$ for the case where $i=1$ and $j$ is the median 
observation. This section checks the results of `cmstatr`'s function against those tables. Only the odd values of $n$ are checked so that the median is a single observation. The unit tests for the `cmstatr` package include checks of a variety of values of $p$ and confidence, but only the factors for B-Basis are checked here. ```{r} tribble( ~n, ~z, 3, 28.820048, 5, 6.1981307, 7, 3.4780112, 9, 2.5168762, 11, 2.0312134, 13, 1.7377374, 15, 1.5403989, 17, 1.3979806, 19, 1.2899172, 21, 1.2048089, 23, 1.1358259, 25, 1.0786237, 27, 1.0303046, ) %>% rowwise() %>% mutate( z_calc = hk_ext_z(n, 1, ceiling(n / 2), p = 0.90, conf = 0.95) ) %>% mutate(diff = expect_equal(z, z_calc, tolerance = 0.0001)) %>% select(-c(diff)) ``` ## 7.4 Nonparametric A-Basis Extended Hanson--Koopmans {#pf-hk2} CMH-17-1G provides Table 8.5.15, which contains factors for calculating A-Basis values using the Extended Hanson--Koopmans nonparametric method. That table is reproduced in part here and the factors are compared with those computed by `cmstatr`. More extensive checks are performed in the unit test of the `cmstatr` package. The factors computed by `cmstatr` are very similar to those published in CMH-17-1G. ```{r} tribble( ~n, ~k, 2, 80.0038, 4, 9.49579, 6, 5.57681, 8, 4.25011, 10, 3.57267, 12, 3.1554, 14, 2.86924, 16, 2.65889, 18, 2.4966, 20, 2.36683, 25, 2.131, 30, 1.96975, 35, 1.85088, 40, 1.75868, 45, 1.68449, 50, 1.62313, 60, 1.5267, 70, 1.45352, 80, 1.39549, 90, 1.34796, 100, 1.30806, 120, 1.24425, 140, 1.19491, 160, 1.15519, 180, 1.12226, 200, 1.09434, 225, 1.06471, 250, 1.03952, 275, 1.01773 ) %>% rowwise() %>% mutate(z_calc = hk_ext_z(n, 1, n, 0.99, 0.95)) %>% mutate(diff = expect_lt(abs(k - z_calc), 0.0001)) %>% select(-c(diff)) ``` ## 7.5 Factors for Small Sample Nonparametric B-Basis {#pf-hk-opt} CMH-17-1G Table 8.5.14 provides ranks orders and factors for computing nonparametric B-Basis values. This table is reproduced below and compared with the results of `cmstatr`. 
The results are similar. In some cases, the rank order ($r$ in CMH-17-1G or
$j$ in `cmstatr`) *and* the factor ($k$) are different. These differences are
discussed in detail in the vignette
[Extended Hanson-Koopmans](hk_ext.html).

```{r}
tribble(
  ~n, ~r, ~k,
  2, 2, 35.177,
  3, 3, 7.859,
  4, 4, 4.505,
  5, 4, 4.101,
  6, 5, 3.064,
  7, 5, 2.858,
  8, 6, 2.382,
  9, 6, 2.253,
  10, 6, 2.137,
  11, 7, 1.897,
  12, 7, 1.814,
  13, 7, 1.738,
  14, 8, 1.599,
  15, 8, 1.540,
  16, 8, 1.485,
  17, 8, 1.434,
  18, 9, 1.354,
  19, 9, 1.311,
  20, 10, 1.253,
  21, 10, 1.218,
  22, 10, 1.184,
  23, 11, 1.143,
  24, 11, 1.114,
  25, 11, 1.087,
  26, 11, 1.060,
  27, 11, 1.035,
  28, 12, 1.010
) %>%
  rowwise() %>%
  mutate(r_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$j) %>%
  mutate(k_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$z)
```

## 7.6 Nonparametric B-Basis Binomial Rank {#pf-npbinom}
CMH-17-1G Table 8.5.12 provides factors for computing B-Basis values using
the nonparametric binomial rank method. Part of that table is reproduced
below and compared with the results of `cmstatr`. The results of `cmstatr`
are similar to the published values. A more complete comparison is performed
in the unit tests of the `cmstatr` package.

```{r}
tribble(
  ~n, ~rb,
  29, 1,
  46, 2,
  61, 3,
  76, 4,
  89, 5,
  103, 6,
  116, 7,
  129, 8,
  142, 9,
  154, 10,
  167, 11,
  179, 12,
  191, 13,
  203, 14
) %>%
  rowwise() %>%
  mutate(r_calc = nonpara_binomial_rank(n, 0.9, 0.95)) %>%
  mutate(test = expect_equal(rb, r_calc)) %>%
  select(-c(test))
```

## 7.7 Nonparametric A-Basis Binomial Rank {#pf-npbinom2}
CMH-17-1G Table 8.5.13 provides factors for computing A-Basis values using
the nonparametric binomial rank method. Part of that table is reproduced
below and compared with the results of `cmstatr`. The results of `cmstatr`
are similar to the published values. A more complete comparison is performed
in the unit tests of the `cmstatr` package.
```{r} tribble( ~n, ~ra, 299, 1, 473, 2, 628, 3, 773, 4, 913, 5 ) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.99, 0.95)) %>% mutate(test = expect_equal(ra, r_calc)) %>% select(-c(test)) ``` ## 7.8 Factors for Equivalency {#pf-equiv} Vangel's 2002 paper provides factors for calculating limits for sample mean and sample extremum for various values of $\alpha$ and sample size ($n$). A subset of those factors are reproduced below and compared with results from `cmstatr`. The results are very similar for values of $\alpha$ and $n$ that are common for composite materials. ```{r} read.csv(system.file("extdata", "k1.vangel.csv", package = "cmstatr")) %>% gather(n, k1, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))) %>% inner_join( read.csv(system.file("extdata", "k2.vangel.csv", package = "cmstatr")) %>% gather(n, k2, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))), by = c("n" = "n", "alpha" = "alpha") ) %>% filter(n >= 5 & (alpha == 0.01 | alpha == 0.05)) %>% group_by(n, alpha) %>% nest() %>% mutate(equiv = map2(alpha, n, ~k_equiv(.x, .y))) %>% mutate(k1_calc = map(equiv, function(e) e[1]), k2_calc = map(equiv, function(e) e[2])) %>% select(-c(equiv)) %>% unnest(cols = c(data, k1_calc, k2_calc)) %>% mutate(check = expect_equal(k1, k1_calc, tolerance = 0.0001)) %>% select(-c(check)) %>% mutate(check = expect_equal(k2, k2_calc, tolerance = 0.0001)) %>% select(-c(check)) ``` # 8. Session Info This copy of this vignette was build on the following system. ```{r} sessionInfo() ``` # 9. 
References cmstatr/vignettes/bibliography.json0000644000176200001440000000173113676373651017330 0ustar liggesusers[ { "id": "Stephens1987", "type": "article", "author": [ { "given": "F.W.", "family": "Scholz" }, { "given": "M.A,", "family": "Stephens" } ], "title": "K-Sample Anderson--Darling Tests", "container-title": "Journal of the American Statistical Association", "volume": "82", "issue": "399", "page": "918-924", "issued": { "date-parts": [ [ "1987", "09" ] ] }, "keyword": "Combining tests; Convolution; Consistency; Empirical processes; Pearson curves; Simulation." }, { "id": "CMH-17-1G", "type": "report", "number": "CMH-17-1G", "title": "Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials", "issued": { "date-parts": [ [ "2012", "03" ] ] }, "publisher": "SAE International" } ] cmstatr/vignettes/adktest.Rmd0000644000176200001440000001003614477217317016057 0ustar liggesusers--- title: "Anderson-Darling k-Sample Test" author: "Stefan Kloppenborg, Jeffrey Borlik" date: "20-Jan-2019" output: rmarkdown::html_vignette bibliography: bibliography.json csl: ieee.csl vignette: > %\VignetteIndexEntry{Anderson-Darling k-Sample Test} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette explores the Anderson--Darling k-Sample test. CMH-17-1G [@CMH-17-1G] provides a formulation for this test that appears different than the formulation given by Scholz and Stephens in their 1987 paper [@Stephens1987]. 
Both references use different nomenclature, which is summarized as follows: Term | CMH-17-1G | Scholz and Stephens ---------------------------------------------------|-----------------------|--------------------- A sample | $i$ | $i$ The number of samples | $k$ | $k$ An observation within a sample | $j$ | $j$ The number of observations within the sample $i$ | $n_i$ | $n_i$ The total number of observations within all samples| $n$ | $N$ Distinct values in combined data, ordered | $z_{(1)}$...$z_{(L)}$ | $Z_1^*$...$Z_L^*$ The number of distinct values in the combined data | $L$ | $L$ Given the possibility of ties in the data, the discrete version of the test must be used Scholz and Stephens (1987) give the test statistic as: $$ A_{a k N}^2 = \frac{N - 1}{N}\sum_{i=1}^k \frac{1}{n_i}\sum_{j=1}^{L}\frac{l_j}{N}\frac{\left(N M_{a i j} - n_i B_{a j}\right)^2}{B_{a j}\left(N - B_{a j}\right) - N l_j / 4} $$ CMH-17-1G gives the test statistic as: $$ ADK = \frac{n - 1}{n^2\left(k - 1\right)}\sum_{i=1}^k\frac{1}{n_i}\sum_{j=1}^L h_j \frac{\left(n F_{i j} - n_i H_j\right)^2}{H_j \left(n - H_j\right) - n h_j / 4} $$ By inspection, the CMH-17-1G version of this test statistic contains an extra factor of $\frac{1}{\left(k - 1\right)}$. Scholz and Stephens indicate that one rejects $H_0$ at a significance level of $\alpha$ when: $$ \frac{A_{a k N}^2 - \left(k - 1\right)}{\sigma_N} \ge t_{k - 1}\left(\alpha\right) $$ This can be rearranged to give a critical value: $$ A_{c r i t}^2 = \left(k - 1\right) + \sigma_N t_{k - 1}\left(\alpha\right) $$ CHM-17-1G gives the critical value for $ADK$ for $\alpha=0.025$ as: $$ ADC = 1 + \sigma_n \left(1.96 + \frac{1.149}{\sqrt{k - 1}} - \frac{0.391}{k - 1}\right) $$ The definition of $\sigma_n$ from the two sources differs by a factor of $\left(k - 1\right)$. The value in parentheses in the CMH-17-1G critical value corresponds to the interpolation formula for $t_m\left(\alpha\right)$ given in Scholz and Stephen's paper. 
It should be noted that this is *not* the student's t-distribution, but rather a distribution referred to as the $T_m$ distribution. The `cmstatr` package use the package `kSamples` to perform the k-sample Anderson--Darling tests. This package uses the original formulation from Scholz and Stephens, so the test statistic will differ from that given software based on the CMH-17-1G formulation by a factor of $\left(k-1\right)$. For comparison, [SciPy's implementation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html) also uses the original Scholz and Stephens formulation. The statistic that it returns, however, is the normalized statistic, $\left[A_{a k N}^2 - \left(k - 1\right)\right] / \sigma_N$, rather than `kSamples`'s $A_{a k N}^2$ value. To be consistent, SciPy also returns the critical values $t_{k-1}(\alpha)$ directly. (Currently, SciPy also floors/caps the returned p-value at 0.1% / 25%.) The values of $k$ and $\sigma_N$ are available in `cmstatr`'s `ad_ksample` return value, if an exact comparison to Python SciPy is necessary. The conclusions about the null hypothesis drawn, however, will be the same, whether R or CMH-17-1G or SciPy. # References cmstatr/vignettes/ieee.csl0000644000176200001440000002720513507422437015366 0ustar liggesusers cmstatr/vignettes/hk_ext.Rmd.orig0000644000176200001440000003147114065601246016636 0ustar liggesusers--- title: "Extended Hanson-Koopmans" author: "Stefan Kloppenborg" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Extended Hanson-Koopmans} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} csl: ieee.csl references: - id: Hanson1964 type: article author: - given: D. L. family: Hanson - given: L. H. 
family: Koopmans title: Tolerance Limits for the Class of Distributions with Increasing Hazard Rates container-title: The Annals of Mathematical Statistics volume: "35" issue: "4" page: 1561-1570 issued: year: "1964" DOI: 10.1214/aoms/1177700380 - id: Vangel1994 type: article author: - given: Mark family: Vangel title: One-Sided Nonparametric Tolerance Limits container-title: Communications in Statistics - Simulation and Computation volume: "23" issue: "4" page: 1137-1154 issued: year: "1994" DOI: 10.1080/03610919408813222 - id: Harter1961 type: article author: - given: H. Leon family: Harter title: Expected values of normal order statistics container-title: Biometrika volume: "48" issue: 1/2 page: 151-165 issued: year: "1961" DOI: https://doi.org/10.2307/2333139 - id: CMH-17-1G type: report number: CMH-17-1G title: Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials publisher: SAE International issued: year: "2012" month: "03" --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "" ) ``` In this vignette, we'll use the following packages: ```{r message=FALSE, warning=FALSE} library(dplyr) library(ggplot2) library(purrr) library(tidyr) ``` The Extended Hanson--Koopmans method is a nonparametric method of determining tolerance limits (such as A- or B-Basis values). This method does not assume any particular distribution, but does require that $-\log\left(F\right)$ is convex, where $F$ is the cumulative distribution function (CDF) of the distribution. The functions `kh_ext_z`, `hk_ext_z_j_opt` and `basis_kh_ext` in `cmstatr` are based on the Extended Hanson--Koopmans method, developed by Vangel [@Vangel1994]. This is an extension of the method published in [@Hanson1964]. Tolerance limits (Basis values) calculated using the Extended Hanson--Koopmans method are calculated based on two order statistics [^1], $i$ and $j$, and a factor, $z$. 
The function `hk_ext_z_j_opt` and the function `basis_hk_ext` (with
`method = "optimum-order"`) set the first of these order statistics to the
first (lowest) order statistic, and a second order statistic determined by
minimizing the following function:

$$
\left| z E\left(X_{\left(1\right)}\right)
+ \left(1 - z\right) E\left(X_{\left(j\right)}\right)
- \Phi\left(p\right)\right|
$$

Where $E\left(X_{(i)}\right)$ is the expected value of the $i$`th` order
statistic for a sample drawn from the standard normal distribution, and
$\Phi\left(p\right)$ is the CDF of a standard normal distribution for the
content of the tolerance limit (i.e. $p=0.9$ for B-Basis).

[^1]: The $i$`th` order statistic is the $i$`th` lowest value in the sample.

The value of $z$ is calculated based on the sample size, $n$, the two order
statistics $i$ and $j$, the content $p$ and the confidence. The calculation
is performed using the method in [@Vangel1994] and implemented in
`hk_ext_z`. The value of $j$ is very sensitive to the way that the expected
value of the order statistics is calculated, and may be sensitive to
numerical precision. In version 0.8.0 of `cmstatr` and prior, the expected
value of an order statistic for a sample drawn from a standard normal
distribution was determined in a crude way. After version 0.8.0, the method
in [@Harter1961] is used. These methods produce different values of $j$ for
certain sample sizes. Additionally, a table of $j$ and $z$ values for
various sample sizes is published in CMH-17-1G[^2] [@CMH-17-1G]. This table
gives slightly different values of $j$ for some sample sizes.

[^2]: Note that CMH-17-1G uses the symbols $r$ and $k$ instead of
$j$ and $z$.

The values of $j$ and $z$ produced by `cmstatr` in version 0.8.0 and before,
the values produced after version 0.8.0 and the values published in
CMH-17-1G are shown below. All of these values are for B-Basis (90% content,
95% confidence).
```{r} factors <- tribble( ~n, ~j_pre_080, ~z_pre_080, ~j_post_080, ~z_post_080, ~j_cmh, ~z_cmh, 2, 2, 35.1768141883907, 2, 35.1768141883907, 2, 35.177, 3, 3, 7.85866787768029, 3, 7.85866787768029, 3, 7.859, 4, 4, 4.50522447199018, 4, 4.50522447199018, 4, 4.505, 5, 4, 4.10074820079326, 4, 4.10074820079326, 4, 4.101, 6, 5, 3.06444416024793, 5, 3.06444416024793, 5, 3.064, 7, 5, 2.85751000593839, 5, 2.85751000593839, 5, 2.858, 8, 6, 2.38240998122575, 6, 2.38240998122575, 6, 2.382, 9, 6, 2.25292053841772, 6, 2.25292053841772, 6, 2.253, 10, 7, 1.98762060673102, 6, 2.13665759924781, 6, 2.137, 11, 7, 1.89699586212496, 7, 1.89699586212496, 7, 1.897, 12, 7, 1.81410756892749, 7, 1.81410756892749, 7, 1.814, 13, 8, 1.66223343216608, 7, 1.73773765993598, 7, 1.738, 14, 8, 1.59916281901889, 8, 1.59916281901889, 8, 1.599, 15, 8, 1.54040000806181, 8, 1.54040000806181, 8, 1.54, 16, 9, 1.44512878109546, 8, 1.48539432060546, 8, 1.485, 17, 9, 1.39799975474842, 9, 1.39799975474842, 8, 1.434, 18, 9, 1.35353033609361, 9, 1.35353033609361, 9, 1.354, 19, 10, 1.28991705486727, 9, 1.31146980117942, 9, 1.311, 20, 10, 1.25290765871981, 9, 1.27163203813793, 10, 1.253, 21, 10, 1.21771654027026, 10, 1.21771654027026, 10, 1.218, 22, 11, 1.17330587650406, 10, 1.18418267046374, 10, 1.184, 23, 11, 1.14324511741536, 10, 1.15218647199938, 11, 1.143, 24, 11, 1.11442082880151, 10, 1.12153586685854, 11, 1.114, 25, 11, 1.08682185727661, 11, 1.08682185727661, 11, 1.087, 26, 11, 1.06032912052507, 11, 1.06032912052507, 11, 1.06, 27, 12, 1.03307994274081, 11, 1.03485308510789, 11, 1.035, 28, 12, 1.00982188136729, 11, 1.01034609051393, 12, 1.01 ) ``` For the sample sizes where $j$ is the same for each approach, the values of $z$ are also equal within a small tolerance. 
```{r include=FALSE} factors %>% filter(j_pre_080 == j_post_080 & j_pre_080 == j_cmh) %>% mutate(testthat::expect_equal(z_pre_080, z_post_080), testthat::expect_equal(z_post_080, z_cmh, tolerance = 1e-4)) ``` ```{r include=FALSE} factors %>% rowwise() %>% mutate(testthat::expect_equal(z_post_080, cmstatr::hk_ext_z_j_opt(n, 0.9, 0.95)$z), testthat::expect_equal(j_post_080, cmstatr::hk_ext_z_j_opt(n, 0.9, 0.95)$j)) ``` ```{r} factors %>% filter(j_pre_080 == j_post_080 & j_pre_080 == j_cmh) ``` The sample sizes where the value of $j$ differs are as follows: ```{r} factor_diff <- factors %>% filter(j_pre_080 != j_post_080 | j_pre_080 != j_cmh | j_post_080 != j_cmh) factor_diff ``` While there are differences in the three implementations, it's not clear how much these differences will matter in terms of the tolerance limits calculated. This can be investigated through simulation. # Simulation with Normally Distributed Data First, we'll generate a large number (10,000) of samples of sample size $n$ from a normal distribution. Since we're generating the samples, we know the true population parameters, so can calculate the true population quantiles. We'll use the three sets of $j$ and $z$ values to compute tolerance limits and compared those tolerance limits to the population quantiles. The proportion of the calculated tolerance limits below the population quantiles should be equal to the selected confidence. We'll restrict the simulation study to the sample sizes where the values of $j$ and $z$ differ in the three implementations of this method, and we'll consider B-Basis (90% content, 95% confidence). 
```{r} mu_normal <- 100 sd_normal <- 6 set.seed(1234567) # make this reproducible tolerance_limit <- function(x, j, z) { x[j] * (x[1] / x[j]) ^ z } sim_normal <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080, j_post_080, z_post_080, j_cmh, z_cmh) { map_dfr(1:10000, function(i_sim) { x <- sort(rnorm(n, mu_normal, sd_normal)) tibble( n = n, b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080), b_post_080 = tolerance_limit(x, j_post_080, z_post_080), b_cmh = tolerance_limit(x, j_cmh, z_cmh), x = list(x) ) } ) }) sim_normal ``` One can see that the tolerance limits calculated with each set of factors for (most) data sets are different. However, this does not necessarily mean that any set of factors is more or less correct. The distribution of the tolerance limits for each sample size is as follows: ```{r distribution-normal, fig.width=7, fig.height=5} sim_normal %>% pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>% ggplot(aes(x = value, color = Factors)) + geom_density() + facet_wrap(n ~ .) + theme_bw() + ggtitle("Distribution of Tolerance Limits for Various Values of n") ``` For all sample sizes, the distribution of tolerance limits is actually very similar between all three sets of factors. The true population quantile can be calculated as follows: ```{r} x_p_normal <- qnorm(0.9, mu_normal, sd_normal, lower.tail = FALSE) x_p_normal ``` The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that in all cases the tolerance limits are conservative, and also that each set of factors produces similar levels of conservatism. 
```{r} sim_normal %>% mutate(below_pre_080 = b_pre_080 < x_p_normal, below_post_080 = b_post_080 < x_p_normal, below_cmh = b_cmh < x_p_normal) %>% group_by(n) %>% summarise( prop_below_pre_080 = sum(below_pre_080) / n(), prop_below_post_080 = sum(below_post_080) / n(), prop_below_cmh = sum(below_cmh) / n() ) ``` # Simulation with Weibull Data Next, we'll do a similar simulation using data drawn from a Weibull distribution. Again, we'll generate 10,000 samples for each sample size. ```{r} shape_weibull <- 60 scale_weibull <- 100 set.seed(234568) # make this reproducible sim_weibull <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080, j_post_080, z_post_080, j_cmh, z_cmh) { map_dfr(1:10000, function(i_sim) { x <- sort(rweibull(n, shape_weibull, scale_weibull)) tibble( n = n, b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080), b_post_080 = tolerance_limit(x, j_post_080, z_post_080), b_cmh = tolerance_limit(x, j_cmh, z_cmh), x = list(x) ) } ) }) sim_weibull ``` The distribution of the tolerance limits for each sample size is as follows. Once again, we see that the distribution of tolerance limits is nearly identical when each of the three sets of factors is used. ```{r distribution-Weibull, fig.width=7, fig.height=5} sim_weibull %>% pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>% ggplot(aes(x = value, color = Factors)) + geom_density() + facet_wrap(n ~ .) + theme_bw() + ggtitle("Distribution of Tolerance Limits for Various Values of n") ``` The true population quantile can be calculated as follows: ```{r} x_p_weibull <- qweibull(0.9, shape_weibull, scale_weibull, lower.tail = FALSE) x_p_weibull ``` The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that in all cases roughly 95% or more of the tolerance limits calculated for each sample are below the population quantile. We also see very similar proportions for each of the three sets of factors considered. 
```{r} sim_weibull %>% mutate(below_pre_080 = b_pre_080 < x_p_weibull, below_post_080 = b_post_080 < x_p_weibull, below_cmh = b_cmh < x_p_weibull) %>% group_by(n) %>% summarise( prop_below_pre_080 = sum(below_pre_080) / n(), prop_below_post_080 = sum(below_post_080) / n(), prop_below_cmh = sum(below_cmh) / n() ) ``` # Conclusion The values of $j$ and $z$ computed by the `kh_Ext_z_j_opt` function differs for certain samples sizes ($n$) before and after version 0.8.0. Furthermore, for certain sample sizes, these values differ from those published in CMH-17-1G. The simulation study presented in this vignette shows that the tolerance limit (Basis value) might differ for any individual sample based on which set of $j$ and $z$ are used. However, each set of factors produces tolerance limit factors that are either correct or conservative. These three methods have very similar performance, and tolerance limits produced with any of these three methods are equally valid. # Session Info This vignette is computed in advance. A system with the following configuration was used: ```{r} sessionInfo() ``` # References cmstatr/vignettes/distribution-Weibull-1.png0000644000176200001440000011341714125357664020747 0ustar liggesusersPNG  IHDRh4,~k pHYs  ~ IDATxy|TE_޷H%aq8xxuf'Wu@88 (,5@ ;d#d,m{9s+~stu!`0LBF:  -x`0(O `O&L ~襦&Z=Pޫ3f}l߾=77w;/&2eJ/_&BC嗾-E [ޠ ꫯ5ʷ鄄O>¡ f8\eRZZ/wvv^|yϞ=ӧO9s&qB\;}3g5zϧO.2 +ǜ9suvvvvv7t7S^t:ݻ}[ċk EP|2I'N|WnS*piM\\\ZZZffu]_m߾`0VZsagg'MӾ-BJ+h,((kLHHHKKKKKj* ] gpx|-> \sͶmۼ-׿-[h7 /a@O _}srrZ^{% :::,Y+%%eΝyyy|*ڵk333sss}Yzv/޲e+ߛVVVΚ5KחϧhLHHxz]w8U^u-444غu=аdɒs~?~/6mV^~=OؕaΜ9N _hF1>>斔{}}0P?|;Bz@BEP0^B+$)fc&"myg/^ !lnnVT}#GtMwy'FwvvG.U9I<@GG}ZNu]yf_W{{{^yO?T?~$+V9sFG BXRRrP"<+WGr]{Ϟ=iiijU՛6mڵkL&khhvWWW\\|сrԿ_K7|AK/q}l JŲWP/^㫀߅2 ݨ-U)( zHL/}ǃ+$P\]~ݺu~]t:ݎ;n7MYN߫>8˅_y狟y-GyMd[nٴioAz~?ӜaЫӟ{Szk( :B|0}зI0 NYK7fnOHHXx^rssZrrrJ%.^TT~ /vwwggggdddddj}L矏;vʔ)/L&^vKKѣQ EQ`/d]ME'p &~)Z*Qω7"P0{}cp uȚm6/^x |w/{NEy,=%++7{J$222Fu*+++**K;w/Ϝ9m6]ǭY!JX|yGG͛ږ.] 
xUsvHd/ "^A&#'3H"?[V}믿xŊ_lYuuaYaX,裞^zwIJJ0FA_qܹsZmAAon]wݥGqիW'&& {߾}SNh4ӦM;tP(~t:!TWWFndk^C AA%޷#J(MozCG^oN1CsA@\`/`0 fH=D9x`0(O `By}[yPb+WlܸѷCFFF!꫾-O=TJJJ!\tiӦMbD'o9 MMMŲe `0`Y/!t=D9x`0(O Dr>466PŲm6[~~>"Ẇ~8qoi0?`Y,[l9rjΞ=F 1Dq\MMs=;|pAA/=c ?Y⋛o^06zl8Z{2t(nl']wocǎ Lbnw\UUUDɓ'&O\__",˖-+++3Lvry v{fffdc ~ jkkj@ 17k4;#==jZZ0at'Kbb"pݺuAm\)dl sO<",m۶׿TH~bnC9 <$/fQ(>pF Zvҥiii-t8#r#&8>я;P]]]RRpF ~ٳgN$c}x)))4445 _;6uHG I̙ܼnݺT\xʕ+?'xkt8#s655^h4Λ7/ *++M(0!Jo|3rʒWS%"׮]XFň֬YH0=D=x`0(O `H~rlFjگ`Yr_ X]$I, >cKHN n1`r cr\*+X,:?i$)A;~_\./DE$=Ƃ]d|eYYYYNe/t8z?N1}qB1 l6Z-,ѣ~Aqf3 qG0\l6J%ezJaxD0sLv{||<.xbf(Jpqe4?n(qzwy8AYl6[BBbb9㪯`0wejʢeabn!M^ BBczwvF/ ˲&)))?Np'!ƕdj#eYԻa=D9x`0('K7BoQXp'Ɔ I\y7$U-Au F {9H0P,"{,di&R1 =q. ~N|  Q1H+m1 CH\]ӻFovY!Dr(=`N064M N+ͦP(2ff4v% 0bnĸ q^+8,BIfUI\9\.ɗɂu8I\9N\.ɗCÈk `nX_bsss^p8^{5Cd@  '?WVV?3qϞ='N|gҾ۠6 NŸKKKdN'˃`0 f8 fjf1cN8qѧ~:_dV+-P a* B*ؠ^'n 5>|G}fɓ'OUhiZ˲6 P%+ݮT*%)jD NJ֖Rϯ]Pi.I_$NyɂJ% qr\ B>\HN2lIYN όu\=LdY6F, H"gƺnZ-Iz%0a0 MӒ"ᙱGP(ܯ]PenTp'+6j$|sb+Ѿa@LpR/1IjL' ܜ8#F 7 ;=/b>)t"TsM%z=3ܜk }v2#V(ooJ/^ˣ(OMM^:33sb0W?>uI_ gn= Z{̸H4bX G,sۦv^2$G!߸q#gҥx`.јn(ygZ"Hawg:ߣ4cJ\r頢ѻ\syvuuuttlڴi'i{|UW"`{] ߺ%Cf8O+ѫ+VTUU_5ĉ'Mk%%%M0ꫯر0o<%hCs=~HOO_n`\ZJvoK':rC eוD:HGbЖz[~1[c47vq~cSlIH<0*WdߦgwAU,p|[3'Uj[#R$Y~cIh6G$(O!]h?^y:~sX_Kй][ t[ ƕ˵Z]%O!aqc&?L xh:ec]6q_xՓFE*hJ3@p=G ZOc!^RK*LY.L5%?=ri'n= ,者I&O6}ףav{A4֠ jIFSwY ( m=6jQٿR+\C\XzBv@(S=W2 'zH "8Bm)NN"X̰s%Ch4Zw-DXVV~G innx< (ˍqC~ ]vH ⿾~~HN9֤p)z Q.0S0ie!BWBa,$݌  9!,,BdJqS3 3.3[~Bg5–E‘/?D2ySؠ(%{;DL m=6GL0qO @0V+*k7ƶ8ggVC )7/ۡ揥B!zI]¸xYĘr}v-e%AÃjdJ7yFaa0>̲c+lQ3sBPPfψybzAmVZj*o_l,=y]=}dMy:xq7N%GoèGOӴfD UoRM*^<]$ Za\L~",t:6G_Ԇi!K]j.2&I*Y8Ÿ2LZ9L:: W8LL>z􂄿Fjjjðlg eP_\v:3^dQJ7_*av8:xen:,j8VRLR6FGg3PGiMw:/?O¬ Mw /GvQ JgMH'XvL,팭o]NCAl dhDo;&Scylzvq QZ(9?!8"o&5|j4@SEK,=B2r&i%p{viBZSsTӑa ,=-rr%V !,cr^\ DȋSl e_js Ǝ7+Qp.: K{3-*9sB^ckFؼeS3@Qjз;iyV{p^hk IDATHV}at.1lblD&Å漒o~&$*]`Y?HxPqY[;D(KԻ"C4J3s71.аeAJi?&CeC,zڮIyAOэ7`@d'zl ~"z{_{!&vtw-)]&C3H,n ]]66H4, /Z J3HBYrJj0yC MZZ.+&Hq̤%%F!yPH" )1qJ!8QWoJ aT߯]Pa\.$clNŔ^[|C! 
RmÖeY),I곳r1V"PRTl=Èx^$v]F%Zy_;mΘ'ٙr6 yI^$^Ic"-Okit<ĈOl9|ףY?cB>BEq+D;ljd|N.U/ ]T#-A&q3AqFҴ3")F:E4G2~?xjj*`ՙ&G -]6%ά>jހ.\0*4TZ;#!O3=E4umlp:Ջ3oINqS8jg_/X0p#5,]j! @FQ[}|(!XI GEl] MDF>=6bRp,\6ŸKKK%6vuuuttlڴRD;Ϸo57ꞲlʀK$(mhøOte4 hL/Wq֨>m㺝3/})LπЎSG2~'N4ik4aݻO:0 M, ؍;ud</AY PVXE)yK( `vZ(W;;U@ge!IGt#۝NDrA !PzN\l*N Y< a=\{~Ɂ6ASH%]:!lt|n|L=[ 1@sD#qbƙ3/qpº:4gee:;;E ڰ,+h2oٞo)IQ I3$J.U;iN[)@L Mخ7|\z52KE5Ҩz_* YІIdASLخ8=dLخPeYFp +HE #( +"hhF6$I78dCzͮ@{;0 .))ۂ6nٲeĉ&Lhii)./////ifFg~GWfMSa<OЄq8Q$İ]}|$MEVS.KPo=@o)O=,˺\.d9B4?<$IRx/uu| n!^>45srrza=џQ: <镒To# Jc¶7Ҧ(^\oT("+ѯz=}?;Oz`b-,(r@\EIݩ 96.KPJ3IJ )SYh,=xgs}knB!>09]18R$zUn6A͚uL/,)t6}V+?4ü˘ŗEmZ=epZ?p03;]E<<[ H,+i3N̰Bp@3eu6Ѳlu֮,_`0 nûǚ^sZ½s _Ow_O/7\MZi2͜ wUs?>ۅMRT(RQE]"Sۯ_0 .Ɇ8~hzxy.G^[SO -U!% dL +>o]͎˒䧖ǣQڗNp婝5U*9b'ZOj^8ڽȑӵqO-mJU/.LC[>Mdq{ݛ]z7N4Ȇg&rh;OِlМh8Vd_+QfCs6wo ;?=+Ջai5UلO&䏑P㱘_j4 LLIٸl]V֋s!_reEEE{{{vvv`cKKKps8k7)fU[ ``ZR7Hq2\g֤pZ>_[9w~'Mu- YjbzEYvPՎ/MO3ЮVh8vpl%GM>N=jd$ƓuU1s\KSݮKF.ĔŝOsϸlB]A{oll,++?G_cKKK$m6fdWY'K<6]+.A&2!gC^լP7&iԢY@D=A)!Ҍ|g;Vf]Wzܗeb֞~q9ܗ̖N'Y(դut 3M _veTݨ4|SʪmG]wZokkqYnX@t2h.Y7y] !asfNdb4LQuE+Cş۩x/8+ހq2ڼ;vL2{[2ˠJCRE2N IF4S P2&XښsO'$ !@=O|lf3B 7rQ0۹n$ˮsg?X&!q@AA  @K8!9.#m]/9*MKW⑑IM?>ŋ8,ooR*o,x;ǃd9X_qm>nR jI@2DPu2 $2dG!p+d H` sR\eYU`~PYƤ$^Wpfw˘?nOX8 Qy(Y_j\xYH('%<x HɠVf@2K~eRt#?k(m&C($dAeϽ*1SYnSzŔ)h+֣j|vޭ*(ҦqvPuP)V!68\TeJJV!(oNn(N`V٬7LL1z/8Hb WJP(,?>=Pb9xh*äV(jʩ  :5E9r4+&^,Eo/i g^.Ug,keH'MU*χ!R>kG( u*Za \S*t !TE˂gvewCd{2DHg*G')Bf, iF.y_JI +iB.2 2F@BjjwtRP`痌_Z{2P$iZm0~ m=zBhػw/z2ټEinfIahRp:AA'q_P{~W(%9 BLvi#vP$t 4) ڟFC;,0 cX۳eq8X$AG 2@^YL `Yd2%% ,-aʦ0# g D_^^iӦ4Tz衇|322|1[0 rt: @?Ӊ:+"D|s\~frD*"О/ ˲C/$/n[P1l6ZDaXst8ErHY<"RJ%ewϢ aΞ=rx Љ6b~< Uc&hgq\bb1?ɬ/bYX7no GeYV!%..nuuu#MTD*Yt:ĉDr`00%0 &=D9x`0('Eel,el,jel,RDr߼ysyyy |֭mټy̙3#ĉ/o͛OxFǎ{}[l2yH3BܰaCDr7  `$P[ޱ^DzhZ,K,* r.R/^`0(O `=J(Ͷn:QRR+b^zŭ^Z._f?.b²﯂]?Yryff?K@b㸚{i?oڴҥK.bڵkԩ/bFF>\PP/vwwF:'߯.b_|/hlhhtt 媪BMf; Eqơz^.Ι30yHd5E ?-[jM&nONNtt F;08ƍKLLj-))t8E=S蘡cVWW,^Y~YtϞ= ~8'aOhtqqq.N:USS裏/_>o޼7[.333fA,UUU.X E?YV\a\n0~D::Lbt߸q#A>쳑 f$><dD'K~W_}5"`D`0(O `yQT~B8 j]z.{< 0 
cٶo׮P(eaj&$$)/NB(F=AɤGPPؠB,,˚D8Eq1fF ,*JԻ@Y0~DrG<4MSo(Jp$IQ&MA@?2LP$%(1Ÿb`0H2(H` ,2 B(F=AeQ`1weY㤒8b\?Lr!5z rD`0QN$nq:<2~1~ q !t\5=!Iꝷ`XJ% An.V!Bq0Pec@J%Te_F,hIWǹnශN2Q8AeIĕP*3 t^P% qAHt* I=O5zqݒB!ĕE=T~킲,r-Rv $k._^ x+4,6n4H fY9F F`be !09Cx`01ʕ+WdrlٲfG`gO,nܸg}`޼y{'X}饗bfjf1cN8qѧ~:wٵk`ʔ)w8NBxr Bh6Y۹n.Cv8",, ,"mXZ^tx`JFïC~bdJ! ;!J.K?n}ħCyv*N ~C=#~ǎ0 {{{ y/2z>}z׮]o/s7HŸZ-zNoi{fYFVY|wJ9S(\fN02sQQ(8Idq:bŸX,Z4M̲nE.w](,. B(F=1h4tӍF'{_.0^PVTp'+ժVGdGљoNSPszǿA$Xt_[[[UUxڿo{{{O^WW~׶`5k$wN<~1bVS[w)>)6j@'k\i#tr 9K -@}HD/FcX6jok hk?Uku77^UmFwm} Y 9CMyf&E}iAeNĄ%cgUIqs䧿ot8#oMre}4^6&G@AalbnGcSe\.#v;Fo~8CU^]܁ߤWw)vHh7N' &]`);J vGʨ>6p=r'YFTE*W@@YhOd A/*waȂ2ͥ-d :(6 ނ.ϏK! hjnH \z zgK4Ͳv]f93V+˥V+mIgZOY*\.Bβl`YD.X1nw`f4[xK^3t)fIaaa%p93V+(KgIL.X0:vsD}7. YXuR"pp1Ё ~[z)`Y\UiJEGzF=zh.! պ\a媦[4j4`nѤԫuXHőe[ $eU-%},pI%% ѯ87_a=&HZf1R!= {c$_P3g E0@~ӦM[jՍ78Kn@6didC]Vs:$)WzyKP5F4eW\(fc422'zibAP9oiL! cx|x`۶mcƌyΜ93a]uhQZI9U3 6mǒ&]îJSA"Wf,ADGVWXQUU~h4'N4ik4aG}cyflT]FLp US]5T7Ĥ}l_! x$ $Gd7I&qlljMMeҍf"m ij(Œۘ-OMM!գLremfOFFAr<-[z:dY6...P}9s&a…uuuhGQ7x#)/x<:[)HJ1BQ{ y!PKi+JAYl6[B.aΣ3 ݞ,ɷ/z%hXǖ axr, X,D-ÜG߯}eiSqޤ3㴧 <D EP²dɣ$Ÿ2LZ77X|d(3K^@dQϙ:^xt|֭[wuuyAnݺ$I^܌裏lBoI~Շ馛ѣG+++D555EE mٲܹs )\*?y.&iw Ztcl~j%r$7;?˯=71YX==`3&f? 
*(4]HTb@ڜ9s^+// *j6k,@JJ _կ~e0|gP`ԣ֭6⑆tΜz+iopi9xN]l$ANy`[{ ,\AYAJp ;t:.ۗ'NtM?x~~~%07%KZ-A_~… _~_S9Z_,Yy~JJnf}-=]K:9KCM u)PIR4"΅+n7GNJvk/__7n\|yzz`CM's4[ @f+Z~~dee8pq׮]>K?'zѸ`?rKfffSSݞ?*&IVPj2Q  #I<{ H-|ezfeR(bW+yjJ_,BvZݡYa9,j!eڼyo~;4a#D?cƌ{g߾}׮]l0 ß؎* & A6$IJ Bv-b09E[,6"Ȩw~iĀI%"ex9/S8Y#MQlcZT}5N]qDlOdl}T\e4u;}\VK㎮)Sx[ UǡK}ilůZl  !sI| jSct*}5EKXb4RR/dNg_+.yCk0VHɓ'<Z% -^U.{W8fQ!VRv3LuڕJ[WujbI^êdJsp$Orҗ<Hj6x44fff\)=MtIOM!g4?/D5% EC;qHXL%Q333qXm ^bMOӞ!b4VI4Rt#^>_+**⊛.iD^&iq/٠d8r_cd3E.S/gW40j;yyZ>RlGԠSD)}ꩧz)3ы K@䙴@Z4z&Ȏ!XbKJo6|z4E:uoi0*?_`5&,$ U z$N$pM$pIYkON.T"hV+,* Fð,Tt u:8( z]8ȰL-?^__,̙OqΝ۳gOmmۿ]ȑ#VWG@o;۶m+..6;v숔ZLQv5+gVIL*nlbU>HvWJ}>M>j ڣ~=T6~r}{a]O?u]bŊn ]s5_~ڵkyohhXf͍7ozz}}UW]ꪫ=ZRRr̙Oe)]zȫ3%za6B|=TI^~f)~P镶1 4j1K2KIm#%0ɤ`Y(+81]eg-% 6"?ˠ||\ve---?>Z322\.vgddr*3ndcJ1bh+czڴ|ƥm~6z)Z&,6XU-E?ݿiӦMv!x|߾}>짟~/ɋ-ꫯ^z)|3yaOeDiӦϰQ/$SJ~' w7Uv]}j>=Pq쁶V ZգPh3^9XfS83Tg6X,[nN۶m;^{}{{V7\TT|͚5/2?wqGyyyuu~{CzzoId觲 _Svyצo߾}pGQR[0]OFu]>9y gr~93?5CL)҃T1,J60G3i<[Ȁ"E`RݤE-W 0 sjЯ'(*Wr \`ya95D: M:.u}-t3<L4͟yK Ch4Żv 5}+,^yyy7prqc6@pgT|ь*OČ|FZK AbyW1,]nm!mLDaNq#C`@A+HqqqZR["2v``j,aXǥ ;aB҂ pWbLu; KГ$oƻfQK8;?݉ ZC˭"ABOdI=3elaATqW=)_R*\\I,140,#cOjE-W0,^&.-m ?eNptڈcwt;\SÌv"4?UvsNZFq2%Us xWÅE3uY).~eqf%6wQd;SEgqP={F$W5ǰ?twrSU*`_+|PLQ ⰃO>dnn./(P* jѝ,,"c4GD$8?JmV8dpeOiGM&Eϭ sD@b\%kteĿ锺KE [6lT =vWZLݩ/1`y|>DЙ_軽jj`ȲY7@xr҅ޤ ة*.Sҵ찪d#π~ST< qA@oo-[jjj-[ΐ'CrK;cXbexjwx)_\<=MGc:~5sЇaձxQhgĺ*w-g4A} ;[[[;wM6eee͙3oЊ dou8,m. ϮUKK/^=orIM4N:rpvi9S1,욬4fّaI#=>BԞ!) 
w3Y)Ea::JvA w%h:9 KJ%usgEB/*AvpѢE+BlٲYf+-gʲl < k~u:iWo>Ɗ (0L0m?KG ӆYE&$) ihs `gX; DZfvtN4\#`\;iOq>OTXz)UAP=YʴI14,9D"*j-=Zw J\g̖E6` J.ٖk2ߨpvJnS%a~9sttt{`7C Ic&kD 9ח: IRzEu%+I';,naWA',,P >NW,ϝXOJ*ّna} a'(0_S; *xbLZͷ3 KpǕ!ŝ]K4r*&dLx$u=@ ^ IDAT2aw}wݺuPmmQa1ۖq1sֻMUmQ1S<=YWrZ.7۠&wkN]]7AVuܹ~#G>U~؟gbDBvT>ʂi}~[f.O5ڱ5^P0de"I/oϽ,Js49pwpn9mN{ˆqcw^W/X,=ǎc&==z뭷9²,Zؗoٺuk{93kDL ZzHzOtηțNA;\<1 c=1D7=w,\0 |!%N1?]: FJ8SGA뮻Θ1NQH+bӁ:8Gr;3rw" @NɩJ|jb)hݽީ\#s,t˘(:HH{{{#/Ҳm۶x4~#{YbE F^g{M3cZwv0ad^>Pt~_r6TFdM,vS!S/FiUA(V}M8}$GZ0cƌիWkW]uU+_u̴$e^<3`V, {0 gfWz/XQj2U0 +Yawq7 dяC=xn @WWgҪX^x 6YR)4n/Wʪ{j i>O |^ԠI0-/\_56]SXף>:Un8/\9`[_B8xdA }T:t6/ƊVC0/ZړcR 8׋VC{:fgXjwlMִ1F"hu ^,ʞgڶZ{ƖRFn2M&:w hѿqYyZW]!Y#S}lO[XsvN\k$Swr(rbTG3#CF'ȷGf7&\IlR;Gr5C|y9)/u>)ľ+ѣIdx{y7%gXqZinuEKݦ*Z~>ᜥvU*KwN}]/;v/H ?%MYQnߓ6wSN}e/R[צ;2"~o_55)]VQOe|;::"mU^ǩ{ҩk?b.~7w-Wxޙw1X̨+E8Sխ/m?q\r5ws1mC]6cZWί}TWs"3ۇ`ӭ ˙ii;}-`v&L喠GiLQ^ĪX P+#yJ:<#OG&w ~!qRc 4{x;Wu1\{Q$Uȥ Bm4^} ^;dVQG=#Ñ LJY0,AhvxŝA=J $>mz|2Z\_n:"Æ':k!OCmz}Y=9lκiyϘ{XXx-Ȱ0[N7r^tF^t3Ĩ(~Pu<_X"m.5}iiќiJp3<_ (lK1NZr>'#ݠёw~wwX$ထe- *]}a-,?Yʞ9c͵U"i䧃b G/jqm۶VZ522xر6I?uFVaMnpyY%:8jtDFȪ{/y-v4eUsx@L"(OujQxš΃,w \!1:C;Tl-6PTU8$iFGGcңWGl_8\YU $Ź7^nX`p/<2,b>=I,inm@.85wӖ=a9Kq!>er-Kgq7.pA1`-!hi":1!(+31-*@J}X)Kp݀dd|;*Ӷ[8gSO=SO6oޜ xkE/+/d+}+ C}$###cʣܚLEjt:{WlaǓ)M2\Cl襊n)>o\yYua+n\9˲.jJSY8&d BlLbS}_$²*;{{أ&+Kp\o N>5fsSmJt:M&Ә胬]|GX A6Hb_dɖ-[6lؐwlyo 8XX(=#"mvuztQ;˲84p8Ž&kXjlŽ I@ [d.cpu:*a }}}a V2_캦Nx#dz2'*6b B`_?BѰ|>YIKlX=RԹ8m688#Ν+m&KO&aq:^W_! 
⊦iN+П,S@H"pc#;;[o'ӦMSeujj.̭@LS@ cb2n}EILaa5\z䣏>S7Ȥ$//oʕG Nx~f۶m;t,Zhڵ1KN|?O^fY 5LB8DO> [;2qB___dKn>#%A[nƍW\)*K{tMJssP̮>裺k^+A7n/1&0 DfX8"xFSS^l;v숔RHNN޽{,?p@̮-Zjnn@ p`e-#ovEJ!---/"AV5Wv߷o_4!,i Ú5krssJ#I( "JN)d֭ss=W[[{x\Z*##c֭f9W%ZZ$ѣx$8y ˲jjj}G^oUUUBXHYK9)W0z_xRzp*oǓ=!,,x<m6ݶmIW~7z;O`x®z#2,{nڵcl7/~#onp&2,R~֬YU>3Y&2,s _dee}v}hhH8YVWWWqܟqM2aa}ݵkJ܄7 I<!~gF[ou 7R(rlæ'4,C=x۹sgp HpRn߳gA!w~%aa>|x8D ,,yYq|"qHرc =`=```_OUDeٲe8drw>l~'{tRRnaMJM_2"2,/{@ Z"A =@$9Gz&qEQaG|> KdX(Ba=Ĕd2z v!a 8ܟݽ#9v}p`GHrB^sgҍ_uM@ k=w'{ $g2Yz<s#ByM’x.@D#<\ؒ1c֭#m6nܸm6UFFFd/FSŕ=Y(,b+n}A4YW2S4,=Y,Z:88(aQbOXhdFz0S[s9C "}7Lp_x~_ Qz v[޼ysHa(SNնd/ͣkNӅtaZf ?TIegg&nWdEEb5r:t|w/X-i `DKL.2,bߣlhh444TVVƒ_}A=\;Dm^oau`8<2UgOfq{v{(jjZG1^Ddɒ-[lذ!//iǎ$I666\K. Qo؉Ho0|`F(4f^wt; hvdb)f"b_ xGVUUUUUE$7oi:@?F%aEqҺIzE [\2hD̠)eyM4APN DW#鳣X\)M>͢ey1^^>T>o6u-_ uv?ۃ5A{2,߼Y-J3kd"fB;Nȁ9rlΥF4}^`|)o5~V&) lMp^#G?fWzG{$xR9c==r2t㲵Y"-YtsMÛ;~HH~̮D{<7R%-Ei.2L$Գ| oBr"X[O&M2YCJI3vÜ]vIPbyp!9=NiD̨G/BSSK/0 `hhhժU6lذaCQ;Xn¸T/X Mup+ҏLV(.$]vЬa}l! ӧ-z{{lRSSl2xĉHmJ„Hi˲6 +A( Mq:z"˲>S]d dâ\X0LŰ1lnEF x.yt400 w MaaX[ Aa(ڠ$PY`0YĉWX[[;wM6eee͙3~abрm<+QҺA+pq H3= +}CQd"SeOT+, lt8q56*x.T]OzXrCQ/)* K!Ch#[@}(-?X.W^z5`.60~K+0;JwTLo@,Zϥn!'F(瘌@=@L %K:;;7lؐ lfsW֭o &db{o;|$`a,$])#ȵXH-qa\yxƗ@p9"[8IHd 8)cID!f8\C]bxsvzH pf(e3d̀O ,M ϭeȏ8A } 3ĨV+Go&YjB,S)זn!ms3"bQ L2$әJ jJߘ\opa -A9!Pݰ^ 9 jyEߊ9L4MKFϧ!!,L qiYXXXA0=.Q‚@(=K1Ԛt=p)*wsǨi<8F&~TΩN_Q F@ g2q 0LֆiYP$`3x'"׬M1 1a0$`4PܪhX9vWT`[X&GXZFh»aBmIDAT?J>PoI3Gf&[FHӆ˙0jꔢ"xE!>^LzY<>}Ǔ 89U5'p7+ t] NiybLMҀU9"k!~ɐZ;ǑlE!8_z'&<#gL"Ə)I&F\Z.{颟ZP=#"ա͑F cxE+LF-5?qG }KKKMM (S $_k iQ2Ǒr,rDKО{teXV+D4#2L|pAbqEJ@ni+A#H$;|#cc.Ct P>2ia)IyQpvD8iZpa*|>N 4S%2`ھ@ >Y ;.IgU\~V bĐ^klQEn'=,W8&&;>ct]uH9]3A\AmQՠD9=CɵTW8&mXK2D&u͚z1`-&ac2` ?3#_H 2u=n<]3M+T&CnHa4lr񾒐BktTF+hS\?b1t΍xI_ڦ03KÛ$dHk16iFzH2h _9S,׬SL{ǣ<ڬY&S%:dXZ:@dn,T 3m ڰ,qlֳW / /0<3D}EOdX0LW0,8{Ks{EaXh8:Пm ',0?@xU\<d̚hwB0G^ ܽvg-A D̅$h a8x^YC`U 0 R3q0rFaHFih8m%aaYV/X+ jECU90<D²,0 + Iv:*cT>^ 0ES/@nbAGU^Y*j1oFi.ϗʭr%KϞ ZcaK)V̩1mQٓ#nM%{G]:>ʋ舀J X#hHѸ)#4t{35ӓ~g UV5PxvzdڟAza|\?^PPΨ'4 [q4EzfIR>V @eVS1o 
6twwCn{ժU~z|ܔܴD1#I#-M8?YJ3t%#4t3wj$UxT֣;800ۻe˖ݻw1ڄ`3>_zLRS lK3j:G̣LJ?1igӅ?t:ٰxތ 7{J{b0Knהڒ>}e=O aaYvgfFQ2/8~)+ki4<=i2gB o@y@CCCee~,^OKKKKK0%fߋysxThh%q*QծTi6 ǟ[铵ek6*"T֣;xW֭Sqܓޮ1,*^g@ku83t|to->>i>42Bчg_BNi ĠSqؤhvuR <>:X>NcJ >,WH!NB!`j d:=}3x|e(% v0, v4@62S՛aAL0hݮYy;G+k>meBz||:.l?!HP ÂxCzf- fpn7WLì531r.FFqJھ,޳jƘ6nF& H^'3~~ԣڌkѣ#&~4(.'TPd&Y*% (h G(0b= =""W1v襷)Bs yN=!OݝZ1 ~B JfL=_g=xlXBsmGuO;.Z"M] e[Nò/vvkugokXȼrjkwlPڹƱ8-, SK'"&C~וDsъhΜSw` ˲?ze]E*fAZؤ߳6'p^ɎƖ@H Jуcgg[,W]~>6л{iKNk iSÜt~ufW/I؀$B9G/j3%}J/ί[oYBM:H忻^W`ҟe~.DM. 붽@흵n\xe< >3GiP,<2 ?w WebU<`FϿxxT-=|>_`tn0Kh'>;}9T38}OnY 'xi`:X-&rʼ_b4DaiN5M+KNվ{;J-.D a ?-&\@X.Vc 1$}FxaFrl } _?0;,HGkl-.y>r `Y^@S"l\_c s^)c>Gj)L! ]###EQ1W/ڼ`Өɩj;R4a<_kX<}8A z`8 2xlX=r °!w_z;Հc.k=EU5ʣGы{{/`PTW\+D.Px}CIKKS};΋?ʃhZ8囗^vX>'0GVx}bv2< qeg(L* _/5²Z㜰O:,ǹ\.Ux|ٍU68pVճ>oj]]]9t~N<! ((,4 o߾oQuRYY/aZqUQQk׮}E`0Y&7ܣQHPXDAaA IąvSKEru{瞫ݽ{w^*~ DPXDOAa!"Hą~޼ m(,YٹaÆ-Yf͇~nݺ+V㪴t=\QQQIII< a8Da=%TL!DOP#DzHrB@ IZˮ]n1A =@$9hGL]w'|E@=شil6ۃ>LOw<_.//^~ B1ivm|V/)ډ_|_ףG:t_Q#SӣG7tyw瞹s_Μ9_n uG8pbzĤaZ̙W_m߾}߾}{}};â8Nh|Gx C4h1vm֭9sjݽ{Uxp y燇?35\ߣ@`ʕP @D-d{w}S͛nݺ>fΜ9cƌE-]px`…UUU˖-[pdHx@$9@ #DzHrB@ IZ"A =@$9hG $%, mIENDB`cmstatr/vignettes/distribution-normal-1.png0000644000176200001440000010370614125357403020623 0ustar liggesusersPNG  IHDRh4,~k pHYs  ~ IDATxw`GGUow1 6-!!)v{IHM Ir) -@=@tc0wZJdYewGǻ3ϰH^6@  @@ ^ 96^~eJE6hTOGvX\ݻwN>eFVVb9*ǐH$'O4-iс}&&&`A 2ƕ߸c 2Hﷴ466>}:;;{̘1A$)˿++;w Gfggh>;uǛ7ϛ7\oʈX,>qiN3Yс}foܸa,9uP(TT,2ƕ߸c .?t#BCC#""|Hҧ~ڊ0$|u1:;;i"S: Iz:N3iYс<8pX?̝;W(Zoı:l-dP@oT*}GN:5>>oڴ)::Z$M20y䦦3g>|588رc4H\fMDDDLLڵk (--H$OgӦL;;vD"Ƞ~?BCC%ɒ%KC.//Gk[0|̙~~~&Lo'O5J$EEEmڴɊRtnݲ~[od/r111iii_|iMS3~hL/6$Ç?cs/Y̻c< 1b/ƯOl8m! 7>M$_]v$I|$Ϟ=+H._\WW7o޼zE=P`FѣGl/tܹ>D,zwmڔQ!ɶmv!Ht6p;wH$Ϩ"I2--ꫯ.ZJ yVf͚ӧO;w @ رcGkkQ-//g4U&ڵwHc4k͇nLx饗T*{b{9Rq74cEQ|jKWcwW-ga1o/ƯO8! 
֭Ǐ#Gt: j5U4MUGdZm۶I&˜c~ŊT ~;vϨ" -6 ѿo/6=}^_RRB$nDfᇳf͢J g}&ۧYgohh IPYYId{{;@T2zK߯)F2̞={ժU$I[=%/1^E1^EX_SuqyC5tΚ5+//oÆ ӧO=DGGx<ﴴ&Zrd人:HcFPUU@M>GMOOڸq#F-ڤ$A^*ZivP;_ +1(&Jxֈ-_(uxvcԧ\V.u³ROnZXUU5yWL>}AZI}}\YYY\\qc$22EoeޛXo!<<<11?Q?vX^^ɓ'ܹs]ّ555A;V8l~f[@`ݻwϙ3XKݙ_E6]P5iLzBv…3f,\д¹sΝ{=^`0^VZ寿zGGǯy{. @|z>+**2V5رѣw^tifֶ͚} .5TӃ(f۶m u 4{߿OlB.w` BkH$s}W̙#=^b*,5/ƯOqB1//FK^|EBA2b/ _vK$͡,M'M|򠠠Rmٲ%((A1c̚576e:|ʕH4toqѢE*Ҟ1zLgU߸qc„ "(>>u:ݒ%K$Ibb֭[_{zFϞ=;rHP8jԨ/ZjX,]BcTG1z-}:?`-1~^Ю"K_UdYnLyygbͿ&[ R "X$LS @ ^n 2@@ ^ @@ ^ @@ ^ XK1ШT*h'(-#9t `rZ6 C@ZOk0BÝ ^y啄w!o޼ٴdŊs[n5-y7eP__[CޱchyG>|7 ob㊿?t[R)t˷~n<8F@ ^ @@ ^;HEEE||< 8oڴITŽKյi&+FE*-[seB7ny >wGODqqM.]y涶6{&hn駟ϟqN*7ob~_>''7eɒ%;wܹsgzz:|3\jƉ%%%#F1̭[ΝӣR=`}[HܲeJpmn@`ޙs^(.^8,,P(9Ƅ -$I[b.4(˗_p͍͛Bqe˖$u|.jh7Y۹\mۆz%w)D9s愆N6xZ6 ݆@@~={mo߾BFa}"1VZZ (//c\vmȑb_ ?fݺu!!!0Eڵkʕmmm<&LPYYkuvvN8xFrJcAVZ^c<["""h9U|[8Κ5kjawq%>;zz`@ /zr`@ /ǝ/cZ={h' h%zJHn1Ϯf[[}@rg͚E+t<ϼR(`gJH}JΝ;G;pf̘A+j|>߼)Jrâ%<-$9s (͞;wMjc9 ɩShGY,]nAQջKN8a^b;1chjq}OOP(dCtu |]3+2P͗1lb`l%vMwLbd?݂㸽8($t ANWuyS x90C =x90C =x90C =x90C =x9(q>PT111<顪XAԁ@ kp>???..n T!Ieee[n20ց@ +q"''QQQjw5&'b_ZZ P֬`0/y$I+c*s!zAkWz\nqs@]{&5R2v RI  XHm~ZJbg)s%& S7Ͷt.$I{Kc$--&K}-S"HV4 cz9Kuf͚5k,,iKI" @ +gId$Wr8sX'|]ؕ4$!pA u͘b[ I"-'''x%G _uĹŻﯼn[| }nnn]]]^^^PPPTTTYYَ;韩Ƞ~3r@q>nw[4U.$V-!AdŊƏ))))))ׯg${j şnTM(.^`}{`qkוSǺ..@LY #OcJ_vi5Ɣ?np_[Y+SAjju6'q2?n̘FVwk6|z]ypޜ HWn4zęP" da1 kn.7sF'[fkA-W=LUm/e0L n4vstqϥ'Et9sU> 3]O 0,y«nʽ]IodMУ͇wU>n j PT 7LNsAAG^NYnZ(JHΕK49I2̽ԶJ t@ Ry}`=nA~A4&`nޅyYh5 ?v.yaJ,t]b|!v/ ӍKVxUSFTwO e’-n~,.ѳ23>WΡRl)jǃBB%c")cG161@KsߌT yM؃FY״U͚P)('[ﯪk_lJEdFF8s֘O K9œ+1PvGv!@Kk_$&9 ^' (+; 0KIhcёCXfe/rXWm <с ~wނsCϻ24:z9lt}o Vr QggYsBj*Jv_q=u2%Pd ]`xq|֭k׮ꫯ,vtt,_|ʕ+Wljך| lo͞-(KEMzJ/nM=)\E2,jiX9*(qA R<.vzQZe@8V^X>gΜ'd$lkFע)\>,nWMm|vS6,>Ϛ\Jj*ϡS%5V+C7J]p1z<}EEEFF ##iǎϟw@ɯ"XM&rjOܚ#wX+~y$+!8No:>1 GIAo m9&2P*T"@T* CBB233G6 ?$B58cn#5N0ɑBN8[[D', s2VYKrȴ) j_Jd)*?@Hլnu_,#MW?3F 0JOPrcEzvdN\6KPٝ ѰqK9+9x뮞ѦYnܱsa4b^$jF1f3*:ujii)oJe I a^䰤Yκ\U N|FKY|?ohz$i0Al6fNM E vHA[#x(R)vX,%9-8[+9_x6JF]B_wfaNq !K"$-N0Q.u\xONN...NKK+..~ 
ٓ9lذ=1bĈ?<_AIZ-ĕe _>޾9qP(E1)O*omk*[1FC! *t1b%j H+r8FdVgz"S(I?ݢu^ߥNpväzu&=f-YJE\ Qe*XC; )'ͭ *++۱cpڴixwrqhwiw _C7Hmͩn/PU)?_RE.lnCuo HפD%<mu\Y٫4$R~빁0slŴ)vW0=Ć /6Sk/Lf:CDN5v 0ɑfflw G6ZwU&U(Ҵzo~8[3,NVEuVy_]F˧e0ǵBnRؐ^5VyMc츝zYa}q襙~lpOw5s5e؉1_foz$IH$!IjFJqf@?R2k8 Ưxޙяf;r;O@XK^l,eqfZ pZN56lz#;FeDD$`5'>oT\bq탧 t]ŁzDo%޿<<4!QɣeZw=r*&'1ȄTt >7߾LOI6͉!FyUEl!zF6­8^Ca0Iz aOL^((fGnB"=boe?u O=S.}v,qy&i'V9-O?2'tuVDT[M'+><<  w^bK ܆/v|(pçwaS IDAT6DlHSNiC v!ÚV54gm7<3Αah  jm*KX}8vĪZM?F !@Ώ?uj<ȊRhu+ڜզwmof޲>!CI/)R;;Ti**7iHqJ/T>B[4cG16N3o1^[sCM-Vzt2~W"1:ߣb*im$grU*UzvJ@MSY,v*&cFK:Z l|BIrc/ nF^) (^QTc|$Y}JjۛߏN uV,(ᢶ8,>wGIh4VzKdtJ2/ ǘɷ7hxi@$B%nxו\AyH[p-6Jb|L W/I"%割 /MNݰ~ɘ#m7 \ Bp/"iXWHrn151(in~D[fhd_9&A')i?A'sV׺4ݰ@*-?@&ed֊x5޷brYrمUȀߎaR !\ο ־_#VLw|9B=}q#է7>yc~RV7bμ!=I\o%$!;Lp*c5 Źt38@]sE/w: hn\v]N#i*9Xg%ɰj , '0bCzKr ާ1'뵉aQDTsoL~Qmi"y| Ԏye[v1o<+ܗ sVB*,w>B:eSZK s+]p)A(07vT;s? sA6<> Rϻw!oˉ2`x8_w*ToUtF1屜~fΠ16kR ћ=W̚=6F|diAm=DRn0^ʙ1s`;qA`J:/g;-ܫ"?"3'Ea@enwSY:n 62J~=\>5'tL'6qY(~q*_,>Pܞ/M t- ꙟ^$?o{M&x"V]7}q8Uڅ }!'i㰣JUp|7iUߩ lFU<~䵛 =o/gK0~R3}9W^8^m{,S*Y9sX?r}qn+ D7w@ nBCy@48>c>@R<3uf\=U}X_;i&ƒqDL~s)xcpl ׷Jd4uV`_|ͬt|#gJ6[܉M_NKr}UwIOșZu;5w oqS[*~Hr͋tLs'$]͇]>???..nѢEyyy QQQ慵uOycᡒgm ,J;:H(A老l.%1ӊg}!S㗻٠PP}HIYyKN>=7!ekI K_8@STk=2c zʸ߉,zζlܔqpͫG*{uY,n)c"rG?v|0| }EEENN ## Z:6"Wv ޞNewF*lҩ*X3 gc'zr2I O$m96X`wݷ#1q3dbWtvyg[\k彊z>RCD%fd"2bھ@btTDrXL{r{)^Хh)jiZt~K63I~\Eۃl]4|2>$DXuuulR a6M&C CE3[9 XoHpȲqŻ,hlh1Y^58J%E (JB:~YYYol)@, `IR8a2,0I֚[Rd1 $1,v [C:d{ZþN,RxfXDNGLǡzMdXr xDŖKQT){j*;.@NdFhKnp<'''?㌅\.׼NXXXXX̙3T9 NFWb!˜f3WB$aqxo{6p\=/ŢQtֈQT~rygLnI$(*%0\.x}-(d@&59[23kst $Ųt.Vq $i+s }nn;BCCN:/~}5ZNc}*J>x)xzJSb/byBRq\Ack$ vTPZp:A!j]nt<% ,} Z:AljpgٌD&n{G15Xr>w=x90C =x9w#(u֩괴?mT{Od^{lڴITŽKm܂G,-jzӦM'""W^)<'xI&رֹǏ9rΝtR||͛mֹ۠ ~oܸA;zV[PP8iرxwl+ 2-H8NII#F(++qGw[6h~;wH$QTAAĈ ŋS) %%ɓMMMϟ8C Ͽxq qk^K[8y?4>:AC@@I֭cXI@܎X`}CbwNvܩP(6l  bqk0[?,~hoo ܶmۿK.M6A=⬬UV)4w6.\ [oEm3~={[n}Yh~طo_aa!@0掇_O=֭[ů*`'Nܺuu"""|vɓleѢE[lp8R0ࣁ~֭"hڵ5~Z-x4lda~|nb#9 @!ˁ@!qX$?sB\$""vfCЎr8薀Q@"J# "hŊBF#+r@ry$IVkI" 5׿hG|K+TՌ+tz{{y<3?\ gtIrmwBrv㠐|嗴,v(JcWB_W@OM(! 
gJHp8$" wdA$!ΕX7@s b[ j vbq8ƤZylC =x90C =x90C =9pgX&|w}T*Ն \`qJRl)j^$IǭHz=լ-a:μ0Ź'[\)[ uY޻c \)a-477U_ٳwޱR0@ YK#0 w{%$Ij4+j0J8`0Xt [+1znqs~ K-@kvaV;,wok֬YdɟW^yeΝk׮Y,ҥK/v;v /??׮] ZfԩSo^?|M$QVVVTTt ~';w<~C}rsssrr|#Gn޼$?رc0C ^Nܠ '_$?W1}˨7sν{555iӧO/ZFFFs\ ҹs{1yG9wٳG8qƍW\9k֬{| xPך#DiݶxIȟۖ_͛|l`0=vttt:ƔjH$TIFF۷333?䓹s,=2PuW)E3hQ=sdx)SLZzۗ,YG]x$I 0C Z[Važr=+Un#G.]ZXX8bĈw}wҥŋG=tܭ[IgyfΜ9=ó-Z4{lӦ{3g-e`v%p_ $UAn>WqxQ!ȕHۀ F;..ĉj[lٲeƍk֬YfФI&MDs0a}0p:h#vǥ뮈 m qccIBqvձNII1z޽&y=0Ǿ+G>@R<3bccRgr %K6-̣Щ݆7v9- iIk^R>{^hXk{vϏ[zuGGGCCUHdYY֭[u/z|`7mpjyjaGđVn{aZ&{ bv!0>[E"@XԘN\\UTTy6o 0AFtaglܮ}lK$BflJdޔ`@Q`K7\-l׬NH _8oGEb-Il饟nq\G X)Qߟ^C(3-ts ~z>B2E?}vvQ~ٳg[J\\\V\\^gĈ#F|`Lbj\.c6DH66 Ȝf#A6 |% B9<"bq)Np!:] =B^88c ?=QiIbK/t^+w\jSRM ~ʠx5z5[SI02Xl~O6Vƾ.<UUU/RSS_;wͭ *++۱c:7U PQ09V֥*SD켔-xSԫeP \9 ))Rb'У,lha@= 3g;v޽{WZW_%$$l߾}uVXa[[~=cKcƒF.].?$w'GߺW' '?.<:) AZvm[x# ~qE얒F0ݻw?cgg_|l29_-􀱄hF7Zv~ih'*Ĵ07>Mz}Э =butOÆ2@ګ.7LJ`?{+2uTWVV֦M\hgQФCY_vUJ,I|*5?JJ+Ј@X `xpJ z"oTh.6)Ɍ7?g4)_η~{ѢEFrL_'x¹V1|GOnO?4W_S"-nCPAYEe=n1pGfƘh>:gN}ms6i o\!΂~GaÆ={՝={*q\&0 A>4;dEkmf/-*{5(vaRF9([lܸQRq8{7hmm}ꩧX,VNNNBB޽{lD"󫩩yW߳gaf,wŋ͛w?\rZܭo q#i|_T^o#yRYi2i;1oܪk:|@Xz饗z=}jgZY!| R>n6ʜ3\.s(& W]:a(a4 3:Ճ}qǏ*--L IDATlܸ'%%ռcǎ;.Rid'IvHq#e]P+ s=B1ߠH}4:Ձ~ӱ8;uxNbSP0ih'>ŋSSSinܸ1o޼7x#..N,fHFz Ϝ9S$X'ON:G;;;]cgҡ%J ḏ$$1! /$$X0Z"OV~8Apgpvʓrʔ)Sgݺu ¨_nիׯ_? 
/]gܐ 1w"G e@H@HdbG0[ 54MI s鼛$;#8{+mTU᱖*85L+<3###ϟ?O+<~8Ǐ?h,|7m@9y7| J@v'L$ %~<VY51uI8[Gh0/nF{8[kY nXX%r9><Į^DnI~)С2mIbzK?B%d̨\ A4ɵ:x%I Oݡu.^5ں{nӒ?O]`G^|ٳg~5kDGȊvE^j |> \oPm% T2;$5^ftnQT)B cLt[7b($ -!+O8n9:KR)'Ĝ'(,Υn2A2hIp:8(KX ڤ t\\6mPܘ; o߾wޕ+WX},ϡq[ȰTt Gs @1"(UyOv$+G 1\L̷q|Zh@~K5m[8&{ /8D\TTTTT4dȐ+W0t١S(yN<.mpNWgBa֦ *]0?>JQyOBBfu[D,?W6@0ŋfeeK\cjHdmO[MסڬI H=* `5/F-űl""...εx;8\ +uFD4J`'i/\^ 3jIRył\vn7u!H+uRBCxAq_yqJJāVhOu=e8^`fBdp\c2,))iرAAŸC*ON2,AUS=d[Yg56t{ jz9!> f:=:5Vnb@C[Q\s뭷z-itȻ9!fY0[[;&;1ې:5S?>r\D:l-tfDIІ6z2Ǐ/))ٵk׈#x<nԈ L+XܤH^ з{xYa;M2:s+)$nmxo޼vjkki=SsYr%wٲe=믿N$h0_Y,믿zj:8oݺuڵ9i˗/_rʕ+Y~85m=g;k2;`)}hCݣw4huDWNoʇ1hqvãxx<>|xذaǎ/))ٿff#G:::_N;go<- O:ejzꎎ9sED |Go/\eSh [J2_ԦݣbxDW2h6]>sg`i}?EH^C[46n8k֬+G~{̙SN}GT*U{{>8a„e˖Q\\p‡zhϞ=׿ S7&M4i͛7cbbCCCiG9Л&6t +**222FiMMM;v0OZg48i6_(h<mbdl?ҡc)X, m+nnqU#qƝ={6''G}lٲ/JңGpB*a.jk?D5z7o>(ӎi'?3gzwżRRX9|>(00pذaO?JᖕMBNǰ;J13@HnEݑHRyxpE]ew-I0zJmfNctyM)JVщ-JJqV҈%lNcߝN&zb0=HZ%2/uӱ^`0 :v 󺻻 Ί^3_k0n<?n7~hnn?~|EEų>zO?gܫϯP(6lذy9s{wN@odee?ð]v1ns.O njCO: ,:N8Aej  1@Kz= 0yV$~k뜈!%@ 21rݢh3)J.˘͹p.(NL:FݬN&p8Bmi9Sr\6 KEM[_"bn]nQ(uwhD2UtQ3m:V61|r||<ϿzjRRR~~)S.^a$njsҥc^|^Jюi'=Gƿα+VLNN...NKK+..6fٳ'33sذaɿgΜ1O;fsѱX,A9]a-L*Po4by[PSֱ.7zhl1lpPAqE/auu Z5nth$5 nا$Pk6^`CnxD"|7x'|??kז/_裏FEEo[p1H~^x'LHHHKK[jO?}vL_(jzO;jӧOϘ1C&>|qב;v兆FEE:u_6-6m'|Q8k-H%R˩tZ6P;9&9L* `-m6+)F+f*G֭3g;ɓƏW^i9tt?lZ%~ժUw֯_?w\s% ))))))°un'N6sH]()qk\q ?<.vؔwxXשׂ6>h[DO^BWbg|s(ep6lHttT*uX}K[[[~{(H^5 !Mϴoaaݼ-@sQ#UYOBC,`H˯ee9k!vbɎb$NxsV `XTTiDAwQUپg1R22@3(2(B@۠t(n{zG{Ђ^vFq@UI0*<<:H UR*Tjg^gXxd+W._|ƍ(+V? 
HRi>v th^[bߛEXH_8^{Frs5~lIYʻ@ ^NdƲij8]Bˉ..;ߢ(aÆ͛7OY!?-n{$*4:xz ^ j"bsy@y[?|47KDP:!bs\;H(Y5/utwG_oPdz3z#<2քv*Q߁@znSxiUU﮷{'媫pL>VTH(~ ,w?9/lgF-7$e!vk3ee,(+:kV܋H:;KO=ک %Ww+DkaSxp;N3[zlQiv%@:g`wT B4Hq]Ų DZn^K9K |ejn֣9Q[^N#څE2~w=r2Hۄ3.]r\qqq{w#55rKVYY7z#|ȫ=\{AsSb{^,V_7rhP= Ia?tYRx 눴I$Er R`׿u߾}/7os00Oe0y(ĝ#eUGY@IXAP4FTƤmcc63'-$M8\.-%)M-靆 Z5N inn:\xD^?p-[jo'O.]4z#@\(QyۖJGΞ?05MX 6"*_ Q?=cLJ9HFMY c*_,Ə (G\p@JAAUzEqGUTs\F ztPe&S!iZ1Ud0,h0K^2f̖Tt.4T;~1sc$b]`™C5edrb$1K< HEEŦM]=~ta>֬YSQQﯮްaCFFWodϞ=˖-xMܮ O<";EiJ }z; ʀ!bSYb4œL_}d竫 mew1A~$ӿ\ּĊ.o͚N2&ο6'~d ~ٺu+C||~Mz#D{ iSD|e Vtz((Å8~r)ΏMI_nu业nP!ţ_ / B3/}e菝$+<2F@c|O yvN0sȏ5=7 /f>3}k+snӟj$&)nXl=dzzzG !te*ere\XZ$%H2F5z˱j:{z<)#~ȷvbYfǡ;Q8Cd|j Ȃ=;YW*zڟIY Xc*TT%} 5]i/l~Ɯ^Y&i޽{j~'䜭aUk}J }+DV"`ŕs˖yFK b׌ u?aY:}ÛcO0糋g_ϊ#@ipGq>Co|Rԟ9Nj[oaI^ B<9SnNk%y- WQQv]v{6x !G*-qriy"h\VWϛnٶkO<͌XsRle|HJgZ%Aqj%+1XMC6 o5HAcDL.#zQ;mҏ3<(2g%zLE%9B540۬8n8l39*2BN ffo*H2gh'Kڹm1SsHK'[>왓PNPƻHrzFvR@JIbdTjXxyٳ+W >tm*SEXgZB H'RGkzO\(F6olg_ɒi9ǜm9"&A +N8N7eյQB!6~c(nh ))Hllq_7.ʩvw X`Tget$BT4b b(jh0>--C&J|!Ir7J0^\&shD` U,>rt0[7?C-. $p c)R̴ ,ioWIs闻V2E^v,IH^.snrF>O?O? 
ػwJl[|xXmp`0d2Z >I 1&s@$,vJJ`gsI<6b--YoVu؁#1Ad#$/bHb$2o"Jyd%y$fX>f7Lb7 n2!b$[{X~k▕*ʛ׽n6 HxNIbnJDw~JPg-ٹjGl6BN+$]7ZPSSjyy@ d" t߾}vJNNNOO?|c=6Qь|6Bkkg6E]"7i IDATxJ>%CC ;;;=B$ݱq-xHu8IZ w0̯8)$]]%ȇ[<;@F8 jL@xkV[[l6g1xb$-}喗_~y˓bI^z3fXF(O^[x}@- `\%4M8h&zOB FjjjBMįQN(n 2;zpBF駟%ߤ&--_U 2"INN枳 W_FT1HLL>,o*a8p9b?M|Jl6ۛoiZ5/~ !X,W^yfi-[ؘ̋aؤHn2_zCKB̗_~+++q_|***~3g444L?vڵkٳb ެVn:u*;;{ջwt}}}>%_|EYY_z饖voeu:ݞ={^{5 -/T*]fZ^ Iqz$d˖-SwyNM?lv:ܖG!?/BIIDžHV\˜y#\@D2ZVSRRx3G)iap3I}}}IIۭVk~~X0VD]XK/$O3L|-ٳgfG -4M%55u۶mޒ>T*X%0ܺ j͚5{/ro~ |J駟j̴4!y3zB-G(L=֭I8U׿&''s5>\YYN1=nx& O^xM6Yg|G9%BQf[~ /z -{P(\jɮ#GDqjOjkkRG-ǎ>Bn~ԩJeee4M?p 1nn?M6ְº{P[%ol~e2{(..Gza[&p(PbF7غum|iu!0$ɓ'1 jV맟~p8>=gϞB7Pa3 <D]tRMMO<Xj֭[===ovFyŋWTT?]!-?^|O>$&&g uP.sn$''dMw!$tkK uc!$ڀ@"!$ jmjj tkl-nq:-v=&@&% >|حa\Q.2%Wq)A0 ~>)$111 2k4_Wn6;`d"RxJX$r4w^*skZ)XFD"Ş+Jn'[w!p($zBDp@"!:p$CmH汓o?j+ Q yuA5Vږ0tyީy]CC@ s:K\_fna+$q6Զ@!Aͬdl$\kh]۫7G{6PHH\.p-/I$K͡(sR R L$0C9B%YyBmQPJКUf^0t>AP(j i0 p8\.qB.aYQ$$Ir rEӴ͍t [%ZhZ 3tX|K% {rni8~cve,1a#xG2[ !i|<\Lm #=aB]{\:{&sHX2@3G#wO[c0:!Sq?gIi=YMtHTr/IhZZ Q ooKt?!;.ڢJ.H>$*q35yj$Ht i/P%qbyl60+;{95=C(Ʌ?5+j I(z?cf21frJhM .57:De|D5*4eS2[DAZB`ӭ] r崙8K~B}LSrG62`1T&AɈ)%?/,Άmn IaB#[D`9ڳ odfRfCYȸHeN-QW!ϡH"7ڣUoDDn": B&_4{n՚y摗0 ϤN-52knVrˡ$)hFGҼ4Z窨޹sgn,׿+Av>`l,Rɜ&T& }N^_41+VCeR8,Fe/+f$$&A^/..zpTUU 3IjǺd#KsrDtIF&uC?0)QDG-M仵'͓?_X,\"TjXFTf͚+WW_}5k֐GeNGU4M[,A ))))))#GxfCQ7E ^)U-d`lgYr6AIEm^1!D[x-J%-_/--mmmݵkJJOO߷o}fjqP F,`Ry6H0c9z)YȘ k=a۷o~s3aԹF7Z&ުpa(06JW't]$fPHˣu3g@&8h0O\Xo]ofp":lR84d1f78ى@/."MޮbP1J] ?f(^6&üKqBt7zARwm)f㿩|(Fuw)7G>~*-ޮ*DHzA8Ĭ8%O=A'&]EvϞԈDJ^3 ŋz߰tIzt lzfbcc&Ҥ0IǦȽr%IE O!O˲E5r)x;4{i%m:=5˛$#VV٣tʲpY}{pɉQ8yvs*dI˨ܢK[ !23 3xc-H.4:q$9*%QhjKeфs{ 9Inĸ 2vri%4Muk0˲ K0C#0}iiFn)p%!07IQz&Έ{Yv.l2 Eݿ(ꩢ(w(A怒K<=ae%aIB3ƺu2t:q=Qr)EY(.8Inj*)!T,n@?7KS$0„~$Q<ھҌx`.еN(qhH ԅ*jXtyFHF%dĎSDL"FXb 5{@ }@34H;,QWPSFaIICYmGԋ0:x>LѸ`ն&EeO[6}73#{>蠾g Ld\G"~ 3Z`]@wQ[9ŴS50h;Nǻf0CqbL X*(ɽRb6[h  >heEM=Cuǻ}vPc925tҼ_in  > bkJ`yoD0JV]YQtzF_zd>DҴzXtyj(*jqr|~b q ׄ>%JQY D࿁4M޽jfffn޼Z<䓩ziu inJ*BJ'0AoD$,^NJq+**w׷r]vڵk2FyEJhͬ ߱CQ2P-; V>'7gO(h&2>C 
#!dP^z>==ݳٹo߾ŋsBA$sdo =?ɥf= 2\l0 "M>n\liqbO%1H[[Hu 4SՌOIvRs F^0Dz"Rb6꒒3fOJLL>}:`Ϟ=\r`*Ɵol6{3f$Z)L"!PcIL8N'[vd.k*FCOJ$i;9Bq&H$,`겾6O7]xB^.sQnrsK.O?OݫR܆lܟ 7 L&P6~v "U*I ^mt˲vݛD.sI%K9nl3\O!IevB?Q[gQAD[8[ !2У(JAӴg#Ay/]RoȘ bg!ĺ/k3Yef uRxr8^IE"E{Z-1K!1t"9XqͅN_,qxgrtxB(= nxvE] lCA=rg[`Xwm0eHĽHSx n9*%pDޠ~3Zǰ,eFUѡ[ !wWI$c<(1tXx#G^31k @;7+SDT:sO&+n2oN:ͶYy҂ $_[@~ßu"5[Ԍ qc5yBM+ @xGy>VA #>z { xۧd[W˦," Z.Eᠳ4MGB`׺ZA [8ףʍcX >%Ec5ׂhU8s x8I`}ތV p\?33R6KH ]='&>wW ㍽*LE{},>Sh;]xG"w&|@߿̌P'&q8S)=ysZ\Kku׿0,s/gfҘR;,))uQB`'"ߌ-qlv,i쏐~s1e,p_^,B'*P ~c$Nk\Ԑz׬lx CB糑LΉW:JWj[2sňu'"+uwܟ"ʴ9r?0EݻVkff͛yy Wiod3 AvyPl* -]9WNd'wg?j7o?~ivً̪>K}H~pK}4ēo`+**w'`XMw<:y'w.@}[1'ׯuo?>/^ו"hS 'e 8[yyƒ+sAP\:ڒk ʀn P\\=Ztf຅0,bu}?@(0)YR.CZ~6i N( hu7w'(ckflEY?o2&vtlT^-nam.F tdT&-2djRo_ ~J/YӲJ B`#`f$%>@ B<[,n]B*Z,F>GvL+6,`2"bpSbP-5%`TEGhrwb̛$3A,^(gXzfk̬)/!EQ[v#~ABcɵЬs>8eۯ>mg n_J5tHIDATd?~6 gcY1gXAcm3Awf N/i!Q@I1gxy.N4gsonEr.bڬ2X ,`oX:us "\.l%WF>=C=ػwo||<_.כ6w]`0d2'LxJXD{2^\.NxvU$qnzsmjkFD"< D*%0{xb4)S8{v\L&X{ "$@|mVhZF>H/--ݷo߮]>clh4#_߿w^FDn r;Jbbb%%x(-ܞOFHl6pL&H$H>)$&Ӥ| =$aӦMgΜoƍϞ=;r$غuѣG[#Ld˖-}_o<_~xKo~С׿WLA H 0 ~$ L2mڴ +h,((PT-),,IQQQbbLNDժՑ2 ,hD p@"P ˲ovooBx9ΝX,oڴɧfVU2byWl6Vݲeϔ>YYY%t tKxf-aF_QQrnJQH$q|۶mGʊL:u˗/V"r֭[O8X__S\|YT$t:]kk' ;ŋΝ{'jd!իw0̇~Θ17x;;;7c׭[hkk+///9C !!CTN>̙3<}S{۷oذd۶mo~lt72:pJ֭[cǎ"Ry+WnٲE*^r$IOBBBuu50G}_FXb xzH(Yjٳg|Aƍ]6s;vlܸa̟?snٲeܹ/;wn(f;zp`@ zp`@ zp`@ zp?t=IENDB`cmstatr/vignettes/cmstatr_Tutorial.Rmd0000644000176200001440000003743314477217317017772 0ustar liggesusers--- title: "cmstatr Tutorial" author: "Stefan Kloppenborg" date: "1-Apr-2020" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{cmstatr Tutorial} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # If any of the required packages are unavailable, # don't re-run the code # nolint start required <- c("dplyr", "ggplot2", "tidyr", "cmstatr", "purrr") if (!all(unlist(lapply(required, function(pkg) { requireNamespace(pkg, 
quietly = TRUE)} )))) { knitr::opts_chunk$set(eval = FALSE) } #nolint end ``` `cmstatr` is an R package for analyzing composite material data for use in the aerospace industry. The statistical methods are based on those published in [CMH-17-1G](https://www.cmh17.org/). This package is intended to facilitate reproducible statistical analysis of composite materials. In this tutorial, we'll explore the basic functionality of `cmstatr`. Before we can actually use the package, we'll need to load it. We'll also load the `dplyr` package, which we'll talk about shortly. There are also a few other packages that we'll load. These could all be loaded by loading the `tidyverse` package instead. ```{r message=FALSE} library(cmstatr) library(dplyr) library(ggplot2) library(tidyr) library(purrr) ``` # Input Data `cmstatr` is built with the assumption that the data is in (so called) [tidy data](http://vita.had.co.nz/papers/tidy-data.html) format. This means that the data is in a data frame and that each observation (i.e. test result) has its own row and that each variable has its own column. Included in this package is a sample composite material data set (this data set is fictional: don't use it for anything other than learning this package). The data set `carbon.fabric.2` has the expected format. We'll just show the first 10 rows of the data for now. ```{r} carbon.fabric.2 %>% head(10) ``` If your data set is not yet in this type of format (note: that the column names *do not* need to match the column names in the example), there are many ways to get it into this format. One of the easier ways of doing so is to use the [`tidyr`](https://tidyr.tidyverse.org/) package. The use of this package is outside the scope of this vignette. # Working With Data Throughout this vignette, we will be using some of the `tidyverse` tools for working with data. 
There are several ways to work with data in R, but in the opinion of the author of this vignette, the `tidyverse` provides the easiest way to do so. As such, this is the approach used in this vignette. Feel free to use whichever approach works best for you. # Normalizing Data to Cured Ply Thickness Very often, you'll want to normalize as-measured strength data to a nominal cured ply thickness for fiber-dominated properties. Very often, this will reduce the apparent variance in the data. The `normalize_ply_thickness` function can be used to normalize strength or modulus data to a certain cured ply thickness. This function takes three arguments: the value to normalize (i.e. strength or modulus), the measured thickness and the nominal thickness. In our case, the nominal cured ply thickness of the material is $0.0079$. We can then normalize the warp-tension and fill-compression data as follows: ```{r} norm_data <- carbon.fabric.2 %>% filter(test == "WT" | test == "FC") %>% mutate(strength.norm = normalize_ply_thickness(strength, thickness / nplies, 0.0079)) norm_data %>% head(10) ``` # Calculating Single-Point Basis Value The simplest thing that you will likely do is to calculate a basis value based on a set of numbers that you consider as unstructured data. An example of this would be calculating the B-Basis of the `RTD` warp tension (`WT`) data. There are a number of diagnostic tests that we should run before actually calculating a B-Basis value. We'll talk about those later, but for now, let's just get right to checking how the data are distributed and calculating the B-Basis. We'll use an Anderson--Darling test to check if the data are normally distributed. The `cmstatr` package provides the function `anderson_darling_normal` and related functions for other distributions. We can run an Anderson--Darling test for normality on the warp tension RTD data as follows. We'll perform this test on the normalized strength. 
```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% anderson_darling_normal(strength.norm) ``` ```{r include=FALSE} # Verify that the AD test always provides the same conclusion # If this assertion fails, the Vignette needs to be re-written if (0.05 >= (norm_data %>% filter(test == "WT" & condition == "RTD") %>% anderson_darling_normal(strength.norm))$osl) { stop("Unexpected value for Anderson-Darling test") } ``` Now that we know that this data follows a normal distribution (since the observed significance level (OSL) of the Anderson--Darling test is greater than $0.05$), we can proceed to calculate a basis value based on the assumption of normally distributed data. The `cmstatr` package provides the function `basis_normal` as well as related functions for other distributions. By default, the B-Basis value is calculated, but other population proportions and confidence bounds can be specified (for example, specify `p = 0.99, conf = 0.99` for A-Basis). ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm) ``` We see that the calculated B-Basis is $129.96$. We also see two messages issued by the `cmstatr` package. These messages relate to the automated diagnostic tests performed by the basis calculation functions. In this case we see messages that two of the diagnostic tests were not performed because we didn't specify the batch of each observation. The batch is not required for calculating single-point basis values, but it is required for performing batch-to-batch variability and within-batch outlier diagnostic tests. The `basis_normal` function performs the following diagnostic tests by default: - Within batch outliers using `maximum_normed_residual()` - Between batch variability using `ad_ksample()` - Outliers using `maximum_normed_residual()` - Normality of data using `anderson_darling_normal()` There are two ways that we can deal with the two messages that we see. 
We can pass in a column that specifies the batch for each observation, or we can override those two diagnostic tests so that `cmstatr` doesn't run them. To override the two diagnostic tests, we set the argument `override` to a list of the names of the diagnostic tests that we want to skip. The names of the diagnostic tests that were not run are shown between back-ticks (\`) in the message. Our call to `basis_normal()` would be updated as follows: ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm, override = c("outliers_within_batch", "between_batch_variability")) ``` Obviously, you should be cautious about overriding the diagnostic tests. There are certainly times when it is appropriate to do so, but sound engineering judgment is required. The better approach would be to specify the batch. This can be done as follows: ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm, batch) ``` Now that batch is specified, we see that one of the diagnostic tests actually fails: the Anderson--Darling k-Sample test shows that the batches are not drawn from the same (unspecified) distribution. We can run this diagnostic test directly to investigate further: ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% ad_ksample(strength.norm, batch) ``` For the Anderson--Darling k-Sample test, $\alpha=0.025$ is normally used. In this case the p-value is $p=0.0026$, so it is nowhere near $\alpha$ (note the number of decimal places). We can plot the distribution of this data and make a judgment call about whether to continue. ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% group_by(batch) %>% ggplot(aes(x = strength.norm, color = batch)) + stat_normal_surv_func() + stat_esf() + ggtitle("Distribution of Data For Each Batch") ``` We can also run the other diagnostic tests by themselves. These are described in more detail in the following sections. 
# Calculating Basis Values by Pooling Across Environments In this section, we'll use the fill-compression data from the `carbon.fabric.2` data set. ## Checking for Outliers After checking that there are a sufficient number of conditions, batches and specimens and that the failure modes are consistent, we would normally check if there are outliers within each batch and condition. The maximum normed residual test can be used for this. The `cmstatr` package provides the function `maximum_normed_residual` to do this. First, we'll group the data by condition and batch, then run the test on each group. The `maximum_normed_residual` function returns an object that contains a number of values. We'll create a `data.frame` that contains those values. In order to do this, we need to use the `nest` function from the `tidyr` package. This is explained in detail [here](https://tidyr.tidyverse.org/articles/nest.html). Basically, `nest` allows a column of `list`s or a column of `data.frame`s to be added to a `data.frame`. Once nested, we can use the `glance` method to unpack the values returned by `maximum_normed_residual` into a one-row `data.frame`, and then use `unnest` to flatten this into a single `data.frame`. ```{r} norm_data %>% filter(test == "FC") %>% group_by(condition, batch) %>% nest() %>% mutate(mnr = map(data, ~maximum_normed_residual(data = .x, x = strength.norm)), tidied = map(mnr, glance)) %>% select(-c(mnr, data)) %>% # remove unneeded columns unnest(tidied) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% group_by(condition, batch) %>% summarise( n_outliers = maximum_normed_residual(x = strength.norm)$n_outliers ) %>% ungroup() %>% summarise(n_outliers = sum(n_outliers)))[[1]] != 0) { stop("Unexpected number of outliers") } ``` None of the groups have outliers, so we can continue. 
# Batch-to-Batch Distribution Next, we will use the Anderson--Darling k-Sample test to check that each batch comes from the same distribution within each condition. We can use the `ad_ksample` function from `cmstatr` to do so. Once again, we'll use `nest`/`unnest` and `glance` to do so. ```{r} norm_data %>% filter(test == "FC") %>% group_by(condition) %>% nest() %>% mutate(adk = map(data, ~ad_ksample(data = .x, x = strength.norm, groups = batch)), tidied = map(adk, glance)) %>% select(-c(data, adk)) %>% # remove unneeded columns unnest(tidied) ``` ```{r include=FALSE} if (!all(!(norm_data %>% filter(test == "FC") %>% group_by(condition) %>% summarise(different_dist = ad_ksample(x = strength.norm, groups = batch)$reject_same_dist ))$different_dist)) { stop("Unexpected ADK result") } ``` For all conditions, the Anderson--Darling k-Sample test fails to reject the hypothesis that each batch comes from the same (unspecified) distribution. We can thus proceed to pooling the data. ## Checking for Outliers Within Each Condition Just as we did when checking for outlier within each condition and each batch, we can pool all the batches (within each condition) and check for outliers within each condition. ```{r} norm_data %>% filter(test == "FC") %>% group_by(condition) %>% nest() %>% mutate(mnr = map(data, ~maximum_normed_residual(data = .x, x = strength.norm)), tidied = map(mnr, glance)) %>% select(-c(mnr, data)) %>% # remove unneeded columns unnest(tidied) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% group_by(condition) %>% summarise( n_outliers = maximum_normed_residual(x = strength.norm)$n_outliers ) %>% ungroup() %>% summarise(n_outliers = sum(n_outliers)))[[1]] != 0) { stop("Unexpected number of outliers") } ``` We find no outliers, so we can continue. ## Pooling Across Environments Often it is desirable to pool data across several environments. 
There are two methods for doing so: "pooled standard deviation" and "pooled CV" (CV is an abbreviation for Coefficient of Variation) First, we will check for equality of variance among the conditions. We will do so using Levene's test. The `cmstatr` package provides the function `levene_test` to do so. ```{r} norm_data %>% filter(test == "FC") %>% levene_test(strength.norm, condition) ``` ```{r include=FALSE} if (!(norm_data %>% filter(test == "FC") %>% levene_test(strength.norm, condition))$reject_equal_variance) { stop("Unexpected result from Levene's test") } ``` The result from Levene's test indicates that the variance for each condition is not equal. This indicates that the data cannot be pooled using the "pooled standard deviation" method. We can check if the data can be pooled using the "pooled CV" method. We'll start by normalizing the data from each group to the group's mean. The `cmstatr` package provides the function `normalize_group_mean` for this purpose. ```{r} norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% levene_test(strength_norm_group, condition) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% levene_test(strength_norm_group, condition))$reject_equal_variance) { stop("Unexpected value from Levene's test") } ``` The Levene's test thus shows the variances of the pooled data are equal. We can move on to performing an Anderson--Darling test for normality on the pooled data. 
```{r} norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% anderson_darling_normal(strength_norm_group) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% anderson_darling_normal(strength_norm_group))$osl <= 0.05) { stop("Unexpected value from AD test") } ``` The Anderson--Darling test indicates that the pooled data is drawn from a normal distribution, so we can continue with calculating basis values using the "pooled CV" method. ```{r} norm_data %>% filter(test == "FC") %>% basis_pooled_cv(strength.norm, condition, batch) ``` The conditions listed in the output above are in alphabetical order. This probably isn't what you want. Instead, you probably want the conditions listed in a certain order. This can be done by ordering the data first as demonstrated below. You'd probably just do this once at the start of your analysis. ```{r} norm_data %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETD", "ETW", "ETW2"))) %>% filter(test == "FC") %>% basis_pooled_cv(strength.norm, condition, batch) ``` # Equivalency Eventually, once you've finished calculating all your basis values, you'll probably want to set specification requirements or evaluate site/process equivalency. `cmstatr` has functionality to do both. Let's say that you want to develop specification limits for fill compression that you're going to put in your material specification. You can do this as follows: ```{r} carbon.fabric.2 %>% filter(test == "FC" & condition == "RTD") %>% equiv_mean_extremum(strength, n_sample = 5, alpha = 0.01) ``` If you're determining equivalency limits for modulus, a different approach is generally used so that bilateral limits are set. `cmstatr` can do this as well, using the function `equiv_change_mean`. 
cmstatr/vignettes/hk_ext.Rmd0000644000176200001440000004647414125365540015711 0ustar liggesusers--- title: "Extended Hanson-Koopmans" author: "Stefan Kloppenborg" date: "2021-09-30" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Extended Hanson-Koopmans} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} csl: ieee.csl references: - id: Hanson1964 type: article author: - given: D. L. family: Hanson - given: L. H. family: Koopmans title: Tolerance Limits for the Class of Distributions with Increasing Hazard Rates container-title: The Annals of Mathematical Statistics volume: "35" issue: "4" page: 1561-1570 issued: year: "1964" DOI: 10.1214/aoms/1177700380 - id: Vangel1994 type: article author: - given: Mark family: Vangel title: One-Sided Nonparametric Tolerance Limits container-title: Communications in Statistics - Simulation and Computation volume: "23" issue: "4" page: 1137-1154 issued: year: "1994" DOI: 10.1080/03610919408813222 - id: Harter1961 type: article author: - given: H. Leon family: Harter title: Expected values of normal order statistics container-title: Biometrika volume: "48" issue: 1/2 page: 151-165 issued: year: "1961" DOI: https://doi.org/10.2307/2333139 - id: CMH-17-1G type: report number: CMH-17-1G title: Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials publisher: SAE International issued: year: "2012" month: "03" --- In this vignette, we'll use the following packages: ```r library(dplyr) library(ggplot2) library(purrr) library(tidyr) ``` The Extended Hanson--Koopmans method is a nonparametric method of determining tolerance limits (such as A- or B-Basis values). This method does not assume any particular distribution, but does require that $-\log\left(F\right)$ is convex, where $F$ is the cumulative distribution function (CDF) of the distribution. 
The functions `hk_ext_z`, `hk_ext_z_j_opt` and `basis_hk_ext` in `cmstatr` are based on the Extended Hanson--Koopmans method, developed by Vangel [@Vangel1994]. This is an extension of the method published in [@Hanson1964]. Tolerance limits (Basis values) calculated using the Extended Hanson--Koopmans method are calculated based on two order statistics [^1], $i$ and $j$, and a factor, $z$. The function `hk_ext_z_j_opt` and the function `basis_hk_ext` (with `method = "optimum-order"`) set the first of these order statistics to the first (lowest) order statistic, and a second order statistic determined by minimizing the following function: $$ \left| z E\left(X_{\left(1\right)}\right) + \left(1 - z\right) E\left(X_{\left(j\right)}\right) - \Phi\left(p\right)\right| $$ Where $E\left(X_{(i)}\right)$ is the expected value of the $i$`th` order statistic for a sample drawn from the standard normal distribution, and $\Phi\left(p\right)$ is the CDF of a standard normal distribution for the content of the tolerance limit (i.e. $p=0.9$ for B-Basis). [^1]: The $i$`th` order statistic is the $i$`th` lowest value in the sample. The value of $z$ is calculated based on the sample size, $n$, the two order statistics $i$ and $j$, the content $p$ and the confidence. The calculation is performed using the method in [@Vangel1994] and implemented in `hk_ext_z`. The value of $j$ is very sensitive to the way that the expected value of the order statistics is calculated, and may be sensitive to numerical precision. In version 0.8.0 of `cmstatr` and prior, the expected value of an order statistic for a sample drawn from a standard normal distribution was determined in a crude way. After version 0.8.0, the method in [@Harter1961] is used. These methods produce different values of $j$ for certain sample sizes. Additionally, a table of $j$ and $z$ values for various sample sizes is published in CMH-17-1G[^2] [@CMH-17-1G]. This table gives slightly different values of $j$ for some sample sizes. 
[^2]: Note that CMH-17-1G uses the symbols $r$ and $k$ instead of $j$ and $z$. The values of $j$ and $z$ produced by `cmstatr` in version 0.8.0 and before, the values produced after version 0.8.0 and the value published in CMH-17-1G are shown below. All of these values are for B-Basis (90% content, 95% confidence). ```r factors <- tribble( ~n, ~j_pre_080, ~z_pre_080, ~j_post_080, ~z_post_080, ~j_cmh, ~z_cmh, 2, 2, 35.1768141883907, 2, 35.1768141883907, 2, 35.177, 3, 3, 7.85866787768029, 3, 7.85866787768029, 3, 7.859, 4, 4, 4.50522447199018, 4, 4.50522447199018, 4, 4.505, 5, 4, 4.10074820079326, 4, 4.10074820079326, 4, 4.101, 6, 5, 3.06444416024793, 5, 3.06444416024793, 5, 3.064, 7, 5, 2.85751000593839, 5, 2.85751000593839, 5, 2.858, 8, 6, 2.38240998122575, 6, 2.38240998122575, 6, 2.382, 9, 6, 2.25292053841772, 6, 2.25292053841772, 6, 2.253, 10, 7, 1.98762060673102, 6, 2.13665759924781, 6, 2.137, 11, 7, 1.89699586212496, 7, 1.89699586212496, 7, 1.897, 12, 7, 1.81410756892749, 7, 1.81410756892749, 7, 1.814, 13, 8, 1.66223343216608, 7, 1.73773765993598, 7, 1.738, 14, 8, 1.59916281901889, 8, 1.59916281901889, 8, 1.599, 15, 8, 1.54040000806181, 8, 1.54040000806181, 8, 1.54, 16, 9, 1.44512878109546, 8, 1.48539432060546, 8, 1.485, 17, 9, 1.39799975474842, 9, 1.39799975474842, 8, 1.434, 18, 9, 1.35353033609361, 9, 1.35353033609361, 9, 1.354, 19, 10, 1.28991705486727, 9, 1.31146980117942, 9, 1.311, 20, 10, 1.25290765871981, 9, 1.27163203813793, 10, 1.253, 21, 10, 1.21771654027026, 10, 1.21771654027026, 10, 1.218, 22, 11, 1.17330587650406, 10, 1.18418267046374, 10, 1.184, 23, 11, 1.14324511741536, 10, 1.15218647199938, 11, 1.143, 24, 11, 1.11442082880151, 10, 1.12153586685854, 11, 1.114, 25, 11, 1.08682185727661, 11, 1.08682185727661, 11, 1.087, 26, 11, 1.06032912052507, 11, 1.06032912052507, 11, 1.06, 27, 12, 1.03307994274081, 11, 1.03485308510789, 11, 1.035, 28, 12, 1.00982188136729, 11, 1.01034609051393, 12, 1.01 ) ``` For the sample sizes where $j$ is the same for each 
approach, the values of $z$ are also equal within a small tolerance. ```r factors %>% filter(j_pre_080 == j_post_080 & j_pre_080 == j_cmh) #> # A tibble: 16 × 7 #> n j_pre_080 z_pre_080 j_post_080 z_post_080 j_cmh z_cmh #> #> 1 2 2 35.2 2 35.2 2 35.2 #> 2 3 3 7.86 3 7.86 3 7.86 #> 3 4 4 4.51 4 4.51 4 4.50 #> 4 5 4 4.10 4 4.10 4 4.10 #> 5 6 5 3.06 5 3.06 5 3.06 #> 6 7 5 2.86 5 2.86 5 2.86 #> 7 8 6 2.38 6 2.38 6 2.38 #> 8 9 6 2.25 6 2.25 6 2.25 #> 9 11 7 1.90 7 1.90 7 1.90 #> 10 12 7 1.81 7 1.81 7 1.81 #> 11 14 8 1.60 8 1.60 8 1.60 #> 12 15 8 1.54 8 1.54 8 1.54 #> 13 18 9 1.35 9 1.35 9 1.35 #> 14 21 10 1.22 10 1.22 10 1.22 #> 15 25 11 1.09 11 1.09 11 1.09 #> 16 26 11 1.06 11 1.06 11 1.06 ``` The sample sizes where the value of $j$ differs are as follows: ```r factor_diff <- factors %>% filter(j_pre_080 != j_post_080 | j_pre_080 != j_cmh | j_post_080 != j_cmh) factor_diff #> # A tibble: 11 × 7 #> n j_pre_080 z_pre_080 j_post_080 z_post_080 j_cmh z_cmh #> #> 1 10 7 1.99 6 2.14 6 2.14 #> 2 13 8 1.66 7 1.74 7 1.74 #> 3 16 9 1.45 8 1.49 8 1.48 #> 4 17 9 1.40 9 1.40 8 1.43 #> 5 19 10 1.29 9 1.31 9 1.31 #> 6 20 10 1.25 9 1.27 10 1.25 #> 7 22 11 1.17 10 1.18 10 1.18 #> 8 23 11 1.14 10 1.15 11 1.14 #> 9 24 11 1.11 10 1.12 11 1.11 #> 10 27 12 1.03 11 1.03 11 1.03 #> 11 28 12 1.01 11 1.01 12 1.01 ``` While there are differences in the three implementations, it's not clear how much these differences will matter in terms of the tolerance limits calculated. This can be investigated through simulation. # Simulation with Normally Distributed Data First, we'll generate a large number (10,000) of samples of sample size $n$ from a normal distribution. Since we're generating the samples, we know the true population parameters, so can calculate the true population quantiles. We'll use the three sets of $j$ and $z$ values to compute tolerance limits and compared those tolerance limits to the population quantiles. 
The proportion of the calculated tolerance limits below the population quantiles should be equal to the selected confidence. We'll restrict the simulation study to the sample sizes where the values of $j$ and $z$ differ in the three implementations of this method, and we'll consider B-Basis (90% content, 95% confidence). ```r mu_normal <- 100 sd_normal <- 6 set.seed(1234567) # make this reproducible tolerance_limit <- function(x, j, z) { x[j] * (x[1] / x[j]) ^ z } sim_normal <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080, j_post_080, z_post_080, j_cmh, z_cmh) { map_dfr(1:10000, function(i_sim) { x <- sort(rnorm(n, mu_normal, sd_normal)) tibble( n = n, b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080), b_post_080 = tolerance_limit(x, j_post_080, z_post_080), b_cmh = tolerance_limit(x, j_cmh, z_cmh), x = list(x) ) } ) }) sim_normal #> # A tibble: 110,000 × 5 #> n b_pre_080 b_post_080 b_cmh x #> #> 1 10 78.4 77.7 77.7 #> 2 10 82.8 82.0 82.0 #> 3 10 83.3 83.0 83.0 #> 4 10 78.4 77.2 77.2 #> 5 10 87.3 86.6 86.6 #> 6 10 92.3 93.2 93.2 #> 7 10 75.2 77.9 77.9 #> 8 10 75.4 73.9 73.9 #> 9 10 75.5 75.1 75.1 #> 10 10 76.4 78.4 78.4 #> # … with 109,990 more rows ``` One can see that the tolerance limits calculated with each set of factors for (most) data sets is different. However, this does not necessarily mean that any set of factors is more or less correct. The distribution of the tolerance limits for each sample size is as follows: ```r sim_normal %>% pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>% ggplot(aes(x = value, color = Factors)) + geom_density() + facet_wrap(n ~ .) + theme_bw() + ggtitle("Distribution of Tolerance Limits for Various Values of n") ``` ![plot of chunk distribution-normal](distribution-normal-1.png) For all samples sizes, the distribution of tolerance limits is actually very similar between all three sets of factors. 
The true population quantile can be calculated as follows: ```r x_p_normal <- qnorm(0.9, mu_normal, sd_normal, lower.tail = FALSE) x_p_normal #> [1] 92.31069 ``` The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that the in all cases the tolerance limits are all conservative, and also that each set of factors produce similar levels of conservatism. ```r sim_normal %>% mutate(below_pre_080 = b_pre_080 < x_p_normal, below_post_080 = b_post_080 < x_p_normal, below_cmh = b_cmh < x_p_normal) %>% group_by(n) %>% summarise( prop_below_pre_080 = sum(below_pre_080) / n(), prop_below_post_080 = sum(below_post_080) / n(), prop_below_cmh = sum(below_cmh) / n() ) #> # A tibble: 11 × 4 #> n prop_below_pre_080 prop_below_post_080 prop_below_cmh #> #> 1 10 0.984 0.980 0.980 #> 2 13 0.979 0.975 0.975 #> 3 16 0.969 0.967 0.967 #> 4 17 0.973 0.973 0.971 #> 5 19 0.962 0.961 0.961 #> 6 20 0.964 0.962 0.964 #> 7 22 0.961 0.960 0.960 #> 8 23 0.960 0.959 0.960 #> 9 24 0.962 0.961 0.962 #> 10 27 0.954 0.953 0.954 #> 11 28 0.952 0.952 0.952 ``` # Simulation with Weibull Data Next, we'll do a similar simulation using data drawn from a Weibull distribution. Again, we'll generate 10,000 samples for each sample size. 
```r shape_weibull <- 60 scale_weibull <- 100 set.seed(234568) # make this reproducible sim_weibull <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080, j_post_080, z_post_080, j_cmh, z_cmh) { map_dfr(1:10000, function(i_sim) { x <- sort(rweibull(n, shape_weibull, scale_weibull)) tibble( n = n, b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080), b_post_080 = tolerance_limit(x, j_post_080, z_post_080), b_cmh = tolerance_limit(x, j_cmh, z_cmh), x = list(x) ) } ) }) sim_weibull #> # A tibble: 110,000 × 5 #> n b_pre_080 b_post_080 b_cmh x #> #> 1 10 95.3 95.1 95.1 #> 2 10 88.5 88.3 88.3 #> 3 10 89.7 89.3 89.3 #> 4 10 94.7 94.4 94.4 #> 5 10 96.9 96.9 96.9 #> 6 10 93.6 93.2 93.2 #> 7 10 86.1 85.5 85.5 #> 8 10 91.9 91.9 91.9 #> 9 10 93.7 93.4 93.4 #> 10 10 90.9 90.4 90.4 #> # … with 109,990 more rows ``` The distribution of the tolerance limits for each sample size is as follows. Once again, we see that the distribution of tolerance limits is nearly identical when each of the three sets of factors are used. ```r sim_weibull %>% pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>% ggplot(aes(x = value, color = Factors)) + geom_density() + facet_wrap(n ~ .) + theme_bw() + ggtitle("Distribution of Tolerance Limits for Various Values of n") ``` ![plot of chunk distribution-Weibull](distribution-Weibull-1.png) The true population quantile can be calculated as follows: ```r x_p_weibull <- qweibull(0.9, shape_weibull, scale_weibull, lower.tail = FALSE) x_p_weibull #> [1] 96.31885 ``` The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that the in all roughly 95% or more of the tolerance limits calculated for each sample is below the population quantile. We also see very similar proportions for each of the three sets of factors considered. 
```r sim_weibull %>% mutate(below_pre_080 = b_pre_080 < x_p_weibull, below_post_080 = b_post_080 < x_p_weibull, below_cmh = b_cmh < x_p_weibull) %>% group_by(n) %>% summarise( prop_below_pre_080 = sum(below_pre_080) / n(), prop_below_post_080 = sum(below_post_080) / n(), prop_below_cmh = sum(below_cmh) / n() ) #> # A tibble: 11 × 4 #> n prop_below_pre_080 prop_below_post_080 prop_below_cmh #> #> 1 10 0.97 0.965 0.965 #> 2 13 0.966 0.964 0.964 #> 3 16 0.959 0.959 0.959 #> 4 17 0.961 0.961 0.96 #> 5 19 0.957 0.956 0.956 #> 6 20 0.955 0.954 0.955 #> 7 22 0.953 0.952 0.952 #> 8 23 0.950 0.950 0.950 #> 9 24 0.953 0.953 0.953 #> 10 27 0.952 0.951 0.951 #> 11 28 0.950 0.950 0.950 ``` # Conclusion The values of $j$ and $z$ computed by the `hk_ext_z_j_opt` function differ for certain sample sizes ($n$) before and after version 0.8.0. Furthermore, for certain sample sizes, these values differ from those published in CMH-17-1G. The simulation study presented in this vignette shows that the tolerance limit (Basis value) might differ for any individual sample based on which set of $j$ and $z$ are used. However, each set of factors produces tolerance limit factors that are either correct or conservative. These three methods have very similar performance, and tolerance limits produced with any of these three methods are equally valid. # Session Info This vignette is computed in advance. 
A system with the following configuration was used: ```r sessionInfo() #> R version 4.1.1 (2021-08-10) #> Platform: x86_64-pc-linux-gnu (64-bit) #> Running under: Ubuntu 20.04.3 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 #> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0 #> #> locale: #> [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8 #> [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8 LC_PAPER=en_CA.UTF-8 LC_NAME=C #> [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C #> #> attached base packages: #> [1] stats graphics grDevices utils datasets methods base #> #> other attached packages: #> [1] tidyr_1.1.4 purrr_0.3.4 ggplot2_3.3.5 dplyr_1.0.7 #> #> loaded via a namespace (and not attached): #> [1] tidyselect_1.1.1 xfun_0.26 remotes_2.4.0 colorspace_2.0-2 vctrs_0.3.8 generics_0.1.0 #> [7] testthat_3.0.4 htmltools_0.5.2 usethis_2.0.1 yaml_2.2.1 utf8_1.2.2 rlang_0.4.11.9001 #> [13] pkgbuild_1.2.0 pillar_1.6.3 glue_1.4.2 withr_2.4.2 DBI_1.1.1 waldo_0.3.1 #> [19] cmstatr_0.9.0 sessioninfo_1.1.1 lifecycle_1.0.1 stringr_1.4.0 munsell_0.5.0 gtable_0.3.0 #> [25] devtools_2.4.2 memoise_2.0.0 evaluate_0.14 labeling_0.4.2 knitr_1.35 callr_3.7.0 #> [31] fastmap_1.1.0 ps_1.6.0 curl_4.3.1 fansi_0.5.0 highr_0.9 scales_1.1.1 #> [37] kSamples_1.2-9 cachem_1.0.6 desc_1.4.0 pkgload_1.2.2 farver_2.1.0 fs_1.5.0 #> [43] digest_0.6.28 stringi_1.7.4 processx_3.5.2 SuppDists_1.1-9.5 rprojroot_2.0.2 grid_4.1.1 #> [49] cli_3.0.1 tools_4.1.1 magrittr_2.0.1 tibble_3.1.4 crayon_1.4.1 pkgconfig_2.0.3 #> [55] MASS_7.3-54 ellipsis_0.3.2 prettyunits_1.1.1 assertthat_0.2.1 rmarkdown_2.11 rstudioapi_0.13 #> [61] R6_2.5.1 compiler_4.1.1 ``` # References cmstatr/vignettes/cmstatr_Graphing.Rmd0000644000176200001440000002653514477217317017727 0ustar liggesusers--- title: "Plotting Composite Material Data" author: "Ally Fraser" date: "2-May-2020" output: rmarkdown::html_vignette 
vignette: > %\VignetteIndexEntry{Plotting Composite Material Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6 ) # If any of the required packages are unavailable, # don't re-run the code # nolint start required <- c("dplyr", "ggplot2", "tidyr", "cmstatr") if (!all(unlist(lapply(required, function(pkg) { requireNamespace(pkg, quietly = TRUE)} )))) { knitr::opts_chunk$set(eval = FALSE) } # nolint end ``` This vignette demonstrates how to create some of the graphs commonly used when analyzing composite material data. Here, we rely on the [`ggplot2`](https://ggplot2.tidyverse.org/) package for graphing. This package can be loaded either on its own, or through the `tidyverse` meta-package, which also includes packages such as `dplyr` that we will also use. We'll need to load a few packages in order to proceed. ```{r message=FALSE} library(dplyr) library(ggplot2) library(tidyr) library(cmstatr) ``` Throughout this vignette, we'll use one of the example data sets that comes with `cmstatr` and we'll focus on the warp-tension data as an example. We'll load the example data in a variable as follows. By default the condition will be in an arbitrary order, but throughout the visualization, we'll want the conditions shown in a particular order (from coldest and driest to hottest and wettest). We can define the order of the conditions using the `ordered` function. For brevity, only the first few rows of the data set are displayed below. ```{r} dat <- carbon.fabric.2 %>% filter(test == "WT") %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) dat %>% head(10) ``` We'll then calculate the B-Basis value using the pooling by standard deviation method. This data set happens to fail some of the diagnostic tests, but for the purpose of this example, we'll ignore those failures using the `override` argument. 
```{r} b_basis_pooled <- dat %>% basis_pooled_cv(strength, condition, batch, override = c("between_group_variability", "normalized_variance_equal")) b_basis_pooled ``` The object returned from `basis_pooled_cv` contains a number of values. One value is a `data.frame` containing the groups (i.e. conditions) and the corresponding basis values. This looks like the following. We'll use this in the visualizations. ```{r} b_basis_pooled$basis ``` # Batch Plots Batch plots are used to identify differences between batches. Simple batch plots can be created using box plots and adding horizontal lines for the basis values as follows. Note that the heavy line in the box of the box plot is the *median*, not the mean. The two hinges correspond with the first and third quantiles and the whiskers extend to the most extreme data point, or 1.5 times the inner quantile range. In the code below, we use the function `rename` to rename the column `group` to `condition`. The `data.frame` produced by `basis_pooled_cv` uses the columns `value` and `group`, but to match the data, we need the column with the conditions to be named `condition`. ```{r} dat %>% ggplot(aes(x = batch, y = strength)) + geom_boxplot() + geom_jitter(width = 0.25) + geom_hline(aes(yintercept = value), data = b_basis_pooled$basis %>% rename(condition = group), color = "blue") + facet_grid(. ~ condition) + theme_bw() + ggtitle("Batch Plot") ``` # Quantile Plots A quantile plot provides a graphical summary of sample values. This plot displays the sample values and the corresponding quantile. A quantile plot can be used to examine the symmetry and tail sizes of the underlying distribution. Sharp rises may indicate the presence of outliers. 
```{r} dat %>% ggplot(aes(x = strength, color = condition)) + stat_ecdf(geom = "point") + coord_flip() + theme_bw() + ggtitle("Quantile Plot") ``` # Normal Survival Function Plots An empirical survival function, and the corresponding normal survival function can be plotted using two `ggplot` "stat" functions provided by `cmstatr`. In the example below, the empirical survival function is plotted for each condition, and the survival function for a normal distribution with the mean and variance from the data is also plotted (the survival function is 1 minus the cumulative distribution function). This type of plot can be used to identify how closely the data follows a normal distribution, and also to compare the distributions of the various conditions. ```{r} dat %>% ggplot(aes(x = strength, color = condition)) + stat_normal_surv_func() + stat_esf() + theme_bw() + ggtitle("Normal Survival Function Plot") ``` # Normal Score Plots The normal scores plot calculates the normal score and plots it against the normal score. Normal plots are useful to investigate distributions of the data. ```{r} dat %>% group_by(condition) %>% mutate(norm.score = scale(strength)) %>% ggplot(aes(x = norm.score, y = strength, colour = condition)) + geom_point() + ggtitle("Normal Scores Plot") + theme_bw() ``` # Q-Q Plots A Q-Q plot compares the data against the theoretical quantiles for a particular distribution. A line is also plotted showing the normal distribution with mean and variance from the data. If the data exactly followed a normal distribution, all points would fall on this line. ```{r} dat %>% ggplot(aes(sample = strength, colour = condition)) + geom_qq() + geom_qq_line() + ggtitle("Q-Q Plot") + theme_bw() ``` # Property Plots Property plots allow for a variety of properties for a group to be compared to other properties within the same group, as well as to other group properties. 
The properties included in this plot are A-Basis, B-Basis, Pooled A- and B-Basis, Pooled Modified CV (Coefficient of Variation) A- and B-Basis, Mean, and Min for each group. The property plots will take a bit of work to construct. First, the distribution of each group must be determined. Once the distribution has been determined, the proper basis calculation based on that distribution should be filled in below. We also have a column in the tables below for extra arguments to pass to the `basis` function, such as overrides required or the method for the `basis_hk_ext` function to use. ```{r} b_basis_fcn <- tribble( ~condition, ~fcn, ~args, "CTD", "basis_normal", list(override = c("between_batch_variability")), "RTD", "basis_normal", list(override = c("between_batch_variability")), "ETW", "basis_hk_ext", NULL, "ETW2", "basis_normal", list(override = c("between_batch_variability")) ) a_basis_fcn <- tribble( ~condition, ~fcn, ~args, "CTD", "basis_normal", list(override = c("between_batch_variability")), "RTD", "basis_normal", list(override = c("between_batch_variability")), "ETW", "basis_hk_ext", list(method = "woodward-frawley"), "ETW2", "basis_normal", list(override = c("between_batch_variability")) ) ``` We'll write a function that takes the data and information about the distribution and computes the single-point basis value. We'll use this function for both A- and B-Basis, so we'll add a parameter for the probability (0.90 or 0.99). 
```{r}
single_point_fcn <- function(group_x, group_batch,
                             cond, basis_fcn, p) {
  fcn <- basis_fcn$fcn[basis_fcn$condition == cond[1]]
  extra_args <- basis_fcn$args[basis_fcn$condition == cond[1]]

  args <- c(
    list(x = group_x, batch = group_batch, p = p),
    unlist(extra_args))
  basis <- do.call(fcn, args)
  basis$basis
}

single_point_results <- dat %>%
  group_by(condition) %>%
  summarise(single_point_b_basis = single_point_fcn(
              strength, batch, condition, b_basis_fcn, 0.90),
            single_point_a_basis = single_point_fcn(
              strength, batch, condition, a_basis_fcn, 0.99),
            minimum = min(strength),
            mean = mean(strength)) %>%
  mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2")))
single_point_results
```

In the above code, we also ensure that the condition column is still in the
order we expect.

We've already computed the B-Basis of the data using a pooling method. We'll
do the same for A-Basis:

```{r}
a_basis_pooled <- dat %>%
  basis_pooled_cv(strength, condition, batch,
                  p = 0.99,
                  override = c("between_group_variability",
                               "normalized_variance_equal"))
a_basis_pooled
```

As we saw before, the returned object has a property called `basis`, which
is a `data.frame` for the pooling methods.

```{r}
a_basis_pooled$basis
```

We can take this `data.frame` and change the column names to suit our needs.

```{r}
a_basis_pooled$basis %>%
  rename(condition = group, a_basis_pooled = value)
```

We can combine all these steps into one statement. We'll also ensure that
the conditions are listed in the order we want.
```{r} a_basis_pooled_results <- a_basis_pooled$basis %>% rename(condition = group, a_basis_pooled = value) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) a_basis_pooled_results ``` And the same thing for B-Basis: ```{r} b_basis_pooled_results <- b_basis_pooled$basis %>% rename(condition = group, b_basis_pooled = value) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) b_basis_pooled_results ``` We can use the function `inner_join` from the `dplyr` package to combine the three sets of computational results. Each row for each condition will be concatenated. ```{r} single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") ``` To use this table in the plot we're trying to construct, we want to "lengthen" the table as follows. ```{r} single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") %>% pivot_longer(cols = single_point_b_basis:a_basis_pooled) ``` We can now make a plot based on this: ```{r} single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") %>% pivot_longer(cols = single_point_b_basis:a_basis_pooled) %>% ggplot(aes(x = condition, y = value)) + geom_boxplot(aes(y = strength), data = dat) + geom_point(aes(shape = name, color = name)) + ggtitle("Property Graph") + theme_bw() ``` # Nested Data Plots `cmstatr` contains the function `nested_data_plot`. This function creates a plot showing the sources of variation. In the following example, the data is grouped according to the variables in the `group` argument. The data is first grouped according to `batch`, then according to `panel`. The labels located according to the data points that fall under them. 
By default, the mean is used, but that `stat` argument can be used to locate the labels according to `median` or some other statistic. ```{r} carbon.fabric.2 %>% mutate(panel = as.character(panel)) %>% filter(test == "WT") %>% nested_data_plot(strength, groups = c(batch, panel)) ``` Optionally, `fill` or `color` can be set as follows: ```{r} carbon.fabric.2 %>% mutate(panel = as.character(panel)) %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch, panel), fill = batch, color = panel) ``` cmstatr/R/0000755000176200001440000000000014517621570012140 5ustar liggesuserscmstatr/R/basis.R0000644000176200001440000015333714573746241013405 0ustar liggesusers#' Calculate k factor for basis values (\eqn{kB}, \eqn{kA}) with normal #' distribution #' #' @description #' The factors returned by this function are used when calculating basis #' values (one-sided confidence bounds) when the data are normally #' distributed. The basis value will #' be equal to \eqn{\bar{x} - k s}{x_bar - k s}, #' where \eqn{\bar{x}}{x_bar} is the sample mean, #' \eqn{s} is the sample standard deviation and \eqn{k} is the result #' of this function. #' This function is internally used by [basis_normal()] when #' computing basis values. #' #' @param n the number of observations (i.e. coupons) #' @param p the desired content of the tolerance bound. #' Should be 0.90 for B-Basis and 0.99 for A-Basis #' @param conf confidence level. Should be 0.95 for both A- and B-Basis #' #' @details #' This function calculates the k factors used when determining A- and #' B-Basis values for normally distributed data. To get \eqn{kB}, set #' the content of the tolerance bound to `p = 0.90` and #' the confidence level to `conf = 0.95`. To get \eqn{kA}, set #' `p = 0.99` and `conf = 0.95`. While other tolerance bound #' contents and confidence levels may be computed, they are infrequently #' needed in practice. 
#' #' The k-factor is calculated using equation 2.2.3 of #' Krishnamoorthy and Mathew (2008). #' #' This function has been validated against the \eqn{kB} tables in #' CMH-17-1G for each value of \eqn{n} from \eqn{n = 2} to \eqn{n = 95}. #' It has been validated against the \eqn{kA} tables in CMH-17-1G for each #' value of \eqn{n} from \eqn{n = 2} to \eqn{n = 75}. Larger values of \eqn{n} #' also match the tables in CMH-17-1G, but R #' emits warnings that "full precision may not have been achieved." When #' validating the results of this function against the tables in CMH-17-1G, #' the maximum allowable difference between the two is 0.002. The tables in #' CMH-17-1G give values to three decimal places. #' #' For more information about tolerance bounds in general, see #' Meeker, et. al. (2017). #' #' @return the calculated factor #' #' @references #' K. Krishnamoorthy and T. Mathew, Statistical Tolerance Regions: Theory, #' Applications, and Computation. Hoboken: John Wiley & Sons, 2008. #' #' W. Meeker, G. Hahn, and L. Escobar, Statistical Intervals: A Guide #' for Practitioners and Researchers, Second Edition. #' Hoboken: John Wiley & Sons, 2017. #' #' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites #' Guideline for Characterization of Structural Materials,” SAE International, #' CMH-17-1G, Mar. 2012. 
#'
#' @seealso
#' [basis_normal()]
#'
#' @examples
#' kb <- k_factor_normal(n = 10, p = 0.9, conf = 0.95)
#' print(kb)
#'
#' ## [1] 2.35464
#'
#' # This can be used to calculate the B-Basis if
#' # the sample mean and sample standard deviation
#' # is known, and data is assumed to be normally
#' # distributed
#'
#' sample_mean <- 90
#' sample_sd <- 5.2
#' print("B-Basis:")
#' print(sample_mean - sample_sd * kb)
#'
#' ## [1] B-Basis:
#' ## [1] 77.75587
#'
#' @importFrom stats qnorm qt
#'
#' @export
k_factor_normal <- function(n, p = 0.90, conf = 0.95) {
  # One-sided tolerance factor per Krishnamoorthy and Mathew (2008),
  # eq. 2.2.3: k = t'(conf; df = n - 1, ncp = z_p * sqrt(n)) / sqrt(n),
  # where t' is the quantile of the non-central t distribution.
  z <- qnorm(p)
  # For large n, qt() warns that "full precision may not have been
  # achieved"; the result still matches the CMH-17-1G tables (see
  # Details), so the warning is deliberately suppressed.
  suppressWarnings(
    t_factor <- qt(conf, df = n - 1, ncp = z * sqrt(n))
  )
  t_factor / sqrt(n)
}

#' Calculate basis values
#'
#' @description
#' Calculate the basis value for a given data set. There are various functions
#' to calculate the basis values for different distributions.
#' The basis value is the lower one-sided tolerance bound of a certain
#' proportion of the population. For more information on tolerance bounds,
#' see Meeker, et. al. (2017).
#' For B-Basis, set the content of tolerance bound to \eqn{p=0.90} and
#' the confidence level to \eqn{conf=0.95}; for A-Basis, set \eqn{p=0.99} and
#' \eqn{conf=0.95}. While other tolerance bound
#' contents and confidence levels may be computed, they are infrequently
#' needed in practice.
#'
#' These functions also perform some automated diagnostic
#' tests of the data prior to calculating the basis values. These diagnostic
#' tests can be overridden if needed.
#'
#' @param data a data.frame
#' @param x the variable in the data.frame for which to find the basis value
#' @param batch the variable in the data.frame that contains the batches.
#' @param groups the variable in the data.frame representing the groups
#' @param p the content of the tolerance bound. Should be 0.90 for B-Basis
#'   and 0.99 for A-Basis
#' @param conf confidence level. Should be 0.95 for both A- and B-Basis
#' @param override a list of names of diagnostic tests to override,
#'   if desired.
Specifying "all" will override all diagnostic #' tests applicable to the current method. #' @param modcv a logical value indicating whether the modified CV approach #' should be used. Only applicable to pooling methods. #' @param method the method for Hanson--Koopmans nonparametric basis values. #' should be "optimum-order" for B-Basis and "woodward-frawley" #' for A-Basis. #' #' @details #' `data` is an optional argument. If `data` is given, it should #' be a #' `data.frame` (or similar object). When `data` is specified, the #' value of `x` is expected to be a variable within `data`. If #' `data` is not specified, `x` must be a vector. #' #' When `modcv=TRUE` is set, which is only applicable to the #' pooling methods, #' the data is first modified according to the modified coefficient #' of variation (CV) #' rules. This modified data is then used when both calculating the #' basis values and #' also when performing the diagnostic tests. The modified CV approach #' is a way of #' adding extra variance to datasets with unexpectedly low variance. #' #' `basis_normal` calculate the basis value by subtracting \eqn{k} times #' the standard deviation from the mean. \eqn{k} is given by #' the function [k_factor_normal()]. The equations in #' Krishnamoorthy and Mathew (2008) are used. #' `basis_normal` also #' performs a diagnostic test for outliers (using #' [maximum_normed_residual()]) #' and a diagnostic test for normality (using #' [anderson_darling_normal()]). #' If the argument `batch` is given, this function also performs #' a diagnostic test for outliers within #' each batch (using [maximum_normed_residual()]) #' and a diagnostic test for between batch variability (using #' [ad_ksample()]). The argument `batch` is only used #' for these diagnostic tests. #' #' `basis_lognormal` calculates the basis value in the same way #' that `basis_normal` does, except that the natural logarithm of the #' data is taken. 
#' #' `basis_lognormal` function also performs #' a diagnostic test for outliers (using #' [maximum_normed_residual()]) #' and a diagnostic test for normality (using #' [anderson_darling_lognormal()]). #' If the argument `batch` is given, this function also performs #' a diagnostic test for outliers within #' each batch (using [maximum_normed_residual()]) #' and a diagnostic test for between batch variability (using #' [ad_ksample()]). The argument `batch` is only used #' for these diagnostic tests. #' #' `basis_weibull` calculates the basis value for data distributed #' according to a Weibull distribution. The confidence level for the #' content requested is calculated using the conditional method, as #' described in Lawless (1982) Section 4.1.2b. This has good agreement #' with tables published in CMH-17-1G. Results differ between this function #' and STAT17 by approximately 0.5\%. #' #' `basis_weibull` function also performs #' a diagnostic test for outliers (using #' [maximum_normed_residual()]) #' and a diagnostic test for normality (using #' [anderson_darling_weibull()]). #' If the argument `batch` is given, this function also performs #' a diagnostic test for outliers within #' each batch (using [maximum_normed_residual()]) #' and a diagnostic test for between batch variability (using #' [ad_ksample()]). The argument `batch` is only used #' for these diagnostic tests. #' #' `basis_hk_ext` calculates the basis value using the Extended #' Hanson--Koopmans method, as described in CMH-17-1G and Vangel (1994). #' For nonparametric distributions, this function should be used for samples #' up to n=28 for B-Basis and up to \eqn{n=299} for A-Basis. #' This method uses a pair of order statistics to determine the basis value. #' CMH-17-1G suggests that for A-Basis, the first and last order statistic #' is used: this is called the "woodward-frawley" method in this package, #' after the paper in which this approach is described (as referenced #' by Vangel (1994)). 
For B-Basis, another approach is used whereby the #' first and `j-th` order statistic are used to calculate the basis value. #' In this approach, the `j-th` order statistic is selected to minimize #' the difference between the tolerance limit (assuming that the order #' statistics are equal to the expected values from a standard normal #' distribution) and the population quantile for a standard normal #' distribution. This approach is described in Vangel (1994). This second #' method (for use when calculating B-Basis values) is called #' "optimum-order" in this package. #' The results of `basis_hk_ext` have been #' verified against example results from the program STAT-17. Agreement is #' typically well within 0.2%. #' #' Note that the implementation of `hk_ext_z_j_opt` changed after `cmstatr` #' version 0.8.0. This function is used internally by `basis_hk_ext` #' when `method = "optimum-order"`. This implementation change may mean #' that basis values computed using this method may change slightly #' after version 0.8.0. However, both implementations seem to be equally #' valid. See the included vignette #' for a discussion of the differences between the implementation before #' and after version 0.8.0, as well as the factors given in CMH-17-1G. #' To access this vignette, run: `vignette("hk_ext", package = "cmstatr")` #' #' `basis_hk_ext` also performs #' a diagnostic test for outliers (using #' [maximum_normed_residual()]) #' and performs a pair of tests that the sample size and method selected #' follow the guidance described above. #' If the argument `batch` is given, this function also performs #' a diagnostic test for outliers within #' each batch (using [maximum_normed_residual()]) #' and a diagnostic test for between batch variability (using #' [ad_ksample()]). The argument `batch` is only used #' for these diagnostic tests. #' #' `basis_nonpara_large_sample` calculates the basis value #' using the large sample method described in CMH-17-1G. 
This method uses #' a sum of binomials to determine the rank of the ordered statistic #' corresponding with the desired tolerance limit (basis value). Results #' of this function have been verified against results of the STAT-17 #' program. #' #' `basis_nonpara_large_sample` also performs #' a diagnostic test for outliers (using #' [maximum_normed_residual()]) #' and performs a test that the sample size is sufficiently large. #' If the argument `batch` is given, this function also performs #' a diagnostic test for outliers within #' each batch (using [maximum_normed_residual()]) #' and a diagnostic test for between batch variability (using #' [ad_ksample()]). The argument `batch` is only used #' for these diagnostic tests. #' #' `basis_anova` calculates basis values using the ANOVA method. #' `x` specifies the data (normally strength) and `groups` #' indicates the group corresponding to each observation. This method is #' described in CMH-17-1G, but when the ratio of between-batch mean #' square to the within-batch mean square is less than or equal #' to one, the tolerance factor is calculated based on pooling the data #' from all groups. This approach is recommended by Vangel (1992) #' and by Krishnamoorthy and Mathew (2008), and is also implemented #' by the software CMH17-STATS and STAT-17. #' This function automatically performs a diagnostic #' test for outliers within each group #' (using [maximum_normed_residual()]) and a test for between #' group variability (using [ad_ksample()]) as well as checking #' that the data contains at least 5 groups. #' This function has been verified against the results of the STAT-17 program. #' #' `basis_pooled_sd` calculates basis values by pooling the data from #' several groups together. `x` specifies the data (normally strength) #' and `group` indicates the group corresponding to each observation. #' This method is described in CMH-17-1G and matches the pooling method #' implemented in ASAP 2008. 
#' #' `basis_pooled_cv` calculates basis values by pooling the data from #' several groups together. `x` specifies the data (normally strength) #' and `group` indicates the group corresponding to each observation. #' This method is described in CMH-17-1G. #' #' `basis_pooled_sd` and `basis_pooled_cv` both automatically #' perform a number of diagnostic tests. Using #' [maximum_normed_residual()], they check that there are no #' outliers within each group and batch (provided that `batch` is #' specified). They check the between batch variability using #' [ad_ksample()]. They check that there are no outliers within #' each group (pooling all batches) using #' [maximum_normed_residual()]. They check for the normality #' of the pooled data using [anderson_darling_normal()]. #' `basis_pooled_sd` checks for equality of variance of all #' data using [levene_test()] and `basis_pooled_cv` #' checks for equality of variances of all data after transforming it #' using [normalize_group_mean()] #' using [levene_test()]. #' #' The object returned by these functions includes the named vector #' `diagnostic_results`. This contains all of the diagnostic tests #' performed. The name of each element of the vector corresponds with the #' name of the diagnostic test. The contents of each element will be #' "P" if the diagnostic test passed, "F" if the diagnostic test failed, #' "O" if the diagnostic test was overridden and `NA` if the #' diagnostic test was skipped (typically because an optional #' argument was not supplied). #' #' The following list summarizes the diagnostic tests automatically #' performed by each function. 
#' #' - `basis_normal` #' * `outliers_within_batch` #' * `between_batch_variability` #' * `outliers` #' * `anderson_darling_normal` #' - `basis_lognormal` #' * `outliers_within_batch` #' * `between_batch_variability` #' * `outliers` #' * `anderson_darling_lognormal` #' - `basis_weibull` #' * `outliers_within_batch` #' * `between_batch_variability` #' * `outliers` #' * `anderson_darling_weibull` #' - `basis_pooled_cv` #' * `outliers_within_batch` #' * `between_group_variability` #' * `outliers_within_group` #' * `pooled_data_normal` #' * `normalized_variance_equal` #' - `basis_pooled_sd` #' * `outliers_within_batch` #' * `between_group_variability` #' * `outliers_within_group` #' * `pooled_data_normal` #' * `pooled_variance_equal` #' - `basis_hk_ext` #' * `outliers_within_batch` #' * `between_batch_variability` #' * `outliers` #' * `sample_size` #' - `basis_nonpara_large_sample` #' * `outliers_within_batch` #' * `between_batch_variability` #' * `outliers` #' * `sample_size` #' - `basis_anova` #' * `outliers_within_group` #' * `equality_of_variance` #' * `number_of_groups` #' #' @return an object of class `basis` #' This object has the following fields: #' - `call` the expression used to call this function #' - `distribution` the distribution used (normal, etc.) #' - `p` the value of \eqn{p} supplied #' - `conf` the value of \eqn{conf} supplied #' - `modcv` a logical value indicating whether the modified #' CV approach was used. Only applicable to pooling methods. #' - `data` a copy of the data used in the calculation #' - `groups` a copy of the groups variable. #' Only used for pooling and ANOVA methods. #' - `batch` a copy of the batch data used for diagnostic tests #' - `modcv_transformed_data` the data after the modified CV transformation #' - `override` a vector of the names of diagnostic tests that #' were overridden. `NULL` if none were overridden #' - `diagnostic_results` a named character vector containing the #' results of all the diagnostic tests. 
See the Details section for #' additional information #' - `diagnostic_failures` a vector containing any diagnostic tests #' that produced failures #' - `n` the number of observations #' - `r` the number of groups, if a pooling method was used. #' Otherwise it is NULL. #' - `basis` the basis value computed. This is a number #' except when pooling methods are used, in which case it is a data.frame. #' #' @seealso [hk_ext_z_j_opt()] #' @seealso [k_factor_normal()] #' @seealso [transform_mod_cv()] #' @seealso [maximum_normed_residual()] #' @seealso [anderson_darling_normal()] #' @seealso [anderson_darling_lognormal()] #' @seealso [anderson_darling_weibull()] #' @seealso [ad_ksample()] #' @seealso [normalize_group_mean()] #' #' @references #' J. F. Lawless, Statistical Models and Methods for Lifetime Data. #' New York: John Wiley & Sons, 1982. #' #' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites #' Guideline for Characterization of Structural Materials,” SAE International, #' CMH-17-1G, Mar. 2012. #' #' M. Vangel, “One-Sided Nonparametric Tolerance Limits,” #' Communications in Statistics - Simulation and Computation, #' vol. 23, no. 4. pp. 1137–1154, 1994. #' #' K. Krishnamoorthy and T. Mathew, Statistical Tolerance Regions: Theory, #' Applications, and Computation. Hoboken: John Wiley & Sons, 2008. #' #' W. Meeker, G. Hahn, and L. Escobar, Statistical Intervals: A Guide #' for Practitioners and Researchers, Second Edition. #' Hoboken: John Wiley & Sons, 2017. #' #' M. Vangel, “New Methods for One-Sided Tolerance Limits for a One-Way #' Balanced Random-Effects ANOVA Model,” Technometrics, vol. 34, no. 2. #' Taylor & Francis, pp. 176–185, 1992. #' #' @examples #' library(dplyr) #' #' # A single-point basis value can be calculated as follows #' # in this example, three failed diagnostic tests are #' # overridden. 
#'
#' carbon.fabric %>%
#'   filter(test == "FC") %>%
#'   filter(condition == "RTD") %>%
#'   basis_normal(strength, batch,
#'                override = c("outliers",
#'                             "outliers_within_batch",
#'                             "anderson_darling_normal"))
#'
#' ## Call:
#' ## basis_normal(data = ., x = strength, batch = batch,
#' ##     override = c("outliers", "outliers_within_batch",
#' ##         "anderson_darling_normal"))
#' ##
#' ## Distribution:  Normal 	( n = 18 )
#' ## The following diagnostic tests were overridden:
#' ##     `outliers`,
#' ##     `outliers_within_batch`,
#' ##     `anderson_darling_normal`
#' ## B-Basis:   ( p = 0.9 , conf = 0.95 )
#' ## 76.94656
#'
#' # A set of pooled basis values can also be calculated
#' # using the pooled standard deviation method, as follows.
#' # In this example, one failed diagnostic test is overridden.
#' carbon.fabric %>%
#'   filter(test == "WT") %>%
#'   basis_pooled_sd(strength, condition, batch,
#'                   override = c("outliers_within_batch"))
#'
#' ## Call:
#' ## basis_pooled_sd(data = ., x = strength, groups = condition,
#' ##     batch = batch, override = c("outliers_within_batch"))
#' ##
#' ## Distribution:  Normal - Pooled Standard Deviation 	( n = 54, r = 3 )
#' ## The following diagnostic tests were overridden:
#' ##     `outliers_within_batch`
#' ## B-Basis:   ( p = 0.9 , conf = 0.95 )
#' ## CTD  127.6914
#' ## ETW  125.0698
#' ## RTD  132.1457
#'
#' @name basis
NULL

# Internal constructor for `basis` objects.
#
# Builds the common result skeleton shared by every `basis_*` function.
# The diagnostic fields start out empty and `basis` starts as NA; the
# calling function fills these in after running its checks.
new_basis <- function(
  call, distribution, modcv, p, conf, override, data, groups, batch
) {
  res <- list(
    call = call,
    distribution = distribution,
    modcv = modcv,
    p = p,
    conf = conf,
    data = data
  )
  class(res) <- "basis"
  # Assigning with `$<-` leaves the element absent when the value is
  # NULL (e.g. an omitted `batch`), matching field-by-field assignment.
  res$groups <- groups
  res$batch <- batch
  res$modcv_transformed_data <- NA
  res$override <- override
  res$diagnostic_results <- character(0L)
  res$diagnostic_failures <- character(0L)
  res$n <- length(res$data)
  # Number of groups (r) is only meaningful for pooling / ANOVA methods
  res$r <- NA
  if (!is.null(groups) && !all(is.na(groups))) {
    res$r <- length(levels(as.factor(groups)))
  }
  res$basis <- NA
  res
}

#' Glance at a basis object
#'
#' @description
#' Glance accepts
an object of type basis and returns a #' [tibble::tibble()] with #' one row of summaries for each basis value. #' #' Glance does not do any calculations: it just gathers the results in a #' tibble. #' #' @param x a basis object #' @param include_diagnostics a logical value indicating whether to include #' columns for diagnostic tests. Default FALSE. #' @param ... Additional arguments. Not used. Included only to match generic #' signature. #' #' #' @return #' A [tibble::tibble()] with the following #' columns: #' #' - `p` the the content of the tolerance bound. Normally 0.90 or 0.99 #' - `conf` the confidence level. Normally 0.95 #' - `distribution` a string representing the distribution assumed #' when calculating the basis value #' - `modcv` a logical value indicating whether the modified #' CV approach was used. Only applicable to pooling methods. #' - `n` the sample size #' - `r` the number of groups used in the calculation. This will #' be `NA` for single-point basis values #' - `basis` the basis value #' #' @details #' For the pooled basis methods (`basis_pooled_cv` and #' `basis_pooled_sd`), the [tibble::tibble()] #' returned by `glance` will have one row for each group included in #' the pooling. For all other basis methods, the resulting `tibble` #' will have a single row. #' #' If `include_diagnostics=TRUE`, there will be additional columns #' corresponding with the diagnostic tests performed. These column(s) will #' be of type character and will contain a "P" if the diagnostic test #' passed, a "F" if the diagnostic test failed, an "O" if the diagnostic #' test was overridden or `NA` if the test was not run (typically #' because an optional argument was not passed to the function that #' computed the basis value). 
#'
#'
#' @seealso
#' [basis()]
#'
#' @examples
#' set.seed(10)
#' x <- rnorm(20, 100, 5)
#' b <- basis_normal(x = x)
#' glance(b)
#'
#' ## # A tibble: 1 x 7
#' ##       p  conf distribution modcv     n r     basis
#' ##   <dbl> <dbl> <chr>        <lgl> <int> <lgl> <dbl>
#' ## 1   0.9  0.95 Normal       FALSE    20 NA     92.0
#'
#'
#' glance(b, include_diagnostics = TRUE)
#'
#' ## # A tibble: 1 x 11
#' ##       p  conf distribution modcv     n r     basis outliers_within…
#' ##   <dbl> <dbl> <chr>        <lgl> <int> <lgl> <dbl> <chr>
#' ## 1   0.9  0.95 Normal       FALSE    20 NA     92.0 NA
#' ## # … with 3 more variables: between_batch_variability <chr>,
#' ## #   outliers <chr>, anderson_darling_normal <chr>
#'
#' @method glance basis
#' @importFrom tibble tibble
#'
#' @export
glance.basis <- function(x, include_diagnostics = FALSE, ...) {  # nolint
  # One summary row; pooled methods carry a vector `basis`, which tibble
  # expands to one row per group.
  out <- tibble::tibble(
    p = x$p,
    conf = x$conf,
    distribution = x$distribution,
    modcv = x$modcv,
    n = x$n,
    r = x$r,
    basis = x$basis
  )

  if (include_diagnostics) {
    # Append one character column ("P"/"F"/"O"/NA) per diagnostic test
    for (test_name in names(x$diagnostic_results)) {
      out[[test_name]] <- x$diagnostic_results[[test_name]]
    }
  }

  out
}

# Format a heading plus a backtick-quoted, comma-separated list of
# diagnostic test names for printing. Returns "" when there is nothing
# to report.
format_diagnostic_helper <- function(heading, vec) {
  if (is.null(vec) || length(vec) == 0) {
    return("")
  }
  paste0(
    heading, "\n",
    "    `", paste(vec, collapse = "`,\n    `"), "`\n"
  )
}

#' @export
print.basis <- function(x, ...)
{
  # Echo the call that produced this basis object
  cat("\nCall:\n",
      paste(deparse(x$call), sep = "\n", collapse = "\n"),
      "\n\n", sep = "")
  cat("Distribution: ", x$distribution, "\t")
  cat("( n =", x$n)
  # r (number of groups) is only set by the pooling / ANOVA methods
  if (!is.null(x$r) && !all(is.na(x$r))) {
    cat(", r =", x$r)
  }
  cat(" )\n")
  if (x$modcv == TRUE) {
    cat("Modified CV Approach Used", "\n")
  }
  # Report any diagnostic tests that failed or were overridden;
  # format_diagnostic_helper returns "" when there is nothing to show
  cat(format_diagnostic_helper(
    "The following diagnostic tests failed:",
    x$diagnostic_failures)
  )
  cat(format_diagnostic_helper(
    "The following diagnostic tests were overridden:",
    x$override)
  )
  # Label the result A- or B-Basis when p/conf match the conventional
  # values; otherwise use the generic "Basis" label
  if (x$conf == 0.95 && x$p == 0.9) {
    cat("B-Basis: ", " ( p =", x$p, ", conf =", x$conf, ")\n")
  } else if (x$conf == 0.95 && x$p == 0.99) {
    cat("A-Basis: ", " ( p =", x$p, ", conf =", x$conf, ")\n")
  } else {
    cat("Basis: ", " ( p =", x$p, ", conf =", x$conf, ")\n")
  }
  # Single-point methods store a number in x$basis; pooling methods
  # store a data.frame with one row per group
  if (is.numeric(x$basis)) {
    cat(x$basis, "\n")
  } else if (is.data.frame(x$basis)) {
    # Pad the group column so the values line up
    col_width <- max(nchar(as.character(x$basis[["group"]]))) + 2
    for (j in seq(along.with = x$basis$group)) {
      cat(format(x$basis[["group"]][j], width = col_width))
      cat(x$basis[["value"]][j], "\n")
    }
  } else {
    stop("`basis` is an unexpected data type") # nocov
  }
  cat("\n")
}

# Diagnostic rules shared by all of the single-point basis methods.
# Each rule returns "" on success or a human-readable failure message.
single_point_rules <- list(
  outliers_within_batch = function(x, batch, ...) {
    # Run the maximum normed residual (MNR) test on each batch separately
    group_mnr <- vapply(unique(batch), function(b) {
      x_group <- x[batch == b]
      mnr <- maximum_normed_residual(x = x_group)
      mnr$n_outliers == 0
    }, FUN.VALUE = logical(1L))

    ifelse(all(group_mnr), "",
           paste0("Maximum normed residual test detected ",
                  "outliers within one or more batch"))
  },
  between_batch_variability = function(x, batch, ...) {
    # Anderson-Darling k-sample test that batches share a distribution
    adk <- ad_ksample(x = x, groups = batch, alpha = 0.025)
    ifelse(!adk$reject_same_dist, "",
           paste0("Anderson-Darling k-Sample test indicates that ",
                  "batches are drawn from different distributions"))
  },
  outliers = function(x, ...)
  {
    # MNR outlier test on the pooled data (all batches together)
    mnr <- maximum_normed_residual(x = x)
    ifelse(mnr$n_outliers == 0, "",
           paste0("Maximum normed residual test detected outliers ",
                  "within data"))
  }
)

# Rules for basis_normal: the single-point rules plus a normality check
basis_normal_rules <- single_point_rules
basis_normal_rules[["anderson_darling_normal"]] <- function(x, ...) {
  ad <- anderson_darling_normal(x = x)
  ifelse(!ad$reject_distribution, "",
         paste0("Anderson-Darling test rejects hypothesis that data ",
                "is drawn from a normal distribution"))
}

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#' @importFrom stats sd
#' @export
basis_normal <- function(data = NULL, x, batch = NULL, p = 0.90,
                         conf = 0.95, override = c()) {
  # Validate the tidy-eval arguments before evaluating them
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")

  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")

  # Expand "all" and validate the override names against the rule set
  override <- process_overrides(override, basis_normal_rules)

  res <- new_basis(
    call = match.call(),
    distribution = "Normal",
    modcv = FALSE,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = NA,
    batch = eval_tidy(enquo(batch), data)
  )

  # Run the diagnostic rules (skipping overridden ones) and record any
  # failures on the returned object
  res$diagnostic_results <- perform_checks(
    basis_normal_rules, x = res$data, batch = res$batch, override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)

  # basis = mean * (1 - k * CV), which is equivalent to mean - k * sd
  k <- k_factor_normal(n = res$n, p = p, conf = conf)
  cv <- sd(res$data) / mean(res$data)

  res$basis <- mean(res$data) * (1 - k * cv)

  return(res)
}

# Rules for basis_lognormal: single-point rules plus a log-normality check
basis_lognormal_rules <- single_point_rules
basis_lognormal_rules[["anderson_darling_lognormal"]] <- function(x, ...)
  {
    ad <- anderson_darling_lognormal(x = x)
    ifelse(!ad$reject_distribution, "",
           paste0("Anderson-Darling test rejects hypothesis that data ",
                  "is drawn from a log-normal distribution"))
  }

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#' @importFrom stats sd
#' @export
basis_lognormal <- function(data = NULL, x, batch = NULL, p = 0.90,
                            conf = 0.95, override = c()) {
  # Validate the tidy-eval arguments before evaluating them
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")

  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")

  # Expand "all" and validate the override names against the rule set
  override <- process_overrides(override, basis_lognormal_rules)

  res <- new_basis(
    call = match.call(),
    distribution = "Lognormal",
    modcv = FALSE,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = NA,
    batch = eval_tidy(enquo(batch), data)
  )

  # Run the diagnostic rules (skipping overridden ones) and record any
  # failures on the returned object
  res$diagnostic_results <- perform_checks(
    basis_lognormal_rules, x = res$data, batch = res$batch,
    override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)

  # Same normal k-factor, applied in log space: the basis value is the
  # back-transformed (exp) bound on the log of the data
  k <- k_factor_normal(n = res$n, p = p, conf = conf)
  res$basis <- exp(mean(log(res$data)) - k * sd(log(res$data)))

  return(res)
}

# Rules for basis_weibull: single-point rules plus a Weibull fit check
basis_weibull_rules <- single_point_rules
basis_weibull_rules[["anderson_darling_weibull"]] <- function(x, ...)
{
  # Diagnostic rule: "" on pass, message on failure.
  ad <- anderson_darling_weibull(x = x)
  ifelse(!ad$reject_distribution,
         "",
         paste0("Anderson-Darling test rejects hypothesis that data ",
                "is drawn from a Weibull distribution"))
}

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#' @importFrom stats qweibull integrate pchisq uniroot
#' @importFrom MASS fitdistr
#'
#' @export
basis_weibull <- function(data = NULL, x, batch = NULL, p = 0.90,
                          conf = 0.95, override = c()) {
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")
  override <- process_overrides(override, basis_weibull_rules)
  res <- new_basis(
    call = match.call(),
    distribution = "Weibull",
    modcv = FALSE,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = NA,
    batch = eval_tidy(enquo(batch), data)
  )
  res$diagnostic_results <- perform_checks(
    basis_weibull_rules,
    x = res$data, batch = res$batch, override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)
  # Maximum-likelihood fit of the two-parameter Weibull distribution.
  dist <- fitdistr(res$data, "weibull")
  alpha_hat <- dist$estimate[["scale"]]
  beta_hat <- dist$estimate[["shape"]]
  # The data must be transformed to fit an extreme value distribution:
  # log of a Weibull variate follows a smallest-extreme-value distribution
  # with location u = log(scale) and scale b = 1 / shape.
  data_evd <- log(res$data)
  u_hat <- log(alpha_hat)
  b_hat <- 1 / beta_hat
  # Next, find the ancillary statistic for the data
  a <- (data_evd - u_hat) / b_hat
  # k normalizes the conditional density h2(z) below so it integrates to 1.
  k_integrand <- function(z) {
    return(
      z ^ (res$n - 2) * exp((z - 1) * sum(a)) /
        ((1 / res$n) * sum(exp(a * z))) ^ res$n
    )
  }
  k_inv <- integrate(Vectorize(k_integrand), lower = 0, upper = Inf)
  k <- 1 / k_inv$value
  # Regularized lower incomplete gamma via the chi-squared CDF identity:
  # P(k, x) = pchisq(2 x, 2 k).
  incomplete_gamma <- function(ki, xi) {
    pchisq(xi * 2, ki * 2)
  }
  # Conditional density of the ancillary statistic, normalized by k.
  h2 <- function(z) {
    return(
      k * z ^ (res$n - 2) * exp((z - 1) * sum(a)) /
        ((1 / res$n) * sum(exp(a * z))) ^ res$n
    )
  }
  # wp is the standardized extreme-value quantile for content p.
  wp <- log(-log(p))
  pr_zp_integrand <- function(z, t) {
    h2(z) * incomplete_gamma(res$n, exp(wp + t * z) * sum(exp(a * z)))
  }
  # pr_fcn(t) is zero when the coverage probability of the tolerance bound
  # with factor t equals the requested confidence; solved by uniroot below.
  pr_fcn <- function(t) {
    int_res <- integrate(Vectorize(function(z) pr_zp_integrand(z, t)),
                         0, Inf)
    return(int_res$value - conf)
  }
  # Solve for the tolerance factor t, then transform the extreme-value
  # bound back to the Weibull scale.
  res_root <- uniroot(pr_fcn, c(0, 10),
                      extendInt = "yes")
  res$basis <- exp(u_hat - res_root$root * b_hat)
  return(res)
}

# Shared diagnostic rules for the pooled basis methods. Each rule returns
# "" when the check passes, or a human-readable failure message otherwise.
pooled_rules <- list(
  # MNR outlier check within each batch of each group/condition.
  outliers_within_batch = function(x, groups, batch, ...) {
    group_batch_mnr <- vapply(unique(groups), function(g) {
      batch_mnr <- vapply(unique(batch), function(b) {
        x_group <- x[batch == b & groups == g]
        # A batch/group combination with no observations trivially passes.
        if (length(x_group) == 0) {
          return(TRUE)
        }
        mnr <- maximum_normed_residual(x = x_group)
        return(mnr$n_outliers == 0)
      }, FUN.VALUE = logical(1L))
      all(batch_mnr)
    }, FUN.VALUE = logical(1L))
    ifelse(all(group_batch_mnr), "",
           paste0("Maximum normed residual test detected ",
                  "outliers within one or more batch and group"))
  },
  # ADK test that batches within each group come from the same distribution.
  # Note: operates on x_ad (the mod-CV-transformed data when modcv = TRUE).
  between_group_variability = function(x_ad, groups, batch, ...) {
    group_adk <- vapply(unique(groups), function(g) {
      x_group <- x_ad[groups == g]
      batch_group <- batch[groups == g]
      adk <- ad_ksample(x = x_group, groups = batch_group)
      !adk$reject_same_dist
    }, FUN.VALUE = logical(1L))
    ifelse(all(group_adk), "",
           paste0("Anderson-Darling k-Sample test indicates that ",
                  "batches are drawn from different distributions"))
  },
  # MNR outlier check within each group (batches pooled).
  outliers_within_group = function(x, groups, ...) {
    group_mnr <- vapply(unique(groups), function(g) {
      x_group <- x[groups == g]
      mnr <- maximum_normed_residual(x = x_group)
      return(mnr$n_outliers == 0)
    }, FUN.VALUE = logical(1L))
    ifelse(all(group_mnr), "",
           paste0("Maximum normed residual test detected ",
                  "outliers within one or more group"))
  },
  # Normality of the pooled, group-mean-normalized data.
  pooled_data_normal = function(x_ad, groups, ...) {
    norm_x <- normalize_group_mean(x = x_ad, group = groups)
    ad <- anderson_darling_normal(x = norm_x)
    ifelse(!ad$reject_distribution, "",
           paste0("Anderson-Darling test rejects hypothesis that pooled ",
                  "data is drawn from a normal distribution"))
  }
)

# Rules for basis_pooled_cv: pooled rules plus equality of normalized
# variances across groups.
pooled_rules_cv <- pooled_rules
pooled_rules_cv[["normalized_variance_equal"]] <- function(x, groups, ...)
{
  # Levene's test on the group-mean-normalized data: "" on pass.
  norm_x <- normalize_group_mean(x = x, group = groups)
  lev <- levene_test(x = norm_x, groups = groups)
  return(ifelse(!lev$reject_equal_variance, "",
                paste0("Levene's test rejected the hypothesis that the ",
                       "variance of all groups are equal")))
}

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#' @export
basis_pooled_cv <- function(data = NULL, x, groups, batch = NULL,
                            p = 0.90, conf = 0.95, modcv = FALSE,
                            override = c()) {
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  verify_tidy_input(
    df = data,
    x = groups,
    c = match.call(),
    arg_name = "groups")
  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")
  override <- process_overrides(override, pooled_rules_cv)
  res <- new_basis(
    call = match.call(),
    distribution = "Normal - Pooled CV",
    modcv = modcv,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = eval_tidy(enquo(groups), data),
    batch = eval_tidy(enquo(batch), data)
  )
  if (modcv == TRUE) {
    # Modified CV: replace the data with its mod-CV transform; the AD-based
    # diagnostics use a separate transform that also accounts for batches.
    res$modcv <- TRUE
    res$modcv_transformed_data <- transform_mod_cv_grouped(res$data,
                                                           res$groups)
    data_to_use <- res$modcv_transformed_data
    x_ad <- transform_mod_cv_ad(res$data, res$groups, res$batch)
  } else {
    res$modcv <- FALSE
    data_to_use <- res$data
    x_ad <- data_to_use
  }
  res$diagnostic_results <- perform_checks(
    pooled_rules_cv,
    x = data_to_use, x_ad = x_ad, groups = res$groups, batch = res$batch,
    override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)
  # Pool the standard deviation of the group-mean-normalized data; the
  # normalized data has mean 1 within each group, hence (norm_data - 1).
  norm_data <- normalize_group_mean(data_to_use, res$groups)
  pooled_sd <- sqrt(sum((norm_data - 1) ^ 2) / (res$n - res$r))
  # Per-group basis values using the noncentral-t tolerance factor with
  # pooled degrees of freedom (n - r).
  basis <- vapply(levels(as.factor(res$groups)), function(g) {
    nj <- length(data_to_use[res$groups == g])
    z <- qnorm(p)
    # qt with ncp can warn about precision; the result is still usable.
    suppressWarnings(
      kj <- qt(conf, df = res$n - res$r, ncp = z * sqrt(nj)) / sqrt(nj)
    )
    xj_bar <- mean(data_to_use[res$groups == g])
    xj_bar * (1 - kj * pooled_sd)
  }, FUN.VALUE = numeric(1L))
  res$basis <- data.frame(group = names(basis), value = basis)
  return(res)
}
# Rules for basis_pooled_sd: pooled rules plus equality of (raw) variances
# across conditions.
pooled_rules_sd <- pooled_rules
pooled_rules_sd[["pooled_variance_equal"]] <- function(x, groups, ...) {
  lev <- levene_test(x = x, groups = groups)
  return(ifelse(!lev$reject_equal_variance, "",
                paste0("Levene's test rejected the hypothesis that the ",
                       "variance of all conditions are equal")))
}

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#' @export
basis_pooled_sd <- function(data = NULL, x, groups, batch = NULL,
                            p = 0.90, conf = 0.95, modcv = FALSE,
                            override = c()) {
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  verify_tidy_input(
    df = data,
    x = groups,
    c = match.call(),
    arg_name = "groups")
  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")
  override <- process_overrides(override, pooled_rules_sd)
  res <- new_basis(
    call = match.call(),
    distribution = "Normal - Pooled Standard Deviation",
    modcv = modcv,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = eval_tidy(enquo(groups), data),
    batch = eval_tidy(enquo(batch), data)
  )
  if (modcv == TRUE) {
    # Modified CV: transform the data first; AD-based diagnostics use the
    # batch-aware transform.
    res$modcv <- TRUE
    res$modcv_transformed_data <- transform_mod_cv_grouped(res$data,
                                                           res$groups)
    data_to_use <- res$modcv_transformed_data
    x_ad <- transform_mod_cv_ad(res$data, res$groups, res$batch)
  } else {
    res$modcv <- FALSE
    data_to_use <- res$data
    x_ad <- data_to_use
  }
  res$diagnostic_results <- perform_checks(
    pooled_rules_sd,
    x = data_to_use, x_ad = x_ad, groups = res$groups, batch = res$batch,
    override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)
  # Pooled standard deviation: within-group sums of squares divided by the
  # pooled degrees of freedom (n - r).
  pooled_sd <- sqrt(
    sum(
      vapply(levels(as.factor(res$groups)), function(g) {
        xj_bar <- mean(data_to_use[res$groups == g])
        sum((data_to_use[res$groups == g] - xj_bar) ^ 2)
      },
      FUN.VALUE = numeric(1L))
    ) / (res$n - res$r))
  # Per-group basis values using the noncentral-t tolerance factor.
  basis <- vapply(levels(as.factor(res$groups)), function(g) {
    nj <- length(data_to_use[res$groups == g])
    z <- qnorm(p)
    # qt with ncp can warn about precision; the result is still usable.
    suppressWarnings(
      kj <- qt(conf, df = res$n - res$r, ncp = z * sqrt(nj)) / sqrt(nj)
    )
    xj_bar <- mean(data_to_use[res$groups == g])
    xj_bar - kj * pooled_sd
  }, FUN.VALUE = numeric(1L))
  res$basis <- data.frame(group = names(basis), value = basis)
  return(res)
}

# H(z) from Vangel (1994): the confidence associated with weighting factor z
# for order statistics i and j with sample size n and content p.
#' @importFrom stats pbeta dbeta
hk_ext_h <- function(z, n, i, j, p) {
  if (!(1 <= i && i < j && j <= n)) {
    # This function is called internally, so i, j and n should always be valid
    stop("Error: The condition 1 <= i < j <= n must be true.")  # nocov
  }
  # for z >= 1
  qb <- pbeta(1 - p, j, n - j + 1)
  int <- integrate(function(t) {
    pbeta(((1 - p) / t) ^ (1 / z), i, j - i) * dbeta(t, j, n - j + 1)
  },
  lower = 1 - p, upper = 1)
  if (int$message != "OK") {
    warning(int$message)  # nocov
  }
  qb + int$value
}

#' Calculate values related to Extended Hanson--Koopmans tolerance bounds
#'
#' @description
#' Calculates values related to Extended Hanson--Koopmans tolerance bounds
#' as described by Vangel (1994).
#'
#' @param n the sample size
#' @param i the first order statistic (1 <= i < j)
#' @param j the second order statistic (i < j <= n)
#' @param p the content of the tolerance bound (normally 0.90 or 0.99)
#' @param conf the confidence level (normally 0.95)
#'
#' @return
#' For `hk_ext_z`, the return value is a numeric value representing
#' the parameter z (denoted as k in CMH-17-1G).
#'
#' For `hk_ext_z_j_opt`, the return value is named list containing
#' `z` and `k`. The former is the value of z, as defined by
#' Vangel (1994), and the latter is the corresponding order statistic.
#'
#' @details
#' Hanson (1964) presents a nonparametric method for determining
#' tolerance bounds based on consecutive order statistics.
#' Vangel (1994) extends this method using non-consecutive order statistics.
#'
#' The extended Hanson--Koopmans method calculates a tolerance bound
#' (basis value) based on two order statistics and a weighting value
#' `z`. The value of `z` is based on the sample size, which
#' order statistics are selected, the desired content of the tolerance
#' bound and the desired confidence level.
#' #' The function `hk_ext_z` calculates the weighting variable `z` #' based on selected order statistics `i` and `j`. Based on this #' value `z`, the tolerance bound can be calculated as: #' #' \deqn{S = z X_{(i)} + (1 - z) X_{(j)}}{S = z X(i) + (1 - z) X(j)} #' #' Where \eqn{X_{(i)}}{X(i)} and \eqn{X_{(j)}}{X(j)} are the `i-th` #' and `j-th` ordered observation. #' #' The function `hk_ext_z_j_opt` determines the value of `j` and #' the corresponding value of `z`, assuming `i=1`. The value #' of `j` is selected such that the computed tolerance limit is #' nearest to the desired population quantile for a standard normal #' distribution when the order statistics are equal to the expected #' value of the order statistics for the standard normal distribution. #' #' @references #' M. Vangel, “One-Sided Nonparametric Tolerance Limits,” #' Communications in Statistics - Simulation and Computation, #' vol. 23, no. 4. pp. 1137–1154, 1994. #' #' D. L. Hanson and L. H. Koopmans, #' “Tolerance Limits for the Class of Distributions with Increasing #' Hazard Rates,” The Annals of Mathematical Statistics, #' vol. 35, no. 4. pp. 1561–1570, 1964. #' #' @examples #' # The factors from Table 1 of Vangel (1994) can be recreated #' # using the hk_ext_z function. For the sample size n=21, #' # the median is the 11th ordered observation. The factor #' # required for calculating the tolerance bound with a content #' # of 0.9 and a confidence level of 0.95 based on the median #' # and first ordered observation can be calculated as follows. #' hk_ext_z(n = 21, i = 1, j = 11, p = 0.9, conf = 0.95) #' #' ## [1] 1.204806 #' #' # The hk_ext_z_j_opt function can be used to refine this value #' # of z by finding an optimum value of j, rather than simply #' # using the median. Here, we find that the optimal observation #' # to use is the 10th, not the 11th (which is the median). 
#' hk_ext_z_j_opt(n = 21, p = 0.9, conf = 0.95)
#'
#' ## $z
#' ## [1] 1.217717
#' ##
#' ## $j
#' ## [1] 10
#'
#' @seealso [basis_hk_ext()]
#'
#' @name hk_ext
#'
#' @rdname hk_ext
#' @export
hk_ext_z <- function(n, i, j, p, conf) {
  # Solve H(z) = conf for z (z >= 1); extendInt allows the bracket to grow
  # upward if the root lies above the initial upper bound of 10.
  res <- uniroot(
    function(z) {
      hk_ext_h(z, n, i, j, p) - conf
    },
    lower = 1, upper = 10,
    extendInt = "upX")
  z <- res$root
  z
}

#' @rdname hk_ext
#' @export
hk_ext_z_j_opt <- function(n, p, conf) {
  i <- 1  # i is always 1
  if (n < 2) {
    stop("n must be >= 2")
  }
  # Expected value of the i-th order statistic of a standard normal sample
  # of size n.
  expected_order_statistic <- function(i, n) {
    # ref: https://www.gwern.net/docs/statistics/order/1961-harter.pdf
    int <- function(x) {
      x * pnorm(-x) ^ (i - 1) * pnorm(x) ^ (n - i) * dnorm(x)
    }
    integral <- integrate(int, -Inf, Inf)
    stopifnot(integral$message == "OK")
    # Fix: the former `factorial(n) / (factorial(n - i) * factorial(i - 1))`
    # overflows to Inf (hence NaN) for n > 170; compute the coefficient in
    # log space with lfactorial instead. Identical result for small n.
    coefficient <- exp(lfactorial(n) - lfactorial(n - i) - lfactorial(i - 1))
    coefficient * integral$value
  }
  # Try all the allowable values of j to find the value of T
  # that is closest to the population quantile for a standard
  # normal distribution
  j <- (i + 1):n
  z_vals <- vapply(j, function(ji) {
    hk_ext_z(n, i, ji, p, conf)
  }, FUN.VALUE = numeric(1L))
  # e1 does not depend on j; hoist it out of the loop below.
  e1 <- expected_order_statistic(i, n)
  err_vals <- vapply(seq(along.with = j), function(index) {
    ji <- j[index]
    zi <- z_vals[index]
    e2 <- expected_order_statistic(ji, n)
    abs(zi * e1 + (1 - zi) * e2 - qnorm(p))
  }, FUN.VALUE = numeric(1L))
  list(
    z = z_vals[err_vals == min(err_vals)],
    j = j[err_vals == min(err_vals)]
  )
}

# Diagnostic rules for basis_hk_ext: single-point rules plus checks that the
# chosen method matches the requested basis type.
basis_hk_ext_rules <- single_point_rules
basis_hk_ext_rules[["correct_method_used"]] <- function(method, p, conf,
                                                        ...) {
  if (p == 0.90 && conf == 0.95) {
    # B-Basis
    return(ifelse(method == "optimum-order", "",
                  paste0("For B-Basis, the optimum order method ",
                         "should be used")))
  } else if (p == 0.99 && conf == 0.95) {
    # A-Basis
    return(ifelse(method == "woodward-frawley", "",
                  paste0("For A-Basis, the Woodward-Frawley method ",
                         "should be used")))
  } else {
    return("")
  }
}
basis_hk_ext_rules[["sample_size"]] <- function(n, p, conf, ...)
{
  # Diagnostic: the Hanson-Koopmans method is intended for small samples;
  # "" on pass, message on failure.
  if (p == 0.90 && conf == 0.95) {
    # B-Basis
    return(ifelse(n <= 28, "",
                  paste0("For B-Basis, Hanson-Koopmans should only be ",
                         "used for samples of 28 or fewer observations")))
  } else if (p == 0.99 && conf == 0.95) {
    # A-Basis
    return(ifelse(n <= 299, "",
                  paste0("For A-Basis, Hanson-Koopmans should only be ",
                         "used for samples of 299 or fewer observations")))
  } else {
    return("")
  }
}

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#'
#' @export
basis_hk_ext <- function(data = NULL, x, batch = NULL,
                         p = 0.90, conf = 0.95,
                         method = c("optimum-order", "woodward-frawley"),
                         override = c()) {
  method <- match.arg(method)
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")
  override <- process_overrides(override, basis_hk_ext_rules)
  res <- new_basis(
    call = match.call(),
    distribution = paste0(
      "Nonparametric (Extended Hanson-Koopmans, ",
      ifelse(method == "optimum-order",
             "optimum two-order-statistic method",
             "Woodward-Frawley method"),
      ")"),
    modcv = FALSE,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = NA,
    batch = eval_tidy(enquo(batch), data)
  )
  res$diagnostic_results <- perform_checks(
    basis_hk_ext_rules,
    x = res$data, batch = res$batch, n = res$n, p = res$p,
    conf = res$conf, method = method, override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)
  if (method == "optimum-order") {
    # i = 1 and j chosen to optimize the tolerance bound.
    zj <- hk_ext_z_j_opt(res$n, p, conf)
    z <- zj$z
    j <- zj$j
  } else if (method == "woodward-frawley") {
    # Woodward-Frawley: always uses the first and last order statistics.
    j <- res$n
    z <- hk_ext_z(res$n, 1, j, p, conf)
  } else {
    stop("Invalid value for method.")  # nocov
  }
  # Tolerance bound S = X(j) * (X(1) / X(j)) ^ z (log-scale weighting of
  # the two selected order statistics).
  x_ordered <- sort(res$data)
  res$basis <- x_ordered[j] * (x_ordered[1] / x_ordered[j]) ^ z
  return(res)
}

#' Rank for distribution-free tolerance bound
#'
#' @description
#' Calculates the rank order for finding distribution-free tolerance
#' bounds for large samples. This function should only be used for
#' computing B-Basis for samples larger than 28 or A-Basis for samples
#' larger than 298. This function is used by
#' [basis_nonpara_large_sample()].
#'
#' @param n the sample size
#' @param p the desired content for the tolerance bound
#' @param conf the confidence level for the desired tolerance bound
#'
#' @return
#' The rank corresponding with the desired tolerance bound
#'
#' @details
#' This function uses the sum of binomial terms to determine the rank
#' of the ordered statistic that corresponds with the desired tolerance
#' limit. This approach does not assume any particular distribution. This
#' approach is described by Guenther (1969) and by CMH-17-1G.
#'
#' The results of this function have been verified against the tables in
#' CMH-17-1G and agreement was found for all sample sizes published in
#' CMH-17-1G for both A- and B-Basis, as well as the sample sizes
#' `n+1` and `n-1`, where
#' `n` is the sample size published in CMH-17-1G.
#'
#' The tables in CMH-17-1G purportedly list the smallest sample sizes
#' for which a particular rank can be used. That is, for a sample size
#' one less than the `n` published in the table, the next lowest rank
#' would be used. In some cases, the results of this function disagree by a
#' rank of one for sample sizes one less than the `n` published in the
#' table. This indicates a disagreement in that sample size at which
#' the rank should change. This is likely due to numerical
#' differences in this function and the procedure used to generate the tables.
#' However, the disagreement is limited to sample 6500 for A-Basis; no
#' discrepancies have been identified for B-Basis. Since these sample sizes are
#' uncommon for composite materials
#' testing, and the difference between subsequent order statistics will be
#' very small for samples this large, this difference will have no practical
#' effect on computed tolerance bounds.
#'
#' @references
#' W. Guenther, “Determination of Sample Size for Distribution-Free
#' Tolerance Limits,” Jan. 1969.
#' Available online:
#'
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @seealso [basis_nonpara_large_sample()]
#'
#' @examples
#' nonpara_binomial_rank(n = 1693, p = 0.99, conf = 0.95)
#' ## [1] 11
#'
#' # The above example indicates that for a sample of 1693 observations,
#' # the A-Basis is best approximated as the 11th ordered observation.
#' # In the example below, the same ordered observation would also be used
#' # for a sample of size 1702.
#'
#' nonpara_binomial_rank(n = 1702, p = 0.99, conf = 0.95)
#' ## [1] 11
#'
#' @export
nonpara_binomial_rank <- function(n, p, conf) {
  # Work with the lower-tail probability q = 1 - p; keep the original p for
  # error messages.
  p_orig <- p
  p <- 1 - p
  # E(r): cumulative binomial probability that at least r of n observations
  # exceed the p-quantile; computed in log space via lchoose for stability.
  e_fcn <- function(r) {
    sum(vapply(r:n, function(w) {
      exp(lchoose(n, w) + w * log(p) + (n - w) * log(1 - p))
    },
    FUN.VALUE = numeric(1L)))
  }
  r1 <- 1
  e1 <- e_fcn(r1)
  if (e1 < conf) {
    stop(paste0(
      "Sample size ", n, " is too small to compute a non-parametric ",
      "tolerance limit for p=", p_orig, " and conf=", conf))
  }
  r2 <- n
  e2 <- e_fcn(r2)
  if (e2 > conf) {
    stop(paste0(
      "No rank found for n=", n, ", p=", p_orig, " conf=", conf))
  }
  # Bisection between r1 and r2.
  for (i in 1:n) {
    # use a for loop just to give it a limit to prevent infinite loop
    if (abs(r2 - r1) == 1) {
      break
    }
    rm <- round((r1 + r2) / 2, digits = 0)
    em <- e_fcn(rm)
    # nolint start
    # We know that the following holds, and we want this to continue to hold:
    # E1 > conf
    # E2 < conf
    # nolint end
    if (em > conf) {
      r1 <- rm
      e1 <- em
    } else {
      r2 <- rm
      e2 <- em
    }
  }
  r1
}

# Diagnostic rules for basis_nonpara_large_sample.
nonpara_large_sample_rules <- single_point_rules
nonpara_large_sample_rules[["sample_size"]] <- function(n, p, conf, ...)
{
  # Diagnostic: the large-sample nonparametric method is only appropriate
  # above certain sample sizes (CMH-17-1G). Like all diagnostic rules, this
  # returns "" on pass or a character failure message.
  if (p == 0.90 && conf == 0.95) {
    # B-Basis
    return(ifelse(n >= 28, "",
                  paste0("This method should only be used for ",
                         "B-Basis for sample sizes larger than 28")))
  } else if (p == 0.99 && conf == 0.95) {
    # A-Basis
    return(ifelse(n >= 299, "",
                  paste0("This method should only be used for ",
                         "A-Basis for sample sizes larger than 299")))
  } else {
    # Fix: this branch previously returned TRUE, a logical, while every
    # other diagnostic rule in this file returns a character value with ""
    # meaning "check passed". Return "" for consistency with the other
    # rules (e.g. basis_hk_ext_rules[["sample_size"]]).
    return("")
  }
}

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#'
#' @export
basis_nonpara_large_sample <- function(data = NULL, x, batch = NULL,
                                       p = 0.90, conf = 0.95,
                                       override = c()) {
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  verify_tidy_input(
    df = data,
    x = batch,
    c = match.call(),
    arg_name = "batch")
  override <- process_overrides(override, nonpara_large_sample_rules)
  res <- new_basis(
    call = match.call(),
    distribution = "Nonparametric (large sample)",
    modcv = FALSE,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = NA,
    batch = eval_tidy(enquo(batch), data)
  )
  res$diagnostic_results <- perform_checks(
    nonpara_large_sample_rules,
    x = res$data, batch = res$batch, n = res$n, p = res$p,
    conf = res$conf, override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)
  # The basis value is the ordered observation at the distribution-free
  # binomial rank.
  x_ordered <- sort(res$data)
  r <- nonpara_binomial_rank(res$n, p, conf)
  res$basis <- x_ordered[r]
  return(res)
}

# Diagnostic rules for basis_anova. Each rule returns "" on pass or a
# failure message otherwise.
anova_rules <- list(
  # MNR outlier check within each group.
  outliers_within_group = function(x, groups, ...) {
    group_mnr <- vapply(unique(groups), function(b) {
      x_group <- x[groups == b]
      mnr <- maximum_normed_residual(x = x_group)
      mnr$n_outliers == 0
    }, FUN.VALUE = logical(1L))
    ifelse(all(group_mnr), "",
           paste0("Maximum normed residual test detected ",
                  "outliers within one or more batch"))
  },
  # Levene's test for equal variance across groups.
  equality_of_variance = function(x, groups, ...) {
    lt <- levene_test(x = x, groups = groups)
    ifelse(!lt$reject_equal_variance, "",
           paste0("Levene's test rejected the hypothesis that the ",
                  "variance of all groups is equal"))
  },
  number_of_groups = function(r, ...)
{
    # Diagnostic: ANOVA-based basis values need at least 5 groups.
    ifelse(r >= 5, "", "ANOVA should only be used for 5 or more groups")
  }
)

#' @rdname basis
#' @importFrom rlang enquo eval_tidy
#' @export
basis_anova <- function(data = NULL, x, groups, p = 0.90, conf = 0.95,
                        override = c()) {
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  verify_tidy_input(
    df = data,
    x = groups,
    c = match.call(),
    arg_name = "groups")
  override <- process_overrides(override, anova_rules)
  res <- new_basis(
    call = match.call(),
    distribution = "ANOVA",
    modcv = FALSE,
    p = p,
    conf = conf,
    override = override,
    data = eval_tidy(enquo(x), data),
    groups = eval_tidy(enquo(groups), data),
    batch = NA
  )
  if (res$r < 2) {
    stop("ANOVA cannot be computed with fewer than 2 groups")
  }
  res$diagnostic_results <- perform_checks(
    rules = anova_rules,
    x = res$data, groups = res$groups, r = res$r,
    override = override
  )
  res$diagnostic_failures <- get_check_failure_names(res$diagnostic_results)
  grand_mean <- mean(res$data)
  # Between-group sum of squares.
  ssb <- sum(vapply(
    levels(as.factor(res$groups)),
    function(g) {
      group_data <- res$data[res$groups == g]
      length(group_data) * mean(group_data) ^ 2
    },
    FUN.VALUE = numeric(1L)
  )) - res$n * grand_mean ^ 2
  # Total sum of squares. (Simplified: the former vapply over each scalar
  # observation is equivalent to squaring the vector directly.)
  sst <- sum(res$data ^ 2) - res$n * grand_mean ^ 2
  sse <- sst - ssb
  msb <- ssb / (res$r - 1)
  mse <- sse / (res$n - res$r)
  # n* accounts for unequal group sizes when computing the effective
  # number of observations per batch.
  n_star <- sum(vapply(
    levels(as.factor(res$groups)),
    function(g) {
      # Fix: this previously read `res$group` (no trailing "s"), which only
      # worked through R's partial matching of `$` on lists; reference
      # `res$groups` explicitly, consistent with the rest of this function.
      group_data <- res$data[res$groups == g]
      length(group_data) ^ 2 / res$n
    },
    FUN.VALUE = numeric(1L)
  ))
  effective_batch <- (res$n - n_star) / (res$r - 1)
  # Population standard deviation estimate combining between- and
  # within-batch variance components.
  pop_sd <- sqrt(
    msb / effective_batch +
      (effective_batch - 1) / effective_batch * mse
  )
  # It was found that when mse=0, the following code unnecessarily fails
  # The equations were rearranged to allow for mse and msb to be any value
  # nolint start
  # WAS: u <- msb / mse
  # WAS: w <- sqrt(u / (u + effective_batch - 1))
  # nolint end
  w <- sqrt(msb / (msb + effective_batch * mse - mse))
  k0 <- k_factor_normal(res$n, p, conf)
  k1 <- k_factor_normal(res$r,
p, conf) tol_factor <- (k0 - k1 / sqrt(effective_batch) + (k1 - k0) * w) / (1 - 1 / sqrt(effective_batch)) if (msb <= mse) { tol_factor <- k0 } res$basis <- grand_mean - tol_factor * pop_sd return(res) } cmstatr/R/generics.R0000644000176200001440000000042314015477557014070 0ustar liggesusers#' @importFrom generics augment #' @export #' @seealso [generics::augment()] generics::augment #' @importFrom generics tidy #' @export #' @seealso [generics::tidy()] generics::tidy #' @importFrom generics glance #' @export #' @seealso [generics::glance()] generics::glance cmstatr/R/util.R0000644000176200001440000000204013703365454013236 0ustar liggesusers # A non-exported function used to format text in `print` methods. format_row <- function(content, justify, width, ...) { # content must be a list, justify and width must be vectors res <- character(0L) for (i in seq_along(content)) { res <- c(res, format( format(content[[i]], ...), justify = justify[i], width = width[i], ...) ) } res <- c(res, "\n") return(res) } # A non-exported function used to format text in `print` methods. format_row_equal <- function(content, justify, column_width, ...) { # content must be a list with the content: # (var_name1, var1, var_name2, var2, etc) # the length of content will be twice the length of the other vectors res <- character(0L) for (i in seq_along(justify)) { res <- c(res, format( paste0(content[[2 * i - 1]], " = ", format(content[[2 * i]], ...) ), justify = justify[i], width = column_width[i], ...) ) } res <- c(res, "\n") return(res) } cmstatr/R/cv.R0000644000176200001440000000176614016050422012667 0ustar liggesusers #' Calculate the coefficient of variation #' #' @description #' The coefficient of variation (CV) is the ratio of the standard #' deviation to the mean of a sample. This function takes a vector #' of data and calculates the CV. #' #' @param x a vector #' @param na.rm logical. Should missing values be removed? 
#' #' @return #' The calculated CV #' #' @examples #' set.seed(15) # make this example reproducible #' x <- rnorm(100, mean = 100, sd = 5) #' cv(x) #' ## [1] 0.04944505 #' #' # the cv function can also be used within a call to dplyr::summarise #' library(dplyr) #' carbon.fabric %>% #' filter(test == "WT") %>% #' group_by(condition) %>% #' summarise(mean = mean(strength), cv = cv(strength)) #' #' ## # A tibble: 3 x 3 #' ## condition mean cv #' ## #' ## 1 CTD 137. 0.0417 #' ## 2 ETW 135. 0.0310 #' ## 3 RTD 142. 0.0451 #' #' #' @export cv <- function(x, na.rm = FALSE) { # nolint sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm) # nolint } cmstatr/R/adtest.R0000644000176200001440000002332114477217317013555 0ustar liggesusers #' Anderson--Darling test for goodness of fit #' #' @description #' Calculates the Anderson--Darling test statistic for a sample given #' a particular distribution, and determines whether to reject the #' hypothesis that a sample is drawn from that distribution. #' #' @param data a data.frame-like object (optional) #' @param x a numeric vector or a variable in the data.frame #' @param alpha the required significance level of the test. #' Defaults to 0.05. #' #' @return #' an object of class `anderson_darling`. This object has the following #' fields. #' #' - `call` the expression used to call this function #' - `dist` the distribution used #' - `data` a copy of the data analyzed #' - `n` the number of observations in the sample #' - `A` the Anderson--Darling test statistic #' - `osl` the observed significance level (p-value), #' assuming the #' parameters of the distribution are estimated from the data #' - `alpha` the required significance level for the test. #' This value is given by the user. 
#' - `reject_distribution` a logical value indicating whether #' the hypothesis that the data is drawn from the specified distribution #' should be rejected #' #' @details #' The Anderson--Darling test statistic is calculated for the distribution #' given by the user. #' #' The observed significance level (OSL), or p-value, is calculated assuming #' that the parameters #' of the distribution are unknown; these parameters are estimate from the #' data. #' #' The function `anderson_darling_normal` computes the Anderson--Darling #' test statistic given a normal distribution with mean and standard deviation #' equal to the sample mean and standard deviation. #' #' The function `anderson_darling_lognormal` is the same as #' `anderson_darling_normal` except that the data is log transformed #' first. #' #' The function `anderson_darling_weibull` computes the Anderson--Darling #' test statistic given a Weibull distribution with shape and scale parameters #' estimated from the data using a maximum likelihood estimate. #' #' The test statistic, `A`, is modified to account for #' the fact that the parameters of the population are not known, #' but are instead estimated from the sample. This modification is #' a function of the sample size only, and is different for each #' distribution (normal/lognormal or Weibull). Several such modifications #' have been proposed. This function uses the modification published in #' Stephens (1974), Lawless (1982) and CMH-17-1G. Some other implementations #' of the Anderson-Darling test, such as the implementation in the #' `nortest` package, use other modifications, such as the one #' published in D'Agostino and Stephens (1986). As such, the p-value #' reported by this function may differ from the p-value reported #' by implementations of the Anderson--Darling test that use #' different modifiers. 
Only the unmodified #' test statistic is reported in the result of this function, but #' the modified test statistic is used to compute the OSL (p-value). #' #' This function uses the formulae for observed significance #' level (OSL) published in CMH-17-1G. These formulae depend on the particular #' distribution used. #' #' The results of this function have been validated against #' published values in Lawless (1982). #' #' #' @references #' J. F. Lawless, *Statistical models and methods for lifetime data*. #' New York: Wiley, 1982. #' #' "Composite Materials Handbook, Volume 1. Polymer Matrix #' Composites Guideline for Characterization of Structural #' Materials," SAE International, CMH-17-1G, Mar. 2012. #' #' M. A. Stephens, “EDF Statistics for Goodness of Fit and Some #' Comparisons,” #' Journal of the American Statistical Association, vol. 69, no. 347. #' pp. 730–737, 1974. #' #' R. D’Agostino and M. Stephens, Goodness-of-Fit Techniques. #' New York: Marcel Dekker, 1986. #' #' @examples #' library(dplyr) #' #' carbon.fabric %>% #' filter(test == "FC") %>% #' filter(condition == "RTD") %>% #' anderson_darling_normal(strength) #' ## Call: #' ## anderson_darling_normal(data = ., x = strength) #' ## #' ## Distribution: Normal ( n = 18 ) #' ## Test statistic: A = 0.9224776 #' ## OSL (p-value): 0.01212193 (assuming unknown parameters) #' ## Conclusion: Sample is not drawn from a Normal distribution (alpha = 0.05) #' #' @importFrom rlang enquo eval_tidy #' #' @name anderson_darling NULL # A non-exported function for an Anderson--Darling goodness of fit test # The function \code{dist} should be # the cumulative distribution function. Additional parameters, such as # the parameters for the distribution, can be passed through the argument # to the function \code{...} to the function \code{dist}. anderson_darling <- function(x0, call, ad_p_unknown_param_fcn, dist_name, dist, alpha, ...) 
{
  # Core Anderson-Darling engine shared by the normal/lognormal/Weibull
  # wrappers. `dist` is the CDF; extra distribution parameters arrive
  # via `...`. `ad_p_unknown_param_fcn` maps (A, n) to the OSL (p-value)
  # for the unknown-parameter case.
  f0 <- dist
  x0_sorted <- sort(x0)
  n <- length(x0_sorted)
  ii <- 1:n
  # Probability-integral transform of the sorted data.
  u <- f0(x0_sorted, ...)
  # Anderson-Darling statistic A^2 (unmodified for parameter estimation).
  a <- -n - sum((2 * ii - 1) / n * (log(u) + log(1 - rev(u))))
  res <- list(
    call = call,
    dist = dist_name,
    data = x0,
    n = n,
    A = a,  # nolint
    osl = ad_p_unknown_param_fcn(a, n),
    alpha = alpha
  )
  # Reject the hypothesized distribution when OSL <= alpha.
  res$reject_distribution <- res$osl <= res$alpha
  class(res) <- "anderson_darling"
  return(res)
}

#' Glance at an `anderson_darling` object
#'
#' @description
#' Glance accepts an object of type `anderson_darling` and
#' returns a [tibble::tibble()] with
#' one row of summaries.
#'
#' Glance does not do any calculations: it just gathers the results in a
#' tibble.
#'
#' @param x an `anderson_darling` object
#' @param ... Additional arguments. Not used. Included only to match generic
#'            signature.
#'
#'
#' @return
#' A one-row [tibble::tibble()] with the following
#' columns:
#'
#' - `dist` the distribution used
#' - `n` the number of observations in the sample
#' - `A` the Anderson--Darling test statistic
#' - `osl` the observed significance level (p-value),
#'   assuming the
#'   parameters of the distribution are estimated from the data
#' - `alpha` the required significance level for the test.
#'   This value is given by the user.
#' - `reject_distribution` a logical value indicating whether
#'   the hypothesis that the data is drawn from the specified distribution
#'   should be rejected
#'
#'
#' @seealso
#' [anderson_darling()]
#'
#' @examples
#' x <- rnorm(100, 100, 4)
#' ad <- anderson_darling_weibull(x = x)
#' glance(ad)
#'
#' ## # A tibble: 1 x 6
#' ##   dist        n     A        osl alpha reject_distribution
#' ##
#' ## 1 Weibull   100  2.62 0.00000207  0.05 TRUE
#'
#' @method glance anderson_darling
#' @importFrom tibble tibble
#'
#' @export
glance.anderson_darling <- function(x, ...)
{ # nolint # nolint start: object_usage_linter with( x, tibble::tibble( dist = dist, n = n, A = A, # nolint osl = osl, alpha = alpha, reject_distribution = reject_distribution ) ) # nolint end } #' @export print.anderson_darling <- function(x, ...) { cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") cat("Distribution: ", x$dist, "( n =", x$n, ")", "\n") cat("Test statistic: A =", x$A, "\n") cat( "OSL (p-value): ", x$osl, " (assuming unknown parameters)\n" ) if (x$reject_distribution) { cat("Conclusion: Sample is not drawn from a", x$dist, "distribution ( alpha =", x$alpha, ")") } else { cat("Conclusion: Sample is drawn from a", x$dist, "distribution ( alpha =", x$alpha, ")") } } #' @importFrom rlang enquo eval_tidy #' @importFrom stats pnorm #' #' @rdname anderson_darling #' #' @export anderson_darling_normal <- function(data = NULL, x, alpha = 0.05) { verify_tidy_input( df = data, x = x, c = match.call(), arg_name = "x") x0 <- eval_tidy(enquo(x), data) return(anderson_darling(x0, call = match.call(), ad_p_unknown_param_fcn = ad_p_normal_unknown_param, dist_name = "Normal", alpha = alpha, dist = pnorm, mean = mean(x0), sd = sd(x0))) } #' @importFrom rlang enquo eval_tidy #' @importFrom stats pnorm #' #' @rdname anderson_darling #' #' @export anderson_darling_lognormal <- function(data = NULL, x, alpha = 0.05) { verify_tidy_input( df = data, x = x, c = match.call(), arg_name = "x") x0 <- eval_tidy(enquo(x), data) x0 <- log(x0) return(anderson_darling(x0, call = match.call(), ad_p_unknown_param_fcn = ad_p_normal_unknown_param, dist_name = "Lognormal", alpha = alpha, dist = pnorm, mean = mean(x0), sd = sd(x0))) } #' @importFrom rlang enquo eval_tidy #' @importFrom stats pweibull #' @importFrom MASS fitdistr #' #' @rdname anderson_darling #' #' @export anderson_darling_weibull <- function(data = NULL, x, alpha = 0.05) { verify_tidy_input( df = data, x = x, c = match.call(), arg_name = "x") x0 <- eval_tidy(enquo(x), data) dist <- 
fitdistr(x0, "weibull") return(anderson_darling(x0, call = match.call(), dist = pweibull, dist_name = "Weibull", alpha = alpha, ad_p_unknown_param_fcn = ad_p_weibull_unknown_param, shape = dist$estimate[["shape"]], scale = dist$estimate[["scale"]])) } ad_p_normal_unknown_param <- function(z, n) { ad_star <- (1 + 4 / n - 25 / n ^ 2) * z osl <- 1 / (1 + exp(-0.48 + 0.78 * log(ad_star) + 4.58 * ad_star)) return(osl) } ad_p_weibull_unknown_param <- function(z, n) { ad_star <- (1 + 0.2 / sqrt(n)) * z osl <- 1 / (1 + exp(-0.1 + 1.24 * log(ad_star) + 4.48 * ad_star)) return(osl) } cmstatr/R/mnr.R0000644000176200001440000002173014477217317013067 0ustar liggesusers#' Detect outliers using the maximum normed residual method #' #' @description #' This function detects outliers using the maximum normed residual #' method described in CMH-17-1G. This method identifies a value #' as an outlier if the absolute difference between the value and #' the sample mean divided by the sample standard deviation #' exceeds a critical value. #' #' @param data a data.frame #' @param x the variable in the data.frame for which to find the MNR #' or a vector if `data=NULL` #' @param alpha the significance level for the test. Defaults to 0.05 #' #' @details #' `data` is an optional argument. If `data` is given, it #' should be a #' `data.frame` (or similar object). When `data` is specified, the #' value of `x` is expected to be a variable within `data`. If #' `data` is not specified, `x` must be a vector. #' #' The maximum normed residual test is a test for outliers. The test statistic #' is given in CMH-17-1G. Outliers are identified in the returned object. #' #' The maximum normed residual test statistic is defined as: #' #' \deqn{MNR = max \frac{\left| x_i - \bar{x} \right|}{s} }{ #' MNR = max | x_i- x_bar | / s } #' #' When the value of the MNR test statistic exceeds the critical value #' defined in Section 8.3.3.1 of CMH-17-1G, the corresponding value #' is identified as an outlier. 
#' It is then removed from the sample, and
#' the test statistic is computed again and compared with the critical
#' value corresponding with the new sample. This process is repeated until
#' no values are identified as outliers.
#'
#' @return an object of class `mnr`
#'   This object has the following fields:
#'  - `call` the expression used to call this function
#'  - `data` the original data used to compute the MNR
#'  - `alpha` the value of alpha given by the user
#'  - `mnr` the computed MNR test statistic
#'  - `crit` the critical value given the sample size and the
#'    significance level
#'  - `outliers` a data.frame containing the `index` and
#'    `value` of each of the identified outliers
#'  - `n_outliers` the number of outliers found
#'
#' @examples
#' library(dplyr)
#'
#' carbon.fabric.2 %>%
#'   filter(test == "FC" & condition == "ETW2" & batch == "A") %>%
#'   maximum_normed_residual(strength)
#'
#' ## Call:
#' ## maximum_normed_residual(data = ., x = strength)
#' ##
#' ## MNR = 1.958797 ( critical value = 1.887145 )
#' ##
#' ## Outliers ( alpha = 0.05 ):
#' ##   Index   Value
#' ##       6   44.26
#'
#' carbon.fabric.2 %>%
#'   filter(test == "FC" & condition == "ETW2" & batch == "B") %>%
#'   maximum_normed_residual(strength)
#'
#' ## Call:
#' ## maximum_normed_residual(data = ., x = strength)
#' ##
#' ## MNR = 1.469517 ( critical value = 1.887145 )
#' ##
#' ## No outliers detected ( alpha = 0.05 )
#'
#' @references
#' "Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials," SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @importFrom rlang eval_tidy enquo
#'
#' @export
maximum_normed_residual <- function(data = NULL, x, alpha = 0.05) {
  res <- list()
  class(res) <- "mnr"
  res$call <- match.call()

  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")

  cur_data <- eval_tidy(enquo(x), data)

  res$data <- cur_data
  res$alpha <- alpha

  # Track the positions of the (shrinking) working sample within the
  # original data so outlier indices refer to the user's data
  indices_cur <- seq_along(res$data)

  cur_mnr <- max(abs(res$data - mean(res$data)) / sd(res$data))
  res$mnr <- cur_mnr
  cur_crit <- maximum_normed_residual_crit(length(res$data), alpha)
  res$crit <- cur_crit

  res$outliers <- data.frame(index = c(), value = c())
  res$n_outliers <- 0

  # sd() is NA for a single observation and zero for constant data; in
  # either case the MNR statistic is undefined and no screening is done.
  if (is.na(cur_mnr)) {
    return(res)
  }

  # The `maximum_normed_residual_crit` function uses DF = n - 2.
  # With each loop, we remove one data point, so we stop before fewer
  # than 3 points remain (after which DF <= 0). seq_len(max(..., 0)),
  # rather than 1:(n - 2), also guards against n <= 2, where 1:(n - 2)
  # would iterate incorrectly and compare against a NaN critical value.
  for (i in seq_len(max(length(res$data) - 2, 0))) {
    cur_mnr <- max(abs(cur_data - mean(cur_data)) / sd(cur_data))
    cur_crit <- maximum_normed_residual_crit(length(cur_data), alpha)

    # If the remaining data became constant (sd == 0), the statistic is
    # undefined and no further outliers can be identified.
    if (is.na(cur_mnr)) {
      break
    }

    if (cur_mnr >= cur_crit) {
      worst_index_cur <- which.max(abs(cur_data - mean(cur_data)))
      res$outliers <- rbind(
        res$outliers,
        data.frame(
          index = indices_cur[worst_index_cur],
          value = cur_data[worst_index_cur]
        )
      )
      res$n_outliers <- res$n_outliers + 1
      cur_data <- cur_data[-worst_index_cur]
      indices_cur <- indices_cur[-worst_index_cur]
    } else {
      # If we didn't remove a data point, then the current MNR and the
      # critical MNR definitely won't change, so we can short-circuit
      # this function.
      break
    }
  }

  res
}

# Critical value for the MNR test for a sample of size n at significance
# level alpha, based on a two-sided t-quantile with n - 2 degrees of
# freedom (CMH-17-1G Section 8.3.3.1).
maximum_normed_residual_crit <- function(n, alpha) {
  t <- qt(p = 1 - alpha / (2 * n), df = n - 2,
          lower.tail = TRUE, log.p = FALSE)
  (n - 1) / sqrt(n) * sqrt(t ^ 2 / (n - 2 + t ^ 2))
}
#' #' @param x An `mnr` object #' @param ... Additional arguments. Not used. Included only to match generic #' signature. #' #' #' @return #' A one-row [tibble::tibble()] with the following #' columns: #' #' - `mnr` the computed MNR test statistic #' - `alpha` the value of alpha used for the test #' - `crit` the critical value given the sample size and the #' significance level #' - `n_outliers` the number of outliers found #' #' #' @seealso #' [maximum_normed_residual()] #' #' @examples #' x <- c(rnorm(20, 100, 5), 10) #' m <- maximum_normed_residual(x = x) #' glance(m) #' #' ## # A tibble: 1 x 4 #' ## mnr alpha crit n_outliers #' ## #' ## 1 4.23 0.05 2.73 1 #' #' @method glance mnr #' @importFrom tibble tibble #' #' @export glance.mnr <- function(x, ...) { # nolint # nolint start: object_usage_linter with( x, tibble::tibble( mnr = mnr, alpha = alpha, crit = crit, n_outliers = n_outliers ) ) # nolint end } #' Augment data with information from an `mnr` object #' #' @description #' Augment accepts an `mnr` object (returned from the function #' [maximum_normed_residual()]) and a dataset and adds the column #' `.outlier` to the dataset. The column `.outlier` is a logical #' vector indicating whether each observation is an outlier. #' #' When passing data into `augment` using the `data` argument, #' the data must be exactly the data that was passed to #' `maximum_normed_residual`. #' #' @param x an `mnr` object created by #' [maximum_normed_residual()] #' @param data a `data.frame` or #' [tibble::tibble()] #' containing the original data that was passed to #' `maximum_normed_residual` #' @param ... Additional arguments. Not used. Included only to match generic #' signature. #' #' @return #' When `data` is supplied, `augment` returns `data`, but with #' one column appended. When `data` is not supplied, `augment` #' returns a new [tibble::tibble()] with the column #' `values` containing the original values used by #' `maximum_normed_residaul` plus one additional column. 
The additional #' column is: #' #' - `.outler` a logical value indicating whether the observation #' is an outlier #' #' @examples #' data <- data.frame(strength = c(80, 98, 96, 97, 98, 120)) #' m <- maximum_normed_residual(data, strength) #' #' # augment can be called with the original data #' augment(m, data) #' #' ## strength .outlier #' ## 1 80 FALSE #' ## 2 98 FALSE #' ## 3 96 FALSE #' ## 4 97 FALSE #' ## 5 98 FALSE #' ## 6 120 FALSE #' #' # or augment can be called without the orignal data and it will be #' # reconstructed #' augment(m) #' #' ## # A tibble: 6 x 2 #' ## values .outlier #' ## #' ## 1 80 FALSE #' ## 2 98 FALSE #' ## 3 96 FALSE #' ## 4 97 FALSE #' ## 5 98 FALSE #' ## 6 120 FALSE #' #' @seealso #' [maximum_normed_residual()] #' #' @method augment mnr #' @importFrom tibble tibble #' #' @export augment.mnr <- function(x, data = x$data, ...) { # nolint if (is.data.frame(data)) { df <- data } else { df <- tibble::tibble(values = data) } res <- df res[[".outlier"]] <- FALSE res$.outlier[x$outliers$index] <- TRUE res } #' @export print.mnr <- function(x, ...) { cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") cat("MNR =", x$mnr, " ( critical value =", x$crit, ")\n\n") if (nrow(x$outliers) == 0) { cat("No outliers detected ( alpha =", x$alpha, ")\n\n") } else { cat("Outliers ( alpha =", x$alpha, "):\n") justify <- c("right", "left", "left") width <- c(8L, 2L, 16L) cat(format_row(list("Index", " ", "Value"), justify, width, ...)) for (j in seq(along.with = x$outliers$index)) { cat(format_row( list(x$outliers[["index"]][j], " ", x$outliers[["value"]][j]), justify, width, ...) ) } } } cmstatr/R/data.R0000644000176200001440000000063014015477557013202 0ustar liggesusers #' Sample data for a generic carbon fabric #' #' Datasets containing sample data that is typical of a generic carbon #' fabric prepreg. This data is used in several examples within the #' `cmstatr` package. 
#' @importFrom ggplot2 ggproto
#' @importFrom ggplot2 Stat
# ggproto Stat backing stat_normal_surv_func(): fits a normal curve to the
# group's x values (sample mean and standard deviation) and evaluates its
# survival function on an evenly spaced grid of `n` points.
StatNormalSurvFunc <- ggproto( # nolint
  "stat_normal_surv_func",
  Stat,
  compute_group = function(data, scales, n) {
    x <- seq(from = min(data$x), to = max(data$x), length.out = n)
    # Survival function = 1 - CDF, obtained via lower.tail = FALSE
    y <- pnorm(x, mean(data$x), sd(data$x), lower.tail = FALSE)
    data.frame(x = x, y = y)
  },
  required_aes = c("x")
)

#' Normal Survival Function
#'
#' @description
#' The Normal survival function provides a visualization of a
#' distribution. A normal curve is fit based on the mean and standard
#' deviation of the data, and the survival function of this normal
#' curve is plotted. The survival function is simply one minus the
#' CDF.
#'
#' @param mapping Set of aesthetic mappings created by `aes()`.
#' @param data The data to be displayed in this layer. This has the
#'             same usage as a `ggplot2` `stat` function.
#' @param geom The geometric object to use to display the data.
#' @param position Position argument
#' @param ... Other arguments to pass on to `layer`.
#' @param n If `NULL`, do not interpolate. Otherwise, the
#'          number of points to interpolate.
#' @param pad If `TRUE`, pad the ESF with additional points
#'            `(-Inf, 0)` and `(0, Inf)`.
#'            NOTE(review): `pad` is accepted here but is not forwarded in
#'            `params` and `StatNormalSurvFunc` does not use it — it only
#'            affects `stat_esf()`. Confirm whether it should be removed
#'            or forwarded.
#' @param show.legend Should this layer be included in the legends?
#' @param inherit.aes If `FALSE`, overrides the default aesthetic,
#'                    rather than combining with them.
#'
#' @importFrom ggplot2 layer
#'
#' @export
stat_normal_surv_func <- function(mapping = NULL, data = NULL,
                                  geom = "smooth", position = "identity",
                                  show.legend = NA, inherit.aes = TRUE, # nolint
                                  n = 100, pad = FALSE, ...) {
  layer(
    stat = StatNormalSurvFunc, data = data, geom = geom,
    position = position, mapping = mapping,
    show.legend = show.legend, # nolint
    inherit.aes = inherit.aes, params = list(n = n, ...)
  )
}

#' @importFrom ggplot2 ggproto
#' @importFrom ggplot2 Stat
# ggproto Stat backing stat_esf(): evaluates 1 - ECDF of the group's x
# values, either at the unique observed values (n = NULL) or on an evenly
# spaced grid of `n` points, optionally padded with +/-Inf end points.
StatESF <- ggproto( # nolint
  "stat_esf",
  Stat,
  compute_group = function(data, scales, n, pad) {
    if (is.null(n)) {
      x <- unique(data$x)
    } else {
      x <- seq(from = min(data$x), to = max(data$x), length.out = n)
    }
    if (pad) {
      x <- c(-Inf, x, Inf)
    }
    # Empirical survival function = 1 - empirical CDF
    y <- 1 - ecdf(data$x)(x)
    data.frame(x = x, y = y)
  },
  required_aes = c("x")
)

#' Empirical Survival Function
#'
#' @description
#' The empirical survival function (ESF) provides a visualization of a
#' distribution. This is closely related to the empirical cumulative
#' distribution function (ECDF). The empirical survival function is
#' simply ESF = 1 - ECDF.
#'
#' @param mapping Set of aesthetic mappings created by `aes()`.
#' @param data The data to be displayed in this layer. This has the
#'             same usage as a `ggplot2` `stat` function.
#' @param geom The geometric object to use to display the data.
#' @param position Position argument
#' @param ... Other arguments to pass on to `layer`.
#' @param n If `NULL`, do not interpolate. Otherwise, the
#'          number of points to interpolate.
#' @param pad If `TRUE`, pad the ESF with additional points
#'            `(-Inf, 0)` and `(0, Inf)`.
#' @param show.legend Should this layer be included in the legends?
#' @param inherit.aes If `FALSE`, overrides the default aesthetic,
#'                    rather than combining with them.
#'
#' @importFrom ggplot2 layer
#'
#' @export
stat_esf <- function(mapping = NULL, data = NULL, geom = "point",
                     position = "identity", show.legend = NA, # nolint
                     inherit.aes = TRUE, n = NULL, # nolint
                     pad = FALSE, ...) {
  layer(
    stat = StatESF, data = data, geom = geom, position = position,
    mapping = mapping, show.legend = show.legend, # nolint
    inherit.aes = inherit.aes,
    params = list(n = n, pad = pad, ...) # nolint
  )
}
#' Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials," SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @importFrom rlang enquo eval_tidy
#' @importFrom stats var.test median pf
#'
#' @seealso [calc_cv_star()]
#' @seealso [transform_mod_cv()]
#'
#' @export
levene_test <- function(data = NULL, x, groups, alpha = 0.05, modcv = FALSE) {
  res <- list()
  class(res) <- "levene"
  res$call <- match.call()

  # Validate and extract the response and grouping variables
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  data_vector <- eval_tidy(enquo(x), data)

  verify_tidy_input(
    df = data,
    x = groups,
    c = match.call(),
    arg_name = "groups")
  group_vector <- eval_tidy(enquo(groups), data)

  res$data <- data_vector
  res$groups <- group_vector
  res$alpha <- alpha

  # Optionally apply the modified CV transformation before testing
  if (modcv == TRUE) {
    res$modcv <- TRUE
    res$modcv_transformed_data <- transform_mod_cv_grouped(
      data_vector,
      group_vector
    )
    data_vector <- res$modcv_transformed_data
  } else {
    res$modcv <- FALSE
    res$modcv_transformed_data <- NULL
  }

  # Levene's transformation: absolute deviation from each group's median
  abs_deviations <- lapply(levels(as.factor(group_vector)), function(lvl) {
    grp <- data_vector[group_vector == lvl]
    abs(grp - median(grp))
  })

  n <- length(data_vector)
  k <- length(abs_deviations)
  grand_mean <- mean(unlist(abs_deviations))

  # F statistic on the transformed data: between-group over within-group
  # mean squares
  between_terms <- vapply(abs_deviations, function(w) {
    length(w) * (mean(w) - grand_mean) ^ 2 / (k - 1)
  }, FUN.VALUE = numeric(1L))
  within_terms <- vapply(abs_deviations, function(w) {
    sum((w - mean(w)) ^ 2) / (n - k)
  }, FUN.VALUE = numeric(1L))

  res$n <- n
  res$k <- k
  res$f <- sum(between_terms) / sum(within_terms)
  res$p <- pf(res$f, df1 = k - 1, df2 = n - k, lower.tail = FALSE)
  res$reject_equal_variance <- res$p <= alpha

  res
}
{ cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") justify <- c("left", "left") width <- c(16L, 16L) cat(format_row_equal(list("n", x$n, "k", x$k), justify, width, ...)) if (x$modcv == TRUE) { cat("Modified CV Approach Used", "\n") } cat(format_row_equal(list("F", x$f, "p-value", x$p), justify, width, ...)) if (x$reject_equal_variance) { cat("Conclusion: Samples have unequal variance ( alpha =", x$alpha, ")\n\n") } else { cat("Conclusion: Samples have equal variances ( alpha =", x$alpha, ")\n\n") } } cmstatr/R/misc.R0000644000176200001440000000047614065516021013215 0ustar liggesusers# To add extra release questions that devtools::release asks # Ref. https://devtools.r-lib.org/reference/release.html release_questions <- function() { c( "Did you re-build the hk_ext.Rmd using `rebuild-long-running-vignette.R`?" ) } # To allow the vignette to build correctly #' @importFrom stats dnorm NULL cmstatr/R/cmstatr.R0000644000176200001440000000025314015477557013747 0ustar liggesusers#' @description #' To learn more about `cmstatr`, start with the vignettes: #' `browseVignettes(package = "cmstatr")` #' @keywords internal #' @docType package "_PACKAGE" cmstatr/R/plot-nested.R0000644000176200001440000003253214517621570014526 0ustar liggesusers #' @importFrom rlang call2 build_nesting <- function(x, group_df, i_group, label, stat, type, extras) { if (i_group > ncol(group_df)) { if (type == "Points") { nodes <- NULL } else { nodes <- list(build_nesting(x, group_df, i_group, label, stat, "Points", extras)) } } else { group_levels <- levels(as.factor(group_df[[i_group]])) nodes <- lapply(group_levels, function(gl) { mask <- group_df[[i_group]] == gl node_x <- subset(x, mask) node_group_df <- subset(group_df, mask) extras_subset <- lapply(extras, function(e) { subset(e, mask) }) n <- build_nesting(node_x, node_group_df, i_group + 1, gl, stat, "Label", extras_subset) n }) } list( type = type, x = x, stat = eval(call2(.fn = stat, x)), label = 
label, nodes = nodes, extras = extras ) } build_drawing_element_list <- function(nesting, group_df, y_gap) { elm_list <- list() # We'll need a queue for a level order tree traversal # We'll track the heights of each grouping too level_counts <- c() queue <- list() push_queue <- function(obj, parent, level) { # the root (not drawn) is level=1 parent_level_order <- ifelse(is.null(parent), 1, parent$level_order) # level_heights is an accumulator that keeps increasing (by 1) as # subsequent items within a level are drawn level_counts[level] <<- sum(level_counts[level], 1, na.rm = TRUE) level_order <- level_counts[level] level_heights <- c(1, level_counts) ancestor_level_heights <- ifelse(level <= 2, 0, sum(level_heights[2:(level - 1)])) y_obj <- ancestor_level_heights + y_gap * max(0, level - 2) + (parent_level_order - 1) queue <<- c(queue, list(list( obj = obj, parent = parent, level = level, level_order = level_order, y = -y_obj ))) } pop_queue <- function() { obj <- queue[[1]] queue <<- queue[-1] obj } # We'll traverse the "nesting" tree using level-order traversal push_queue(nesting, NULL, 1) while (length(queue) > 0) { cur_item <- pop_queue() for (n in cur_item$obj$nodes) { push_queue(n, cur_item, cur_item$level + 1) } if (cur_item$obj$type != "Root") { elm_list <- c(elm_list, list(cur_item)) } } minor_breaks <- sapply(seq_along(level_counts) + 1, function(level) { level_heights <- c(1, level_counts) ancestor_level_heights <- ifelse(level <= 2, 0, sum(level_heights[2:(level - 1)])) y_obj <- # nolint ancestor_level_heights + y_gap * max(0, level - 2) - y_gap / 2 - 0.5 }) labels <- c(names(group_df), "") breaks <- minor_breaks[-1] - diff(minor_breaks) / 2 axis_info <- list( breaks = rev(-breaks), labels = rev(labels), minor_breaks = rev(-minor_breaks) ) list( elm_list = (elm_list), axis_info = (axis_info) ) } #' @importFrom ggplot2 xlab ylab scale_y_continuous expansion theme #' @importFrom ggplot2 element_blank element_line geom_hline set_axes <- function(g, 
xlabel, axis_info, divider_color) { breaks <- axis_info$breaks labels <- axis_info$labels minor_breaks <- axis_info$minor_breaks g <- g + xlab(xlabel) + scale_y_continuous( breaks = breaks, labels = labels, minor_breaks = minor_breaks, limits = c(minor_breaks[1], minor_breaks[length(minor_breaks)]), expand = expansion() ) + theme(panel.grid.major.y = element_blank(), panel.grid.minor.y = element_line(linewidth = 0.1, color = "black")) + ylab("") if (!is.null(divider_color)) { lapply(minor_breaks, function(b) { g <<- g + geom_hline(yintercept = b, color = divider_color) }) } g } #' @importFrom purrr map_dfr #' @importFrom rlang exec #' @importFrom ggplot2 aes #' @importFrom rlang .data draw_connectors <- function(g, elm_list, connector_args) { points <- map_dfr( elm_list, function(cur_item) { if (!is.null(cur_item$parent) & !is.na(cur_item$parent$y) & cur_item$parent$obj$type == "Label") { data.frame( x = cur_item$parent$obj$stat, y = cur_item$y ) } else { NULL } } ) if (nrow(points) == 0) { g } else { g + exec( ggplot2::geom_point, data = points, mapping = aes(x = .data$x, y = .data$y), !!!connector_args ) } } #' @importFrom purrr map_dfr #' @importFrom rlang exec #' @importFrom ggplot2 aes #' @importFrom rlang .data draw_vert_lines_to_labels <- function(g, elm_list, vline_args) { line_segments <- map_dfr( elm_list, function(cur_item) { if (!is.null(cur_item$parent) & !is.na(cur_item$parent$y) & cur_item$parent$obj$type == "Label") { data.frame( x = cur_item$parent$obj$stat, xend = cur_item$parent$obj$stat, y = cur_item$y, yend = cur_item$parent$y ) } else { NULL } } ) if (nrow(line_segments) == 0) { g } else { g + exec( ggplot2::geom_segment, data = line_segments, mapping = aes(x = .data$x, xend = .data$xend, y = .data$y, yend = .data$yend), !!!vline_args ) } } #' @importFrom purrr map_dfr #' @importFrom rlang exec #' @importFrom ggplot2 aes #' @importFrom rlang .data draw_horiz_lines_to_labels <- function(g, elm_list, hline_args) { line_segments <- map_dfr( 
elm_list, function(cur_item) { if (!is.null(cur_item$parent) & !is.na(cur_item$parent$y) & cur_item$parent$obj$type == "Label") { data.frame( x = cur_item$obj$stat, xend = cur_item$parent$obj$stat, y = cur_item$y, yend = cur_item$y ) } else { NULL } } ) if (nrow(line_segments) == 0) { g } else { g + exec( ggplot2::geom_segment, data = line_segments, mapping = aes(x = .data$x, xend = .data$xend, y = .data$y, yend = .data$yend), !!!hline_args ) } } add_extras_data <- function(df, extras, name, must_be_equal) { if (name %in% names(extras)) { if (length(levels(as.factor(extras[[name]]))) > 1 && must_be_equal) { # Not all values are the same return(df) } else { # All values are the same, so we can just take the first # This will work regardless of how many rows are in df df[[name]] <- extras[[name]][1] return(df) } } else { return(df) } } #' @importFrom purrr map_dfr #' @importFrom rlang exec #' @importFrom ggplot2 aes labs #' @importFrom rlang .data draw_points <- function(g, elm_list, point_args, dline_args, extras_names) { points <- map_dfr( elm_list, function(cur_item) { if (cur_item$obj$type == "Points") { add_extras_data( data.frame( x = cur_item$obj$x, y = cur_item$y ), cur_item$obj$extras, "color", FALSE ) } else { return(NULL) } } ) line_segments <- map_dfr( elm_list, function(cur_item) { if (cur_item$obj$type == "Points") { data.frame( x = min(cur_item$obj$x), xend = max(cur_item$obj$x), y = cur_item$y, yend = cur_item$y ) } else { return(NULL) } } ) if ("color" %in% names(extras_names)) { g <- g + exec( ggplot2::geom_point, data = points, mapping = aes(x = .data$x, y = .data$y, color = .data$color), !!!point_args ) + labs( color = extras_names[["color"]] ) } else { # No color variable g <- g + exec( ggplot2::geom_point, data = points, mapping = aes(x = .data$x, y = .data$y), !!!point_args ) } g + exec( ggplot2::geom_segment, data = line_segments, mapping = aes( x = .data$x, xend = .data$xend, y = .data$y, yend = .data$yend ), !!!dline_args ) } #' @importFrom 
purrr map_dfr #' @importFrom rlang exec #' @importFrom ggplot2 aes labs #' @importFrom rlang .data draw_labels <- function(g, elm_list, label_args, extras_names) { labels <- map_dfr( elm_list, function(cur_item) { if (cur_item$obj$type == "Label") { add_extras_data( data.frame( x = cur_item$obj$stat, y = cur_item$y, label = cur_item$obj$label ), cur_item$obj$extras, "fill", TRUE ) } else { NULL } } ) if (nrow(labels) == 0) { g } else { if ("fill" %in% names(extras_names)) { labels_fill <- subset(labels, !is.na(labels$fill)) g <- g + exec( ggplot2::geom_label, data = labels_fill, mapping = aes(x = .data$x, y = .data$y, label = .data$label, fill = .data$fill), !!!label_args ) + labs( fill = extras_names[["fill"]] ) labels <- subset(labels, is.na(labels$fill)) } # Data with no fill variable g + exec( ggplot2::geom_label, data = labels, mapping = aes(x = .data$x, y = .data$y, label = .data$label), !!!label_args ) } } #' Create a plot of nested sources of variation #' #' @description #' Creates a plot showing the breakdown of variation within a sample. This #' function uses [ggplot2] internally. #' #' @param dat a `data.frame` or similar object #' @param x the variable within `dat` to plot. Most often this would be a #' strength or modulus variable. #' @param groups a vector of variables to group the data by #' @param stat a function for computing the central location for each group. #' This is normally "mean" but could be "median" or another #' function. #' @param ... extra options. See Details. #' @param y_gap the vertical gap between grouping variables #' @param divider_color the color of the lines between grouping variables. #' Or `NULL` to omit these lines. #' @param point_args arguments to pass to [ggplot2::geom_point] when plotting #' individual data points. #' @param dline_args arguments to pass to [ggplot2::geom_segment] when plotting #' the horizontal lines between data points. 
#' @param vline_args arguments to pass to [ggplot2::geom_segment] when plotting
#'                   vertical lines
#' @param hline_args arguments to pass to [ggplot2::geom_segment] when plotting
#'                   horizontal lines connecting levels in groups
#' @param label_args arguments to pass to [ggplot2::geom_label] when plotting
#'                   labels
#' @param connector_args arguments to pass to [ggplot2::geom_point] when
#'                       plotting the connection between the vertical lines
#'                       and the horizontal lines connecting levels in groups
#'
#' @details
#' Extra options can be included to control aesthetic options. The following
#' options are supported. Any (or all) can be set to a single variable
#' in the data set.
#'
#' - `color`: Controls the color of the data points.
#' - `fill`: Controls the fill color of the labels. When a particular label
#'   is associated with data points with more than one level of the supplied
#'   variable, the fill is omitted.
#'
#' @examples
#' library(dplyr)
#' carbon.fabric.2 %>%
#'   filter(test == "WT" & condition == "RTD") %>%
#'   nested_data_plot(strength,
#'                    groups = c(batch, panel))
#'
#' # Labels can be filled too
#' carbon.fabric.2 %>%
#'   filter(test == "WT" & condition == "RTD") %>%
#'   nested_data_plot(strength,
#'                    groups = c(batch, panel),
#'                    fill = batch)
#'
#' @importFrom rlang ensym
#' @importFrom rlang quo_get_expr enquos
#' @importFrom ggplot2 ggplot
#' @importFrom dplyr ungroup
#' @export
nested_data_plot <- function(dat, x, groups = c(), stat = "mean", ...,
                             y_gap = 1, divider_color = "grey50",
                             point_args = list(), dline_args = list(),
                             vline_args = list(), hline_args = list(),
                             label_args = list(), connector_args = list()) {
  # Drop any existing grouping so select()/lapply() see the whole data
  dat <- ungroup(dat)
  group_df <- select(dat, !!enquo(groups))
  # Capture the column name (for the axis label) before overwriting x
  # with the column's values
  xlabel <- ensym(x)
  x <- select(dat, !!enquo(x))[[1]]
  # Capture the extra aesthetic mappings (color, fill, ...) passed
  # through `...` as quosures: `extras` holds the column values,
  # `extras_names` the expressions used for legend titles
  en_dots <- enquos(..., .ignore_empty = "all")
  extras <- lapply(en_dots, function(ed) select(dat, !!quo_get_expr(ed))[[1]])
  extras_names <- lapply(en_dots, function(ed) quo_get_expr(ed))
  # Build the nesting tree, then flatten it into drawable elements with
  # y positions and axis break information
  nesting <- build_nesting(x, group_df, 1, "Root", stat, "Root", extras)
  el <- build_drawing_element_list(nesting, group_df, y_gap)
  elm_list <- el$elm_list
  axis_info <- el$axis_info
  # Assemble the plot layer by layer
  g <- ggplot()
  g <- set_axes(g, xlabel, axis_info, divider_color)
  g <- draw_vert_lines_to_labels(g, elm_list, vline_args)
  g <- draw_horiz_lines_to_labels(g, elm_list, hline_args)
  g <- draw_connectors(g, elm_list, connector_args)
  g <- draw_points(g, elm_list, point_args, dline_args, extras_names)
  g <- draw_labels(g, elm_list, label_args, extras_names)
  g
}
#' Normalizes strength values to ply thickness
#'
#' @description
#' Scales each strength value by the ratio of its measured thickness to a
#' single nominal thickness, per CMH-17-1G:
#' \deqn{normalized\,value = test\,value \frac{t_{measured}}{t_{nominal}}}{
#' normalized value = test value * t_measured / t_nominal}
#'
#' @param strength the strength to be normalized. Either a vector or a numeric
#' @param measured_thk the measured thickness of the samples. Must be the same
#'        length as strength
#' @param nom_thk the nominal thickness. Must be a single numeric value.
#'
#' @return
#' The normalized strength values
#'
#' @importFrom dplyr select mutate
#'
#' @export
normalize_ply_thickness <- function(strength, measured_thk, nom_thk) {
  # Guard: strength and thickness must pair up one-to-one
  lengths_disagree <- length(strength) != length(measured_thk)
  if (lengths_disagree) {
    stop("strength and measured_thk must be the same length")
  }
  # Guard: the nominal thickness is a single reference value, not a vector
  if (length(nom_thk) != 1) {
    stop("nom_thk must be a single numeric value (not a vector)")
  }
  thickness_ratio <- measured_thk / nom_thk
  strength * thickness_ratio
}
#' Normalize values to group means
#'
#' @description
#' Computes the mean of each group, then divides each observation by its
#' corresponding group mean. This is commonly done when pooling data across
#' environments.
#'
#' @param x the variable containing the data to normalized
#' @param group the variable containing the groups
#'
#' @return
#' Returns a vector of normalized values
#'
#' @references
#' "Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials," SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @export
normalize_group_mean <- function(x, group) {
  if (length(x) != length(group)) {
    stop("The length of x and groups must be equal")
  }
  if (length(x) == 0) {
    # Early return: ave() is not well-defined for zero-length input
    return(numeric(0))
  }
  # stats::ave computes each group's mean once and broadcasts it back to
  # every observation. The previous implementation recomputed the group
  # mean separately for each observation (O(n * group size)); this is a
  # single pass per group with identical results.
  group_means <- stats::ave(x, group, FUN = mean)
  x / group_means
}
#' Calculate the modified CV from the CV
#'
#' @description
#' This function calculates the modified coefficient of variation (CV)
#' based on a (unmodified) CV.
#' The modified CV is calculated based on the rules in CMH-17-1G. Those
#' rules are:
#'
#' - For CV < 4\%, CV* = 6\%
#' - For 4\% <= CV < 8\%, CV* = CV / 2 + 4\%
#' - For CV > 8\%, CV* = CV
#'
#' @param cv The CV to modify. May be a single value or a vector.
#'
#' @return
#' The value of the modified CV (same length as `cv`)
#'
#' @seealso
#' [cv()]
#'
#' @examples
#' # The modified CV for values of CV smaller than 4% is 6%
#' calc_cv_star(0.01)
#' ## [1] 0.06
#'
#' # The modified CV for values of CV larger than 8% is unchanged
#' calc_cv_star(0.09)
#' ## [1] 0.09
#'
#' @export
calc_cv_star <- function(cv) {
  # Vectorized form of the CMH-17-1G piecewise rule. The previous
  # implementation used a scalar `if` chain (which errors for vector input
  # in R >= 4.2) and contained a dead `else { cv <- cv }` branch. For
  # scalar input the result is unchanged. At the boundaries: cv = 0.04
  # gives 0.04 / 2 + 0.04 = 0.06 and cv = 0.08 is returned unchanged,
  # exactly as before.
  ifelse(cv < 0.04, 0.06,
         ifelse(cv < 0.08, cv / 2 + 0.04, cv))
}
This function first calculates the modified #' CV value from the data from each condition (independently). Then, #' within each condition, the transformation #' above is applied to produce the transformed data \eqn{x'}. #' This transformed data is further transformed using the following #' equation. #' #' \deqn{x_{ij}'' = C (x'_{ij} - \bar{x_i}) + \bar{x_i}}{ #' x_ij'' = C (x'_ij - x_bar_i) + x_bar_i} #' #' Where: #' #' \deqn{C = \sqrt{\frac{SSE^*}{SSE'}}}{C = sqrt(SSE* / SSE')} #' #' \deqn{SSE^* = (n-1) (CV^* \bar{x})^2 - \sum(n_i(\bar{x_i}-\bar{x})^2)}{ #' SSE* = (n-1) (CV* x_bar)^2 - sum(n_i(x_bar_i-x_bar)^2)} #' #' \deqn{SSE' = \sum(x'_{ij} - \bar{x_i})^2}{SSE' = sum(x'_ij - x_bar_i)^2} #' #' #' @param x a vector of data to transform #' @param condition a vector indicating the condition to which each #' observation belongs #' @param batch a vector indicating the batch to which each observation #' belongs #' #' @return #' A vector of transformed data #' #' @examples #' # Transform data according to the modified CV transformation #' # and report the original and modified CV for each condition #' #' library(dplyr) #' carbon.fabric %>% #' filter(test == "FT") %>% #' group_by(condition) %>% #' mutate(trans_strength = transform_mod_cv(strength)) %>% #' head(10) #' #' ## # A tibble: 10 x 6 #' ## # Groups: condition [1] #' ## id test condition batch strength trans_strength #' ## #' ## 1 FT-RTD-1-1 FT RTD 1 126. 126. #' ## 2 FT-RTD-1-2 FT RTD 1 139. 141. #' ## 3 FT-RTD-1-3 FT RTD 1 116. 115. #' ## 4 FT-RTD-1-4 FT RTD 1 132. 133. #' ## 5 FT-RTD-1-5 FT RTD 1 129. 129. #' ## 6 FT-RTD-1-6 FT RTD 1 130. 130. #' ## 7 FT-RTD-2-1 FT RTD 2 131. 131. #' ## 8 FT-RTD-2-2 FT RTD 2 124. 124. #' ## 9 FT-RTD-2-3 FT RTD 2 125. 125. #' ## 10 FT-RTD-2-4 FT RTD 2 120. 119. 
#'
#' # The CV of this transformed data can be computed to verify
#' # that the resulting CV follows the rules for modified CV
#'
#' carbon.fabric %>%
#'   filter(test == "FT") %>%
#'   group_by(condition) %>%
#'   mutate(trans_strength = transform_mod_cv(strength)) %>%
#'   summarize(cv = sd(strength) / mean(strength),
#'             mod_cv = sd(trans_strength) / mean(trans_strength))
#'
#' ## # A tibble: 3 x 3
#' ##   condition     cv mod_cv
#' ##
#' ## 1 CTD       0.0423 0.0612
#' ## 2 ETW       0.0369 0.0600
#' ## 3 RTD       0.0621 0.0711
#'
#' @seealso [calc_cv_star()]
#' @seealso [cv()]
#'
#' @name transform_mod_cv
NULL

# Second transformation step for transform_mod_cv_ad, applied within a single
# condition. Takes the data `x`, its batch assignments, and the target
# modified CV for the condition. First applies the per-batch modified-CV
# transformation (x'), then scales the within-batch residuals by
# C = sqrt(SSE* / SSE') so that the condition as a whole attains the
# modified CV, per the equations in the roxygen docs above.
transform_mod_cv_2_within_condition <- function(x, batch, cv_star) {  # nolint
  if (length(x) != length(batch)) {
    stop("x and batches must be the same length")
  }
  # x' : data after the per-batch modified-CV transformation
  x_prime <- transform_mod_cv_grouped(x, batch)
  n <- length(x)
  x_bar <- mean(x)
  # SSE' = sum over observations of (x'_ij - x_bar_i)^2,
  # where x_bar_i is the mean of observation i's batch
  sse_prime <- sum(vapply(seq(along.with = x), function(i) {
    x_prime_i <- x_prime[i]
    x_bar_i <- mean(x[batch == batch[i]])
    (x_prime_i - x_bar_i) ^ 2
  }, FUN.VALUE = numeric(1L)))
  # SSE* = (n-1) (CV* x_bar)^2 - sum_i n_i (x_bar_i - x_bar)^2
  sse_star <- (n - 1) * (cv_star * x_bar) ^ 2 -
    sum(vapply(unique(batch), function(gi) {
      n_i <- sum(batch == gi)
      x_bar_i <- mean(x[batch == gi])
      n_i * (x_bar_i - x_bar) ^ 2
    }, FUN.VALUE = numeric(1L)))
  # C = sqrt(SSE* / SSE'): scale factor applied to within-batch residuals
  c_prime <- sqrt(sse_star / sse_prime)
  # x'' = C (x' - x_bar_i) + x_bar_i : batch means are left unchanged
  res <- vapply(seq(along.with = x), function(i) {
    x_bar_i <- mean(x[batch == batch[i]])
    c_prime * (x_prime[i] - x_bar_i) + x_bar_i
  }, FUN.VALUE = numeric(1L))
  res
}

#' @rdname transform_mod_cv
#' @export
transform_mod_cv_ad <- function(x, condition, batch) {
  # A NULL batch or condition is treated as a single group
  if (is.null(batch)) {
    batch <- rep("A", length(x))
  }
  if (is.null(condition)) {
    condition <- rep("A", length(x))
  }
  if (length(x) != length(batch)) {
    stop("x and batches must be the same length")
  }
  if (length(x) != length(condition)) {
    stop("x and conditions must be the same length")
  }
  # res grows by logical-index assignment: each condition's transformed
  # values are written back into their original positions
  res <- numeric(0)
  for (ci in unique(condition)) {
    x_condition <- x[condition == ci]
    # Modified CV target is computed independently for each condition
    cv_star <- calc_cv_star(sd(x_condition) / mean(x_condition))
    res[condition == ci] <- transform_mod_cv_2_within_condition(
      x_condition,
      batch[condition == ci],
      cv_star
    )
  }
  res
}

# Applies the basic modified-CV transformation
#   Si*/Si (xij - x_bar_i) + x_bar_i
# independently within each group: each observation's residual from its
# group mean is scaled by the ratio of the modified to the observed
# standard deviation.
transform_mod_cv_grouped <- function(x, group) {
  if (length(x) != length(group)) {
    stop("x and groups must be the same length")
  }
  res <- vapply(seq(along.with = x), function(i) {
    xi <- x[i]
    cur_group <- x[group == group[i]]
    s <- sd(cur_group)
    x_bar <- mean(cur_group)
    cv <- s / x_bar
    cv_star <- calc_cv_star(cv)
    # Modified standard deviation: S* = CV* * x_bar
    s_star <- cv_star * x_bar
    s_star / s * (xi - x_bar) + x_bar
  }, FUN.VALUE = numeric(1L))
  res
}

#' @rdname transform_mod_cv
#' @export
transform_mod_cv <- function(x) {
  # Single-group case: all observations share one (dummy) group
  group <- rep("A", length(x))
  transform_mod_cv_grouped(x, group)
}

# This function attempts to catch common errors, such as the
# user passing a vector into df and not specifying x (as would happen
# if the user called a function from inside summarize without using
# the named argument x).
#
# @param df a data.frame or NULL
# @param x the vector
# @param c the call
# @param arg_name the name of the vector argument being targeted
#
# Aborts with a suggested corrected call when `df` is a plain vector
# (i.e. the user passed data positionally where a data.frame was expected);
# otherwise does nothing.
#' @importFrom rlang enquo eval_tidy abort quo_text call_name
#' @importFrom rlang call_args call_args_names
verify_tidy_input <- function(df, x, c, arg_name) {
  if (is.vector(df)) {
    # Reconstruct the user's call to suggest the correct named-argument form
    cname <- call_name(c)
    cargs <- call_args(c)
    cargnames <- call_args_names(c)
    abort(
      paste0(
        "Argument `", cargnames[[1]],
        "` should be either a data.frame or NULL.\n",
        " Did you mean: `", cname, "(", arg_name, " = ",
        quo_text(cargs[[1]]), ")` ?\n"
      )
    )
  }
}
#' #' @param data a data.frame #' @param x the variable in the data.frame on which to perform the #' Anderson--Darling k-Sample test (usually strength) #' @param groups a variable in the data.frame that defines the groups #' @param alpha the significance level (default 0.025) #' #' @return #' Returns an object of class `adk`. This object has the following fields: #' - `call` the expression used to call this function #' - `data` the original data used to compute the ADK #' - `groups` a vector of the groups used in the computation #' - `alpha` the value of alpha specified #' - `n` the total number of observations #' - `k` the number of groups #' - `sigma` the computed standard deviation of the test statistic #' - `ad` the value of the Anderson--Darling k-Sample test statistic #' - `p` the computed p-value #' - `reject_same_dist` a boolean value indicating whether the null #' hypothesis that all samples come from the same distribution is rejected #' - `raw` the original results returned from #' [ad.test][kSamples::ad.test] #' #' #' @details #' This function is a wrapper for the [ad.test][kSamples::ad.test] function from #' the package `kSamples`. The method "exact" is specified in the call to #' `ad.test`. Refer to that package's documentation for details. #' #' There is a minor difference in the formulation of the Anderson--Darling #' k-Sample test in CMH-17-1G, compared with that in the Scholz and #' Stephens (1987). This difference affects the test statistic and the #' critical value in the same proportion, and therefore the conclusion of #' the test is unaffected. When #' comparing the test statistic generated by this function to that generated #' by software that uses the CMH-17-1G formulation (such as ASAP, CMH17-STATS, #' etc.), the test statistic reported by this function will be greater by #' a factor of \eqn{(k - 1)}, with a corresponding change in the critical #' value. 
#'
#' For more information about the difference between this function and
#' the formulation in CMH-17-1G, see the vignette on the subject, which
#' can be accessed by running `vignette("adktest")`
#'
#' @references
#' F. W. Scholz and M. Stephens, “K-Sample Anderson--Darling Tests,” Journal
#' of the American Statistical Association, vol. 82, no. 399. pp. 918–924,
#' Sep-1987.
#'
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @examples
#' library(dplyr)
#'
#' carbon.fabric %>%
#'   filter(test == "WT") %>%
#'   filter(condition == "RTD") %>%
#'   ad_ksample(strength, batch)
#' ##
#' ## Call:
#' ## ad_ksample(data = ., x = strength, groups = batch)
#' ##
#' ## N = 18          k = 3
#' ## ADK = 0.912     p-value = 0.95989
#' ## Conclusion: Samples come from the same distribution ( alpha = 0.025 )
#'
#' @importFrom rlang enquo eval_tidy
#' @importFrom kSamples ad.test
#' @export
ad_ksample <- function(data = NULL, x, groups, alpha = 0.025) {
  res <- list()
  class(res) <- "adk"
  res$call <- match.call()
  # Catch the common mistake of passing a vector positionally where a
  # data.frame was expected (e.g. calling from within summarize())
  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  res$data <- eval_tidy(enquo(x), data)
  verify_tidy_input(
    df = data,
    x = groups,
    c = match.call(),
    arg_name = "groups")
  res$groups <- eval_tidy(enquo(groups), data)
  if (length(res$data) != length(res$groups)) {
    # Note: stop() already prefixes "Error: ", so the message itself must
    # not repeat it (previously produced "Error: Error: ...")
    stop("`x` and `groups` must be of same length.")
  }
  res$alpha <- alpha
  # (Removed dead code: `res$transformed_data <- NULL` was a no-op, since
  # assigning NULL to an absent list element adds nothing.)
  # Split the observations into one vector per group level for ad.test
  grps <- lapply(levels(as.factor(res[["groups"]])),
    function(l) {
      res[["data"]][res[["groups"]] == l]
    }
  )
  raw <- ad.test(grps, method = "exact")
  res$n <- raw$N
  res$k <- raw$k
  res$sigma <- raw$sig
  # Row 2 of raw$ad is the version of the statistic adjusted for ties;
  # columns 1 and 3 are the statistic and its p-value
  res$ad <- raw$ad[2, 1]
  res$p <- raw$ad[2, 3]
  res$reject_same_dist <- res$p < alpha
  res$raw <- raw
  return(res)
}
#' of summaries.
#'
#' Glance does not do any calculations: it just gathers the results in a
#' tibble.
#'
#' @param x an `adk` object
#' @param ... Additional arguments. Not used. Included only to match generic
#'            signature.
#'
#'
#' @return
#' A one-row [tibble::tibble()] with the following
#' columns:
#'
#' - `alpha` the significance level for the test
#' - `n` the sample size for the test
#' - `k` the number of samples
#' - `sigma` the computed standard deviation of the test statistic
#' - `ad` the test statistic
#' - `p` the p-value of the test
#' - `reject_same_dist` whether the test concludes that the samples
#'   are drawn from different populations
#'
#'
#' @seealso
#' [ad_ksample()]
#'
#' @examples
#' x <- c(rnorm(20, 100, 5), rnorm(20, 105, 6))
#' k <- c(rep(1, 20), rep(2, 20))
#' a <- ad_ksample(x = x, groups = k)
#' glance(a)
#'
#' ## A tibble: 1 x 7
#' ##   alpha     n     k sigma    ad       p reject_same_dist
#' ##
#' ## 1 0.025    40     2 0.727  4.37 0.00487 TRUE
#'
#' @method glance adk
#' @importFrom tibble tibble
#'
#' @export
glance.adk <- function(x, ...) {  # nolint
  # Simply copies the fields of the adk object into a one-row tibble;
  # with() lets each field be referenced by bare name.
  # nolint start: object_usage_linter
  with(
    x,
    tibble::tibble(
      alpha = alpha,
      n = n,
      k = k,
      sigma = sigma,
      ad = ad,
      p = p,
      reject_same_dist = reject_same_dist
    )
  )
  # nolint end
}

# Prints a human-readable summary of an adk (Anderson--Darling k-Sample)
# object: the original call, the sample size / group count, the test
# statistic and p-value, and the test conclusion at the stored alpha.
#' @export
print.adk <- function(x, ...) {
  cat("\nCall:\n",
      paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
  # Two left-justified 16-character columns for the statistics table;
  # format_row_equal is a file-local formatting helper.
  justify <- c("left", "left")
  width <- c(16L, 16L)
  cat(format_row_equal(list("N", x$n, "k", x$k),
                       justify, width, ...))
  cat(format_row_equal(list("ADK", x$ad, "p-value", x$p),
                       justify, width, ...))
  if (x$reject_same_dist) {
    cat("Conclusion: Samples do not come from the same distribution (alpha =",
        x$alpha, ")\n\n")
  } else {
    cat("Conclusion: Samples come from the same distribution ( alpha =",
        x$alpha, ")\n\n")
  }
}

#' Performs a list of checks according to specified rules
#'
#' @description
#' Takes a named list of function (the rules). Each function should take
#' the arguments it requires plus ... Each function should return TRUE
#' if the rule is satisfied and FALSE otherwise.
#'
#' This function will produce warning(s) for each of the rules that fail.
#'
#' @param rules a named list of rules
#' @param override a vector of names of rules to ignore
#' @param ... arguments to pass to the rules
#'
#' @return
#' a named vector of characters. "P" if the test passed,
#' "F" if the test failed, "O" if the test was overridden,
#' and NA if the test was not run
#' due to a missing argument.
#'
#' @noRd
#'
#' @importFrom rlang list2
#' @importFrom rlang fn_fmls_names
#' @importFrom rlang warn
#' @importFrom rlang inform
perform_checks <- function(rules, override = c(), ...) {
  args <- list2(...)
  # One status character per rule, in the order the rules are listed
  vapply(names(rules), function(cur_rule_name) {
    if (!(cur_rule_name %in% override)) {
      cur_rule <- rules[[cur_rule_name]]
      all_formal_names <- fn_fmls_names(cur_rule)
      # A formal is "missing" when no argument of that name was supplied;
      # `...` itself is never considered missing
      missing_formals <- vapply(
        all_formal_names,
        function(cur_formal_name) {
          is.null(args[[cur_formal_name]]) & cur_formal_name != "..."
        },
        FUN.VALUE = logical(1L)
      )
      if (!any(missing_formals)) {
        # Rules signal failure by returning a non-empty message string.
        # Errors raised inside a rule are re-raised with the rule's name
        # attached for easier diagnosis.
        message <- tryCatch(
          do.call(cur_rule, args),
          error = function(e) {
            stop(paste0("During evaluation of `", cur_rule_name, "`: ", e))
          })
        if (message != "") {
          warn(paste0("`", cur_rule_name, "` failed: ", message))
          return("F")
        }
        return("P")
      } else {
        # Rule skipped: tell the user which parameter(s) were not supplied
        inform(
          paste0("`", cur_rule_name, "` not run because parameter",
                 ifelse(sum(missing_formals) > 1, "s", ""),
                 " `",
                 paste(all_formal_names[missing_formals],
                       collapse = "`, `"),
                 "` not specified")
        )
        return(NA_character_)
      }
    } else {
      # in override list
      return("O")
    }
  }, FUN.VALUE = character(1L))
}

#' Gets the names of the diagnostic tests that failed
#'
#' @param x a named character vector created by perform_checks
#'
#' @return
#' A character vector of the tests that failed (if any)
#'
#' @noRd
get_check_failure_names <- function(x) {
  # NA entries (tests not run) must be excluded before comparing with "F"
  names(x[x == "F" & !is.na(x)])
}

# Expands and validates the user-supplied `override` vector against the
# named rule list: "all" expands to every rule name, duplicates are
# removed, and a warning is raised for any name that is not a known rule.
process_overrides <- function(override, rules) {
  if ("all" %in% override) {
    # Remove the "all" value
    override <- override[!override %in% "all"]
    # Add all of the rules to the override vector
    override <- c(override, names(rules))
  }
  # Keep only the unique values of override
  override <- unique(override)
  # Warn if there are invalid overrides
  for (ov in override) {
    if (!ov %in% names(rules)) {
      warn(paste0("`", ov, "` is not a valid diagnostic test to override"))
    }
  }
  override
}
The joint distribution between the sample mean #' and sample minimum is used to generate these thresholds. #' When there is no true difference between the existing ("qualification") #' and the new population from which the sample is obtained, there is a #' probability of \eqn{\alpha} of falsely concluding that there is a #' difference in mean or variance. It is assumed that both the original #' and new populations are normally distributed. #' According to Vangel (2002), this test provides improved power compared #' with a test of mean and standard deviation. #' #' @param df_qual (optional) a data.frame containing the qualification data. #' Defaults to NULL. #' @param alpha the acceptable probability of a type I error #' @param data_sample (optional) a vector of observations from the sample for #' which equivalency is being tested. Defaults to NULL #' @param n_sample (optional) the number of observations in the sample for #' which equivalency will be tested. Defaults to NULL #' @param data_qual (optional) a vector of observations from the #' "qualification" data to which equivalency is being tested. Or the column of #' `df_qual` that contains this data. Defaults to NULL #' @param mean_qual (optional) the mean from the "qualification" data to which #' equivalency is being tested. Defaults to NULL #' @param sd_qual (optional) the standard deviation from the "qualification" #' data to which equivalency is being tested. Defaults to NULL #' @param modcv (optional) a boolean value indicating whether a modified CV #' should be used. Defaults to FALSE, in which case the standard deviation #' supplied (or calculated from `data_qual`) will be used directly. #' #' @return #' Returns an object of class `equiv_mean_extremum`. 
This object is a list #' with the following named elements: #' #' - `call` the expression used to call this function #' - `alpha` the value of alpha passed to this function #' - `n_sample` the number of observations in the sample for which #' equivalency is being checked. This is either the value `n_sample` #' passed to this function or the length of the vector `data_sample`. #' - `k1` the factor used to calculate the minimum individual #' threshold. The minimum individual threshold is calculated as #' \eqn{W_{min} = qual\,mean - k_1 \cdot qual\,sd}{ #' Wmin = qual_mean - k1 * qual_sd} #' - `k2` the factor used to calculate the threshold for mean. The #' threshold for mean is calculated as #' \eqn{W_{mean} = qual\,mean - k_2 \cdot qual\,sd}{ #' Wmean = qual_mean - k2 * qual_sd} #' - `modcv` logical value indicating whether the acceptance #' thresholds are calculated using the modified CV approach #' - `cv` the coefficient of variation of the qualification data. #' This value is not modified, even if `modcv=TRUE` #' - `cv_star` The modified coefficient of variation. If #' `modcv=FALSE`, this will be `NULL` #' - `threshold_min_indiv` The calculated threshold value for #' minimum individual #' - `threshold_mean` The calculated threshold value for mean #' - `result_min_indiv` a character vector of either "PASS" or #' "FAIL" indicating whether the data from `data_sample` passes the #' test for minimum individual. If `data_sample` was not supplied, #' this value will be `NULL` #' - `result_mean` a character vector of either "PASS" or #' "FAIL" indicating whether the data from `data_sample` passes the #' test for mean. If `data_sample` was not supplied, this value will #' be `NULL` #' - `min_sample` The minimum value from the vector #' `data_sample`. if `data_sample` was not supplied, this will #' have a value of `NULL` #' - `mean_sample` The mean value from the vector #' `data_sample`. 
If `data_sample` was not supplied, this will #' have a value of `NULL` #' #' @details #' This function is used to #' determine acceptance limits for a sample mean and sample minimum. #' These acceptance limits are often used to set acceptance limits for #' material strength for each lot of material, or each new manufacturing #' site. When a sample meets the criteria that its mean and its minimum are #' both greater than these limits, then one may accept the lot of material #' or the new manufacturing site. #' #' This procedure is used to ensure that the strength of material processed #' at a second site, or made with a new batch of material are not degraded #' relative to the data originally used to determine basis values for the #' material. For more information about the use of this procedure, see #' CMH-17-1G or PS-ACE 100-2002-006. #' #' There are several optional arguments to this function. However, you can't #' omit all of the optional arguments. You must supply either #' `data_sample` or `n_sample`, but not both. You must also supply #' either `data_qual` (and `df_qual` if `data_qual` is a #' variable name and not a vector) or both `mean_qual` and `sd_qual`, #' but if you supply `data_qual` (and possibly `df_qual`) you should #' not supply either `mean_qual` or `sd_qual` (and visa-versa). This #' function will issue a warning or error if you violate any of these rules. #' #' If `modcv` is TRUE, the standard deviation used to calculate the #' thresholds will be replaced with a standard deviation calculated #' using the Modified Coefficient of Variation (CV) approach. #' The Modified CV approach is a way of adding extra variance to the #' qualification data in the case that the qualification data has less #' variance than expected, which sometimes occurs when qualification testing #' is performed in a short period of time. 
#' Using the Modified CV approach, the standard deviation is calculated by
#' multiplying `CV_star * mean_qual` where `mean_qual` is either the
#' value supplied or the value calculated by `mean(data_qual)` and
#' \eqn{CV*} is the value computed by [calc_cv_star()].
#'
#' @examples
#' equiv_mean_extremum(alpha = 0.01, n_sample = 6,
#'                     mean_qual = 100, sd_qual = 5.5, modcv = TRUE)
#' ##
#' ## Call:
#' ## equiv_mean_extremum(mean_qual = 100, sd_qual = 5.5, n_sample = 6,
#' ##     alpha = 0.01, modcv = TRUE)
#' ##
#' ## Modified CV used: CV* = 0.0675 ( CV = 0.055 )
#' ##
#' ## For alpha = 0.01 and n = 6
#' ## ( k1 = 3.128346 and k2 = 1.044342 )
#' ##                   Min Individual   Sample Mean
#' ##      Thresholds:        78.88367      92.95069
#'
#' @seealso [k_equiv()]
#' @seealso [calc_cv_star()]
#'
#' @references
#' M. G. Vangel. Lot Acceptance and Compliance Testing Using the Sample Mean
#' and an Extremum, Technometrics, vol. 44, no. 3. pp. 242–249. 2002.
#'
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' Federal Aviation Administration, “Material Qualification and Equivalency
#' for Polymer Matrix Composite Material Systems,” PS-ACE 100-2002-006,
#' Sep. 2003.
#'
#' @importFrom rlang enquo eval_tidy
#'
#' @export
equiv_mean_extremum <- function(df_qual = NULL, data_qual = NULL,
                                mean_qual = NULL, sd_qual = NULL,
                                data_sample = NULL, n_sample = NULL,
                                alpha, modcv = FALSE) {
  res <- list()
  class(res) <- "equiv_mean_extremum"
  res$call <- match.call()
  # alpha is a probability: must lie strictly within (0, 1)
  if (alpha <= 0) {
    stop("alpha must be positive")
  }
  if (alpha >= 1) {
    stop("alpha must be less than 1")
  }
  # If a data.frame was supplied, resolve data_qual as a column of it
  if (!is.null(df_qual)) {
    data_qual_enq <- enquo(data_qual)
    data_qual <- eval_tidy(data_qual_enq, df_qual)
  }
  # data_sample takes precedence over an explicitly supplied n_sample
  if (!is.null(data_sample)) {
    if (!is.null(n_sample)) {
      warning("Both data_sample and n_sample were supplied.
              n_sample ignored.")
    }
    n_sample <- length(data_sample)
  }
  # data_qual takes precedence over explicitly supplied mean_qual / sd_qual
  if (!is.null(data_qual)) {
    if (!is.null(mean_qual)) {
      warning("Both data_qual and mean_qual were supplied. mean_qual ignored.")
    }
    mean_qual <- mean(data_qual)
    if (!is.null(sd_qual)) {
      warning("Both data_qual and sd_qual were supplied. sd_qual ignored.")
    }
    sd_qual <- stats::sd(data_qual)
  }
  # After resolution, the sample size and qualification statistics must
  # all be available; otherwise the thresholds cannot be computed
  if (is.null(n_sample)) {
    stop(paste("Number of observations in sample not defined.",
               "You must provide either data_sample or n_sample"))
  }
  if (is.null(mean_qual)) {
    stop(paste("Qualification mean not defined.",
               "You must provide either data_qual or mean_qual"))
  }
  if (is.null(sd_qual)) {
    stop(paste("Qualification SD not defined.",
               "You must provide either data_qual or sd_qual"))
  }
  # k[1] (k1) is the factor for the minimum individual threshold,
  # k[2] (k2) the factor for the mean threshold
  k <- k_equiv(alpha = alpha, n = n_sample)
  res$alpha <- alpha
  res$n_sample <- n_sample
  res$k1 <- k[1]
  res$k2 <- k[2]
  cv <- sd_qual / mean_qual
  res$modcv <- modcv
  res$cv <- cv
  res$cv_star <- NULL
  if (modcv) {
    # Modified CV: replace the qualification SD with CV* times the mean.
    # res$cv keeps the unmodified CV; res$cv_star records CV*.
    cv <- calc_cv_star(cv)
    res$cv_star <- cv
    sd_qual <- cv * mean_qual
  }
  # Acceptance thresholds: W = qual_mean - k * qual_sd
  res$threshold_min_indiv <- mean_qual - sd_qual * res$k1
  res$threshold_mean <- mean_qual - sd_qual * res$k2
  # Sample-evaluation fields stay NULL unless data_sample was supplied
  res$result_min_indiv <- NULL
  res$result_mean <- NULL
  res$min_sample <- NULL
  res$mean_sample <- NULL
  if (!is.null(data_sample)) {
    res$min_sample <- min(data_sample)
    res$mean_sample <- mean(data_sample)
    res$result_min_indiv <- ifelse(res$min_sample >= res$threshold_min_indiv,
                                   "PASS",
                                   "FAIL")
    res$result_mean <- ifelse(res$mean_sample >= res$threshold_mean,
                              "PASS",
                              "FAIL")
  }
  return(res)
}
#'
#'
#' @return
#' A one-row [tibble::tibble()] with the following
#' columns:
#'
#' - `alpha` the value of alpha passed to this function
#' - `n_sample` the number of observations in the sample for which
#'   equivalency is being checked. This is either the value `n_sample`
#'   passed to this function or the length of the vector `data_sample`.
#' - `modcv` logical value indicating whether the acceptance
#'   thresholds are calculated using the modified CV approach
#' - `threshold_min_indiv` The calculated threshold value for
#'   minimum individual
#' - `threshold_mean` The calculated threshold value for mean
#' - `result_min_indiv` a character vector of either "PASS" or
#'   "FAIL" indicating whether the data from `data_sample` passes the
#'   test for minimum individual. If `data_sample` was not supplied,
#'   this value will be `NULL`
#' - `result_mean` a character vector of either "PASS" or
#'   "FAIL" indicating whether the data from `data_sample` passes the
#'   test for mean. If `data_sample` was not supplied, this value will
#'   be `NULL`
#' - `min_sample` The minimum value from the vector
#'   `data_sample`. If `data_sample` was not supplied, this will
#'   have a value of `NULL`
#' - `mean_sample` The mean value from the vector
#'   `data_sample`. If `data_sample` was not supplied, this will
#'   have a value of `NULL`
#'
#'
#' @seealso
#' [equiv_mean_extremum()]
#'
#' @examples
#' x0 <- rnorm(30, 100, 4)
#' x1 <- rnorm(5, 91, 7)
#' eq <- equiv_mean_extremum(data_qual = x0, data_sample = x1, alpha = 0.01)
#' glance(eq)
#'
#' ## # A tibble: 1 x 9
#' ##   alpha n_sample modcv threshold_min_indiv threshold_mean
#' ##
#' ## 1  0.01        5 FALSE                86.2           94.9
#' ## # ... with 4 more variables: result_min_indiv , result_mean ,
#' ## #   min_sample , mean_sample
#'
#' @method glance equiv_mean_extremum
#' @importFrom tibble tibble
#'
#' @export
glance.equiv_mean_extremum <- function(x, ...) { # nolint
  # Always-present fields go into the tibble directly
  res <- tibble::tibble(
    alpha = x[["alpha"]],
    n_sample = x[["n_sample"]],
    modcv = x[["modcv"]],
    threshold_min_indiv = x[["threshold_min_indiv"]],
    threshold_mean = x[["threshold_mean"]]
  )
  # Sample-evaluation columns are only added when a sample was evaluated
  # (i.e. when equiv_mean_extremum was given data_sample)
  if (!is.null(x[["result_min_indiv"]])) {
    res[["result_min_indiv"]] <- x[["result_min_indiv"]]
  }
  if (!is.null(x[["result_mean"]])) {
    res[["result_mean"]] <- x[["result_mean"]]
  }
  if (!is.null(x[["min_sample"]])) {
    res[["min_sample"]] <- x[["min_sample"]]
  }
  if (!is.null(x[["mean_sample"]])) {
    res[["mean_sample"]] <- x[["mean_sample"]]
  }
  res
}

# Prints a human-readable summary of an equiv_mean_extremum object: the
# call, the (possibly modified) CV, the k1/k2 factors, and a small table
# of the thresholds, with the sample statistics and PASS/FAIL results
# included when a sample was evaluated. format_row is a file-local helper.
#' @export
print.equiv_mean_extremum <- function(x, ...) {
  cat("\nCall:\n",
      paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
  justify <- c("right", "centre", "centre")
  col_width <- c(16L, 16L, 16L)
  if (x$modcv) {
    cat("Modified CV used: CV* =", format(x$cv_star, ...),
        "( CV =", format(x$cv, ...), ")\n\n")
  }
  cat("For alpha =", format(x$alpha, ...),
      "and n =", format(x$n_sample, ...), "\n( k1 =",
      format(x$k1, ...), "and k2 =", format(x$k2, ...), ")\n")
  cat(format_row(list("", "Min Individual", "Sample Mean"),
                 justify, col_width, ...))
  # Sample row and equivalency row only appear when a sample was supplied
  if (!is.null(x$min_sample)) {
    cat(format_row(list("Sample:", x$min_sample, x$mean_sample),
                   justify, col_width, ...))
  }
  cat(format_row(list("Thresholds:", x$threshold_min_indiv, x$threshold_mean),
                 justify, col_width, ...))
  if (!is.null(x$result_min_indiv)) {
    cat(format_row(list("Equivalency:", x$result_min_indiv, x$result_mean),
                   justify, col_width, ...))
  }
}
#'
#' This function is used as part of the procedure for
#' determining acceptance limits for a sample mean and sample minimum.
#' These acceptance limits are often used to set acceptance limits for
#' material strength for each lot of material, or each new manufacturing
#' site. When a sample meets the criteria that its mean and its minimum are
#' both greater than these limits, then one may accept the lot of material
#' or the new manufacturing site.
#'
#' This procedure is used to ensure that the strength of material processed
#' at a second site, or made with a new batch of material are not degraded
#' relative to the data originally used to determine basis values for the
#' material. For more information about the use of this procedure, see
#' CMH-17-1G or PS-ACE 100-2002-006.
#'
#' According to Vangel (2002), the use of mean and extremum for this purpose
#' is more powerful than the use of mean and standard deviation.
#'
#' The results of this function match those published by Vangel within
#' 0.05\% for \eqn{n > 2} and \eqn{\alpha \le 0.25}. Those results published
#' by Vangel are identical to those published in CMH-17-1G.
#'
#' This function uses numerical integration and numerical optimization to
#' find values of the factors \eqn{k_1} and \eqn{k_2} based on Vangel's
#' saddle point approximation.
#'
#' The value \eqn{n} refers to the number of observations in the sample
#' being compared with the original population (the qualification sample is
#' usually assumed to be equal to the population statistics).
#'
#' The value of \eqn{alpha} is the acceptable probability of a type I error.
#' Normally, this is set to 0.05 for material or process equivalency and 0.01
#' when setting lot acceptance limits. Though, in principle, this parameter
#' can be set to any number between 0 and 1. This function, however, has only
#' been validated in the range of \eqn{1e-5 \le alpha \le 0.5}.
#'
#' @references
#' M. G. Vangel. Lot Acceptance and Compliance Testing Using the Sample Mean
#' and an Extremum, Technometrics, vol. 44, no. 3. pp. 242–249. 2002.
#'
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' Federal Aviation Administration, “Material Qualification and Equivalency
#' for Polymer Matrix Composite Material Systems,” PS-ACE 100-2002-006,
#' Sep. 2003.
#'
#' @seealso
#' [equiv_mean_extremum()]
#'
#' @examples
#' qual_mean <- 100
#' qual_sd <- 3.5
#' k <- k_equiv(0.01, 5)
#' print("Minimum Individual Acceptance Limit:")
#' print(qual_mean - qual_sd * k[1])
#' print("Minimum Average Acceptance Limit:")
#' print(qual_mean - qual_sd * k[2])
#'
#' ## [1] "Minimum Individual Acceptance Limit:"
#' ## [1] 89.24981
#' ## [1] "Minimum Average Acceptance Limit:"
#' ## [1] 96.00123
#'
#' @export
k_equiv <- function(alpha, n) {
  # If you are making changes to this function, you should run the full
  # validation test in test/test-equiv.R by setting full_test = TRUE.
  # The full validation test is not run by default as it takes several
  # minutes to run, which is impractical for normal test-driven-development
  # workflows.

  # Input validation: alpha must be a probability strictly inside (0, 1),
  # and the method requires at least two observations.
  if (alpha <= 0) {
    stop("alpha must be positive")
  }
  if (alpha >= 1) {
    stop("alpha must be less than 1")
  }
  if (alpha < 1e-5 || alpha > 0.5) {
    warning(paste("k-factor solution has only been validated",
                  "for 1e-5 <= alpha <= 0.5"))
  }
  if (n < 2) {
    stop("n must be greater than 2")
  }

  # In order to speed up convergence, a linear model was fit to estimate the
  # values of k1 and k2. These estimated values are used as a starting point
  # for the numerical optimization
  theta <- c(
    0.318 * log(n) - 0.285 * log(alpha) + 1.114,
    -0.16751 * log(n) - 0.29011 * log(alpha) +
      0.08414 * log(n) * log(alpha) + 0.65848
  )

  # The function f returns a penalty value for use in numerical optimization.
  # The first parameter, k, should be a vector of c(k1, k2).
  f <- function(k, n, alpha) {
    # fx1: P(min of n iid standard normals <= -k1)
    # (1 - P(X > -k1)^n, from the complement of all n observations exceeding
    # -k1)
    fx1 <- 1 - (stats::pnorm(-k[1], lower.tail = FALSE)) ** n
    # fxbar: P(mean of n iid standard normals <= -k2)
    fxbar <- stats::pnorm(sqrt(n) * (-k[2]))
    # logh / h: the standard normal hazard function, dnorm(t) / (1 - pnorm(t)),
    # computed on the log scale for numerical stability
    logh <- function(t) {
      stats::dnorm(t, log = TRUE) -
        stats::pnorm(t, lower.tail = FALSE, log.p = TRUE)
    }
    h <- function(t) {
      exp(stats::dnorm(t, log = TRUE) -
            stats::pnorm(t, lower.tail = FALSE, log.p = TRUE))
    }
    # h(t) - t approaches 1 / t for large t; the t > 60 branch substitutes
    # that asymptote to avoid numerical cancellation in the subtraction
    h_minus_t <- function(t) {
      ifelse(t > 60, t ** -1, h(t) - t)
    }
    # lambda_hat: the split point for the piecewise integration below, found
    # as the root of the saddle-point condition relating k1 and k2
    lambda_hat <- stats::uniroot(
      function(lambda) {
        ((n - 1) / n) * h_minus_t(lambda) - k[1] + k[2]
      },
      interval = c(-1000, 1000),
      extendInt = "yes"
    )$root
    # a_fcn: integrand weight from Vangel's saddle-point approximation.
    # The t > 60 branch again substitutes the large-t asymptote under the
    # square root.
    a_fcn <- function(t, n) {
      hmt <- h_minus_t(t)
      exp(
        - (n - 1) * logh(t) + (n - 1) ** 2 / (2 * n) * hmt ** 2 +
          (n - 1) * t * hmt
      ) * sqrt(ifelse(t > 60, t ** -2, 1 - h(t) * hmt))
    }
    # Joint probability term P(min <= -k1 and mean <= -k2), approximated by
    # two numerical integrals split at lambda_hat and normalized by the
    # integral of a_fcn over the whole real line.
    fx1xbar_numerator <- stats::pnorm(sqrt(n) * (-k[2])) * stats::integrate(
      function(t) {
        a_fcn(t, n)
      },
      lower = -Inf, upper = lambda_hat,
      subdivisions = 1000L
    )$value +
      stats::integrate(
        function(t) {
          stats::pnorm(
            sqrt(n) * (-k[1] + (n - 1) / n * h_minus_t(t))
          ) * a_fcn(t, n)
        },
        lower = lambda_hat, upper = Inf,
        subdivisions = 1000L,
        rel.tol = 1e-8
      )$value
    fx1xbar_denominator <- stats::integrate(function(t) a_fcn(t, n),
                                            lower = -Inf,
                                            upper = Inf)$value
    fx1xbar <- fx1xbar_numerator / fx1xbar_denominator
    # Use the sum of the absolute values of the two functions being solved as
    # the penalty function. However, it was found empirically that the first
    # function listed is more sensitive, so give it a higher weight to aid in
    # finding the correct solution
    return(
      abs(fx1 + fxbar - fx1xbar - alpha) * 100 + abs(fx1 - fxbar)
    )
  }

  # Perform the constrained numeric optimization to find the values of k1 and
  # k2. The constraint enforces k1 > k2, which is required such that all
  # values are real
  res <- stats::constrOptim(
    theta, f, grad = NULL, ui = c(1, -1), ci = 0, n = n, alpha = alpha
  )
  # nocov start
  if (res$convergence != 0) {
    warning(
      "k-factor search did not converge. The results are unreliable."
    )
  }
  # nocov end
  return(res$par)
}

#' Equivalency based on change in mean value
#'
#' @description
#' Checks for change in the mean value between a qualification data set and
#' a sample. This is normally used to check for properties such as modulus.
#' This function is a wrapper for a two-sample t--test.
#'
#' @param df_qual (optional) a data.frame containing the qualification data.
#' Defaults to NULL.
#' @param alpha the acceptable probability of a Type I error
#' @param data_sample a vector of observations from the sample being compared
#' for equivalency
#' @param n_sample the number of observations in the sample being compared for
#' equivalency
#' @param mean_sample the mean of the sample being compared for equivalency
#' @param sd_sample the standard deviation of the sample being compared for
#' equivalency
#' @param data_qual (optional) a vector of observations from the
#' "qualification" data to which equivalency is being tested. Or the column of
#' `df_qual` that contains this data. Defaults to NULL
#' @param n_qual the number of observations in the qualification data to which
#' the sample is being compared for equivalency
#' @param mean_qual the mean from the qualification data to which the sample
#' is being compared for equivalency
#' @param sd_qual the standard deviation from the qualification data to which
#' the sample is being compared for equivalency
#' @param modcv a logical value indicating whether the modified CV approach
#' should be used. Defaults to `FALSE`
#'
#' @return
#' - `call` the expression used to call this function
#' - `alpha` the value of alpha passed to this function
#' - `n_sample` the number of observations in the sample for which
#'   equivalency is being checked. This is either the value `n_sample`
#'   passed to this function or the length of the vector `data_sample`.
#' - `mean_sample` the mean of the observations in the sample for
#'   which equivalency is being checked. This is either the value
#'   `mean_sample` passed to this function or the mean of the vector
#'   `data_sample`.
#' - `sd_sample` the standard deviation of the observations in the
#'   sample for which equivalency is being checked. This is either the value
#'   `sd_sample` passed to this function or the standard deviation of
#'   the vector `data_sample`.
#' - `n_qual` the number of observations in the qualification data
#'   to which the sample is being compared for equivalency. This is either
#'   the value `n_qual` passed to this function or the length of the
#'   vector `data_qual`.
#' - `mean_qual` the mean of the qualification data to which the
#'   sample is being compared for equivalency. This is either the value
#'   `mean_qual` passed to this function or the mean of the vector
#'   `data_qual`.
#' - `sd_qual` the standard deviation of the qualification data to
#'   which the sample is being compared for equivalency. This is either the
#'   value `sd_qual` passed to this function or the standard deviation
#'   of the vector `data_qual`.
#' - `modcv` logical value indicating whether the equivalency
#'   calculations were performed using the modified CV approach
#' - `sp` the value of the pooled standard deviation. If
#'   `modcv = TRUE`, this pooled standard deviation includes the
#'   modification to the qualification CV.
#' - `t0` the test statistic
#' - `t_req` the t-value for \eqn{\alpha / 2} and
#'   \eqn{df = n1 + n2 -2}
#' - `threshold` a vector with two elements corresponding to the
#'   minimum and maximum values of the sample mean that would result in a
#'   pass
#' - `result` a character vector of either "PASS" or "FAIL"
#'   indicating the result of the test for change in mean
#'
#' @details
#' There are several optional arguments to this function. Either (but not both)
#' `data_sample` or all of `n_sample`, `mean_sample` and
#' `sd_sample` must be supplied. And, either (but not both)
#' `data_qual`
#' (and also `df_qual` if `data_qual` is a column name and not a
#' vector) or all of `n_qual`, `mean_qual` and `sd_qual` must
#' be supplied. If these requirements are violated, warning(s) or error(s) will
#' be issued.
#'
#' This function uses a two-sample t-test to determine if there is a difference
#' in the mean value of the qualification data and the sample. A pooled
#' standard deviation is used in the t-test. The procedure is per CMH-17-1G.
#'
#' If `modcv` is TRUE, the standard deviation used to calculate the
#' thresholds will be replaced with a standard deviation calculated
#' using the Modified Coefficient of Variation (CV) approach.
#' The Modified CV approach is a way of adding extra variance to the
#' qualification data in the case that the qualification data has less
#' variance than expected, which sometimes occurs when qualification testing
#' is performed in a short period of time.
#' Using the Modified CV approach, the standard deviation is calculated by
#' multiplying `CV_star * mean_qual` where `mean_qual` is either the
#' value supplied or the value calculated by `mean(data_qual)` and
#' \eqn{CV*} is determined using [calc_cv_star()].
#'
#' Note that the modified CV option should only be used if that data passes the
#' Anderson--Darling test.
#'
#' @examples
#' equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02,
#'                   sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24,
#'                   sd_qual = 0.162, modcv = TRUE)
#'
#' ## Call:
#' ## equiv_change_mean(n_qual = 28, mean_qual = 9.24, sd_qual = 0.162,
#' ##     n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785,
#' ##     alpha = 0.05, modcv = TRUE)
#' ##
#' ## For alpha = 0.05
#' ## Modified CV used
#' ##                   Qualification        Sample
#' ##           Number             28             9
#' ##             Mean           9.24          9.02
#' ##               SD          0.162       0.15785
#' ##           Result           PASS
#' ##    Passing Range       8.856695 to 9.623305
#'
#' @references
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @seealso [calc_cv_star()]
#' @seealso [stats::t.test()]
#'
#' @export
equiv_change_mean <- function(df_qual = NULL, data_qual = NULL,
                              n_qual = NULL, mean_qual = NULL, sd_qual = NULL,
                              data_sample = NULL, n_sample = NULL,
                              mean_sample = NULL, sd_sample = NULL,
                              alpha, modcv = FALSE) {
  if (alpha <= 0 || alpha >= 1) {
    stop("alpha must be positive and less than 1")
  }

  # If a data.frame was supplied, resolve data_qual as a column of that
  # data.frame via tidy evaluation.
  if (!is.null(df_qual)) {
    data_qual_enq <- enquo(data_qual)
    data_qual <- eval_tidy(data_qual_enq, df_qual)
  }

  # When raw sample data is supplied, it takes precedence over any summary
  # statistics also supplied (with a warning for each ignored statistic).
  if (!is.null(data_sample)) {
    if (!is.null(n_sample)) {
      warning("Both data_sample and n_sample supplied. n_sample ignored.")
    }
    n_sample <- length(data_sample)

    if (!is.null(mean_sample)) {
      warning("Both data_sample and mean_sample supplied. mean_sample ignored")
    }
    mean_sample <- mean(data_sample)

    if (!is.null(sd_sample)) {
      warning("Both data_sample and sd_sample supplied. sd_sample ignored")
    }
    sd_sample <- stats::sd(data_sample)
  }

  # Same precedence rule for the qualification data.
  if (!is.null(data_qual)) {
    if (!is.null(n_qual)) {
      warning("Both data_qual and n_qual supplied. n_qual ignored.")
    }
    n_qual <- length(data_qual)

    if (!is.null(mean_qual)) {
      warning("Both data_qual and mean_qual supplied. mean_qual ignored")
    }
    mean_qual <- mean(data_qual)

    if (!is.null(sd_qual)) {
      warning("Both data_qual and sd_qual supplied. sd_qual ignored")
    }
    sd_qual <- stats::sd(data_qual)
  }

  # Stop with an informative error if any required statistic is still
  # missing at this point.
  verify_equiv_change_mean_var(n_sample, mean_sample, sd_sample,
                               n_qual, mean_qual, sd_qual)

  res <- list()
  class(res) <- "equiv_change_mean"

  res$call <- match.call()

  res$alpha <- alpha

  res$n_sample <- n_sample
  res$mean_sample <- mean_sample
  res$sd_sample <- sd_sample

  res$n_qual <- n_qual
  res$mean_qual <- mean_qual
  res$sd_qual <- sd_qual

  res$modcv <- modcv

  cv <- sd_qual / mean_qual

  # NOTE(review): res$modcv is assigned twice; this second assignment is
  # redundant but harmless.
  res$modcv <- modcv
  if (modcv) {
    # Replace the qualification SD with one based on the modified CV
    cv <- calc_cv_star(cv)
    sd_qual <- cv * mean_qual
  }

  # Pooled standard deviation across the two groups (n_qual + n_sample - 2
  # degrees of freedom).
  sp <- sqrt(
    ((n_qual - 1) * sd_qual ** 2 + (n_sample - 1) * sd_sample ** 2) /
      (n_qual + n_sample - 2)
  )
  res$sp <- sp

  # Two-sample t statistic for the difference in means.
  t0 <- (mean_sample - mean_qual) / (sp * sqrt(1 / n_qual + 1 / n_sample))
  res$t0 <- t0

  # Two-sided critical value at alpha / 2.
  t_req <- stats::qt(alpha / 2, n_qual + n_sample - 2, lower.tail = FALSE)
  res$t_req <- t_req

  # Range of sample means that would pass the test.
  res$threshold <- c(
    mean_qual - t_req * sp * sqrt(1 / n_qual + 1 / n_sample),
    mean_qual + t_req * sp * sqrt(1 / n_qual + 1 / n_sample)
  )

  res$result <- ifelse(-t_req <= t0 & t0 <= t_req, "PASS", "FAIL")

  return(res)
}

# Internal helper: verify that all six summary statistics required by
# equiv_change_mean are available (either supplied directly or computed from
# data vectors). Stops with an informative error naming the first missing
# value.
verify_equiv_change_mean_var <- function(n_sample, mean_sample, sd_sample,
                                         n_qual, mean_qual, sd_qual) {
  if (is.null(n_sample)) {
    stop("n_sample not set")
  }
  if (is.null(mean_sample)) {
    stop("mean_sample not set")
  }
  if (is.null(sd_sample)) {
    stop("sd_sample not set")
  }
  if (is.null(n_qual)) {
    stop("n_qual not set")
  }
  if (is.null(mean_qual)) {
    stop("mean_qual not set")
  }
  if (is.null(sd_qual)) {
    stop("sd_qual not set")
  }
}

#' Glance at a `equiv_change_mean` object
#'
#' @description
#' Glance accepts an object of type `equiv_change_mean`
#' and returns a [tibble::tibble()] with
#' one row of summaries.
#'
#' Glance does not do any calculations: it just gathers the results in a
#' tibble.
#'
#' @param x a `equiv_change_mean` object returned from
#' [equiv_change_mean()]
#' @param ... Additional arguments. Not used. Included only to match generic
#' signature.
#'
#'
#' @return
#' A one-row [tibble::tibble()] with the following
#' columns:
#'
#' - `alpha` the value of alpha passed to this function
#' - `n_sample` the number of observations in the sample for which
#'   equivalency is being checked. This is either the value `n_sample`
#'   passed to this function or the length of the vector `data_sample`.
#' - `mean_sample` the mean of the observations in the sample for
#'   which equivalency is being checked. This is either the value
#'   `mean_sample` passed to this function or the mean of the vector
#'   `data_sample`.
#' - `sd_sample` the standard deviation of the observations in the
#'   sample for which equivalency is being checked. This is either the value
#'   `sd_sample` passed to this function or the standard deviation of
#'   the vector `data_sample`.
#' - `n_qual` the number of observations in the qualification data
#'   to which the sample is being compared for equivalency. This is either
#'   the value `n_qual` passed to this function or the length of the
#'   vector `data_qual`.
#' - `mean_qual` the mean of the qualification data to which the
#'   sample is being compared for equivalency. This is either the value
#'   `mean_qual` passed to this function or the mean of the vector
#'   `data_qual`.
#' - `sd_qual` the standard deviation of the qualification data to
#'   which the sample is being compared for equivalency. This is either the
#'   value `sd_qual` passed to this function or the standard deviation
#'   of the vector `data_qual`.
#' - `modcv` logical value indicating whether the equivalency
#'   calculations were performed using the modified CV approach
#' - `sp` the value of the pooled standard deviation. If
#'   `modcv = TRUE`, this pooled standard deviation includes the
#'   modification to the qualification CV.
#' - `t0` the test statistic
#' - `t_req` the t-value for \eqn{\alpha / 2} and
#'   \eqn{df = n1 + n2 -2}
#' - `threshold_min` the minimum value of the sample mean that would
#'   result in a pass
#' - `threshold_max` the maximum value of the sample mean that would
#'   result in a pass
#' - `result` a character vector of either "PASS" or "FAIL"
#'   indicating the result of the test for change in mean
#'
#'
#' @seealso
#' [equiv_change_mean()]
#'
#' @examples
#' x0 <- rnorm(30, 100, 4)
#' x1 <- rnorm(5, 91, 7)
#' eq <- equiv_change_mean(data_qual = x0, data_sample = x1, alpha = 0.01)
#' glance(eq)
#'
#' ## # A tibble: 1 x 14
#' ##   alpha n_sample mean_sample sd_sample n_qual mean_qual sd_qual modcv
#' ##   <dbl>    <int>       <dbl>     <dbl>  <int>     <dbl>   <dbl> <lgl>
#' ## 1  0.01        5        85.8      9.93     30      100.    3.90 FALSE
#' ## # ... with 6 more variables: sp <dbl>, t0 <dbl>, t_req <dbl>,
#' ## #   threshold_min <dbl>, threshold_max <dbl>, result <chr>
#'
#' @method glance equiv_change_mean
#' @importFrom tibble tibble
#'
#' @export
glance.equiv_change_mean <- function(x, ...) {  # nolint
  # Every field gathered here is always populated by equiv_change_mean(),
  # so the tibble can be built directly. The two-element threshold vector
  # is split into separate min/max columns.
  tibble::tibble(
    alpha = x[["alpha"]],
    n_sample = x[["n_sample"]],
    mean_sample = x[["mean_sample"]],
    sd_sample = x[["sd_sample"]],
    n_qual = x[["n_qual"]],
    mean_qual = x[["mean_qual"]],
    sd_qual = x[["sd_qual"]],
    modcv = x[["modcv"]],
    sp = x[["sp"]],
    t0 = x[["t0"]],
    t_req = x[["t_req"]],
    threshold_min = x[["threshold"]][1],
    threshold_max = x[["threshold"]][2],
    result = x[["result"]]
  )
}

#' @export
print.equiv_change_mean <- function(x, ...)
{ cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") cat("For alpha =", format(x$alpha, ...), "\n") if (x$modcv) { cat("Modified CV used\n") } justify3 <- c("right", "centre", "centre") width3 <- c(16L, 16L, 16L) justify2 <- c("right", "centre") width2 <- c(16L, 32L) cat(format_row(list("", "Qualification", "Sample"), justify3, width3, ...)) cat(format_row(list("Number", x$n_qual, x$n_sample), justify3, width3, ...)) cat(format_row(list("Mean", x$mean_qual, x$mean_sample), justify3, width3, ...)) cat(format_row(list("SD", x$sd_qual, x$sd_sample), justify3, width3, ...)) cat(format_row(list("Pooled SD", x$sp), justify2, width2, ...)) cat(format_row(list("t0", x$t0), justify2, width2, ...)) cat(format_row(list("Result", x$result), justify2, width2, ...)) cat(format_row( list("Passing Range", paste0(format(x$threshold[1], ...), " to ", format(x$threshold[2], ...))), justify2, width2, ...) ) } cmstatr/NEWS.md0000644000176200001440000001200414574467253013043 0ustar liggesusers# Version 0.9.3 - Updated `basis_anova` function to prevent division by zero when MSE is 0 - Update `equiv_change_mean.print` method so that t-statistic and pooled SD are displayed. # Version 0.9.2 - Update to Anderson Darling k-Sample vignette to explain differences with SciPy implementation - Updated README - Update `plot_nested` to use `linewidth` instead of `size` internally due to update to `ggplot2` # Version 0.9.1 - Updated tests to accommodate upcoming changes to the rlang package. No change to test coverage was made. # Version 0.9.0 - Added the vignette `cmstatr_Validation` - Updated the expected value of the order statistic of a normally distributed variable in the implementation of `hk_ext_z_j_opt`. This affects the Basis values computed by `basis_hk_ext` when `method="optimum-order"`. Both the new and old implementations appear to perform equally well. See the vignette `hk_ext` for more information. 
- Added the function `nested_data_plot` for producing nested data plots. - Added the vignette `hk_ext` - Updated the vignette `cmstatr_Graphing` to show some examples of the use of `nested_data_plot`. - Added the additional column `batch` to the `carbon.data.2` example data set. - In `k_factor_normal`, suppress warnings emitted by `qt` when the non-central parameter is large. - Updated the test to use `testthat` edition 3. # Version 0.8.0 - Updated `basis_anova` so that in cases where the between-batch variance is small compared with the within-batch variance, a tolerance factor that doesn't consider the structure of the data is used. This matches the recommendation of Vangel (1992). - Added the alias `override="all"` to allow overriding all applicable diagnostic tests that are automatically run by the `basis_...` functions. - Improved documentation of diagnostic tests - Added `na.rm` argument to `cv` with identical behavior to the `na.rm` argument of `mean` and `sd`. - Fixed bug causing `maximum_normed_residual` to fail with small data sets where all but two observations would be considered outliers. - When diagnostic tests produce an error (when automatically run by the `basis_...` functions), the error message now identifies which test produced the error. # Version 0.7.1 - Fixed bug in `glance.equiv_mean_extremum` where it would include empty values when a sample was not specified. - Moved `dplyr` from Suggests to Depends. It is expected that nearly all users will use this package in their workflow, and a future version of `cmstatr` will also rely on functionality from `dplyr`. - Changed tests and vignettes such that tests and vignette code is not re-run when the necessary packages are not available. Test coverage and re-building of vignettes is unchanged when all packages in Depends and Suggests are available. 
# Version 0.7.0

- Added optional argument to `glance.basis` to add diagnostic test results
  to resulting `data.frame`

# Version 0.6.0

- Improved the documentation for several functions
- Made minor formatting changes to the `print` methods for:
  - `ad_ksample`
  - `anderson_darling`
  - `basis`
  - `equiv_mean_extremum`
  - `equiv_change_mean`
  - `levene_test`
  - `maximum_normed_residual`
- Added `alpha` into the `mnr` object, and updated `print` and `glance`
  methods to show the value of `alpha` specified by the user

# Version 0.5.2

- Internally use `vapply` instead of `sapply` to improve code safety
- Increased coverage of unit tests

# Version 0.5.1

- Fixed the title of the graphing vignette

# Version 0.5.0

- Renamed `transform_mod_cv_2` to `transform_mod_cv_ad` to better describe
  the purpose of this function.
- Removed the optional argument from `transform_mod_cv`. Now if several
  groups are to be transformed separately, this needs to be done explicitly
  using `dplyr::group_by` or a similar strategy.
- Fixed bug related to the automated diagnostic tests of pooled basis
  methods when `modcv = TRUE`. Previously, the diagnostic tests were
  performed with the unmodified data. After this bug fix, the data after
  the modified CV transform is used for the diagnostic tests.
- Added `stat` extensions to `ggplot2`:
  - `stat_normal_surv_func` to plot a normal survival function based on
    the data given
  - `stat_esf` to plot an empirical survival function
- Updated cmstatr_Tutorial vignette
- Created cmstatr_Graphing vignette
- Various documentation improvements

# Version 0.4.0

- Added automated diagnostic tests to basis_...
methods - Updated argument names for functions: - `transform_mod_cv` - `transform_mod_cv_2` - `normalize_group_mean` - Updated cmstatr_Tutorial vignette # Version 0.3.0 - Added modified CV functionality - Added glance and augment methods for most objects - Added function for calculating CV of a sample - Breaking changes: - Renamed function `basis_nonparametric_large_sample` to `basis_nonpara_large_sample` - Renamed function `nonparametric_binomial_rank` to `nonpara_binomial_rank` # Version 0.2.0 - Added ANOVA basis calculation - Added non-parametric basis calculations # Version 0.1.0 - Initial release cmstatr/MD50000644000176200001440000001464014574604752012262 0ustar liggesusers4e9fde2c9262956b92ddc5573d795ca1 *DESCRIPTION 91e4cbca9cccab7bea28842379ea9b16 *NAMESPACE d20a452ddd1b96f6f9cad6811ad31b58 *NEWS.md d8f641501b102944b855e4713b80e6b0 *R/adk.R 3508411c954ec7e8b21e57bab4c4a600 *R/adtest.R 7adfef72bb7bdf155596b780ed514ece *R/basis.R 9bf1c0c5e9127e54e64e58311f223ba9 *R/checks.R b976e6ce69f516e2093471f98fb3ed48 *R/cmstatr.R 6ac620b57464d57287ac98d4c6a54379 *R/cv.R 6dc57f4dec035b269a1f30ff3249d6a2 *R/data.R ed592e5e620b231a4ba20dcd13dceac8 *R/equiv.R 48581645f2cfe993f8b5c9b781b00173 *R/generics.R 9501d10a9634c9ccaa61cf4f71e1ca8f *R/levene.R 06cdc4e6af033a8eca6a1aabe57a9f5d *R/misc.R 29ce81cdb21a558feeaa5038f1dd7624 *R/mnr.R 394036a1094c007238c814cb9ef1a973 *R/norm.R 03d649e5d4c1db4b95d83ac8134dfe4a *R/plot-nested.R 5071a0853476bcce48729e130342a8dc *R/plotting.R 999657b4f8a5e4f15d28e701b77c4cec *R/util.R 416bd78ab6a5a2215650c4520fe4818b *R/verifytidy.R f643438066382aebff563fdc5ae7019e *README.md 00e55f1f13edb2ea650440aa53d4d46b *build/vignette.rds 4d3faaaf00f4157038095665f5c6290a *data/carbon.fabric.2.rda 4c5d7c2017236338558a8e90d521dcfc *data/carbon.fabric.rda c3713c9635e4b810eeec967b6d620f71 *inst/CITATION 1e242e19d1805d0a3371ea048dfc5271 *inst/WORDLIST 46750e778a5cdb4d65c347f8e43df623 *inst/doc/adktest.R 554b2bf0b65e21cb9a506f0c51d95e4c *inst/doc/adktest.Rmd 
34e406a9f508ae4bd6ebe9b5200d1bc9 *inst/doc/adktest.html 5e32fd17e858922f9ff1c00af452c7ce *inst/doc/cmstatr_Graphing.R d6046645bd9a1a43c7cd46207ebb9993 *inst/doc/cmstatr_Graphing.Rmd a3f4959f770a63c00c8b385d8c63cbd8 *inst/doc/cmstatr_Graphing.html f6c101af1f6e1a7a4b393becd58447f5 *inst/doc/cmstatr_Tutorial.R d8cf2f3123723e54795130754bcb2f41 *inst/doc/cmstatr_Tutorial.Rmd 201f696a193952b864257af015906d27 *inst/doc/cmstatr_Tutorial.html 0a47eec28a4755453a328a4426f12952 *inst/doc/cmstatr_Validation.R bd3d72760de19f329134b1b5944a5083 *inst/doc/cmstatr_Validation.Rmd 334c9c6d337ab44c45b155fbb899a6ff *inst/doc/cmstatr_Validation.html cda674589a397f0039869cbf47696cd7 *inst/doc/hk_ext.Rmd a2bc8f0bb5d14ec2228c36818b154c09 *inst/doc/hk_ext.html efbd952051b23ababe18bd750989a67d *inst/extdata/k1.vangel.csv a67ad5a80e2a99524b255e0b201b118c *inst/extdata/k2.vangel.csv e5164697ecbd6e2745cfa717b28b2b50 *man/ad_ksample.Rd f1f6f1eb5b474029e4fb0461bac47501 *man/anderson_darling.Rd 6381d2825420b25ef11a03d9a38f5b73 *man/augment.mnr.Rd 253abb0e0e26c23504ff067871c7e6cb *man/basis.Rd 5fbdfd4d75d33ed086a2509f83c07d65 *man/calc_cv_star.Rd 8bd5387a87dc996f83755ac8eb53b4fb *man/carbon.fabric.Rd 1719fd689d93004a24dcf95f6e25b6ae *man/cmstatr-package.Rd e0ad6b8e5027f2b0d0e5c3e28593ef4f *man/cv.Rd dd30d3e7cf0173349a2c91d7bb9a85a9 *man/equiv_change_mean.Rd 566a72c7116961ed35e63943a8e442b1 *man/equiv_mean_extremum.Rd e123343e89143147ca1b3c9daea7bd9a *man/figures/logo-wbg-1280x640.png 7b2aa867e7fac55b33398e8287229615 *man/figures/logo-wbg-240x278.png 274835069075753709b35519eba8289d *man/figures/logo.png 0baec0fac2a3f02aaf58df054e9176fb *man/glance.adk.Rd 82728bf73c7959cf8a072938176f2199 *man/glance.anderson_darling.Rd b859ea8f49afbb79b4aa37a34921b533 *man/glance.basis.Rd 58f558feb92730025eab9df42c6a12aa *man/glance.equiv_change_mean.Rd 7cd7ff87e51277b4df81af9f21d53ad8 *man/glance.equiv_mean_extremum.Rd 3947d2960b7817c46f26672794c3e03a *man/glance.levene.Rd f3374d26fa5a3f316cd9fc4556f143af 
*man/glance.mnr.Rd d9c0ceac6a29f4496bd623d7fb708750 *man/hk_ext.Rd bdc15445ce89f1cd4cc62fa6c63652b8 *man/k_equiv.Rd bb37686232b4a422bcc562be28d9244b *man/k_factor_normal.Rd 9be3950203d1687c341c8edae158ccfb *man/levene_test.Rd 976ec14aa2980f19fca5dc07fc529876 *man/maximum_normed_residual.Rd d7172e19662ca562417eef76e2c61c81 *man/nested_data_plot.Rd 6f683e43abf52676b3b14e6a644e2af3 *man/nonpara_binomial_rank.Rd fa3323f83a68ebc7523f1bd57c2c4380 *man/normalize_group_mean.Rd 88f62bb8dddf4098d3144a0173085ba9 *man/normalize_ply_thickness.Rd cf94bf30f383f5db86cb905226f56fa7 *man/reexports.Rd bc1d6f67bd995bba45da5af4cb83f953 *man/stat_esf.Rd 8a177c8990f2b50e1fbeb117030df8a1 *man/stat_normal_surv_func.Rd 857129803de385692db362eca2b3a707 *man/transform_mod_cv.Rd 4fe5745ec0c68ec0d5952f05b6e88daf *tests/spelling.R 56a699171db96cdd590964011cd35564 *tests/testthat.R 103cd709b3e332ec76aca1a709ea9c16 *tests/testthat/_snaps/plot-nested/nested-data-plot-color-and-fill.svg 98e0d88ae837f1344d1da9c5e813bd07 *tests/testthat/_snaps/plot-nested/nested-data-plot-color.svg 05885fbeca5e9846100c44599f9c3f4f *tests/testthat/_snaps/plot-nested/nested-data-plot-no-grouping.svg 0360220ba15b4ee66fb4cb3027250506 *tests/testthat/_snaps/plot-nested/nested-data-plot-single-grouping.svg f7a18e5994ea4ed1b74a7e7b2cfd4822 *tests/testthat/_snaps/plot-nested/nested-data-plot-single-obs-per-group.svg 26010ee8ec72b5da04741b6cc4767a83 *tests/testthat/_snaps/plot-nested/nested-data-plot-two-groupings.svg f3448dd3d535a11287d184391baccef4 *tests/testthat/_snaps/plotting/stat-esf.svg ba29a43c6b50258b64857eee77732819 *tests/testthat/_snaps/plotting/stat-normal-surv-func-and-stat-esf.svg ab0f9af9fcf48f630d9fc40835c410ee *tests/testthat/_snaps/plotting/stat-normal-surv-func.svg 536a4b66d985b19c7f31ff49aa5ef878 *tests/testthat/test-adk.R 7d238f01b0b1bb774855a5ffc13e3ff9 *tests/testthat/test-adtest.R 86408f67cce63e3ddf2c675a4899caf6 *tests/testthat/test-basis.R 8c01b7c50ee7e451fdbf848f9f7a4daf 
*tests/testthat/test-checks.R 5c0aff9648ce65992226c4d0a720aac9 *tests/testthat/test-cv.R 03e41facff981569cdbcd94ce198bebc *tests/testthat/test-equiv.R 94340c9b5865620b81e2a15edbf1cd50 *tests/testthat/test-levene.R ab921ab9d47fc557b5e6df1430d39dfd *tests/testthat/test-lintr.R 9d87c028a839a4d9d04464e915dd5309 *tests/testthat/test-mnr.R 6ed532484ec519490575a6babd5385ca *tests/testthat/test-normalize.R e937bf9df2b6ed2b586a6ad51386c9d1 *tests/testthat/test-plot-nested.R 8b394406eacb8151d0445e2f7eedaa3c *tests/testthat/test-plotting.R 38f2bc781851355ca9f053f5934fcab3 *tests/testthat/test-verifytidy.R 20d8435882f46e9b7ec144a7d319550e *tests/testthat/test-workflow.R 554b2bf0b65e21cb9a506f0c51d95e4c *vignettes/adktest.Rmd 3a62a1d484ebd9e9b57355ba990e5458 *vignettes/bibliography.json d6046645bd9a1a43c7cd46207ebb9993 *vignettes/cmstatr_Graphing.Rmd d8cf2f3123723e54795130754bcb2f41 *vignettes/cmstatr_Tutorial.Rmd bd3d72760de19f329134b1b5944a5083 *vignettes/cmstatr_Validation.Rmd 6139147eb94de827fd31a86f354d7248 *vignettes/distribution-Weibull-1.png 9259580c601bc05772eef61a61ce3f56 *vignettes/distribution-normal-1.png cda674589a397f0039869cbf47696cd7 *vignettes/hk_ext.Rmd f45458d7a1b95fc9d931c2509c856794 *vignettes/hk_ext.Rmd.orig 9177d71981d632ffbe4a7dc8632159b6 *vignettes/ieee.csl cmstatr/inst/0000755000176200001440000000000014574576017012724 5ustar liggesuserscmstatr/inst/doc/0000755000176200001440000000000014574576017013471 5ustar liggesuserscmstatr/inst/doc/cmstatr_Validation.Rmd0000644000176200001440000013131314067354422017755 0ustar liggesusers--- title: "cmstatr Validation" author: "Stefan Kloppenborg" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 2 vignette: > %\VignetteIndexEntry{cmstatr Validation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} csl: ieee.csl references: - id: "ASAP2008" type: "report" number: "ASAP-2008" author: - given: K.S. family: Raju - given: J.S. 
family: Tomblin title: "AGATE Statistical Analysis Program" issued: year: 2008 publisher: Wichita State University - id: Stephens1987 type: article author: - given: F.W. family: Scholz - given: M.A. family: Stephens title: K-Sample Anderson-Darling Tests container-title: Journal of the American Statistical Association volume: "82" issue: "399" page: 918-924 issued: year: "1987" month: "09" DOI: 10.1080/01621459.1987.10478517 URL: https://doi.org/10.1080/01621459.1987.10478517 - id: "CMH-17-1G" type: report number: "CMH-17-1G" title: Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials publisher: SAE International issued: year: "2012" month: "03" - id: "STAT-17" type: report number: "STAT-17 Rev 5" author: - literal: Materials Sciences Corporation title: CMH-17 Statistical Analysis for B-Basis and A-Basis Values publisher: Materials Sciences Corporation publisher-place: Horsham, PA issued: year: "2008" month: "01" day: "08" - id: Vangel1994 type: article author: - given: Mark family: Vangel title: One-Sided Nonparametric Tolerance Limits container-title: Communications in Statistics - Simulation and Computation volume: "23" issue: "4" page: 1137-1154 issued: year: "1994" DOI: 10.1080/03610919408813222 - id: vangel_lot_2002 type: article author: - given: Mark family: Vangel title: Lot Acceptance and Compliance Testing Using the Sample Mean and an Extremum container-title: Technometrics volume: "44" issue: "3" page: 242--249 issued: year: 2002 month: 8 DOI: 10.1198/004017002188618428 ---
The intent is that this vignette will include only those validations that are included in the test suite, but that the test suite may include more tests than are shown in this vignette. The following packages will be used in this validation. The version of each package used is listed at the end of this vignette. ```{r message=FALSE, warning=FALSE} library(cmstatr) library(dplyr) library(purrr) library(tidyr) library(testthat) ``` Throughout this vignette, the `testthat` package will be used. Expressions such as `expect_equal` are used to ensure that the two values are equal (within some tolerance). If this expectation is not true, the vignette will fail to build. The tolerance is a relative tolerance: a tolerance of 0.01 means that the two values must be within $1\%$ of each other. As an example, the following expression checks that the value `10` is equal to `10.1` within a tolerance of `0.01`. Such an expectation should be satisfied. ```{r} expect_equal(10, 10.1, tolerance = 0.01) ``` The `basis_...` functions automatically perform certain diagnostic tests. When those diagnostic tests are not relevant to the validation, the diagnostic tests are overridden by passing the argument `override = "all"`. # 2. Validation Table The following table provides a cross-reference between the various functions of the `cmstatr` package and the tests shown within this vignette. The sections in this vignette are organized by data set. Not all checks are performed on all data sets. 
Function | Tests --------------------------------|--------------------------- `ad_ksample()` | [Section 3.1](#cf-ad), [Section 4.1.2](#c11-adk), [Section 6.1](#cl-adk) `anderson_darling_normal()` | [Section 4.1.3](#c11-ad), [Section 5.1](#ods-adn) `anderson_darling_lognormal()` | [Section 4.1.3](#c11-ad), [Section 5.2](#ods-adl) `anderson_darling_weibull()` | [Section 4.1.3](#c11-ad), [Section 5.3](#ods-adw) `basis_normal()` | [Section 5.4](#ods-nb) `basis_lognormal()` | [Section 5.5](#ods-lb) `basis_weibull()` | [Section 5.6](#ods-wb) `basis_pooled_cv()` | [Section 4.2.3](#c12-pcv), [Section 4.2.4](#c12-pcvmcv), `basis_pooled_sd()` | [Section 4.2.1](#c12-psd), [Section 4.2.2](#c12-psdmcv) `basis_hk_ext()` | [Section 4.1.6](#c11-hk-wf), [Section 5.7](#ods-hkb), [Section 5.8](#ods-hkb2) `basis_nonpara_large_sample()` | [Section 5.9](#ods-lsnb) `basis_anova()` | [Section 4.1.7](#c11-anova) `calc_cv_star()` | `cv()` | `equiv_change_mean()` | [Section 5.11](#ods-ecm) `equiv_mean_extremum()` | [Section 5.10](#ods-eme) `hk_ext_z()` | [Section 7.3](#pf-hk), [Section 7.4](#pf-hk2) `hk_ext_z_j_opt()` | [Section 7.5](#pf-hk-opt) `k_equiv()` | [Section 7.8](#pf-equiv) `k_factor_normal()` | [Section 7.1](#pf-kb), [Section 7.2](#pf-ka) `levene_test()` | [Section 4.1.4](#c11-lbc), [Section 4.1.5](#c11-lbb) `maximum_normed_residual()` | [Section 4.1.1](#c11-mnr) `nonpara_binomial_rank()` | [Section 7.6](#pf-npbinom), [Section 7.7](#pf-npbinom2) `normalize_group_mean()` | `normalize_ply_thickness()` | `transform_mod_cv_ad()` | `transform_mod_cv()` | # 3. `carbon.fabric` Data Set This data set is example data that is provided with `cmstatr`. The first few rows of this data are shown below. ```{r} head(carbon.fabric) ``` ## 3.1. Anderson--Darling k-Sample Test {#cf-ad} This data was entered into ASAP 2008 [@ASAP2008] and the reported Anderson--Darling k--Sample test statistics were recorded, as were the conclusions. 
The value of the test statistic reported by `cmstatr` and that reported by ASAP 2008 differ by a factor of $k - 1$, as do the critical values used. As such, the conclusion of the tests are identical. This is described in more detail in the [Anderson--Darling k--Sample Vignette](adktest.html). When the RTD warp-tension data from this data set is entered into ASAP 2008, it reports a test statistic of 0.456 and fails to reject the null hypothesis that the batches are drawn from the same distribution. Adjusting for the different definition of the test statistic, the results given by `cmstatr` are very similar. ```{r} res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.456, tolerance = 0.002) expect_false(res$reject_same_dist) res ``` When the ETW warp-tension data from this data set are entered into ASAP 2008, the reported test statistic is 1.604 and it fails to reject the null hypothesis that the batches are drawn from the same distribution. Adjusting for the different definition of the test statistic, `cmstatr` gives nearly identical results. ```{r} res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.604, tolerance = 0.002) expect_false(res$reject_same_dist) res ``` # 4. Comparison with Examples from CMH-17-1G ## 4.1 Dataset From Section 8.3.11.1.1 CMH-17-1G [@CMH-17-1G] provides an example data set and results from ASAP [@ASAP2008] and STAT17 [@STAT-17]. 
This example data set is duplicated below: ```{r} dat_8_3_11_1_1 <- tribble( ~batch, ~strength, ~condition, 1, 118.3774604, "CTD", 1, 84.9581364, "RTD", 1, 83.7436035, "ETD", 1, 123.6035612, "CTD", 1, 92.4891822, "RTD", 1, 84.3831677, "ETD", 1, 115.2238092, "CTD", 1, 96.8212659, "RTD", 1, 94.8030433, "ETD", 1, 112.6379744, "CTD", 1, 109.030325, "RTD", 1, 94.3931537, "ETD", 1, 116.5564277, "CTD", 1, 97.8212659, "RTD", 1, 101.702222, "ETD", 1, 123.1649896, "CTD", 1, 100.921519, "RTD", 1, 86.5372121, "ETD", 2, 128.5589027, "CTD", 1, 103.699444, "RTD", 1, 92.3772684, "ETD", 2, 113.1462103, "CTD", 2, 93.7908212, "RTD", 2, 89.2084024, "ETD", 2, 121.4248107, "CTD", 2, 107.526709, "RTD", 2, 100.686001, "ETD", 2, 134.3241906, "CTD", 2, 94.5769704, "RTD", 2, 81.0444192, "ETD", 2, 129.6405117, "CTD", 2, 93.8831373, "RTD", 2, 91.3398070, "ETD", 2, 117.9818658, "CTD", 2, 98.2296605, "RTD", 2, 93.1441939, "ETD", 3, 115.4505226, "CTD", 2, 111.346590, "RTD", 2, 85.8204168, "ETD", 3, 120.0369467, "CTD", 2, 100.817538, "RTD", 3, 94.8966273, "ETD", 3, 117.1631088, "CTD", 3, 100.382203, "RTD", 3, 95.8068520, "ETD", 3, 112.9302797, "CTD", 3, 91.5037811, "RTD", 3, 86.7842252, "ETD", 3, 117.9114501, "CTD", 3, 100.083233, "RTD", 3, 94.4011973, "ETD", 3, 120.1900159, "CTD", 3, 95.6393615, "RTD", 3, 96.7231171, "ETD", 3, 110.7295966, "CTD", 3, 109.304779, "RTD", 3, 89.9010384, "ETD", 3, 100.078562, "RTD", 3, 99.1205847, "RTD", 3, 89.3672306, "ETD", 1, 106.357525, "ETW", 1, 99.0239966, "ETW2", 1, 105.898733, "ETW", 1, 103.341238, "ETW2", 1, 88.4640082, "ETW", 1, 100.302130, "ETW2", 1, 103.901744, "ETW", 1, 98.4634133, "ETW2", 1, 80.2058219, "ETW", 1, 92.2647280, "ETW2", 1, 109.199597, "ETW", 1, 103.487693, "ETW2", 1, 61.0139431, "ETW", 1, 113.734763, "ETW2", 2, 99.3207107, "ETW", 2, 108.172659, "ETW2", 2, 115.861770, "ETW", 2, 108.426732, "ETW2", 2, 82.6133082, "ETW", 2, 116.260375, "ETW2", 2, 85.3690411, "ETW", 2, 121.049610, "ETW2", 2, 115.801622, "ETW", 2, 111.223082, "ETW2", 2, 
44.3217741, "ETW", 2, 104.574843, "ETW2", 2, 117.328077, "ETW", 2, 103.222552, "ETW2", 2, 88.6782903, "ETW", 3, 99.3918538, "ETW2", 3, 107.676986, "ETW", 3, 87.3421658, "ETW2", 3, 108.960241, "ETW", 3, 102.730741, "ETW2", 3, 116.122640, "ETW", 3, 96.3694916, "ETW2", 3, 80.2334815, "ETW", 3, 99.5946088, "ETW2", 3, 106.145570, "ETW", 3, 97.0712407, "ETW2", 3, 104.667866, "ETW", 3, 104.234953, "ETW" ) dat_8_3_11_1_1 ``` ### 4.1.1 Maximum Normed Residual Test {#c11-mnr} CMH-17-1G Table 8.3.11.1.1(a) provides results of the MNR test from ASAP for this data set. Batches 2 and 3 of the ETW data is considered here and the results of `cmstatr` are compared with those published in CMH-17-1G. For Batch 2 of the ETW data, the results match those published in the handbook within a small tolerance. The published test statistic is 2.008. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW" & batch == 2) %>% maximum_normed_residual(strength, alpha = 0.05) expect_equal(res$mnr, 2.008, tolerance = 0.001) expect_equal(res$crit, 2.127, tolerance = 0.001) expect_equal(res$n_outliers, 0) res ``` Similarly, for Batch 3 of the ETW data, the results of `cmstatr` match the results published in the handbook within a small tolerance. The published test statistic is 2.119 ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW" & batch == 3) %>% maximum_normed_residual(strength, alpha = 0.05) expect_equal(res$mnr, 2.119, tolerance = 0.001) expect_equal(res$crit, 2.020, tolerance = 0.001) expect_equal(res$n_outliers, 1) res ``` ### 4.1.2 Anderson--Darling k--Sample Test {#c11-adk} For the ETW condition, the ADK test statistic given in [@CMH-17-1G] is $ADK = 0.793$ and the test concludes that the samples come from the same distribution. 
Note that `cmstatr` uses the definition of the test statistic given in [@Stephens1987], so the test statistic given by `cmstatr` differs from that given by ASAP by a factor of $k - 1$, as described in the [Anderson--Darling k--Sample Vignette](adktest.html). ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.793, tolerance = 0.003) expect_false(res$reject_same_dist) res ``` Similarly, for the ETW2 condition, the test statistic given in [@CMH-17-1G] is $ADK = 3.024$ and the test concludes that the samples come from different distributions. This matches `cmstatr`. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 3.024, tolerance = 0.001) expect_true(res$reject_same_dist) res ``` ### 4.1.3 Anderson--Darling Tests for Distribution {#c11-ad} CMH-17-1G Section 8.3.11.2.1 contains results from STAT17 for the "observed significance level" from the Anderson--Darling test for various distributions. In this section, the ETW condition from the present data set is used. The published results are given in the following table. The results from `cmstatr` are below and are very similar to those from STAT17. 
Distribution | OSL -------------|------------ Normal | 0.006051 Lognormal | 0.000307 Weibull | 0.0219 ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_normal(strength) expect_equal(res$osl, 0.006051, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_lognormal(strength) expect_equal(res$osl, 0.000307, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_weibull(strength) expect_equal(res$osl, 0.0219, tolerance = 0.002) res ``` ### 4.1.4 Levene's Test (Between Conditions) {#c11-lbc} CMH-17-1G Section 8.3.11.1.1 provides results from ASAP for Levene's test for equality of variance between conditions after the ETW and ETW2 conditions are removed. The handbook shows an F statistic of 0.58; however, if this data is entered into ASAP directly, ASAP gives an F statistic of 0.058, which matches the result of `cmstatr`. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition != "ETW" & condition != "ETW2") %>% levene_test(strength, condition) expect_equal(res$f, 0.058, tolerance = 0.01) res ``` ### 4.1.5 Levene's Test (Between Batches) {#c11-lbb} CMH-17-1G Section 8.3.11.2.2 provides output from STAT17. The ETW2 condition from the present data set was analyzed by STAT17 and that software reported an F statistic of 0.123 from Levene's test when comparing the variance of the batches within this condition. The result from `cmstatr` is similar. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% levene_test(strength, batch) expect_equal(res$f, 0.123, tolerance = 0.005) res ``` Similarly, the published value of the F statistic for the CTD condition is $3.850$. `cmstatr` produces very similar results. 
```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "CTD") %>% levene_test(strength, batch) expect_equal(res$f, 3.850, tolerance = 0.005) res ``` ### 4.1.6 Nonparametric Basis Values {#c11-hk-wf} CMH-17-1G Section 8.3.11.2.1 provides STAT17 outputs for the ETW condition of the present data set. The nonparametric Basis values are listed. In this case, the Hanson--Koopmans method is used. The published A-Basis value is 13.0 and the B-Basis is 37.9. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% basis_hk_ext(strength, method = "woodward-frawley", p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 13.0, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% basis_hk_ext(strength, method = "optimum-order", p = 0.90, conf = 0.95, override = "all") expect_equal(res$basis, 37.9, tolerance = 0.001) res ``` ### 4.1.7 Single-Point ANOVA Basis Value {#c11-anova} CMH-17-1G Section 8.3.11.2.2 provides output from STAT17 for the ETW2 condition from the present data set. STAT17 reports A- and B-Basis values based on the ANOVA method of 34.6 and 63.2, respectively. The results from `cmstatr` are similar. ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% basis_anova(strength, batch, override = "number_of_groups", p = 0.99, conf = 0.95) expect_equal(res$basis, 34.6, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% basis_anova(strength, batch, override = "number_of_groups") expect_equal(res$basis, 63.2, tolerance = 0.001) res ``` ## 4.2 Dataset From Section 8.3.11.1.2 [@CMH-17-1G] provides an example data set and results from ASAP [@ASAP2008]. 
This example data set is duplicated below: ```{r} dat_8_3_11_1_2 <- tribble( ~batch, ~strength, ~condition, 1, 79.04517, "CTD", 1, 103.2006, "RTD", 1, 63.22764, "ETW", 1, 54.09806, "ETW2", 1, 102.6014, "CTD", 1, 105.1034, "RTD", 1, 70.84454, "ETW", 1, 58.87615, "ETW2", 1, 97.79372, "CTD", 1, 105.1893, "RTD", 1, 66.43223, "ETW", 1, 61.60167, "ETW2", 1, 92.86423, "CTD", 1, 100.4189, "RTD", 1, 75.37771, "ETW", 1, 60.23973, "ETW2", 1, 117.218, "CTD", 2, 85.32319, "RTD", 1, 72.43773, "ETW", 1, 61.4808, "ETW2", 1, 108.7168, "CTD", 2, 92.69923, "RTD", 1, 68.43073, "ETW", 1, 64.55832, "ETW2", 1, 112.2773, "CTD", 2, 98.45242, "RTD", 1, 69.72524, "ETW", 2, 57.76131, "ETW2", 1, 114.0129, "CTD", 2, 104.1014, "RTD", 2, 66.20343, "ETW", 2, 49.91463, "ETW2", 2, 106.8452, "CTD", 2, 91.51841, "RTD", 2, 60.51251, "ETW", 2, 61.49271, "ETW2", 2, 112.3911, "CTD", 2, 101.3746, "RTD", 2, 65.69334, "ETW", 2, 57.7281, "ETW2", 2, 115.5658, "CTD", 2, 101.5828, "RTD", 2, 62.73595, "ETW", 2, 62.11653, "ETW2", 2, 87.40657, "CTD", 2, 99.57384, "RTD", 2, 59.00798, "ETW", 2, 62.69353, "ETW2", 2, 102.2785, "CTD", 2, 88.84826, "RTD", 2, 62.37761, "ETW", 3, 61.38523, "ETW2", 2, 110.6073, "CTD", 3, 92.18703, "RTD", 3, 64.3947, "ETW", 3, 60.39053, "ETW2", 3, 105.2762, "CTD", 3, 101.8234, "RTD", 3, 72.8491, "ETW", 3, 59.17616, "ETW2", 3, 110.8924, "CTD", 3, 97.68909, "RTD", 3, 66.56226, "ETW", 3, 60.17616, "ETW2", 3, 108.7638, "CTD", 3, 101.5172, "RTD", 3, 66.56779, "ETW", 3, 46.47396, "ETW2", 3, 110.9833, "CTD", 3, 100.0481, "RTD", 3, 66.00123, "ETW", 3, 51.16616, "ETW2", 3, 101.3417, "CTD", 3, 102.0544, "RTD", 3, 59.62108, "ETW", 3, 100.0251, "CTD", 3, 60.61167, "ETW", 3, 57.65487, "ETW", 3, 66.51241, "ETW", 3, 64.89347, "ETW", 3, 57.73054, "ETW", 3, 68.94086, "ETW", 3, 61.63177, "ETW" ) ``` ### 4.2.1 Pooled SD A- and B-Basis {#c12-psd} CMH-17-1G Table 8.3.11.2(k) provides outputs from ASAP for the data set above. ASAP uses the pooled SD method. 
ASAP produces the following results, which are quite similar to those produced by `cmstatr`. Condition | CTD | RTD | ETW | ETW2 ----------|-------|-------|-------|------ B-Basis | 93.64 | 87.30 | 54.33 | 47.12 A-Basis | 86.19 | 79.86 | 46.84 | 39.69 ```{r} res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 93.64, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 87.30, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 54.33, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 47.12, tolerance = 0.001) res ``` ```{r} res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 86.19, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 79.86, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 46.84, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 39.69, tolerance = 0.001) res ``` ### 4.2.2 Pooled SD A- and B-Basis (Mod CV) {#c12-psdmcv} After removal of the ETW2 condition, CMH17-STATS reports the pooled A- and B-Basis (mod CV) shown in the following table. `cmstatr` computes very similar values. 
Condition | CTD | RTD | ETW ----------|-------|-------|------ B-Basis | 92.25 | 85.91 | 52.97 A-Basis | 83.81 | 77.48 | 44.47 ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 92.25, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 85.91, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 52.97, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, p = 0.99, conf = 0.95, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 83.81, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 77.48, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 44.47, tolerance = 0.001) res ``` ### 4.2.3 Pooled CV A- and B-Basis {#c12-pcv} This data set was input into CMH17-STATS and the Pooled CV method was selected. The results from CMH17-STATS were as follows. `cmstatr` produces very similar results. 
Condition | CTD | RTD | ETW | ETW2 ----------|-------|-------|-------|------ B-Basis | 90.89 | 85.37 | 56.79 | 50.55 A-Basis | 81.62 | 76.67 | 50.98 | 45.40 ```{r} res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 90.89, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 85.37, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 56.79, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 50.55, tolerance = 0.001) res ``` ```{r} res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 81.62, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 76.67, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 50.98, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 45.40, tolerance = 0.001) res ``` ### 4.2.4 Pooled CV A- and B-Basis (Mod CV) {#c12-pcvmcv} This data set was input into CMH17-STATS and the Pooled CV method was selected with the modified CV transform. Additionally, the ETW2 condition was removed. The results from CMH17-STATS were as follows. `cmstatr` produces very similar results. 
Condition | CTD | RTD | ETW ----------|-------|-------|------- B-Basis | 90.31 | 84.83 | 56.43 A-Basis | 80.57 | 75.69 | 50.33 ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 90.31, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 84.83, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 56.43, tolerance = 0.001) res ``` ```{r} res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, modcv = TRUE, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 80.57, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 75.69, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 50.33, tolerance = 0.001) res ``` # 5. Other Data Sets This section contains various small data sets. In most cases, these data sets were generated randomly for the purpose of comparing `cmstatr` to other software. ## 5.1 Anderson--Darling Test (Normal) {#ods-adn} The following data set was randomly generated. When this is entered into STAT17 [@STAT-17], that software gives the value $OSL = 0.465$, which matches the result of `cmstatr` within a small margin. ```{r} dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_normal(dat, strength) expect_equal(res$osl, 0.465, tolerance = 0.001) res ``` ## 5.2 Anderson--Darling Test (Lognormal) {#ods-adl} The following data set was randomly generated. When this is entered into STAT17 [@STAT-17], that software gives the value $OSL = 0.480$, which matches the result of `cmstatr` within a small margin. 
```{r} dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_lognormal(dat, strength) expect_equal(res$osl, 0.480, tolerance = 0.001) res ``` ## 5.3 Anderson--Darling Test (Weibull) {#ods-adw} The following data set was randomly generated. When this is entered into STAT17 [@STAT-17], that software gives the value $OSL = 0.179$, which matches the result of `cmstatr` within a small margin. ```{r} dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_weibull(dat, strength) expect_equal(res$osl, 0.179, tolerance = 0.002) res ``` ## 5.4 Normal A- and B-Basis {#ods-nb} The following data was input into STAT17 and the A- and B-Basis values were computed assuming normally distributed data. The results were 120.336 and 129.287, respectively. `cmstatr` reports very similar values. ```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_normal(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 120.336, tolerance = 0.0005) res ``` ```{r} res <- basis_normal(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.287, tolerance = 0.0005) res ``` ## 5.5 Lognormal A- and B-Basis {#ods-lb} The following data was input into STAT17 and the A- and B-Basis values were computed assuming distributed according to a lognormal distribution. The results were 121.710 and 129.664, respectively. `cmstatr` reports very similar values. 
```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_lognormal(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 121.710, tolerance = 0.0005) res ``` ```{r} res <- basis_lognormal(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.664, tolerance = 0.0005) res ``` ## 5.6 Weibull A- and B-Basis {#ods-wb} The following data was input into STAT17 and the A- and B-Basis values were computed assuming data following the Weibull distribution. The results were 109.150 and 125.441, respectively. `cmstatr` reports very similar values. ```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_weibull(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 109.150, tolerance = 0.005) res ``` ```{r} res <- basis_weibull(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 125.441, tolerance = 0.005) res ``` ## 5.7 Extended Hanson--Koopmans A- and B-Basis {#ods-hkb} The following data was input into STAT17 and the A- and B-Basis values were computed using the nonparametric (small sample) method. The results were 99.651 and 124.156, respectively. `cmstatr` reports very similar values. 
```{r} dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_hk_ext(x = dat, p = 0.99, conf = 0.95, method = "woodward-frawley", override = "all") expect_equal(res$basis, 99.651, tolerance = 0.005) res ``` ```{r} res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 124.156, tolerance = 0.005) res ``` ## 5.8 Extended Hanson--Koopmans B-Basis {#ods-hkb2} The following random numbers were generated. ```{r} dat <- c( 139.6734, 143.0032, 130.4757, 144.8327, 138.7818, 136.7693, 148.636, 131.0095, 131.4933, 142.8856, 158.0198, 145.2271, 137.5991, 139.8298, 140.8557, 137.6148, 131.3614, 152.7795, 145.8792, 152.9207, 160.0989, 145.1920, 128.6383, 141.5992, 122.5297, 159.8209, 151.6720, 159.0156 ) ``` All of the numbers above were input into STAT17 and the reported B-Basis value using the Optimum Order nonparametric method was 122.36798. This result matches the results of `cmstatr` within a small margin. ```{r} res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 122.36798, tolerance = 0.001) res ``` The last two observations from the above data set were discarded, leaving 26 observations. This smaller data set was input into STAT17 and that software calculated a B-Basis value of 121.57073 using the Optimum Order nonparametric method. `cmstatr` reports a very similar number. ```{r} res <- basis_hk_ext(x = head(dat, 26), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 121.57073, tolerance = 0.001) res ``` The same data set was further reduced such that only the first 22 observations were included. This smaller data set was input into STAT17 and that software calculated a B-Basis value of 128.82397 using the Optimum Order nonparametric method. 
`cmstatr` reports a very similar number. ```{r} res <- basis_hk_ext(x = head(dat, 22), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 128.82397, tolerance = 0.001) res ``` ## 5.9 Large Sample Nonparametric B-Basis {#ods-lsnb} The following data was input into STAT17 and the B-Basis value was computed using the nonparametric (large sample) method. The results was 122.738297. `cmstatr` reports very similar values. ```{r} dat <- c( 137.3603, 135.6665, 136.6914, 154.7919, 159.2037, 137.3277, 128.821, 138.6304, 138.9004, 147.4598, 148.6622, 144.4948, 131.0851, 149.0203, 131.8232, 146.4471, 123.8124, 126.3105, 140.7609, 134.4875, 128.7508, 117.1854, 129.3088, 141.6789, 138.4073, 136.0295, 128.4164, 141.7733, 134.455, 122.7383, 136.9171, 136.9232, 138.8402, 152.8294, 135.0633, 121.052, 131.035, 138.3248, 131.1379, 147.3771, 130.0681, 132.7467, 137.1444, 141.662, 146.9363, 160.7448, 138.5511, 129.1628, 140.2939, 144.8167, 156.5918, 132.0099, 129.3551, 136.6066, 134.5095, 128.2081, 144.0896, 141.8029, 130.0149, 140.8813, 137.7864 ) res <- basis_nonpara_large_sample(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 122.738297, tolerance = 0.005) res ``` ## 5.10 Acceptance Limits Based on Mean and Extremum {#ods-eme} Results from `cmstatr`'s `equiv_mean_extremum` function were compared with results from HYTEQ. The summary statistics for the qualification data were set as `mean = 141.310` and `sd=6.415`. For a value of `alpha=0.05` and `n = 9`, HYTEQ reported thresholds of 123.725 and 137.197 for minimum individual and mean, respectively. `cmstatr` produces very similar results. 
```{r} res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9) expect_equal(res$threshold_min_indiv, 123.725, tolerance = 0.001) expect_equal(res$threshold_mean, 137.197, tolerance = 0.001) res ``` Using the same parameters, but using the modified CV method, HYTEQ produces thresholds of 117.024 and 135.630 for minimum individual and mean, respectively. `cmstatr` produces very similar results. ```{r} res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9, modcv = TRUE) expect_equal(res$threshold_min_indiv, 117.024, tolerance = 0.001) expect_equal(res$threshold_mean, 135.630, tolerance = 0.001) res ``` ## 5.11 Acceptance Based on Change in Mean {#ods-ecm} Results from `cmstatr`'s `equiv_change_mean` function were compared with results from HYTEQ. The following parameters were used. A value of `alpha = 0.05` was selected. Parameter | Qualification | Sample ----------|---------------|--------- Mean | 9.24 | 9.02 SD | 0.162 | 0.15785 n | 28 | 9 HYTEQ gives an acceptance range of 9.115 to 9.365. `cmstatr` produces similar results. ```{r} res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162) expect_equal(res$threshold, c(9.115, 9.365), tolerance = 0.001) res ``` After selecting the modified CV method, HYTEQ gives an acceptance range of 8.857 to 9.623. `cmstatr` produces similar results. ```{r} res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, modcv = TRUE) expect_equal(res$threshold, c(8.857, 9.623), tolerance = 0.001) res ``` # 6. Comparison with Literature In this section, results from `cmstatr` are compared with values published in literature. ## 6.1 Anderson--Darling K--Sample Test {#cl-adk} [@Stephens1987] provides example data that compares measurements obtained in four labs. 
Their paper gives values of the ADK test statistic as well as p-values. The data in [@Stephens1987] is as follows: ```{r} dat_ss1987 <- data.frame( smoothness = c( 38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0, 39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8, 34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0, 34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8 ), lab = c(rep("A", 8), rep("B", 8), rep("C", 8), rep("D", 8)) ) dat_ss1987 ``` [@Stephens1987] lists the corresponding test statistics $A_{akN}^2 = 8.3926$ and $\sigma_N = 1.2038$ with the p-value $p = 0.0022$. These match the result of `cmstatr` within a small margin. ```{r} res <- ad_ksample(dat_ss1987, smoothness, lab) expect_equal(res$ad, 8.3926, tolerance = 0.001) expect_equal(res$sigma, 1.2038, tolerance = 0.001) expect_equal(res$p, 0.00226, tolerance = 0.01) res ``` # 7. Comparison with Published Factors Various factors, such as tolerance limit factors, are published in various publications. This section compares those published factors with those computed by `cmstatr`. ## 7.1 Normal kB Factors {#pf-kb} B-Basis tolerance limit factors assuming a normal distribution are published in CMH-17-1G. Those factors are reproduced below and are compared with the results of `cmstatr`. The published factors and those computed by `cmstatr` are quite similar. 
```{r} tribble( ~n, ~kB_published, 2, 20.581, 36, 1.725, 70, 1.582, 104, 1.522, 3, 6.157, 37, 1.718, 71, 1.579, 105, 1.521, 4, 4.163, 38, 1.711, 72, 1.577, 106, 1.519, 5, 3.408, 39, 1.704, 73, 1.575, 107, 1.518, 6, 3.007, 40, 1.698, 74, 1.572, 108, 1.517, 7, 2.756, 41, 1.692, 75, 1.570, 109, 1.516, 8, 2.583, 42, 1.686, 76, 1.568, 110, 1.515, 9, 2.454, 43, 1.680, 77, 1.566, 111, 1.513, 10, 2.355, 44, 1.675, 78, 1.564, 112, 1.512, 11, 2.276, 45, 1.669, 79, 1.562, 113, 1.511, 12, 2.211, 46, 1.664, 80, 1.560, 114, 1.510, 13, 2.156, 47, 1.660, 81, 1.558, 115, 1.509, 14, 2.109, 48, 1.655, 82, 1.556, 116, 1.508, 15, 2.069, 49, 1.650, 83, 1.554, 117, 1.507, 16, 2.034, 50, 1.646, 84, 1.552, 118, 1.506, 17, 2.002, 51, 1.642, 85, 1.551, 119, 1.505, 18, 1.974, 52, 1.638, 86, 1.549, 120, 1.504, 19, 1.949, 53, 1.634, 87, 1.547, 121, 1.503, 20, 1.927, 54, 1.630, 88, 1.545, 122, 1.502, 21, 1.906, 55, 1.626, 89, 1.544, 123, 1.501, 22, 1.887, 56, 1.623, 90, 1.542, 124, 1.500, 23, 1.870, 57, 1.619, 91, 1.540, 125, 1.499, 24, 1.854, 58, 1.616, 92, 1.539, 126, 1.498, 25, 1.839, 59, 1.613, 93, 1.537, 127, 1.497, 26, 1.825, 60, 1.609, 94, 1.536, 128, 1.496, 27, 1.812, 61, 1.606, 95, 1.534, 129, 1.495, 28, 1.800, 62, 1.603, 96, 1.533, 130, 1.494, 29, 1.789, 63, 1.600, 97, 1.531, 131, 1.493, 30, 1.778, 64, 1.597, 98, 1.530, 132, 1.492, 31, 1.768, 65, 1.595, 99, 1.529, 133, 1.492, 32, 1.758, 66, 1.592, 100, 1.527, 134, 1.491, 33, 1.749, 67, 1.589, 101, 1.526, 135, 1.490, 34, 1.741, 68, 1.587, 102, 1.525, 136, 1.489, 35, 1.733, 69, 1.584, 103, 1.523, 137, 1.488 ) %>% arrange(n) %>% mutate(kB_cmstatr = k_factor_normal(n, p = 0.9, conf = 0.95)) %>% rowwise() %>% mutate(diff = expect_equal(kB_published, kB_cmstatr, tolerance = 0.001)) %>% select(-c(diff)) ``` ## 7.2 Normal kA Factors {#pf-ka} A-Basis tolerance limit factors assuming a normal distribution are published in CMH-17-1G. Those factors are reproduced below and are compared with the results of `cmstatr`. 
The published factors and those computed by `cmstatr` are quite similar. ```{r} tribble( ~n, ~kA_published, 2, 37.094, 36, 2.983, 70, 2.765, 104, 2.676, 3, 10.553, 37, 2.972, 71, 2.762, 105, 2.674, 4, 7.042, 38, 2.961, 72, 2.758, 106, 2.672, 5, 5.741, 39, 2.951, 73, 2.755, 107, 2.671, 6, 5.062, 40, 2.941, 74, 2.751, 108, 2.669, 7, 4.642, 41, 2.932, 75, 2.748, 109, 2.667, 8, 4.354, 42, 2.923, 76, 2.745, 110, 2.665, 9, 4.143, 43, 2.914, 77, 2.742, 111, 2.663, 10, 3.981, 44, 2.906, 78, 2.739, 112, 2.662, 11, 3.852, 45, 2.898, 79, 2.736, 113, 2.660, 12, 3.747, 46, 2.890, 80, 2.733, 114, 2.658, 13, 3.659, 47, 2.883, 81, 2.730, 115, 2.657, 14, 3.585, 48, 2.876, 82, 2.727, 116, 2.655, 15, 3.520, 49, 2.869, 83, 2.724, 117, 2.654, 16, 3.464, 50, 2.862, 84, 2.721, 118, 2.652, 17, 3.414, 51, 2.856, 85, 2.719, 119, 2.651, 18, 3.370, 52, 2.850, 86, 2.716, 120, 2.649, 19, 3.331, 53, 2.844, 87, 2.714, 121, 2.648, 20, 3.295, 54, 2.838, 88, 2.711, 122, 2.646, 21, 3.263, 55, 2.833, 89, 2.709, 123, 2.645, 22, 3.233, 56, 2.827, 90, 2.706, 124, 2.643, 23, 3.206, 57, 2.822, 91, 2.704, 125, 2.642, 24, 3.181, 58, 2.817, 92, 2.701, 126, 2.640, 25, 3.158, 59, 2.812, 93, 2.699, 127, 2.639, 26, 3.136, 60, 2.807, 94, 2.697, 128, 2.638, 27, 3.116, 61, 2.802, 95, 2.695, 129, 2.636, 28, 3.098, 62, 2.798, 96, 2.692, 130, 2.635, 29, 3.080, 63, 2.793, 97, 2.690, 131, 2.634, 30, 3.064, 64, 2.789, 98, 2.688, 132, 2.632, 31, 3.048, 65, 2.785, 99, 2.686, 133, 2.631, 32, 3.034, 66, 2.781, 100, 2.684, 134, 2.630, 33, 3.020, 67, 2.777, 101, 2.682, 135, 2.628, 34, 3.007, 68, 2.773, 102, 2.680, 136, 2.627, 35, 2.995, 69, 2.769, 103, 2.678, 137, 2.626 ) %>% arrange(n) %>% mutate(kA_cmstatr = k_factor_normal(n, p = 0.99, conf = 0.95)) %>% rowwise() %>% mutate(diff = expect_equal(kA_published, kA_cmstatr, tolerance = 0.001)) %>% select(-c(diff)) ``` ## 7.3 Nonparametric B-Basis Extended Hanson--Koopmans {#pf-hk} Vangel [@Vangel1994] provides extensive tables of $z$ for the case where $i=1$ and $j$ is the median 
observation. This section checks the results of `cmstatr`'s function against those tables. Only the odd values of $n$ are checked so that the median is a single observation. The unit tests for the `cmstatr` package include checks of a variety of values of $p$ and confidence, but only the factors for B-Basis are checked here. ```{r} tribble( ~n, ~z, 3, 28.820048, 5, 6.1981307, 7, 3.4780112, 9, 2.5168762, 11, 2.0312134, 13, 1.7377374, 15, 1.5403989, 17, 1.3979806, 19, 1.2899172, 21, 1.2048089, 23, 1.1358259, 25, 1.0786237, 27, 1.0303046, ) %>% rowwise() %>% mutate( z_calc = hk_ext_z(n, 1, ceiling(n / 2), p = 0.90, conf = 0.95) ) %>% mutate(diff = expect_equal(z, z_calc, tolerance = 0.0001)) %>% select(-c(diff)) ``` ## 7.4 Nonparametric A-Basis Extended Hanson--Koopmans {#pf-hk2} CMH-17-1G provides Table 8.5.15, which contains factors for calculating A-Basis values using the Extended Hanson--Koopmans nonparametric method. That table is reproduced in part here and the factors are compared with those computed by `cmstatr`. More extensive checks are performed in the unit test of the `cmstatr` package. The factors computed by `cmstatr` are very similar to those published in CMH-17-1G. ```{r} tribble( ~n, ~k, 2, 80.0038, 4, 9.49579, 6, 5.57681, 8, 4.25011, 10, 3.57267, 12, 3.1554, 14, 2.86924, 16, 2.65889, 18, 2.4966, 20, 2.36683, 25, 2.131, 30, 1.96975, 35, 1.85088, 40, 1.75868, 45, 1.68449, 50, 1.62313, 60, 1.5267, 70, 1.45352, 80, 1.39549, 90, 1.34796, 100, 1.30806, 120, 1.24425, 140, 1.19491, 160, 1.15519, 180, 1.12226, 200, 1.09434, 225, 1.06471, 250, 1.03952, 275, 1.01773 ) %>% rowwise() %>% mutate(z_calc = hk_ext_z(n, 1, n, 0.99, 0.95)) %>% mutate(diff = expect_lt(abs(k - z_calc), 0.0001)) %>% select(-c(diff)) ``` ## 7.5 Factors for Small Sample Nonparametric B-Basis {#pf-hk-opt} CMH-17-1G Table 8.5.14 provides ranks orders and factors for computing nonparametric B-Basis values. This table is reproduced below and compared with the results of `cmstatr`. 
The results are similar. In some cases, the rank order ($r$ in CMH-17-1G or $j$ in `cmstatr`) *and* the factor ($k$) are different. These differences are discussed in detail in the vignette [Extended Hanson-Koopmans](hk_ext.html). ```{r} tribble( ~n, ~r, ~k, 2, 2, 35.177, 3, 3, 7.859, 4, 4, 4.505, 5, 4, 4.101, 6, 5, 3.064, 7, 5, 2.858, 8, 6, 2.382, 9, 6, 2.253, 10, 6, 2.137, 11, 7, 1.897, 12, 7, 1.814, 13, 7, 1.738, 14, 8, 1.599, 15, 8, 1.540, 16, 8, 1.485, 17, 8, 1.434, 18, 9, 1.354, 19, 9, 1.311, 20, 10, 1.253, 21, 10, 1.218, 22, 10, 1.184, 23, 11, 1.143, 24, 11, 1.114, 25, 11, 1.087, 26, 11, 1.060, 27, 11, 1.035, 28, 12, 1.010 ) %>% rowwise() %>% mutate(r_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$j) %>% mutate(k_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$z) ``` ## 7.6 Nonparametric B-Basis Binomial Rank {#pf-npbinom} CMH-17-1G Table 8.5.12 provides factors for computing B-Basis values using the nonparametric binomial rank method. Part of that table is reproduced below and compared with the results of `cmstatr`. The results of `cmstatr` are similar to the published values. A more complete comparison is performed in the unit tests of the `cmstatr` package. ```{r} tribble( ~n, ~rb, 29, 1, 46, 2, 61, 3, 76, 4, 89, 5, 103, 6, 116, 7, 129, 8, 142, 9, 154, 10, 167, 11, 179, 12, 191, 13, 203, 14 ) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.9, 0.95)) %>% mutate(test = expect_equal(rb, r_calc)) %>% select(-c(test)) ``` ## 7.7 Nonparametric A-Basis Binomial Rank {#pf-npbinom2} CMH-17-1G Table 8.5.13 provides factors for computing A-Basis values using the nonparametric binomial rank method. Part of that table is reproduced below and compared with the results of `cmstatr`. The results of `cmstatr` are similar to the published values. A more complete comparison is performed in the unit tests of the `cmstatr` package. 
```{r} tribble( ~n, ~ra, 299, 1, 473, 2, 628, 3, 773, 4, 913, 5 ) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.99, 0.95)) %>% mutate(test = expect_equal(ra, r_calc)) %>% select(-c(test)) ``` ## 7.8 Factors for Equivalency {#pf-equiv} Vangel's 2002 paper provides factors for calculating limits for sample mean and sample extremum for various values of $\alpha$ and sample size ($n$). A subset of those factors is reproduced below and compared with results from `cmstatr`. The results are very similar for values of $\alpha$ and $n$ that are common for composite materials. ```{r} read.csv(system.file("extdata", "k1.vangel.csv", package = "cmstatr")) %>% gather(n, k1, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))) %>% inner_join( read.csv(system.file("extdata", "k2.vangel.csv", package = "cmstatr")) %>% gather(n, k2, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))), by = c("n" = "n", "alpha" = "alpha") ) %>% filter(n >= 5 & (alpha == 0.01 | alpha == 0.05)) %>% group_by(n, alpha) %>% nest() %>% mutate(equiv = map2(alpha, n, ~k_equiv(.x, .y))) %>% mutate(k1_calc = map(equiv, function(e) e[1]), k2_calc = map(equiv, function(e) e[2])) %>% select(-c(equiv)) %>% unnest(cols = c(data, k1_calc, k2_calc)) %>% mutate(check = expect_equal(k1, k1_calc, tolerance = 0.0001)) %>% select(-c(check)) %>% mutate(check = expect_equal(k2, k2_calc, tolerance = 0.0001)) %>% select(-c(check)) ``` # 8. Session Info This copy of this vignette was built on the following system. ```{r} sessionInfo() ``` # 9. 
References cmstatr/inst/doc/cmstatr_Validation.R0000644000176200001440000007125714574576016017456 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(cmstatr) library(dplyr) library(purrr) library(tidyr) library(testthat) ## ----------------------------------------------------------------------------- expect_equal(10, 10.1, tolerance = 0.01) ## ----------------------------------------------------------------------------- head(carbon.fabric) ## ----------------------------------------------------------------------------- res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "RTD") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.456, tolerance = 0.002) expect_false(res$reject_same_dist) res ## ----------------------------------------------------------------------------- res <- carbon.fabric %>% filter(test == "WT") %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 1.604, tolerance = 0.002) expect_false(res$reject_same_dist) res ## ----------------------------------------------------------------------------- dat_8_3_11_1_1 <- tribble( ~batch, ~strength, ~condition, 1, 118.3774604, "CTD", 1, 84.9581364, "RTD", 1, 83.7436035, "ETD", 1, 123.6035612, "CTD", 1, 92.4891822, "RTD", 1, 84.3831677, "ETD", 1, 115.2238092, "CTD", 1, 96.8212659, "RTD", 1, 94.8030433, "ETD", 1, 112.6379744, "CTD", 1, 109.030325, "RTD", 1, 94.3931537, "ETD", 1, 116.5564277, "CTD", 1, 97.8212659, "RTD", 1, 101.702222, "ETD", 1, 123.1649896, "CTD", 1, 100.921519, "RTD", 1, 86.5372121, "ETD", 2, 128.5589027, "CTD", 1, 103.699444, "RTD", 1, 92.3772684, "ETD", 2, 113.1462103, "CTD", 2, 93.7908212, "RTD", 2, 89.2084024, "ETD", 2, 121.4248107, "CTD", 2, 107.526709, "RTD", 2, 100.686001, "ETD", 2, 134.3241906, "CTD", 2, 94.5769704, 
"RTD", 2, 81.0444192, "ETD", 2, 129.6405117, "CTD", 2, 93.8831373, "RTD", 2, 91.3398070, "ETD", 2, 117.9818658, "CTD", 2, 98.2296605, "RTD", 2, 93.1441939, "ETD", 3, 115.4505226, "CTD", 2, 111.346590, "RTD", 2, 85.8204168, "ETD", 3, 120.0369467, "CTD", 2, 100.817538, "RTD", 3, 94.8966273, "ETD", 3, 117.1631088, "CTD", 3, 100.382203, "RTD", 3, 95.8068520, "ETD", 3, 112.9302797, "CTD", 3, 91.5037811, "RTD", 3, 86.7842252, "ETD", 3, 117.9114501, "CTD", 3, 100.083233, "RTD", 3, 94.4011973, "ETD", 3, 120.1900159, "CTD", 3, 95.6393615, "RTD", 3, 96.7231171, "ETD", 3, 110.7295966, "CTD", 3, 109.304779, "RTD", 3, 89.9010384, "ETD", 3, 100.078562, "RTD", 3, 99.1205847, "RTD", 3, 89.3672306, "ETD", 1, 106.357525, "ETW", 1, 99.0239966, "ETW2", 1, 105.898733, "ETW", 1, 103.341238, "ETW2", 1, 88.4640082, "ETW", 1, 100.302130, "ETW2", 1, 103.901744, "ETW", 1, 98.4634133, "ETW2", 1, 80.2058219, "ETW", 1, 92.2647280, "ETW2", 1, 109.199597, "ETW", 1, 103.487693, "ETW2", 1, 61.0139431, "ETW", 1, 113.734763, "ETW2", 2, 99.3207107, "ETW", 2, 108.172659, "ETW2", 2, 115.861770, "ETW", 2, 108.426732, "ETW2", 2, 82.6133082, "ETW", 2, 116.260375, "ETW2", 2, 85.3690411, "ETW", 2, 121.049610, "ETW2", 2, 115.801622, "ETW", 2, 111.223082, "ETW2", 2, 44.3217741, "ETW", 2, 104.574843, "ETW2", 2, 117.328077, "ETW", 2, 103.222552, "ETW2", 2, 88.6782903, "ETW", 3, 99.3918538, "ETW2", 3, 107.676986, "ETW", 3, 87.3421658, "ETW2", 3, 108.960241, "ETW", 3, 102.730741, "ETW2", 3, 116.122640, "ETW", 3, 96.3694916, "ETW2", 3, 80.2334815, "ETW", 3, 99.5946088, "ETW2", 3, 106.145570, "ETW", 3, 97.0712407, "ETW2", 3, 104.667866, "ETW", 3, 104.234953, "ETW" ) dat_8_3_11_1_1 ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW" & batch == 2) %>% maximum_normed_residual(strength, alpha = 0.05) expect_equal(res$mnr, 2.008, tolerance = 0.001) expect_equal(res$crit, 2.127, tolerance = 0.001) expect_equal(res$n_outliers, 0) res ## 
----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW" & batch == 3) %>% maximum_normed_residual(strength, alpha = 0.05) expect_equal(res$mnr, 2.119, tolerance = 0.001) expect_equal(res$crit, 2.020, tolerance = 0.001) expect_equal(res$n_outliers, 1) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 0.793, tolerance = 0.003) expect_false(res$reject_same_dist) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% ad_ksample(strength, batch) expect_equal(res$ad / (res$k - 1), 3.024, tolerance = 0.001) expect_true(res$reject_same_dist) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_normal(strength) expect_equal(res$osl, 0.006051, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_lognormal(strength) expect_equal(res$osl, 0.000307, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% anderson_darling_weibull(strength) expect_equal(res$osl, 0.0219, tolerance = 0.002) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition != "ETW" & condition != "ETW2") %>% levene_test(strength, condition) expect_equal(res$f, 0.058, tolerance = 0.01) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% levene_test(strength, batch) expect_equal(res$f, 0.123, tolerance = 0.005) 
res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "CTD") %>% levene_test(strength, batch) expect_equal(res$f, 3.850, tolerance = 0.005) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% basis_hk_ext(strength, method = "woodward-frawley", p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 13.0, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW") %>% basis_hk_ext(strength, method = "optimum-order", p = 0.90, conf = 0.95, override = "all") expect_equal(res$basis, 37.9, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% basis_anova(strength, batch, override = "number_of_groups", p = 0.99, conf = 0.95) expect_equal(res$basis, 34.6, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_1 %>% filter(condition == "ETW2") %>% basis_anova(strength, batch, override = "number_of_groups") expect_equal(res$basis, 63.2, tolerance = 0.001) res ## ----------------------------------------------------------------------------- dat_8_3_11_1_2 <- tribble( ~batch, ~strength, ~condition, 1, 79.04517, "CTD", 1, 103.2006, "RTD", 1, 63.22764, "ETW", 1, 54.09806, "ETW2", 1, 102.6014, "CTD", 1, 105.1034, "RTD", 1, 70.84454, "ETW", 1, 58.87615, "ETW2", 1, 97.79372, "CTD", 1, 105.1893, "RTD", 1, 66.43223, "ETW", 1, 61.60167, "ETW2", 1, 92.86423, "CTD", 1, 100.4189, "RTD", 1, 75.37771, "ETW", 1, 60.23973, "ETW2", 1, 117.218, "CTD", 2, 85.32319, "RTD", 1, 72.43773, "ETW", 1, 61.4808, "ETW2", 1, 108.7168, "CTD", 2, 92.69923, "RTD", 1, 68.43073, "ETW", 1, 64.55832, "ETW2", 1, 112.2773, "CTD", 2, 98.45242, "RTD", 1, 69.72524, "ETW", 2, 
57.76131, "ETW2", 1, 114.0129, "CTD", 2, 104.1014, "RTD", 2, 66.20343, "ETW", 2, 49.91463, "ETW2", 2, 106.8452, "CTD", 2, 91.51841, "RTD", 2, 60.51251, "ETW", 2, 61.49271, "ETW2", 2, 112.3911, "CTD", 2, 101.3746, "RTD", 2, 65.69334, "ETW", 2, 57.7281, "ETW2", 2, 115.5658, "CTD", 2, 101.5828, "RTD", 2, 62.73595, "ETW", 2, 62.11653, "ETW2", 2, 87.40657, "CTD", 2, 99.57384, "RTD", 2, 59.00798, "ETW", 2, 62.69353, "ETW2", 2, 102.2785, "CTD", 2, 88.84826, "RTD", 2, 62.37761, "ETW", 3, 61.38523, "ETW2", 2, 110.6073, "CTD", 3, 92.18703, "RTD", 3, 64.3947, "ETW", 3, 60.39053, "ETW2", 3, 105.2762, "CTD", 3, 101.8234, "RTD", 3, 72.8491, "ETW", 3, 59.17616, "ETW2", 3, 110.8924, "CTD", 3, 97.68909, "RTD", 3, 66.56226, "ETW", 3, 60.17616, "ETW2", 3, 108.7638, "CTD", 3, 101.5172, "RTD", 3, 66.56779, "ETW", 3, 46.47396, "ETW2", 3, 110.9833, "CTD", 3, 100.0481, "RTD", 3, 66.00123, "ETW", 3, 51.16616, "ETW2", 3, 101.3417, "CTD", 3, 102.0544, "RTD", 3, 59.62108, "ETW", 3, 100.0251, "CTD", 3, 60.61167, "ETW", 3, 57.65487, "ETW", 3, 66.51241, "ETW", 3, 64.89347, "ETW", 3, 57.73054, "ETW", 3, 68.94086, "ETW", 3, 61.63177, "ETW" ) ## ----------------------------------------------------------------------------- res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 93.64, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 87.30, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 54.33, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 47.12, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 86.19, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 79.86, tolerance = 0.001) 
expect_equal(res$basis$value[res$basis$group == "ETW"], 46.84, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 39.69, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 92.25, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 85.91, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 52.97, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_sd(strength, condition, p = 0.99, conf = 0.95, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 83.81, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 77.48, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 44.47, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 90.89, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 85.37, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 56.79, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 50.55, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 81.62, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 76.67, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == 
"ETW"], 50.98, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW2"], 45.40, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, modcv = TRUE, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 90.31, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 84.83, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 56.43, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- dat_8_3_11_1_2 %>% filter(condition != "ETW2") %>% basis_pooled_cv(strength, condition, modcv = TRUE, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis$value[res$basis$group == "CTD"], 80.57, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "RTD"], 75.69, tolerance = 0.001) expect_equal(res$basis$value[res$basis$group == "ETW"], 50.33, tolerance = 0.001) res ## ----------------------------------------------------------------------------- dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_normal(dat, strength) expect_equal(res$osl, 0.465, tolerance = 0.001) res ## ----------------------------------------------------------------------------- dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_lognormal(dat, strength) expect_equal(res$osl, 0.480, tolerance = 0.001) res ## ----------------------------------------------------------------------------- dat <- data.frame( strength = c( 137.4438, 139.5395, 150.89, 141.4474, 
141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429, 141.7023, 137.4732, 152.338, 144.1589, 128.5218 ) ) res <- anderson_darling_weibull(dat, strength) expect_equal(res$osl, 0.179, tolerance = 0.002) res ## ----------------------------------------------------------------------------- dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_normal(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 120.336, tolerance = 0.0005) res ## ----------------------------------------------------------------------------- res <- basis_normal(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.287, tolerance = 0.0005) res ## ----------------------------------------------------------------------------- dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_lognormal(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 121.710, tolerance = 0.0005) res ## ----------------------------------------------------------------------------- res <- basis_lognormal(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 129.664, tolerance = 0.0005) res ## ----------------------------------------------------------------------------- dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_weibull(x = dat, p = 0.99, conf = 0.95, override = "all") expect_equal(res$basis, 109.150, tolerance = 0.005) res ## ----------------------------------------------------------------------------- res <- basis_weibull(x = dat, p = 0.9, conf = 0.95, override 
= "all") expect_equal(res$basis, 125.441, tolerance = 0.005) res ## ----------------------------------------------------------------------------- dat <- c( 137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245, 132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023, 137.4732, 152.3380, 144.1589, 128.5218 ) res <- basis_hk_ext(x = dat, p = 0.99, conf = 0.95, method = "woodward-frawley", override = "all") expect_equal(res$basis, 99.651, tolerance = 0.005) res ## ----------------------------------------------------------------------------- res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 124.156, tolerance = 0.005) res ## ----------------------------------------------------------------------------- dat <- c( 139.6734, 143.0032, 130.4757, 144.8327, 138.7818, 136.7693, 148.636, 131.0095, 131.4933, 142.8856, 158.0198, 145.2271, 137.5991, 139.8298, 140.8557, 137.6148, 131.3614, 152.7795, 145.8792, 152.9207, 160.0989, 145.1920, 128.6383, 141.5992, 122.5297, 159.8209, 151.6720, 159.0156 ) ## ----------------------------------------------------------------------------- res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 122.36798, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- basis_hk_ext(x = head(dat, 26), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 121.57073, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- basis_hk_ext(x = head(dat, 22), p = 0.9, conf = 0.95, method = "optimum-order", override = "all") expect_equal(res$basis, 128.82397, tolerance = 0.001) res ## ----------------------------------------------------------------------------- dat <- c( 137.3603, 135.6665, 136.6914, 154.7919, 159.2037, 137.3277, 128.821, 138.6304, 138.9004, 
147.4598, 148.6622, 144.4948, 131.0851, 149.0203, 131.8232, 146.4471, 123.8124, 126.3105, 140.7609, 134.4875, 128.7508, 117.1854, 129.3088, 141.6789, 138.4073, 136.0295, 128.4164, 141.7733, 134.455, 122.7383, 136.9171, 136.9232, 138.8402, 152.8294, 135.0633, 121.052, 131.035, 138.3248, 131.1379, 147.3771, 130.0681, 132.7467, 137.1444, 141.662, 146.9363, 160.7448, 138.5511, 129.1628, 140.2939, 144.8167, 156.5918, 132.0099, 129.3551, 136.6066, 134.5095, 128.2081, 144.0896, 141.8029, 130.0149, 140.8813, 137.7864 ) res <- basis_nonpara_large_sample(x = dat, p = 0.9, conf = 0.95, override = "all") expect_equal(res$basis, 122.738297, tolerance = 0.005) res ## ----------------------------------------------------------------------------- res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9) expect_equal(res$threshold_min_indiv, 123.725, tolerance = 0.001) expect_equal(res$threshold_mean, 137.197, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415, n_sample = 9, modcv = TRUE) expect_equal(res$threshold_min_indiv, 117.024, tolerance = 0.001) expect_equal(res$threshold_mean, 135.630, tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162) expect_equal(res$threshold, c(9.115, 9.365), tolerance = 0.001) res ## ----------------------------------------------------------------------------- res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, modcv = TRUE) expect_equal(res$threshold, c(8.857, 9.623), tolerance = 0.001) res ## ----------------------------------------------------------------------------- dat_ss1987 <- data.frame( smoothness = 
c( 38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0, 39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8, 34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0, 34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8 ), lab = c(rep("A", 8), rep("B", 8), rep("C", 8), rep("D", 8)) ) dat_ss1987 ## ----------------------------------------------------------------------------- res <- ad_ksample(dat_ss1987, smoothness, lab) expect_equal(res$ad, 8.3926, tolerance = 0.001) expect_equal(res$sigma, 1.2038, tolerance = 0.001) expect_equal(res$p, 0.00226, tolerance = 0.01) res ## ----------------------------------------------------------------------------- tribble( ~n, ~kB_published, 2, 20.581, 36, 1.725, 70, 1.582, 104, 1.522, 3, 6.157, 37, 1.718, 71, 1.579, 105, 1.521, 4, 4.163, 38, 1.711, 72, 1.577, 106, 1.519, 5, 3.408, 39, 1.704, 73, 1.575, 107, 1.518, 6, 3.007, 40, 1.698, 74, 1.572, 108, 1.517, 7, 2.756, 41, 1.692, 75, 1.570, 109, 1.516, 8, 2.583, 42, 1.686, 76, 1.568, 110, 1.515, 9, 2.454, 43, 1.680, 77, 1.566, 111, 1.513, 10, 2.355, 44, 1.675, 78, 1.564, 112, 1.512, 11, 2.276, 45, 1.669, 79, 1.562, 113, 1.511, 12, 2.211, 46, 1.664, 80, 1.560, 114, 1.510, 13, 2.156, 47, 1.660, 81, 1.558, 115, 1.509, 14, 2.109, 48, 1.655, 82, 1.556, 116, 1.508, 15, 2.069, 49, 1.650, 83, 1.554, 117, 1.507, 16, 2.034, 50, 1.646, 84, 1.552, 118, 1.506, 17, 2.002, 51, 1.642, 85, 1.551, 119, 1.505, 18, 1.974, 52, 1.638, 86, 1.549, 120, 1.504, 19, 1.949, 53, 1.634, 87, 1.547, 121, 1.503, 20, 1.927, 54, 1.630, 88, 1.545, 122, 1.502, 21, 1.906, 55, 1.626, 89, 1.544, 123, 1.501, 22, 1.887, 56, 1.623, 90, 1.542, 124, 1.500, 23, 1.870, 57, 1.619, 91, 1.540, 125, 1.499, 24, 1.854, 58, 1.616, 92, 1.539, 126, 1.498, 25, 1.839, 59, 1.613, 93, 1.537, 127, 1.497, 26, 1.825, 60, 1.609, 94, 1.536, 128, 1.496, 27, 1.812, 61, 1.606, 95, 1.534, 129, 1.495, 28, 1.800, 62, 1.603, 96, 1.533, 130, 1.494, 29, 1.789, 63, 1.600, 97, 1.531, 131, 1.493, 30, 1.778, 64, 1.597, 98, 1.530, 132, 1.492, 31, 1.768, 65, 1.595, 99, 1.529, 133, 1.492, 
32, 1.758, 66, 1.592, 100, 1.527, 134, 1.491, 33, 1.749, 67, 1.589, 101, 1.526, 135, 1.490, 34, 1.741, 68, 1.587, 102, 1.525, 136, 1.489, 35, 1.733, 69, 1.584, 103, 1.523, 137, 1.488 ) %>% arrange(n) %>% mutate(kB_cmstatr = k_factor_normal(n, p = 0.9, conf = 0.95)) %>% rowwise() %>% mutate(diff = expect_equal(kB_published, kB_cmstatr, tolerance = 0.001)) %>% select(-c(diff)) ## ----------------------------------------------------------------------------- tribble( ~n, ~kA_published, 2, 37.094, 36, 2.983, 70, 2.765, 104, 2.676, 3, 10.553, 37, 2.972, 71, 2.762, 105, 2.674, 4, 7.042, 38, 2.961, 72, 2.758, 106, 2.672, 5, 5.741, 39, 2.951, 73, 2.755, 107, 2.671, 6, 5.062, 40, 2.941, 74, 2.751, 108, 2.669, 7, 4.642, 41, 2.932, 75, 2.748, 109, 2.667, 8, 4.354, 42, 2.923, 76, 2.745, 110, 2.665, 9, 4.143, 43, 2.914, 77, 2.742, 111, 2.663, 10, 3.981, 44, 2.906, 78, 2.739, 112, 2.662, 11, 3.852, 45, 2.898, 79, 2.736, 113, 2.660, 12, 3.747, 46, 2.890, 80, 2.733, 114, 2.658, 13, 3.659, 47, 2.883, 81, 2.730, 115, 2.657, 14, 3.585, 48, 2.876, 82, 2.727, 116, 2.655, 15, 3.520, 49, 2.869, 83, 2.724, 117, 2.654, 16, 3.464, 50, 2.862, 84, 2.721, 118, 2.652, 17, 3.414, 51, 2.856, 85, 2.719, 119, 2.651, 18, 3.370, 52, 2.850, 86, 2.716, 120, 2.649, 19, 3.331, 53, 2.844, 87, 2.714, 121, 2.648, 20, 3.295, 54, 2.838, 88, 2.711, 122, 2.646, 21, 3.263, 55, 2.833, 89, 2.709, 123, 2.645, 22, 3.233, 56, 2.827, 90, 2.706, 124, 2.643, 23, 3.206, 57, 2.822, 91, 2.704, 125, 2.642, 24, 3.181, 58, 2.817, 92, 2.701, 126, 2.640, 25, 3.158, 59, 2.812, 93, 2.699, 127, 2.639, 26, 3.136, 60, 2.807, 94, 2.697, 128, 2.638, 27, 3.116, 61, 2.802, 95, 2.695, 129, 2.636, 28, 3.098, 62, 2.798, 96, 2.692, 130, 2.635, 29, 3.080, 63, 2.793, 97, 2.690, 131, 2.634, 30, 3.064, 64, 2.789, 98, 2.688, 132, 2.632, 31, 3.048, 65, 2.785, 99, 2.686, 133, 2.631, 32, 3.034, 66, 2.781, 100, 2.684, 134, 2.630, 33, 3.020, 67, 2.777, 101, 2.682, 135, 2.628, 34, 3.007, 68, 2.773, 102, 2.680, 136, 2.627, 35, 2.995, 69, 2.769, 103, 
2.678, 137, 2.626 ) %>% arrange(n) %>% mutate(kA_cmstatr = k_factor_normal(n, p = 0.99, conf = 0.95)) %>% rowwise() %>% mutate(diff = expect_equal(kA_published, kA_cmstatr, tolerance = 0.001)) %>% select(-c(diff)) ## ----------------------------------------------------------------------------- tribble( ~n, ~z, 3, 28.820048, 5, 6.1981307, 7, 3.4780112, 9, 2.5168762, 11, 2.0312134, 13, 1.7377374, 15, 1.5403989, 17, 1.3979806, 19, 1.2899172, 21, 1.2048089, 23, 1.1358259, 25, 1.0786237, 27, 1.0303046, ) %>% rowwise() %>% mutate( z_calc = hk_ext_z(n, 1, ceiling(n / 2), p = 0.90, conf = 0.95) ) %>% mutate(diff = expect_equal(z, z_calc, tolerance = 0.0001)) %>% select(-c(diff)) ## ----------------------------------------------------------------------------- tribble( ~n, ~k, 2, 80.0038, 4, 9.49579, 6, 5.57681, 8, 4.25011, 10, 3.57267, 12, 3.1554, 14, 2.86924, 16, 2.65889, 18, 2.4966, 20, 2.36683, 25, 2.131, 30, 1.96975, 35, 1.85088, 40, 1.75868, 45, 1.68449, 50, 1.62313, 60, 1.5267, 70, 1.45352, 80, 1.39549, 90, 1.34796, 100, 1.30806, 120, 1.24425, 140, 1.19491, 160, 1.15519, 180, 1.12226, 200, 1.09434, 225, 1.06471, 250, 1.03952, 275, 1.01773 ) %>% rowwise() %>% mutate(z_calc = hk_ext_z(n, 1, n, 0.99, 0.95)) %>% mutate(diff = expect_lt(abs(k - z_calc), 0.0001)) %>% select(-c(diff)) ## ----------------------------------------------------------------------------- tribble( ~n, ~r, ~k, 2, 2, 35.177, 3, 3, 7.859, 4, 4, 4.505, 5, 4, 4.101, 6, 5, 3.064, 7, 5, 2.858, 8, 6, 2.382, 9, 6, 2.253, 10, 6, 2.137, 11, 7, 1.897, 12, 7, 1.814, 13, 7, 1.738, 14, 8, 1.599, 15, 8, 1.540, 16, 8, 1.485, 17, 8, 1.434, 18, 9, 1.354, 19, 9, 1.311, 20, 10, 1.253, 21, 10, 1.218, 22, 10, 1.184, 23, 11, 1.143, 24, 11, 1.114, 25, 11, 1.087, 26, 11, 1.060, 27, 11, 1.035, 28, 12, 1.010 ) %>% rowwise() %>% mutate(r_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$j) %>% mutate(k_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$z) ## ----------------------------------------------------------------------------- tribble( ~n, ~rb, 
29, 1, 46, 2, 61, 3, 76, 4, 89, 5, 103, 6, 116, 7, 129, 8, 142, 9, 154, 10, 167, 11, 179, 12, 191, 13, 203, 14 ) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.9, 0.95)) %>% mutate(test = expect_equal(rb, r_calc)) %>% select(-c(test)) ## ----------------------------------------------------------------------------- tribble( ~n, ~ra, 299, 1, 473, 2, 628, 3, 773, 4, 913, 5 ) %>% rowwise() %>% mutate(r_calc = nonpara_binomial_rank(n, 0.99, 0.95)) %>% mutate(test = expect_equal(ra, r_calc)) %>% select(-c(test)) ## ----------------------------------------------------------------------------- read.csv(system.file("extdata", "k1.vangel.csv", package = "cmstatr")) %>% gather(n, k1, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))) %>% inner_join( read.csv(system.file("extdata", "k2.vangel.csv", package = "cmstatr")) %>% gather(n, k2, X2:X10) %>% mutate(n = as.numeric(substring(n, 2))), by = c("n" = "n", "alpha" = "alpha") ) %>% filter(n >= 5 & (alpha == 0.01 | alpha == 0.05)) %>% group_by(n, alpha) %>% nest() %>% mutate(equiv = map2(alpha, n, ~k_equiv(.x, .y))) %>% mutate(k1_calc = map(equiv, function(e) e[1]), k2_calc = map(equiv, function(e) e[2])) %>% select(-c(equiv)) %>% unnest(cols = c(data, k1_calc, k2_calc)) %>% mutate(check = expect_equal(k1, k1_calc, tolerance = 0.0001)) %>% select(-c(check)) %>% mutate(check = expect_equal(k2, k2_calc, tolerance = 0.0001)) %>% select(-c(check)) ## ----------------------------------------------------------------------------- sessionInfo() cmstatr/inst/doc/adktest.Rmd0000644000176200001440000001003614477217317015571 0ustar liggesusers--- title: "Anderson-Darling k-Sample Test" author: "Stefan Kloppenborg, Jeffrey Borlik" date: "20-Jan-2019" output: rmarkdown::html_vignette bibliography: bibliography.json csl: ieee.csl vignette: > %\VignetteIndexEntry{Anderson-Darling k-Sample Test} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, 
comment = "#>" ) ``` This vignette explores the Anderson--Darling k-Sample test. CMH-17-1G [@CMH-17-1G] provides a formulation for this test that appears different than the formulation given by Scholz and Stephens in their 1987 paper [@Stephens1987]. Both references use different nomenclature, which is summarized as follows: Term | CMH-17-1G | Scholz and Stephens ---------------------------------------------------|-----------------------|--------------------- A sample | $i$ | $i$ The number of samples | $k$ | $k$ An observation within a sample | $j$ | $j$ The number of observations within the sample $i$ | $n_i$ | $n_i$ The total number of observations within all samples| $n$ | $N$ Distinct values in combined data, ordered | $z_{(1)}$...$z_{(L)}$ | $Z_1^*$...$Z_L^*$ The number of distinct values in the combined data | $L$ | $L$ Given the possibility of ties in the data, the discrete version of the test must be used Scholz and Stephens (1987) give the test statistic as: $$ A_{a k N}^2 = \frac{N - 1}{N}\sum_{i=1}^k \frac{1}{n_i}\sum_{j=1}^{L}\frac{l_j}{N}\frac{\left(N M_{a i j} - n_i B_{a j}\right)^2}{B_{a j}\left(N - B_{a j}\right) - N l_j / 4} $$ CMH-17-1G gives the test statistic as: $$ ADK = \frac{n - 1}{n^2\left(k - 1\right)}\sum_{i=1}^k\frac{1}{n_i}\sum_{j=1}^L h_j \frac{\left(n F_{i j} - n_i H_j\right)^2}{H_j \left(n - H_j\right) - n h_j / 4} $$ By inspection, the CMH-17-1G version of this test statistic contains an extra factor of $\frac{1}{\left(k - 1\right)}$. 
Scholz and Stephens indicate that one rejects $H_0$ at a significance level of $\alpha$ when: $$ \frac{A_{a k N}^2 - \left(k - 1\right)}{\sigma_N} \ge t_{k - 1}\left(\alpha\right) $$ This can be rearranged to give a critical value: $$ A_{c r i t}^2 = \left(k - 1\right) + \sigma_N t_{k - 1}\left(\alpha\right) $$ CHM-17-1G gives the critical value for $ADK$ for $\alpha=0.025$ as: $$ ADC = 1 + \sigma_n \left(1.96 + \frac{1.149}{\sqrt{k - 1}} - \frac{0.391}{k - 1}\right) $$ The definition of $\sigma_n$ from the two sources differs by a factor of $\left(k - 1\right)$. The value in parentheses in the CMH-17-1G critical value corresponds to the interpolation formula for $t_m\left(\alpha\right)$ given in Scholz and Stephen's paper. It should be noted that this is *not* the student's t-distribution, but rather a distribution referred to as the $T_m$ distribution. The `cmstatr` package use the package `kSamples` to perform the k-sample Anderson--Darling tests. This package uses the original formulation from Scholz and Stephens, so the test statistic will differ from that given software based on the CMH-17-1G formulation by a factor of $\left(k-1\right)$. For comparison, [SciPy's implementation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html) also uses the original Scholz and Stephens formulation. The statistic that it returns, however, is the normalized statistic, $\left[A_{a k N}^2 - \left(k - 1\right)\right] / \sigma_N$, rather than `kSamples`'s $A_{a k N}^2$ value. To be consistent, SciPy also returns the critical values $t_{k-1}(\alpha)$ directly. (Currently, SciPy also floors/caps the returned p-value at 0.1% / 25%.) The values of $k$ and $\sigma_N$ are available in `cmstatr`'s `ad_ksample` return value, if an exact comparison to Python SciPy is necessary. The conclusions about the null hypothesis drawn, however, will be the same, whether R or CMH-17-1G or SciPy. 
# References cmstatr/inst/doc/hk_ext.html0000644000176200001440000046474414574576017015664 0ustar liggesusers Extended Hanson-Koopmans

Extended Hanson-Koopmans

Stefan Kloppenborg

2021-09-30

In this vignette, we’ll use the following packages:

library(dplyr)
library(ggplot2)
library(purrr)
library(tidyr)

The Extended Hanson–Koopmans method is a nonparametric method of determining tolerance limits (such as A- or B-Basis values). This method does not assume any particular distribution, but does require that \(-\log\left(F\right)\) is convex, where \(F\) is the cumulative distribution function (CDF) of the distribution.

The functions hk_ext_z, hk_ext_z_j_opt and basis_hk_ext in cmstatr are based on the Extended Hanson–Koopmans method, developed by Vangel [1]. This is an extension of the method published in [2].

Tolerance limits (Basis values) calculated using the Extended Hanson–Koopmans method are based on two order statistics 1, \(i\) and \(j\), and a factor, \(z\). The function hk_ext_z_j_opt and the function basis_hk_ext (with method = "optimum-order") set the first of these order statistics to the first (lowest) order statistic, and a second order statistic determined by minimizing the following function:

\[ \left| z E\left(X_{\left(1\right)}\right) + \left(1 - z\right) E\left(X_{\left(j\right)}\right) - \Phi\left(p\right)\right| \]

Where \(E\left(X_{(i)}\right)\) is the expected value of the \(i\)th order statistic for a sample drawn from the standard normal distribution, and \(\Phi\left(p\right)\) is the CDF of a standard normal distribution for the content of the tolerance limit (i.e. \(p=0.9\) for B-Basis).

The value of \(z\) is calculated based on the sample size, \(n\), the two order statistics \(i\) and \(j\), the content \(p\) and the confidence. The calculation is performed using the method in [1] and implemented in hk_ext_z. The value of \(j\) is very sensitive to the way that the expected value of the order statistics is calculated, and may be sensitive to numerical precision.

In version 0.8.0 of cmstatr and prior, the expected value of an order statistic for a sample drawn from a standard normal distribution was determined in a crude way. After version 0.8.0, the method in [3] is used. These two methods produce different values of \(j\) for certain sample sizes. Additionally, a table of \(j\) and \(z\) values for various sample sizes is published in CMH-17-1G2 [4]. This table gives slightly different values of \(j\) for some sample sizes.

The values of \(j\) and \(z\) produced by cmstatr in version 0.8.0 and before, the values produced after version 0.8.0 and the value published in CMH-17-1G are shown below. All of these values are for B-Basis (90% content, 95% confidence).

factors <- tribble(
  ~n, ~j_pre_080, ~z_pre_080, ~j_post_080, ~z_post_080, ~j_cmh, ~z_cmh,
  2, 2, 35.1768141883907, 2, 35.1768141883907, 2, 35.177,
  3, 3, 7.85866787768029, 3, 7.85866787768029, 3, 7.859,
  4, 4, 4.50522447199018, 4, 4.50522447199018, 4, 4.505,
  5, 4, 4.10074820079326, 4, 4.10074820079326, 4, 4.101,
  6, 5, 3.06444416024793, 5, 3.06444416024793, 5, 3.064,
  7, 5, 2.85751000593839, 5, 2.85751000593839, 5, 2.858,
  8, 6, 2.38240998122575, 6, 2.38240998122575, 6, 2.382,
  9, 6, 2.25292053841772, 6, 2.25292053841772, 6, 2.253,
  10, 7, 1.98762060673102, 6, 2.13665759924781, 6, 2.137,
  11, 7, 1.89699586212496, 7, 1.89699586212496, 7, 1.897,
  12, 7, 1.81410756892749, 7, 1.81410756892749, 7, 1.814,
  13, 8, 1.66223343216608, 7, 1.73773765993598, 7, 1.738,
  14, 8, 1.59916281901889, 8, 1.59916281901889, 8, 1.599,
  15, 8, 1.54040000806181, 8, 1.54040000806181, 8, 1.54,
  16, 9, 1.44512878109546, 8, 1.48539432060546, 8, 1.485,
  17, 9, 1.39799975474842, 9, 1.39799975474842, 8, 1.434,
  18, 9, 1.35353033609361, 9, 1.35353033609361, 9, 1.354,
  19, 10, 1.28991705486727, 9, 1.31146980117942, 9, 1.311,
  20, 10, 1.25290765871981, 9, 1.27163203813793, 10, 1.253,
  21, 10, 1.21771654027026, 10, 1.21771654027026, 10, 1.218,
  22, 11, 1.17330587650406, 10, 1.18418267046374, 10, 1.184,
  23, 11, 1.14324511741536, 10, 1.15218647199938, 11, 1.143,
  24, 11, 1.11442082880151, 10, 1.12153586685854, 11, 1.114,
  25, 11, 1.08682185727661, 11, 1.08682185727661, 11, 1.087,
  26, 11, 1.06032912052507, 11, 1.06032912052507, 11, 1.06,
  27, 12, 1.03307994274081, 11, 1.03485308510789, 11, 1.035,
  28, 12, 1.00982188136729, 11, 1.01034609051393, 12, 1.01
)

For the sample sizes where \(j\) is the same for each approach, the values of \(z\) are also equal within a small tolerance.

factors %>%
  filter(j_pre_080 == j_post_080 & j_pre_080 == j_cmh)
#> # A tibble: 16 × 7
#>        n j_pre_080 z_pre_080 j_post_080 z_post_080 j_cmh z_cmh
#>    <dbl>     <dbl>     <dbl>      <dbl>      <dbl> <dbl> <dbl>
#>  1     2         2     35.2           2      35.2      2 35.2 
#>  2     3         3      7.86          3       7.86     3  7.86
#>  3     4         4      4.51          4       4.51     4  4.50
#>  4     5         4      4.10          4       4.10     4  4.10
#>  5     6         5      3.06          5       3.06     5  3.06
#>  6     7         5      2.86          5       2.86     5  2.86
#>  7     8         6      2.38          6       2.38     6  2.38
#>  8     9         6      2.25          6       2.25     6  2.25
#>  9    11         7      1.90          7       1.90     7  1.90
#> 10    12         7      1.81          7       1.81     7  1.81
#> 11    14         8      1.60          8       1.60     8  1.60
#> 12    15         8      1.54          8       1.54     8  1.54
#> 13    18         9      1.35          9       1.35     9  1.35
#> 14    21        10      1.22         10       1.22    10  1.22
#> 15    25        11      1.09         11       1.09    11  1.09
#> 16    26        11      1.06         11       1.06    11  1.06

The sample sizes where the value of \(j\) differs are as follows:

factor_diff <- factors %>%
  filter(j_pre_080 != j_post_080 | j_pre_080 != j_cmh | j_post_080 != j_cmh)
factor_diff
#> # A tibble: 11 × 7
#>        n j_pre_080 z_pre_080 j_post_080 z_post_080 j_cmh z_cmh
#>    <dbl>     <dbl>     <dbl>      <dbl>      <dbl> <dbl> <dbl>
#>  1    10         7      1.99          6       2.14     6  2.14
#>  2    13         8      1.66          7       1.74     7  1.74
#>  3    16         9      1.45          8       1.49     8  1.48
#>  4    17         9      1.40          9       1.40     8  1.43
#>  5    19        10      1.29          9       1.31     9  1.31
#>  6    20        10      1.25          9       1.27    10  1.25
#>  7    22        11      1.17         10       1.18    10  1.18
#>  8    23        11      1.14         10       1.15    11  1.14
#>  9    24        11      1.11         10       1.12    11  1.11
#> 10    27        12      1.03         11       1.03    11  1.03
#> 11    28        12      1.01         11       1.01    12  1.01

While there are differences in the three implementations, it’s not clear how much these differences will matter in terms of the tolerance limits calculated. This can be investigated through simulation.

Simulation with Normally Distributed Data

First, we’ll generate a large number (10,000) of samples of sample size \(n\) from a normal distribution. Since we’re generating the samples, we know the true population parameters, so we can calculate the true population quantiles. We’ll use the three sets of \(j\) and \(z\) values to compute tolerance limits and compare those tolerance limits to the population quantiles. The proportion of the calculated tolerance limits below the population quantiles should be equal to the selected confidence. We’ll restrict the simulation study to the sample sizes where the values of \(j\) and \(z\) differ in the three implementations of this method, and we’ll consider B-Basis (90% content, 95% confidence).

mu_normal <- 100
sd_normal <- 6

set.seed(1234567)  # make this reproducible

tolerance_limit <- function(x, j, z) {
  x[j] * (x[1] / x[j]) ^ z
}

sim_normal <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080,
                                             j_post_080, z_post_080,
                                             j_cmh, z_cmh) {
  map_dfr(1:10000, function(i_sim) {
    x <- sort(rnorm(n, mu_normal, sd_normal))
    tibble(
      n = n,
      b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080),
      b_post_080 = tolerance_limit(x, j_post_080, z_post_080),
      b_cmh = tolerance_limit(x, j_cmh, z_cmh),
      x = list(x)
    )
  }
  )
})
sim_normal
#> # A tibble: 110,000 × 5
#>        n b_pre_080 b_post_080 b_cmh x         
#>    <dbl>     <dbl>      <dbl> <dbl> <list>    
#>  1    10      78.4       77.7  77.7 <dbl [10]>
#>  2    10      82.8       82.0  82.0 <dbl [10]>
#>  3    10      83.3       83.0  83.0 <dbl [10]>
#>  4    10      78.4       77.2  77.2 <dbl [10]>
#>  5    10      87.3       86.6  86.6 <dbl [10]>
#>  6    10      92.3       93.2  93.2 <dbl [10]>
#>  7    10      75.2       77.9  77.9 <dbl [10]>
#>  8    10      75.4       73.9  73.9 <dbl [10]>
#>  9    10      75.5       75.1  75.1 <dbl [10]>
#> 10    10      76.4       78.4  78.4 <dbl [10]>
#> # … with 109,990 more rows

One can see that the tolerance limits calculated with each set of factors for (most) data sets is different. However, this does not necessarily mean that any set of factors is more or less correct.

The distribution of the tolerance limits for each sample size is as follows:

sim_normal %>%
  pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>%
  ggplot(aes(x = value, color = Factors)) +
  geom_density() +
  facet_wrap(n ~ .) +
  theme_bw() +
  ggtitle("Distribution of Tolerance Limits for Various Values of n")
plot of chunk distribution-normal
plot of chunk distribution-normal

For all samples sizes, the distribution of tolerance limits is actually very similar between all three sets of factors.

The true population quantile can be calculated as follows:

x_p_normal <- qnorm(0.9, mu_normal, sd_normal, lower.tail = FALSE)
x_p_normal
#> [1] 92.31069

The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that in all cases the tolerance limits are conservative, and also that each set of factors produces similar levels of conservatism.

sim_normal %>%
  mutate(below_pre_080 = b_pre_080 < x_p_normal,
         below_post_080 = b_post_080 < x_p_normal,
         below_cmh = b_cmh < x_p_normal) %>%
  group_by(n) %>%
  summarise(
    prop_below_pre_080 = sum(below_pre_080) / n(),
    prop_below_post_080 = sum(below_post_080) / n(),
    prop_below_cmh = sum(below_cmh) / n()
  )
#> # A tibble: 11 × 4
#>        n prop_below_pre_080 prop_below_post_080 prop_below_cmh
#>    <dbl>              <dbl>               <dbl>          <dbl>
#>  1    10              0.984               0.980          0.980
#>  2    13              0.979               0.975          0.975
#>  3    16              0.969               0.967          0.967
#>  4    17              0.973               0.973          0.971
#>  5    19              0.962               0.961          0.961
#>  6    20              0.964               0.962          0.964
#>  7    22              0.961               0.960          0.960
#>  8    23              0.960               0.959          0.960
#>  9    24              0.962               0.961          0.962
#> 10    27              0.954               0.953          0.954
#> 11    28              0.952               0.952          0.952

Simulation with Weibull Data

Next, we’ll do a similar simulation using data drawn from a Weibull distribution. Again, we’ll generate 10,000 samples for each sample size.

shape_weibull <- 60
scale_weibull <- 100

set.seed(234568)  # make this reproducible

sim_weibull <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080,
                                              j_post_080, z_post_080,
                                              j_cmh, z_cmh) {
  map_dfr(1:10000, function(i_sim) {
    x <- sort(rweibull(n, shape_weibull, scale_weibull))
    tibble(
      n = n,
      b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080),
      b_post_080 = tolerance_limit(x, j_post_080, z_post_080),
      b_cmh = tolerance_limit(x, j_cmh, z_cmh),
      x = list(x)
    )
  }
  )
})
sim_weibull
#> # A tibble: 110,000 × 5
#>        n b_pre_080 b_post_080 b_cmh x         
#>    <dbl>     <dbl>      <dbl> <dbl> <list>    
#>  1    10      95.3       95.1  95.1 <dbl [10]>
#>  2    10      88.5       88.3  88.3 <dbl [10]>
#>  3    10      89.7       89.3  89.3 <dbl [10]>
#>  4    10      94.7       94.4  94.4 <dbl [10]>
#>  5    10      96.9       96.9  96.9 <dbl [10]>
#>  6    10      93.6       93.2  93.2 <dbl [10]>
#>  7    10      86.1       85.5  85.5 <dbl [10]>
#>  8    10      91.9       91.9  91.9 <dbl [10]>
#>  9    10      93.7       93.4  93.4 <dbl [10]>
#> 10    10      90.9       90.4  90.4 <dbl [10]>
#> # … with 109,990 more rows

The distribution of the tolerance limits for each sample size is as follows. Once again, we see that the distribution of tolerance limits is nearly identical when each of the three sets of factors are used.

sim_weibull %>%
  pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>%
  ggplot(aes(x = value, color = Factors)) +
  geom_density() +
  facet_wrap(n ~ .) +
  theme_bw() +
  ggtitle("Distribution of Tolerance Limits for Various Values of n")
plot of chunk distribution-Weibull
plot of chunk distribution-Weibull

The true population quantile can be calculated as follows:

x_p_weibull <- qweibull(0.9, shape_weibull, scale_weibull, lower.tail = FALSE)
x_p_weibull
#> [1] 96.31885

The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that in all cases roughly 95% or more of the tolerance limits calculated for each sample size are below the population quantile. We also see very similar proportions for each of the three sets of factors considered.

sim_weibull %>%
  mutate(below_pre_080 = b_pre_080 < x_p_weibull,
         below_post_080 = b_post_080 < x_p_weibull,
         below_cmh = b_cmh < x_p_weibull) %>%
  group_by(n) %>%
  summarise(
    prop_below_pre_080 = sum(below_pre_080) / n(),
    prop_below_post_080 = sum(below_post_080) / n(),
    prop_below_cmh = sum(below_cmh) / n()
  )
#> # A tibble: 11 × 4
#>        n prop_below_pre_080 prop_below_post_080 prop_below_cmh
#>    <dbl>              <dbl>               <dbl>          <dbl>
#>  1    10              0.97                0.965          0.965
#>  2    13              0.966               0.964          0.964
#>  3    16              0.959               0.959          0.959
#>  4    17              0.961               0.961          0.96 
#>  5    19              0.957               0.956          0.956
#>  6    20              0.955               0.954          0.955
#>  7    22              0.953               0.952          0.952
#>  8    23              0.950               0.950          0.950
#>  9    24              0.953               0.953          0.953
#> 10    27              0.952               0.951          0.951
#> 11    28              0.950               0.950          0.950

Conclusion

The values of \(j\) and \(z\) computed by the hk_ext_z_j_opt function differ for certain sample sizes (\(n\)) before and after version 0.8.0. Furthermore, for certain sample sizes, these values differ from those published in CMH-17-1G. The simulation study presented in this vignette shows that the tolerance limit (Basis value) might differ for any individual sample based on which set of \(j\) and \(z\) are used. However, each set of factors produces tolerance limits that are either correct or conservative. These three methods have very similar performance, and tolerance limits produced with any of these three methods are equally valid.

Session Info

This vignette is computed in advance. A system with the following configuration was used:

sessionInfo()
#> R version 4.1.1 (2021-08-10)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C               LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
#>  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8    LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] tidyr_1.1.4   purrr_0.3.4   ggplot2_3.3.5 dplyr_1.0.7  
#> 
#> loaded via a namespace (and not attached):
#>  [1] tidyselect_1.1.1  xfun_0.26         remotes_2.4.0     colorspace_2.0-2  vctrs_0.3.8       generics_0.1.0   
#>  [7] testthat_3.0.4    htmltools_0.5.2   usethis_2.0.1     yaml_2.2.1        utf8_1.2.2        rlang_0.4.11.9001
#> [13] pkgbuild_1.2.0    pillar_1.6.3      glue_1.4.2        withr_2.4.2       DBI_1.1.1         waldo_0.3.1      
#> [19] cmstatr_0.9.0     sessioninfo_1.1.1 lifecycle_1.0.1   stringr_1.4.0     munsell_0.5.0     gtable_0.3.0     
#> [25] devtools_2.4.2    memoise_2.0.0     evaluate_0.14     labeling_0.4.2    knitr_1.35        callr_3.7.0      
#> [31] fastmap_1.1.0     ps_1.6.0          curl_4.3.1        fansi_0.5.0       highr_0.9         scales_1.1.1     
#> [37] kSamples_1.2-9    cachem_1.0.6      desc_1.4.0        pkgload_1.2.2     farver_2.1.0      fs_1.5.0         
#> [43] digest_0.6.28     stringi_1.7.4     processx_3.5.2    SuppDists_1.1-9.5 rprojroot_2.0.2   grid_4.1.1       
#> [49] cli_3.0.1         tools_4.1.1       magrittr_2.0.1    tibble_3.1.4      crayon_1.4.1      pkgconfig_2.0.3  
#> [55] MASS_7.3-54       ellipsis_0.3.2    prettyunits_1.1.1 assertthat_0.2.1  rmarkdown_2.11    rstudioapi_0.13  
#> [61] R6_2.5.1          compiler_4.1.1

References

[1]
M. Vangel, One-Sided Nonparametric Tolerance Limits,” Communications in Statistics - Simulation and Computation, vol. 23, no. 4. pp. 1137–1154, 1994.
[2]
D. L. Hanson and L. H. Koopmans, Tolerance Limits for the Class of Distributions with Increasing Hazard Rates,” The Annals of Mathematical Statistics, vol. 35, no. 4. pp. 1561–1570, 1964.
[3]
H. L. Harter, Expected values of normal order statistics,” Biometrika, vol. 48, no. 1/2. pp. 151–165, 1961.
[4]
“Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012.

  1. The \(i\)th order statistic is the \(i\)th lowest value in the sample.↩︎

  2. Note that CMH-17-1G uses the symbols \(r\) and \(k\) instead of \(j\) and \(z\).↩︎

cmstatr/inst/doc/cmstatr_Graphing.R0000644000176200001440000001536014574575776017130 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6 ) # If any of the required packages are unavailable, # don't re-run the code # nolint start required <- c("dplyr", "ggplot2", "tidyr", "cmstatr") if (!all(unlist(lapply(required, function(pkg) { requireNamespace(pkg, quietly = TRUE)} )))) { knitr::opts_chunk$set(eval = FALSE) } # nolint end ## ----message=FALSE------------------------------------------------------------ library(dplyr) library(ggplot2) library(tidyr) library(cmstatr) ## ----------------------------------------------------------------------------- dat <- carbon.fabric.2 %>% filter(test == "WT") %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) dat %>% head(10) ## ----------------------------------------------------------------------------- b_basis_pooled <- dat %>% basis_pooled_cv(strength, condition, batch, override = c("between_group_variability", "normalized_variance_equal")) b_basis_pooled ## ----------------------------------------------------------------------------- b_basis_pooled$basis ## ----------------------------------------------------------------------------- dat %>% ggplot(aes(x = batch, y = strength)) + geom_boxplot() + geom_jitter(width = 0.25) + geom_hline(aes(yintercept = value), data = b_basis_pooled$basis %>% rename(condition = group), color = "blue") + facet_grid(. 
~ condition) + theme_bw() + ggtitle("Batch Plot") ## ----------------------------------------------------------------------------- dat %>% ggplot(aes(x = strength, color = condition)) + stat_ecdf(geom = "point") + coord_flip() + theme_bw() + ggtitle("Quantile Plot") ## ----------------------------------------------------------------------------- dat %>% ggplot(aes(x = strength, color = condition)) + stat_normal_surv_func() + stat_esf() + theme_bw() + ggtitle("Normal Survival Function Plot") ## ----------------------------------------------------------------------------- dat %>% group_by(condition) %>% mutate(norm.score = scale(strength)) %>% ggplot(aes(x = norm.score, y = strength, colour = condition)) + geom_point() + ggtitle("Normal Scores Plot") + theme_bw() ## ----------------------------------------------------------------------------- dat %>% ggplot(aes(sample = strength, colour = condition)) + geom_qq() + geom_qq_line() + ggtitle("Q-Q Plot") + theme_bw() ## ----------------------------------------------------------------------------- b_basis_fcn <- tribble( ~condition, ~fcn, ~args, "CTD", "basis_normal", list(override = c("between_batch_variability")), "RTD", "basis_normal", list(override = c("between_batch_variability")), "ETW", "basis_hk_ext", NULL, "ETW2", "basis_normal", list(override = c("between_batch_variability")) ) a_basis_fcn <- tribble( ~condition, ~fcn, ~args, "CTD", "basis_normal", list(override = c("between_batch_variability")), "RTD", "basis_normal", list(override = c("between_batch_variability")), "ETW", "basis_hk_ext", list(method = "woodward-frawley"), "ETW2", "basis_normal", list(override = c("between_batch_variability")) ) ## ----------------------------------------------------------------------------- single_point_fcn <- function(group_x, group_batch, cond, basis_fcn, p) { fcn <- basis_fcn$fcn[basis_fcn$condition == cond[1]] extra_args <- basis_fcn$args[basis_fcn$condition == cond[1]] args <- c( list(x = group_x, batch = group_batch, p = 
p), unlist(extra_args)) basis <- do.call(fcn, args) basis$basis } single_point_results <- dat %>% group_by(condition) %>% summarise(single_point_b_basis = single_point_fcn( strength, batch, condition, b_basis_fcn, 0.90), single_point_a_basis = single_point_fcn( strength, batch, condition, a_basis_fcn, 0.99), minimum = min(strength), mean = mean(strength)) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) single_point_results ## ----------------------------------------------------------------------------- a_basis_pooled <- dat %>% basis_pooled_cv(strength, condition, batch, p = 0.99, override = c("between_group_variability", "normalized_variance_equal")) a_basis_pooled ## ----------------------------------------------------------------------------- a_basis_pooled$basis ## ----------------------------------------------------------------------------- a_basis_pooled$basis %>% rename(condition = group, b_basis_pooled = value) ## ----------------------------------------------------------------------------- a_basis_pooled_results <- a_basis_pooled$basis %>% rename(condition = group, a_basis_pooled = value) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) a_basis_pooled_results ## ----------------------------------------------------------------------------- b_basis_pooled_results <- b_basis_pooled$basis %>% rename(condition = group, b_basis_pooled = value) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) b_basis_pooled_results ## ----------------------------------------------------------------------------- single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") ## ----------------------------------------------------------------------------- single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") %>% pivot_longer(cols = 
single_point_b_basis:a_basis_pooled) ## ----------------------------------------------------------------------------- single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") %>% pivot_longer(cols = single_point_b_basis:a_basis_pooled) %>% ggplot(aes(x = condition, y = value)) + geom_boxplot(aes(y = strength), data = dat) + geom_point(aes(shape = name, color = name)) + ggtitle("Property Graph") + theme_bw() ## ----------------------------------------------------------------------------- carbon.fabric.2 %>% mutate(panel = as.character(panel)) %>% filter(test == "WT") %>% nested_data_plot(strength, groups = c(batch, panel)) ## ----------------------------------------------------------------------------- carbon.fabric.2 %>% mutate(panel = as.character(panel)) %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch, panel), fill = batch, color = panel) cmstatr/inst/doc/cmstatr_Tutorial.R0000644000176200001440000001624014574576000017147 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # If any of the required packages are unavailable, # don't re-run the code # nolint start required <- c("dplyr", "ggplot2", "tidyr", "cmstatr", "purrr") if (!all(unlist(lapply(required, function(pkg) { requireNamespace(pkg, quietly = TRUE)} )))) { knitr::opts_chunk$set(eval = FALSE) } #nolint end ## ----message=FALSE------------------------------------------------------------ library(cmstatr) library(dplyr) library(ggplot2) library(tidyr) library(purrr) ## ----------------------------------------------------------------------------- carbon.fabric.2 %>% head(10) ## ----------------------------------------------------------------------------- norm_data <- carbon.fabric.2 %>% filter(test == "WT" | test == "FC") %>% mutate(strength.norm = normalize_ply_thickness(strength, 
thickness / nplies, 0.0079)) norm_data %>% head(10) ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "WT" & condition == "RTD") %>% anderson_darling_normal(strength.norm) ## ----include=FALSE------------------------------------------------------------ # Verify that the AD test always provides the same conclusion # If this assertion fails, the Vignette needs to be re-written if (0.05 >= (norm_data %>% filter(test == "WT" & condition == "RTD") %>% anderson_darling_normal(strength.norm))$osl) { stop("Unexpected vale for Anderson-Darling test") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm) ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm, override = c("outliers_within_batch", "between_batch_variability")) ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm, batch) ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "WT" & condition == "RTD") %>% ad_ksample(strength.norm, batch) ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "WT" & condition == "RTD") %>% group_by(batch) %>% ggplot(aes(x = strength.norm, color = batch)) + stat_normal_surv_func() + stat_esf() + ggtitle("Distribution of Data For Each Batch") ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% group_by(condition, batch) %>% nest() %>% mutate(mnr = map(data, ~maximum_normed_residual(data = .x, x = strength.norm)), tidied = map(mnr, glance)) %>% select(-c(mnr, data)) %>% # remove unneeded columns 
unnest(tidied) ## ----include=FALSE------------------------------------------------------------ if ((norm_data %>% filter(test == "FC") %>% group_by(condition, batch) %>% summarise( n_outliers = maximum_normed_residual(x = strength.norm)$n_outliers ) %>% ungroup() %>% summarise(n_outliers = sum(n_outliers)))[[1]] != 0) { stop("Unexpected number of outliers") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% group_by(condition) %>% nest() %>% mutate(adk = map(data, ~ad_ksample(data = .x, x = strength.norm, groups = batch)), tidied = map(adk, glance)) %>% select(-c(data, adk)) %>% # remove unneeded columns unnest(tidied) ## ----include=FALSE------------------------------------------------------------ if (!all(!(norm_data %>% filter(test == "FC") %>% group_by(condition) %>% summarise(different_dist = ad_ksample(x = strength.norm, groups = batch)$reject_same_dist ))$different_dist)) { stop("Unexpected ADK result") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% group_by(condition) %>% nest() %>% mutate(mnr = map(data, ~maximum_normed_residual(data = .x, x = strength.norm)), tidied = map(mnr, glance)) %>% select(-c(mnr, data)) %>% # remove unneeded columns unnest(tidied) ## ----include=FALSE------------------------------------------------------------ if ((norm_data %>% filter(test == "FC") %>% group_by(condition) %>% summarise( n_outliers = maximum_normed_residual(x = strength.norm)$n_outliers ) %>% ungroup() %>% summarise(n_outliers = sum(n_outliers)))[[1]] != 0) { stop("Unexpected number of outliers") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% levene_test(strength.norm, condition) ## ----include=FALSE------------------------------------------------------------ if (!(norm_data %>% filter(test == "FC") %>% levene_test(strength.norm, 
condition))$reject_equal_variance) { stop("Unexpected result from Levene's test") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% levene_test(strength_norm_group, condition) ## ----include=FALSE------------------------------------------------------------ if ((norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% levene_test(strength_norm_group, condition))$reject_equal_variance) { stop("Unexpected value from Levene's test") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% anderson_darling_normal(strength_norm_group) ## ----include=FALSE------------------------------------------------------------ if ((norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% anderson_darling_normal(strength_norm_group))$osl <= 0.05) { stop("Unexpected value from AD test") } ## ----------------------------------------------------------------------------- norm_data %>% filter(test == "FC") %>% basis_pooled_cv(strength.norm, condition, batch) ## ----------------------------------------------------------------------------- norm_data %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETD", "ETW", "ETW2"))) %>% filter(test == "FC") %>% basis_pooled_cv(strength.norm, condition, batch) ## ----------------------------------------------------------------------------- carbon.fabric.2 %>% filter(test == "FC" & condition == "RTD") %>% equiv_mean_extremum(strength, n_sample = 5, alpha = 0.01) cmstatr/inst/doc/adktest.html0000644000176200001440000002605214574575772016032 0ustar liggesusers Anderson-Darling k-Sample Test

Anderson-Darling k-Sample Test

Stefan Kloppenborg, Jeffrey Borlik

20-Jan-2019

This vignette explores the Anderson–Darling k-Sample test. CMH-17-1G [1] provides a formulation for this test that appears different than the formulation given by Scholz and Stephens in their 1987 paper [2].

Both references use different nomenclature, which is summarized as follows:

Term CMH-17-1G Scholz and Stephens
A sample \(i\) \(i\)
The number of samples \(k\) \(k\)
An observation within a sample \(j\) \(j\)
The number of observations within the sample \(i\) \(n_i\) \(n_i\)
The total number of observations within all samples \(n\) \(N\)
Distinct values in combined data, ordered \(z_{(1)}\)\(z_{(L)}\) \(Z_1^*\)\(Z_L^*\)
The number of distinct values in the combined data \(L\) \(L\)

Given the possibility of ties in the data, the discrete version of the test must be used Scholz and Stephens (1987) give the test statistic as:

\[ A_{a k N}^2 = \frac{N - 1}{N}\sum_{i=1}^k \frac{1}{n_i}\sum_{j=1}^{L}\frac{l_j}{N}\frac{\left(N M_{a i j} - n_i B_{a j}\right)^2}{B_{a j}\left(N - B_{a j}\right) - N l_j / 4} \]

CMH-17-1G gives the test statistic as:

\[ ADK = \frac{n - 1}{n^2\left(k - 1\right)}\sum_{i=1}^k\frac{1}{n_i}\sum_{j=1}^L h_j \frac{\left(n F_{i j} - n_i H_j\right)^2}{H_j \left(n - H_j\right) - n h_j / 4} \]

By inspection, the CMH-17-1G version of this test statistic contains an extra factor of \(\frac{1}{\left(k - 1\right)}\).

Scholz and Stephens indicate that one rejects \(H_0\) at a significance level of \(\alpha\) when:

\[ \frac{A_{a k N}^2 - \left(k - 1\right)}{\sigma_N} \ge t_{k - 1}\left(\alpha\right) \]

This can be rearranged to give a critical value:

\[ A_{c r i t}^2 = \left(k - 1\right) + \sigma_N t_{k - 1}\left(\alpha\right) \]

CHM-17-1G gives the critical value for \(ADK\) for \(\alpha=0.025\) as:

\[ ADC = 1 + \sigma_n \left(1.96 + \frac{1.149}{\sqrt{k - 1}} - \frac{0.391}{k - 1}\right) \]

The definition of \(\sigma_n\) from the two sources differs by a factor of \(\left(k - 1\right)\).

The value in parentheses in the CMH-17-1G critical value corresponds to the interpolation formula for \(t_m\left(\alpha\right)\) given in Scholz and Stephen’s paper. It should be noted that this is not the student’s t-distribution, but rather a distribution referred to as the \(T_m\) distribution.

The cmstatr package use the package kSamples to perform the k-sample Anderson–Darling tests. This package uses the original formulation from Scholz and Stephens, so the test statistic will differ from that given software based on the CMH-17-1G formulation by a factor of \(\left(k-1\right)\).

For comparison, SciPy’s implementation also uses the original Scholz and Stephens formulation. The statistic that it returns, however, is the normalized statistic, \(\left[A_{a k N}^2 - \left(k - 1\right)\right] / \sigma_N\), rather than kSamples’s \(A_{a k N}^2\) value. To be consistent, SciPy also returns the critical values \(t_{k-1}(\alpha)\) directly. (Currently, SciPy also floors/caps the returned p-value at 0.1% / 25%.) The values of \(k\) and \(\sigma_N\) are available in cmstatr’s ad_ksample return value, if an exact comparison to Python SciPy is necessary.

The conclusions about the null hypothesis drawn, however, will be the same, whether R or CMH-17-1G or SciPy.

References

[1]
“Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012.
[2]
F. W. Scholz and M. A. Stephens, “K-Sample Anderson--Darling Tests,” Journal of the American Statistical Association, vol. 82, no. 399. pp. 918–924, Sep-1987.
cmstatr/inst/doc/cmstatr_Tutorial.Rmd0000644000176200001440000003743314477217317017504 0ustar liggesusers--- title: "cmstatr Tutorial" author: "Stefan Kloppenborg" date: "1-Apr-2020" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{cmstatr Tutorial} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # If any of the required packages are unavailable, # don't re-run the code # nolint start required <- c("dplyr", "ggplot2", "tidyr", "cmstatr", "purrr") if (!all(unlist(lapply(required, function(pkg) { requireNamespace(pkg, quietly = TRUE)} )))) { knitr::opts_chunk$set(eval = FALSE) } #nolint end ``` `cmstatr` is an R package for analyzing composite material data for use in the aerospace industry. The statistical methods are based on those published in [CMH-17-1G](https://www.cmh17.org/). This package is intended to facilitate reproducible statistical analysis of composite materials. In this tutorial, we'll explore the basic functionality of `cmstatr`. Before we can actually use the package, we'll need to load it. We'll also load the `dplyr` package, which we'll talk about shortly. There are also a few other packages that we'll load. These could all be loaded by loading the `tidyverse` package instead. ```{r message=FALSE} library(cmstatr) library(dplyr) library(ggplot2) library(tidyr) library(purrr) ``` # Input Data `cmstatr` is built with the assumption that the data is in (so called) [tidy data](http://vita.had.co.nz/papers/tidy-data.html) format. This means that the data is in a data frame and that each observation (i.e. test result) has its own row and that each variable has its own column. Included in this package is a sample composite material data set (this data set is fictional: don't use it for anything other than learning this package). The data set `carbon.fabric.2` has the expected format. 
We'll just show the first 10 rows of the data for now. ```{r} carbon.fabric.2 %>% head(10) ``` If your data set is not yet in this type of format (note: that the column names *do not* need to match the column names in the example), there are many ways to get it into this format. One of the easier ways of doing so is to use the [`tidyr`](https://tidyr.tidyverse.org/) package. The use of this package is outside the scope of this vignette. # Working With Data Throughout this vignette, we will be using some of the `tidyverse` tools for working with data. There are several ways to work with data in R, but in the opinion of the author of this vignette, the `tidyverse` provides the easiest way to do so. As such, this is the approach used in this vignette. Feel free to use whichever approach works best for you. # Normalizing Data to Cured Ply Thickness Very often, you'll want to normalize as-measured strength data to a nominal cured ply thickness for fiber-dominated properties. Very often, this will reduce the apparent variance in the data. The `normalize_ply_thickness` function can be used to normalize strength or modulus data to a certain cured ply thickness. This function takes three arguments: the value to normalize (i.e.. strength or modulus), the measured thickness and the nominal thickness. In our case, the nominal cured ply thickness of the material is $0.0079$. We can then normalize the warp-tension and fill-compression data as follows: ```{r} norm_data <- carbon.fabric.2 %>% filter(test == "WT" | test == "FC") %>% mutate(strength.norm = normalize_ply_thickness(strength, thickness / nplies, 0.0079)) norm_data %>% head(10) ``` # Calculating Single-Point Basis Value The simplest thing that you will likely do is to calculate a basis value based of a set of numbers that you consider as unstructured data. An example of this would be calculating the B-Basis of the `RTD` warp tension (`WT`) data. 
There are a number of diagnostic tests that we should run before actually calculating a B-Basis value. We'll talk about those later, but for now, let's just get right to checking how the data are distributed and calculating the B-Basis. We'll use an Anderson--Darling test to check if the data are normally distributed. The `cmstatr` package provides the function `anderson_darling_normal` and related functions for other distributions. We can run an Anderson--Darling test for normality on the warp tension RTD data as follows. We'll perform this test on the normalized strength. ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% anderson_darling_normal(strength.norm) ``` ```{r include=FALSE} # Verify that the AD test always provides the same conclusion # If this assertion fails, the Vignette needs to be re-written if (0.05 >= (norm_data %>% filter(test == "WT" & condition == "RTD") %>% anderson_darling_normal(strength.norm))$osl) { stop("Unexpected vale for Anderson-Darling test") } ``` Now that we know that this data follows a normal distribution (since the observed significance level (OSL) of the Anderson--Darling test is greater than $0.05$), we can proceed to calculate a basis value based based on the assumption of normally distributed data. The `cmstatr` package provides the function `basis_normal` as well as related functions for other distributions. By default, the B-Basis value is calculated, but other population proportions and confidence bounds can be specified (for example, specify `p = 0.99, conf = 0.99` for A-Basis). ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm) ``` We see that the calculated B-Basis is $129.96$. We also see two messages issued by the `cmstatr` package. These messages relate to the automated diagnostic tests performed by the basis calculation functions. 
In this case we see messages that two of the diagnostic tests were not performed because we didn't specify the batch of each observation. The batch is not required for calculating single-point basis values, but it is required for performing batch-to-batch variability and within-batch outlier diagnostic tests. The `basis_normal` function performs the following diagnostic tests by default: - Within batch outliers using `maximum_normed_residual()` - Between batch variability using `ad_ksample()` - Outliers using `maximum_normed_residual()` - Normality of data using `anderson_darling_normal()` There are two ways that we can deal with the two messages that we see. We can pass in a column that specifies the batch for each observation, or we can override those two diagnostic tests so that `cmstatr` doesn't run them. To override the two diagnostic tests, we set the argument `override` to a list of the names of the diagnostic tests that we want to skip. The names of the diagnostic tests that were not run are shown between back-ticks (\`) in the message. Our call to `basis_normal()` would be updated as follows: ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm, override = c("outliers_within_batch", "between_batch_variability")) ``` Obviously, you should be cautious about overriding the diagnostic tests. There are certainly times when it is appropriate to do so, but sound engineering judgment is required. The better approach would be to specify the batch. This can be done as follows: ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% basis_normal(strength.norm, batch) ``` Now that batch is specified, we see that one of the diagnostic tests actually fails: the Anderson--Darling k-Sample test shows that the batches are not drawn from the same (unspecified) distribution. 
We can run this diagnostic test directly to investigate further: ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% ad_ksample(strength.norm, batch) ``` For the Anderson--Darling k-Sample test, $\alpha=0.025$ is normally used. In this case the p-value is $p=0.0026$, so it is no where near $\alpha$ (note the number of decimal places). We can plot the distribution of this data and make a judgment call about whether to continue. ```{r} norm_data %>% filter(test == "WT" & condition == "RTD") %>% group_by(batch) %>% ggplot(aes(x = strength.norm, color = batch)) + stat_normal_surv_func() + stat_esf() + ggtitle("Distribution of Data For Each Batch") ``` We can also run the other diagnostic test by themselves. These are described in more detail in the following sections. # Calculating Basis Values by Pooling Across Environments In this section, we'll use the fill-compression data from the `carbon.fabric.2` data set. ## Checking for Outliers After checking that there are a sufficient number of conditions, batches and specimens and that the failure modes are consistent, we would normally check if there are outliers within each batch and condition. The maximum normed residual test can be used for this. The `cmstatr` package provides the function `maximum_normed_residual` to do this. First, we'll group the data by condition and batch, then run the test on each group. The `maximum_normed_residual` function returns an object that contains a number of values. We'll create a `data.frame` that contains those values. In order to do this, we need to use the `nest` function from the `tidyr` package. This is explained in detail [here](https://tidyr.tidyverse.org/articles/nest.html). Basically, `nest` allows a column of `list`s or a column of `data.frame`s to be added to a `data.frame`. 
Once nested, we can use the `glance` method to unpack the values returned by `maximum_normed_residual` into a one-row `data.frame`, and then use `unnest` to flatten this into a single `data.frame`. ```{r} norm_data %>% filter(test == "FC") %>% group_by(condition, batch) %>% nest() %>% mutate(mnr = map(data, ~maximum_normed_residual(data = .x, x = strength.norm)), tidied = map(mnr, glance)) %>% select(-c(mnr, data)) %>% # remove unneeded columns unnest(tidied) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% group_by(condition, batch) %>% summarise( n_outliers = maximum_normed_residual(x = strength.norm)$n_outliers ) %>% ungroup() %>% summarise(n_outliers = sum(n_outliers)))[[1]] != 0) { stop("Unexpected number of outliers") } ``` None of the groups have outliers, so we can continue. # Batch-to-Batch Distribution Next, we will use the Anderson--Darling k-Sample test to check that each batch comes from the same distribution within each condition. We can use the `ad_ksample` function from `cmstatr` to do so. Once again, we'll use `nest`/`unnest` and `glance` to do so. ```{r} norm_data %>% filter(test == "FC") %>% group_by(condition) %>% nest() %>% mutate(adk = map(data, ~ad_ksample(data = .x, x = strength.norm, groups = batch)), tidied = map(adk, glance)) %>% select(-c(data, adk)) %>% # remove unneeded columns unnest(tidied) ``` ```{r include=FALSE} if (!all(!(norm_data %>% filter(test == "FC") %>% group_by(condition) %>% summarise(different_dist = ad_ksample(x = strength.norm, groups = batch)$reject_same_dist ))$different_dist)) { stop("Unexpected ADK result") } ``` For all conditions, the Anderson--Darling k-Sample test fails to reject the hypothesis that each batch comes from the same (unspecified) distribution. We can thus proceed to pooling the data. 
## Checking for Outliers Within Each Condition Just as we did when checking for outlier within each condition and each batch, we can pool all the batches (within each condition) and check for outliers within each condition. ```{r} norm_data %>% filter(test == "FC") %>% group_by(condition) %>% nest() %>% mutate(mnr = map(data, ~maximum_normed_residual(data = .x, x = strength.norm)), tidied = map(mnr, glance)) %>% select(-c(mnr, data)) %>% # remove unneeded columns unnest(tidied) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% group_by(condition) %>% summarise( n_outliers = maximum_normed_residual(x = strength.norm)$n_outliers ) %>% ungroup() %>% summarise(n_outliers = sum(n_outliers)))[[1]] != 0) { stop("Unexpected number of outliers") } ``` We find no outliers, so we can continue. ## Pooling Across Environments Often it is desirable to pool data across several environments. There are two methods for doing so: "pooled standard deviation" and "pooled CV" (CV is an abbreviation for Coefficient of Variation) First, we will check for equality of variance among the conditions. We will do so using Levene's test. The `cmstatr` package provides the function `levene_test` to do so. ```{r} norm_data %>% filter(test == "FC") %>% levene_test(strength.norm, condition) ``` ```{r include=FALSE} if (!(norm_data %>% filter(test == "FC") %>% levene_test(strength.norm, condition))$reject_equal_variance) { stop("Unexpected result from Levene's test") } ``` The result from Levene's test indicates that the variance for each condition is not equal. This indicates that the data cannot be pooled using the "pooled standard deviation" method. We can check if the data can be pooled using the "pooled CV" method. We'll start by normalizing the data from each group to the group's mean. The `cmstatr` package provides the function `normalize_group_mean` for this purpose. 
```{r} norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% levene_test(strength_norm_group, condition) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% levene_test(strength_norm_group, condition))$reject_equal_variance) { stop("Unexpected value from Levene's test") } ``` The Levene's test thus shows the variances of the pooled data are equal. We can move on to performing an Anderson--Darling test for normality on the pooled data. ```{r} norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% anderson_darling_normal(strength_norm_group) ``` ```{r include=FALSE} if ((norm_data %>% filter(test == "FC") %>% mutate( strength_norm_group = normalize_group_mean(strength.norm, condition)) %>% anderson_darling_normal(strength_norm_group))$osl <= 0.05) { stop("Unexpected value from AD test") } ``` The Anderson--Darling test indicates that the pooled data is drawn from a normal distribution, so we can continue with calculating basis values using the "pooled CV" method. ```{r} norm_data %>% filter(test == "FC") %>% basis_pooled_cv(strength.norm, condition, batch) ``` The conditions listed in the output above are in alphabetical order. This probably isn't what you want. Instead, you probably want the conditions listed in a certain order. This can be done by ordering the data first as demonstrated below. You're probably just do this one in at the start of your analysis. ```{r} norm_data %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETD", "ETW", "ETW2"))) %>% filter(test == "FC") %>% basis_pooled_cv(strength.norm, condition, batch) ``` # Equivalency Eventually, once you've finished calculating all your basis values, you'll probably want to set specification requirements or evaluate site/process equivalency. 
`cmstatr` has functionality to do both. Let's say that you want to develop specification limits for fill compression that you're going to put in your material specification. You can do this as follows: ```{r} carbon.fabric.2 %>% filter(test == "FC" & condition == "RTD") %>% equiv_mean_extremum(strength, n_sample = 5, alpha = 0.01) ``` If you're determining equivalency limits for modulus, a different approach is generally used so that bilateral limits are set. `cmstatr` can do this as well, using the function `equiv_change_mean`. cmstatr/inst/doc/cmstatr_Graphing.html0000644000176200001440000111742114574575776017675 0ustar liggesusers Plotting Composite Material Data

Plotting Composite Material Data

Ally Fraser

2-May-2020

This vignette demonstrates how to create some of the graphs commonly used when analyzing composite material data. Here, we rely on the ggplot2 package for graphing. This package can be loaded either on its own, or through the tidyverse meta-package, which also includes packages such as dplyr that we will also use.

We’ll need to load a few packages in order to proceed.

library(dplyr)
library(ggplot2)
library(tidyr)
library(cmstatr)

Throughout this vignette, we’ll use one of the example data sets that comes with cmstatr and we’ll focus on the warp-tension data as an example. We’ll load the example data in a variable as follows. By default the condition will be in an arbitrary order, but throughout the visualization, we’ll want the conditions shown in a particular order (from coldest and driest to hottest and wettest). We can define the order of the conditions using the ordered function. For brevity, only the first few rows of the data set are displayed below.

dat <- carbon.fabric.2 %>%
  filter(test == "WT") %>%
  mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2")))

dat %>%
  head(10)
#>    test condition batch panel thickness nplies strength modulus failure_mode
#> 1    WT       CTD     A     1     0.112     14  142.817   9.285          LAT
#> 2    WT       CTD     A     1     0.113     14  135.901   9.133          LAT
#> 3    WT       CTD     A     1     0.113     14  132.511   9.253          LAT
#> 4    WT       CTD     A     2     0.112     14  135.586   9.150          LAB
#> 5    WT       CTD     A     2     0.113     14  125.145   9.270          LAB
#> 6    WT       CTD     A     2     0.113     14  135.203   9.189          LGM
#> 7    WT       CTD     A     2     0.113     14  128.547   9.088          LAB
#> 8    WT       CTD     B     1     0.113     14  127.709   9.199          LGM
#> 9    WT       CTD     B     1     0.113     14  127.074   9.058          LGM
#> 10   WT       CTD     B     1     0.114     14  126.879   9.306          LGM

We’ll then calculate the B-Basis value using the pooling by standard deviation method. This data set happens to fail some of the diagnostic tests, but for the purpose of this example, we’ll ignore those failures using the override argument.

b_basis_pooled <- dat %>%
  basis_pooled_cv(strength, condition, batch,
                  override = c("between_group_variability",
                               "normalized_variance_equal"))

b_basis_pooled
#> 
#> Call:
#> basis_pooled_cv(data = ., x = strength, groups = condition, batch = batch, 
#>     override = c("between_group_variability", "normalized_variance_equal"))
#> 
#> Distribution:  Normal - Pooled CV    ( n = 86, r = 4 )
#> The following diagnostic tests were overridden:
#>     `between_group_variability`,
#>     `normalized_variance_equal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD   125.1325 
#> RTD   129.3447 
#> ETW   123.809 
#> ETW2  120.3191

The object returned from basis_pooled_cv contains a number of values. One value is a data.frame containing the groups (i.e. conditions) and the corresponding basis values. This looks like the following. We’ll use this in the visualizations.

b_basis_pooled$basis
#>      group    value
#> CTD    CTD 125.1325
#> RTD    RTD 129.3447
#> ETW    ETW 123.8090
#> ETW2  ETW2 120.3191

Batch Plots

Batch plots are used to identify differences between batches. Simple batch plots can be created using box plots and adding horizontal lines for the basis values as follows. Note that the heavy line in the box of the box plot is the median, not the mean. The two hinges correspond with the first and third quantiles and the whiskers extend to the most extreme data point, or 1.5 times the inner quantile range.

In the code below, we use the function rename to rename the column group to condition. The data.frame produced by basis_pooled_cv uses the columns value and group, but to match the data, we need the column with the conditions to be named condition.

dat %>%
  ggplot(aes(x = batch, y = strength)) +
  geom_boxplot() +
  geom_jitter(width = 0.25) +
  geom_hline(aes(yintercept = value),
             data = b_basis_pooled$basis %>% rename(condition = group),
             color = "blue") +
  facet_grid(. ~ condition) +
  theme_bw() +
  ggtitle("Batch Plot")
#> Warning: Combining variables of class <ordered> and <factor> was deprecated in ggplot2
#> 3.4.0.
#> ℹ Please ensure your variables are compatible before plotting (location:
#>   `join_keys()`)
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

Quantile Plots

A quantile plot provides a graphical summary of sample values. This plot displays the sample values and the corresponding quantile. A quantile plot can be used to examine the symmetry and tail sizes of the underlying distribution. Sharp rises may indicate the presence of outliers.

dat %>%
  ggplot(aes(x = strength, color = condition)) +
  stat_ecdf(geom = "point") +
  coord_flip() +
  theme_bw() +
  ggtitle("Quantile Plot")

Normal Survival Function Plots

An empirical survival function, and the corresponding normal survival function can be plotted using two ggplot “stat” functions provided by cmstatr. In the example below, the empirical survival function is plotted for each condition, and the survival function for a normal distribution with the mean and variance from the data is also plotted (the survival function is 1 minus the cumulative distribution function). This type of plot can be used to identify how closely the data follows a normal distribution, and also to compare the distributions of the various conditions.

dat %>%
  ggplot(aes(x = strength, color = condition)) +
  stat_normal_surv_func() +
  stat_esf() +
  theme_bw() +
  ggtitle("Normal Survival Function Plot")

Normal Score Plots

The normal scores plot calculates the normal score and plots it against the normal score. Normal plots are useful to investigate distributions of the data.

dat %>%
  group_by(condition) %>%
  mutate(norm.score = scale(strength)) %>%
  ggplot(aes(x = norm.score, y = strength, colour = condition)) +
  geom_point() +
  ggtitle("Normal Scores Plot") +
  theme_bw()

Q-Q Plots

A Q-Q plot compares the data against the theoretical quantiles for a particular distribution. A line is also plotted showing the normal distribution with mean and variance from the data. If the data exactly followed a normal distribution, all points would fall on this line.

dat %>%
  ggplot(aes(sample = strength, colour = condition)) +
  geom_qq() +
  geom_qq_line() +
  ggtitle("Q-Q Plot") +
  theme_bw()

Property Plots

Property plots allow for a variety of properties for a group to be compared to other properties within the same group, as well as to other group properties. The properties included in this plot are A-Basis, B-Basis, Pooled A- and B-Basis, Pooled Modified CV (Coefficient of Variation) A- and B-Basis, Mean, and Min for each group.

The property plots will take a bit of work to construct.

First, the distribution of each group must be determined. Once the distribution has been determined, the proper basis calculation based on that distribution should be filled in below. We also have a column in the tables below for extra arguments to pass to the basis function, such as overrides required or the method for the basis_hk_ext function to use.

b_basis_fcn <- tribble(
  ~condition, ~fcn, ~args,
  "CTD", "basis_normal", list(override = c("between_batch_variability")),
  "RTD", "basis_normal", list(override = c("between_batch_variability")),
  "ETW", "basis_hk_ext", NULL,
  "ETW2", "basis_normal", list(override = c("between_batch_variability"))
)

a_basis_fcn <- tribble(
  ~condition, ~fcn, ~args,
  "CTD", "basis_normal", list(override = c("between_batch_variability")),
  "RTD", "basis_normal", list(override = c("between_batch_variability")),
  "ETW", "basis_hk_ext", list(method = "woodward-frawley"),
  "ETW2", "basis_normal", list(override = c("between_batch_variability"))
)

We’ll write a function that takes the data and information about the distribution and computes the single-point basis value. We’ll use this function for both A- and B-Basis, so we’ll add a parameter for the probability (0.90 or 0.99).

single_point_fcn <- function(group_x, group_batch, cond, basis_fcn, p) {
  # Compute a single-point basis value for one condition group.
  #
  # group_x:     vector of strength observations for the group
  # group_batch: vector identifying the batch of each observation
  # cond:        condition label for the group (only the first element is
  #              used, since summarise() passes the whole column)
  # basis_fcn:   lookup table with columns `condition`, `fcn` (name of the
  #              basis function to call) and `args` (list-column of extra
  #              arguments for that function, or NULL)
  # p:           content of the tolerance bound (0.90 for B-Basis,
  #              0.99 for A-Basis)
  #
  # Returns the computed basis value (a single number).
  row_sel <- basis_fcn$condition == cond[1]
  fcn <- basis_fcn$fcn[row_sel]
  # Extract the extra-argument list itself with `[[`. The previous
  # implementation used unlist(), which flattens a multi-element argument
  # (e.g. override = c("a", "b")) into misnamed elements (override1,
  # override2) that no longer match the target function's parameters.
  matched_args <- basis_fcn$args[row_sel]
  extra_args <- if (length(matched_args) > 0) matched_args[[1]] else NULL

  args <- c(
    list(x = group_x, batch = group_batch, p = p),
    extra_args)
  basis <- do.call(fcn, args)
  basis$basis
}

single_point_results <- dat %>%
  group_by(condition) %>%
  summarise(single_point_b_basis = single_point_fcn(
              strength, batch, condition, b_basis_fcn, 0.90),
            single_point_a_basis = single_point_fcn(
              strength, batch, condition, a_basis_fcn, 0.99),
            minimum = min(strength),
            mean = mean(strength)) %>%
  mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2")))

single_point_results
#> # A tibble: 4 × 5
#>   condition single_point_b_basis single_point_a_basis minimum  mean
#>   <ord>                    <dbl>                <dbl>   <dbl> <dbl>
#> 1 CTD                       121.                 111.    125.  135.
#> 2 RTD                       128.                 119.    127.  140.
#> 3 ETW                       121.                 103.    125.  134.
#> 4 ETW2                      123.                 119.    123.  130.

In the above code, we also ensure that the condition column is still in the order we expect.

We’ve already computed the B-Basis of the data using a pooling method. We’ll do the same for A-Basis:

a_basis_pooled <- dat %>%
  basis_pooled_cv(strength, condition, batch, p = 0.99,
                  override = c("between_group_variability",
                               "normalized_variance_equal"))

a_basis_pooled
#> 
#> Call:
#> basis_pooled_cv(data = ., x = strength, groups = condition, batch = batch, 
#>     p = 0.99, override = c("between_group_variability", "normalized_variance_equal"))
#> 
#> Distribution:  Normal - Pooled CV    ( n = 86, r = 4 )
#> The following diagnostic tests were overridden:
#>     `between_group_variability`,
#>     `normalized_variance_equal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> CTD   118.3205 
#> RTD   122.2636 
#> ETW   117.074 
#> ETW2  113.7601

As we saw before, the returned object has a property called basis, which is a data.frame for the pooling methods.

a_basis_pooled$basis
#>      group    value
#> CTD    CTD 118.3205
#> RTD    RTD 122.2636
#> ETW    ETW 117.0740
#> ETW2  ETW2 113.7601

We can take this data.frame and change the column names to suit our needs.

a_basis_pooled$basis %>%
  rename(condition = group,
         a_basis_pooled = value)
#>      condition a_basis_pooled
#> CTD        CTD       118.3205
#> RTD        RTD       122.2636
#> ETW        ETW       117.0740
#> ETW2      ETW2       113.7601

We can combine all these steps into one statement. We’ll also ensure that the conditions are listed in the order we want.

a_basis_pooled_results <- a_basis_pooled$basis %>%
  rename(condition = group,
         a_basis_pooled = value) %>%
  mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2")))

a_basis_pooled_results
#>      condition a_basis_pooled
#> CTD        CTD       118.3205
#> RTD        RTD       122.2636
#> ETW        ETW       117.0740
#> ETW2      ETW2       113.7601

And the same thing for B-Basis:

b_basis_pooled_results <- b_basis_pooled$basis %>%
  rename(condition = group,
         b_basis_pooled = value) %>%
  mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2")))

b_basis_pooled_results
#>      condition b_basis_pooled
#> CTD        CTD       125.1325
#> RTD        RTD       129.3447
#> ETW        ETW       123.8090
#> ETW2      ETW2       120.3191

We can use the function inner_join from the dplyr package to combine the three sets of computational results. Each row for each condition will be concatenated.

single_point_results %>%
  inner_join(b_basis_pooled_results, by = "condition") %>%
  inner_join(a_basis_pooled_results, by = "condition")
#> # A tibble: 4 × 7
#>   condition single_point_b_basis single_point_a_basis minimum  mean
#>   <ord>                    <dbl>                <dbl>   <dbl> <dbl>
#> 1 CTD                       121.                 111.    125.  135.
#> 2 RTD                       128.                 119.    127.  140.
#> 3 ETW                       121.                 103.    125.  134.
#> 4 ETW2                      123.                 119.    123.  130.
#> # ℹ 2 more variables: b_basis_pooled <dbl>, a_basis_pooled <dbl>

To use this table in the plot we’re trying to construct, we want to “lengthen” the table as follows.

single_point_results %>%
  inner_join(b_basis_pooled_results, by = "condition") %>%
  inner_join(a_basis_pooled_results, by = "condition") %>%
  pivot_longer(cols = single_point_b_basis:a_basis_pooled)
#> # A tibble: 24 × 3
#>    condition name                 value
#>    <ord>     <chr>                <dbl>
#>  1 CTD       single_point_b_basis  121.
#>  2 CTD       single_point_a_basis  111.
#>  3 CTD       minimum               125.
#>  4 CTD       mean                  135.
#>  5 CTD       b_basis_pooled        125.
#>  6 CTD       a_basis_pooled        118.
#>  7 RTD       single_point_b_basis  128.
#>  8 RTD       single_point_a_basis  119.
#>  9 RTD       minimum               127.
#> 10 RTD       mean                  140.
#> # ℹ 14 more rows

We can now make a plot based on this:

single_point_results %>%
  inner_join(b_basis_pooled_results, by = "condition") %>%
  inner_join(a_basis_pooled_results, by = "condition") %>%
  pivot_longer(cols = single_point_b_basis:a_basis_pooled) %>%
  ggplot(aes(x = condition, y = value)) +
  geom_boxplot(aes(y = strength), data = dat) +
  geom_point(aes(shape = name, color = name)) +
  ggtitle("Property Graph") +
  theme_bw()

Nested Data Plots

cmstatr contains the function nested_data_plot. This function creates a plot showing the sources of variation. In the following example, the data is grouped according to the variables in the groups argument. The data is first grouped according to batch, then according to panel. The labels are located according to the data points that fall under them. By default, the mean is used, but the stat argument can be used to locate the labels according to the median or some other statistic.

carbon.fabric.2 %>%
  mutate(panel = as.character(panel)) %>%
  filter(test == "WT") %>%
  nested_data_plot(strength,
                   groups = c(batch, panel))

Optionally, fill or color can be set as follows:

carbon.fabric.2 %>%
  mutate(panel = as.character(panel)) %>%
  filter(test == "WT" & condition == "RTD") %>%
  nested_data_plot(strength,
                   groups = c(batch, panel),
                   fill = batch,
                   color = panel)

cmstatr/inst/doc/cmstatr_Tutorial.html0000644000176200001440000025357614574576000017731 0ustar liggesusers cmstatr Tutorial

cmstatr Tutorial

Stefan Kloppenborg

1-Apr-2020

cmstatr is an R package for analyzing composite material data for use in the aerospace industry. The statistical methods are based on those published in CMH-17-1G. This package is intended to facilitate reproducible statistical analysis of composite materials. In this tutorial, we’ll explore the basic functionality of cmstatr.

Before we can actually use the package, we’ll need to load it. We’ll also load the dplyr package, which we’ll talk about shortly. There are also a few other packages that we’ll load. These could all be loaded by loading the tidyverse package instead.

library(cmstatr)
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)

Input Data

cmstatr is built with the assumption that the data is in (so called) tidy data format. This means that the data is in a data frame and that each observation (i.e. test result) has its own row and that each variable has its own column. Included in this package is a sample composite material data set (this data set is fictional: don’t use it for anything other than learning this package). The data set carbon.fabric.2 has the expected format. We’ll just show the first 10 rows of the data for now.

carbon.fabric.2 %>%
  head(10)
#>    test condition batch panel thickness nplies strength modulus failure_mode
#> 1    WT       CTD     A     1     0.112     14  142.817   9.285          LAT
#> 2    WT       CTD     A     1     0.113     14  135.901   9.133          LAT
#> 3    WT       CTD     A     1     0.113     14  132.511   9.253          LAT
#> 4    WT       CTD     A     2     0.112     14  135.586   9.150          LAB
#> 5    WT       CTD     A     2     0.113     14  125.145   9.270          LAB
#> 6    WT       CTD     A     2     0.113     14  135.203   9.189          LGM
#> 7    WT       CTD     A     2     0.113     14  128.547   9.088          LAB
#> 8    WT       CTD     B     1     0.113     14  127.709   9.199          LGM
#> 9    WT       CTD     B     1     0.113     14  127.074   9.058          LGM
#> 10   WT       CTD     B     1     0.114     14  126.879   9.306          LGM

If your data set is not yet in this type of format (note: that the column names do not need to match the column names in the example), there are many ways to get it into this format. One of the easier ways of doing so is to use the tidyr package. The use of this package is outside the scope of this vignette.

Working With Data

Throughout this vignette, we will be using some of the tidyverse tools for working with data. There are several ways to work with data in R, but in the opinion of the author of this vignette, the tidyverse provides the easiest way to do so. As such, this is the approach used in this vignette. Feel free to use whichever approach works best for you.

Normalizing Data to Cured Ply Thickness

Very often, you’ll want to normalize as-measured strength data to a nominal cured ply thickness for fiber-dominated properties. Doing so will often reduce the apparent variance in the data. The normalize_ply_thickness function can be used to normalize strength or modulus data to a certain cured ply thickness. This function takes three arguments: the value to normalize (i.e. strength or modulus), the measured thickness and the nominal thickness. In our case, the nominal cured ply thickness of the material is \(0.0079\). We can then normalize the warp-tension and fill-compression data as follows:

norm_data <- carbon.fabric.2 %>%
  filter(test == "WT" | test == "FC") %>%
  mutate(strength.norm = normalize_ply_thickness(strength,
                                                 thickness / nplies,
                                                 0.0079))

norm_data %>%
  head(10)
#>    test condition batch panel thickness nplies strength modulus failure_mode
#> 1    WT       CTD     A     1     0.112     14  142.817   9.285          LAT
#> 2    WT       CTD     A     1     0.113     14  135.901   9.133          LAT
#> 3    WT       CTD     A     1     0.113     14  132.511   9.253          LAT
#> 4    WT       CTD     A     2     0.112     14  135.586   9.150          LAB
#> 5    WT       CTD     A     2     0.113     14  125.145   9.270          LAB
#> 6    WT       CTD     A     2     0.113     14  135.203   9.189          LGM
#> 7    WT       CTD     A     2     0.113     14  128.547   9.088          LAB
#> 8    WT       CTD     B     1     0.113     14  127.709   9.199          LGM
#> 9    WT       CTD     B     1     0.113     14  127.074   9.058          LGM
#> 10   WT       CTD     B     1     0.114     14  126.879   9.306          LGM
#>    strength.norm
#> 1       144.6248
#> 2       138.8500
#> 3       135.3865
#> 4       137.3023
#> 5       127.8606
#> 6       138.1369
#> 7       131.3364
#> 8       130.4803
#> 9       129.8315
#> 10      130.7794

Calculating Single-Point Basis Value

The simplest thing that you will likely do is to calculate a basis value based of a set of numbers that you consider as unstructured data. An example of this would be calculating the B-Basis of the RTD warp tension (WT) data.

There are a number of diagnostic tests that we should run before actually calculating a B-Basis value. We’ll talk about those later, but for now, let’s just get right to checking how the data are distributed and calculating the B-Basis.

We’ll use an Anderson–Darling test to check if the data are normally distributed. The cmstatr package provides the function anderson_darling_normal and related functions for other distributions. We can run an Anderson–Darling test for normality on the warp tension RTD data as follows. We’ll perform this test on the normalized strength.

norm_data %>%
  filter(test == "WT" & condition == "RTD") %>%
  anderson_darling_normal(strength.norm)
#> 
#> Call:
#> anderson_darling_normal(data = ., x = strength.norm)
#> 
#> Distribution:  Normal ( n = 28 ) 
#> Test statistic:  A = 0.3805995 
#> OSL (p-value):  0.3132051  (assuming unknown parameters)
#> Conclusion: Sample is drawn from a Normal distribution ( alpha = 0.05 )

Now that we know that this data follows a normal distribution (since the observed significance level (OSL) of the Anderson–Darling test is greater than \(0.05\)), we can proceed to calculate a basis value based on the assumption of normally distributed data. The cmstatr package provides the function basis_normal as well as related functions for other distributions. By default, the B-Basis value is calculated, but other population proportions and confidence bounds can be specified (for example, specify p = 0.99, conf = 0.99 for A-Basis).

norm_data %>%
  filter(test == "WT" & condition == "RTD") %>%
  basis_normal(strength.norm)
#> `outliers_within_batch` not run because parameter `batch` not specified
#> `between_batch_variability` not run because parameter `batch` not specified
#> 
#> Call:
#> basis_normal(data = ., x = strength.norm)
#> 
#> Distribution:  Normal    ( n = 28 )
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 129.9583

We see that the calculated B-Basis is \(129.96\). We also see two messages issued by the cmstatr package. These messages relate to the automated diagnostic tests performed by the basis calculation functions. In this case we see messages that two of the diagnostic tests were not performed because we didn’t specify the batch of each observation. The batch is not required for calculating single-point basis values, but it is required for performing batch-to-batch variability and within-batch outlier diagnostic tests.

The basis_normal function performs the following diagnostic tests by default:

  • Within batch outliers using maximum_normed_residual()
  • Between batch variability using ad_ksample()
  • Outliers using maximum_normed_residual()
  • Normality of data using anderson_darling_normal()

There are two ways that we can deal with the two messages that we see. We can pass in a column that specifies the batch for each observation, or we can override those two diagnostic tests so that cmstatr doesn’t run them.

To override the two diagnostic tests, we set the argument override to a list of the names of the diagnostic tests that we want to skip. The names of the diagnostic tests that were not run are shown between back-ticks (`) in the message. Our call to basis_normal() would be updated as follows:

norm_data %>%
  filter(test == "WT" & condition == "RTD") %>%
  basis_normal(strength.norm, 
               override = c("outliers_within_batch",
                            "between_batch_variability"))
#> 
#> Call:
#> basis_normal(data = ., x = strength.norm, override = c("outliers_within_batch", 
#>     "between_batch_variability"))
#> 
#> Distribution:  Normal    ( n = 28 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 129.9583

Obviously, you should be cautious about overriding the diagnostic tests. There are certainly times when it is appropriate to do so, but sound engineering judgment is required.

The better approach would be to specify the batch. This can be done as follows:

norm_data %>%
  filter(test == "WT" & condition == "RTD") %>%
  basis_normal(strength.norm, batch)
#> Warning: `between_batch_variability` failed: Anderson-Darling k-Sample test
#> indicates that batches are drawn from different distributions
#> 
#> Call:
#> basis_normal(data = ., x = strength.norm, batch = batch)
#> 
#> Distribution:  Normal    ( n = 28 )
#> The following diagnostic tests failed:
#>     `between_batch_variability`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 129.9583

Now that batch is specified, we see that one of the diagnostic tests actually fails: the Anderson–Darling k-Sample test shows that the batches are not drawn from the same (unspecified) distribution. We can run this diagnostic test directly to investigate further:

norm_data %>%
  filter(test == "WT" & condition == "RTD") %>%
  ad_ksample(strength.norm, batch)
#> 
#> Call:
#> ad_ksample(data = ., x = strength.norm, groups = batch)
#> 
#> N = 28           k = 3            
#> ADK = 6.65       p-value = 0.0025892 
#> Conclusion: Samples do not come from the same distribution (alpha = 0.025 )

For the Anderson–Darling k-Sample test, \(\alpha=0.025\) is normally used. In this case the p-value is \(p=0.0026\), so it is nowhere near \(\alpha\) (note the number of decimal places).

We can plot the distribution of this data and make a judgment call about whether to continue.

norm_data %>%
  filter(test == "WT" & condition == "RTD") %>%
  group_by(batch) %>%
  ggplot(aes(x = strength.norm, color = batch)) +
  stat_normal_surv_func() +
  stat_esf() +
  ggtitle("Distribution of Data For Each Batch")

We can also run the other diagnostic test by themselves. These are described in more detail in the following sections.

Calculating Basis Values by Pooling Across Environments

In this section, we’ll use the fill-compression data from the carbon.fabric.2 data set.

Checking for Outliers

After checking that there are a sufficient number of conditions, batches and specimens and that the failure modes are consistent, we would normally check if there are outliers within each batch and condition. The maximum normed residual test can be used for this. The cmstatr package provides the function maximum_normed_residual to do this. First, we’ll group the data by condition and batch, then run the test on each group. The maximum_normed_residual function returns an object that contains a number of values. We’ll create a data.frame that contains those values.

In order to do this, we need to use the nest function from the tidyr package. This is explained in detail here. Basically, nest allows a column of lists or a column of data.frames to be added to a data.frame. Once nested, we can use the glance method to unpack the values returned by maximum_normed_residual into a one-row data.frame, and then use unnest to flatten this into a single data.frame.

norm_data %>%
  filter(test == "FC") %>%
  group_by(condition, batch) %>%
  nest() %>%
  mutate(mnr = map(data,
                   ~maximum_normed_residual(data = .x, x = strength.norm)),
         tidied = map(mnr, glance)) %>%
  select(-c(mnr, data)) %>%  # remove unneeded columns
  unnest(tidied)
#> # A tibble: 15 × 6
#> # Groups:   condition, batch [15]
#>    condition batch   mnr alpha  crit n_outliers
#>    <chr>     <chr> <dbl> <dbl> <dbl>      <dbl>
#>  1 CTD       A      1.48  0.05  1.89          0
#>  2 CTD       B      1.72  0.05  1.89          0
#>  3 CTD       C      1.52  0.05  1.89          0
#>  4 RTD       A      1.34  0.05  1.89          0
#>  5 RTD       B      1.47  0.05  1.89          0
#>  6 RTD       C      1.52  0.05  1.89          0
#>  7 ETD       A      1.66  0.05  1.89          0
#>  8 ETD       B      1.53  0.05  1.89          0
#>  9 ETD       C      1.40  0.05  1.89          0
#> 10 ETW       A      1.45  0.05  1.89          0
#> 11 ETW       B      1.83  0.05  1.89          0
#> 12 ETW       C      1.76  0.05  1.89          0
#> 13 ETW2      A      1.85  0.05  1.89          0
#> 14 ETW2      B      1.54  0.05  1.89          0
#> 15 ETW2      C      1.38  0.05  2.02          0

None of the groups have outliers, so we can continue.

Batch-to-Batch Distribution

Next, we will use the Anderson–Darling k-Sample test to check that each batch comes from the same distribution within each condition. We can use the ad_ksample function from cmstatr to do so. Once again, we’ll use nest/unnest and glance to do so.

norm_data %>%
  filter(test == "FC") %>%
  group_by(condition) %>%
  nest() %>%
  mutate(adk = map(data, ~ad_ksample(data = .x,
                                     x = strength.norm,
                                     groups = batch)),
         tidied = map(adk, glance)) %>%
  select(-c(data, adk)) %>%  # remove unneeded columns
  unnest(tidied)
#> # A tibble: 5 × 8
#> # Groups:   condition [5]
#>   condition alpha     n     k sigma    ad     p reject_same_dist
#>   <chr>     <dbl> <int> <int> <dbl> <dbl> <dbl> <lgl>           
#> 1 CTD       0.025    18     3 0.944 1.76  0.505 FALSE           
#> 2 RTD       0.025    18     3 0.944 1.03  0.918 FALSE           
#> 3 ETD       0.025    18     3 0.944 0.683 0.997 FALSE           
#> 4 ETW       0.025    18     3 0.944 0.93  0.954 FALSE           
#> 5 ETW2      0.025    19     3 0.951 1.74  0.513 FALSE

For all conditions, the Anderson–Darling k-Sample test fails to reject the hypothesis that each batch comes from the same (unspecified) distribution. We can thus proceed to pooling the data.

Checking for Outliers Within Each Condition

Just as we did when checking for outlier within each condition and each batch, we can pool all the batches (within each condition) and check for outliers within each condition.

norm_data %>%
  filter(test == "FC") %>%
  group_by(condition) %>%
  nest() %>%
  mutate(mnr = map(data, ~maximum_normed_residual(data = .x,
                                                  x = strength.norm)),
         tidied = map(mnr, glance)) %>%
  select(-c(mnr, data)) %>%  # remove unneeded columns
  unnest(tidied)
#> # A tibble: 5 × 5
#> # Groups:   condition [5]
#>   condition   mnr alpha  crit n_outliers
#>   <chr>     <dbl> <dbl> <dbl>      <dbl>
#> 1 CTD        2.38  0.05  2.65          0
#> 2 RTD        2.06  0.05  2.65          0
#> 3 ETD        2.05  0.05  2.65          0
#> 4 ETW        2.34  0.05  2.65          0
#> 5 ETW2       2.07  0.05  2.68          0

We find no outliers, so we can continue.

Pooling Across Environments

Often it is desirable to pool data across several environments. There are two methods for doing so: “pooled standard deviation” and “pooled CV” (CV is an abbreviation for Coefficient of Variation)

First, we will check for equality of variance among the conditions. We will do so using Levene’s test. The cmstatr package provides the function levene_test to do so.

norm_data %>%
  filter(test == "FC") %>%
  levene_test(strength.norm, condition)
#> 
#> Call:
#> levene_test(data = ., x = strength.norm, groups = condition)
#> 
#> n = 91           k = 5            
#> F = 5.260731     p-value = 0.0007727083 
#> Conclusion: Samples have unequal variance ( alpha = 0.05 )

The result from Levene’s test indicates that the variance for each condition is not equal. This indicates that the data cannot be pooled using the “pooled standard deviation” method.

We can check if the data can be pooled using the “pooled CV” method. We’ll start by normalizing the data from each group to the group’s mean. The cmstatr package provides the function normalize_group_mean for this purpose.

norm_data %>%
  filter(test == "FC") %>%
  mutate(
    strength_norm_group = normalize_group_mean(strength.norm, condition)) %>%
  levene_test(strength_norm_group, condition)
#> 
#> Call:
#> levene_test(data = ., x = strength_norm_group, groups = condition)
#> 
#> n = 91           k = 5            
#> F = 1.839645     p-value = 0.1285863 
#> Conclusion: Samples have equal variances ( alpha = 0.05 )

The Levene’s test thus shows the variances of the pooled data are equal. We can move on to performing an Anderson–Darling test for normality on the pooled data.

norm_data %>%
  filter(test == "FC") %>%
  mutate(
    strength_norm_group = normalize_group_mean(strength.norm, condition)) %>%
  anderson_darling_normal(strength_norm_group)
#> 
#> Call:
#> anderson_darling_normal(data = ., x = strength_norm_group)
#> 
#> Distribution:  Normal ( n = 91 ) 
#> Test statistic:  A = 0.3619689 
#> OSL (p-value):  0.3812268  (assuming unknown parameters)
#> Conclusion: Sample is drawn from a Normal distribution ( alpha = 0.05 )

The Anderson–Darling test indicates that the pooled data is drawn from a normal distribution, so we can continue with calculating basis values using the “pooled CV” method.

norm_data %>%
  filter(test == "FC") %>%
  basis_pooled_cv(strength.norm, condition, batch)
#> 
#> Call:
#> basis_pooled_cv(data = ., x = strength.norm, groups = condition, 
#>     batch = batch)
#> 
#> Distribution:  Normal - Pooled CV    ( n = 91, r = 5 )
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD   85.09241 
#> ETD   66.55109 
#> ETW   51.43626 
#> ETW2  45.81318 
#> RTD   78.2274

The conditions listed in the output above are in alphabetical order. This probably isn’t what you want. Instead, you probably want the conditions listed in a certain order. This can be done by ordering the data first, as demonstrated below. You would probably just do this once at the start of your analysis.

norm_data %>%
  mutate(condition = ordered(condition,
                             c("CTD", "RTD", "ETD", "ETW", "ETW2"))) %>%
  filter(test == "FC") %>%
  basis_pooled_cv(strength.norm, condition, batch)
#> 
#> Call:
#> basis_pooled_cv(data = ., x = strength.norm, groups = condition, 
#>     batch = batch)
#> 
#> Distribution:  Normal - Pooled CV    ( n = 91, r = 5 )
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD   85.09241 
#> RTD   78.2274 
#> ETD   66.55109 
#> ETW   51.43626 
#> ETW2  45.81318

Equivalency

Eventually, once you’ve finished calculating all your basis values, you’ll probably want to set specification requirements or evaluate site/process equivalency. cmstatr has functionality to do both.

Let’s say that you want to develop specification limits for fill compression that you’re going to put in your material specification. You can do this as follows:

carbon.fabric.2 %>%
  filter(test == "FC" & condition == "RTD") %>%
  equiv_mean_extremum(strength, n_sample = 5, alpha = 0.01)
#> 
#> Call:
#> equiv_mean_extremum(df_qual = ., data_qual = strength, n_sample = 5, 
#>     alpha = 0.01)
#> 
#> For alpha = 0.01 and n = 5 
#> ( k1 = 3.071482 and k2 = 1.142506 )
#>                   Min Individual    Sample Mean    
#>      Thresholds:     69.89842         82.16867

If you’re determining equivalency limits for modulus, a different approach is generally used so that bilateral limits are set. cmstatr can do this as well, using the function equiv_change_mean.

cmstatr/inst/doc/adktest.R0000644000176200001440000000021714574575772015262 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) cmstatr/inst/doc/hk_ext.Rmd0000644000176200001440000004647414125365540015423 0ustar liggesusers--- title: "Extended Hanson-Koopmans" author: "Stefan Kloppenborg" date: "2021-09-30" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Extended Hanson-Koopmans} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} csl: ieee.csl references: - id: Hanson1964 type: article author: - given: D. L. family: Hanson - given: L. H. family: Koopmans title: Tolerance Limits for the Class of Distributions with Increasing Hazard Rates container-title: The Annals of Mathematical Statistics volume: "35" issue: "4" page: 1561-1570 issued: year: "1964" DOI: 10.1214/aoms/1177700380 - id: Vangel1994 type: article author: - given: Mark family: Vangel title: One-Sided Nonparametric Tolerance Limits container-title: Communications in Statistics - Simulation and Computation volume: "23" issue: "4" page: 1137-1154 issued: year: "1994" DOI: 10.1080/03610919408813222 - id: Harter1961 type: article author: - given: H. Leon family: Harter title: Expected values of normal order statistics container-title: Biometrika volume: "48" issue: 1/2 page: 151-165 issued: year: "1961" DOI: https://doi.org/10.2307/2333139 - id: CMH-17-1G type: report number: CMH-17-1G title: Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials publisher: SAE International issued: year: "2012" month: "03" --- In this vignette, we'll use the following packages: ```r library(dplyr) library(ggplot2) library(purrr) library(tidyr) ``` The Extended Hanson--Koopmans method is a nonparametric method of determining tolerance limits (such as A- or B-Basis values). 
This method does not assume any particular distribution, but does require that $-\log\left(F\right)$ is convex, where $F$ is the cumulative distribution function (CDF) of the distribution. The functions `kh_ext_z`, `hk_ext_z_j_opt` and `basis_kh_ext` in `cmstatr` are based on the Extended Hanson--Koopmans method, developed by Vangel [@Vangel1994]. This is an extension of the method published in [@Hanson1964]. Tolerance limits (Basis values) calculated using the Extended Hanson--Koopmans method are calculated based on two order statistics [^1], $i$ and $j$, and a factor, $z$. The function `hk_ext_z_j_opt` and the function `basis_kh_ext` (with `method = "optimum-order"`) set the first of these order statistics to the first (lowest) order statistic, and a second order statistic determined by minimizing the following function: $$ \left| z E\left(X_{\left(1\right)}\right) + \left(1 - z\right) E\left(X_{\left(j\right)}\right) - \Phi\left(p\right)\right| $$ Where $E\left(X_{(i)}\right)$ is the expected value of the $i$`th` order statistic for a sample drawn from the standard normal distribution, and $\Phi\left(p\right)$ is the CDF of a standard normal distribution for the content of the tolerance limit (i.e. $p=0.9$ for B-Basis). [^1]: The $i$`th` order statistic is the $i$`th` lowest value in the sample. The value of $z$ is calculated based on the sample size, $n$, the two order statistics $i$ and $j$, the content $p$ and the confidence. The calculation is performed using the method in [@Vangel1994] and implemented in `kh_ext_z`. The value of $j$ is very sensitive to the way that the expected value of the order statistics is calculated, and may be sensitive to numerical precision. In version 0.8.0 of `cmstatr` and prior, the expected value of an order statistic for a sample drawn from a standard normal distribution was determined in a crude way. After version 0.8.0, the method in [@Harter1961] is used. 
These method produce different values of $j$ for certain sample sizes. Additionally, a table of $j$ and $z$ values for various sample sizes is published in CMH-17-1G[^2] [@CMH-17-1G]. This table gives slightly different values of $j$ for some sample sizes. [^2]: Note that CMH-17-1G uses the symbols $r$ and $k$ instead of $j$ and $z$. The values of $j$ and $z$ produced by `cmstatr` in version 0.8.0 and before, the values produced after version 0.8.0 and the value published in CMH-17-1G are shown below. All of these values are for B-Basis (90% content, 95% confidence). ```r factors <- tribble( ~n, ~j_pre_080, ~z_pre_080, ~j_post_080, ~z_post_080, ~j_cmh, ~z_cmh, 2, 2, 35.1768141883907, 2, 35.1768141883907, 2, 35.177, 3, 3, 7.85866787768029, 3, 7.85866787768029, 3, 7.859, 4, 4, 4.50522447199018, 4, 4.50522447199018, 4, 4.505, 5, 4, 4.10074820079326, 4, 4.10074820079326, 4, 4.101, 6, 5, 3.06444416024793, 5, 3.06444416024793, 5, 3.064, 7, 5, 2.85751000593839, 5, 2.85751000593839, 5, 2.858, 8, 6, 2.38240998122575, 6, 2.38240998122575, 6, 2.382, 9, 6, 2.25292053841772, 6, 2.25292053841772, 6, 2.253, 10, 7, 1.98762060673102, 6, 2.13665759924781, 6, 2.137, 11, 7, 1.89699586212496, 7, 1.89699586212496, 7, 1.897, 12, 7, 1.81410756892749, 7, 1.81410756892749, 7, 1.814, 13, 8, 1.66223343216608, 7, 1.73773765993598, 7, 1.738, 14, 8, 1.59916281901889, 8, 1.59916281901889, 8, 1.599, 15, 8, 1.54040000806181, 8, 1.54040000806181, 8, 1.54, 16, 9, 1.44512878109546, 8, 1.48539432060546, 8, 1.485, 17, 9, 1.39799975474842, 9, 1.39799975474842, 8, 1.434, 18, 9, 1.35353033609361, 9, 1.35353033609361, 9, 1.354, 19, 10, 1.28991705486727, 9, 1.31146980117942, 9, 1.311, 20, 10, 1.25290765871981, 9, 1.27163203813793, 10, 1.253, 21, 10, 1.21771654027026, 10, 1.21771654027026, 10, 1.218, 22, 11, 1.17330587650406, 10, 1.18418267046374, 10, 1.184, 23, 11, 1.14324511741536, 10, 1.15218647199938, 11, 1.143, 24, 11, 1.11442082880151, 10, 1.12153586685854, 11, 1.114, 25, 11, 1.08682185727661, 11, 
1.08682185727661, 11, 1.087, 26, 11, 1.06032912052507, 11, 1.06032912052507, 11, 1.06, 27, 12, 1.03307994274081, 11, 1.03485308510789, 11, 1.035, 28, 12, 1.00982188136729, 11, 1.01034609051393, 12, 1.01 ) ``` For the sample sizes where $j$ is the same for each approach, the values of $z$ are also equal within a small tolerance. ```r factors %>% filter(j_pre_080 == j_post_080 & j_pre_080 == j_cmh) #> # A tibble: 16 × 7 #> n j_pre_080 z_pre_080 j_post_080 z_post_080 j_cmh z_cmh #> #> 1 2 2 35.2 2 35.2 2 35.2 #> 2 3 3 7.86 3 7.86 3 7.86 #> 3 4 4 4.51 4 4.51 4 4.50 #> 4 5 4 4.10 4 4.10 4 4.10 #> 5 6 5 3.06 5 3.06 5 3.06 #> 6 7 5 2.86 5 2.86 5 2.86 #> 7 8 6 2.38 6 2.38 6 2.38 #> 8 9 6 2.25 6 2.25 6 2.25 #> 9 11 7 1.90 7 1.90 7 1.90 #> 10 12 7 1.81 7 1.81 7 1.81 #> 11 14 8 1.60 8 1.60 8 1.60 #> 12 15 8 1.54 8 1.54 8 1.54 #> 13 18 9 1.35 9 1.35 9 1.35 #> 14 21 10 1.22 10 1.22 10 1.22 #> 15 25 11 1.09 11 1.09 11 1.09 #> 16 26 11 1.06 11 1.06 11 1.06 ``` The sample sizes where the value of $j$ differs are as follows: ```r factor_diff <- factors %>% filter(j_pre_080 != j_post_080 | j_pre_080 != j_cmh | j_post_080 != j_cmh) factor_diff #> # A tibble: 11 × 7 #> n j_pre_080 z_pre_080 j_post_080 z_post_080 j_cmh z_cmh #> #> 1 10 7 1.99 6 2.14 6 2.14 #> 2 13 8 1.66 7 1.74 7 1.74 #> 3 16 9 1.45 8 1.49 8 1.48 #> 4 17 9 1.40 9 1.40 8 1.43 #> 5 19 10 1.29 9 1.31 9 1.31 #> 6 20 10 1.25 9 1.27 10 1.25 #> 7 22 11 1.17 10 1.18 10 1.18 #> 8 23 11 1.14 10 1.15 11 1.14 #> 9 24 11 1.11 10 1.12 11 1.11 #> 10 27 12 1.03 11 1.03 11 1.03 #> 11 28 12 1.01 11 1.01 12 1.01 ``` While there are differences in the three implementations, it's not clear how much these differences will matter in terms of the tolerance limits calculated. This can be investigated through simulation. # Simulation with Normally Distributed Data First, we'll generate a large number (10,000) of samples of sample size $n$ from a normal distribution. 
Since we're generating the samples, we know the true population parameters, so can calculate the true population quantiles. We'll use the three sets of $j$ and $z$ values to compute tolerance limits and compared those tolerance limits to the population quantiles. The proportion of the calculated tolerance limits below the population quantiles should be equal to the selected confidence. We'll restrict the simulation study to the sample sizes where the values of $j$ and $z$ differ in the three implementations of this method, and we'll consider B-Basis (90% content, 95% confidence). ```r mu_normal <- 100 sd_normal <- 6 set.seed(1234567) # make this reproducible tolerance_limit <- function(x, j, z) { x[j] * (x[1] / x[j]) ^ z } sim_normal <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080, j_post_080, z_post_080, j_cmh, z_cmh) { map_dfr(1:10000, function(i_sim) { x <- sort(rnorm(n, mu_normal, sd_normal)) tibble( n = n, b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080), b_post_080 = tolerance_limit(x, j_post_080, z_post_080), b_cmh = tolerance_limit(x, j_cmh, z_cmh), x = list(x) ) } ) }) sim_normal #> # A tibble: 110,000 × 5 #> n b_pre_080 b_post_080 b_cmh x #> #> 1 10 78.4 77.7 77.7 #> 2 10 82.8 82.0 82.0 #> 3 10 83.3 83.0 83.0 #> 4 10 78.4 77.2 77.2 #> 5 10 87.3 86.6 86.6 #> 6 10 92.3 93.2 93.2 #> 7 10 75.2 77.9 77.9 #> 8 10 75.4 73.9 73.9 #> 9 10 75.5 75.1 75.1 #> 10 10 76.4 78.4 78.4 #> # … with 109,990 more rows ``` One can see that the tolerance limits calculated with each set of factors for (most) data sets is different. However, this does not necessarily mean that any set of factors is more or less correct. The distribution of the tolerance limits for each sample size is as follows: ```r sim_normal %>% pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>% ggplot(aes(x = value, color = Factors)) + geom_density() + facet_wrap(n ~ .) 
+ theme_bw() + ggtitle("Distribution of Tolerance Limits for Various Values of n") ``` ![plot of chunk distribution-normal](distribution-normal-1.png) For all samples sizes, the distribution of tolerance limits is actually very similar between all three sets of factors. The true population quantile can be calculated as follows: ```r x_p_normal <- qnorm(0.9, mu_normal, sd_normal, lower.tail = FALSE) x_p_normal #> [1] 92.31069 ``` The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that the in all cases the tolerance limits are all conservative, and also that each set of factors produce similar levels of conservatism. ```r sim_normal %>% mutate(below_pre_080 = b_pre_080 < x_p_normal, below_post_080 = b_post_080 < x_p_normal, below_cmh = b_cmh < x_p_normal) %>% group_by(n) %>% summarise( prop_below_pre_080 = sum(below_pre_080) / n(), prop_below_post_080 = sum(below_post_080) / n(), prop_below_cmh = sum(below_cmh) / n() ) #> # A tibble: 11 × 4 #> n prop_below_pre_080 prop_below_post_080 prop_below_cmh #> #> 1 10 0.984 0.980 0.980 #> 2 13 0.979 0.975 0.975 #> 3 16 0.969 0.967 0.967 #> 4 17 0.973 0.973 0.971 #> 5 19 0.962 0.961 0.961 #> 6 20 0.964 0.962 0.964 #> 7 22 0.961 0.960 0.960 #> 8 23 0.960 0.959 0.960 #> 9 24 0.962 0.961 0.962 #> 10 27 0.954 0.953 0.954 #> 11 28 0.952 0.952 0.952 ``` # Simulation with Weibull Data Next, we'll do a similar simulation using data drawn from a Weibull distribution. Again, we'll generate 10,000 samples for each sample size. 
```r shape_weibull <- 60 scale_weibull <- 100 set.seed(234568) # make this reproducible sim_weibull <- pmap_dfr(factor_diff, function(n, j_pre_080, z_pre_080, j_post_080, z_post_080, j_cmh, z_cmh) { map_dfr(1:10000, function(i_sim) { x <- sort(rweibull(n, shape_weibull, scale_weibull)) tibble( n = n, b_pre_080 = tolerance_limit(x, j_pre_080, z_pre_080), b_post_080 = tolerance_limit(x, j_post_080, z_post_080), b_cmh = tolerance_limit(x, j_cmh, z_cmh), x = list(x) ) } ) }) sim_weibull #> # A tibble: 110,000 × 5 #> n b_pre_080 b_post_080 b_cmh x #> #> 1 10 95.3 95.1 95.1 #> 2 10 88.5 88.3 88.3 #> 3 10 89.7 89.3 89.3 #> 4 10 94.7 94.4 94.4 #> 5 10 96.9 96.9 96.9 #> 6 10 93.6 93.2 93.2 #> 7 10 86.1 85.5 85.5 #> 8 10 91.9 91.9 91.9 #> 9 10 93.7 93.4 93.4 #> 10 10 90.9 90.4 90.4 #> # … with 109,990 more rows ``` The distribution of the tolerance limits for each sample size is as follows. Once again, we see that the distribution of tolerance limits is nearly identical when each of the three sets of factors are used. ```r sim_weibull %>% pivot_longer(cols = b_pre_080:b_cmh, names_to = "Factors") %>% ggplot(aes(x = value, color = Factors)) + geom_density() + facet_wrap(n ~ .) + theme_bw() + ggtitle("Distribution of Tolerance Limits for Various Values of n") ``` ![plot of chunk distribution-Weibull](distribution-Weibull-1.png) The true population quantile can be calculated as follows: ```r x_p_weibull <- qweibull(0.9, shape_weibull, scale_weibull, lower.tail = FALSE) x_p_weibull #> [1] 96.31885 ``` The proportion of calculated tolerance limit values that are below the population quantile can be calculated as follows. We see that the in all roughly 95% or more of the tolerance limits calculated for each sample is below the population quantile. We also see very similar proportions for each of the three sets of factors considered. 
```r sim_weibull %>% mutate(below_pre_080 = b_pre_080 < x_p_weibull, below_post_080 = b_post_080 < x_p_weibull, below_cmh = b_cmh < x_p_weibull) %>% group_by(n) %>% summarise( prop_below_pre_080 = sum(below_pre_080) / n(), prop_below_post_080 = sum(below_post_080) / n(), prop_below_cmh = sum(below_cmh) / n() ) #> # A tibble: 11 × 4 #> n prop_below_pre_080 prop_below_post_080 prop_below_cmh #> #> 1 10 0.97 0.965 0.965 #> 2 13 0.966 0.964 0.964 #> 3 16 0.959 0.959 0.959 #> 4 17 0.961 0.961 0.96 #> 5 19 0.957 0.956 0.956 #> 6 20 0.955 0.954 0.955 #> 7 22 0.953 0.952 0.952 #> 8 23 0.950 0.950 0.950 #> 9 24 0.953 0.953 0.953 #> 10 27 0.952 0.951 0.951 #> 11 28 0.950 0.950 0.950 ``` # Conclusion The values of $j$ and $z$ computed by the `kh_Ext_z_j_opt` function differs for certain samples sizes ($n$) before and after version 0.8.0. Furthermore, for certain sample sizes, these values differ from those published in CMH-17-1G. The simulation study presented in this vignette shows that the tolerance limit (Basis value) might differ for any individual sample based on which set of $j$ and $z$ are used. However, each set of factors produces tolerance limit factors that are either correct or conservative. These three methods have very similar performance, and tolerance limits produced with any of these three methods are equally valid. # Session Info This vignette is computed in advance. 
A system with the following configuration was used: ```r sessionInfo() #> R version 4.1.1 (2021-08-10) #> Platform: x86_64-pc-linux-gnu (64-bit) #> Running under: Ubuntu 20.04.3 LTS #> #> Matrix products: default #> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 #> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0 #> #> locale: #> [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8 #> [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8 LC_PAPER=en_CA.UTF-8 LC_NAME=C #> [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C #> #> attached base packages: #> [1] stats graphics grDevices utils datasets methods base #> #> other attached packages: #> [1] tidyr_1.1.4 purrr_0.3.4 ggplot2_3.3.5 dplyr_1.0.7 #> #> loaded via a namespace (and not attached): #> [1] tidyselect_1.1.1 xfun_0.26 remotes_2.4.0 colorspace_2.0-2 vctrs_0.3.8 generics_0.1.0 #> [7] testthat_3.0.4 htmltools_0.5.2 usethis_2.0.1 yaml_2.2.1 utf8_1.2.2 rlang_0.4.11.9001 #> [13] pkgbuild_1.2.0 pillar_1.6.3 glue_1.4.2 withr_2.4.2 DBI_1.1.1 waldo_0.3.1 #> [19] cmstatr_0.9.0 sessioninfo_1.1.1 lifecycle_1.0.1 stringr_1.4.0 munsell_0.5.0 gtable_0.3.0 #> [25] devtools_2.4.2 memoise_2.0.0 evaluate_0.14 labeling_0.4.2 knitr_1.35 callr_3.7.0 #> [31] fastmap_1.1.0 ps_1.6.0 curl_4.3.1 fansi_0.5.0 highr_0.9 scales_1.1.1 #> [37] kSamples_1.2-9 cachem_1.0.6 desc_1.4.0 pkgload_1.2.2 farver_2.1.0 fs_1.5.0 #> [43] digest_0.6.28 stringi_1.7.4 processx_3.5.2 SuppDists_1.1-9.5 rprojroot_2.0.2 grid_4.1.1 #> [49] cli_3.0.1 tools_4.1.1 magrittr_2.0.1 tibble_3.1.4 crayon_1.4.1 pkgconfig_2.0.3 #> [55] MASS_7.3-54 ellipsis_0.3.2 prettyunits_1.1.1 assertthat_0.2.1 rmarkdown_2.11 rstudioapi_0.13 #> [61] R6_2.5.1 compiler_4.1.1 ``` # References cmstatr/inst/doc/cmstatr_Graphing.Rmd0000644000176200001440000002653514477217317017441 0ustar liggesusers--- title: "Plotting Composite Material Data" author: "Ally Fraser" date: "2-May-2020" output: rmarkdown::html_vignette 
vignette: > %\VignetteIndexEntry{Plotting Composite Material Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6 ) # If any of the required packages are unavailable, # don't re-run the code # nolint start required <- c("dplyr", "ggplot2", "tidyr", "cmstatr") if (!all(unlist(lapply(required, function(pkg) { requireNamespace(pkg, quietly = TRUE)} )))) { knitr::opts_chunk$set(eval = FALSE) } # nolint end ``` This vignette demonstrates how to create some of the graphs commonly used when analyzing composite material data. Here, we rely on the [`ggplot2`](https://ggplot2.tidyverse.org/) package for graphing. This package can be loaded either on its own, or through the `tidyverse` meta-package, which also includes packages such as `dplyr` that we will also use. We'll need to load a few packages in order to proceed. ```{r message=FALSE} library(dplyr) library(ggplot2) library(tidyr) library(cmstatr) ``` Throughout this vignette, we'll use one of the example data sets that comes with `cmstatr` and we'll focus on the warp-tension data as an example. We'll load the example data in a variable as follows. By default the condition will be in an arbitrary order, but throughout the visualization, we'll want the conditions shown in a particular order (from coldest and driest to hottest and wettest). We can define the order of the conditions using the `ordered` function. For brevity, only the first few rows of the data set are displayed below. ```{r} dat <- carbon.fabric.2 %>% filter(test == "WT") %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) dat %>% head(10) ``` We'll then calculate the B-Basis value using the pooling by standard deviation method. This data set happens to fail some of the diagnostic tests, but for the purpose of this example, we'll ignore those failures using the `override` argument. 
```{r} b_basis_pooled <- dat %>% basis_pooled_cv(strength, condition, batch, override = c("between_group_variability", "normalized_variance_equal")) b_basis_pooled ``` The object returned from `basis_pooled_cv` contains a number of values. One value is a `data.frame` containing the groups (i.e. conditions) and the corresponding basis values. This looks like the following. We'll use this in the visualizations. ```{r} b_basis_pooled$basis ``` # Batch Plots Batch plots are used to identify differences between batches. Simple batch plots can be created using box plots and adding horizontal lines for the basis values as follows. Note that the heavy line in the box of the box plot is the *median*, not the mean. The two hinges correspond with the first and third quantiles and the whiskers extend to the most extreme data point, or 1.5 times the inner quantile range. In the code below, we use the function `rename` to rename the column `group` to `condition`. The `data.frame` produced by `basis_pooled_cv` uses the columns `value` and `group`, but to match the data, we need the column with the conditions to be named `condition`. ```{r} dat %>% ggplot(aes(x = batch, y = strength)) + geom_boxplot() + geom_jitter(width = 0.25) + geom_hline(aes(yintercept = value), data = b_basis_pooled$basis %>% rename(condition = group), color = "blue") + facet_grid(. ~ condition) + theme_bw() + ggtitle("Batch Plot") ``` # Quantile Plots A quantile plot provides a graphical summary of sample values. This plot displays the sample values and the corresponding quantile. A quantile plot can be used to examine the symmetry and tail sizes of the underlying distribution. Sharp rises may indicate the presence of outliers. 
```{r} dat %>% ggplot(aes(x = strength, color = condition)) + stat_ecdf(geom = "point") + coord_flip() + theme_bw() + ggtitle("Quantile Plot") ``` # Normal Survival Function Plots An empirical survival function, and the corresponding normal survival function can be plotted using two `ggplot` "stat" functions provided by `cmstatr`. In the example below, the empirical survival function is plotted for each condition, and the survival function for a normal distribution with the mean and variance from the data is also plotted (the survival function is 1 minus the cumulative distribution function). This type of plot can be used to identify how closely the data follows a normal distribution, and also to compare the distributions of the various conditions. ```{r} dat %>% ggplot(aes(x = strength, color = condition)) + stat_normal_surv_func() + stat_esf() + theme_bw() + ggtitle("Normal Survival Function Plot") ``` # Normal Score Plots The normal scores plot calculates the normal score and plots it against the normal score. Normal plots are useful to investigate distributions of the data. ```{r} dat %>% group_by(condition) %>% mutate(norm.score = scale(strength)) %>% ggplot(aes(x = norm.score, y = strength, colour = condition)) + geom_point() + ggtitle("Normal Scores Plot") + theme_bw() ``` # Q-Q Plots A Q-Q plot compares the data against the theoretical quantiles for a particular distribution. A line is also plotted showing the normal distribution with mean and variance from the data. If the data exactly followed a normal distribution, all points would fall on this line. ```{r} dat %>% ggplot(aes(sample = strength, colour = condition)) + geom_qq() + geom_qq_line() + ggtitle("Q-Q Plot") + theme_bw() ``` # Property Plots Property plots allow for a variety of properties for a group to be compared to other properties within the same group, as well as to other group properties. 
The properties included in this plot are A-Basis, B-Basis, Pooled A- and B-Basis, Pooled Modified CV (Coefficient of Variation) A- and B-Basis, Mean, and Min for each group. The property plots will take a bit of work to construct. First, the distribution of each group must be determined. Once the distribution has been determined, the proper basis calculation based on that distribution should be filled in below. We also have a column in the tables below for extra arguments to pass to the `basis` function, such as overrides required or the method for the `basis_hk_ext` function to use. ```{r} b_basis_fcn <- tribble( ~condition, ~fcn, ~args, "CTD", "basis_normal", list(override = c("between_batch_variability")), "RTD", "basis_normal", list(override = c("between_batch_variability")), "ETW", "basis_hk_ext", NULL, "ETW2", "basis_normal", list(override = c("between_batch_variability")) ) a_basis_fcn <- tribble( ~condition, ~fcn, ~args, "CTD", "basis_normal", list(override = c("between_batch_variability")), "RTD", "basis_normal", list(override = c("between_batch_variability")), "ETW", "basis_hk_ext", list(method = "woodward-frawley"), "ETW2", "basis_normal", list(override = c("between_batch_variability")) ) ``` We'll write a function that takes the data and information about the distribution and computes the single-point basis value. We'll use this function for both A- and B-Basis, so we'll add a parameter for the probability (0.90 or 0.99). 
```{r} single_point_fcn <- function(group_x, group_batch, cond, basis_fcn, p) { fcn <- basis_fcn$fcn[basis_fcn$condition == cond[1]] extra_args <- basis_fcn$args[basis_fcn$condition == cond[1]] args <- c( list(x = group_x, batch = group_batch, p = p), unlist(extra_args)) basis <- do.call(fcn, args) basis$basis } single_point_results <- dat %>% group_by(condition) %>% summarise(single_point_b_basis = single_point_fcn( strength, batch, condition, b_basis_fcn, 0.90), single_point_a_basis = single_point_fcn( strength, batch, condition, a_basis_fcn, 0.99), minimum = min(strength), mean = mean(strength)) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) single_point_results ``` In the above code, we also ensure that the condition column is still in the order we expect. We've already computed the B-Basis of the data using a pooling method. We'll do the same for A-Basis: ```{r} a_basis_pooled <- dat %>% basis_pooled_cv(strength, condition, batch, p = 0.99, override = c("between_group_variability", "normalized_variance_equal")) a_basis_pooled ``` As we saw before, the returned object has a property called `basis`, which is a `data.frame` for the pooling methods. ```{r} a_basis_pooled$basis ``` We can take this `data.frame` and change the column names to suit our needs. ```{r} a_basis_pooled$basis %>% rename(condition = group, b_basis_pooled = value) ``` We can combine all these steps into one statement. We'll also ensure that the conditions are listed in the order we want. 
```{r} a_basis_pooled_results <- a_basis_pooled$basis %>% rename(condition = group, a_basis_pooled = value) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) a_basis_pooled_results ``` And the same thing for B-Basis: ```{r} b_basis_pooled_results <- b_basis_pooled$basis %>% rename(condition = group, b_basis_pooled = value) %>% mutate(condition = ordered(condition, c("CTD", "RTD", "ETW", "ETW2"))) b_basis_pooled_results ``` We can use the function `inner_join` from the `dplyr` package to combine the three sets of computational results. Each row for each condition will be concatenated. ```{r} single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") ``` To use this table in the plot we're trying to construct, we want to "lengthen" the table as follows. ```{r} single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") %>% pivot_longer(cols = single_point_b_basis:a_basis_pooled) ``` We can now make a plot based on this: ```{r} single_point_results %>% inner_join(b_basis_pooled_results, by = "condition") %>% inner_join(a_basis_pooled_results, by = "condition") %>% pivot_longer(cols = single_point_b_basis:a_basis_pooled) %>% ggplot(aes(x = condition, y = value)) + geom_boxplot(aes(y = strength), data = dat) + geom_point(aes(shape = name, color = name)) + ggtitle("Property Graph") + theme_bw() ``` # Nested Data Plots `cmstatr` contains the function `nested_data_plot`. This function creates a plot showing the sources of variation. In the following example, the data is grouped according to the variables in the `group` argument. The data is first grouped according to `batch`, then according to `panel`. The labels located according to the data points that fall under them. 
By default, the mean is used, but that `stat` argument can be used to locate the labels according to `median` or some other statistic. ```{r} carbon.fabric.2 %>% mutate(panel = as.character(panel)) %>% filter(test == "WT") %>% nested_data_plot(strength, groups = c(batch, panel)) ``` Optionally, `fill` or `color` can be set as follows: ```{r} carbon.fabric.2 %>% mutate(panel = as.character(panel)) %>% filter(test == "WT" & condition == "RTD") %>% nested_data_plot(strength, groups = c(batch, panel), fill = batch, color = panel) ``` cmstatr/inst/doc/cmstatr_Validation.html0000644000176200001440000101754414574576016020221 0ustar liggesusers cmstatr Validation

cmstatr Validation

Stefan Kloppenborg

2024-03-14

1. Introduction

This vignette is intended to contain the same validation that is included in the test suite within the cmstatr package, but in a format that is easier for a human to read. The intent is that this vignette will include only those validations that are included in the test suite, but that the test suite may include more tests than are shown in this vignette.

The following packages will be used in this validation. The version of each package used is listed at the end of this vignette.

library(cmstatr)
library(dplyr)
library(purrr)
library(tidyr)
library(testthat)

Throughout this vignette, the testthat package will be used. Expressions such as expect_equal are used to ensure that the two values are equal (within some tolerance). If this expectation is not true, the vignette will fail to build. The tolerance is a relative tolerance: a tolerance of 0.01 means that the two values must be within \(1\%\) of each other. As an example, the following expression checks that the value 10 is equal to 10.1 within a tolerance of 0.01. Such an expectation should be satisfied.

expect_equal(10, 10.1, tolerance = 0.01)

The basis_... functions automatically perform certain diagnostic tests. When those diagnostic tests are not relevant to the validation, the diagnostic tests are overridden by passing the argument override = "all".

2. Validation Table

The following table provides a cross-reference between the various functions of the cmstatr package and the tests shown within this vignette. The sections in this vignette are organized by data set. Not all checks are performed on all data sets.

Function Tests
ad_ksample() Section 3.1, Section 4.1.2, Section 6.1
anderson_darling_normal() Section 4.1.3, Section 5.1
anderson_darling_lognormal() Section 4.1.3, Section 5.2
anderson_darling_weibull() Section 4.1.3, Section 5.3
basis_normal() Section 5.4
basis_lognormal() Section 5.5
basis_weibull() Section 5.6
basis_pooled_cv() Section 4.2.3, Section 4.2.4,
basis_pooled_sd() Section 4.2.1, Section 4.2.2
basis_hk_ext() Section 4.1.6, Section 5.7, Section 5.8
basis_nonpara_large_sample() Section 5.9
basis_anova() Section 4.1.7
calc_cv_star()
cv()
equiv_change_mean() Section 5.11
equiv_mean_extremum() Section 5.10
hk_ext_z() Section 7.3, Section 7.4
hk_ext_z_j_opt() Section 7.5
k_equiv() Section 7.8
k_factor_normal() Section 7.1, Section 7.2
levene_test() Section 4.1.4, Section 4.1.5
maximum_normed_residual() Section 4.1.1
nonpara_binomial_rank() Section 7.6, Section 7.7
normalize_group_mean()
normalize_ply_thickness()
transform_mod_cv_ad()
transform_mod_cv()

3. carbon.fabric Data Set

This data set is example data that is provided with cmstatr. The first few rows of this data are shown below.

head(carbon.fabric)
#>           id test condition batch strength
#> 1 WT-RTD-1-1   WT       RTD     1 137.4438
#> 2 WT-RTD-1-2   WT       RTD     1 139.5395
#> 3 WT-RTD-1-3   WT       RTD     1 150.8900
#> 4 WT-RTD-1-4   WT       RTD     1 141.4474
#> 5 WT-RTD-1-5   WT       RTD     1 141.8203
#> 6 WT-RTD-1-6   WT       RTD     1 151.8821

3.1. Anderson–Darling k-Sample Test

This data was entered into ASAP 2008 [1] and the reported Anderson–Darling k–Sample test statistics were recorded, as were the conclusions.

The value of the test statistic reported by cmstatr and that reported by ASAP 2008 differ by a factor of \(k - 1\), as do the critical values used. As such, the conclusion of the tests are identical. This is described in more detail in the Anderson–Darling k–Sample Vignette.

When the RTD warp-tension data from this data set is entered into ASAP 2008, it reports a test statistic of 0.456 and fails to reject the null hypothesis that the batches are drawn from the same distribution. Adjusting for the different definition of the test statistic, the results given by cmstatr are very similar.

res <- carbon.fabric %>%
  filter(test == "WT") %>%
  filter(condition == "RTD") %>%
  ad_ksample(strength, batch)

expect_equal(res$ad / (res$k - 1), 0.456, tolerance = 0.002)
expect_false(res$reject_same_dist)

res
#> 
#> Call:
#> ad_ksample(data = ., x = strength, groups = batch)
#> 
#> N = 18           k = 3            
#> ADK = 0.912      p-value = 0.95989 
#> Conclusion: Samples come from the same distribution ( alpha = 0.025 )

When the ETW warp-tension data from this data set are entered into ASAP 2008, the reported test statistic is 1.604 and it fails to reject the null hypothesis that the batches are drawn from the same distribution. Adjusting for the different definition of the test statistic, cmstatr gives nearly identical results.

res <- carbon.fabric %>%
  filter(test == "WT") %>%
  filter(condition == "ETW") %>%
  ad_ksample(strength, batch)

expect_equal(res$ad / (res$k - 1), 1.604, tolerance = 0.002)
expect_false(res$reject_same_dist)

res
#> 
#> Call:
#> ad_ksample(data = ., x = strength, groups = batch)
#> 
#> N = 18           k = 3            
#> ADK = 3.21       p-value = 0.10208 
#> Conclusion: Samples come from the same distribution ( alpha = 0.025 )

4. Comparison with Examples from CMH-17-1G

4.1 Dataset From Section 8.3.11.1.1

CMH-17-1G [2] provides an example data set and results from ASAP [1] and STAT17 [3]. This example data set is duplicated below:

dat_8_3_11_1_1 <- tribble(
  ~batch, ~strength, ~condition,
  1, 118.3774604, "CTD", 1, 84.9581364, "RTD", 1, 83.7436035, "ETD",
  1, 123.6035612, "CTD", 1, 92.4891822, "RTD", 1, 84.3831677, "ETD",
  1, 115.2238092, "CTD", 1, 96.8212659, "RTD", 1, 94.8030433, "ETD",
  1, 112.6379744, "CTD", 1, 109.030325, "RTD", 1, 94.3931537, "ETD",
  1, 116.5564277, "CTD", 1, 97.8212659, "RTD", 1, 101.702222, "ETD",
  1, 123.1649896, "CTD", 1, 100.921519, "RTD", 1, 86.5372121, "ETD",
  2, 128.5589027, "CTD", 1, 103.699444, "RTD", 1, 92.3772684, "ETD",
  2, 113.1462103, "CTD", 2, 93.7908212, "RTD", 2, 89.2084024, "ETD",
  2, 121.4248107, "CTD", 2, 107.526709, "RTD", 2, 100.686001, "ETD",
  2, 134.3241906, "CTD", 2, 94.5769704, "RTD", 2, 81.0444192, "ETD",
  2, 129.6405117, "CTD", 2, 93.8831373, "RTD", 2, 91.3398070, "ETD",
  2, 117.9818658, "CTD", 2, 98.2296605, "RTD", 2, 93.1441939, "ETD",
  3, 115.4505226, "CTD", 2, 111.346590, "RTD", 2, 85.8204168, "ETD",
  3, 120.0369467, "CTD", 2, 100.817538, "RTD", 3, 94.8966273, "ETD",
  3, 117.1631088, "CTD", 3, 100.382203, "RTD", 3, 95.8068520, "ETD",
  3, 112.9302797, "CTD", 3, 91.5037811, "RTD", 3, 86.7842252, "ETD",
  3, 117.9114501, "CTD", 3, 100.083233, "RTD", 3, 94.4011973, "ETD",
  3, 120.1900159, "CTD", 3, 95.6393615, "RTD", 3, 96.7231171, "ETD",
  3, 110.7295966, "CTD", 3, 109.304779, "RTD", 3, 89.9010384, "ETD",
  3, 100.078562, "RTD", 3, 99.1205847, "RTD", 3, 89.3672306, "ETD",
  1, 106.357525, "ETW", 1, 99.0239966, "ETW2",
  1, 105.898733, "ETW", 1, 103.341238, "ETW2",
  1, 88.4640082, "ETW", 1, 100.302130, "ETW2",
  1, 103.901744, "ETW", 1, 98.4634133, "ETW2",
  1, 80.2058219, "ETW", 1, 92.2647280, "ETW2",
  1, 109.199597, "ETW", 1, 103.487693, "ETW2",
  1, 61.0139431, "ETW", 1, 113.734763, "ETW2",
  2, 99.3207107, "ETW", 2, 108.172659, "ETW2",
  2, 115.861770, "ETW", 2, 108.426732, "ETW2",
  2, 82.6133082, "ETW", 2, 116.260375, "ETW2",
  2, 85.3690411, "ETW", 2, 121.049610, "ETW2",
  2, 115.801622, "ETW", 2, 111.223082, "ETW2",
  2, 44.3217741, "ETW", 2, 104.574843, "ETW2",
  2, 117.328077, "ETW", 2, 103.222552, "ETW2",
  2, 88.6782903, "ETW", 3, 99.3918538, "ETW2",
  3, 107.676986, "ETW", 3, 87.3421658, "ETW2",
  3, 108.960241, "ETW", 3, 102.730741, "ETW2",
  3, 116.122640, "ETW", 3, 96.3694916, "ETW2",
  3, 80.2334815, "ETW", 3, 99.5946088, "ETW2",
  3, 106.145570, "ETW", 3, 97.0712407, "ETW2",
  3, 104.667866, "ETW",
  3, 104.234953, "ETW"
)
dat_8_3_11_1_1
#> # A tibble: 102 × 3
#>    batch strength condition
#>    <dbl>    <dbl> <chr>    
#>  1     1    118.  CTD      
#>  2     1     85.0 RTD      
#>  3     1     83.7 ETD      
#>  4     1    124.  CTD      
#>  5     1     92.5 RTD      
#>  6     1     84.4 ETD      
#>  7     1    115.  CTD      
#>  8     1     96.8 RTD      
#>  9     1     94.8 ETD      
#> 10     1    113.  CTD      
#> # ℹ 92 more rows

4.1.1 Maximum Normed Residual Test

CMH-17-1G Table 8.3.11.1.1(a) provides results of the MNR test from ASAP for this data set. Batches 2 and 3 of the ETW data is considered here and the results of cmstatr are compared with those published in CMH-17-1G.

For Batch 2 of the ETW data, the results match those published in the handbook within a small tolerance. The published test statistic is 2.008.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW" & batch == 2) %>%
  maximum_normed_residual(strength, alpha = 0.05)

expect_equal(res$mnr, 2.008, tolerance = 0.001)
expect_equal(res$crit, 2.127, tolerance = 0.001)
expect_equal(res$n_outliers, 0)

res
#> 
#> Call:
#> maximum_normed_residual(data = ., x = strength, alpha = 0.05)
#> 
#> MNR = 2.008274  ( critical value = 2.126645 )
#> 
#> No outliers detected ( alpha = 0.05 )

Similarly, for Batch 3 of the ETW data, the results of cmstatr match the results published in the handbook within a small tolerance. The published test statistic is 2.119.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW" & batch == 3) %>%
  maximum_normed_residual(strength, alpha = 0.05)

expect_equal(res$mnr, 2.119, tolerance = 0.001)
expect_equal(res$crit, 2.020, tolerance = 0.001)
expect_equal(res$n_outliers, 1)

res
#> 
#> Call:
#> maximum_normed_residual(data = ., x = strength, alpha = 0.05)
#> 
#> MNR = 2.119175  ( critical value = 2.019969 )
#> 
#> Outliers ( alpha = 0.05 ):
#>    Index    Value            
#>        4    80.23348

4.1.2 Anderson–Darling k–Sample Test

For the ETW condition, the ADK test statistic given in [2] is \(ADK = 0.793\) and the test concludes that the samples come from the same distribution. Note that cmstatr uses the definition of the test statistic given in [4], so the test statistic reported by cmstatr differs from that given by ASAP by a factor of \(k - 1\), as described in the Anderson–Darling k–Sample Vignette.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW") %>%
  ad_ksample(strength, batch)

expect_equal(res$ad / (res$k - 1), 0.793, tolerance = 0.003)
expect_false(res$reject_same_dist)

res
#> 
#> Call:
#> ad_ksample(data = ., x = strength, groups = batch)
#> 
#> N = 22           k = 3            
#> ADK = 1.59       p-value = 0.59491 
#> Conclusion: Samples come from the same distribution ( alpha = 0.025 )

Similarly, for the ETW2 condition, the test statistic given in [2] is \(ADK = 3.024\) and the test concludes that the samples come from different distributions. This matches cmstatr

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW2") %>%
  ad_ksample(strength, batch)

expect_equal(res$ad / (res$k - 1), 3.024, tolerance = 0.001)
expect_true(res$reject_same_dist)

res
#> 
#> Call:
#> ad_ksample(data = ., x = strength, groups = batch)
#> 
#> N = 20           k = 3            
#> ADK = 6.05       p-value = 0.0042703 
#> Conclusion: Samples do not come from the same distribution (alpha = 0.025 )

4.1.3 Anderson–Darling Tests for Distribution

CMH-17-1G Section 8.3.11.2.1 contains results from STAT17 for the “observed significance level” from the Anderson–Darling test for various distributions. In this section, the ETW condition from the present data set is used. The published results are given in the following table. The results from cmstatr are below and are very similar to those from STAT17.

Distribution OSL
Normal 0.006051
Lognormal 0.000307
Weibull 0.0219
res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW") %>%
  anderson_darling_normal(strength)
expect_equal(res$osl, 0.006051, tolerance = 0.001)
res
#> 
#> Call:
#> anderson_darling_normal(data = ., x = strength)
#> 
#> Distribution:  Normal ( n = 22 ) 
#> Test statistic:  A = 1.052184 
#> OSL (p-value):  0.006051441  (assuming unknown parameters)
#> Conclusion: Sample is not drawn from a Normal distribution ( alpha = 0.05 )
res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW") %>%
  anderson_darling_lognormal(strength)
expect_equal(res$osl, 0.000307, tolerance = 0.001)
res
#> 
#> Call:
#> anderson_darling_lognormal(data = ., x = strength)
#> 
#> Distribution:  Lognormal ( n = 22 ) 
#> Test statistic:  A = 1.568825 
#> OSL (p-value):  0.0003073592  (assuming unknown parameters)
#> Conclusion: Sample is not drawn from a Lognormal distribution ( alpha = 0.05 )
res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW") %>%
  anderson_darling_weibull(strength)
expect_equal(res$osl, 0.0219, tolerance = 0.002)
res
#> 
#> Call:
#> anderson_darling_weibull(data = ., x = strength)
#> 
#> Distribution:  Weibull ( n = 22 ) 
#> Test statistic:  A = 0.8630665 
#> OSL (p-value):  0.02186889  (assuming unknown parameters)
#> Conclusion: Sample is not drawn from a Weibull distribution ( alpha = 0.05 )

4.1.4 Levene’s Test (Between Conditions)

CMH-17-1G Section 8.3.11.1.1 provides results from ASAP for Levene’s test for equality of variance between conditions after the ETW and ETW2 conditions are removed. The handbook shows an F statistic of 0.58, however if this data is entered into ASAP directly, ASAP gives an F statistic of 0.058, which matches the result of cmstatr.

res <- dat_8_3_11_1_1 %>%
  filter(condition != "ETW" & condition != "ETW2") %>%
  levene_test(strength, condition)
expect_equal(res$f, 0.058, tolerance = 0.01)
res
#> 
#> Call:
#> levene_test(data = ., x = strength, groups = condition)
#> 
#> n = 60           k = 3            
#> F = 0.05811631   p-value = 0.943596 
#> Conclusion: Samples have equal variances ( alpha = 0.05 )

4.1.5 Levene’s Test (Between Batches)

CMH-17-1G Section 8.3.11.2.2 provides output from STAT17. The ETW2 condition from the present data set was analyzed by STAT17 and that software reported an F statistic of 0.123 from Levene’s test when comparing the variance of the batches within this condition. The result from cmstatr is similar.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW2") %>%
  levene_test(strength, batch)
expect_equal(res$f, 0.123, tolerance = 0.005)
res
#> 
#> Call:
#> levene_test(data = ., x = strength, groups = batch)
#> 
#> n = 20           k = 3            
#> F = 0.1233937    p-value = 0.8847 
#> Conclusion: Samples have equal variances ( alpha = 0.05 )

Similarly, the published value of the F statistic for the CTD condition is \(3.850\). cmstatr produces very similar results.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "CTD") %>%
  levene_test(strength, batch)
expect_equal(res$f, 3.850, tolerance = 0.005)
res
#> 
#> Call:
#> levene_test(data = ., x = strength, groups = batch)
#> 
#> n = 19           k = 3            
#> F = 3.852032     p-value = 0.04309008 
#> Conclusion: Samples have unequal variance ( alpha = 0.05 )

4.1.6 Nonparametric Basis Values

CMH-17-1G Section 8.3.11.2.1 provides STAT17 outputs for the ETW condition of the present data set. The nonparametric Basis values are listed. In this case, the Hanson–Koopmans method is used. The published A-Basis value is 13.0 and the B-Basis is 37.9.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW") %>%
  basis_hk_ext(strength, method = "woodward-frawley", p = 0.99, conf = 0.95,
               override = "all")

expect_equal(res$basis, 13.0, tolerance = 0.001)

res
#> 
#> Call:
#> basis_hk_ext(data = ., x = strength, p = 0.99, conf = 0.95, method = "woodward-frawley", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, Woodward-Frawley method)     ( n = 22 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> 12.99614
res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW") %>%
  basis_hk_ext(strength, method = "optimum-order", p = 0.90, conf = 0.95,
               override = "all")

expect_equal(res$basis, 37.9, tolerance = 0.001)

res
#> 
#> Call:
#> basis_hk_ext(data = ., x = strength, p = 0.9, conf = 0.95, method = "optimum-order", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, optimum two-order-statistic method)  ( n = 22 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 37.88511

4.1.7 Single-Point ANOVA Basis Value

CMH-17-1G Section 8.3.11.2.2 provides output from STAT17 for the ETW2 condition from the present data set. STAT17 reports A- and B-Basis values based on the ANOVA method of 34.6 and 63.2, respectively. The results from cmstatr are similar.

res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW2") %>%
  basis_anova(strength, batch, override = "number_of_groups",
              p = 0.99, conf = 0.95)
expect_equal(res$basis, 34.6, tolerance = 0.001)
res
#> 
#> Call:
#> basis_anova(data = ., x = strength, groups = batch, p = 0.99, 
#>     conf = 0.95, override = "number_of_groups")
#> 
#> Distribution:  ANOVA     ( n = 20, r = 3 )
#> The following diagnostic tests were overridden:
#>     `number_of_groups`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> 34.57763
res <- dat_8_3_11_1_1 %>%
  filter(condition == "ETW2") %>%
  basis_anova(strength, batch, override = "number_of_groups")
expect_equal(res$basis, 63.2, tolerance = 0.001)
res
#> 
#> Call:
#> basis_anova(data = ., x = strength, groups = batch, override = "number_of_groups")
#> 
#> Distribution:  ANOVA     ( n = 20, r = 3 )
#> The following diagnostic tests were overridden:
#>     `number_of_groups`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 63.20276

4.2 Dataset From Section 8.3.11.1.2

[2] provides an example data set and results from ASAP [1]. This example data set is duplicated below:

dat_8_3_11_1_2 <- tribble(
  ~batch, ~strength, ~condition,
  1, 79.04517, "CTD", 1, 103.2006, "RTD", 1, 63.22764, "ETW", 1, 54.09806, "ETW2",
  1, 102.6014, "CTD", 1, 105.1034, "RTD", 1, 70.84454, "ETW", 1, 58.87615, "ETW2",
  1, 97.79372, "CTD", 1, 105.1893, "RTD", 1, 66.43223, "ETW", 1, 61.60167, "ETW2",
  1, 92.86423, "CTD", 1, 100.4189, "RTD", 1, 75.37771, "ETW", 1, 60.23973, "ETW2",
  1, 117.218,  "CTD", 2, 85.32319, "RTD", 1, 72.43773, "ETW", 1, 61.4808,  "ETW2",
  1, 108.7168, "CTD", 2, 92.69923, "RTD", 1, 68.43073, "ETW", 1, 64.55832, "ETW2",
  1, 112.2773, "CTD", 2, 98.45242, "RTD", 1, 69.72524, "ETW", 2, 57.76131, "ETW2",
  1, 114.0129, "CTD", 2, 104.1014, "RTD", 2, 66.20343, "ETW", 2, 49.91463, "ETW2",
  2, 106.8452, "CTD", 2, 91.51841, "RTD", 2, 60.51251, "ETW", 2, 61.49271, "ETW2",
  2, 112.3911, "CTD", 2, 101.3746, "RTD", 2, 65.69334, "ETW", 2, 57.7281,  "ETW2",
  2, 115.5658, "CTD", 2, 101.5828, "RTD", 2, 62.73595, "ETW", 2, 62.11653, "ETW2",
  2, 87.40657, "CTD", 2, 99.57384, "RTD", 2, 59.00798, "ETW", 2, 62.69353, "ETW2",
  2, 102.2785, "CTD", 2, 88.84826, "RTD", 2, 62.37761, "ETW", 3, 61.38523, "ETW2",
  2, 110.6073, "CTD", 3, 92.18703, "RTD", 3, 64.3947,  "ETW", 3, 60.39053, "ETW2",
  3, 105.2762, "CTD", 3, 101.8234, "RTD", 3, 72.8491,  "ETW", 3, 59.17616, "ETW2",
  3, 110.8924, "CTD", 3, 97.68909, "RTD", 3, 66.56226, "ETW", 3, 60.17616, "ETW2",
  3, 108.7638, "CTD", 3, 101.5172, "RTD", 3, 66.56779, "ETW", 3, 46.47396, "ETW2",
  3, 110.9833, "CTD", 3, 100.0481, "RTD", 3, 66.00123, "ETW", 3, 51.16616, "ETW2",
  3, 101.3417, "CTD", 3, 102.0544, "RTD", 3, 59.62108, "ETW",
  3, 100.0251, "CTD",                     3, 60.61167, "ETW",
                                          3, 57.65487, "ETW",
                                          3, 66.51241, "ETW",
                                          3, 64.89347, "ETW",
                                          3, 57.73054, "ETW",
                                          3, 68.94086, "ETW",
                                          3, 61.63177, "ETW"
)

4.2.1 Pooled SD A- and B-Basis

CMH-17-1G Table 8.3.11.2(k) provides outputs from ASAP for the data set above. ASAP uses the pooled SD method. ASAP produces the following results, which are quite similar to those produced by cmstatr.

Condition CTD RTD ETW ETW2
B-Basis 93.64 87.30 54.33 47.12
A-Basis 86.19 79.86 46.84 39.69

res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition,
                         override = "all")

expect_equal(res$basis$value[res$basis$group == "CTD"],
           93.64, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
           87.30, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
           54.33, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW2"],
           47.12, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_sd(data = dat_8_3_11_1_2, x = strength, groups = condition, 
#>     override = "all")
#> 
#> Distribution:  Normal - Pooled Standard Deviation    ( n = 83, r = 4 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `pooled_variance_equal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD   93.63504 
#> ETW   54.32706 
#> ETW2  47.07669 
#> RTD   87.29555
res <- basis_pooled_sd(dat_8_3_11_1_2, strength, condition,
                       p = 0.99, conf = 0.95,
                       override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
           86.19, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
           79.86, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
           46.84, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW2"],
           39.69, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_sd(data = dat_8_3_11_1_2, x = strength, groups = condition, 
#>     p = 0.99, conf = 0.95, override = "all")
#> 
#> Distribution:  Normal - Pooled Standard Deviation    ( n = 83, r = 4 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `pooled_variance_equal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> CTD   86.19301 
#> ETW   46.84112 
#> ETW2  39.65214 
#> RTD   79.86205

4.2.2 Pooled SD A- and B-Basis (Mod CV)

After removal of the ETW2 condition, CMH17-STATS reports the pooled A- and B-Basis (mod CV) shown in the following table. cmstatr computes very similar values.

Condition CTD RTD ETW
B-Basis 92.25 85.91 52.97
A-Basis 83.81 77.48 44.47
res <- dat_8_3_11_1_2 %>%
  filter(condition != "ETW2") %>%
  basis_pooled_sd(strength, condition, modcv = TRUE, override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
             92.25, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
             85.91, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
             52.97, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_sd(data = ., x = strength, groups = condition, modcv = TRUE, 
#>     override = "all")
#> 
#> Distribution:  Normal - Pooled Standard Deviation    ( n = 65, r = 3 )
#> Modified CV Approach Used 
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `pooled_variance_equal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD  92.24927 
#> ETW  52.9649 
#> RTD  85.90454
res <- dat_8_3_11_1_2 %>%
  filter(condition != "ETW2") %>%
  basis_pooled_sd(strength, condition,
                  p = 0.99, conf = 0.95, modcv = TRUE, override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
             83.81, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
             77.48, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
             44.47, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_sd(data = ., x = strength, groups = condition, p = 0.99, 
#>     conf = 0.95, modcv = TRUE, override = "all")
#> 
#> Distribution:  Normal - Pooled Standard Deviation    ( n = 65, r = 3 )
#> Modified CV Approach Used 
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `pooled_variance_equal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> CTD  83.80981 
#> ETW  44.47038 
#> RTD  77.47589

4.2.3 Pooled CV A- and B-Basis

This data set was input into CMH17-STATS and the Pooled CV method was selected. The results from CMH17-STATS were as follows. cmstatr produces very similar results.

Condition CTD RTD ETW ETW2
B-Basis 90.89 85.37 56.79 50.55
A-Basis 81.62 76.67 50.98 45.40
res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition, override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
             90.89, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
             85.37, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
             56.79, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW2"],
             50.55, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_cv(data = dat_8_3_11_1_2, x = strength, groups = condition, 
#>     override = "all")
#> 
#> Distribution:  Normal - Pooled CV    ( n = 83, r = 4 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `normalized_variance_equal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD   90.88018 
#> ETW   56.78337 
#> ETW2  50.54406 
#> RTD   85.36756
res <- basis_pooled_cv(dat_8_3_11_1_2, strength, condition,
                       p = 0.99, conf = 0.95, override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
             81.62, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
             76.67, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
             50.98, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW2"],
             45.40, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_cv(data = dat_8_3_11_1_2, x = strength, groups = condition, 
#>     p = 0.99, conf = 0.95, override = "all")
#> 
#> Distribution:  Normal - Pooled CV    ( n = 83, r = 4 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `normalized_variance_equal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> CTD   81.60931 
#> ETW   50.97801 
#> ETW2  45.39158 
#> RTD   76.66214

4.2.4 Pooled CV A- and B-Basis (Mod CV)

This data set was input into CMH17-STATS and the Pooled CV method was selected with the modified CV transform. Additionally, the ETW2 condition was removed. The results from CMH17-STATS were as follows. cmstatr produces very similar results.

Condition CTD RTD ETW
B-Basis 90.31 84.83 56.43
A-Basis 80.57 75.69 50.33
res <- dat_8_3_11_1_2 %>%
  filter(condition != "ETW2") %>%
  basis_pooled_cv(strength, condition, modcv = TRUE, override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
             90.31, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
             84.83, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
             56.43, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_cv(data = ., x = strength, groups = condition, modcv = TRUE, 
#>     override = "all")
#> 
#> Distribution:  Normal - Pooled CV    ( n = 65, r = 3 )
#> Modified CV Approach Used 
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `normalized_variance_equal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> CTD  90.30646 
#> ETW  56.42786 
#> RTD  84.82748
res <- dat_8_3_11_1_2 %>%
  filter(condition != "ETW2") %>%
  basis_pooled_cv(strength, condition, modcv = TRUE,
                  p = 0.99, conf = 0.95, override = "all")
expect_equal(res$basis$value[res$basis$group == "CTD"],
             80.57, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "RTD"],
             75.69, tolerance = 0.001)
expect_equal(res$basis$value[res$basis$group == "ETW"],
             50.33, tolerance = 0.001)
res
#> 
#> Call:
#> basis_pooled_cv(data = ., x = strength, groups = condition, p = 0.99, 
#>     conf = 0.95, modcv = TRUE, override = "all")
#> 
#> Distribution:  Normal - Pooled CV    ( n = 65, r = 3 )
#> Modified CV Approach Used 
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_group_variability`,
#>     `outliers_within_group`,
#>     `pooled_data_normal`,
#>     `normalized_variance_equal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> CTD  80.56529 
#> ETW  50.32422 
#> RTD  75.6817

5. Other Data Sets

This section contains various small data sets. In most cases, these data sets were generated randomly for the purpose of comparing cmstatr to other software.

5.1 Anderson–Darling Test (Normal)

The following data set was randomly generated. When this is entered into STAT17 [3], that software gives the value \(OSL = 0.465\), which matches the result of cmstatr within a small margin.

dat <- data.frame(
  strength = c(
    137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245,
    132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429,
    141.7023, 137.4732, 152.338, 144.1589, 128.5218
  )
)
res <- anderson_darling_normal(dat, strength)

expect_equal(res$osl, 0.465, tolerance = 0.001)

res
#> 
#> Call:
#> anderson_darling_normal(data = dat, x = strength)
#> 
#> Distribution:  Normal ( n = 18 ) 
#> Test statistic:  A = 0.2849726 
#> OSL (p-value):  0.4648132  (assuming unknown parameters)
#> Conclusion: Sample is drawn from a Normal distribution ( alpha = 0.05 )

5.2 Anderson–Darling Test (Lognormal)

The following data set was randomly generated. When this is entered into STAT17 [3], that software gives the value \(OSL = 0.480\), which matches the result of cmstatr within a small margin.

dat <- data.frame(
  strength = c(
    137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245,
    132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429,
    141.7023, 137.4732, 152.338, 144.1589, 128.5218
  )
)

res <- anderson_darling_lognormal(dat, strength)

expect_equal(res$osl, 0.480, tolerance = 0.001)

res
#> 
#> Call:
#> anderson_darling_lognormal(data = dat, x = strength)
#> 
#> Distribution:  Lognormal ( n = 18 ) 
#> Test statistic:  A = 0.2774652 
#> OSL (p-value):  0.4798148  (assuming unknown parameters)
#> Conclusion: Sample is drawn from a Lognormal distribution ( alpha = 0.05 )

5.3 Anderson–Darling Test (Weibull)

The following data set was randomly generated. When this is entered into STAT17 [3], that software gives the value \(OSL = 0.179\), which matches the result of cmstatr within a small margin.

dat <- data.frame(
  strength = c(
    137.4438, 139.5395, 150.89, 141.4474, 141.8203, 151.8821, 143.9245,
    132.9732, 136.6419, 138.1723, 148.7668, 143.283, 143.5429,
    141.7023, 137.4732, 152.338, 144.1589, 128.5218
  )
)

res <- anderson_darling_weibull(dat, strength)

expect_equal(res$osl, 0.179, tolerance = 0.002)

res
#> 
#> Call:
#> anderson_darling_weibull(data = dat, x = strength)
#> 
#> Distribution:  Weibull ( n = 18 ) 
#> Test statistic:  A = 0.5113909 
#> OSL (p-value):  0.1787882  (assuming unknown parameters)
#> Conclusion: Sample is drawn from a Weibull distribution ( alpha = 0.05 )

5.4 Normal A- and B-Basis

The following data was input into STAT17 and the A- and B-Basis values were computed assuming normally distributed data. The results were 120.336 and 129.287, respectively. cmstatr reports very similar values.

dat <- c(
  137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245,
  132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023,
  137.4732, 152.3380, 144.1589, 128.5218
)

res <- basis_normal(x = dat, p = 0.99, conf = 0.95, override = "all")
expect_equal(res$basis, 120.336, tolerance = 0.0005)
res
#> 
#> Call:
#> basis_normal(x = dat, p = 0.99, conf = 0.95, override = "all")
#> 
#> Distribution:  Normal    ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `anderson_darling_normal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> 120.3549
res <- basis_normal(x = dat, p = 0.9, conf = 0.95, override = "all")
expect_equal(res$basis, 129.287, tolerance = 0.0005)
res
#> 
#> Call:
#> basis_normal(x = dat, p = 0.9, conf = 0.95, override = "all")
#> 
#> Distribution:  Normal    ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `anderson_darling_normal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 129.2898

5.5 Lognormal A- and B-Basis

The following data was input into STAT17 and the A- and B-Basis values were computed assuming the data are distributed according to a lognormal distribution. The results were 121.710 and 129.664, respectively. cmstatr reports very similar values.

dat <- c(
  137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245,
  132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023,
  137.4732, 152.3380, 144.1589, 128.5218
)

res <- basis_lognormal(x = dat, p = 0.99, conf = 0.95, override = "all")
expect_equal(res$basis, 121.710, tolerance = 0.0005)
res
#> 
#> Call:
#> basis_lognormal(x = dat, p = 0.99, conf = 0.95, override = "all")
#> 
#> Distribution:  Lognormal     ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `anderson_darling_lognormal`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> 121.7265
res <- basis_lognormal(x = dat, p = 0.9, conf = 0.95, override = "all")
expect_equal(res$basis, 129.664, tolerance = 0.0005)
res
#> 
#> Call:
#> basis_lognormal(x = dat, p = 0.9, conf = 0.95, override = "all")
#> 
#> Distribution:  Lognormal     ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `anderson_darling_lognormal`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 129.667

5.6 Weibull A- and B-Basis

The following data was input into STAT17 and the A- and B-Basis values were computed assuming data following the Weibull distribution. The results were 109.150 and 125.441, respectively. cmstatr reports very similar values.

dat <- c(
  137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245,
  132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023,
  137.4732, 152.3380, 144.1589, 128.5218
)

res <- basis_weibull(x = dat, p = 0.99, conf = 0.95, override = "all")
expect_equal(res$basis, 109.150, tolerance = 0.005)
res
#> 
#> Call:
#> basis_weibull(x = dat, p = 0.99, conf = 0.95, override = "all")
#> 
#> Distribution:  Weibull   ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `anderson_darling_weibull`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> 109.651
res <- basis_weibull(x = dat, p = 0.9, conf = 0.95, override = "all")
expect_equal(res$basis, 125.441, tolerance = 0.005)
res
#> 
#> Call:
#> basis_weibull(x = dat, p = 0.9, conf = 0.95, override = "all")
#> 
#> Distribution:  Weibull   ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `anderson_darling_weibull`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 125.7338

5.7 Extended Hanson–Koopmans A- and B-Basis

The following data was input into STAT17 and the A- and B-Basis values were computed using the nonparametric (small sample) method. The results were 99.651 and 124.156, respectively. cmstatr reports very similar values.

dat <- c(
  137.4438, 139.5395, 150.8900, 141.4474, 141.8203, 151.8821, 143.9245,
  132.9732, 136.6419, 138.1723, 148.7668, 143.2830, 143.5429, 141.7023,
  137.4732, 152.3380, 144.1589, 128.5218
)

res <- basis_hk_ext(x = dat, p = 0.99, conf = 0.95,
                    method = "woodward-frawley", override = "all")
expect_equal(res$basis, 99.651, tolerance = 0.005)
res
#> 
#> Call:
#> basis_hk_ext(x = dat, p = 0.99, conf = 0.95, method = "woodward-frawley", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, Woodward-Frawley method)     ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> A-Basis:   ( p = 0.99 , conf = 0.95 )
#> 99.65098
res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95,
                    method = "optimum-order", override = "all")
expect_equal(res$basis, 124.156, tolerance = 0.005)
res
#> 
#> Call:
#> basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, optimum two-order-statistic method)  ( n = 18 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 124.1615

5.8 Extended Hanson–Koopmans B-Basis

The following random numbers were generated.

dat <- c(
  139.6734, 143.0032, 130.4757, 144.8327, 138.7818, 136.7693, 148.636,
  131.0095, 131.4933, 142.8856, 158.0198, 145.2271, 137.5991, 139.8298,
  140.8557, 137.6148, 131.3614, 152.7795, 145.8792, 152.9207, 160.0989,
  145.1920, 128.6383, 141.5992, 122.5297, 159.8209, 151.6720, 159.0156
)

All of the numbers above were input into STAT17 and the reported B-Basis value using the Optimum Order nonparametric method was 122.36798. This result matches that of cmstatr within a small margin.

res <- basis_hk_ext(x = dat, p = 0.9, conf = 0.95,
                    method = "optimum-order", override = "all")
expect_equal(res$basis, 122.36798, tolerance = 0.001)
res
#> 
#> Call:
#> basis_hk_ext(x = dat, p = 0.9, conf = 0.95, method = "optimum-order", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, optimum two-order-statistic method)  ( n = 28 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 122.3638

The last two observations from the above data set were discarded, leaving 26 observations. This smaller data set was input into STAT17 and that software calculated a B-Basis value of 121.57073 using the Optimum Order nonparametric method. cmstatr reports a very similar number.

res <- basis_hk_ext(x = head(dat, 26), p = 0.9, conf = 0.95,
                    method = "optimum-order", override = "all")
expect_equal(res$basis, 121.57073, tolerance = 0.001)
res
#> 
#> Call:
#> basis_hk_ext(x = head(dat, 26), p = 0.9, conf = 0.95, method = "optimum-order", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, optimum two-order-statistic method)  ( n = 26 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 121.5655

The same data set was further reduced such that only the first 22 observations were included. This smaller data set was input into STAT17 and that software calculated a B-Basis value of 128.82397 using the Optimum Order nonparametric method. cmstatr reports a very similar number.

res <- basis_hk_ext(x = head(dat, 22), p = 0.9, conf = 0.95,
                    method = "optimum-order", override = "all")
expect_equal(res$basis, 128.82397, tolerance = 0.001)
res
#> 
#> Call:
#> basis_hk_ext(x = head(dat, 22), p = 0.9, conf = 0.95, method = "optimum-order", 
#>     override = "all")
#> 
#> Distribution:  Nonparametric (Extended Hanson-Koopmans, optimum two-order-statistic method)  ( n = 22 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `correct_method_used`,
#>     `sample_size`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 128.8224

5.9 Large Sample Nonparametric B-Basis

The following data was input into STAT17 and the B-Basis value was computed using the nonparametric (large sample) method. The result was 122.738297. cmstatr reports very similar values.

dat <- c(
  137.3603, 135.6665, 136.6914, 154.7919, 159.2037, 137.3277, 128.821,
  138.6304, 138.9004, 147.4598, 148.6622, 144.4948, 131.0851, 149.0203,
  131.8232, 146.4471, 123.8124, 126.3105, 140.7609, 134.4875, 128.7508,
  117.1854, 129.3088, 141.6789, 138.4073, 136.0295, 128.4164, 141.7733,
  134.455,  122.7383, 136.9171, 136.9232, 138.8402, 152.8294, 135.0633,
  121.052,  131.035,  138.3248, 131.1379, 147.3771, 130.0681, 132.7467,
  137.1444, 141.662,  146.9363, 160.7448, 138.5511, 129.1628, 140.2939,
  144.8167, 156.5918, 132.0099, 129.3551, 136.6066, 134.5095, 128.2081,
  144.0896, 141.8029, 130.0149, 140.8813, 137.7864
)

res <- basis_nonpara_large_sample(x = dat, p = 0.9, conf = 0.95,
                                  override = "all")
expect_equal(res$basis, 122.738297, tolerance = 0.005)
res
#> 
#> Call:
#> basis_nonpara_large_sample(x = dat, p = 0.9, conf = 0.95, override = "all")
#> 
#> Distribution:  Nonparametric (large sample)  ( n = 61 )
#> The following diagnostic tests were overridden:
#>     `outliers_within_batch`,
#>     `between_batch_variability`,
#>     `outliers`,
#>     `sample_size`
#> B-Basis:   ( p = 0.9 , conf = 0.95 )
#> 122.7383

5.10 Acceptance Limits Based on Mean and Extremum

Results from cmstatr’s equiv_mean_extremum function were compared with results from HYTEQ. The summary statistics for the qualification data were set as mean = 141.310 and sd=6.415. For a value of alpha=0.05 and n = 9, HYTEQ reported thresholds of 123.725 and 137.197 for minimum individual and mean, respectively. cmstatr produces very similar results.

res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415,
                           n_sample = 9)
expect_equal(res$threshold_min_indiv, 123.725, tolerance = 0.001)
expect_equal(res$threshold_mean, 137.197, tolerance = 0.001)
res
#> 
#> Call:
#> equiv_mean_extremum(mean_qual = 141.31, sd_qual = 6.415, n_sample = 9, 
#>     alpha = 0.05)
#> 
#> For alpha = 0.05 and n = 9 
#> ( k1 = 2.741054 and k2 = 0.6410852 )
#>                   Min Individual    Sample Mean    
#>      Thresholds:     123.7261         137.1974

Using the same parameters, but using the modified CV method, HYTEQ produces thresholds of 117.024 and 135.630 for minimum individual and mean, respectively. cmstatr produces very similar results.

res <- equiv_mean_extremum(alpha = 0.05, mean_qual = 141.310, sd_qual = 6.415,
                           n_sample = 9, modcv = TRUE)
expect_equal(res$threshold_min_indiv, 117.024, tolerance = 0.001)
expect_equal(res$threshold_mean, 135.630, tolerance = 0.001)
res
#> 
#> Call:
#> equiv_mean_extremum(mean_qual = 141.31, sd_qual = 6.415, n_sample = 9, 
#>     alpha = 0.05, modcv = TRUE)
#> 
#> Modified CV used: CV* = 0.06269832 ( CV = 0.04539665 )
#> 
#> For alpha = 0.05 and n = 9 
#> ( k1 = 2.741054 and k2 = 0.6410852 )
#>                   Min Individual    Sample Mean    
#>      Thresholds:     117.0245          135.63

5.11 Acceptance Based on Change in Mean

Results from cmstatr’s equiv_change_mean function were compared with results from HYTEQ. The following parameters were used. A value of alpha = 0.05 was selected.

Parameter Qualification Sample
Mean 9.24 9.02
SD 0.162 0.15785
n 28 9

HYTEQ gives an acceptance range of 9.115 to 9.365. cmstatr produces similar results.

res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02,
                         sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24,
                         sd_qual = 0.162)
expect_equal(res$threshold, c(9.115, 9.365), tolerance = 0.001)
res
#> 
#> Call:
#> equiv_change_mean(n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, 
#>     n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, alpha = 0.05)
#> 
#> For alpha = 0.05 
#>                   Qualification        Sample      
#>           Number        28               9         
#>             Mean       9.24             9.02       
#>               SD      0.162           0.15785      
#>        Pooled SD            0.1610609             
#>               t0            -3.564775             
#>           Result               FAIL               
#>    Passing Range       9.114712 to 9.365288

After selecting the modified CV method, HYTEQ gives an acceptance range of 8.857 to 9.623. cmstatr produces similar results.

res <- equiv_change_mean(alpha = 0.05, n_sample = 9, mean_sample = 9.02,
                         sd_sample = 0.15785, n_qual = 28, mean_qual = 9.24,
                         sd_qual = 0.162, modcv = TRUE)
expect_equal(res$threshold, c(8.857, 9.623), tolerance = 0.001)
res
#> 
#> Call:
#> equiv_change_mean(n_qual = 28, mean_qual = 9.24, sd_qual = 0.162, 
#>     n_sample = 9, mean_sample = 9.02, sd_sample = 0.15785, alpha = 0.05, 
#>     modcv = TRUE)
#> 
#> For alpha = 0.05 
#> Modified CV used
#>                   Qualification        Sample      
#>           Number        28               9         
#>             Mean       9.24             9.02       
#>               SD      0.162           0.15785      
#>        Pooled SD            0.4927484             
#>               t0             -1.16519             
#>           Result               PASS               
#>    Passing Range       8.856695 to 9.623305

6. Comparison with Literature

In this section, results from cmstatr are compared with values published in literature.

6.1 Anderson–Darling K–Sample Test

[4] provides example data that compares measurements obtained in four labs. Their paper gives values of the ADK test statistic as well as p-values.

The data in [4] is as follows:

dat_ss1987 <- data.frame(
  smoothness = c(
    38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0,
    39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8,
    34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0,
    34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8
  ),
  lab = c(rep("A", 8), rep("B", 8), rep("C", 8), rep("D", 8))
)
dat_ss1987
#>    smoothness lab
#> 1        38.7   A
#> 2        41.5   A
#> 3        43.8   A
#> 4        44.5   A
#> 5        45.5   A
#> 6        46.0   A
#> 7        47.7   A
#> 8        58.0   A
#> 9        39.2   B
#> 10       39.3   B
#> 11       39.7   B
#> 12       41.4   B
#> 13       41.8   B
#> 14       42.9   B
#> 15       43.3   B
#> 16       45.8   B
#> 17       34.0   C
#> 18       35.0   C
#> 19       39.0   C
#> 20       40.0   C
#> 21       43.0   C
#> 22       43.0   C
#> 23       44.0   C
#> 24       45.0   C
#> 25       34.0   D
#> 26       34.8   D
#> 27       34.8   D
#> 28       35.4   D
#> 29       37.2   D
#> 30       37.8   D
#> 31       41.2   D
#> 32       42.8   D

[4] lists the corresponding test statistics \(A_{akN}^2 = 8.3926\) and \(\sigma_N = 1.2038\) with the p-value \(p = 0.0022\). These match the result of cmstatr within a small margin.

res <- ad_ksample(dat_ss1987, smoothness, lab)

expect_equal(res$ad, 8.3926, tolerance = 0.001)
expect_equal(res$sigma, 1.2038, tolerance = 0.001)
expect_equal(res$p, 0.00226, tolerance = 0.01)

res
#> 
#> Call:
#> ad_ksample(data = dat_ss1987, x = smoothness, groups = lab)
#> 
#> N = 32           k = 4            
#> ADK = 8.39       p-value = 0.002255 
#> Conclusion: Samples do not come from the same distribution (alpha = 0.025 )

7. Comparison with Published Factors

Various factors, such as tolerance limit factors, are published in various publications. This section compares those published factors with those computed by cmstatr.

7.1 Normal kB Factors

B-Basis tolerance limit factors assuming a normal distribution are published in CMH-17-1G. Those factors are reproduced below and are compared with the results of cmstatr. The published factors and those computed by cmstatr are quite similar.

tribble(
  ~n, ~kB_published,
  2, 20.581, 36, 1.725, 70, 1.582, 104, 1.522,
  3, 6.157, 37, 1.718, 71, 1.579, 105, 1.521,
  4, 4.163, 38, 1.711, 72, 1.577, 106, 1.519,
  5, 3.408, 39, 1.704, 73, 1.575, 107, 1.518,
  6, 3.007, 40, 1.698, 74, 1.572, 108, 1.517,
  7, 2.756, 41, 1.692, 75, 1.570, 109, 1.516,
  8, 2.583, 42, 1.686, 76, 1.568, 110, 1.515,
  9, 2.454, 43, 1.680, 77, 1.566, 111, 1.513,
  10, 2.355, 44, 1.675, 78, 1.564, 112, 1.512,
  11, 2.276, 45, 1.669, 79, 1.562, 113, 1.511,
  12, 2.211, 46, 1.664, 80, 1.560, 114, 1.510,
  13, 2.156, 47, 1.660, 81, 1.558, 115, 1.509,
  14, 2.109, 48, 1.655, 82, 1.556, 116, 1.508,
  15, 2.069, 49, 1.650, 83, 1.554, 117, 1.507,
  16, 2.034, 50, 1.646, 84, 1.552, 118, 1.506,
  17, 2.002, 51, 1.642, 85, 1.551, 119, 1.505,
  18, 1.974, 52, 1.638, 86, 1.549, 120, 1.504,
  19, 1.949, 53, 1.634, 87, 1.547, 121, 1.503,
  20, 1.927, 54, 1.630, 88, 1.545, 122, 1.502,
  21, 1.906, 55, 1.626, 89, 1.544, 123, 1.501,
  22, 1.887, 56, 1.623, 90, 1.542, 124, 1.500,
  23, 1.870, 57, 1.619, 91, 1.540, 125, 1.499,
  24, 1.854, 58, 1.616, 92, 1.539, 126, 1.498,
  25, 1.839, 59, 1.613, 93, 1.537, 127, 1.497,
  26, 1.825, 60, 1.609, 94, 1.536, 128, 1.496,
  27, 1.812, 61, 1.606, 95, 1.534, 129, 1.495,
  28, 1.800, 62, 1.603, 96, 1.533, 130, 1.494,
  29, 1.789, 63, 1.600, 97, 1.531, 131, 1.493,
  30, 1.778, 64, 1.597, 98, 1.530, 132, 1.492,
  31, 1.768, 65, 1.595, 99, 1.529, 133, 1.492,
  32, 1.758, 66, 1.592, 100, 1.527, 134, 1.491,
  33, 1.749, 67, 1.589, 101, 1.526, 135, 1.490,
  34, 1.741, 68, 1.587, 102, 1.525, 136, 1.489,
  35, 1.733, 69, 1.584, 103, 1.523, 137, 1.488
) %>%
  arrange(n) %>%
  mutate(kB_cmstatr = k_factor_normal(n, p = 0.9, conf = 0.95)) %>%
  rowwise() %>%
  mutate(diff = expect_equal(kB_published, kB_cmstatr, tolerance = 0.001)) %>%
  select(-c(diff))
#> # A tibble: 136 × 3
#> # Rowwise: 
#>        n kB_published kB_cmstatr
#>    <dbl>        <dbl>      <dbl>
#>  1     2        20.6       20.6 
#>  2     3         6.16       6.16
#>  3     4         4.16       4.16
#>  4     5         3.41       3.41
#>  5     6         3.01       3.01
#>  6     7         2.76       2.76
#>  7     8         2.58       2.58
#>  8     9         2.45       2.45
#>  9    10         2.36       2.35
#> 10    11         2.28       2.28
#> # ℹ 126 more rows

7.2 Normal kA Factors

A-Basis tolerance limit factors assuming a normal distribution are published in CMH-17-1G. Those factors are reproduced below and are compared with the results of cmstatr. The published factors and those computed by cmstatr are quite similar.

tribble(
  ~n, ~kA_published,
  2, 37.094, 36, 2.983, 70, 2.765, 104, 2.676,
  3, 10.553, 37, 2.972, 71, 2.762, 105, 2.674,
  4, 7.042, 38, 2.961, 72, 2.758, 106, 2.672,
  5, 5.741, 39, 2.951, 73, 2.755, 107, 2.671,
  6, 5.062, 40, 2.941, 74, 2.751, 108, 2.669,
  7, 4.642, 41, 2.932, 75, 2.748, 109, 2.667,
  8, 4.354, 42, 2.923, 76, 2.745, 110, 2.665,
  9, 4.143, 43, 2.914, 77, 2.742, 111, 2.663,
  10, 3.981, 44, 2.906, 78, 2.739, 112, 2.662,
  11, 3.852, 45, 2.898, 79, 2.736, 113, 2.660,
  12, 3.747, 46, 2.890, 80, 2.733, 114, 2.658,
  13, 3.659, 47, 2.883, 81, 2.730, 115, 2.657,
  14, 3.585, 48, 2.876, 82, 2.727, 116, 2.655,
  15, 3.520, 49, 2.869, 83, 2.724, 117, 2.654,
  16, 3.464, 50, 2.862, 84, 2.721, 118, 2.652,
  17, 3.414, 51, 2.856, 85, 2.719, 119, 2.651,
  18, 3.370, 52, 2.850, 86, 2.716, 120, 2.649,
  19, 3.331, 53, 2.844, 87, 2.714, 121, 2.648,
  20, 3.295, 54, 2.838, 88, 2.711, 122, 2.646,
  21, 3.263, 55, 2.833, 89, 2.709, 123, 2.645,
  22, 3.233, 56, 2.827, 90, 2.706, 124, 2.643,
  23, 3.206, 57, 2.822, 91, 2.704, 125, 2.642,
  24, 3.181, 58, 2.817, 92, 2.701, 126, 2.640,
  25, 3.158, 59, 2.812, 93, 2.699, 127, 2.639,
  26, 3.136, 60, 2.807, 94, 2.697, 128, 2.638,
  27, 3.116, 61, 2.802, 95, 2.695, 129, 2.636,
  28, 3.098, 62, 2.798, 96, 2.692, 130, 2.635,
  29, 3.080, 63, 2.793, 97, 2.690, 131, 2.634,
  30, 3.064, 64, 2.789, 98, 2.688, 132, 2.632,
  31, 3.048, 65, 2.785, 99, 2.686, 133, 2.631,
  32, 3.034, 66, 2.781, 100, 2.684, 134, 2.630,
  33, 3.020, 67, 2.777, 101, 2.682, 135, 2.628,
  34, 3.007, 68, 2.773, 102, 2.680, 136, 2.627,
  35, 2.995, 69, 2.769, 103, 2.678, 137, 2.626
) %>%
  arrange(n) %>%
  mutate(kA_cmstatr = k_factor_normal(n, p = 0.99, conf = 0.95)) %>%
  rowwise() %>%
  mutate(diff = expect_equal(kA_published, kA_cmstatr, tolerance = 0.001)) %>%
  select(-c(diff))
#> # A tibble: 136 × 3
#> # Rowwise: 
#>        n kA_published kA_cmstatr
#>    <dbl>        <dbl>      <dbl>
#>  1     2        37.1       37.1 
#>  2     3        10.6       10.6 
#>  3     4         7.04       7.04
#>  4     5         5.74       5.74
#>  5     6         5.06       5.06
#>  6     7         4.64       4.64
#>  7     8         4.35       4.35
#>  8     9         4.14       4.14
#>  9    10         3.98       3.98
#> 10    11         3.85       3.85
#> # ℹ 126 more rows

7.3 Nonparametric B-Basis Extended Hanson–Koopmans

Vangel [5] provides extensive tables of \(z\) for the case where \(i=1\) and \(j\) is the median observation. This section checks the results of cmstatr’s function against those tables. Only the odd values of \(n\) are checked so that the median is a single observation. The unit tests for the cmstatr package include checks of a variety of values of \(p\) and confidence, but only the factors for B-Basis are checked here.

tribble(
  ~n, ~z,
  3,  28.820048,
  5,  6.1981307,
  7,  3.4780112,
  9,  2.5168762,
  11, 2.0312134,
  13, 1.7377374,
  15, 1.5403989,
  17, 1.3979806,
  19, 1.2899172,
  21, 1.2048089,
  23, 1.1358259,
  25, 1.0786237,
  27, 1.0303046,
) %>%
  rowwise() %>%
  mutate(
    z_calc = hk_ext_z(n, 1, ceiling(n / 2), p = 0.90, conf = 0.95)
  ) %>%
  mutate(diff = expect_equal(z, z_calc, tolerance = 0.0001)) %>% 
  select(-c(diff))
#> # A tibble: 13 × 3
#> # Rowwise: 
#>        n     z z_calc
#>    <dbl> <dbl>  <dbl>
#>  1     3 28.8   28.8 
#>  2     5  6.20   6.20
#>  3     7  3.48   3.48
#>  4     9  2.52   2.52
#>  5    11  2.03   2.03
#>  6    13  1.74   1.74
#>  7    15  1.54   1.54
#>  8    17  1.40   1.40
#>  9    19  1.29   1.29
#> 10    21  1.20   1.20
#> 11    23  1.14   1.14
#> 12    25  1.08   1.08
#> 13    27  1.03   1.03

7.4 Nonparametric A-Basis Extended Hanson–Koopmans

CMH-17-1G provides Table 8.5.15, which contains factors for calculating A-Basis values using the Extended Hanson–Koopmans nonparametric method. That table is reproduced in part here and the factors are compared with those computed by cmstatr. More extensive checks are performed in the unit test of the cmstatr package. The factors computed by cmstatr are very similar to those published in CMH-17-1G.

tribble(
  ~n, ~k,
  2, 80.0038,
  4, 9.49579,
  6, 5.57681,
  8, 4.25011,
  10, 3.57267,
  12, 3.1554,
  14, 2.86924,
  16, 2.65889,
  18, 2.4966,
  20, 2.36683,
  25, 2.131,
  30, 1.96975,
  35, 1.85088,
  40, 1.75868,
  45, 1.68449,
  50, 1.62313,
  60, 1.5267,
  70, 1.45352,
  80, 1.39549,
  90, 1.34796,
  100, 1.30806,
  120, 1.24425,
  140, 1.19491,
  160, 1.15519,
  180, 1.12226,
  200, 1.09434,
  225, 1.06471,
  250, 1.03952,
  275, 1.01773
) %>%
  rowwise() %>%
  mutate(z_calc = hk_ext_z(n, 1, n, 0.99, 0.95)) %>%
  mutate(diff = expect_lt(abs(k - z_calc), 0.0001)) %>% 
  select(-c(diff))
#> # A tibble: 29 × 3
#> # Rowwise: 
#>        n     k z_calc
#>    <dbl> <dbl>  <dbl>
#>  1     2 80.0   80.0 
#>  2     4  9.50   9.50
#>  3     6  5.58   5.58
#>  4     8  4.25   4.25
#>  5    10  3.57   3.57
#>  6    12  3.16   3.16
#>  7    14  2.87   2.87
#>  8    16  2.66   2.66
#>  9    18  2.50   2.50
#> 10    20  2.37   2.37
#> # ℹ 19 more rows

7.5 Factors for Small Sample Nonparametric B-Basis

CMH-17-1G Table 8.5.14 provides rank orders and factors for computing nonparametric B-Basis values. This table is reproduced below and compared with the results of cmstatr. The results are similar. In some cases, the rank order (\(r\) in CMH-17-1G or \(j\) in cmstatr) and the factor (\(k\)) are different. These differences are discussed in detail in the vignette Extended Hanson-Koopmans.

tribble(
  ~n, ~r, ~k,
  2, 2, 35.177,
  3, 3, 7.859,
  4, 4, 4.505,
  5, 4, 4.101,
  6, 5, 3.064,
  7, 5, 2.858,
  8, 6, 2.382,
  9, 6, 2.253,
  10, 6, 2.137,
  11, 7, 1.897,
  12, 7, 1.814,
  13, 7, 1.738,
  14, 8, 1.599,
  15, 8, 1.540,
  16, 8, 1.485,
  17, 8, 1.434,
  18, 9, 1.354,
  19, 9, 1.311,
  20, 10, 1.253,
  21, 10, 1.218,
  22, 10, 1.184,
  23, 11, 1.143,
  24, 11, 1.114,
  25, 11, 1.087,
  26, 11, 1.060,
  27, 11, 1.035,
  28, 12, 1.010
) %>% 
  rowwise() %>%
  mutate(r_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$j) %>%
  mutate(k_calc = hk_ext_z_j_opt(n, 0.90, 0.95)$z)
#> # A tibble: 27 × 5
#> # Rowwise: 
#>        n     r     k r_calc k_calc
#>    <dbl> <dbl> <dbl>  <int>  <dbl>
#>  1     2     2 35.2       2  35.2 
#>  2     3     3  7.86      3   7.86
#>  3     4     4  4.50      4   4.51
#>  4     5     4  4.10      4   4.10
#>  5     6     5  3.06      5   3.06
#>  6     7     5  2.86      5   2.86
#>  7     8     6  2.38      6   2.38
#>  8     9     6  2.25      6   2.25
#>  9    10     6  2.14      6   2.14
#> 10    11     7  1.90      7   1.90
#> # ℹ 17 more rows

7.6 Nonparametric B-Basis Binomial Rank

CMH-17-1G Table 8.5.12 provides factors for computing B-Basis values using the nonparametric binomial rank method. Part of that table is reproduced below and compared with the results of cmstatr. The results of cmstatr are similar to the published values. A more complete comparison is performed in the unit tests of the cmstatr package.

tribble(
  ~n, ~rb,
  29, 1,
  46, 2,
  61, 3,
  76, 4,
  89, 5,
  103, 6,
  116, 7,
  129, 8,
  142, 9,
  154, 10,
  167, 11,
  179, 12,
  191, 13,
  203, 14
) %>%
  rowwise() %>%
  mutate(r_calc = nonpara_binomial_rank(n, 0.9, 0.95)) %>%
  mutate(test = expect_equal(rb, r_calc)) %>%
  select(-c(test))
#> # A tibble: 14 × 3
#> # Rowwise: 
#>        n    rb r_calc
#>    <dbl> <dbl>  <dbl>
#>  1    29     1      1
#>  2    46     2      2
#>  3    61     3      3
#>  4    76     4      4
#>  5    89     5      5
#>  6   103     6      6
#>  7   116     7      7
#>  8   129     8      8
#>  9   142     9      9
#> 10   154    10     10
#> 11   167    11     11
#> 12   179    12     12
#> 13   191    13     13
#> 14   203    14     14

7.7 Nonparametric A-Basis Binomial Rank

CMH-17-1G Table 8.5.13 provides factors for computing A-Basis values using the nonparametric binomial rank method. Part of that table is reproduced below and compared with the results of cmstatr. The results of cmstatr are similar to the published values. A more complete comparison is performed in the unit tests of the cmstatr package.

tribble(
  ~n, ~ra,
  299, 1,
  473, 2,
  628, 3,
  773, 4,
  913, 5
) %>%
  rowwise() %>%
  mutate(r_calc = nonpara_binomial_rank(n, 0.99, 0.95)) %>%
  mutate(test = expect_equal(ra, r_calc)) %>%
  select(-c(test))
#> # A tibble: 5 × 3
#> # Rowwise: 
#>       n    ra r_calc
#>   <dbl> <dbl>  <dbl>
#> 1   299     1      1
#> 2   473     2      2
#> 3   628     3      3
#> 4   773     4      4
#> 5   913     5      5

7.8 Factors for Equivalency

Vangel’s 2002 paper provides factors for calculating limits for sample mean and sample extremum for various values of \(\alpha\) and sample size (\(n\)). A subset of those factors are reproduced below and compared with results from cmstatr. The results are very similar for values of \(\alpha\) and \(n\) that are common for composite materials.

read.csv(system.file("extdata", "k1.vangel.csv",
                     package = "cmstatr")) %>%
  gather(n, k1, X2:X10) %>%
  mutate(n = as.numeric(substring(n, 2))) %>%
  inner_join(
    read.csv(system.file("extdata", "k2.vangel.csv",
                         package = "cmstatr")) %>%
      gather(n, k2, X2:X10) %>%
      mutate(n = as.numeric(substring(n, 2))),
    by = c("n" = "n", "alpha" = "alpha")
  ) %>%
  filter(n >= 5 & (alpha == 0.01 | alpha == 0.05)) %>% 
  group_by(n, alpha) %>%
  nest() %>%
  mutate(equiv = map2(alpha, n, ~k_equiv(.x, .y))) %>%
  mutate(k1_calc = map(equiv, function(e) e[1]),
         k2_calc = map(equiv, function(e) e[2])) %>%
  select(-c(equiv)) %>% 
  unnest(cols = c(data, k1_calc, k2_calc)) %>% 
  mutate(check = expect_equal(k1, k1_calc, tolerance = 0.0001)) %>%
  select(-c(check)) %>% 
  mutate(check = expect_equal(k2, k2_calc, tolerance = 0.0001)) %>%
  select(-c(check))
#> # A tibble: 12 × 6
#> # Groups:   n, alpha [12]
#>    alpha     n    k1    k2 k1_calc k2_calc
#>    <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>
#>  1  0.05     5  2.53 0.852    2.53   0.853
#>  2  0.01     5  3.07 1.14     3.07   1.14 
#>  3  0.05     6  2.60 0.781    2.60   0.781
#>  4  0.01     6  3.13 1.04     3.13   1.04 
#>  5  0.05     7  2.65 0.725    2.65   0.725
#>  6  0.01     7  3.18 0.968    3.18   0.968
#>  7  0.05     8  2.7  0.679    2.70   0.679
#>  8  0.01     8  3.22 0.906    3.22   0.906
#>  9  0.05     9  2.74 0.641    2.74   0.641
#> 10  0.01     9  3.25 0.854    3.25   0.855
#> 11  0.05    10  2.78 0.609    2.78   0.609
#> 12  0.01    10  3.28 0.811    3.28   0.811

8. Session Info

This vignette was built on the following system.

sessionInfo()
#> R version 4.3.3 (2024-02-29)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.6 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
#>  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/Edmonton
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] testthat_3.2.1 purrr_1.0.2    cmstatr_0.9.3  tidyr_1.3.1    ggplot2_3.5.0 
#> [6] dplyr_1.1.4   
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.4      jsonlite_1.8.8    highr_0.10        crayon_1.5.2     
#>  [5] compiler_4.3.3    brio_1.1.4        tidyselect_1.2.0  jquerylib_0.1.4  
#>  [9] scales_1.3.0      yaml_2.3.8        fastmap_1.1.1     R6_2.5.1         
#> [13] labeling_0.4.3    kSamples_1.2-10   generics_0.1.3    knitr_1.45       
#> [17] MASS_7.3-60       tibble_3.2.1      desc_1.4.3        rprojroot_2.0.4  
#> [21] munsell_0.5.0     bslib_0.6.1       pillar_1.9.0      SuppDists_1.1-9.7
#> [25] rlang_1.1.3       utf8_1.2.4        cachem_1.0.8      xfun_0.42        
#> [29] sass_0.4.8        pkgload_1.3.4     viridisLite_0.4.2 cli_3.6.2        
#> [33] withr_3.0.0       magrittr_2.0.3    digest_0.6.34     grid_4.3.3       
#> [37] rstudioapi_0.15.0 lifecycle_1.0.4   waldo_0.5.2       vctrs_0.6.5      
#> [41] evaluate_0.23     glue_1.7.0        farver_2.1.1      fansi_1.0.6      
#> [45] colorspace_2.1-0  rmarkdown_2.26    tools_4.3.3       pkgconfig_2.0.3  
#> [49] htmltools_0.5.7

9. References

[1]
K. S. Raju and J. S. Tomblin, “AGATE Statistical Analysis Program,” Wichita State University, ASAP-2008, 2008.
[2]
“Composite Materials Handbook, Volume 1. Polymer Matrix Composites Guideline for Characterization of Structural Materials,” SAE International, CMH-17-1G, Mar. 2012.
[3]
Materials Sciences Corporation, “CMH-17 Statistical Analysis for B-Basis and A-Basis Values,” Materials Sciences Corporation, Horsham, PA, STAT-17 Rev 5, Jan. 2008.
[4]
F. W. Scholz and M. A. Stephens, "K-Sample Anderson-Darling Tests," Journal of the American Statistical Association, vol. 82, no. 399. pp. 918–924, Sep-1987.
[5]
M. Vangel, "One-Sided Nonparametric Tolerance Limits," Communications in Statistics - Simulation and Computation, vol. 23, no. 4. pp. 1137–1154, 1994.
cmstatr/inst/extdata/0000755000176200001440000000000014061271230014332 5ustar liggesuserscmstatr/inst/extdata/k2.vangel.csv0000644000176200001440000000204113507422437016646 0ustar liggesusersalpha,2,3,4,5,6,7,8,9,10 0.5,0.1472,0.1591,0.1539,0.1473,0.141,0.1354,0.1303,0.1258,0.1217 0.25,0.6266,0.5421,0.4818,0.4382,0.4048,0.3782,0.3563,0.3379,0.3221 0.1,1.0539,0.8836,0.7744,0.6978,0.6403,0.5951,0.5583,0.5276,0.5016 0.05,1.3076,1.0868,0.9486,0.8525,0.7808,0.7246,0.679,0.6411,0.6089 0.025,1.5266,1.2626,1.0995,0.9866,0.9026,0.8369,0.7838,0.7396,0.7022 0.01,1.7804,1.4666,1.2747,1.1425,1.0443,0.9678,0.9059,0.8545,0.811 0.005,1.9528,1.6054,1.3941,1.2488,1.1411,1.0571,0.9893,0.933,0.8854 0.0025,2.1123,1.7341,1.5049,1.3475,1.2309,1.1401,1.0668,1.0061,0.9546 0.001,2.3076,1.8919,1.6408,1.4687,1.3413,1.2422,1.1622,1.0959,1.0397 0.0005,2.4457,2.0035,1.7371,1.5546,1.4196,1.3145,1.2298,1.1596,1.1002 0.00025,2.5768,2.1097,1.8287,1.6363,1.4941,1.3835,1.2943,1.2203,1.1578 0.0001,2.7411,2.2429,1.9436,1.739,1.5877,1.4701,1.3752,1.2966,1.2301 0.00005,2.8595,2.3389,2.0266,1.813,1.6553,1.5326,1.4337,1.3517,1.2824 0.000025,2.9734,2.4313,2.1065,1.8844,1.7204,1.5928,1.49,1.4048,1.3327 0.00001,3.1179,2.5487,2.2079,1.9751,1.8031,1.6694,1.5616,1.4723,1.3968 cmstatr/inst/extdata/k1.vangel.csv0000644000176200001440000000202113507422437016643 0ustar liggesusersalpha,2,3,4,5,6,7,8,9,10 0.5,0.7166,1.0254,1.2142,1.3498,1.4548,1.54,1.6113,1.6724,1.7258 0.25,1.2887,1.5407,1.6972,1.8106,1.899,1.9711,2.0317,2.0838,2.1295 0.1,1.8167,2.0249,2.1561,2.252,2.3272,2.3887,2.4407,2.4856,2.525 0.05,2.1385,2.3239,2.442,2.5286,2.5967,2.6527,2.7,2.7411,2.7772 0.025,2.4208,2.5888,2.6965,2.7758,2.8384,2.89,2.9337,2.9717,3.0052 0.01,2.7526,2.9027,2.9997,3.0715,3.1283,3.1753,3.2153,3.25,3.2807 0.005,2.98,3.1198,3.2103,3.2775,3.3309,3.3751,3.4127,3.4455,3.4745 0.0025,3.193,3.3232,3.4082,3.4716,3.522,3.5638,3.5995,3.6307,3.6582 0.001,3.4549,3.5751,3.6541,3.7132,3.7603,3.7995,3.8331,3.8623,3.8883 
0.0005,3.6412,3.755,3.8301,3.8864,3.9314,3.969,4.0011,4.0292,4.0541 0.00025,3.8188,3.927,3.9987,4.0526,4.0958,4.1319,4.1628,4.1898,4.2138 0.0001,4.0421,4.1439,4.2117,4.2629,4.304,4.3384,4.3678,4.3936,4.4166 0.00005,4.2035,4.3011,4.3664,4.4157,4.4554,4.4886,4.5172,4.5422,4.5644 0.000025,4.3592,4.453,4.516,4.5637,4.6022,4.6344,4.662,4.6863,4.7079 0.00001,4.5573,4.6466,4.7069,4.7527,4.7897,4.8206,4.8473,4.8707,4.8915 cmstatr/inst/CITATION0000644000176200001440000000107114477217401014047 0ustar liggesusersbibentry( "Article", title = "cmstatr: An {R} Package for Statistical Analysis of Composite Material Data", author = "Stefan Kloppenborg", year = 2020, journal = "Journal of Open Source Software", volume = 5, number = 51, pages = 2265, doi = "10.21105/joss.02265", url = "https://joss.theoj.org/papers/10.21105/joss.02265", textVersion = "Kloppenborg, S., (2020). cmstatr: An R Package for Statistical Analysis of Composite Material Data. Journal of Open Source Software, 5(51), 2265, https://doi.org/10.21105/joss.02265" ) citation(auto = meta) cmstatr/inst/WORDLIST0000644000176200001440000000053314477217401014106 0ustar liggesusersADK akN al Borlik CMH CMD Comtek CPT csl CTD D'Agostino Dekker doi DOI ECDF Escobar ESF et ETW extremum frac frawley funder ge Guenther Hoboken https HYTEQ ieee joss kA kB Kloppenborg Koopmans Krishnamoorthy Levene's lm Marsaglia MNR normed ORCID OSL prepreg README rlang RTD SAE Scholz SciPy SciPy's Technometrics tibble Vangel Vangel's woodward