# Archive member headers and non-R members are kept verbatim as comments.
# nortest/0000755000176200001440000000000012556250141011751 5ustar liggesusers
# nortest/NAMESPACE0000644000176200001440000000023212556245346013200 0ustar liggesusers
# importFrom("stats", "complete.cases", "cor", "pchisq", "pnorm", "ppoints", "qnorm", "sd")
# export(ad.test, cvm.test, lillie.test, pearson.test, sf.test)
# nortest/R/0000755000176200001440000000000012556245346012165 5ustar liggesusers
# nortest/R/cvm.test.R0000644000176200001440000000203212556245346014050 0ustar liggesusers

# Cramer-von Mises test for the composite hypothesis of normality.
#
# x: numeric vector; missing values are dropped, and more than 7 values
#    must remain.
# Returns an "htest" list with the statistic W, the p-value, the method
# name and the name of the data.
# Stops if fewer than 8 values remain or if the data cannot be standardized.
cvm.test <- function(x) {
  # deparse() may return several strings for a long call; keep one string.
  dname <- paste(deparse(substitute(x)), collapse = " ")
  x <- sort(x[complete.cases(x)])
  n <- length(x)
  if (n < 8) {
    stop("sample size must be greater than 7")
  }
  p <- pnorm((x - mean(x)) / sd(x))
  # A NaN here would otherwise surface as an obscure error in the `if` below.
  if (any(is.na(p))) {
    stop("standardized data contain NaN; the sample must be non-constant and finite")
  }
  W <- 1 / (12 * n) + sum((p - (2 * seq_len(n) - 1) / (2 * n))^2)
  # Modified statistic on which the piecewise p-value approximation is based.
  WW <- (1 + 0.5 / n) * W
  if (WW < 0.0275) {
    pval <- 1 - exp(-13.953 + 775.5 * WW - 12542.61 * WW^2)
  } else if (WW < 0.051) {
    pval <- 1 - exp(-5.903 + 179.546 * WW - 1515.29 * WW^2)
  } else if (WW < 0.092) {
    pval <- exp(0.886 - 31.62 * WW + 10.897 * WW^2)
  } else if (WW < 1.1) {
    pval <- exp(1.111 - 34.242 * WW + 12.832 * WW^2)
  } else {
    warning(
      "p-value is smaller than 7.37e-10, cannot be computed more accurately"
    )
    pval <- 7.37e-10
  }
  structure(
    list(
      statistic = c(W = W),
      p.value = pval,
      method = "Cramer-von Mises normality test",
      data.name = dname
    ),
    class = "htest"
  )
}

# nortest/R/pearson.test.R0000644000176200001440000000141512556245346014736 0ustar liggesusers

# Pearson chi-square test for the composite hypothesis of normality.
#
# x:         numeric vector; missing values are dropped.
# n.classes: number of equiprobable classes. The default refers to `n`,
#            the number of complete observations, and is evaluated lazily
#            once `n` exists inside the function.
# adjust:    if TRUE, two degrees of freedom are subtracted for the
#            estimated mean and standard deviation.
# Returns an "htest" list that additionally carries `n.classes` and `df`.
pearson.test <- function(x, n.classes = ceiling(2 * (n^(2/5))),
                         adjust = TRUE) {
  dname <- paste(deparse(substitute(x)), collapse = " ")
  x <- x[complete.cases(x)]
  n <- length(x)
  dfd <- if (adjust) 2 else 0
  cell <- floor(1 + n.classes * pnorm(x, mean(x), sd(x)))
  # pnorm() rounds to exactly 1 for extreme observations, which would give
  # cell index n.classes + 1; tabulate() silently drops such values, so the
  # counts would no longer sum to n. Put them into the last class instead.
  cell <- pmin(cell, n.classes)
  count <- tabulate(cell, n.classes)
  expected <- n / n.classes
  P <- sum((count - expected)^2 / expected)
  df <- n.classes - dfd - 1
  pvalue <- pchisq(P, df, lower.tail = FALSE)
  structure(
    list(
      statistic = c(P = P),
      p.value = pvalue,
      method = "Pearson chi-square normality test",
      data.name = dname,
      n.classes = n.classes,
      df = df
    ),
    class = "htest"
  )
}

# nortest/R/lillie.test.R0000644000176200001440000000275712556245346014553 0ustar liggesusers

# Lilliefors (Kolmogorov-Smirnov) test for the composite hypothesis of
# normality.
#
# x: numeric vector; missing values are dropped, and more than 4 values
#    must remain.
# Returns an "htest" list with the statistic D, the p-value, the method
# name and the name of the data.
# Stops if fewer than 5 values remain or if the data cannot be standardized.
lillie.test <- function(x) {
  dname <- paste(deparse(substitute(x)), collapse = " ")
  x <- sort(x[complete.cases(x)])
  n <- length(x)
  if (n < 5) {
    stop("sample size must be greater than 4")
  }
  p <- pnorm((x - mean(x)) / sd(x))
  # A NaN here would otherwise surface as an obscure error in the `if` below.
  if (any(is.na(p))) {
    stop("standardized data contain NaN; the sample must be non-constant and finite")
  }
  i <- seq_len(n)
  Dplus <- max(i / n - p)
  Dminus <- max(p - (i - 1) / n)
  K <- max(Dplus, Dminus)
  # For n > 100 the statistic is rescaled and the formula is used with n = 100.
  if (n <= 100) {
    Kd <- K
    nd <- n
  } else {
    Kd <- K * ((n / 100)^0.49)
    nd <- 100
  }
  pvalue <- exp(
    -7.01256 * Kd^2 * (nd + 2.78019) +
      2.99587 * Kd * sqrt(nd + 2.78019) -
      0.122119 + 0.974598 / sqrt(nd) + 1.67997 / nd
  )
  # When the first approximation exceeds 0.1, switch to the piecewise
  # polynomial approximation in the modified statistic KK.
  if (pvalue > 0.1) {
    KK <- (sqrt(n) - 0.01 + 0.85 / sqrt(n)) * K
    if (KK <= 0.302) {
      pvalue <- 1
    } else if (KK <= 0.5) {
      pvalue <- 2.76773 - 19.828315 * KK + 80.709644 * KK^2 -
        138.55152 * KK^3 + 81.218052 * KK^4
    } else if (KK <= 0.9) {
      pvalue <- -4.901232 + 40.662806 * KK - 97.490286 * KK^2 +
        94.029866 * KK^3 - 32.355711 * KK^4
    } else if (KK <= 1.31) {
      pvalue <- 6.198765 - 19.558097 * KK + 23.186922 * KK^2 -
        12.234627 * KK^3 + 2.423045 * KK^4
    } else {
      pvalue <- 0
    }
  }
  structure(
    list(
      statistic = c(D = K),
      p.value = pvalue,
      method = "Lilliefors (Kolmogorov-Smirnov) normality test",
      data.name = dname
    ),
    class = "htest"
  )
}

# nortest/R/sf.test.R0000644000176200001440000000117412556245346013701 0ustar liggesusers

# Shapiro-Francia test for the composite hypothesis of normality.
#
# x: numeric vector; missing values are dropped, and between 5 and 5000
#    values must remain.
# Returns an "htest" list with the statistic W (squared correlation between
# the ordered data and qnorm(ppoints(n, a = 3/8))), the p-value, the method
# name and the name of the data.
sf.test <- function(x) {
  dname <- paste(deparse(substitute(x)), collapse = " ")
  x <- sort(x[complete.cases(x)])
  n <- length(x)
  if (n < 5 || n > 5000) {
    stop("sample size must be between 5 and 5000")
  }
  y <- qnorm(ppoints(n, a = 3/8))
  W <- cor(x, y)^2
  u <- log(n)
  v <- log(u)
  mu <- -1.2725 + 1.0521 * (v - u)
  sig <- 1.0308 - 0.26758 * (v + 2 / u)
  # log(1 - W) is referred to a normal distribution with mean mu and sd sig.
  z <- (log(1 - W) - mu) / sig
  pval <- pnorm(z, lower.tail = FALSE)
  structure(
    list(
      statistic = c(W = W),
      p.value = pval,
      method = "Shapiro-Francia normality test",
      data.name = dname
    ),
    class = "htest"
  )
}

# nortest/R/ad.test.R0000644000176200001440000000174612556245346013662 0ustar liggesusers

# Anderson-Darling test for the composite hypothesis of normality.
#
# x: numeric vector; missing values are dropped, and more than 7 values
#    must remain.
# Returns an "htest" list with the statistic A, the p-value, the method
# name and the name of the data.
# Stops if fewer than 8 values remain or if the data cannot be standardized.
ad.test <- function(x) {
  dname <- paste(deparse(substitute(x)), collapse = " ")
  x <- sort(x[complete.cases(x)])
  n <- length(x)
  if (n < 8) {
    stop("sample size must be greater than 7")
  }
  z <- (x - mean(x)) / sd(x)
  # Both tails are taken on the log scale so extreme values do not
  # collapse to log(0) through 1 - p.
  logp1 <- pnorm(z, log.p = TRUE)
  logp2 <- pnorm(-z, log.p = TRUE)
  # A NaN here would otherwise surface as an obscure error in the `if` below.
  if (any(is.na(logp1))) {
    stop("standardized data contain NaN; the sample must be non-constant and finite")
  }
  h <- (2 * seq_len(n) - 1) * (logp1 + rev(logp2))
  A <- -n - mean(h)
  # Modified statistic on which the piecewise p-value approximation is based.
  AA <- (1 + 0.75 / n + 2.25 / n^2) * A
  if (AA < 0.2) {
    pval <- 1 - exp(-13.436 + 101.14 * AA - 223.73 * AA^2)
  } else if (AA < 0.34) {
    pval <- 1 - exp(-8.318 + 42.796 * AA - 59.938 * AA^2)
  } else if (AA < 0.6) {
    pval <- exp(0.9177 - 4.279 * AA - 1.38 * AA^2)
  } else if (AA < 10) {
    pval <- exp(1.2937 - 5.709 * AA + 0.0186 * AA^2)
  } else {
    pval <- 3.7e-24
  }
  structure(
    list(
      statistic = c(A = A),
      p.value = pval,
      method = "Anderson-Darling normality test",
      data.name = dname
    ),
    class = "htest"
  )
}

# nortest/MD50000644000176200001440000000116512556250141012264 0ustar liggesusers
# 8d544d79c2ca9d3d3af9c21e20ff5cda *ChangeLog
# 3278000fce0c8cfec21a0bce61eabbbb *DESCRIPTION
# c8c0f358620549d4164f95876c8ac8b1 *NAMESPACE
# 0be837d22cc94617f9cead4e6a72fd42 *R/ad.test.R
# bec733d1c117ccd9593c37607e17bd03 *R/cvm.test.R
# c672b95caa6de73d839e15ec36fbde99 *R/lillie.test.R
# c2ee62eac181fca251d7a70503c96ae2 *R/pearson.test.R
# e73ed91d74be1f1a7c080ff35bb43f82 *R/sf.test.R
# b5267ad8746dbbbcaed0dfe0cdbd7dea *man/ad.test.Rd
# 79c660cf60fb4bf9c0f0089898fe642f *man/cvm.test.Rd
# 0b0c9455a2d4ef604a45ff8c45e5b7b1 *man/lillie.test.Rd
# c27e4b08dd8533138809de9dbd081630 *man/pearson.test.Rd
# ba77d1ac9f1527e92ddb57bf0b8737a6 *man/sf.test.Rd
# nortest/DESCRIPTION0000644000176200001440000000115412556250141013460 0ustar liggesusers
# Package: nortest
# Title: Tests for Normality
# Version: 1.0-4
# Date: 2015-07-29
# Description: Five omnibus tests for testing the composite hypothesis of normality.
License: GPL (>= 2) Authors@R: c(person("Juergen", "Gross", role = "aut", email = "gross@statistik.tu-dortmund.de"), person("Uwe", "Ligges", role = c("aut", "cre"), email = "ligges@statistik.tu-dortmund.de")) Imports: stats Packaged: 2015-07-29 21:51:34 UTC; ligges Author: Juergen Gross [aut], Uwe Ligges [aut, cre] Maintainer: Uwe Ligges NeedsCompilation: no Repository: CRAN Date/Publication: 2015-07-30 00:14:57 nortest/ChangeLog0000644000176200001440000000020512556245346013533 0ustar liggesusersVersion: 1.0-3 bug fixes for extreme values in ad.test (thanks to John P. Nolan) Version: 1.0-4 importing from base namespaces nortest/man/0000755000176200001440000000000012556245346012537 5ustar liggesusersnortest/man/cvm.test.Rd0000644000176200001440000000403712556245346014575 0ustar liggesusers\name{cvm.test} \alias{cvm.test} \title{Cramer-von Mises test for normality} \description{ Performs the Cramer-von Mises test for the composite hypothesis of normality, see e.g. Thode (2002, Sec. 5.1.3). } \usage{ cvm.test(x) } \arguments{ \item{x}{a numeric vector of data values, the number of which must be greater than 7. Missing values are allowed.} } \details{The Cramer-von Mises test is an EDF omnibus test for the composite hypothesis of normality. The test statistic is \deqn{ W = \frac{1}{12 n} + \sum_{i=1}^{n} \left(p_{(i)} - \frac{2i-1}{2n}\right)^2, }{W = 1/(12n) + \sum_{i=1}^n (p_(i) - (2i-1)/(2n))^2,} where \eqn{p_{(i)} = \Phi([x_{(i)} - \overline{x}]/s)}. Here, \eqn{\Phi} is the cumulative distribution function of the standard normal distribution, and \eqn{\overline{x}} and \eqn{s} are mean and standard deviation of the data values. The p-value is computed from the modified statistic \eqn{Z=W (1.0 + 0.5/n)} according to Table 4.9 in Stephens (1986). 
} \value{ A list with class \dQuote{htest} containing the following components: \item{statistic}{the value of the Cramer-von Mises statistic.} \item{p.value }{the p-value for the test.} \item{method}{the character string \dQuote{Cramer-von Mises normality test}.} \item{data.name}{a character string giving the name(s) of the data.} } \references{Stephens, M.A. (1986): Tests based on EDF statistics. In: D'Agostino, R.B. and Stephens, M.A., eds.: Goodness-of-Fit Techniques. Marcel Dekker, New York. Thode Jr., H.C. (2002): Testing for Normality. Marcel Dekker, New York. } \author{Juergen Gross} \seealso{\code{\link{shapiro.test}} for performing the Shapiro-Wilk test for normality. \code{\link{ad.test}}, \code{\link{lillie.test}}, \code{\link{pearson.test}}, \code{\link{sf.test}} for performing further tests for normality. \code{\link{qqnorm}} for producing a normal quantile-quantile plot.} \examples{ cvm.test(rnorm(100, mean = 5, sd = 3)) cvm.test(runif(100, min = 2, max = 4)) } \keyword{htest} nortest/man/sf.test.Rd0000644000176200001440000000414412556245346014417 0ustar liggesusers\name{sf.test} \alias{sf.test} \title{Shapiro-Francia test for normality} \description{ Performs the Shapiro-Francia test for the composite hypothesis of normality, see e.g. Thode (2002, Sec. 2.3.2). } \usage{ sf.test(x) } \arguments{ \item{x}{a numeric vector of data values, the number of which must be between 5 and 5000. Missing values are allowed.} } \details{The test statistic of the Shapiro-Francia test is simply the squared correlation between the ordered sample values and the (approximated) expected ordered quantiles from the standard normal distribution. The p-value is computed from the formula given by Royston (1993). 
} \value{ A list with class \dQuote{htest} containing the following components: \item{statistic}{the value of the Shapiro-Francia statistic.} \item{p.value }{the p-value for the test.} \item{method}{the character string \dQuote{Shapiro-Francia normality test}.} \item{data.name}{a character string giving the name(s) of the data.} } \references{Royston, P. (1993): A pocket-calculator algorithm for the Shapiro-Francia test for non-normality: an application to medicine. Statistics in Medicine, 12, 181--184. Thode Jr., H.C. (2002): Testing for Normality. Marcel Dekker, New York.} \author{Juergen Gross} \note{The Shapiro-Francia test is known to perform well, see also the comments by Royston (1993). The expected ordered quantiles from the standard normal distribution are approximated by \code{qnorm(ppoints(x, a = 3/8))}, being slightly different from the approximation \code{qnorm(ppoints(x, a = 1/2))} used for the normal quantile-quantile plot by \code{\link{qqnorm}} for sample sizes greater than 10.} \seealso{\code{\link{shapiro.test}} for performing the Shapiro-Wilk test for normality. \code{\link{ad.test}}, \code{\link{cvm.test}}, \code{\link{lillie.test}}, \code{\link{pearson.test}} for performing further tests for normality. \code{\link{qqnorm}} for producing a normal quantile-quantile plot.} \examples{ sf.test(rnorm(100, mean = 5, sd = 3)) sf.test(runif(100, min = 2, max = 4)) } \keyword{htest} nortest/man/pearson.test.Rd0000644000176200001440000000740412556245346015460 0ustar liggesusers\name{pearson.test} \alias{pearson.test} \title{Pearson chi-square test for normality} \description{ Performs the Pearson chi-square test for the composite hypothesis of normality, see e.g. Thode (2002, Sec. 5.2). } \usage{ pearson.test(x, n.classes = ceiling(2 * (n^(2/5))), adjust = TRUE) } \arguments{ \item{x}{a numeric vector of data values. Missing values are allowed.} \item{n.classes}{The number of classes. 
The default is due to Moore (1986).} \item{adjust}{logical; if \code{TRUE} (default), the p-value is computed from a chi-square distribution with \code{n.classes}-3 degrees of freedom, otherwise from a chi-square distribution with \code{n.classes}-1 degrees of freedom.} } \details{ The Pearson test statistic is \eqn{P=\sum (C_{i} - E_{i})^{2}/E_{i}}, where \eqn{C_{i}} is the number of counted and \eqn{E_{i}} is the number of expected observations (under the hypothesis) in class \eqn{i}. The classes are built in such a way that they are equiprobable under the hypothesis of normality. The p-value is computed from a chi-square distribution with \code{n.classes}-3 degrees of freedom if \code{adjust} is \code{TRUE} and from a chi-square distribution with \code{n.classes}-1 degrees of freedom otherwise. In both cases this is not (!) the correct p-value, lying somewhere between the two, see also Moore (1986). } \value{ A list with class \dQuote{htest} containing the following components: \item{statistic}{the value of the Pearson chi-square statistic.} \item{p.value }{the p-value for the test.} \item{method}{the character string \dQuote{Pearson chi-square normality test}.} \item{data.name}{a character string giving the name(s) of the data.} \item{n.classes}{the number of classes used for the test.} \item{df}{the degrees of freedom of the chi-square distribution used to compute the p-value.} } \references{Moore, D.S. (1986): Tests of the chi-squared type. In: D'Agostino, R.B. and Stephens, M.A., eds.: Goodness-of-Fit Techniques. Marcel Dekker, New York. Thode Jr., H.C. (2002): Testing for Normality. Marcel Dekker, New York. } \author{Juergen Gross} \note{The Pearson chi-square test is usually not recommended for testing the composite hypothesis of normality due to its inferior power properties compared to other tests.
It is common practice to compute the p-value from the chi-square distribution with \code{n.classes} - 3 degrees of freedom, in order to adjust for the additional estimation of two parameters. (For the simple hypothesis of normality (mean and variance known) the test statistic is asymptotically chi-square distributed with \code{n.classes} - 1 degrees of freedom.) This is, however, not correct as long as the parameters are estimated by \code{mean(x)} and \code{var(x)} (or \code{sd(x)}), as it is usually done, see Moore (1986) for details. Since the true p-value is somewhere between the two, it is suggested to run \code{pearson.test} twice, with \code{adjust = TRUE} (default) and with \code{adjust = FALSE}. It is also suggested to slightly change the default number of classes, in order to see the effect on the p-value. Eventually, it is suggested not to rely upon the result of the test. The function call \code{pearson.test(x)} essentially produces the same result as the S-PLUS function call \code{chisq.gof((x-mean(x))/sqrt(var(x)), n.param.est=2)}. } \seealso{\code{\link{shapiro.test}} for performing the Shapiro-Wilk test for normality. \code{\link{ad.test}}, \code{\link{cvm.test}}, \code{\link{lillie.test}}, \code{\link{sf.test}} for performing further tests for normality. \code{\link{qqnorm}} for producing a normal quantile-quantile plot.} \examples{ pearson.test(rnorm(100, mean = 5, sd = 3)) pearson.test(runif(100, min = 2, max = 4)) } \keyword{htest} nortest/man/ad.test.Rd0000644000176200001440000000430212556245346014367 0ustar liggesusers\name{ad.test} \alias{ad.test} \title{Anderson-Darling test for normality} \description{ Performs the Anderson-Darling test for the composite hypothesis of normality, see e.g. Thode (2002, Sec. 5.1.4). } \usage{ ad.test(x) } \arguments{ \item{x}{a numeric vector of data values, the number of which must be greater than 7. 
Missing values are allowed.} } \details{The Anderson-Darling test is an EDF omnibus test for the composite hypothesis of normality. The test statistic is \deqn{ A = -n -\frac{1}{n} \sum_{i=1}^{n} [2i-1] [\ln(p_{(i)}) + \ln(1 - p_{(n-i+1)})], } where \eqn{p_{(i)} = \Phi([x_{(i)} - \overline{x}]/s)}. Here, \eqn{\Phi} is the cumulative distribution function of the standard normal distribution, and \eqn{\overline{x}} and \eqn{s} are mean and standard deviation of the data values. The p-value is computed from the modified statistic \eqn{Z=A (1.0 + 0.75/n +2.25/n^{2})}\ according to Table 4.9 in Stephens (1986). } \value{ A list with class \dQuote{htest} containing the following components: \item{statistic}{the value of the Anderson-Darling statistic.} \item{p.value }{the p-value for the test.} \item{method}{the character string \dQuote{Anderson-Darling normality test}.} \item{data.name}{a character string giving the name(s) of the data.} } \references{Stephens, M.A. (1986): Tests based on EDF statistics. In: D'Agostino, R.B. and Stephens, M.A., eds.: Goodness-of-Fit Techniques. Marcel Dekker, New York. Thode Jr., H.C. (2002): Testing for Normality. Marcel Dekker, New York. } \author{Juergen Gross} \note{The Anderson-Darling test is the recommended EDF test by Stephens (1986). Compared to the Cramer-von Mises test (as second choice) it gives more weight to the tails of the distribution.} \seealso{\code{\link{shapiro.test}} for performing the Shapiro-Wilk test for normality. \code{\link{cvm.test}}, \code{\link{lillie.test}}, \code{\link{pearson.test}}, \code{\link{sf.test}} for performing further tests for normality. 
\code{\link{qqnorm}} for producing a normal quantile-quantile plot.} \examples{ ad.test(rnorm(100, mean = 5, sd = 3)) ad.test(runif(100, min = 2, max = 4)) } \keyword{htest} nortest/man/lillie.test.Rd0000644000176200001440000000724312556245346015264 0ustar liggesusers\name{lillie.test} \alias{lillie.test} \title{Lilliefors (Kolmogorov-Smirnov) test for normality} \description{ Performs the Lilliefors (Kolmogorov-Smirnov) test for the composite hypothesis of normality, see e.g. Thode (2002, Sec. 5.1.1). } \usage{ lillie.test(x) } \arguments{ \item{x}{a numeric vector of data values, the number of which must be greater than 4. Missing values are allowed.} } \details{The Lilliefors (Kolmogorov-Smirnov) test is an EDF omnibus test for the composite hypothesis of normality. The test statistic is the maximal absolute difference between empirical and hypothetical cumulative distribution function. It may be computed as \eqn{D=\max\{D^{+}, D^{-}\}} with \deqn{ D^{+} = \max_{i=1,\ldots, n}\{i/n - p_{(i)}\}, D^{-} = \max_{i=1,\ldots, n}\{p_{(i)} - (i-1)/n\}, } where \eqn{p_{(i)} = \Phi([x_{(i)} - \overline{x}]/s)}. Here, \eqn{\Phi} is the cumulative distribution function of the standard normal distribution, and \eqn{\overline{x}} and \eqn{s} are mean and standard deviation of the data values. The p-value is computed from the Dallal-Wilkinson (1986) formula, which is claimed to be only reliable when the p-value is smaller than 0.1. 
If the Dallal-Wilkinson p-value turns out to be greater than 0.1, then the p-value is computed from the distribution of the modified statistic \eqn{Z=D (\sqrt{n}-0.01+0.85/\sqrt{n})}, see Stephens (1974), the actual p-value formula being obtained by a simulation and approximation process.} \value{ A list with class \dQuote{htest} containing the following components: \item{statistic}{the value of the Lilliefors (Kolmogorov-Smirnov) statistic.} \item{p.value }{the p-value for the test.} \item{method}{the character string \dQuote{Lilliefors (Kolmogorov-Smirnov) normality test}.} \item{data.name}{a character string giving the name(s) of the data.} } \references{ Dallal, G.E. and Wilkinson, L. (1986): An analytic approximation to the distribution of Lilliefors' test for normality. The American Statistician, 40, 294--296. Stephens, M.A. (1974): EDF statistics for goodness of fit and some comparisons. Journal of the American Statistical Association, 69, 730--737. Thode Jr., H.C. (2002): Testing for Normality. Marcel Dekker, New York. } \author{Juergen Gross} \note{The Lilliefors (Kolmogorov-Smirnov) test is the most famous EDF omnibus test for normality. Compared to the Anderson-Darling test and the Cramer-von Mises test it is known to perform worse. Although the test statistic obtained from \code{lillie.test(x)} is the same as that obtained from \code{ks.test(x, "pnorm", mean(x), sd(x))}, it is not correct to use the p-value from the latter for the composite hypothesis of normality (mean and variance unknown), since the distribution of the test statistic is different when the parameters are estimated. The function call \code{lillie.test(x)} essentially produces the same result as the S-PLUS function call \code{ks.gof(x)} with the distinction that the p-value is not set to 0.5 when the Dallal-Wilkinson approximation yields a p-value greater than 0.1.
(Actually, the alternative p-value approximation is provided for the complete range of test statistic values, but is only used when the Dallal-Wilkinson approximation fails.)} \seealso{\code{\link{shapiro.test}} for performing the Shapiro-Wilk test for normality. \code{\link{ad.test}}, \code{\link{cvm.test}}, \code{\link{pearson.test}}, \code{\link{sf.test}} for performing further tests for normality. \code{\link{qqnorm}} for producing a normal quantile-quantile plot.} \examples{ lillie.test(rnorm(100, mean = 5, sd = 3)) lillie.test(runif(100, min = 2, max = 4)) } \keyword{htest}