## --- epiR/NAMESPACE ---

# Export all names
exportPattern(".")

S3method(print, epi.2by2)
S3method(summary, epi.2by2)
S3method(print, epi.tests)
S3method(summary, epi.tests)
S3method(print, epi.occc)
S3method(summary, epi.occc)

# Import all packages listed as Imports or Depends
import(survival)
importFrom(BiasedUrn, dFNCHypergeo)
importFrom("graphics", "hist")
importFrom("methods", "slot")
importFrom("stats", "complete.cases", "cor", "cov", "fisher.test",
    "mantelhaen.test", "model.matrix", "pbeta", "pbinom", "pchisq",
    "phyper", "pnorm", "pt", "qbeta", "qbinom", "qchisq", "qf",
    "qgamma", "qnorm", "qpois", "qt", "quantile", "rpois", "sd",
    "uniroot", "var", "vcov")
importFrom("utils", "packageDescription", "write.table")

## --- epiR/data/epi.epidural.RData --- (binary R data file; contents not representable as text)
## --- epiR/data/epi.incin.RData --- (binary R data file; contents not representable as text)
## --- epiR/data/epi.SClip.RData --- (binary R data file; contents not representable as text)

## --- epiR/R/ (file header lost in extraction; the fragment below begins mid-function) ---

    gn <- length(tdat)
    gmean <- exp(mean(log(tdat), na.rm = TRUE))
    gsd <- sd(log(tdat), na.rm = TRUE)
    gse <- gsd / sqrt(gn)
    gna <- is.na(tdat); gna <- sum(as.numeric(gna))
    gq25 <- as.vector(exp(quantile(log(tdat), probs = 0.25, na.rm = TRUE)))
    gq75 <- as.vector(exp(quantile(log(tdat), probs = 0.75, na.rm = TRUE)))
    glcl <- exp(mean(log(tdat), na.rm = TRUE) - (tcrit * gse))
    gucl <- exp(mean(log(tdat), na.rm = TRUE) + (tcrit * gse))
    gmin <- as.vector(exp(min(log(tdat), na.rm = TRUE)))
    gmax <- as.vector(exp(max(log(tdat), na.rm = TRUE)))

    # Skewness:
    x <- dat - mean(dat, na.rm = TRUE)
    skew <- sqrt(an) * sum(x^3, na.rm = TRUE) / (sum(x^2, na.rm = TRUE)^(3/2))

    # Kurtosis:
    x <- dat - mean(dat, na.rm = TRUE)
    r <- an * sum(x^4, na.rm = TRUE) / (sum(x^2, na.rm = TRUE)^2)
    kurt <- ((an + 1) * (r - 3) + 6) * (an - 1) / ((an - 2) * (an - 3))

    rval <- list(
        arithmetic = data.frame(n = an, mean = amean, sd = asd, q25 = aq25,
            q75 = aq75, lower = alcl, upper = aucl, min = amin, max = amax, na = ana),
        geometric = data.frame(n = gn, mean = gmean, sd = gsd, q25 = gq25,
            q75 = gq75, lower = glcl, upper = gucl, min = gmin, max = gmax, na = gna),
        symmetry = data.frame(skewness = skew, kurtosis = kurt))
    return(rval)
}
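# The fragment above forms a t-based confidence interval on the log scale and
# back-transforms it: exp(mean(log x) +/- tcrit * se(log x)). A minimal
# standalone sketch of the same geometric-mean interval (illustrative only;
# `vals` is made-up data, not part of the package):
# vals <- c(1.2, 3.4, 0.8, 2.2, 5.1, 1.9)
# n <- length(vals)
# tcrit <- qt(0.975, df = n - 1)
# gse <- sd(log(vals)) / sqrt(n)
# exp(mean(log(vals)) + c(-1, 1) * tcrit * gse)   # geometric mean 95% CI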
## --- epiR/R/epi.clustersize.R ---

"epi.clustersize" <- function(p, b, rho, epsilon.r, conf.level = 0.95){
    N. <- 1 - ((1 - conf.level) / 2)
    z <- qnorm(N., mean = 0, sd = 1)
    D <- rho * (b - 1) + 1
    s <- p * epsilon.r
    numerator <- p * (1 - p) * D * z^2
    denominator <- (s^2 * b)
    clusters <- round((numerator / denominator) + 1, digits = 0)
    units <- clusters * b
    rval <- list(clusters = clusters, units = units, design = D)
    return(rval)
}
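# Illustrative call (values are made up): the number of clusters of size
# b = 20 needed to estimate a prevalence of 0.30 to within 10% relative
# error, assuming an intra-cluster correlation of 0.02:
# epi.clustersize(p = 0.30, b = 20, rho = 0.02, epsilon.r = 0.10,
#     conf.level = 0.95)
# The design effect returned is D = rho * (b - 1) + 1.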
## --- epiR/R/epi.prev.R ---

"epi.prev" <- function(pos, tested, se, sp, method = "wilson", conf.level = 0.95){
    # Apparent prevalence:
    ap.p <- pos / tested
    if(method == "c-p")
        ap.cl = .bin.conf(pos, tested, method = "e", alpha = 1 - conf.level)[2:3]
    else if (method == "sterne")
        ap.cl = .sterne.int(pos, tested, alpha = 1 - conf.level)
    else if (method == "blaker")
        ap.cl = .blakerci(pos, tested, conf.level)
    else if (method == "wilson")
        ap.cl = .bin.conf(pos, tested, method = "w", alpha = 1 - conf.level)[2:3]
    else stop('Valid methods are "c-p", "sterne", "blaker", or "wilson"')

    # True prevalence:
    if(method == "c-p")
        tp.cl = .bin.conf(pos, tested, method = "e", alpha = 1 - conf.level)[2:3]
    else if (method == "sterne")
        tp.cl = .sterne.int(pos, tested, alpha = 1 - conf.level)
    else if (method == "blaker")
        tp.cl = .blakerci(pos, tested, conf.level)
    else if (method == "wilson")
        tp.cl = .bin.conf(pos, tested, method = "w", alpha = 1 - conf.level)[2:3]
    else stop('Valid methods are "c-p", "sterne", "blaker", or "wilson"')

    tp.p <- (ap.p + sp - 1) / (se + sp - 1)
    tp.p[tp.p < 0] <- 0
    tp.p[tp.p > 1] <- 1
    adj.cl <- (tp.cl + sp - 1) / (se + sp - 1)
    adj.cl <- pmax(adj.cl, c(0, 0))
    adj.cl <- pmin(adj.cl, c(1, 1))
    result.01 <- data.frame(est = ap.p, lower = ap.cl[1], upper = ap.cl[2])
    result.02 <- data.frame(est = tp.p, lower = adj.cl[1], upper = adj.cl[2])
    rval <- list(ap = result.01, tp = result.02)
    return(rval)
}

# library(Hmisc)

# -----------------------------------
# Blaker's interval (by Helge Blaker). Computes the Blaker exact CI
# (Canadian J. Stat 2000) for a binomial success probability for x successes
# out of n trials with confidence coefficient = conf.level.
# Uses the .acceptbin function.
.blakerci <- function(x, n, conf.level, tolerance = 1e-04){
    lower = 0
    upper = 1
    if (x != 0){
        lower = qbeta((1 - conf.level) / 2, x, n - x + 1)
        while (.acceptbin(x, n, lower + tolerance) < (1 - conf.level))
            lower = lower + tolerance
    }
    if (x != n){
        upper = qbeta(1 - (1 - conf.level) / 2, x + 1, n - x)
        while (.acceptbin(x, n, upper - tolerance) < (1 - conf.level))
            upper = upper - tolerance
    }
    c(lower, upper)
}

.acceptbin = function(x, n, p){
    # Computes the Blaker acceptability of p when x is observed and X is bin(n, p)
    p1 = 1 - pbinom(x - 1, n, p)
    p2 = pbinom(x, n, p)
    a1 = p1 + pbinom(qbinom(p1, n, p) - 1, n, p)
    a2 = p2 + 1 - pbinom(qbinom(1 - p2, n, p), n, p)
    return(min(a1, a2))
}

# -----------------------------------
# Exact confidence intervals
# -----------------------------------
.sterne.int <- function(x, n, alpha = 0.05, del = 10^-5){
    logit <- function(p){log(p / (1 - p))}
    invlogit <- function(y){exp(y) / (1 + exp(y))}
    theta <- function(k, x, n){(lchoose(n, x) - lchoose(n, k)) / (k - x)}
    Feta <- function(x, eta){pbinom(x, n, invlogit(eta))}

    # The function .piXeta(x, eta) automatically accounts for the fact that
    # if k_alpha(X) = min(J) then a_alpha^st(X) = a_alpha(X)
    .piXeta <- function(x, eta){
        if (invlogit(eta) >= 1){f <- 0}
        else {
            J <- c(0:(x - 1), (x + 1):n)
            # on (-infinity, theta_0]
            t1 <- theta(0, x, n)
            if (is.na(t1) != 1 && eta <= t1){f <- 1 - Feta(x - 1, eta)}
            # on [theta_0, mode]
            k1 <- J[J < (x - 1)]
            if (length(k1) > 0){
                the1 <- theta(k1, x, n)
                the2 <- theta(k1 + 1, x, n)
                pos <- (the1 <= eta) * (eta < the2)
                if (sum(pos) > 0){f <- 1 - Feta(x - 1, eta) + Feta(max(k1 * pos), eta)}
            }
            # mode
            the1 <- theta(x - 1, x, n)
            the2 <- theta(x + 1, x, n)
            if (eta >= the1 && eta <= the2){f <- 1}
        }
        # on [mode, theta_n]
        k2 <- J[J > (x + 1)]
        if (length(k2) > 0){
            the1 <- theta(k2 - 1, x, n)
            the2 <- theta(k2, x, n)
            kre <- sum(k2 * (the1 < eta) * (eta <= the2))
            if (kre > 0){
                f <- 1 - Feta(kre - 1, eta) + Feta(x, eta)}
        }
        # on [theta_n, infty)
        t2 <- theta(n, x, n)
        if (is.na(t2) != 1 && eta >= t2){f <- Feta(x, eta)}
        f}

    # Lower bound a_alpha^st(X)
    if (x == 0){pu <- 0}
    else {
        J <- c(0:(x - 1), (x + 1):n)
        k1 <- min(J)
        pi1 <- .piXeta(x, theta(k1, x, n))
        # Calculation of k_alpha(X)
        if (pi1 >= alpha){kal <- k1}
        else {
            k <- x - 1
            while (k1 < k - 1){
                k2 <- floor((k + k1) / 2)
                pi2 <- .piXeta(x, theta(k2, x, n))
                if (pi2 >= alpha){k <- k2}
                else {k1 <- k2}
            }
            kal <- k
        }
        # Calculation of a_alpha^st(X):
        b1 <- theta(kal, x, n)
        pi1 <- 1 - Feta(x - 1, b1) + Feta(kal - 1, b1)
        if (pi1 <= alpha){b <- b1}
        else {
            b <- max(theta(kal - 1, x, n), logit(del))
            pi <- 1 - Feta(x - 1, b) + Feta(kal - 1, b)
            while (b1 - b > del || pi1 - pi > del){
                b2 <- (b + b1) / 2
                pi2 <- 1 - Feta(x - 1, b2) + Feta(kal - 1, b2)
                if (pi2 > alpha){
                    b1 <- b2
                    pi1 <- pi2}
                else {
                    b <- b2
                    pi <- pi2}}}
        pu <- invlogit(b)}

    # Upper bound b_alpha^st(X):
    if (x == n){po <- 1}
    else {
        J <- c(0:(x - 1), (x + 1):n)
        k1 <- max(J)
        pi1 <- .piXeta(x, theta(k1, x, n))
        # Calculation of k_alpha(X):
        if (pi1 >= alpha){kau <- k1}
        else {
            k <- x + 1
            pi <- 1
            while (k1 > k + 1){
                k2 <- floor((k + k1) / 2)
                pi2 <- .piXeta(x, theta(k2, x, n))
                if (pi2 >= alpha){k <- k2}
                else {k1 <- k2}
            }
            kau <- k
        }
        # Calculation of b_alpha^st(X):
        b1 <- theta(kau, x, n)
        pi1 <- 1 - Feta(kau, b1) + Feta(x, b1)
        if (pi1 <= alpha){
            b <- b1
            po <- pi1}
        else {
            b <- min(theta(kau + 1, x, n), b1 + n)
            pi <- 1 - Feta(kau, b) + Feta(x, b)
            while (b - b1 > del || pi1 - pi > del){
                b2 <- (b + b1) / 2
                pi2 <- 1 - Feta(kau, b2) + Feta(x, b2)
                if (pi2 > alpha){
                    b1 <- b2
                    pi1 <- pi2}
                else {
                    b <- b2
                    pi <- pi2}}}
        po <- invlogit(b)}
    # c("a_alpha^St" = pu, "b_alpha^St" = po)
    c(pu, po)
}
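# Illustrative call of epi.prev() above (made-up values): 25 test-positives
# among 200 sampled, test sensitivity 0.90 and specificity 0.95. Note that
# epi.prev() relies on the helper estimators in this file (.bin.conf is
# defined below), so source the whole file first.
# epi.prev(pos = 25, tested = 200, se = 0.90, sp = 0.95,
#     method = "wilson", conf.level = 0.95)
# The Rogan-Gladen correction used for true prevalence is
# tp = (ap + sp - 1) / (se + sp - 1), truncated to the interval [0, 1].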
.bin.conf <- function (x, n, alpha = 0.05,
    method = c("wilson", "exact", "asymptotic", "all"),
    include.x = FALSE, include.n = FALSE, return.df = FALSE){
    method <- match.arg(method)
    bc <- function(x, n, alpha, method) {
        nu1 <- 2 * (n - x + 1)
        nu2 <- 2 * x
        ll <- if (x > 0)
            x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1))
        else 0
        nu1p <- nu2 + 2
        nu2p <- nu1 - 2
        pp <- if (x < n)
            qf(1 - alpha/2, nu1p, nu2p)
        else 1
        ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp)
        zcrit <- -qnorm(alpha/2)
        z2 <- zcrit * zcrit
        p <- x/n
        cl <- (p + z2/2/n + c(-1, 1) * zcrit * sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n)
        if (x == 1)
            cl[1] <- -log(1 - alpha)/n
        if (x == (n - 1))
            cl[2] <- 1 + log(1 - alpha)/n
        asymp.lcl <- x/n - qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n))/n)
        asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n))/n)
        res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl))
        res <- cbind(rep(x/n, 3), res)
        switch(method, wilson = res[2, ], exact = res[1, ],
            asymptotic = res[3, ], all = res, res)
    }
    if ((length(x) != length(n)) & length(x) == 1)
        x <- rep(x, length(n))
    if ((length(x) != length(n)) & length(n) == 1)
        n <- rep(n, length(x))
    if ((length(x) > 1 | length(n) > 1) & method == "all") {
        method <- "wilson"
        warning("method = all will not work with vectors ... setting method to wilson")
    }
    if (method == "all" & length(x) == 1 & length(n) == 1) {
        mat <- bc(x, n, alpha, method)
        dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"),
            c("PointEst", "Lower", "Upper"))
        if (include.n)
            mat <- cbind(N = n, mat)
        if (include.x)
            mat <- cbind(X = x, mat)
        if (return.df)
            mat <- as.data.frame(mat)
        return(mat)
    }
    mat <- matrix(ncol = 3, nrow = length(x))
    for (i in 1:length(x)) mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method)
    dimnames(mat) <- list(rep("", dim(mat)[1]), c("PointEst", "Lower", "Upper"))
    if (include.n)
        mat <- cbind(N = n, mat)
    if (include.x)
        mat <- cbind(X = x, mat)
    if (return.df)
        mat <- as.data.frame(mat, row.names = NULL)
    mat
}

## --- epiR/R/epi.bohning.R ---

"epi.bohning" <- function(obs, exp, alpha = 0.05){
    J <- length(obs)
    smr <- obs / exp
    smr.bar <- sum(smr) / J

    # Bohning's test:
    top <- (1 / (J - 1)) * sum(((obs - (smr.bar * exp))^2) / exp) - smr.bar
    bottom <- sqrt((2 * smr.bar) / (J - 1))
    bohning <- top / bottom
    p <- 1 - pnorm(q = bohning, mean = 0, sd = 1)

    # Results:
    rval <- as.data.frame(cbind(test.statistic = bohning, p.value = p))
    return(rval)
}

## --- epiR/R/epi.popsize.R ---

"epi.popsize" <- function (T1, T2, T12, conf.level = 0.95, verbose = FALSE) {
    N. <- c(((1 - conf.level) / 2), 1 - ((1 - conf.level) / 2))
    z <- qnorm(N., mean = 0, sd = 1)[2]
    lower <- "lower"
    upper <- "upper"
    N <- T1 * (T2 / T12)
    p <- T1 / N
    fcf <- sqrt(1 - (T2 / N))
    width <- z * sqrt(((p * (1 - p)) / T2) * (1 - T2 / N)) + (1 / (2 * N))
    low.p <- p - width
    up.p <- p + width
    low.N <- round((T1 / up.p), digits = 0)
    up.N <- round(ceiling(T1 / low.p), digits = 0)
    # New tests first round = T1
    # New tests second round = T2 - T12
    total.test <- T1 + (T2 - T12)
    untest <- N - total.test
    low.untest <- ifelse(low.N - total.test < 0, 0, low.N - total.test)
    up.untest <- ifelse(up.N - total.test < 0, 0, up.N - total.test)
    population <- as.data.frame(cbind(round(N, digits = 0), low.N, up.N))
    names(population) <- c("est", lower, upper)
    untested <- as.data.frame(cbind(round(untest, digits = 0), low.untest, up.untest))
    names(untested) <- c("est", lower, upper)
    rval <- list(population = population, untested = untested)
    if(verbose == TRUE){
        return(rval)
    }
    else if(verbose == FALSE){
        line1 <- paste("Estimated population size: ", round(N, digits = 0),
            " (", (conf.level * 100), "% CI ", low.N, " - ", up.N, ")", sep = "")
        line2 <- paste("Estimated number of untested subjects: ", round(untest, digits = 0),
            " (", (conf.level * 100), "% CI ", low.untest, " - ", up.untest, ")", sep = "")
        cat("\n", line1)
        cat("\n", line2, "\n")
    }
}
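# Illustrative capture-recapture call (made-up counts): 150 individuals
# identified in the first round, 130 in the second, 70 identified in both.
# The Lincoln-Petersen point estimate is N = T1 * T2 / T12.
# epi.popsize(T1 = 150, T2 = 130, T12 = 70, conf.level = 0.95, verbose = TRUE)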
## --- epiR/R/epi.kappa.R ---

"epi.kappa" <- function(dat, method = "fleiss",
    alternative = c("two.sided", "less", "greater"), conf.level = 0.95){
    N. <- 1 - ((1 - conf.level) / 2)
    z <- qnorm(N., mean = 0, sd = 1)
    lower <- "lower"
    upper <- "upper"
    n <- sum(dat)

    # Function to return confidence intervals for a proportion:
    .funincrisk <- function(ndat, nconf.level){
        ## Exact binomial confidence limits from D. Collett (1999) Modelling
        ## binary data. Chapman & Hall/CRC, Boca Raton Florida, p. 24.
        N. <- 1 - ((1 - nconf.level) / 2)
        a <- ndat[,1]
        n <- ndat[,2]
        b <- n - a
        p <- a / n
        a. <- ifelse(a == 0, a + 1, a); b. <- ifelse(b == 0, b + 1, b)
        low <- a. / (a. + (b. + 1) * (1 / qf(1 - N., 2 * a., 2 * b. + 2)))
        up <- (a. + 1) / (a. + 1 + b. / (1 / qf(1 - N., 2 * b., 2 * a. + 2)))
        low <- ifelse(a == 0, 0, low)
        up <- ifelse(a == n, 1, up)
        rval <- data.frame(p, low, up)
        names(rval) <- c("est", "lower", "upper")
        rval
    }

    if(method == "fleiss"){
        # Turn cell frequencies into proportions:
        ndat <- dat / n

        # Overall proportion of observed agreement, pO
        # pO <- sum(diag(ndat))
        tmp <- .funincrisk(ndat = as.matrix(cbind((dat[1,1] + dat[2,2]), sum(dat))),
            nconf.level = conf.level)
        pO.p <- as.numeric(tmp[,1])
        pO.l <- as.numeric(tmp[,2])
        pO.u <- as.numeric(tmp[,3])

        # Overall proportion of chance-expected agreement, pE
        r.totals <- apply(ndat, MARGIN = 1, FUN = sum)
        c.totals <- apply(ndat, MARGIN = 2, FUN = sum)
        pE.p <- sum(r.totals * c.totals)

        # Overall kappa (Equation 18.12 in Fleiss):
        kappa.p <- (pO.p - pE.p) / (1 - pE.p)

        # Standard error of kappa (Equation 18.13 in Fleiss):
        tmp.1 <- 1 / ((1 - pE.p) * sqrt(n))
        tmp.2 <- sqrt(pE.p + pE.p^2 - sum((r.totals * c.totals) * (r.totals + c.totals)))
        se.kappa <- tmp.1 * tmp.2
        kappa.l <- kappa.p - (z * se.kappa)
        kappa.u <- kappa.p + (z * se.kappa)
    }

    if(method == "watson"){
        # Overall proportion of observed agreement, pO
        tmp <- .funincrisk(ndat = as.matrix(cbind((dat[1,1] + dat[2,2]), sum(dat))),
            nconf.level = conf.level)
        pO.p <- as.numeric(tmp[,1])
        pO.l <- as.numeric(tmp[,2])
        pO.u <- as.numeric(tmp[,3])

        # Expected proportion of agreement, pE:
        r.totals <- apply(dat, MARGIN = 1, FUN = sum)
        c.totals <- apply(dat, MARGIN = 2, FUN = sum)
        pE.p <- sum(r.totals * c.totals) / n^2

        # Overall kappa (Equation 18.12 in Fleiss):
        kappa.p <- (pO.p - pE.p) / (1 - pE.p)

        # Standard error of kappa (page 1170 of Watson and Petrie 2010):
        se.kappa <- sqrt((pO.p * (1 - pO.p)) / (n * (1 - pE.p)^2))
        kappa.l <- kappa.p - (z * se.kappa)
        kappa.u <- kappa.p + (z * se.kappa)
    }

    if(method == "altman"){
        # Overall proportion of observed agreement, pO
        n <- sum(dat)
        tmp <- .funincrisk(ndat = as.matrix(cbind((dat[1,1] + dat[2,2]), sum(dat))),
            nconf.level = conf.level)
        pO.p <- as.numeric(tmp[,1])
        pO.l <- as.numeric(tmp[,2])
        pO.u <- as.numeric(tmp[,3])

        # Overall proportion of chance-expected agreement, pE
        r.totals <- apply(dat, MARGIN = 1, FUN = sum)
        c.totals <- apply(dat, MARGIN = 2, FUN = sum)
        pE.p <- sum(r.totals * c.totals) / n^2
        kappa.p <- (pO.p - pE.p) / (1 - pE.p)
        se.kappa <- sqrt((pO.p * (1 - pO.p)) / (n * (1 - pE.p)^2))
        kappa.l <- kappa.p - (z * se.kappa)
        kappa.u <- kappa.p + (z * se.kappa)
    }

    # Bias index, the difference in proportions of 'yes' for the two raters
    # (after Byrt et al. 1993, added 010814).
    # The bias index is equal to zero if and only if the marginal proportions are equal.
    # BI = (a + b)/N - (a + c)/N
    # Confidence interval calculation same as that used for attributable risk
    # (Rothman p 135 equation 7-2).
    a <- dat[1,1] + dat[1,2]
    c <- dat[1,1] + dat[2,1]
    bi.p <- ((a / n) - (c / n))
    bi.se <- (sqrt(((a * (n - a))/n^3) + ((c * (n - c))/n^3)))
    bi.l <- (bi.p - (z * bi.se))
    bi.u <- (bi.p + (z * bi.se))

    # Prevalence index, the difference between the probability of 'Yes' and
    # the probability of 'No' (after Byrt et al. 1993, added 010814).
    # PI = (a / N) - (d / N)
    # Confidence interval calculation same as that used for attributable risk
    # (Rothman p 135 equation 7-2).
    a <- dat[1,1]
    d <- dat[2,2]
    pi.p <- ((a / n) - (d / n))
    pi.se <- (sqrt(((a * (n - a))/n^3) + ((d * (n - d))/n^3)))
    pi.l <- (pi.p - (z * pi.se))
    pi.u <- (pi.p + (z * pi.se))

    # Population adjusted, bias corrected kappa (after Byrt et al. 1993, added 010814):
    pabak.p <- 2 * pO.p - 1
    pabak.l <- 2 * pO.l - 1
    pabak.u <- 2 * pO.u - 1

    # Test of effect (Equation 18.14 in Fleiss). Code for p-value taken from
    # the z.test function in the TeachingDemos package:
    effect.z <- kappa.p / se.kappa
    alternative <- match.arg(alternative)
    p.effect <- switch(alternative,
        two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE),
        less = pnorm(effect.z),
        greater = pnorm(effect.z, lower.tail = FALSE))

    # McNemar's test (Dohoo, Martin, Stryhn):
    mcnemar <- (dat[1,2] - dat[2,1])^2 / (dat[1,2] + dat[2,1])
    p.chi2 <- 1 - pchisq(mcnemar, df = 1)

    # Results:
    prop.agree <- data.frame(obs = pO.p, exp = pE.p)
    pindex <- data.frame(est = pi.p, se = pi.se, lower = pi.l, upper = pi.u)
    bindex <- data.frame(est = bi.p, se = bi.se, lower = bi.l, upper = bi.u)
    pabak <- data.frame(est = pabak.p, lower = pabak.l, upper = pabak.u)
    kappa <- data.frame(est = kappa.p, se = se.kappa, lower = kappa.l, upper = kappa.u)
    z <- data.frame(test.statistic = effect.z, p.value = p.effect)
    mcnemar <- data.frame(test.statistic = mcnemar, df = 1, p.value = p.chi2)
    rval <- list(prop.agree = prop.agree, pindex = pindex, bindex = bindex,
        pabak = pabak, kappa = kappa, z = z, mcnemar = mcnemar)
    return(rval)
}
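# Illustrative call (made-up 2 x 2 agreement table between two raters;
# rows = rater 1 yes/no, columns = rater 2 yes/no):
# dat <- matrix(c(42, 8, 6, 44), nrow = 2, byrow = TRUE)
# epi.kappa(dat, method = "fleiss", alternative = "two.sided", conf.level = 0.95)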
## --- epiR/R/epi.dsl.R ---

"epi.dsl" <- function(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio",
    alternative = c("two.sided", "less", "greater"), conf.level = 0.95) {
    # Declarations:
    k <- length(names)
    a.i <- ev.trt
    b.i <- n.trt - ev.trt
    c.i <- ev.ctrl
    d.i <- n.ctrl - ev.ctrl
    N <- 1 - ((1 - conf.level) / 2)
    z <- qnorm(N, mean = 0, sd = 1)

    # Test each stratum for zero values. Add 0.5 to all cells if any cell has a zero value:
    for(i in 1:k){
        if(a.i[i] < 1 | b.i[i] < 1 | c.i[i] < 1 | d.i[i] < 1){
            a.i[i] <- a.i[i] + 0.5; b.i[i] <- b.i[i] + 0.5
            c.i[i] <- c.i[i] + 0.5; d.i[i] <- d.i[i] + 0.5
        }
    }
    n.1i <- a.i + b.i
    n.2i <- c.i + d.i
    N.i <- a.i + b.i + c.i + d.i

    # For summary odds ratio:
    R <- sum((a.i * d.i) / N.i)
    S <- sum((b.i * c.i) / N.i)
    E <- sum(((a.i + d.i) * a.i * d.i) / N.i^2)
    F. <- sum(((a.i + d.i) * b.i * c.i) / N.i^2)
    G <- sum(((b.i + c.i) * a.i * d.i) / N.i^2)
    H <- sum(((b.i + c.i) * b.i * c.i) / N.i^2)
    P <- sum(((n.1i * n.2i * (a.i + c.i)) - (a.i * c.i * N.i)) / N.i^2)

    # For summary risk ratio:
    R. <- sum((a.i * n.2i) / N.i)
    S. <- sum((c.i * n.1i) / N.i)

    # Individual study odds ratios:
    if(method == "odds.ratio"){
        OR.i <- (a.i * d.i) / (b.i * c.i)
        lnOR.i <- log(OR.i)
        SE.lnOR.i <- sqrt(1/a.i + 1/b.i + 1/c.i + 1/d.i)
        SE.OR.i <- exp(SE.lnOR.i)
        lower.lnOR.i <- lnOR.i - (z * SE.lnOR.i)
        upper.lnOR.i <- lnOR.i + (z * SE.lnOR.i)
        lower.OR.i <- exp(lower.lnOR.i)
        upper.OR.i <- exp(upper.lnOR.i)

        # Weights:
        w.i <- (b.i * c.i) / N.i
        w.iv.i <- 1 / (SE.lnOR.i)^2

        # Pooled odds ratio (inverse variance weights; relative effect measures
        # combined in their natural scale):
        OR.mh <- sum(w.iv.i * OR.i) / sum(w.iv.i)
        lnOR.mh <- log(OR.mh)
        SE.lnOR.mh <- sqrt(1/2 * ((E/R^2) + ((F. + G)/(R * S)) + (H/S^2)))

        # DSL pooled odds ratios:
        Q <- sum(w.iv.i * (lnOR.i - lnOR.mh)^2)
        df <- k - 1
        p.heterogeneity <- 1 - pchisq(Q, df)
        tau.sq.upper <- Q - df
        tau.sq.lower <- sum(w.iv.i) - (sum((w.iv.i)^2) / sum(w.iv.i))
        # If Q is less than (k - 1) tau.sq equals zero:
        tau.sq <- ifelse(Q < (k - 1), 0, (tau.sq.upper / tau.sq.lower))
        w.dsl.i <- 1 / (((SE.lnOR.i)^2) + tau.sq)
        lnOR.dsl <- sum(w.dsl.i * lnOR.i) / sum(w.dsl.i)
        OR.dsl <- exp(lnOR.dsl)
        SE.lnOR.dsl <- 1 / sqrt(sum(w.dsl.i))
        SE.OR.dsl <- exp(SE.lnOR.dsl)
        lower.lnOR.dsl <- log(OR.dsl) - (z * SE.lnOR.dsl)
        upper.lnOR.dsl <- log(OR.dsl) + (z * SE.lnOR.dsl)
        lower.OR.dsl <- exp(lower.lnOR.dsl)
        upper.OR.dsl <- exp(upper.lnOR.dsl)

        # Higgins and Thompson (2002) H^2 and I^2 statistic:
        Hsq <- Q / (k - 1)
        lnHsq <- log(Hsq)
        if(Q > k) {
            lnHsq.se <- 0.5 * (log(Q) - log(k - 1)) / (sqrt(2 * Q) - sqrt(2 * k - 3))
        }
        if(Q <= k) {
            lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2))))
        }
        lnHsq.l <- lnHsq - (z * lnHsq.se)
        lnHsq.u <- lnHsq + (z * lnHsq.se)
        Hsq.l <- exp(lnHsq.l)
        Hsq.u <- exp(lnHsq.u)
        Isq <- ((Hsq - 1) / Hsq) * 100
        Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100
        Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100

        # Test of effect. Code for p-value taken from the z.test function in
        # the TeachingDemos package:
        effect.z <- log(OR.dsl) / SE.lnOR.dsl
        alternative <- match.arg(alternative)
        p.effect <- switch(alternative,
            two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE),
            less = pnorm(effect.z),
            greater = pnorm(effect.z, lower.tail = FALSE))

        # Results:
        OR <- as.data.frame(cbind(OR.i, SE.OR.i, lower.OR.i, upper.OR.i))
        names(OR) <- c("est", "se", "lower", "upper")
        OR.summary <- as.data.frame(cbind(OR.dsl, SE.OR.dsl, lower.OR.dsl, upper.OR.dsl))
        names(OR.summary) <- c("est", "se", "lower", "upper")
        weights <- as.data.frame(cbind(w.iv.i, w.dsl.i))
        names(weights) <- c("inv.var", "dsl")
        Hsq <- as.data.frame(cbind(Hsq, Hsq.l, Hsq.u))
        names(Hsq) <- c("est", "lower", "upper")
        Isq <- as.data.frame(cbind(Isq, Isq.l, Isq.u))
        names(Isq) <- c("est", "lower", "upper")
        rval <- list(OR = OR, OR.summary = OR.summary, weights = weights,
            heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity),
            Hsq = Hsq, Isq = Isq, tau.sq = tau.sq,
            effect = c(z = effect.z, p.value = p.effect))
        return(rval)
    }

    else if(method == "risk.ratio"){
        RR.i <- (a.i / n.1i) / (c.i / n.2i)
        lnRR.i <- log(RR.i)
        SE.lnRR.i <- sqrt(1/a.i + 1/c.i - 1/n.1i - 1/n.2i)
        SE.RR.i <- exp(SE.lnRR.i)
        lower.lnRR.i <- lnRR.i - (z * SE.lnRR.i)
        upper.lnRR.i <- lnRR.i + (z * SE.lnRR.i)
        lower.RR.i <- exp(lower.lnRR.i)
        upper.RR.i <- exp(upper.lnRR.i)

        # Weights:
        w.i <- (b.i * c.i) / N.i
        w.iv.i <- 1 / (SE.lnRR.i)^2

        # MH pooled risk ratios (relative effect measures combined in their natural scale):
        RR.mh <- sum(w.i * RR.i) / sum(w.i)
        lnRR.mh <- log(RR.mh)
        SE.lnRR.mh <- sqrt(P / (R. * S.))
        SE.RR.mh <- exp(SE.lnRR.mh)
        lower.lnRR.mh <- log(RR.mh) - (z * SE.lnRR.mh)
        upper.lnRR.mh <- log(RR.mh) + (z * SE.lnRR.mh)
        lower.RR.mh <- exp(lower.lnRR.mh)
        upper.RR.mh <- exp(upper.lnRR.mh)

        # DSL pooled risk ratios:
        Q <- sum(w.iv.i * (lnRR.i - lnRR.mh)^2)
        df <- k - 1
        p.heterogeneity <- 1 - pchisq(Q, df)
        tau.sq.upper <- Q - df
        tau.sq.lower <- sum(w.iv.i) - (sum((w.iv.i)^2) / sum(w.iv.i))
        # If Q is less than (k - 1) tau.sq equals zero:
        tau.sq <- ifelse(Q < (k - 1), 0, (tau.sq.upper / tau.sq.lower))
        w.dsl.i <- 1 / (((SE.lnRR.i)^2) + tau.sq)
        lnRR.dsl <- sum(w.dsl.i * lnRR.i) / sum(w.dsl.i)
        RR.dsl <- exp(lnRR.dsl)
        SE.lnRR.dsl <- 1 / sqrt(sum(w.dsl.i))
        SE.RR.dsl <- exp(SE.lnRR.dsl)
        lower.lnRR.dsl <- log(RR.dsl) - (z * SE.lnRR.dsl)
        upper.lnRR.dsl <- log(RR.dsl) + (z * SE.lnRR.dsl)
        lower.RR.dsl <- exp(lower.lnRR.dsl)
        upper.RR.dsl <- exp(upper.lnRR.dsl)

        # Higgins and Thompson (2002) H^2 and I^2 statistic:
        Hsq <- Q / (k - 1)
        lnHsq <- log(Hsq)
        if(Q > k) {
            lnHsq.se <- 0.5 * (log(Q) - log(k - 1)) / (sqrt(2 * Q) - sqrt(2 * k - 3))
        }
        if(Q <= k) {
            lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2))))
        }
        lnHsq.l <- lnHsq - (z * lnHsq.se)
        lnHsq.u <- lnHsq + (z * lnHsq.se)
        Hsq.l <- exp(lnHsq.l)
        Hsq.u <- exp(lnHsq.u)
        Isq <- ((Hsq - 1) / Hsq) * 100
        Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100
        Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100

        # Test of effect. Code for p-value taken from the z.test function in
        # the TeachingDemos package:
        effect.z <- log(RR.dsl) / SE.lnRR.dsl
        alternative <- match.arg(alternative)
        p.effect <- switch(alternative,
            two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE),
            less = pnorm(effect.z),
            greater = pnorm(effect.z, lower.tail = FALSE))

        # Results:
        RR <- as.data.frame(cbind(RR.i, SE.RR.i, lower.RR.i, upper.RR.i))
        names(RR) <- c("est", "se", "lower", "upper")
        RR.summary <- as.data.frame(cbind(RR.dsl, SE.RR.dsl, lower.RR.dsl, upper.RR.dsl))
        names(RR.summary) <- c("est", "se", "lower", "upper")
        weights <- as.data.frame(cbind(w.iv.i, w.dsl.i))
        names(weights) <- c("inv.var", "dsl")
        Hsq <- as.data.frame(cbind(Hsq, Hsq.l, Hsq.u))
        names(Hsq) <- c("est", "lower", "upper")
        Isq <- as.data.frame(cbind(Isq, Isq.l, Isq.u))
        names(Isq) <- c("est", "lower", "upper")
        rval <- list(RR = RR, RR.summary = RR.summary, weights = weights,
            heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity),
            Hsq = Hsq, Isq = Isq, tau.sq = tau.sq,
            effect = c(z = effect.z, p.value = p.effect))
        return(rval)
    }
}
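# Illustrative random-effects (DerSimonian-Laird) meta-analysis
# (made-up trial counts):
# ev.trt <- c(15, 30, 12); n.trt <- c(100, 120, 80)
# ev.ctrl <- c(25, 40, 20); n.ctrl <- c(100, 125, 85)
# epi.dsl(ev.trt, n.trt, ev.ctrl, n.ctrl,
#     names = c("Trial 1", "Trial 2", "Trial 3"),
#     method = "odds.ratio", alternative = "two.sided", conf.level = 0.95)
# tau.sq in the result is the between-trial variance; it is set to zero
# when Q < k - 1.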
".selectstring" <- function (text, first, last = 1e+06){ storage.mode(text) <- "character" n <- max(lt <- length(text), length(first), length(last)) if (lt < n) text <- rep(text, length = n) substr(text, first, last) } "epi.convgrid" <- function(os.refs){ name <- c("SW","SX","SY","SZ","TV","SV","SQ","SR","SS","ST","SU","TQ","TR","SM","SN","SO","SP","TL","TM","NQ","NL","NF","NA","HV","HQ","HL","SG","SB","NW", "NR","NM","NG","NB","HW","HR","HM","SH","SC","NX","NS","NN","NH","NC","HX","HS","HN","SJ","SD","NY","NT","NO","NJ","ND","HY","HT","HO","SK","SE", "NZ","NU","NP","NK","NE","HZ","HU","HP","TF","TA","OV","OQ","OL","OF","OA","JV","JQ","JL","TG","TB","OW") easting <- c(100,200,300,400,500,0,0,100,200,300,400,500,600,100,200,300,400,500,600,0,0,0, 0,0,0,0,100,100,100,100,100,100,100,100,100,100,200,200,200,200,200,200,200,200, 200,200,300,300,300,300,300,300,300,300,300,300,400,400,400,400,400,400,400,400,400,400, 500,500,500,500,500,500,500,500,500,500,600,600,600) northing <- c(0,0,0,0,0,0,100,100,100,100,100,100,100,200,200,200,200, 200,200,600,700,800,900,1000,1100,1200,300,400,500,600,700,800,900,1000, 1100,1200,300,400,500,600,700,800,900,1000,1100,1200,300,400,500,600,700, 800,900,1000,1100,1200,300,400,500,600,700,800,900,1000,1100,1200,300,400, 500,600,700,800,900,1000,1100,1200,300,400,500) cells <- as.data.frame(cbind(easting, northing)) cells <- as.data.frame(cbind(name, cells)) x.coord = 0 y.coord = 0 res <- as.matrix(cbind(x.coord, y.coord)) for(i in 1:length(os.refs)){ grid <- os.refs[i] grid <- .selectstring(grid, 1, 2) coords <- os.refs[i] easting <- as.numeric(.selectstring(coords, 3, 5)) * 100 northing <- as.numeric(.selectstring(coords, 6, 8)) * 100 id <- cells$name == grid tmp <- cells[id, 1:3] tmp <- cbind(((tmp$easting * 1000) + easting), ((tmp$northing * 1000) + northing)) if(dim(tmp)[1] == 0) tmp <- matrix(c(NA, NA), nrow = 1) rval <- rbind(res, tmp) } rval <- rval[-1,] return(rval) } epiR/R/epi.indirectadj.R0000644000176200001440000000664512601641614014545 0ustar liggesusers"epi.indirectadj" <- function(obs, pop, std, units, conf.level = 0.95){ # How many strata (rows) are there? n.strata <- dim(pop)[1] # How many covariates are there? n.cov <- dim(pop)[2] N <- 1 - ((1 - conf.level) / 2) alpha <- 1 - conf.level z <- qnorm(N, mean = 0, sd = 1) tmp <- data.frame(strata = rep(rownames(pop), times = n.cov), cov = rep(colnames(pop), each = n.strata), pop = as.vector(pop), std = rep(as.vector(std[1:n.cov]), each = n.strata)) # Expected events (equals std incidence multiplied by population size): tmp$exp <- (tmp$pop * tmp$std) # tmp <- tmp[order(tmp$strata, tmp$cov),] # Crude risk by strata: # Turn 'obs' into a table object so calculations can easily be done by strata: t.obs <- by(data = obs, INDICES = rownames(obs), FUN = sum) t.exp <- by(data = tmp$exp, INDICES = tmp$strata, FUN = sum) t.pop <- by(data = tmp$pop, INDICES = tmp$strata, FUN = sum) # Confidence interval for crude incidence risk estimates corrected following email from Gillian Raab: crude.p <- t.obs / t.pop # crude.se <- crude.p / sqrt(t.pop) ## Incorrect. 
## --- epiR/R/epi.indirectadj.R ---

"epi.indirectadj" <- function(obs, pop, std, units, conf.level = 0.95){
    # How many strata (rows) are there?
    n.strata <- dim(pop)[1]
    # How many covariates are there?
    n.cov <- dim(pop)[2]
    N <- 1 - ((1 - conf.level) / 2)
    alpha <- 1 - conf.level
    z <- qnorm(N, mean = 0, sd = 1)

    tmp <- data.frame(strata = rep(rownames(pop), times = n.cov),
        cov = rep(colnames(pop), each = n.strata),
        pop = as.vector(pop),
        std = rep(as.vector(std[1:n.cov]), each = n.strata))
    # Expected events (equals std incidence multiplied by population size):
    tmp$exp <- (tmp$pop * tmp$std)
    # tmp <- tmp[order(tmp$strata, tmp$cov),]

    # Crude risk by strata:
    # Turn 'obs' into a table object so calculations can easily be done by strata:
    t.obs <- by(data = obs, INDICES = rownames(obs), FUN = sum)
    t.exp <- by(data = tmp$exp, INDICES = tmp$strata, FUN = sum)
    t.pop <- by(data = tmp$pop, INDICES = tmp$strata, FUN = sum)

    # Confidence interval for crude incidence risk estimates corrected
    # following email from Gillian Raab:
    crude.p <- t.obs / t.pop
    # crude.se <- crude.p / sqrt(t.pop) ## Incorrect.
    crude.se <- crude.p / sqrt(t.obs)   ## replaced pop by obs
    crude.l <- qchisq(alpha / 2, 2 * t.obs) / 2 / t.pop ## next 2 lines changed
    crude.u <- qchisq(1 - alpha / 2, 2 * (t.obs + 1)) / 2 / t.pop
    crude.strata <- data.frame(est = as.vector(crude.p) * units,
        lower = as.vector(crude.l) * units, upper = as.vector(crude.u) * units)
    rownames(crude.strata) <- names(t.exp)

    # Indirectly adjusted risk for each stratum (see page 378 of the Stata manual):
    t.obs <- by(data = obs, INDICES = rownames(obs), FUN = sum)
    t.exp <- by(data = tmp$exp, INDICES = tmp$strata, FUN = sum)
    t.pop <- by(data = tmp$pop, INDICES = tmp$strata, FUN = sum)

    if(n.cov > 1){
        adj.p <- (std[n.cov + 1] * (t.obs / t.exp))
        adj.l <- (std[n.cov + 1] * (qpois((1 - conf.level) / 2,
            lambda = t.obs, log.p = FALSE) / t.exp))
        adj.u <- (std[n.cov + 1] * (qpois(1 - (1 - conf.level) / 2,
            lambda = t.obs, log.p = FALSE) / t.exp))
        adj.strata <- data.frame(est = as.vector(adj.p) * units,
            lower = as.vector(adj.l) * units, upper = as.vector(adj.u) * units)
        rownames(adj.strata) <- names(t.exp)
    }
    if(n.cov == 1){
        adj.p <- (std * (t.obs / t.exp))
        adj.l <- (std * (qpois((1 - conf.level) / 2,
            lambda = t.obs, log.p = FALSE) / t.exp))
        adj.u <- (std * (qpois(1 - (1 - conf.level) / 2,
            lambda = t.obs, log.p = FALSE) / t.exp))
        adj.strata <- data.frame(est = as.vector(adj.p) * units,
            lower = as.vector(adj.l) * units, upper = as.vector(adj.u) * units)
        rownames(adj.strata) <- names(t.exp)
    }

    # Crude standardised mortality ratio (confidence intervals based on
    # Breslow and Day 1987 p 69-71):
    smr.p <- t.obs / t.exp
    smr.l <- qpois((1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp
    smr.u <- qpois(1 - (1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp
    smr.strata <- data.frame(obs = as.vector(t.obs), exp = as.vector(t.exp),
        est = as.vector(smr.p), lower = as.vector(smr.l), upper = as.vector(smr.u))
    rownames(smr.strata) <- names(t.exp)

    rval <- list(crude.strata = crude.strata, adj.strata = adj.strata,
        smr.strata = smr.strata)
    return(rval)
}
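# Illustrative indirectly adjusted risk calculation (made-up data; the exact
# input shapes expected by epi.indirectadj() are an assumption here): two
# strata, one covariate level. obs = observed events per stratum, pop =
# population size per stratum, std = standard population risk.
# obs <- matrix(c(58, 130), nrow = 2, dimnames = list(c("A", "B"), ""))
# pop <- matrix(c(1000, 2000), nrow = 2, dimnames = list(c("A", "B"), "total"))
# epi.indirectadj(obs = obs, pop = pop, std = 0.060, units = 100,
#     conf.level = 0.95)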
## --- epiR/R/epi.mh.R ---

"epi.mh" <- function(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio",
    alternative = c("two.sided", "less", "greater"), conf.level = 0.95) {
    # Declarations:
    k <- length(names)
    a.i <- ev.trt
    b.i <- n.trt - ev.trt
    c.i <- ev.ctrl
    d.i <- n.ctrl - ev.ctrl
    N. <- 1 - ((1 - conf.level) / 2)
    z <- qnorm(N., mean = 0, sd = 1)

    # Test each stratum for zero values. Add 0.5 to all cells if any cell has a zero value:
    for(i in 1:k){
        if(a.i[i] < 1 | b.i[i] < 1 | c.i[i] < 1 | d.i[i] < 1){
            a.i[i] <- a.i[i] + 0.5; b.i[i] <- b.i[i] + 0.5
            c.i[i] <- c.i[i] + 0.5; d.i[i] <- d.i[i] + 0.5
        }
    }
    n.1i <- a.i + b.i
    n.2i <- c.i + d.i
    N.i <- a.i + b.i + c.i + d.i

    # For summary odds ratio:
    R <- sum((a.i * d.i) / N.i)
    S <- sum((b.i * c.i) / N.i)
    E <- sum(((a.i + d.i) * a.i * d.i) / N.i^2)
    F. <- sum(((a.i + d.i) * b.i * c.i) / N.i^2)
    G <- sum(((b.i + c.i) * a.i * d.i) / N.i^2)
    H <- sum(((b.i + c.i) * b.i * c.i) / N.i^2)
    P <- sum(((n.1i * n.2i * (a.i + c.i)) - (a.i * c.i * N.i)) / N.i^2)

    # For summary risk ratio (Greenland-Robins; as in epi.dsl):
    R. <- sum((a.i * n.2i) / N.i)
    S. <- sum((c.i * n.1i) / N.i)

    if(method == "odds.ratio"){
        # Individual study odds ratios:
        OR.i <- (a.i * d.i) / (b.i * c.i)
        lnOR.i <- log(OR.i)
        SE.lnOR.i <- sqrt(1/a.i + 1/b.i + 1/c.i + 1/d.i)
        SE.OR.i <- exp(SE.lnOR.i)
        lower.lnOR.i <- lnOR.i - (z * SE.lnOR.i)
        upper.lnOR.i <- lnOR.i + (z * SE.lnOR.i)
        lower.OR.i <- exp(lower.lnOR.i)
        upper.OR.i <- exp(upper.lnOR.i)

        # Weights:
        w.i <- (b.i * c.i) / N.i
        # w.i <- 1 / (1/a.i + 1/b.i + 1/c.i + 1/d.i)
        w.iv.i <- 1 / (SE.lnOR.i)^2

        # MH pooled odds ratios (relative effect measures combined in their natural scale):
        OR.mh <- sum(w.i * OR.i) / sum(w.i)
        lnOR.mh <- sum(w.i * log(OR.i)) / sum(w.i)

        # Same method for calculating confidence intervals around the pooled OR
        # as epi.2by2, so results differ from page 304 of Egger, Smith and Altman:
        G <- a.i * d.i / N.i
        H <- b.i * c.i / N.i
        P <- (a.i + d.i) / N.i
        Q <- (b.i + c.i) / N.i
        GQ.HP <- G * Q + H * P
        sumG <- sum(G)
        sumH <- sum(H)
        sumGP <- sum(G * P)
        sumGH <- sum(G * H)
        sumHQ <- sum(H * Q)
        sumGQ <- sum(G * Q)
        sumGQ.HP <- sum(GQ.HP)
        var.lnOR.mh <- sumGP / (2 * sumG^2) + sumGQ.HP/(2 * sumGH) + sumHQ/(2 * sumH^2)
        SE.lnOR.mh <- sqrt(var.lnOR.mh)
        SE.OR.mh <- exp(SE.lnOR.mh)
        lower.OR.mh <- exp(lnOR.mh - z * SE.lnOR.mh)
        upper.OR.mh <- exp(lnOR.mh + z * SE.lnOR.mh)

        # Test of heterogeneity (based on inverse variance weights):
        Q <- sum(w.iv.i * (lnOR.i - lnOR.mh)^2)
        df <- k - 1
        p.heterogeneity <- 1 - pchisq(Q, df)

        # Higgins and Thompson (2002) H^2 and I^2 statistic:
        Hsq <- Q / (k - 1)
        lnHsq <- log(Hsq)
        if(Q > k) {
            lnHsq.se <- 0.5 * (log(Q) - log(k - 1)) / (sqrt(2 * Q) - sqrt(2 * k - 3))
        }
        if(Q <= k) {
            lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2))))
        }
        lnHsq.l <- lnHsq - (z * lnHsq.se)
        lnHsq.u <- lnHsq + (z * lnHsq.se)
        Hsq.l <- exp(lnHsq.l)
        Hsq.u <- exp(lnHsq.u)
        Isq <- ((Hsq - 1) / Hsq) * 100
        Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100
        Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100

        # Test of effect. Code for p-value taken from the z.test function in
        # the TeachingDemos package:
        effect.z <- lnOR.mh / SE.lnOR.mh
        alternative <- match.arg(alternative)
        p.effect <- switch(alternative,
            two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE),
            less = pnorm(effect.z),
            greater = pnorm(effect.z, lower.tail = FALSE))

        # Results:
        OR <- as.data.frame(cbind(OR.i, SE.OR.i, lower.OR.i, upper.OR.i))
        names(OR) <- c("est", "se", "lower", "upper")
        OR.summary <- as.data.frame(cbind(OR.mh, SE.OR.mh, lower.OR.mh, upper.OR.mh))
        names(OR.summary) <- c("est", "se", "lower", "upper")
        weights <- as.data.frame(cbind(w.i, w.iv.i))
        names(weights) <- c("raw", "inv.var")
        Hsq <- as.data.frame(cbind(Hsq, Hsq.l, Hsq.u))
        names(Hsq) <- c("est", "lower", "upper")
        Isq <- as.data.frame(cbind(Isq, Isq.l, Isq.u))
        names(Isq) <- c("est", "lower", "upper")
        rval <- list(OR = OR, OR.summary = OR.summary, weights = weights,
            heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity),
            Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect))
    }

    else if(method == "risk.ratio"){
        # Individual study risk ratios:
        RR.i <- (a.i / n.1i) / (c.i / n.2i)
        lnRR.i <- log(RR.i)
        SE.lnRR.i <- sqrt(1/a.i + 1/c.i - 1/n.1i - 1/n.2i)
        SE.RR.i <- exp(SE.lnRR.i)
        lower.lnRR.i <- lnRR.i - (z * SE.lnRR.i)
        upper.lnRR.i <- lnRR.i + (z * SE.lnRR.i)
        lower.RR.i <- exp(lower.lnRR.i)
        upper.RR.i <- exp(upper.lnRR.i)

        # Weights:
        w.i <- (c.i * n.1i) / N.i
        w.iv.i <- 1 / (SE.lnRR.i)^2

        # MH pooled risk ratios (relative effect measures combined in their natural scale):
        RR.mh <- sum(w.i * RR.i) / sum(w.i)
        lnRR.mh <- log(RR.mh)
        SE.lnRR.mh <- sqrt(P / (R. * S.))
        SE.RR.mh <- exp(SE.lnRR.mh)
        lower.lnRR.mh <- log(RR.mh) - (z * SE.lnRR.mh)
        upper.lnRR.mh <- log(RR.mh) + (z * SE.lnRR.mh)
        lower.RR.mh <- exp(lower.lnRR.mh)
        upper.RR.mh <- exp(upper.lnRR.mh)

        # Test of heterogeneity (based on inverse variance weights):
        Q <- sum(w.iv.i * (lnRR.i - lnRR.mh)^2)
        df <- k - 1
        p.heterogeneity <- 1 - pchisq(Q, df)

        # Higgins and Thompson (2002) H^2 and I^2 statistic:
        Hsq <- Q / (k - 1)
        lnHsq <- log(Hsq)
        if(Q > k) {
            lnHsq.se <- 0.5 * (log(Q) - log(k - 1)) / (sqrt(2 * Q) - sqrt(2 * k - 3))
        }
        if(Q <= k) {
            lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2))))
        }
        lnHsq.l <- lnHsq - (z * lnHsq.se)
        lnHsq.u <- lnHsq + (z * lnHsq.se)
        Hsq.l <- exp(lnHsq.l)
        Hsq.u <- exp(lnHsq.u)
        Isq <- ((Hsq - 1) / Hsq) * 100
        Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100
        Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100

        # Test of effect. Code for p-value taken from the z.test function in
        # the TeachingDemos package:
        effect.z <- log(RR.mh) / SE.lnRR.mh
        alternative <- match.arg(alternative)
        p.effect <- switch(alternative,
            two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE),
            less = pnorm(effect.z),
            greater = pnorm(effect.z, lower.tail = FALSE))

        # Results:
        RR <- as.data.frame(cbind(RR.i, SE.RR.i, lower.RR.i, upper.RR.i))
        names(RR) <- c("est", "se", "lower", "upper")
        RR.summary <- as.data.frame(cbind(RR.mh, SE.RR.mh, lower.RR.mh, upper.RR.mh))
        names(RR.summary) <- c("est", "se", "lower", "upper")
        weights <- as.data.frame(cbind(w.i, w.iv.i))
        names(weights) <- c("raw", "inv.var")
        Hsq <- as.data.frame(cbind(Hsq, Hsq.l, Hsq.u))
        names(Hsq) <- c("est", "lower", "upper")
        Isq <- as.data.frame(cbind(Isq, Isq.l, Isq.u))
        names(Isq) <- c("est", "lower", "upper")
        rval <- list(RR = RR, RR.summary = RR.summary, weights = weights,
            heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity),
            Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect))
    }
    return(rval)
}
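# Illustrative fixed-effects (Mantel-Haenszel) meta-analysis, using the same
# made-up trial counts as the epi.dsl() example above:
# epi.mh(ev.trt = c(15, 30, 12), n.trt = c(100, 120, 80),
#     ev.ctrl = c(25, 40, 20), n.ctrl = c(100, 125, 85),
#     names = c("Trial 1", "Trial 2", "Trial 3"),
#     method = "odds.ratio", alternative = "two.sided", conf.level = 0.95)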
## --- epiR/R/epi.detectsize.R ---

epi.detectsize <- function(N, prev, se, sp, interpretation = "series",
    covar = c(0,0), conf.level = 0.95, finite.correction = TRUE){
    # covar is a vector of length two. The first element is the covariance for
    # the D+ group, the second element is the covariance for the D- group.
    # See Dohoo, Martin and Stryhn page 103.
    alpha <- (1 - conf.level)

    # Work out sensitivity and specificity:
    if (length(se) > 1 & interpretation == "series") {
        Ses <- se[1] * se[2] + covar[1]
        Sps <- 1 - (1 - sp[1]) * (1 - sp[2]) - covar[2]
        use <- Ses
        usp <- Sps
    }
    if (length(se) > 1 & interpretation == "parallel") {
        Sep <- 1 - (1 - se[1]) * (1 - se[2]) - covar[1]
        Spp <- sp[1] * sp[2] + covar[2]
        use <- Sep
        usp <- Spp
    }
    if (length(se) == 1) {
        use <- se
        usp <- sp
    }

    if (length(N) == 1) {
        units = round((1 - alpha^(1/(N[1] * prev[1] * use))) *
            (N[1] - (N[1] * prev[1] * use - 1)/2), digits = 0)
        units.corrected <- round(units/(1 + (units/N[1])), digits = 0)
        if (finite.correction == TRUE) {
            performance <- as.data.frame(cbind(sens = use, spec = usp))
            sample.size <- units.corrected
            rval <- list(performance = performance, sample.size = sample.size)
        }
        if (finite.correction == FALSE) {
            performance <- as.data.frame(cbind(sens = use, spec = usp))
            sample.size <- units
            rval <- list(performance = performance, sample.size = sample.size)
        }
    }

    if (length(N) == 2) {
        units <- round((1 - alpha^(1/(N[2] * prev[2] * use))) *
            (N[2] - (N[2] * prev[2] * use - 1)/2), digits = 0)
        pd <- prev[1] * (1 - alpha)
        clusters <- round((1 - alpha^(1/(N[1] * pd))) *
            (N[1] - (N[1] * pd - 1)/2), digits = 0)
        total <- units * clusters
        units.corrected <- round(units/(1 + (units/N[2])), digits = 0)
        clusters.corrected <- round(clusters/(1 + (clusters/N[1])), digits = 0)
        total.corrected <- units.corrected * clusters.corrected
        if (finite.correction == TRUE) {
            performance <- as.data.frame(cbind(sens = use, spec = usp))
            sample.size <- as.data.frame(cbind(clusters = clusters.corrected,
                units = units.corrected, total = total.corrected))
            rval <- list(performance = performance, sample.size = sample.size)
        }
        if (finite.correction == FALSE) {
            performance <- as.data.frame(cbind(sens = use, spec = usp))
            sample.size <- as.data.frame(cbind(clusters = clusters,
                units = units, total = total))
            rval <- list(performance = performance, sample.size = sample.size)
        }
    }
    return(rval)
}

## --- epiR/R/epi.herdtest.R ---

epi.herdtest <- function(se, sp, P, N, n, k){
    # Probability of testing positive:
    APpos <- P * se + (1 - P) * (1 - sp)
    APneg <- (1 - sp)
    # if(n/N < 0.2){
    #    # Binomial distribution:
    #    HSe <- 1 - pbinom(k - 1, n, P)
    #    HSp <- phyper(k - 1, N * APneg, N - N * APneg, n)
    #    rval <- list(APpos = APpos, APneg = APneg, HSe = HSe, HSp = HSp)
    # }
    # else if(n/N >= 0.2){
    # Hypergeometric distribution:
    HSe <- 1 - phyper(k - 1, N * APpos, N - N * APpos, n)
    HSp <- phyper(k - 1, N * APneg, N - N * APneg, n)
    rval <- list(APpos = APpos, APneg = APneg, HSe = HSe, HSp = HSp)
    # }
    rval
}
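# Illustrative herd-test call (made-up values): a herd of N = 200 animals,
# n = 30 sampled, the herd declared positive if k = 2 or more test positive;
# test sensitivity 0.90, specificity 0.98, within-herd prevalence 0.10:
# epi.herdtest(se = 0.90, sp = 0.98, P = 0.10, N = 200, n = 30, k = 2)
# Herd-level sensitivity and specificity are computed from the hypergeometric
# distribution of test-positive animals in the sample.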
## --- epiR/R/epi.cluster1size.R ---

"epi.cluster1size" <- function(n, mean, var, epsilon.r, method = "mean", conf.level = 0.95){
    N. <- 1 - ((1 - conf.level) / 2)
    z <- qnorm(N., mean = 0, sd = 1)

    if (method == "total") {
        Vsq <- var / mean^2
        numerator <- z^2 * n * Vsq
        denominator <- z^2 * Vsq + (n - 1) * epsilon.r^2
        rval <- round(numerator/denominator, digits = 0)
    }
    if (method == "mean") {
        Vsq <- var / mean^2
        numerator <- z^2 * n * Vsq
        denominator <- z^2 * Vsq + (n - 1) * epsilon.r^2
        rval <- round(numerator/denominator, digits = 0)
    }
    if (method == "mean.per.unit") {
        Vsq <- var / mean^2
        numerator <- z^2 * n * Vsq
        denominator <- z^2 * Vsq + (n - 1) * epsilon.r^2
        rval <- round(numerator/denominator, digits = 0)
    }
    if (method == "proportion") {
        if (length(var) != 2)
            stop("Error: var must be of length 2")
        if (length(mean) != 2)
            stop("Error: mean must be of length 2")
        rval <- 'Not implemented yet!'
    }
    return(rval)
}

## --- epiR/R/epi.asc.R ---

epi.asc <- function(dat, file, xllcorner, yllcorner, cellsize, na = -9999) {
    id <- is.na(dat)
    dat[id] <- na
    ncols <- dim(dat)[2]
    nrows <- dim(dat)[1]
    h.ncol <- paste("ncols", nrows)
    h.nrow <- paste("nrows", ncols)
    # h.ncol <- paste("ncols", ncols)
    # h.nrow <- paste("nrows", nrows)
    h.xllcorner <- paste("xllcorner", xllcorner)
    h.yllcorner <- paste("yllcorner", yllcorner)
    h.cellsize <- paste("cellsize", cellsize)
    h.nodata <- paste("nodata_value", na)
    header <- rbind(h.ncol, h.nrow, h.xllcorner, h.yllcorner, h.cellsize, h.nodata)
    write.table(header, file = file, append = FALSE, quote = FALSE, sep = " ",
        row.names = FALSE, col.names = FALSE)
    rval <- as.matrix(dat)
    rval <- matrix(rval, nrow = ncols, byrow = TRUE)
    rval <- rval[ncols:1,]
    write.table(rval, file = file, append = TRUE, quote = FALSE, sep = " ",
        row.names = FALSE, col.names = FALSE)
}
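# Illustrative call writing a small raster to an ESRI ASCII grid file
# (made-up 3 x 3 matrix; the file is written to a temporary location):
# m <- matrix(c(1, 2, 3, 4, NA, 6, 7, 8, 9), nrow = 3)
# epi.asc(dat = m, file = file.path(tempdir(), "demo.asc"),
#     xllcorner = 0, yllcorner = 0, cellsize = 100)
# Missing values are written using the nodata flag (default -9999).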
## --- epiR/R/epi.interaction.r ---

epi.interaction <- function(model, coeff, type = c("RERI", "APAB", "S"), conf.level = 0.95){
    N. <- 1 - ((1 - conf.level)/2)
    z <- qnorm(N., mean = 0, sd = 1)

    if(type == "RERI"){
        if (!(class(model)[1] == "glm" & class(model)[2] == "lm") &
            !(class(model)[1] == "clogit" & class(model)[2] == "coxph"))
            stop("Error: model must be either a glm or coxph object")
        if(class(model)[1] == "glm" & class(model)[2] == "lm"){
            theta1 <- as.numeric(model$coefficients[coeff[1]])
            theta2 <- as.numeric(model$coefficients[coeff[2]])
            theta3 <- as.numeric(model$coefficients[coeff[3]])
        }
        if(class(model)[1] == "clogit" & class(model)[2] == "coxph"){
            theta1 <- as.numeric(model$coefficients[coeff[1]])
            theta2 <- as.numeric(model$coefficients[coeff[2]])
            theta3 <- as.numeric(model$coefficients[coeff[3]])
        }
        cov.mat <- vcov(model)
        h1 <- -exp(theta1)
        h2 <- -exp(theta2)
        h3 <- exp(theta3)
        reri.var <- (h1^2 * (cov.mat[coeff[1],coeff[1]])) +
            (h2^2 * (cov.mat[coeff[2],coeff[2]])) +
            (h3^2 * (cov.mat[coeff[3],coeff[3]])) +
            (2 * h1 * h2 * cov.mat[coeff[1],coeff[2]]) +
            (2 * h1 * h3 * cov.mat[coeff[1],coeff[3]]) +
            (2 * h2 * h3 * cov.mat[coeff[2],coeff[3]])
        reri.se <- sqrt(reri.var)
        reri.p <- exp(theta3) - exp(theta1) - exp(theta2) + 1
        reri.l <- reri.p - (z * reri.se)
        reri.u <- reri.p + (z * reri.se)
        rval <- data.frame(reri.p, reri.l, reri.u)
        names(rval) <- c("est", "lower", "upper")
    }

    if(type == "APAB"){
        if (!(class(model)[1] == "glm" & class(model)[2] == "lm") &
            !(class(model)[1] == "clogit" & class(model)[2] == "coxph"))
            stop("Error: model must be either a glm or coxph object")
        if(class(model)[1] == "glm" & class(model)[2] == "lm"){
            theta1 <- as.numeric(model$coefficients[coeff[1]])
            theta2 <- as.numeric(model$coefficients[coeff[2]])
            theta3 <- as.numeric(model$coefficients[coeff[3]])
        }
        if(class(model)[1] == "clogit" & class(model)[2] == "coxph"){
            theta1 <- as.numeric(model$coefficients[coeff[1]])
            theta2 <- as.numeric(model$coefficients[coeff[2]])
            theta3 <- as.numeric(model$coefficients[coeff[3]])
        }
        cov.mat <- vcov(model)
        h1 <- -exp(theta1 - theta3)
        h2 <- -exp(theta2 - theta3)
        h3 <- (exp(theta1) + exp(theta2) - 1) / exp(theta3)
        apab.var <- (h1^2 * (cov.mat[coeff[1],coeff[1]])) +
            (h2^2 * (cov.mat[coeff[2],coeff[2]])) +
            (h3^2 * (cov.mat[coeff[3],coeff[3]])) +
            (2 * h1 * h2 * cov.mat[coeff[1],coeff[2]]) +
            (2 * h1 * h3 * cov.mat[coeff[1],coeff[3]]) +
            (2 * h2 * h3 * cov.mat[coeff[2],coeff[3]])
        apab.se <- sqrt(apab.var)
        # apab.p <- exp(-theta3) - exp(theta1 - theta3) - exp(theta2 - theta3) + 1
        # Equation 4 (Skrondal 2003):
        apab.p <- (exp(theta3) - exp(theta1) - exp(theta2) + 1) / exp(theta3)
        apab.l <- apab.p - (z * apab.se)
        apab.u <- apab.p + (z * apab.se)
        rval <- data.frame(apab.p, apab.l, apab.u)
        names(rval) <- c("est", "lower", "upper")
    }

    if(type == "S"){
        if (!(class(model)[1] == "glm" & class(model)[2] == "lm") &
            !(class(model)[1] == "mle2") &
            !(class(model)[1] == "clogit" & class(model)[2] == "coxph"))
            stop("Error: model must be either a glm, mle2 or coxph object")
        if(class(model)[1] == "glm" & class(model)[2] == "lm"){
            theta1 <- as.numeric(model$coefficients[coeff[1]])
            theta2 <- as.numeric(model$coefficients[coeff[2]])
            theta3 <- as.numeric(model$coefficients[coeff[3]])
        }
        if(class(model)[1] == "clogit" & class(model)[2] == "coxph"){
            theta1 <- as.numeric(model$coefficients[coeff[1]])
            theta2 <- as.numeric(model$coefficients[coeff[2]])
            theta3 <- as.numeric(model$coefficients[coeff[3]])
        }
        if(class(model)[1] == "mle2"){
            theta1 <- as.numeric(slot(model, "fullcoef")[coeff[1]])
            theta2 <- as.numeric(slot(model, "fullcoef")[coeff[2]])
            theta3 <- as.numeric(slot(model, "fullcoef")[coeff[3]])
        }

        # Calculate S.p:
        S.p <- (exp(theta3) - 1) / (exp(theta1) + exp(theta2) - 2)
        cov.mat <- vcov(model)

        # If the model type is glm or cph and the point estimate of S is negative,
        # terminate the analysis and advise the user to use a linear odds model:
        if(class(model)[1] == "glm" & class(model)[2] == "lm" & S.p < 0){
            message <- paste("Point estimate of synergy index (S) is less than zero (",
                round(S.p, digits = 2),
                ").\n Confidence intervals cannot be calculated using the delta method. Consider re-parameterising as linear odds model.",
                sep = "")
            stop(message)
        }
        if(class(model)[1] == "clogit" & class(model)[2] == "coxph" & S.p < 0){
            message <- paste("Point estimate of synergy index (S) is less than zero (",
                round(S.p, digits = 2),
                ").\n Confidence intervals cannot be calculated using the delta method. Consider re-parameterising as linear odds model.",
                sep = "")
            stop(message)
        }

        # Use the delta method (Hosmer and Lemeshow 1992) if the model type is glm or cph:
        if(class(model)[1] == "glm" & class(model)[2] == "lm" & S.p > 0){
            ha <- -exp(theta1) / (exp(theta1) + exp(theta2) - 2)
            hb <- -exp(theta2) / (exp(theta1) + exp(theta2) - 2)
            hab <- exp(theta3) / (exp(theta3) - 1)
            lnS.var <- (ha^2 * (cov.mat[coeff[1],coeff[1]])) +
                (hb^2 * (cov.mat[coeff[2],coeff[2]])) +
                (hab^2 * (cov.mat[coeff[3],coeff[3]])) +
                (2 * ha * hb * cov.mat[coeff[1],coeff[2]]) +
                (2 * ha * hab * cov.mat[coeff[1],coeff[3]]) +
                (2 * hb * hab * cov.mat[coeff[2],coeff[3]])
            lnS.se <- sqrt(lnS.var)
            lnS.p <- log((exp(theta3) - 1)) - log((exp(theta1) + exp(theta2) - 2))
            lnS.l <- lnS.p - (z * lnS.se)
            lnS.u <- lnS.p + (z * lnS.se)
            S.p <- exp(lnS.p)
            S.l <- exp(lnS.l)
            S.u <- exp(lnS.u)
            rval <- as.data.frame(cbind(S.p, S.l, S.u))
            names(rval) <- c("est", "lower", "upper")
        }

        # Use the Skrondal (2003) method if the model type is mle2:
        if(class(model)[1] == "mle2"){
            # Confidence interval for S assuming regression coefficients are from
            # a linear odds model (see the appendix of Skrondal, 2003):
            S.p <- (exp(theta3) - 1) / (exp(theta1) + exp(theta2) - 2)
            lnS.p <- log(S.p)
            c <- (1 / (theta1 + theta2 + theta3)) - (1 / (theta1 + theta2))
            d <- 1 / (theta1 + theta2 + theta3)
            # Covariance matrix from the model.
            # Diagonal entries are the variances of the regression coefficients.
            # Off-diagonals are the covariances between the corresponding regression coefficients.
            tvcov <- vcov(model)
            theta1.var <- tvcov[coeff[1], coeff[1]]
            theta2.var <- tvcov[coeff[2], coeff[2]]
            theta3.var <- tvcov[coeff[3], coeff[3]]
            theta12.cov <- tvcov[coeff[1], coeff[2]]
            theta13.cov <- tvcov[coeff[1], coeff[3]]
            theta23.cov <- tvcov[coeff[2], coeff[3]]
            lnS.se <- sqrt((c^2 * theta1.var) + (c^2 * theta2.var) +
                (d^2 * theta3.var) + (2 * c^2 * theta12.cov) +
                (2 * c * d * theta13.cov) + (2 * c * d * theta23.cov))
            lnS.l <- lnS.p - (z * lnS.se)
            lnS.u <- lnS.p + (z * lnS.se)
            S.l <- exp(lnS.l)
            S.u <- exp(lnS.u)
            rval <- data.frame(S.p, S.l, S.u)
            names(rval) <- c("est", "lower", "upper")
        }
    }
    return(rval)
}
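# Illustrative RERI calculation from a logistic model with two binary
# exposures and their interaction term (simulated data, for demonstration only):
# set.seed(1)
# d <- data.frame(x1 = rbinom(500, 1, 0.4), x2 = rbinom(500, 1, 0.3))
# d$y <- rbinom(500, 1, plogis(-2 + 0.7 * d$x1 + 0.6 * d$x2 + 0.4 * d$x1 * d$x2))
# fit <- glm(y ~ x1 + x2 + x1:x2, family = binomial, data = d)
# epi.interaction(model = fit, coeff = c(2, 3, 4), type = "RERI", conf.level = 0.95)
# coeff gives the positions of the two main effects and the interaction term
# in the coefficient vector (the intercept is position 1). RERI > 0 suggests
# positive additive interaction between x1 and x2.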
## --- epiR/R/epi.cp.R ---

epi.cp <- function(dat){
    obs <- as.data.frame(cbind(id = 1:nrow(dat), cp = rep(0, times = nrow(dat))))
    nvar <- dim(dat)[2]
    cp <- unique(dat)
    cp <- cbind(id = 1:nrow(cp), cp)
    for(i in 1:nrow(cp)){
        tmp <- rep(0, times = nrow(dat))
        for(j in 1:nvar){
            tmp. <- as.numeric(dat[,j] == cp[i,(j+1)])
            tmp <- cbind(tmp, tmp.)
        }
        tmp <- apply(tmp, MARGIN = 1, FUN = sum)
        id <- tmp == nvar
        obs$cp[id] <- cp$id[i]
    }
    n <- hist(obs$cp, breaks = seq(from = 0, to = nrow(cp), by = 1), plot = FALSE)
    n <- n$counts
    end <- nvar + 1
    cov.pattern <- as.data.frame(cbind(id = cp[,1], n, cp[,2:end]))
    rval <- list(cov.pattern = cov.pattern, id = obs$cp)
    rval
}
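# Illustrative covariate-pattern summary (made-up data frame of two
# categorical covariates):
# d <- data.frame(sex = c("m", "f", "m", "m", "f"), site = c(1, 1, 2, 1, 1))
# epi.cp(d)
# Returns each unique covariate pattern, its frequency n, and the pattern
# identifier assigned to each row of the input data.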
## --- epiR/R/epi.conf.R ---

"epi.conf" <- function(dat, ctype = "mean.single", method, N, design = 1, conf.level = 0.95){
    N. <- 1 - ((1 - conf.level) / 2)
    z <- qnorm(N., mean = 0, sd = 1)

    # Define a function to calculate the confidence interval for a single
    # proportion. This is used on several occasions in this function:
    .propsingle <- function(tmp.dat, conf.level = conf.level){
        if (is.matrix(tmp.dat) == FALSE)
            stop("Error: dat must be a two-column matrix")
        # The method implemented here follows Altman et al (2000) p 46:
        r <- tmp.dat[,1]
        n <- tmp.dat[,1] + tmp.dat[,2]
        p <- r/n
        q <- 1 - r/n
        se <- sqrt((p * q) / n)
        A <- (2 * r) + (z * z)
        B <- z * sqrt((z * z) + (4 * r * q))
        C <- 2 * (n + (z * z))
        low <- (A - B) / C
        up <- (A + B) / C
        tmp.rval <- data.frame(est = p, se = se, lower = low, upper = up)
    }

    if(ctype == "mean.single"){
        if (is.vector(dat) == FALSE)
            stop("Error: dat must be a vector")
        mean <- mean(dat)
        n <- length(dat)
        var <- var(dat)
        sd <- sqrt(var)
        se <- sd/sqrt(n)
        P <- (1 - conf.level)/2
        t <- abs(qt(P, n - 1))
        low <- mean - (t * se)
        up <- mean + (t * se)
        rval <- data.frame(est = mean, se = se, lower = low, upper = up)
    }

    if(ctype == "mean.unpaired"){
        if (is.data.frame(dat) == FALSE)
            stop("Error: dat must be a two-column data frame")
        n <- as.vector(by(dat[,2], dat[,1], length))
        if (length(n) > 2)
            stop("Error: there must be only two groups")
        if (is.factor(dat[,1]) == FALSE)
            stop("Error: the first column of the data frame must be a factor")
        sum <- as.vector(by(dat[,2], dat[,1], sum))
        mean <- as.vector(by(dat[,2], dat[,1], mean))
        mean.diff <- mean[1] - mean[2]
        var <- as.vector(by(dat[,2], dat[,1], var))
        s <- sqrt((((n[1] - 1) * var[1]) + ((n[2] - 1) * var[2])) / (n[1] + n[2] - 2))
        se.diff <- s * sqrt(1/n[1] + 1/n[2])
        P <- (1 - conf.level)/2
        t <- abs(qt(P, (n[1] + n[2] - 2)))
        low <- mean[1] - mean[2] - (t * se.diff)
        up <- mean[1] - mean[2] + (t * se.diff)
        rval <- data.frame(est = mean[1] - mean[2], se = se.diff, lower = low, upper = up)
    }

    if(ctype == "mean.paired"){
        if (is.data.frame(dat) == FALSE)
            stop("Error: dat must be a two-column data frame")
        diff <- as.vector(dat[,2] - dat[,1])
        n <- length(dat[,1])
        mean.diff <- mean(diff)
        sd.diff <- sd(diff)
        se.diff <- sd.diff / sqrt(n)
        P <- (1 - conf.level)/2
        t <- abs(qt(P, (n - 1)))
        low <- mean.diff - (t * se.diff)
        up <- mean.diff + (t * se.diff)
        rval <- data.frame(est = mean.diff, se = se.diff, lower = low, upper = up)
    }

    if(ctype == "prop.single"){
        rval <- .propsingle(tmp.dat = dat, conf.level = conf.level)
    }

    if(ctype == "prop.unpaired"){
        if (is.matrix(dat) == FALSE)
            stop("Error: dat must be a four-column matrix")
        # Work out the confidence interval for each proportion:
        prop.1 <- .propsingle(tmp.dat = matrix(dat[,1:2], ncol = 2), conf.level = conf.level)
        n1 <- dat[,1] + dat[,2]
        p1 <- prop.1[,1]
        l1 <- prop.1[,3]
        u1 <- prop.1[,4]
        prop.2 <- .propsingle(tmp.dat = matrix(dat[,3:4], ncol = 2), conf.level = conf.level)
        n2 <- dat[,3] + dat[,4]
        p2 <- prop.2[,1]
        l2 <- prop.2[,3]
        u2 <- prop.2[,4]
        # Altman's recommended method (p 48 - 49):
        D <- p1 - p2
        se.D <- sqrt(((p1 * (1 - p1)) / n1) + ((p2 * (1 - p2)) / n2))
        low <- D - sqrt((p1 - l1)^2 + (u2 - p2)^2)
        up <- D + sqrt((p2 - l2)^2 + (u1 - p1)^2)
        rval <- data.frame(est = D, se = se.D, lower = low, upper = up)
    }

    if(ctype == "prop.paired"){
        if (is.matrix(dat) == FALSE)
            stop("Error: dat must be a four-column matrix")
        n <- dat[,1] + dat[,2] + dat[,3] + dat[,4]
        r <- dat[,1]
        s <- dat[,2]
        t <- dat[,3]
        u <- dat[,4]
        p1 <- (r + s) / n
        p2 <- (r + t) / n
        D <- (s - t) / n
        A <- (r + s) * (t + u) * (r + t) * (s + u)
        B <- (r * u) - (s * t)
        se.D <- 1/n * sqrt(s + t - ((s - t)^2 / n))
        # Select an appropriate value for C:
        if(B > n/2) C <- B - n/2
        if(B >= 0 & B <= n/2) C <- 0
        if(B < 0) C <- B
        # Calculate phi:
        phi <- C / sqrt(A)
        # Set phi to zero if one of the following conditions are true:
        if(r + s == 0) phi <- 0
        if(t + u == 0) phi <- 0
        if(r + t == 0) phi <- 0
        if(s + u == 0) phi <- 0
        # Calculate confidence intervals for the raw proportions:
        tmp.dat <- matrix(c((r + s), (n - (r + s))), ncol = 2)
        prop.1 <- .propsingle(tmp.dat, conf.level = conf.level)
        l1 <- prop.1[,3]
        u1 <- prop.1[,4]
        tmp.dat <- matrix(c((r + t), (n - (r + t))), ncol = 2)
        prop.2 <- .propsingle(tmp.dat, conf.level = conf.level)
        l2 <- prop.2[,3]
        u2 <- prop.2[,4]
        # Altman's recommended method (p 52):
        low <- D - sqrt((p1 - l1)^2 - 2 * phi * (p1 - l1) * (u2 - p2) + (u2 - p2)^2)
        up <- D + sqrt((p2 - l2)^2 - 2 * phi * (p2 - l2) * (u1 - p1) + (u1 - p1)^2)
        rval <- data.frame(est = D, se = se.D, lower = low, upper = up)
    }

    if(ctype == "inc.risk" | ctype == "prevalence"){
        if (is.matrix(dat) == FALSE)
            stop("Error: dat must be a two-column matrix")
        if(method == "exact"){
            # Exact method (see http://www.folkesundhed.au.dk/uddannelse/software):
            a <- dat[,1]
            n <- dat[,2]
            b <- n - a
            p <- a / n
            # Exact binomial confidence limits (D. Collett (1999): Modelling binary
            # data. Chapman & Hall/CRC, Boca Raton Florida, p. 24).
            a. <- ifelse(a == 0, a + 1, a)
            b. <- ifelse(b == 0, b + 1, b)
            low <- a. / (a. + (b. + 1) * (1 / qf(1 - N., 2 * a., 2 * b. + 2)))
            up <- (a. + 1) / (a. + 1 + b. / (1 / qf(1 - N., 2 * b., 2 * a. + 2)))
            low <- ifelse(a == 0, 0, low)
            up <- ifelse(a == n, 1, up)
            rval <- data.frame(est = p, lower = low, upper = up)
        }
        if(method == "wilson"){
            # Wilson's method (see Rothman, Epidemiology An Introduction, page 132):
            a <- dat[,1]
            n <- dat[,2]
            p <- a / n
            a. <- n / (n + z^2)
            b. <- a/n
            c. <- z^2/(2 * n)
            d. <- (a * (n - a)) / n^3
            e. <- z^2 / (4 * n^2)
            var.wil <- sqrt(d. + e.)
            # Design effect equals [var.obs] / [var.srs].
            # var.wil has been computed assuming simple random sampling so if an
            # argument for design effect is provided we need to adjust se.wil accordingly:
            se.wil <- sqrt(design * var.wil)
            low <- a. * (b. + c. - (z * se.wil))
            up <- a. * (b. + c. + (z * se.wil))
            rval <- data.frame(est = p, se = se.wil, lower = low, upper = up)
        }
        if(method == "fleiss"){
            # Sampling for Epidemiologists, Kevin M Sullivan
            a <- dat[,1]
            n <- dat[,2]
            p <- a / n
            q <- (1 - p)
            # 'n' = the total number of subjects sampled. 'N' equals the size of
            # the total population.
            var.fl <- ((p * q) / (n - 1)) * ((N - n) / N)
            # Design effect equals [var.obs] / [var.srs].
            # var.fl has been computed assuming simple random sampling so if an
            # argument for design effect is provided we need to adjust se.fl accordingly:
            se.fl <- sqrt(design * var.fl)
            df <- n - 1
            t <- abs(qt(p = N., df = df))
            low <- p - (t * se.fl)
            up <- p + (t * se.fl)
            rval <- data.frame(est = p, se = se.fl, lower = low, upper = up)
        }
    }

    if(ctype == "inc.rate"){
        if (is.matrix(dat) == FALSE)
            stop("Error: dat must be a two-column matrix")
        if(method == "exact"){
            # Exact method (see http://www.folkesundhed.au.dk/uddannelse/software):
            a <- dat[,1]
            n <- dat[,2]
            p <- a / n
            # If the numerator equals zero, set the lower bound of the confidence
            # limit to zero:
            low <- ifelse(a == 0, 0,
                (0.5 * qchisq(p = N., df = 2 * a + 2, lower.tail = FALSE) / n))
            up <- 0.5 * qchisq(p = 1 - N., df = 2 * a + 2, lower.tail = FALSE) / n
            rval <- data.frame(est = p, lower = low, upper = up)
        }
        if(method == "byar"){
            # Byar's method (see Rothman, Epidemiology An Introduction, page 134):
            a.prime <- dat[,1] + 0.5
            p <- dat[,1]/dat[,2]
            PT <- dat[,2]
            low <- (a.prime * (1 - (1/(9 * a.prime)) - (z/3 * sqrt(1/a.prime)))^3)/PT
            up <- (a.prime * (1 - (1/(9 * a.prime)) + (z/3 * sqrt(1/a.prime)))^3)/PT
            rval <- data.frame(est = p, lower = low, upper = up)
        }
    }

    else if(ctype == "smr"){
        if (is.matrix(dat) == FALSE)
            stop("Error: dat must be a two-column matrix")
        # After Dobson et al. 1991. Adapted from Excel code written by Iain Buchan,
        # Public Health Informatics at the University of Manchester (www.phi.man.ac.uk)
        # buchan@man.ac.uk
        # dat[,1] = obs; dat[,2] = pop
        obs <- dat[,1]
        exp <- (sum(dat[,1]) / sum(dat[,2])) * dat[,2]
        smr <- obs / exp
        se.smr <- sqrt(dat[,2]) / exp
        low <- ifelse(dat[,1] > 0,
            ((qchisq(N., df = 2 * dat[,1], lower.tail = FALSE) / 2) / exp), 0)
        up <- ifelse(dat[,1] > 0,
            ((qchisq(1 - N., df = 2 * (dat[,1] + 1), lower.tail = FALSE) / 2) / exp),
            ((qchisq(1 - N., df = 2, lower.tail = FALSE) / 2) / exp))
        rval <- data.frame(est = smr, se = se.smr, lower = low, upper = up)
    }

    else if(ctype == "odds"){
        ## Ederer F and Mantel N (1974) Confidence limits on the ratio of two
        ## Poisson variables. American Journal of Epidemiology 100: 165 - 167.
        ## Cited in Altman, Machin, Bryant, and Gardner (2000) Statistics with
        ## Confidence, British Medical Journal, page 69. Added 161214.
        if (is.matrix(dat) == FALSE)
            stop("Error: dat must be a two-column matrix")
        a <- dat[,1]; b <- dat[,2]
        Al <- (qbinom(1 - N., size = a + b, prob = (a / (a + b)))) / (a + b)
        Au <- (qbinom(N., size = a + b, prob = (a / (a + b)))) / (a + b)
        odds.p <- (a / b)
        odds.l <- (Al / (1 - Al))
        odds.u <- (Au / (1 - Au))
        rval <- data.frame(est = odds.p, lower = odds.l, upper = odds.u)
    }
    return(rval)
}
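# Illustrative call (made-up counts): exact confidence limits for five
# incidence risks, supplied as a two-column matrix of cases and population
# sizes (the N argument is only needed for method = "fleiss"):
# d <- as.matrix(cbind(c(10, 20, 30, 40, 50), c(100, 200, 300, 400, 500)))
# epi.conf(d, ctype = "inc.risk", method = "exact", design = 1, conf.level = 0.95)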
## Incidence risk in exposed: IRiske
## Incidence risk in unexposed: IRisko
## Incidence risk in population: IRpop
## Incidence rate in exposed: IRatee
## Incidence rate in unexposed: IRateo
## Incidence rate in population: IRatepop
## Odds in exposed: Oe
## Odds in unexposed: Oo
## Odds in population: Opop
## Incidence risk ratio: RR.p
## Incidence rate ratio: IRR.p
## Odds ratio: OR.p
## Attributable risk: ARisk.p
## Attributable rate: ARate.p
## Attributable fraction risk data: AFRisk.p
## Attributable fraction rate data: AFRate.p
## Estimated attributable fraction: AFest.p
## Population attributable risk: PARisk.p
## Population attributable rate: PARate.p
## Population attributable fraction risk data: PAFRisk.p
## Population attributable fraction rate data: PAFRate.p
## Crude incidence risk ratio (strata): cRR.p
## Crude incidence rate ratio (strata): cIRR.p
## Crude incidence odds ratio (strata): cOR.p
## Crude attributable risk (strata): cARisk.p
## Crude attributable rate (strata): cARate.p
## Summary incidence risk ratio: sRR.p
## Summary incidence rate ratio: sIRR.p
## Summary incidence odds ratio: sOR.p
## Summary attributable risk: sARisk.p
## Summary attributable rate: sARate.p

## Reporting - method == cohort.count:
##    Inc risk ratio; odds ratio
##    Attributable risk; attributable risk in population
##    Attributable fraction in exposed; attributable fraction in population

## Reporting - method == cohort.time:
##    Inc rate ratio
##    Attributable rate; attributable rate in population
##    Attributable fraction in exposed; attributable fraction in population

## Reporting - method == case.control:
##    Odds ratio
##    Attributable prevalence; attributable prevalence in population
##    Attributable fraction (est) in exposed; attributable fraction (est) in population

## Reporting - method == cross.sectional:
##    Prevalence ratio; odds ratio
##    Attributable prevalence; attributable prevalence in population
##    Attributable fraction in exposed; attributable fraction in population

## If outcome is assigned by column, leave the data as is:
if(outcome == "as.columns"){
   dat <- dat}

## If outcome is assigned by row, transpose it. Note that t() only handles
## matrices, so a stratified (3 dimensional) array is transposed within each
## stratum using aperm():
if(outcome == "as.rows"){
   if(length(dim(dat)) == 2) dat <- t(dat)
   if(length(dim(dat)) == 3) dat <- aperm(dat, c(2,1,3))}

## Make a copy of the original data. These values are used when the sums of
## cells across all strata are greater than zero but some strata contain zero
## cell frequencies:
if(length(dim(dat)) == 2){
   a <- dat[1]; A <- a
   b <- dat[3]; B <- b
   c <- dat[2]; C <- c
   d <- dat[4]; D <- d
}

if(length(dim(dat)) > 2){
   a <- dat[1,1,]; A <- a
   b <- dat[1,2,]; B <- b
   c <- dat[2,1,]; C <- c
   d <- dat[2,2,]; D <- d
}

## Test each stratum for zero values. Add 0.5 to all cells of a stratum if
## any of its cells has a zero value:
for(i in 1:length(a)){
   if(a[i] < 1 | b[i] < 1 | c[i] < 1 | d[i] < 1){
      a[i] <- a[i] + 0.5; b[i] <- b[i] + 0.5; c[i] <- c[i] + 0.5; d[i] <- d[i] + 0.5
   }
}

.funincrisk <- function(dat, conf.level){
   ## Exact binomial confidence limits from D. Collett (1999) Modelling binary data. Chapman & Hall/CRC, Boca Raton Florida, p. 24.
   N. <- 1 - ((1 - conf.level) / 2)
   a <- dat[,1]
   n <- dat[,2]
   b <- n - a
   p <- a / n

   ## Wilson's method (see Rothman, Epidemiology An Introduction, page 132):
   ## N. <- 1 - ((1 - conf.level) / 2)
   ## z <- qnorm(N., mean = 0, sd = 1)
   ## a <- dat[,1]
   ## n <- dat[,2]
   ## p <- dat[,1] / dat[,2]
   ## a. <- n/(n + z^2)
   ## b. <- a/n
   ## c. <- z^2/(2 * n)
   ## d. <- (a * (n - a)) / n^3
   ## e. <- z^2 / (4 * n^2)
   ## low <- a. * (b. + c. - (z * sqrt(d. + e.)))
   ## up <- a. * (b. + c. + (z * sqrt(d. + e.)))

   a. <- ifelse(a == 0, a + 1, a); b.
<- ifelse(b == 0, b + 1, b) low <- a. /(a. + (b. + 1) * (1 / qf(1 - N., 2 * a., 2 * b. + 2))) up <- (a. + 1) / (a. + 1 + b. / (1 / qf(1 - N., 2 * b., 2 * a. + 2))) low <- ifelse(a == 0, 0, low) up <- ifelse(a == n, 1, up) rval <- data.frame(p, low, up) names(rval) <- c("est", "lower", "upper") rval } .funincrate <- function(dat, conf.level){ N. <- 1 - ((1 - conf.level) / 2) a <- dat[,1] n <- dat[,2] p <- a / n low <- 0.5 * qchisq(p = N., df = 2 * a + 2, lower.tail = FALSE) / n up <- 0.5 * qchisq(p = 1 - N., df = 2 * a + 2, lower.tail = FALSE) / n ## a.prime <- dat[,1] + 0.5 ## p <- dat[,1]/dat[,2] ## PT <- dat[,2] ## low <- (a.prime * (1 - (1/(9 * a.prime)) - (z/3 * sqrt(1/a.prime)))^3)/PT ## up <- (a.prime * (1 - (1/(9 * a.prime)) + (z/3 * sqrt(1/a.prime)))^3)/PT ## Wilson's method (see Rothman, Epidemiology An Introduction, page 132): ## N. <- 1 - ((1 - conf.level) / 2) ## z <- qnorm(N., mean = 0, sd = 1) ## a <- dat[,1] ## n <- dat[,2] ## p <- dat[,1] / dat[,2] ## a. <- n/(n + z^2) ## b. <- a/n ## c. <- z^2/(2 * n) ## d. <- (a * (n - a)) / n^3 ## e. <- z^2 / (4 * n^2) ## low <- a. * (b. + c. - (z * sqrt(d. + e.))) ## up <- a. * (b. + c. + (z * sqrt(d. + e.))) rval <- data.frame(p, low, up) names(rval) <- c("est", "lower", "upper") rval } .funRRwald <- function(dat, conf.level){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d wRR.p <- (a / N1) / (c / N0) lnwRR <- log(wRR.p) lnwRR.var <- (1 / a) - (1 / N1) + (1 / c) - (1 / N0) lnwRR.se <- sqrt((1 / a) - (1 / N1) + (1 / c) - (1 / N0)) wRR.se <- exp(lnwRR.se) ll <- exp(lnwRR - (z * lnwRR.se)) ul <- exp(lnwRR + (z * lnwRR.se)) c(wRR.p, ll, ul) } .funRRscore <- function(dat, conf.level){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d scRR.p <- (a / N1) / (c / N0) if ((c == 0) && (a == 0)){ ul = Inf ll = 0 } else{ a1 = N0 * (N0 * (N0 + N1) * a + N1 * (N0 + a) * (z^2)) a2 = -N0 * (N0 * N1 * (c + a) + 2 * (N0 + N1) * c * a + N1 * (N0 + c + 2 * a) * (z^2)) a3 = 2 * N0 * N1 * c * (c + a) + (N0 + N1) * (c^2) * a + N0 * N1 * (c + a) * (z^2) a4 = -N1 * (c ^ 2) * (c + a) b1 = a2 / a1 b2 = a3 / a1 b3 = a4 / a1 c1 = b2 - (b1^2) / 3 c2 = b3 - b1 * b2 / 3 + 2 * (b1^3) / 27 ceta = acos(sqrt(27) * c2 / (2 * c1 * sqrt(-c1))) t1 = -2 * sqrt(-c1 / 3) * cos(pi / 3 - ceta / 3) t2 = -2 * sqrt(-c1 / 3) * cos(pi / 3 + ceta / 3) t3 = 2 * sqrt(-c1 / 3) * cos(ceta / 3) p01 = t1 - b1 / 3 p02 = t2 - b1 / 3 p03 = t3 - b1 / 3 p0sum = p01 + p02 + p03 p0up = min(p01, p02, p03) p0low = p0sum - p0up - max(p01, p02, p03) if( (c == 0) && (a != 0) ){ ll = (1 - (N1 - a) * (1 - p0low) / (c + N1 - (N0 + N1) * p0low)) / p0low ul = Inf } else if((c != N0) && (a == 0)){ ul = (1 - (N1 - a) * (1 - p0up) / (c + N1 - (N0 + N1) * p0up)) / p0up ll = 0 } else if((c == N0) && (a == N1)){ ul = (N0 + z^2) / N0 ll = N1 / (N1 + z^2) } else if((a == N1) || (c == N0)){ if((c == N0) && (a == 0)) {ll = 0} if((c == N0) && (a != 0)) { phat1 = c / N0 phat2 = a / N1 phihat = phat2 / phat1 phil = 0.95 * phihat chi2 = 0 while (chi2 <= z){ a = (N0 + N1) * phil b = -((c + N1) * phil + a + N0) c = c + a p1hat = (-b - sqrt(b^2 -4 * a * c)) / (2 * a) p2hat = p1hat * phil q2hat = 1 - p2hat var = (N0 * N1 * p2hat) / (N1 * (phil - p2hat) + N0 * q2hat) chi2 = ((a - N1 * p2hat) / q2hat) / sqrt(var) ll = phil phil = ll / 1.0001}} i = c j = a ni = N0 nj = N1 if(a == N1){ i = a j = c ni = N1 nj = N0 } phat1 = i / ni phat2 = j / nj 
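## Descriptive note (added): the branch below handles boundary tables
## (a == N1 or c == N0), where the closed-form cubic solution above does not
## apply. The confidence limit is located by nudging the candidate ratio
## (phil or phiu) in small multiplicative steps and recomputing the score
## statistic until it crosses the critical value -- a simple iterative search.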
phihat = phat2 / phat1
phiu = 1.1 * phihat
if((c == N0) && (a == 0)) {
   if(N0 < 100) {phiu = 0.01} else {phiu = 0.001}
}
chi1 = 0
while (chi1 >= -z){
   a. = (ni + nj) * phiu
   b. = -((i + nj) * phiu + j + ni)
   c. = i + j
   p1hat = (-b. - sqrt(b.^2 - 4 * a. * c.)) / (2 * a.)
   p2hat = p1hat * phiu
   q2hat = 1 - p2hat
   var = (ni * nj * p2hat) / (nj * (phiu - p2hat) + ni * q2hat)
   chi1 = ((j - nj * p2hat) / q2hat) / sqrt(var)
   phiu1 = phiu
   phiu = 1.0001 * phiu1
}
if(a == N1) {
   ul = (1 - (N1 - a) * (1 - p0up) / (c + N1 - (N0 + N1) * p0up)) / p0up
   ll = 1 / phiu1
} else{
   ul = phiu1
}
} else{
   ul = (1 - (N1 - a) * (1 - p0up) / (c + N1 - (N0 + N1) * p0up)) / p0up
   ll = (1 - (N1 - a) * (1 - p0low) / (c + N1 - (N0 + N1) * p0low)) / p0low
}
}
c(scRR.p, ll, ul)
}

.funORwald <- function(dat, conf.level){
   N. <- 1 - ((1 - conf.level) / 2)
   z <- qnorm(N., mean = 0, sd = 1)
   a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4]
   N1 <- a + b; N0 <- c + d
   wOR.p <- (a / b) / (c / d)
   lnwOR <- log(wOR.p)
   lnwOR.var <- 1/a + 1/b + 1/c + 1/d
   lnwOR.se <- sqrt(lnwOR.var)
   ll <- exp(lnwOR - (z * lnwOR.se))
   ul <- exp(lnwOR + (z * lnwOR.se))
   c(wOR.p, ll, ul)
}

.funORcfield <- function (dat, conf.level, interval = c(1e-08, 1e+08)){
   a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4]
   N1 <- a + b; N0 <- c + d
   cfOR.p <- (a / b) / (c / d)
   if (((a == 0) && (c == 0)) || ((a == N1) && (c == N0))) {
      ll <- 0
      ul <- Inf
   } else if (c == N0 || a == 0) {
      ll <- 0
      ul <- uniroot(function(or) {
         sum(sapply(max(0, a + c - N0):a, dFNCHypergeo, N1, N0, a + c, or)) - dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2
      }, interval = interval)$root
   } else if (a == N1 || c == 0) {
      ll <- uniroot(function(or) {
         sum(sapply(a:min(N1, a + c), dFNCHypergeo, N1, N0, a + c, or)) - dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2
      }, interval = interval)$root
      ul <- Inf
   } else {
      ll <- uniroot(function(or) {
         sum(sapply(a:min(N1, a + c), dFNCHypergeo, N1, N0, a + c, or)) - dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2
      }, interval = interval)$root
      ul <- uniroot(function(or) {
         sum(sapply(max(0, a + c - N0):a, dFNCHypergeo, N1, N0, a + c, or)) - dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2
      }, interval = interval)$root
   }
   c(cfOR.p, ll, ul)
}

# dFNCHypergeo <- function (x, m1, m2, n, odds, precision = 1e-07){
#    stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2),
#       is.numeric(n), is.numeric(odds), is.numeric(precision))
#    .Call("dFNCHypergeo", as.integer(x), as.integer(m1), as.integer(m2),
#       as.integer(n), as.double(odds), as.double(precision),
#       PACKAGE = "BiasedUrn")
# }

.limit <- function(x, nx, y, ny, conf.level, lim, t){
   z = qchisq(conf.level, 1)
   px = x / nx
   score = 0
   while (score < z){
      a = ny * (lim - 1)
      b = nx * lim + ny - (x + y) * (lim - 1)
      c = -(x + y)
      p2d = (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
      p1d = p2d * lim / (1 + p2d * (lim - 1))
      score = ((nx * (px - p1d))^2) * (1 / (nx * p1d * (1 - p1d)) + 1 / (ny * p2d * (1 - p2d)))
      ci = lim
      if(t == 0) {lim = ci / 1.001} else{lim = ci * 1.001}
   }
   return(ci)
}

.funORscore <- function(dat, conf.level){
   ## All four cells must be defined locally. Previously b and d were not
   ## assigned here, so the point estimate silently picked them up from the
   ## enclosing environment:
   a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4]
   N1 <- a + b
   N0 <- c + d
   px <- a / N1
   py <- c / N0
   scOR.p <- (a / b) / (c / d)
   if(((a == 0) && (c == 0)) || ((a == N1) && (c == N0))){
      ul <- Inf
      ll <- 0
   } else if((a == 0) || (c == N0)){
      ll <- 0
      theta <- 0.01 / N0
      ul <- .limit(a, N1, c, N0, conf.level, theta, 1)
   } else if((a == N1) || (c == 0)){
      ul <- Inf
      theta <- 100 * N1
      ll <- .limit(a, N1, c, N0, conf.level, theta, 0)
   } else{
      theta <- px / (1 - px) / (py / (1 - py)) / 1.1
      ll <- .limit(a, N1, c, N0,
conf.level, theta, 0) theta <- px / (1 - px) / (py / (1 - py)) * 1.1 ul <- .limit(a, N1, c, N0, conf.level, theta, 1) } c(scOR.p, ll, ul) } .funORml <- function(dat, conf.level){ mOR.tmp <- fisher.test(dat, conf.int = TRUE, conf.level = conf.level) mOR.p <- as.numeric(mOR.tmp$estimate) mOR.l <- as.numeric(mOR.tmp$conf.int)[1] mOR.u <- as.numeric(mOR.tmp$conf.int)[2] c(mOR.p, mOR.l, mOR.u) } .funARwald <- function(dat, conf.level, units){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d wARisk.p <- ((a / N1) - (c / N0)) ## wARisk.var <- (((a * b) / (N1^2 * (N1 - 1))) + ((c * d) / (N0^2 * (N0 - 1)))) wARisk.se <- (sqrt(((a * (N1 - a))/N1^3) + ((c * (N0 - c))/N0^3))) ll <- (wARisk.p - (z * wARisk.se)) ul <- (wARisk.p + (z * wARisk.se)) c(wARisk.p * units, ll * units, ul * units) } .funARscore <- function(dat, conf.level, units){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d sARisk.p <- ((a / N1) - (c / N0)) px = a / N1 py = c / N0 z = qchisq(conf.level, 1) proot = px - py dp = 1 - proot niter = 1 while(niter <= 50){ dp = 0.5 * dp up2 = proot + dp score = .z2stat(px, N1, py, N0, up2) if(score < z){proot = up2} niter = niter + 1 if((dp < 0.0000001) || (abs(z - score) < 0.000001)){ niter = 51 ul = up2 } } proot = px - py dp = 1 + proot niter = 1 while(niter <= 50){ dp = 0.5 * dp low2 = proot - dp score = .z2stat(px, N1, py, N0, low2) if(score < z){proot = low2} niter = niter + 1 if((dp < 0.0000001) || (abs(z - score) < 0.000001)){ ll = low2 niter = 51 } } c(sARisk.p * units, ll * units, ul * units) } .z2stat <- function (p1x, nx, p1y, ny, dif){ diff = p1x-p1y-dif if (abs(diff) == 0) { fmdiff = 0} else{ t = ny / nx a = 1 + t b = -(1 + t + p1x + t * p1y + dif * (t + 2)) c = dif * dif + dif * (2 * p1x + t + 1) + p1x + t * p1y d = -p1x * dif * (1 + dif) v = (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 s = sqrt((b / a / 3)^2 - c / a / 3) if(v > 0){u = s} else{u = -s} w = (3.141592654 + acos(v / u^3)) / 3 p1d = 2 * u * cos(w) - b / a / 3 p2d = p1d - dif var = p1d * (1 - p1d) / nx + p2d * (1 - p2d) / ny fmdiff = diff^2 / var } return(fmdiff) } ## ================= ## DECLARE VARIABLES ## ================= ## | D+ | D- | Total ## ---------------------------- ## Exp + | a | b | N1 ## Exp - | c | d | N0 ## -------|------|------|------ ## Total | M1 | M0 | Total N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) ## For large numbers you need to use floating point rather than integer representation. 
This will avoid "integer overflow" messages: a <- as.numeric(a); A <- as.numeric(A) b <- as.numeric(b); B <- as.numeric(B) c <- as.numeric(c); C <- as.numeric(C) d <- as.numeric(d); D <- as.numeric(D) ## Total within strata cases: M1 <- a + c ## Total within strata non-cases: M0 <- b + d ## Total within strata exposed: N1 <- a + b ## Total within strata unexposed: N0 <- c + d ## Total within strata subjects: total <- a + b + c + d ## Number of strata: n.strata <- length(a) ## Added 190809: ## If the sums across strata for all cells are greater than 0, use the sums of the crude data (cf the sums of the adjusted values): if(sum(A) > 0 & sum(B) > 0 & sum(C) > 0 & sum(D) > 0){ sa <- sum(A); sb <- sum(B); sc <- sum(C); sd <- sum(D) } ## If the sums across strata for all cells contain a 0, use the sums of the adjusted data: if(sum(A) == 0 | sum(B) == 0 | sum(C) == 0 | sum(D) == 0){ sa <- sum(a); sb <- sum(b); sc <- sum(c); sd <- sum(d) } ## sa <- sum(a); sb <- sum(b); sc <- sum(c); sd <- sum(d) ## Grand total cases: sM1 <- sa + sc ## Grand total non-cases: sM0 <- sb + sd ## Grand total exposed: sN1 <- sa + sb ## Grand total unexposed: sN0 <- sc + sd ## Grand total: stotal <- sa + sb + sc + sd ## Within-strata incidence risk in exposed: tmp <- .funincrisk(as.matrix(cbind(a, N1)), conf.level = conf.level) IRiske.p <- as.numeric(tmp[,1]) * units IRiske.l <- as.numeric(tmp[,2]) * units IRiske.u <- as.numeric(tmp[,3]) * units ## Within-strata incidence risk in unexposed: tmp <- .funincrisk(as.matrix(cbind(c, N0)), conf.level = conf.level) IRisko.p <- as.numeric(tmp[,1]) * units IRisko.l <- as.numeric(tmp[,2]) * units IRisko.u <- as.numeric(tmp[,3]) * units ## Within-strata incidence risk in population: tmp <- .funincrisk(as.matrix(cbind(M1, total)), conf.level = conf.level) IRiskpop.p <- as.numeric(tmp[,1]) * units IRiskpop.l <- as.numeric(tmp[,2]) * units IRiskpop.u <- as.numeric(tmp[,3]) * units ## Within-strata incidence rate in exposed: tmp <- .funincrate(as.matrix(cbind(a, b)), conf.level = conf.level) IRatee.p <- as.numeric(tmp[,1]) * units IRatee.l <- as.numeric(tmp[,2]) * units IRatee.u <- as.numeric(tmp[,3]) * units ## Within-strata incidence rate in unexposed: tmp <- .funincrate(as.matrix(cbind(c, d)), conf.level = conf.level) IRateo.p <- as.numeric(tmp[,1]) * units IRateo.l <- as.numeric(tmp[,2]) * units IRateo.u <- as.numeric(tmp[,3]) * units ## Within-strata incidence rate in population: tmp <- .funincrate(as.matrix(cbind(M1, M0)), conf.level = conf.level) IRatepop.p <- as.numeric(tmp[,1]) * units IRatepop.l <- as.numeric(tmp[,2]) * units IRatepop.u <- as.numeric(tmp[,3]) * units ## Within-strata odds in exposed (based on Ederer F and Mantel N (1974) Confidence limits on the ratio of two Poisson variables. ## American Journal of Epidemiology 100: 165 - 167. ## Cited in Altman, Machin, Bryant, and Gardner (2000) Statistics with Confidence, British Medical Journal, page 69). ## Added 160609. 
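## Illustrative sketch of the Ederer-Mantel interval used below (not run;
## counts invented). For a = 10 exposed cases and b = 90 exposed non-cases
## the 95% limits on the proportion a / (a + b) come from the binomial
## quantiles and are then transformed to the odds scale:
## Al <- qbinom(0.025, size = 100, prob = 10/100) / 100
## Au <- qbinom(0.975, size = 100, prob = 10/100) / 100
## c(est = 10/90, lower = Al / (1 - Al), upper = Au / (1 - Au))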
Al <- (qbinom(1 - N., size = a + b, prob = (a / (a + b)))) / (a + b)
Au <- (qbinom(N., size = a + b, prob = (a / (a + b)))) / (a + b)
Oe.p <- (a / b)
Oe.l <- (Al / (1 - Al))
Oe.u <- (Au / (1 - Au))

## Within-strata odds in unexposed:
Al <- (qbinom(1 - N., size = c + d, prob = (c / (c + d)))) / (c + d)
Au <- (qbinom(N., size = c + d, prob = (c / (c + d)))) / (c + d)
Oo.p <- (c / d)
Oo.l <- (Al / (1 - Al))
Oo.u <- (Au / (1 - Au))

## Within-strata odds in population:
Al <- (qbinom(1 - N., size = M1 + M0, prob = (M1 / (M1 + M0)))) / (M1 + M0)
Au <- (qbinom(N., size = M1 + M0, prob = (M1 / (M1 + M0)))) / (M1 + M0)
Opop.p <- (M1 / M0)
Opop.l <- (Al / (1 - Al))
Opop.u <- (Au / (1 - Au))

## Crude incidence risk in exposed:
tmp <- .funincrisk(as.matrix(cbind(sa, sN1)), conf.level = conf.level)
cIRiske.p <- as.numeric(tmp[,1]) * units
cIRiske.l <- as.numeric(tmp[,2]) * units
cIRiske.u <- as.numeric(tmp[,3]) * units

## Crude incidence risk in unexposed:
tmp <- .funincrisk(as.matrix(cbind(sc, sN0)), conf.level = conf.level)
cIRisko.p <- as.numeric(tmp[,1]) * units
cIRisko.l <- as.numeric(tmp[,2]) * units
cIRisko.u <- as.numeric(tmp[,3]) * units

## Crude incidence risk in population:
tmp <- .funincrisk(as.matrix(cbind(sM1, stotal)), conf.level = conf.level)
cIRiskpop.p <- as.numeric(tmp[,1]) * units
cIRiskpop.l <- as.numeric(tmp[,2]) * units
cIRiskpop.u <- as.numeric(tmp[,3]) * units

## Crude incidence rate in exposed:
tmp <- .funincrate(as.matrix(cbind(sa, sb)), conf.level = conf.level)
cIRatee.p <- as.numeric(tmp[,1]) * units
cIRatee.l <- as.numeric(tmp[,2]) * units
cIRatee.u <- as.numeric(tmp[,3]) * units

## Crude incidence rate in unexposed:
tmp <- .funincrate(as.matrix(cbind(sc, sd)), conf.level = conf.level)
cIRateo.p <- as.numeric(tmp[,1]) * units
cIRateo.l <- as.numeric(tmp[,2]) * units
cIRateo.u <- as.numeric(tmp[,3]) * units

## Crude incidence rate in population:
tmp <- .funincrate(as.matrix(cbind(sM1, sM0)), conf.level = conf.level)
cIRatepop.p <- as.numeric(tmp[,1]) * units
cIRatepop.l <- as.numeric(tmp[,2]) * units
cIRatepop.u <- as.numeric(tmp[,3]) * units

## Crude odds in exposed (based on Ederer F and Mantel N (1974) Confidence limits on the ratio of two Poisson variables.
## American Journal of Epidemiology 100: 165 - 167.
## Cited in Altman, Machin, Bryant, and Gardner (2000) Statistics with Confidence, British Medical Journal, page 69).
## Added 160609.
## (Fix: the upper binomial quantile in each block below was previously
## assigned to u, so cOe.u, cOo.u and cOpop.u silently re-used a stale Au
## from the within-strata calculations above; it is now assigned to Au.)
Al <- (qbinom(1 - N., size = sa + sb, prob = (sa / (sa + sb)))) / (sa + sb)
Au <- (qbinom(N., size = sa + sb, prob = (sa / (sa + sb)))) / (sa + sb)
cOe.p <- sa / sb
cOe.l <- Al / (1 - Al)
cOe.u <- Au / (1 - Au)

## Crude odds in unexposed:
Al <- (qbinom(1 - N., size = sc + sd, prob = (sc / (sc + sd)))) / (sc + sd)
Au <- (qbinom(N., size = sc + sd, prob = (sc / (sc + sd)))) / (sc + sd)
cOo.p <- sc / sd
cOo.l <- Al / (1 - Al)
cOo.u <- Au / (1 - Au)

## Crude odds in population:
Al <- (qbinom(1 - N., size = sM1 + sM0, prob = (sM1 / (sM1 + sM0)))) / (sM1 + sM0)
Au <- (qbinom(N., size = sM1 + sM0, prob = (sM1 / (sM1 + sM0)))) / (sM1 + sM0)
cOpop.p <- sM1 / sM0
cOpop.l <- Al / (1 - Al)
cOpop.u <- Au / (1 - Au)

## =========================================
## INDIVIDUAL STRATA MEASURES OF ASSOCIATION
## =========================================

## Individual strata incidence risk ratio - Wald confidence limits (Rothman p 135 equation 7-3):
wRR.ctype <- "Wald"
wRR.p <- c(); wRR.l <- c(); wRR.u <- c()
if(length(dim(dat)) == 3){
   for(i in 1:dim(dat)[3]){
      tmp <- .funRRwald(dat[,,i], conf.level)
      wRR.p <- c(wRR.p, tmp[1])
      wRR.l <- c(wRR.l, tmp[2])
      wRR.u <- c(wRR.u, tmp[3])
   }
}
if(length(dim(dat)) == 2){
   tmp <- .funRRwald(dat, conf.level)
   wRR.p <- tmp[1]
   wRR.l <- tmp[2]
   wRR.u <- tmp[3]
}

## Individual strata incidence risk ratio - score confidence limits:
scRR.ctype <- "Score"
scRR.p <- c(); scRR.l <- c(); scRR.u <- c()
if(length(dim(dat)) == 3){
   for(i in 1:dim(dat)[3]){
      tmp <- .funRRscore(dat[,,i], conf.level)
      scRR.p <- c(scRR.p, tmp[1])
      scRR.l <- c(scRR.l, tmp[2])
      scRR.u <- c(scRR.u, tmp[3])
   }
}
if(length(dim(dat)) == 2){
   tmp <- .funRRscore(dat, conf.level)
   scRR.p <- tmp[1]
   scRR.l <- tmp[2]
   scRR.u <- tmp[3]
}

## Individual strata incidence rate ratio (exact confidence intervals from epibasic.xlsx http://ph.au.dk/uddannelse/software/):
IRR.ctype <- ""
IRR.p <- (a / b) / (c / d)
lnIRR <- log(IRR.p)
lnIRR.var <- (1 / a) + (1 / c)
lnIRR.se <- sqrt((1 / a) + (1 / c))
IRR.se <- exp(lnIRR.se)
pl <- a / (a + (c + 1) * (1 / qf(1 - N., 2 * a, 2 * c + 2)))
ph <- (a + 1) / (a + 1 + c / (1 / qf(1 - N., 2 * c, 2 * a + 2)))
IRR.l <- pl * d / ((1 - pl) * b)
IRR.u <- ph * d / ((1 - ph) * b)
## lnIRR.l <- lnIRR - (z * lnIRR.se)
## lnIRR.u <- lnIRR + (z * lnIRR.se)
## IRR.l <- exp(lnIRR.l)
## IRR.u <- exp(lnIRR.u)

## Incidence rate ratio weights (equal to precision, the inverse of the variance of the IRR.
See Woodward page 168): IRR.w <- 1 / (exp(lnIRR.var)) ## Individual strata Wald odds ratios (Rothman p 139 equation 7-6): wOR.ctype <- "Wald" wOR.p <- c(); wOR.l <- c(); wOR.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ tmp <- .funORwald(dat[,,i], conf.level) wOR.p <- c(wOR.p, tmp[1]) wOR.l <- c(wOR.l, tmp[2]) wOR.u <- c(wOR.u, tmp[3]) } } if(length(dim(dat)) == 2){ tmp <- .funORwald(dat, conf.level) wOR.p <- tmp[1] wOR.l <- tmp[2] wOR.u <- tmp[3] } ## Individual strata odds ratio - Cornfield confidence limits: cfOR.ctype <- "Cornfield" cfOR.p <- c(); cfOR.l <- c(); cfOR.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ tmp <- .funORcfield(dat[,,i], conf.level) cfOR.p <- c(cfOR.p, tmp[1]) cfOR.l <- c(cfOR.l, tmp[2]) cfOR.u <- c(cfOR.u, tmp[3]) } } if(length(dim(dat)) == 2){ tmp <- .funORcfield(dat, conf.level) cfOR.p <- tmp[1] cfOR.l <- tmp[2] cfOR.u <- tmp[3] } ## Individual strata odds ratio - score confidence limits: scOR.ctype <- "Score" scOR.p <- c(); scOR.l <- c(); scOR.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ tmp <- .funORscore(dat[,,i], conf.level) scOR.p <- c(scOR.p, tmp[1]) scOR.l <- c(scOR.l, tmp[2]) scOR.u <- c(scOR.u, tmp[3]) } } if(length(dim(dat)) == 2){ tmp <- .funORscore(dat, conf.level) scOR.p <- tmp[1] scOR.l <- tmp[2] scOR.u <- tmp[3] } ## Individual strata odds ratios - maximum likelihood estimate (using fisher.test function): ## Replaced 130612. mOR.ctype <- "MLE" mOR.p <- c(); mOR.l <- c(); mOR.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ tmp <- .funORml(dat[,,i], conf.level) mOR.p <- c(mOR.p, tmp[1]) mOR.l <- c(mOR.l, tmp[2]) mOR.u <- c(mOR.u, tmp[3]) } } if(length(dim(dat)) == 2){ tmp <- .funORml(dat, conf.level) mOR.p <- tmp[1] mOR.l <- tmp[2] mOR.u <- tmp[3] } ## Individual strata attributable risk (Rothman p 135 equation 7-2): wARisk.ctype <- "Wald" wARisk.p <- c(); wARisk.l <- c(); wARisk.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ tmp <- .funARwald(dat[,,i], conf.level, units) wARisk.p <- c(wARisk.p, tmp[1]) wARisk.l <- c(wARisk.l, tmp[2]) wARisk.u <- c(wARisk.u, tmp[3]) } } if(length(dim(dat)) == 2){ tmp <- .funARwald(dat, conf.level, units) wARisk.p <- tmp[1] wARisk.l <- tmp[2] wARisk.u <- tmp[3] } ## Individual strata attributable risk - score confidence limits: scARisk.ctype <- "Score" scARisk.p <- c(); scARisk.l <- c(); scARisk.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ tmp <- .funARscore(dat[,,i], conf.level, units) scARisk.p <- c(scARisk.p, tmp[1]) scARisk.l <- c(scARisk.l, tmp[2]) scARisk.u <- c(scARisk.u, tmp[3]) } } if(length(dim(dat)) == 2){ tmp <- .funARscore(dat, conf.level, units) scARisk.p <- tmp[1] scARisk.l <- tmp[2] scARisk.u <- tmp[3] } ## Individual strata attributable rate (Rothman p 137 equation 7-4): ARate.ctype <- "" ARate.p <- ((a / b) - (c / d)) * units ARate.var <- (a / b^2) + (c / d^2) ARate.se <- (sqrt((a / b^2) + (c / d^2))) * units ARate.l <- ARate.p - (z * ARate.se) ARate.u <- ARate.p + (z * ARate.se) ## Attribtable rate weights (equal to precision, the inverse of the variance of the RR. See Woodward page 168): ARate.w <- 1 / (ARate.var) ## Individual strata attributable fraction for risk data (from Hanley 2001): AFRisk.ctype <- "" AFRisk.p <- ((wRR.p - 1) / wRR.p) ## Bug found 031013. The following two lines of code replace those on lines 441 and 442. 
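## Explanatory note (added): because f(x) = (x - 1) / x is monotone
## increasing for x > 0, the confidence limits for the attributable fraction
## map directly from the corresponding risk ratio limits -- hence the direct
## substitution below, rather than the min()/max() shuffle retained in the
## commented lines underneath.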
AFRisk.l <- (wRR.l - 1) / wRR.l AFRisk.u <- (wRR.u - 1) / wRR.u ## AFRisk.l <- min((wRR.l - 1) / wRR.l, (wRR.u - 1) / wRR.u) ## AFRisk.u <- max((wRR.l - 1) / wRR.l, (wRR.u - 1) / wRR.u) ## Individual strata attributable fraction for rate data (from Hanley 2001): AFRate.ctype <- "" AFRate.p <- (IRR.p - 1) / IRR.p ## Bug found 031013. The following two lines of code replace those on lines 449 and 450. AFRate.l <- (IRR.l - 1) / IRR.l AFRate.u <- (IRR.u - 1) / IRR.u ## AFRate.l <- min((IRR.l - 1) / IRR.l, (IRR.u - 1) / IRR.u) ## AFRate.u <- max((IRR.l - 1) / IRR.l, (IRR.u - 1) / IRR.u) ## Individual strata estimated attributable fraction (from Hanley 2001): AFest.ctype <- "" AFest.p <- (mOR.p - 1) / mOR.p AFest.l <- (mOR.l - 1) / mOR.l AFest.u <- (mOR.u - 1) / mOR.u ## Bug found 031013. The following two lines of code replace those on lines 457 and 458. ## AFest.l <- min((OR.l - 1) / OR.l, (OR.u - 1) / OR.u) ## AFest.u <- max((OR.l - 1) / OR.l, (OR.u - 1) / OR.u) ## Individual strata population attributable risk (same as Rothman p 135 equation 7-2): wPARisk.ctype <- "" wPARisk.p <- ((M1 / total) - (c / N0)) * units wPARisk.se <- (sqrt(((M1 * (total - M1))/total^3) + ((c * (N0 - c))/N0^3))) * units wPARisk.l <- wPARisk.p - (z * wPARisk.se) wPARisk.u <- wPARisk.p + (z * wPARisk.se) ## 270115 Confidence intervals for PAR from Sarah Pirikahu MSc thesis. pPARisk.ctype <- "Pirikahu" pPARisk.p <- ((M1 / total) - (c / N0)) * units pPARisk.d1 <- (1 / total) - ((a + c) / total^2) pPARisk.d2 <- -((a + c) / total^2) pPARisk.d3 <- (c / (c + d)^2) - ((a + c) / total^2) + (1 / total) - (1 / (c + d)) pPARisk.d4 <- (c / (c + d)^2) - ((a + c) / total^2) pPARisk.var <- ((pPARisk.d1^2) * a) + ((pPARisk.d2^2) * b) + ((pPARisk.d3^2) * c) + ((pPARisk.d4^2) * d) pPARisk.se <- sqrt(pPARisk.var) * units pPARisk.l <- pPARisk.p - (z * pPARisk.se) pPARisk.u <- pPARisk.p + (z * pPARisk.se) ## Individual strata population attributable rate (same as Rothman p 137 equation 7-4): PARate.ctype <- "" PARate.p <- ((M1 / M0) - (c / d)) * units PARate.se <- (sqrt((M1 / M0^2) + (c / d^2))) * units PARate.l <- PARate.p - (z * PARate.se) PARate.u <- PARate.p + (z * PARate.se) ## Individual strata population attributable fractions for risk data (from Hanley, 2001): ## PAFRisk.p <- ((wRR.p - 1) / wRR.p) * (a / M1) ## PAFRisk.l <- ((wRR.l - 1) / wRR.l) * (a / M1) ## PAFRisk.u <- ((wRR.u - 1) / wRR.u) * (a / M1) ## Individual strata population attributable fractions for risk data (from OpenEpi TwobyTwo): ## PAFRisk.p <- (IRiskpop.p - IRisko.p) / IRiskpop.p ## PAFRisk.l <- min((IRiskpop.l - IRisko.l) / IRiskpop.l, (IRiskpop.u - IRisko.u) / IRiskpop.u) ## PAFRisk.u <- max((IRiskpop.l - IRisko.l) / IRiskpop.l, (IRiskpop.u - IRisko.u) / IRiskpop.u) ## Individual strata population attributable fractions for risk data (from Jewell, page 84): PAFRisk.ctype <- "Jewell" PAFRisk.p <- ((a * d) - (b * c)) / ((a + c) * (c + d)) PAFRisk.var <- (b + (PAFRisk.p * (a + d))) / (total * c) PAFRisk.l <- 1 - exp(log(1 - PAFRisk.p) + (z * sqrt(PAFRisk.var))) PAFRisk.u <- 1 - exp(log(1 - PAFRisk.p) - (z * sqrt(PAFRisk.var))) ## Individual strata population attributable fractions for rate data (from Hanley, 2001): ## PAFRate.p <- ((IRR.p - 1) / IRR.p) * (a / M1) ## PAFRate.l <- ((IRR.l - 1) / IRR.l) * (a / M1) ## PAFRate.u <- ((IRR.u - 1) / IRR.u) * (a / M1) ## Individual strata population attributable fractions for rate data (from OpenEpi TwobyTwo - Jewell doesn't provide a method for rate data): PAFRate.ctype <- "Sullivan" PAFRate.p <- (IRatepop.p - IRateo.p) 
/ IRatepop.p
PAFRate.l <- min((IRatepop.l - IRateo.l) / IRatepop.l, (IRatepop.u - IRateo.u) / IRatepop.u)
PAFRate.u <- max((IRatepop.l - IRateo.l) / IRatepop.l, (IRatepop.u - IRateo.u) / IRatepop.u)

## Individual strata estimated population attributable fraction (from Hanley, 2001):
## PAFest.p <- ((OR.p - 1) / OR.p) * (a / M1)
## PAFest.l <- ((OR.l - 1) / OR.l) * (a / M1)
## PAFest.u <- ((OR.u - 1) / OR.u) * (a / M1)

## Individual strata estimated population attributable fraction (from OpenEpi TwobyTwo):
## PAFest.p <- (Opop.p - Oo.p) / Opop.p
## PAFest.l <- min((Opop.l - Oo.l) / Opop.l, (Opop.u - Oo.u) / Opop.u)
## PAFest.u <- max((Opop.l - Oo.l) / Opop.l, (Opop.u - Oo.u) / Opop.u)

## Individual strata estimated population attributable fraction (from Jewell, page 84):
PAFest.ctype <- "Jewell"
PAFest.p <- ((a * d) - (b * c)) / (d * (a + c))
PAFest.var <- (a / (c * (a + c))) + (b / (d * (b + d)))
PAFest.l <- 1 - exp(log(1 - PAFest.p) + (z * sqrt(PAFest.var)))
PAFest.u <- 1 - exp(log(1 - PAFest.p) - (z * sqrt(PAFest.var)))

## =============================
## CRUDE MEASURES OF ASSOCIATION
## =============================

## Crude incidence risk ratio - Wald confidence limits (Rothman p 135 equation 7-3):
cwRR.ctype <- "Wald"
tmp <- .funRRwald(matrix(c(sa, sb, sc, sd), nrow = 2, byrow = TRUE), conf.level)
cwRR.p <- tmp[1]; cwRR.l <- tmp[2]; cwRR.u <- tmp[3]

## Crude incidence risk ratio - score confidence limits:
csRR.ctype <- "Score"
tmp <- .funRRscore(matrix(c(sa, sb, sc, sd), nrow = 2, byrow = TRUE), conf.level)
csRR.p <- tmp[1]; csRR.l <- tmp[2]; csRR.u <- tmp[3]

## Crude incidence rate ratio (exact confidence intervals from epibasic.xlsx http://ph.au.dk/uddannelse/software/):
ceIRR.ctype <- "Exact"
ceIRR.p <- (sa / sb) / (sc / sd)
celnIRR <- log(ceIRR.p)
celnIRR.se <- sqrt((1 / sa) + (1 / sc))
ceIRR.se <- exp(celnIRR.se)
pl <- sa / (sa + (sc + 1) * (1 / qf(1 - N., 2 * sa, 2 * sc + 2)))
ph <- (sa + 1) / (sa + 1 + sc / (1 / qf(1 - N., 2 * sc, 2 * sa + 2)))
ceIRR.l <- pl * sd / ((1 - pl) * sb)
ceIRR.u <- ph * sd / ((1 - ph) * sb)

## Crude odds ratio - Wald confidence limits:
cwOR.ctype <- "Wald"
tmp <- .funORwald(matrix(c(sa, sb, sc, sd), nrow = 2, byrow = TRUE), conf.level)
cwOR.p <- tmp[1]; cwOR.l <- tmp[2]; cwOR.u <- tmp[3]

## Crude odds ratio - Cornfield confidence limits:
ccfOR.ctype <- "Cornfield"
tmp <- .funORcfield(matrix(c(sa, sb, sc, sd), nrow = 2, byrow = TRUE), conf.level)
ccfOR.p <- tmp[1]; ccfOR.l <- tmp[2]; ccfOR.u <- tmp[3]

## Crude odds ratio - score confidence limits:
csOR.ctype <- "Score"
tmp <- .funORscore(matrix(c(sa, sb, sc, sd), nrow = 2, byrow = TRUE), conf.level)
csOR.p <- tmp[1]; csOR.l <- tmp[2]; csOR.u <- tmp[3]

## Crude odds ratio - maximum likelihood estimate (using the fisher.test function):
## Replaced 130612.
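## Note (added): fisher.test() returns the conditional maximum likelihood
## estimate of the odds ratio, which in general differs from the sample
## cross-product ratio (a*d)/(b*c). Illustrative check (not run; counts
## invented):
## fisher.test(matrix(c(10, 3, 5, 12), nrow = 2))$estimate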
cmOR.ctype <- "MLE" cmOR.tmp <- fisher.test(apply(dat, MARGIN = c(1,2), FUN = sum), conf.int = TRUE, conf.level = conf.level) cmOR.p <- as.numeric(cmOR.tmp$estimate) cmOR.l <- as.numeric(cmOR.tmp$conf.int)[1] cmOR.u <- as.numeric(cmOR.tmp$conf.int)[2] ## Crude attributable risk - Wald confidence limits (Rothman p 135 equation 7-2): cwARisk.ctype <- "Wald" tmp <- .funARwald(dat, conf.level, units) cwARisk.p <- tmp[1] cwARisk.l <- tmp[2] cwARisk.u <- tmp[3] ## Crude attributable risk - score confidence limits: cscARisk.ctype <- "Score" tmp <- .funARscore(dat, conf.level, units) cscARisk.p <- tmp[1] cscARisk.l <- tmp[2] cscARisk.u <- tmp[3] ## Crude attributable rate (Rothman p 137 equation 7-4): cARate.ctype <- "" cARate.p <- ((sa / sb) - (sc / sd)) * units cARate.se <- (sqrt((sa / sb^2) + (sc / sd^2))) * units cARate.l <- cARate.p - (z * cARate.se) cARate.u <- cARate.p + (z * cARate.se) ## Crude attributable fraction for risk data (from Hanley 2001): cAFrisk.ctype <- "Score" cAFRisk.p <- (csRR.p - 1) / csRR.p cAFRisk.l <- min((csRR.l - 1) / csRR.l, (csRR.u - 1) / csRR.u) cAFRisk.u <- max((csRR.l - 1) / csRR.l, (csRR.u - 1) / csRR.u) ## Crude attributable fraction for rate data (from Hanley 2001): cAFRate.ctype <- "Exact" cAFRate.p <- (ceIRR.p - 1) / ceIRR.p cAFRate.l <- min((ceIRR.l - 1) / ceIRR.l, (ceIRR.u - 1) / ceIRR.u) cAFRate.u <- max((ceIRR.l - 1) / ceIRR.l, (ceIRR.u - 1) / ceIRR.u) ## Crude estimated attributable fraction (from Hanley 2001): cAFest.ctype <- "Score" cAFest.p <- (scOR.p - 1) / scOR.p cAFest.l <- min((scOR.l - 1) / scOR.l, (scOR.u - 1) / scOR.u) cAFest.u <- max((scOR.l - 1) / scOR.l, (scOR.u - 1) / scOR.u) ## Crude population attributable risk (same as Rothman p 135 equation 7-2): cwPARisk.ctype <- "" cwPARisk.p <- ((sM1 / stotal) - (sc / sN0)) * units cwPARisk.se <- (sqrt(((sM1 * (stotal - sM1))/stotal^3) + ((sc * (sN0 - sc))/sN0^3))) * units cwPARisk.l <- cwPARisk.p - (z * cwPARisk.se) cwPARisk.u <- cwPARisk.p + (z * cwPARisk.se) ## 270115 Confidence intervals for PAR from Sarah Pirikahu MSc thesis. 
cpPARisk.ctype <- "Pirikahu" cpPARisk.p <- ((sM1 / stotal) - (sc / sN0)) * units cpPARisk.d1 <- (1 / stotal) - ((sa + sc) / stotal^2) cpPARisk.d2 <- -((sa + sc) / stotal^2) cpPARisk.d3 <- (sc / (sc + sd)^2) - ((sa + sc) / stotal^2) + (1 / stotal) - (1 / (sc + sd)) cpPARisk.d4 <- (sc / (sc + sd)^2) - ((sa + sc) / stotal^2) cpPARisk.var <- ((cpPARisk.d1^2) * sa) + ((cpPARisk.d2^2) * sb) + ((cpPARisk.d3^2) * sc) + ((cpPARisk.d4^2) * sd) cpPARisk.se <- sqrt(cpPARisk.var) * units cpPARisk.l <- cpPARisk.p - (z * cpPARisk.se) cpPARisk.u <- cpPARisk.p + (z * cpPARisk.se) ## Crude population attributable rate (same as Rothman p 137 equation 7-4): cPARate.ctype <- "" cPARate.p <- ((sM1 / sM0) - (sc / sd)) * units cPARate.se <- (sqrt((sM1 / sM0^2) + (sc / sd^2))) * units cPARate.l <- cPARate.p - (z * cPARate.se) cPARate.u <- cPARate.p + (z * cPARate.se) ## Crude population attributable fractions for risk data (from Hanley 2001): ## cPAFRisk.p <- ((csRR.p - 1) / csRR.p) * (sa / sM1) ## cPAFRisk.l <- ((csRR.l - 1) / csRR.l) * (sa / sM1) ## cPAFRisk.u <- ((csRR.u - 1) / csRR.u) * (sa / sM1) ## Crude population attributable fractions for risk data (from OpenEpi TwobyTwo): ## Changed 160609 cPAFRisk.ctype <- "" cPAFRisk.p <- (cIRiskpop.p - cIRisko.p) / cIRiskpop.p cPAFRisk.l <- min((cIRiskpop.l - cIRisko.l) / cIRiskpop.l, (cIRiskpop.u - cIRisko.u) / cIRiskpop.u) cPAFRisk.u <- max((cIRiskpop.l - cIRisko.l) / cIRiskpop.l, (cIRiskpop.u - cIRisko.u) / cIRiskpop.u) ## Crude population attributable fractions for rate data (from Hanley 2001): ## cPAFRate.ctype <- "Exact" ## cPAFRate.p <- ((ceIRR.p - 1) / ceIRR.p) * (sa / sM1) ## cPAFRate.l <- ((ceIRR.p - 1) / ceIRR.p) * (sa / sM1) ## cPAFRate.u <- ((ceIRR.p - 1) / ceIRR.p) * (sa / sM1) ## Crude population attributable fractions for rate data (from OpenEpi TwobyTwo): ## Changed 160609 cPAFRate.ctype <- "" cPAFRate.p <- (cIRatepop.p - cIRateo.p) / cIRatepop.p cPAFRate.l <- min((cIRatepop.l - cIRateo.l) / cIRatepop.l, (cIRatepop.u - cIRateo.u) / cIRatepop.u) cPAFRate.u <- max((cIRatepop.l - cIRateo.l) / cIRatepop.l, (cIRatepop.u - cIRateo.u) / cIRatepop.u) ## Crude estimated population attributable fraction (from Hanley, 2001): ## cPAFest.p <- ((scOR.p - 1) / scOR.p) * (sa / sM1) ## cPAFest.l <- ((scOR.p - 1) / scOR.p) * (sa / sM1) ## cPAFest.u <- ((scOR.p - 1) / scOR.p) * (sa / sM1) ## Crude estimated population attributable fraction (from OpenEpi TwobyTwo): ## Changed 160609 cPAFest.ctype <- "" cPAFest.p <- (cOpop.p - cOo.p) / cOpop.p cPAFest.l <- min((cOpop.l - cOo.l) / cOpop.l, (cOpop.u - cOo.u) / cOpop.u) cPAFest.u <- max((cOpop.l - cOo.l) / cOpop.l, (cOpop.u - cOo.u) / cOpop.u) ## =============================== ## MANTEL-HAENZEL SUMMARY MEASURES ## ================================ ## Summary incidence risk ratio (Rothman 2002 p 148 and 152, equation 8-2): sRR.p <- sum((a * N0 / total)) / sum((c * N1 / total)) varLNRR.s <- sum(((M1 * N1 * N0) / total^2) - ((a * c)/ total)) / (sum((a * N0)/total) * sum((c * N1)/total)) lnRR.s <- log(sRR.p) sRR.se <- (sqrt(varLNRR.s)) sRR.l <- exp(lnRR.s - (z * sqrt(varLNRR.s))) sRR.u <- exp(lnRR.s + (z * sqrt(varLNRR.s))) ## Summary incidence rate ratio (Rothman 2002 p 153, equation 8-5): sIRR.p <- sum((a * d) / M0) / sum((c * b) / M0) lnIRR.s <- log(sIRR.p) varLNIRR.s <- (sum((M1 * b * d) / M0^2)) / (sum((a * d) / M0) * sum((c * b) / M0)) sIRR.se <- sqrt(varLNIRR.s) sIRR.l <- exp(lnIRR.s - (z * sqrt(varLNIRR.s))) sIRR.u <- exp(lnIRR.s + (z * sqrt(varLNIRR.s))) ## Summary odds ratio (Cord Heuer 211004): sOR.p <- sum((a * d / 
total)) / sum((b * c / total)) G <- a * d / total H <- b * c / total P <- (a + d) / total Q <- (b + c) / total GQ.HP <- G * Q + H * P sumG <- sum(G) sumH <- sum(H) sumGP <- sum(G * P) sumGH <- sum(G * H) sumHQ <- sum(H * Q) sumGQ <- sum(G * Q) sumGQ.HP <- sum(GQ.HP) ## Correction from Richard Bourgon 29 September 2010: varLNOR.s <- sumGP/(2 * sumG^2) + sumGQ.HP/(2 * sumG * sumH) + sumHQ/(2 * sumH^2) ## varLNOR.s <- sumGP / (2 * sumG^2) + sumGQ.HP/(2 * sumGH) + sumHQ/(2 * sumG * sumH) lnOR.s <- log(sOR.p) sOR.se <- sqrt(varLNOR.s) sOR.l <- exp(lnOR.s - z * sqrt(varLNOR.s)) sOR.u <- exp(lnOR.s + z * sqrt(varLNOR.s)) ## Summary attributable risk (Rothman 2002 p 147 and p 152, equation 8-1): sARisk.p <- (sum(((a * N0) - (c * N1)) / total) / sum((N1 * N0) / total)) * units w <- (N1 * N0) / total var.p1 <- (((a * d) / (N1^2 * (N1 - 1))) + ((c * b) / (N0^2 * (N0 - 1)))) var.p1[N0 == 1] <- 0 var.p1[N1 == 1] <- 0 varARisk.s <- sum(w^2 * var.p1) / sum(w)^2 sARisk.se <- (sqrt(varARisk.s)) * units sARisk.l <- sARisk.p - (z * sARisk.se) sARisk.u <- sARisk.p + (z * sARisk.se) ## Summary attributable rate (Rothman 2002 p 153, equation 8-4): sARate.p <- sum(((a * d) - (c * b)) / M0) / sum((b * d) / M0) * units varARate.s <- sum(((b * d) / M0)^2 * ((a / b^2) + (c / d^2 ))) / sum((b * d) / M0)^2 sARate.se <- sqrt(varARate.s) * units sARate.l <- sARate.p - (z * sARate.se) sARate.u <- sARate.p + (z * sARate.se) ## =============================== ## EFFECT OF CONFOUNDING ## =============================== ## Effect of confounding for risk ratio (Woodward p 172): RR.conf.p <- (csRR.p / sRR.p) RR.conf.l <- (csRR.l / sRR.l) RR.conf.u <- (csRR.u / sRR.u) ## Effect of confounding for incidence risk ratio (Woodward p 172): IRR.conf.p <- (ceIRR.p / sIRR.p) IRR.conf.l <- (ceIRR.l / sIRR.l) IRR.conf.u <- (ceIRR.u / sIRR.u) ## Effect of confounding for odds ratio (Woodward p 172): OR.conf.p <- (scOR.p / sOR.p) OR.conf.l <- (scOR.l / sOR.l) OR.conf.u <- (scOR.u / sOR.u) ## Effect of confounding for attributable risk (Woodward p 172): ARisk.conf.p <- (cscARisk.p / scARisk.p) ARisk.conf.l <- (cscARisk.l / scARisk.l) ARisk.conf.u <- (cscARisk.u / scARisk.u) ## Effect of confounding for attributable rate (Woodward p 172): ARate.conf.p <- (cARate.p / sARate.p) ARate.conf.l <- (cARate.l / sARate.l) ARate.conf.u <- (cARate.u / sARate.u) ## =========================================== ## CHI-SQUARED TESTS OF HOMOGENEITY AND EFFECT ## =========================================== ## Chi-squared test statistic for individual strata. See Dawson Saunders and Trapp page 151: exp.a <- (N1 * M1) / total exp.b <- (N1 * M0) / total exp.c <- (N0 * M1) / total exp.d <- (N0 * M0) / total chi2 <- (((a - exp.a)^2)/ exp.a) + (((b - exp.b)^2)/ exp.b) + (((c - exp.c)^2)/ exp.c) + (((d - exp.d)^2)/ exp.d) p.chi2 <- 1 - pchisq(chi2, df = 1) ## Crude summary chi-squared test statistic with 1 degree of freedom: exp.sa <- (sN1 * sM1) / stotal exp.sb <- (sN1 * sM0) / stotal exp.sc <- (sN0 * sM1) / stotal exp.sd <- (sN0 * sM0) / stotal chi2s <- (((sa - exp.sa)^2)/ exp.sa) + (((sb - exp.sb)^2)/ exp.sb) + (((sc - exp.sc)^2)/ exp.sc) + (((sd - exp.sd)^2)/ exp.sd) p.chi2s <- 1 - pchisq(chi2s, df = 1) ## Mantel-Haenszel chi-squared test: if(length(a) > 1){ chi2m <- as.numeric(mantelhaen.test(dat)$statistic) p.chi2m <- as.numeric(mantelhaen.test(dat)$p.value) } if(length(a) > 1){ if(homogeneity == "woolf"){ ## Test of homogeneity of risk ratios (Jewell 2004, page 154). First work out the Woolf estimate of the adjusted risk ratio (labelled lnRR.s. 
here) based on Jewell (2004, page 134): lnRR. <- log((a / (a + b)) / (c / (c + d))) lnRR.var. <- (b / (a * (a + b))) + (d / (c * (c + d))) wRR. <- 1 / lnRR.var. lnRR.s. <- sum(wRR. * lnRR.) / sum(wRR.) ## Equation 10.3 from Jewell (2004): RR.homogeneity <- sum(wRR. * (lnRR. - lnRR.s.)^2) RR.homogeneity.p <- 1 - pchisq(RR.homogeneity, df = n.strata - 1) RR.homog <- data.frame(test.statistic = RR.homogeneity, df = n.strata - 1, p.value = RR.homogeneity.p) ## Test of homogeneity of odds ratios (Jewell 2004, page 154). First work out the Woolf estimate of the adjusted odds ratio (labelled lnOR.s. here) based on Jewell (2004, page 129): lnOR. <- log(((a + 0.5) * (d + 0.5)) / ((b + 0.5) * (c + 0.5))) lnOR.var. <- (1 / (a + 0.5)) + (1 / (b + 0.5)) + (1 / (c + 0.5)) + (1 / (d + 0.5)) wOR. <- 1 / lnOR.var. lnOR.s. <- sum((wOR. * lnOR.)) / sum(wOR.) ## Equation 10.3 from Jewell (2004): OR.homogeneity <- sum(wOR. * (lnOR. - lnOR.s.)^2) OR.homogeneity.p <- 1 - pchisq(OR.homogeneity, df = n.strata - 1) OR.homog <- data.frame(test.statistic = OR.homogeneity, df = n.strata - 1, p.value = OR.homogeneity.p) } if(homogeneity == "breslow.day"){ ## Setup calculations. From Jim Robison-Cox, based on Jewell (2004, page 154). n11k <- dat[1,1,] n21k <- dat[2,1,] n12k <- dat[1,2,] n22k <- dat[2,2,] row1sums <- n11k + n12k row2sums <- n21k + n22k col1sums <- n11k + n21k Amax <- apply(cbind(row1sums, col1sums), 1, min) ## Breslow-Day test of homogeneity of risk ratios. Astar must be no more than col1sums and no more than row1sums: bb <- row2sums + row1sums * sRR.p - col1sums * (1 - sRR.p) determ <- sqrt(bb^2 + 4 * (1 - sRR.p) * sRR.p * row1sums * col1sums) Astar <- (-bb + cbind(-determ, determ)) / (2 - 2 * sRR.p) Astar <- ifelse(Astar[,1] <= Amax & Astar[,1] >= 0, Astar[,1], Astar[,2]) ## print(Astar) Bstar <- row1sums - Astar Cstar <- col1sums - Astar Dstar <- row2sums - col1sums + Astar Var <- apply(1 / cbind(Astar, Bstar, Cstar, Dstar), 1, sum)^(-1) ## print(Var) RR.homogeneity <- sum((dat[1,1,] - Astar)^2 / Var) RR.homogeneity.p <- 1 - pchisq(RR.homogeneity, df = n.strata - 1) ## Breslow-Day test of homogeneity of odds ratios. 
Astar must be no more than col1sums and no more than row1sums: bb <- row2sums + row1sums * sOR.p - col1sums * (1 - sOR.p) determ <- sqrt(bb^2 + 4 * (1 - sOR.p) * sOR.p * row1sums * col1sums) Astar <- (-bb + cbind(-determ, determ)) / (2 - 2 * sOR.p) Astar <-ifelse(Astar[,1] <= Amax & Astar[,1] >= 0, Astar[,1], Astar[,2]) ## print(Astar) Bstar <-row1sums - Astar Cstar <- col1sums - Astar Dstar <- row2sums - col1sums + Astar Var <- apply(1 / cbind(Astar, Bstar, Cstar, Dstar), 1, sum)^(-1) ## print(Var) OR.homogeneity <- sum((dat[1,1,] - Astar)^2 / Var) OR.homogeneity.p <- 1 - pchisq(OR.homogeneity, df = n.strata - 1) } } ## Test of attributable risk homogeneity (see Woodward p 207): ## AR.homogeneity <- sum(AR.p - AR.s)^2 / SE.AR^2 ## Test of effect: ## AR.homogeneity.p <- 1 - pchisq(AR.homogeneity, df = n.strata - 1) ## AR.homog <- data.frame(test.statistic = AR.homogeneity, df = n.strata - 1, p.value = AR.homogeneity.p) ## =============================== ## RESULTS ## =============================== ## Results are entered in a list res <- list( ## Incidence risk ratio: wRR.strata = data.frame(est = wRR.p, lower = wRR.l, upper = wRR.u), scRR.strata = data.frame(est = scRR.p, lower = scRR.l, upper = scRR.u), ## Incidence rate ratio: IRR.strata = data.frame(est = IRR.p, lower = IRR.l, upper = IRR.u), ## Odds ratio: wOR.strata = data.frame(est = wOR.p, lower = wOR.l, upper = wOR.u), cfOR.strata = data.frame(est = cfOR.p, lower = cfOR.l, upper = cfOR.u), scOR.strata = data.frame(est = scOR.p, lower = scOR.l, upper = scOR.u), mOR.strata = data.frame(est = mOR.p, lower = mOR.l, upper = mOR.u), ## Attributable risk: wARisk.strata = data.frame(est = wARisk.p, lower = wARisk.l, upper = wARisk.u), scARisk.strata = data.frame(est = scARisk.p, lower = scARisk.l, upper = scARisk.u), ## Attributable rate: ARate.strata = data.frame(est = ARate.p, lower = ARate.l, upper = ARate.u), ## Attributable fraction for risk data: AFRisk.strata = data.frame(est = AFRisk.p, lower = AFRisk.l, upper = AFRisk.u), ## Attributable fraction for rate data: AFRate.strata = data.frame(est = AFRate.p, lower = AFRate.l, upper = AFRate.u), ## Estimated attributable fraction: AFest.strata = data.frame(est = AFest.p, lower = AFest.l, upper = AFest.u), ## Population attributable risk: wPARisk.strata = data.frame(est = wPARisk.p, lower = wPARisk.l, upper = wPARisk.u), pPARisk.strata = data.frame(est = pPARisk.p, lower = pPARisk.l, upper = pPARisk.u), ## Population attributable rate: PARate.strata = data.frame(est = PARate.p, lower = PARate.l, upper = PARate.u), ## Population attributable fraction for risk data: PAFRisk.strata = data.frame(est = PAFRisk.p, lower = PAFRisk.l, upper = PAFRisk.u), ## Population attributable fraction for rate data: PAFRate.strata = data.frame(est = PAFRate.p, lower = PAFRate.l, upper = PAFRate.u), ## Estimated population attributable fraction: PAFest.strata = data.frame(est = PAFest.p, lower = PAFest.l, upper = PAFest.u), ## Crude incidence risk ratio: wRR.crude = data.frame(est = cwRR.p, lower = cwRR.l, upper = cwRR.u), scRR.crude = data.frame(est = csRR.p, lower = csRR.l, upper = csRR.u), ## Crude incidence rate ratio: IRR.crude = data.frame(est = ceIRR.p, lower = ceIRR.l, upper = ceIRR.u), ## Crude odds ratio: wOR.crude = data.frame(est = cwOR.p, lower = cwOR.l, upper = cwOR.u), cfOR.crude = data.frame(est = ccfOR.p, lower = ccfOR.l, upper = ccfOR.u), scOR.crude = data.frame(est = csOR.p, lower = csOR.l, upper = csOR.u), cmOR.crude = data.frame(est = cmOR.p, lower = cmOR.l, upper = cmOR.u), ## Crude 
attributable risk: wARisk.crude = data.frame(est = cwARisk.p, lower = cwARisk.l, upper = cwARisk.u), scARisk.crude = data.frame(est = cscARisk.p, lower = cscARisk.l, upper = cscARisk.u), ## Crude attributable rate: ARate.crude = data.frame(est = cARate.p, lower = cARate.l, upper = cARate.u), ## Crude attributable fraction for risk data: AFRisk.crude = data.frame(est = cAFRisk.p, lower = cAFRisk.l, upper = cAFRisk.u), ## Crude attributable fraction for rate data: AFRate.crude = data.frame(est = cAFRate.p, lower = cAFRate.l, upper = cAFRate.u), ## Crude estimated attributable fraction: AFest.crude = data.frame(est = cAFest.p, lower = cAFest.l, upper = cAFest.u), ## Crude population attributable risk: wPARisk.crude = data.frame(est = cwPARisk.p, lower = cwPARisk.l, upper = cwPARisk.u), pPARisk.crude = data.frame(est = cpPARisk.p, lower = cpPARisk.l, upper = cpPARisk.u), ## Crude population attributable rate: PARate.crude = data.frame(est = cPARate.p, lower = cPARate.l, upper = cPARate.u), ## Crude population attributable fraction for risk data: PAFRisk.crude = data.frame(est = cPAFRisk.p, lower = cPAFRisk.l, upper = cPAFRisk.u), ## Crude population attributable fraction for rate data: PAFRate.crude = data.frame(est = cPAFRate.p, lower = cPAFRate.l, upper = cPAFRate.u), ## Crude estimated population attributable fraction: PAFest.crude = data.frame(est = cPAFest.p, lower = cPAFest.l, upper = cPAFest.u), ## Mantel-Haenszel adjusted incidence risk ratio: RR.mh = data.frame(est = sRR.p, se = sRR.se, lower = sRR.l, upper = sRR.u), ## Mantel-Haenszel adjusted incidence rate ratio: IRR.mh = data.frame(est = sIRR.p, se = sIRR.se, lower = sIRR.l, upper = sIRR.u), ## Mantel-Haenszel adjusted odds ratio: OR.mh = data.frame(est = sOR.p, se = sOR.se, lower = sOR.l, upper = sOR.u), ## Mantel-Haenszel adjusted attributable risk: ARisk.mh = data.frame(est = sARisk.p, se = sARisk.se, lower = sARisk.l, upper = sARisk.u), ## Mantel-Haenszel adjusted attributable rate: ARate.mh = data.frame(est = sARate.p, se = sARate.se, lower = sARate.l, upper = sARate.u), ## Effect of confounding for risk ratio (Woodward p 172): RR.conf = data.frame(est = RR.conf.p, lower = RR.conf.l, upper = RR.conf.u), ## Effect of confounding for rate ratio (Woodward p 172): IRR.conf = data.frame(est = IRR.conf.p, lower = IRR.conf.l, upper = IRR.conf.u), ## Effect of confounding for odds ratio (Woodward p 172): OR.conf = data.frame(est = OR.conf.p, lower = OR.conf.l, upper = OR.conf.u), ## Effect of confounding for attributable risk (Woodward p 172): ARisk.conf = data.frame(est = ARisk.conf.p, lower = ARisk.conf.l, upper = ARisk.conf.u), ## Effect of confounding for attributable rate (Woodward p 172): ARate.conf = data.frame(est = ARate.conf.p, lower = ARate.conf.l, upper = ARate.conf.u), ## Labelling for incidence prevalence units: count.units = ifelse(units == 1, "Cases per population unit", paste("Cases per ", units, " population units", sep = "")), time.units = ifelse(units == 1, "Cases per unit of population time at risk", paste("Cases per ", units, " units of population time at risk", sep = "")), chisq.strata = data.frame(test.statistic = chi2, df = 1, p.value = p.chi2), chisq.crude = data.frame(test.statistic = chi2s, df = 1, p.value = p.chi2s) ) if(n.strata > 1){ res$chisq.mh = data.frame(test.statistic = chi2m, df = 1, p.value = p.chi2m) res$RR.homog = data.frame(test.statistic = RR.homogeneity, df = n.strata - 1, p.value = RR.homogeneity.p) res$OR.homog = data.frame(test.statistic = OR.homogeneity, df = n.strata - 1, p.value = 
OR.homogeneity.p) } ## =============================== ## REPORTING ## =============================== ## method = "cohort.count", single strata: if(method == "cohort.count" & n.strata == 1){ ## Verbose part: massoc <- list( RR.strata.wald = res$wRR.strata, RR.strata.score = res$scRR.strata, OR.strata.wald = res$wOR.strata, OR.strata.cfield = res$cfOR.strata, OR.strata.score = res$scOR.strata, OR.strata.mle = res$mOR.strata, ARe.strata.wald = res$wARisk.strata, ARe.strata.score = res$scARisk.strata, ARp.strata.wald = res$wPARisk.strata, ARp.strata.piri = res$pPARisk.strata, AFe.strata = res$AFRisk.strata, AFp.strata = res$PAFRisk.strata, chisq.strata = res$chisq.strata) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, N1, cIRiske.p, cOe.p) r2 <- c(c, d, N0, cIRisko.p, cOo.p) r3 <- c(M1, M0, M0 + M1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Inc risk *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.count", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method == "cohort.count", multiple strata: if(method == "cohort.count" & n.strata > 1){ ## Verbose part: massoc <- list( RR.strata.wald = res$wRR.strata, RR.srata.score = res$scRR.strata, RR.crude.wald = res$wRR.crude, RR.crude.score = res$scRR.crude, RR.mh = res$RR.mh, OR.strata.wald = res$wOR.strata, OR.strata.cfield = res$cfOR.strata, OR.strata.score = res$scOR.strata, OR.strata.mle = res$mOR.strata, OR.crude.wald = res$wOR.crude, OR.crude.cfield = res$cfOR.crude, OR.crude.score = res$scOR.crude, OR.crude.mle = res$cmOR.crude, OR.mh = res$OR.mh, ARe.strata.wald = res$wARisk.strata, ARe.strata.score = res$scARisk.strata, ARe.crude.wald = res$wARisk.crude, ARe.crude.score = res$scARisk.crude, AR.mh = res$ARisk.mh, ARp.strata.wald = res$wPARisk.strata, ARp.strata.piri = res$pPARisk.strata, AFe.strata = res$AFRisk.strata, AFp.strata = res$PAFRisk.strata, chisq.strata = res$chisq.strata, chisq.crude = res$chisq.crude, chisq.mh = res$chisq.mh, RR.homog = res$RR.homog, OR.homog = res$OR.homog) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, sN1, cIRiske.p, cOe.p) r2 <- c(sc, sd, sN0, cIRisko.p, cOo.p) r3 <- c(sM1, sM0, sM0 + sM1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Inc risk *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc, sM1) r2 <- c(sb, sd, sM0) r3 <- c(sN1, sN0, sN0 + sN1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.count", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method = "cohort.time", single strata: if(method == "cohort.time" & 
n.strata == 1){ ## Verbose part: massoc <- list( IRR.strata = res$IRR.strata, ARe.strata = res$ARate.strata, ARp.strata = res$PARate.strata, AFe.strata = res$AFRate.strata, AFp.strata = res$PAFRate.strata, chisq.strata = res$chisq.strata) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, cIRatee.p) r2 <- c(c, d, cIRateo.p) r3 <- c(M1, M0, cIRatepop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Time at risk", " Inc rate *") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Time at risk", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.time", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method = "cohort.time", multiple strata: if(method == "cohort.time" & n.strata > 1){ ## Verbose part: massoc <- list( IRR.strata = res$IRR.strata, IRR.crude = res$IRR.crude, IRR.mh = res$IRR.mh, AR.strata = res$ARate.strata, AR.crude = res$ARate.crude, AR.mh = res$ARate.mh, ARp.strata = res$PARate.strata, AFp.strata = res$PAFRate.strata, chisq.strata = res$chisq.strata, chisq.crude = res$chisq.crude, chisq.mh = res$chisq.mh) ## RR.homog = res$RR.homog, ## OR.homog = res$OR.homog, ## AR.homog = res$AR.homog) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, cIRatee.p) r2 <- c(sc, sd, cIRateo.p) r3 <- c(sM1, sM0, cIRatepop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Time at risk", " Inc rate *") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc) r2 <- c(sb, sd) r3 <- c(sN1, sN0) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.time", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method == "case.control", single strata: if(method == "case.control" & n.strata == 1){ ## Verbose part: massoc <- list( OR.strata.wald = res$wOR.strata, OR.strata.cfield = res$cfOR.strata, OR.strata.score = res$scOR.strata, OR.strata.mle = res$mOR.strata, ARe.strata.wald = res$wARisk.strata, ARe.strata.score = res$scARisk.strata, ARp.strata.wald = res$wPARisk.strata, ARp.strata.piri = res$pPARisk.strata, AFeest.strata = res$AFest.strata, AFpest.strata = res$PAFest.strata, chisq.strata = res$chisq.strata) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, N1, cIRiske.p, cOe.p) r2 <- c(c, d, N0, cIRisko.p, cOo.p) r3 <- c(M1, M0, M0 + M1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- 
c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "case.control", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method == "case.control", multiple strata: if(method == "case.control" & n.strata > 1){ ## Verbose part: massoc <- list( OR.strata.wald = res$wOR.strata, OR.strata.cfield = res$cfOR.strata, OR.strata.score = res$scOR.strata, OR.strata.mle = res$mOR.strata, OR.crude.wald = res$wOR.crude, OR.crude.cfield = res$cfOR.crude, OR.crude.score = res$scOR.crude, OR.crude.mle = res$cmOR.crude, OR.mh = res$OR.mh, ARe.strata.wald = res$wARisk.strata, ARe.strata.score = res$scARisk.strata, ARe.crude.wald = res$wARisk.crude, ARe.crude.score = res$scARisk.crude, AR.mh = res$ARisk.mh, ARp.strata.wald = res$wPARisk.strata, ARp.strata.piri = res$pPARisk.strata, ARp.crude.wald = res$wPARisk.crude, ARp.crude.piri = res$pPARisk.crude, AFest.strata = res$AFest.strata, AFpest.strata = res$PAFest.strata, chisq.strata = res$chisq.strata, chisq.crude = res$chisq.crude, chisq.mh = res$chisq.mh, OR.homog = res$OR.homog) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, sN1, cIRiske.p, cOe.p) r2 <- c(sc, sd, sN0, cIRisko.p, cOo.p) r3 <- c(sM1, sM0, sM0 + sM1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc, sM1) r2 <- c(sb, sd, sM0) r3 <- c(sN1, sN0, sN0 + sN1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "case.control", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method == "cross.sectional", single strata: if(method == "cross.sectional" & n.strata == 1){ ## Verbose part: massoc <- list( PR.strata.wald = res$wRR.strata, PR.srata.score = res$scRR.strata, OR.strata.wald = res$wOR.strata, OR.strata.cfield = res$cfOR.strata, OR.strata.score = res$scOR.strata, OR.strata.mle = res$mOR.strata, ARe.strata.wald = res$wARisk.strata, ARe.strata.score = res$scARisk.strata, ARp.strata.wald = res$wPARisk.strata, ARp.strata.piri = res$pPARisk.strata, AFe.strata = res$AFRisk.strata, AFp.strata = res$PAFRisk.strata, chisq.strata = res$chisq.strata) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, N1, cIRiske.p, cOe.p) r2 <- c(c, d, N0, cIRisko.p, cOo.p) r3 <- c(M1, M0, M0 + M1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cross.sectional", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## method == 
"cross.sectional", multiple strata: if(method == "cross.sectional" & n.strata > 1){ ## Verbose part: massoc <- list( PR.strata.wald = res$wRR.strata, PR.srata.score = res$scRR.strata, PR.crude.wald = res$wRR.crude, PR.crude.score = res$scRR.crude, PR.mh = res$RR.mh, OR.strata.wald = res$wOR.strata, OR.strata.cfield = res$cfOR.strata, OR.strata.score = res$scOR.strata, OR.strata.mle = res$mOR.strata, OR.crude.wald = res$wOR.crude, OR.crude.cfield = res$cfOR.crude, OR.crude.score = res$scOR.crude, OR.crude.mle = res$cmOR.crude, OR.mh = res$OR.mh, ARe.strata.wald = res$wARisk.strata, ARe.strata.score = res$scARisk.strata, ARe.crude.wald = res$wARisk.crude, ARe.crude.score = res$scARisk.crude, AR.mh = res$ARisk.mh, ARp.strata.wald = res$wPARisk.strata, ARp.strata.piri = res$pPARisk.strata, AFe.strata = res$AFRisk.strata, AFp.strata = res$PAFRisk.strata, chisq.strata = res$chisq.strata, chisq.crude = res$chisq.crude, chisq.mh = res$chisq.mh, PR.homog = res$RR.homog, OR.homog = res$OR.homog) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, sN1, cIRiske.p, cOe.p) r2 <- c(sc, sd, sN0, cIRisko.p, cOo.p) r3 <- c(sM1, sM0, sM1 + sM0, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc, sM1) r2 <- c(sb, sd, sM0) r3 <- c(sN1, sN0, sN0 + sN1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cross.sectional", n.strata = n.strata, conf.level = conf.level, res = res, massoc = massoc, tab = tab) } ## Set the class of the output object: class(out) <- "epi.2by2" ## Return it as the output: return(out) } ## Print method for epi.2by2: print.epi.2by2 <- function(x, ...) 
{ ## cohort.count --- single strata if(x$method == "cohort.count" & x$n.strata == 1){ print(x$tab) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nInc risk ratio (W) %.2f (%.2f, %.2f)", wRR.strata[1], wRR.strata[2], wRR.strata[3] )) cat(sprintf("\nOdds ratio (W) %.2f (%.2f, %.2f)", wOR.strata[1], wOR.strata[2], wOR.strata[3] )) cat(sprintf("\nAttrib risk (W) * %.2f (%.2f, %.2f)", wARisk.strata[1], wARisk.strata[2], wARisk.strata[3] )) cat(sprintf("\nAttrib risk in population (W) * %.2f (%.2f, %.2f)", wPARisk.strata[1], wPARisk.strata[2], wPARisk.strata[3] )) cat(sprintf("\nAttrib fraction in exposed (%%) %.2f (%.2f, %.2f)", AFRisk.strata[1] * 100, AFRisk.strata[2] * 100, AFRisk.strata[3] * 100 )) cat(sprintf("\nAttrib fraction in population (%%) %.2f (%.2f, %.2f)", PAFRisk.strata[1] * 100, PAFRisk.strata[2] * 100, PAFRisk.strata[3] * 100 )) }) cat("\n---------------------------------------------------------") p <- ifelse(as.numeric(x$res$chisq.strata)[3] < 0.001, "< 0.001", round(as.numeric(x$res$chisq.strata)[3], digits = 3)) cat("\n", "X2 test statistic:", as.numeric(round(x$res$chisq.strata[1], digits = 3)), "p-value:", p) cat("\n", "W: Wald confidence limits") cat("\n", "*", x$res$count.units, "\n") } ## cohort.count --- multiple strata if(x$method == "cohort.count" & x$n.strata > 1){ print(x$tab) cat("\n") cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nInc risk ratio (W) (crude) %.2f (%.2f, %.2f)", wRR.crude[1], wRR.crude[2], wRR.crude[3] )) cat(sprintf("\nInc risk ratio (M-H) %.2f (%.2f, %.2f)", RR.mh$est, RR.mh$lower, RR.mh$upper )) cat(sprintf("\nInc risk ratio (crude:M-H) %.2f", round(wRR.crude[1] / RR.mh[1], digits = 2) )) cat(sprintf("\nOdds ratio (W) (crude) %.2f (%.2f, %.2f)", wOR.crude[1], wOR.crude[2], wOR.crude[3] )) cat(sprintf("\nOdds ratio (M-H) %.2f (%.2f, %.2f)", OR.mh$est, OR.mh$lower, OR.mh$upper )) cat(sprintf("\nOdds ratio (crude:M-H) %.2f", round(wOR.crude[1] / OR.mh[1], digits = 2) )) cat(sprintf("\nAttrib risk (W) (crude) * %.2f (%.2f, %.2f)", wARisk.crude[1], wARisk.crude[2], wARisk.crude[3] )) cat(sprintf("\nAttrib risk (M-H) * %.2f (%.2f, %.2f)", ARisk.mh$est, ARisk.mh$lower, ARisk.mh$upper )) cat(sprintf("\nAttrib risk (crude:M-H) %.2f", round(wARisk.crude[1] / ARisk.mh[1], digits = 2) )) }) cat("\n---------------------------------------------------------") rrp <- ifelse(as.numeric(x$res$RR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$RR.homog)[3], digits = 3)) cat("\n", "Test of homogeneity of IRR: X2 test statistic:", as.numeric(round(x$res$RR.homog[1], digits = 3)), "p-value:", rrp) orp <- ifelse(as.numeric(x$res$OR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$OR.homog)[3], digits = 3)) cat("\n", "Test of homogeneity of OR: X2 test statistic:", as.numeric(round(x$res$OR.homog[1], digits = 3)), "p-value:", orp) cat("\n", "W: Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel") cat("\n", "*", x$res$count.units, "\n") } ## cohort.time --- single strata if(x$method == "cohort.time" & x$n.strata == 1){ print(x$tab) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nInc rate ratio %.2f (%.2f, %.2f)", IRR.crude[1], IRR.crude[2], IRR.crude[3] )) cat(sprintf("\nAttrib rate * %.2f (%.2f, %.2f)", ARate.crude[1], 
ARate.crude[2], ARate.crude[3] )) cat(sprintf("\nAttrib rate in population * %.2f (%.2f, %.2f)", PARate.crude[1], PARate.crude[2], PARate.crude[3] )) cat(sprintf("\nAttrib fraction in exposed (%%) %.2f (%.2f, %.2f)", AFRate.crude[1] * 100, AFRate.crude[2] * 100, AFRate.crude[3] * 100 )) cat(sprintf("\nAttrib fraction in population (%%) %.2f (%.2f, %.2f)", PAFRate.crude[1] * 100, PAFRate.crude[2] * 100, PAFRate.crude[3] * 100 )) }) cat("\n---------------------------------------------------------") p <- ifelse(as.numeric(x$res$chisq.strata)[3] < 0.001, "< 0.001", round(as.numeric(x$res$chisq.strata)[3], digits = 3)) cat("\n", "X2 test statistic:", as.numeric(round(x$res$chisq.strata[1], digits = 3)), "p-value:", p) cat("\n", "*", x$res$time.units, "\n") } ## cohort.time --- multiple strata if(x$method == "cohort.time" & x$n.strata > 1){ print(x$tab) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nInc rate ratio (crude) %.2f (%.2f, %.2f)", IRR.crude[1], IRR.crude[2], IRR.crude[3] )) cat(sprintf("\nInc rate ratio (M-H) %.2f (%.2f, %.2f)", IRR.mh[1], IRR.mh[2], IRR.mh[3] )) cat(sprintf("\nInc rate ratio (crude:M-H) %.2f", round(IRR.crude[1] / IRR.mh[1], digits = 2) )) cat(sprintf("\nAttrib rate (crude) * %.2f (%.2f, %.2f)", ARate.crude[1], ARate.crude[2], ARate.crude[3] )) cat(sprintf("\nAttrib rate (M-H) * %.2f (%.2f, %.2f)", ARate.mh[1], ARate.mh[2], ARate.mh[3] )) cat(sprintf("\nAttrib rate (crude:M-H) %.2f", round(ARate.crude[1] / ARate.mh[1], digits = 2) )) }) cat("\n---------------------------------------------------------") # rrp <- ifelse(as.numeric(x$res$RR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$RR.homog)[3], digits = 3)) # cat("\n", "Test of homogeneity of IRR: X2 test statistic:", as.numeric(round(x$res$RR.homog[1], digits = 3)), "p-value:", rrp) # orp <- ifelse(as.numeric(x$res$OR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$OR.homog)[3], digits = 3)) # cat("\n", "Test of homogeneity of OR: X2 test statistic:", as.numeric(round(x$res$OR.homog[1], digits = 3)), "p-value:", orp) cat("\n", "M-H: Mantel-Haenszel") cat("\n", "*", x$res$time.units, "\n") } ## case.control --- single strata if(x$method == "case.control" & x$n.strata == 1){ print(x$tab) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nOdds ratio (W) %.2f (%.2f, %.2f)", wOR.strata[1], wOR.strata[2], wOR.strata[3] )) cat(sprintf("\nAttrib prevalence (W) * %.2f (%.2f, %.2f)", wARisk.strata[1], wARisk.strata[2], wARisk.strata[3] )) cat(sprintf("\nAttrib prevalence in population (W) * %.2f (%.2f, %.2f)", wPARisk.strata[1], wPARisk.strata[2], wPARisk.strata[3] )) cat(sprintf("\nAttrib fraction (est) in exposed (%%) %.2f (%.2f, %.2f)", AFest.strata[1] * 100, AFest.strata[2] * 100, AFest.strata[3] * 100 )) cat(sprintf("\nAttrib fraction (est) in population (%%) %.2f (%.2f, %.2f)", PAFest.strata[1] * 100, PAFest.strata[2] * 100, PAFest.strata[3] * 100 )) }) cat("\n---------------------------------------------------------") p <- ifelse(as.numeric(x$res$chisq.strata)[3] < 0.001, "< 0.001", round(as.numeric(x$res$chisq.strata)[3], digits = 3)) cat("\n", "X2 test statistic:", as.numeric(round(x$res$chisq.strata[1], digits = 3)), "p-value:", p) cat("\n", "W: Wald confidence limits") cat("\n", "*", x$res$count.units, "\n") } ## case.control --- multiple strata if(x$method == "case.control" & x$n.strata > 1){ print(x$tab) cat("\nPoint 
estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nOdds ratio (W) (crude) %.2f (%.2f, %.2f)", wOR.crude[1], wOR.crude[2], wOR.crude[3] )) cat(sprintf("\nOdds ratio (M-H) %.2f (%.2f, %.2f)", OR.mh[1], OR.mh[2], OR.mh[3] )) cat(sprintf("\nOdds ratio (crude:M-H) %.2f", round(wOR.crude[1] / OR.mh[1], digits = 2) )) cat(sprintf("\nAttrib prevalence (W) (crude) * %.2f (%.2f, %.2f)", wARisk.crude[1], wARisk.crude[2], wARisk.crude[3] )) cat(sprintf("\nAttrib prevalence (M-H) * %.2f (%.2f, %.2f)", ARisk.mh[1], ARisk.mh[2], ARisk.mh[3] )) cat(sprintf("\nAttrib prevalence (crude:M-H) %.2f", round(wARisk.crude[1] / ARisk.mh[1], digits = 2) )) }) cat("\n---------------------------------------------------------") orp <- ifelse(as.numeric(x$res$OR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$OR.homog)[3], digits = 3)) cat("\n", "Test of homogeneity of OR: X2 test statistic:", as.numeric(round(x$res$OR.homog[1], digits = 3)), "p-value:", orp) cat("\n", "W: Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel") cat("\n", "*", x$res$count.units, "\n") } ## cross.sectional -- single strata if(x$method == "cross.sectional" & x$n.strata == 1){ print(x$tab) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nPrevalence ratio (W) %.2f (%.2f, %.2f)", wRR.strata[1], wRR.strata[2], wRR.strata[3] )) cat(sprintf("\nOdds ratio (W) %.2f (%.2f, %.2f)", wOR.strata[1], wOR.strata[2], wOR.strata[3] )) cat(sprintf("\nAttrib prevalence (W) * %.2f (%.2f, %.2f)", wARisk.strata[1], wARisk.strata[2], wARisk.strata[3] )) cat(sprintf("\nAttrib prevalence in population * %.2f (%.2f, %.2f)", wPARisk.strata[1], wPARisk.strata[2], wPARisk.strata[3] )) cat(sprintf("\nAttrib fraction in exposed (%%) %.2f (%.2f, %.2f)", AFRisk.strata[1] * 100, AFRisk.strata[2] * 100, AFRisk.strata[3] * 100 )) cat(sprintf("\nAttrib fraction in population (%%) %.2f (%.2f, %.2f)", PAFRisk.strata[1] * 100, PAFRisk.strata[2] * 100, PAFRisk.strata[3] * 100 )) }) cat("\n---------------------------------------------------------") p <- ifelse(as.numeric(x$res$chisq.strata)[3] < 0.001, "< 0.001", round(as.numeric(x$res$chisq.strata)[3], digits = 3)) cat("\n", "X2 test statistic:", as.numeric(round(x$res$chisq.strata[1], digits = 3)), "p-value:", p) cat("\n", "W: Wald confidence limits") cat("\n", "*", x$res$count.units, "\n") } ## cross.sectional --- multiple strata if(x$method == "cross.sectional" & x$n.strata > 1){ print(x$tab) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$res, { cat(sprintf("\nPrevalence ratio (W) (crude) %.2f (%.2f, %.2f)", wRR.crude[1], wRR.crude[2], wRR.crude[3] )) cat(sprintf("\nPrevalence ratio (M-H) %.2f (%.2f, %.2f)", RR.mh[1], RR.mh[2], RR.mh[3] )) cat(sprintf("\nPrevalence ratio (crude:M-H) %.2f", round(wRR.crude[1] / RR.mh[1], digits = 2) )) cat(sprintf("\nOdds ratio (W) (crude) %.2f (%.2f, %.2f)", wOR.crude[1], wOR.crude[2], wOR.crude[3] )) cat(sprintf("\nOdds ratio (M-H) %.2f (%.2f, %.2f)", OR.mh$est, OR.mh$lower, OR.mh$upper )) cat(sprintf("\nOdds ratio (crude:M-H) %.2f", round(wOR.crude[1] / OR.mh[1], digits = 2) )) cat(sprintf("\nAtributable prevalence (W) (crude) * %.2f (%.2f, %.2f)", wARisk.crude[1], wARisk.crude[2], wARisk.crude[3] )) cat(sprintf("\nAtributable prevalence (M-H) * %.2f (%.2f, %.2f)", ARisk.mh[1], ARisk.mh[2], 
ARisk.mh[3] )) cat(sprintf("\nAtributable prevalence (crude:M-H) %.2f", round(wARisk.crude[1] /ARisk.mh[1], digits = 2) )) }) cat("\n---------------------------------------------------------") rrp <- ifelse(as.numeric(x$res$RR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$RR.homog)[3], digits = 3)) cat("\n", "Test of homogeneity of IRR: X2 test statistic:", as.numeric(round(x$res$RR.homog[1], digits = 3)), "p-value:", rrp) orp <- ifelse(as.numeric(x$res$OR.homog)[3] < 0.001, "< 0.001", round(as.numeric(x$res$OR.homog)[3], digits = 3)) cat("\n", "Test of homogeneity of OR: X2 test statistic:", as.numeric(round(x$res$OR.homog[1], digits = 3)), "p-value:", orp) cat("\n", "W: Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel") cat("\n", "*", x$res$count.units, "\n") } } ## Summary method for epi.2by2: summary.epi.2by2 <- function(object, ...) { return(object$massoc) } epiR/R/epi.ltd.R0000644000176200001440000007404612601641614013050 0ustar liggesusersepi.ltd <- function(dat, std = "305"){ if (std == "305"){ std <- 305 a0 <- rep(1, times = 305) a1 <- c(-1,-0.99342,-0.98684,-0.98026,-0.97368,-0.96711,-0.96053,-0.95395,-0.94737, -0.94079,-0.93421,-0.92763,-0.92105,-0.91447,-0.90789,-0.90132,-0.89474, -0.88816,-0.88158,-0.875,-0.86842,-0.86184,-0.85526,-0.84868,-0.84211, -0.83553,-0.82895,-0.82237,-0.81579,-0.80921,-0.80263,-0.79605,-0.78947, -0.78289,-0.77632,-0.76974,-0.76316,-0.75658,-0.75,-0.74342,-0.73684, -0.73026,-0.72368,-0.71711,-0.71053,-0.70395,-0.69737,-0.69079,-0.68421, -0.67763,-0.67105,-0.66447,-0.65789,-0.65132,-0.64474,-0.63816,-0.63158, -0.625,-0.61842,-0.61184,-0.60526,-0.59868,-0.59211,-0.58553,-0.57895, -0.57237,-0.56579,-0.55921,-0.55263,-0.54605,-0.53947,-0.53289,-0.52632, -0.51974,-0.51316,-0.50658,-0.5,-0.49342,-0.48684,-0.48026,-0.47368, -0.46711,-0.46053,-0.45395,-0.44737,-0.44079,-0.43421,-0.42763,-0.42105, -0.41447,-0.40789,-0.40132,-0.39474,-0.38816,-0.38158,-0.375,-0.36842, -0.36184,-0.35526,-0.34868,-0.34211,-0.33553,-0.32895,-0.32237,-0.31579, -0.30921,-0.30263,-0.29605,-0.28947,-0.28289,-0.27632,-0.26974,-0.26316, -0.25658,-0.25,-0.24342,-0.23684,-0.23026,-0.22368,-0.21711,-0.21053, -0.20395,-0.19737,-0.19079,-0.18421,-0.17763,-0.17105,-0.16447,-0.15789, -0.15132,-0.14474,-0.13816,-0.13158,-0.125,-0.11842,-0.11184,-0.10526, -0.09868,-0.09211,-0.08553,-0.07895,-0.07237,-0.06579,-0.05921,-0.05263, -0.04605,-0.03947,-0.03289,-0.02632,-0.01974,-0.01316,-0.00658,0,0.00658, 0.01316,0.01974,0.02632,0.03289,0.03947,0.04605,0.05263,0.05921,0.06579, 0.07237,0.07895,0.08553,0.09211,0.09868,0.10526,0.11184,0.11842,0.125, 0.13158,0.13816,0.14474,0.15132,0.15789,0.16447,0.17105,0.17763,0.18421, 0.19079,0.19737,0.20395,0.21053,0.21711,0.22368,0.23026,0.23684,0.24342, 0.25,0.25658,0.26316,0.26974,0.27632,0.28289,0.28947,0.29605,0.30263, 0.30921,0.31579,0.32237,0.32895,0.33553,0.34211,0.34868,0.35526,0.36184, 0.36842,0.375,0.38158,0.38816,0.39474,0.40132,0.40789,0.41447,0.42105, 0.42763,0.43421,0.44079,0.44737,0.45395,0.46053,0.46711,0.47368,0.48026, 0.48684,0.49342,0.5,0.50658,0.51316,0.51974,0.52632,0.53289,0.53947, 0.54605,0.55263,0.55921,0.56579,0.57237,0.57895,0.58553,0.59211,0.59868, 0.60526,0.61184,0.61842,0.625,0.63158,0.63816,0.64474,0.65132,0.65789, 0.66447,0.67105,0.67763,0.68421,0.69079,0.69737,0.70395,0.71053,0.71711, 0.72368,0.73026,0.73684,0.74342,0.75,0.75658,0.76316,0.76974,0.77632, 0.78289,0.78947,0.79605,0.80263,0.80921,0.81579,0.82237,0.82895,0.83553, 0.84211,0.84868,0.85526,0.86184,0.86842,0.875,0.88158,0.88816,0.89474, 
0.90132,0.90789,0.91447,0.92105,0.92763,0.93421,0.94079,0.94737,0.95395, 0.96053,0.96711,0.97368,0.98026,0.98684,0.99342,1) a2 <- c(1,0.98033,0.96079,0.94137,0.92209,0.90294,0.88392,0.86502,0.84626,0.82763, 0.80912,0.79075,0.77251,0.75439,0.73641,0.71856,0.70083,0.68324,0.66577, 0.64844,0.63123,0.61416,0.59721,0.5804,0.56371,0.54716,0.53073,0.51443, 0.49827,0.48223,0.46633,0.45055,0.4349,0.41939,0.404,0.38874,0.37361, 0.35862,0.34375,0.32901,0.3144,0.29993,0.28558,0.27136,0.25727,0.24331, 0.22948,0.21579,0.20222,0.18878,0.17547,0.16229,0.14924,0.13632,0.12353, 0.11087,0.09834,0.08594,0.07367,0.06153,0.04952,0.03763,0.02588,0.01426, 0.00277,-0.00859,-0.01982,-0.03093,-0.0419,-0.05274,-0.06345,-0.07403, -0.08449,-0.09481,-0.105,-0.11507,-0.125,-0.1348,-0.14448,-0.15402,-0.16343, -0.17272,-0.18187,-0.1909,-0.19979,-0.20856,-0.21719,-0.2257,-0.23407, -0.24232,-0.25043,-0.25842,-0.26627,-0.274,-0.2816,-0.28906,-0.2964, -0.30361,-0.31068,-0.31763,-0.32445,-0.33113,-0.33769,-0.34412,-0.35042, -0.35658,-0.36262,-0.36853,-0.37431,-0.37996,-0.38547,-0.39086,-0.39612, -0.40125,-0.40625,-0.41112,-0.41586,-0.42047,-0.42495,-0.4293,-0.43352, -0.43761,-0.44157,-0.4454,-0.4491,-0.45267,-0.45611,-0.45942,-0.4626, -0.46566,-0.46858,-0.47137,-0.47403,-0.47656,-0.47896,-0.48124,-0.48338, -0.48539,-0.48727,-0.48903,-0.49065,-0.49214,-0.49351,-0.49474,-0.49584, -0.49682,-0.49766,-0.49838,-0.49896,-0.49942,-0.49974,-0.49994,-0.5, -0.49994,-0.49974,-0.49942,-0.49896,-0.49838,-0.49766,-0.49682,-0.49584, -0.49474,-0.49351,-0.49214,-0.49065,-0.48903,-0.48727,-0.48539,-0.48338, -0.48124,-0.47896,-0.47656,-0.47403,-0.47137,-0.46858,-0.46566,-0.4626, -0.45942,-0.45611,-0.45267,-0.4491,-0.4454,-0.44157,-0.43761,-0.43352, -0.4293,-0.42495,-0.42047,-0.41586,-0.41112,-0.40625,-0.40125,-0.39612, -0.39086,-0.38547,-0.37996,-0.37431,-0.36853,-0.36262,-0.35658,-0.35042, -0.34412,-0.33769,-0.33113,-0.32445,-0.31763,-0.31068,-0.30361,-0.2964, -0.28906,-0.2816,-0.274,-0.26627,-0.25842,-0.25043,-0.24232,-0.23407, -0.2257,-0.21719,-0.20856,-0.19979,-0.1909,-0.18187,-0.17272,-0.16343, -0.15402,-0.14448,-0.1348,-0.125,-0.11507,-0.105,-0.09481,-0.08449,-0.07403, -0.06345,-0.05274,-0.0419,-0.03093,-0.01982,-0.00859,0.00277,0.01426, 0.02588,0.03763,0.04952,0.06153,0.07367,0.08594,0.09834,0.11087,0.12353, 0.13632,0.14924,0.16229,0.17547,0.18878,0.20222,0.21579,0.22948,0.24331, 0.25727,0.27136,0.28558,0.29993,0.3144,0.32901,0.34375,0.35862,0.37361, 0.38874,0.404,0.41939,0.4349,0.45055,0.46633,0.48223,0.49827,0.51443, 0.53073,0.54716,0.56371,0.5804,0.59721,0.61416,0.63123,0.64844,0.66577, 0.68324,0.70083,0.71856,0.73641,0.75439,0.77251,0.79075,0.80912,0.82763, 0.84626,0.86502,0.88392,0.90294,0.92209,0.94137,0.96079,0.98033,1) a3 <- c(-1,-0.96085,-0.92235,-0.88448,-0.84725,-0.81066,-0.77469,-0.73935,-0.70462, -0.67051,-0.63701,-0.60412,-0.57183,-0.54014,-0.50904,-0.47853,-0.44861, -0.41926,-0.3905,-0.3623,-0.33468,-0.30762,-0.28111,-0.25517,-0.22977, -0.20492,-0.18061,-0.15685,-0.13361,-0.11091,-0.08873,-0.06707,-0.04593, -0.0253,-0.00517,0.01444,0.03356,0.05218,0.07031,0.08796,0.10512,0.1218, 0.13801,0.15375,0.16902,0.18383,0.19819,0.21209,0.22554,0.23855,0.25112, 0.26326,0.27496,0.28623,0.29709,0.30752,0.31754,0.32715,0.33635,0.34515, 0.35356,0.36157,0.36919,0.37643,0.38329,0.38977,0.39589,0.40163,0.40701, 0.41203,0.4167,0.42102,0.42499,0.42862,0.43191,0.43487,0.4375,0.43981, 0.44179,0.44346,0.44482,0.44587,0.44661,0.44706,0.44721,0.44708,0.44665, 0.44595,0.44496,0.44371,0.44218,0.44039,0.43834,0.43603,0.43347,0.43066, 
0.42761,0.42432,0.4208,0.41704,0.41306,0.40886,0.40444,0.3998,0.39496, 0.38991,0.38466,0.37921,0.37357,0.36774,0.36173,0.35554,0.34918,0.34264, 0.33594,0.32907,0.32205,0.31487,0.30755,0.30007,0.29246,0.28471,0.27683, 0.26882,0.26069,0.25244,0.24407,0.23559,0.227,0.21831,0.20953,0.20064, 0.19167,0.18262,0.17348,0.16427,0.15498,0.14562,0.1362,0.12673,0.11719, 0.10761,0.09797,0.0883,0.07858,0.06883,0.05906,0.04925,0.03943,0.02959, 0.01973,0.00987,0,-0.00987,-0.01973,-0.02959,-0.03943,-0.04925,-0.05906, -0.06883,-0.07858,-0.0883,-0.09797,-0.10761,-0.11719,-0.12673,-0.1362, -0.14562,-0.15498,-0.16427,-0.17348,-0.18262,-0.19167,-0.20064,-0.20953, -0.21831,-0.227,-0.23559,-0.24407,-0.25244,-0.26069,-0.26882,-0.27683, -0.28471,-0.29246,-0.30007,-0.30755,-0.31487,-0.32205,-0.32907,-0.33594, -0.34264,-0.34918,-0.35554,-0.36173,-0.36774,-0.37357,-0.37921,-0.38466, -0.38991,-0.39496,-0.3998,-0.40444,-0.40886,-0.41306,-0.41704,-0.4208, -0.42432,-0.42761,-0.43066,-0.43347,-0.43603,-0.43834,-0.44039,-0.44218, -0.44371,-0.44496,-0.44595,-0.44665,-0.44708,-0.44721,-0.44706,-0.44661, -0.44587,-0.44482,-0.44346,-0.44179,-0.43981,-0.4375,-0.43487,-0.43191, -0.42862,-0.42499,-0.42102,-0.4167,-0.41203,-0.40701,-0.40163,-0.39589, -0.38977,-0.38329,-0.37643,-0.36919,-0.36157,-0.35356,-0.34515,-0.33635, -0.32715,-0.31754,-0.30752,-0.29709,-0.28623,-0.27496,-0.26326,-0.25112, -0.23855,-0.22554,-0.21209,-0.19819,-0.18383,-0.16902,-0.15375,-0.13801, -0.1218,-0.10512,-0.08796,-0.07031,-0.05218,-0.03356,-0.01444,0.00517, 0.0253,0.04593,0.06707,0.08873,0.11091,0.13361,0.15685,0.18061,0.20492, 0.22977,0.25517,0.28111,0.30762,0.33468,0.3623,0.3905,0.41926,0.44861, 0.47853,0.50904,0.54014,0.57183,0.60412,0.63701,0.67051,0.70462,0.73935, 0.77469,0.81066,0.84725,0.88448,0.92235,0.96085,1) a4 <- c(1,0.93518,0.87228,0.81126,0.75211,0.69478,0.63926,0.5855,0.53349,0.4832, 0.43459,0.38764,0.34232,0.2986,0.25646,0.21587,0.1768,0.13923,0.10312, 0.06845,0.0352,0.00334,-0.02716,-0.05632,-0.08417,-0.11074,-0.13604, -0.1601,-0.18295,-0.20462,-0.22512,-0.24448,-0.26273,-0.27988,-0.29597, -0.31101,-0.32503,-0.33805,-0.3501,-0.36119,-0.37135,-0.3806,-0.38896, -0.39646,-0.40312,-0.40895,-0.41398,-0.41823,-0.42172,-0.42447,-0.4265, -0.42784,-0.42849,-0.42849,-0.42784,-0.42658,-0.42472,-0.42227,-0.41926, -0.41571,-0.41163,-0.40704,-0.40197,-0.39642,-0.39041,-0.38397,-0.37711, -0.36985,-0.3622,-0.35418,-0.34581,-0.3371,-0.32807,-0.31874,-0.30911, -0.29922,-0.28906,-0.27866,-0.26804,-0.25719,-0.24615,-0.23493,-0.22353, -0.21198,-0.20028,-0.18845,-0.1765,-0.16445,-0.15231,-0.14009,-0.12781, -0.11547,-0.10309,-0.09069,-0.07826,-0.06583,-0.0534,-0.04099,-0.0286, -0.01626,-0.00396,0.00828,0.02045,0.03254,0.04455,0.05645,0.06825,0.07993, 0.09149,0.10291,0.11419,0.12532,0.13629,0.14709,0.15771,0.16816,0.17841, 0.18847,0.19832,0.20796,0.21739,0.22659,0.23556,0.24429,0.25279,0.26103, 0.26902,0.27676,0.28423,0.29143,0.29836,0.30502,0.31139,0.31747,0.32327, 0.32878,0.33399,0.3389,0.3435,0.3478,0.3518,0.35548,0.35885,0.36191, 0.36465,0.36707,0.36917,0.37095,0.37241,0.37354,0.37435,0.37484,0.375, 0.37484,0.37435,0.37354,0.37241,0.37095,0.36917,0.36707,0.36465,0.36191, 0.35885,0.35548,0.3518,0.3478,0.3435,0.3389,0.33399,0.32878,0.32327, 0.31747,0.31139,0.30502,0.29836,0.29143,0.28423,0.27676,0.26902,0.26103, 0.25279,0.24429,0.23556,0.22659,0.21739,0.20796,0.19832,0.18847,0.17841, 0.16816,0.15771,0.14709,0.13629,0.12532,0.11419,0.10291,0.09149,0.07993, 0.06825,0.05645,0.04455,0.03254,0.02045,0.00828,-0.00396,-0.01626,-0.0286, 
-0.04099,-0.0534,-0.06583,-0.07826,-0.09069,-0.10309,-0.11547,-0.12781, -0.14009,-0.15231,-0.16445,-0.1765,-0.18845,-0.20028,-0.21198,-0.22353, -0.23493,-0.24615,-0.25719,-0.26804,-0.27866,-0.28906,-0.29922,-0.30911, -0.31874,-0.32807,-0.3371,-0.34581,-0.35418,-0.3622,-0.36985,-0.37711, -0.38397,-0.39041,-0.39642,-0.40197,-0.40704,-0.41163,-0.41571,-0.41926, -0.42227,-0.42472,-0.42658,-0.42784,-0.42849,-0.42849,-0.42784,-0.4265, -0.42447,-0.42172,-0.41823,-0.41398,-0.40895,-0.40312,-0.39646,-0.38896, -0.3806,-0.37135,-0.36119,-0.3501,-0.33805,-0.32503,-0.31101,-0.29597, -0.27988,-0.26273,-0.24448,-0.22512,-0.20462,-0.18295,-0.1601,-0.13604, -0.11074,-0.08417,-0.05632,-0.02716,0.00334,0.0352,0.06845,0.10312, 0.13923,0.1768,0.21587,0.25646,0.2986,0.34232,0.38764,0.43459,0.4832, 0.53349,0.5855,0.63926,0.69478,0.75211,0.81126,0.87228,0.93518,1) a5 <- c(-1,-0.90357,-0.81156,-0.72387,-0.64036,-0.56094,-0.48549,-0.41389,-0.34605, -0.28185,-0.22119,-0.16396,-0.11007,-0.05941,-0.01188,0.0326,0.07414, 0.11283,0.14877,0.18203,0.21272,0.24092,0.26671,0.29018,0.31141,0.33048, 0.34747,0.36247,0.37554,0.38677,0.39622,0.40397,0.41009,0.41465,0.41772, 0.41936,0.41964,0.41863,0.41638,0.41296,0.40843,0.40285,0.39627,0.38875, 0.38035,0.37112,0.3611,0.35036,0.33895,0.3269,0.31427,0.30111,0.28746, 0.27336,0.25886,0.24399,0.22881,0.21334,0.19762,0.1817,0.16561,0.14939, 0.13306,0.11666,0.10022,0.08377,0.06735,0.05098,0.03468,0.0185,0.00244, -0.01346,-0.02919,-0.04471,-0.06,-0.07506,-0.08984,-0.10435,-0.11855, -0.13243,-0.14598,-0.15917,-0.172,-0.18444,-0.1965,-0.20814,-0.21937, -0.23017,-0.24053,-0.25045,-0.2599,-0.2689,-0.27742,-0.28546,-0.29303, -0.3001,-0.30668,-0.31276,-0.31835,-0.32343,-0.32801,-0.33209,-0.33566, -0.33872,-0.34129,-0.34334,-0.3449,-0.34596,-0.34653,-0.3466,-0.34618, -0.34528,-0.3439,-0.34204,-0.33972,-0.33694,-0.3337,-0.33001,-0.32589, -0.32133,-0.31635,-0.31095,-0.30515,-0.29895,-0.29237,-0.28541,-0.27808, -0.2704,-0.26238,-0.25403,-0.24535,-0.23637,-0.22709,-0.21753,-0.20769, -0.1976,-0.18726,-0.1767,-0.16591,-0.15492,-0.14374,-0.13239,-0.12087, -0.10921,-0.09741,-0.0855,-0.07348,-0.06137,-0.04918,-0.03694,-0.02465, -0.01233,0,0.01233,0.02465,0.03694,0.04918,0.06137,0.07348,0.0855,0.09741, 0.10921,0.12087,0.13239,0.14374,0.15492,0.16591,0.1767,0.18726,0.1976, 0.20769,0.21753,0.22709,0.23637,0.24535,0.25403,0.26238,0.2704,0.27808, 0.28541,0.29237,0.29895,0.30515,0.31095,0.31635,0.32133,0.32589,0.33001, 0.3337,0.33694,0.33972,0.34204,0.3439,0.34528,0.34618,0.3466,0.34653, 0.34596,0.3449,0.34334,0.34129,0.33872,0.33566,0.33209,0.32801,0.32343, 0.31835,0.31276,0.30668,0.3001,0.29303,0.28546,0.27742,0.2689,0.2599, 0.25045,0.24053,0.23017,0.21937,0.20814,0.1965,0.18444,0.172,0.15917, 0.14598,0.13243,0.11855,0.10435,0.08984,0.07506,0.06,0.04471,0.02919, 0.01346,-0.00244,-0.0185,-0.03468,-0.05098,-0.06735,-0.08377,-0.10022, -0.11666,-0.13306,-0.14939,-0.16561,-0.1817,-0.19762,-0.21334,-0.22881, -0.24399,-0.25886,-0.27336,-0.28746,-0.30111,-0.31427,-0.3269,-0.33895, -0.35036,-0.3611,-0.37112,-0.38035,-0.38875,-0.39627,-0.40285,-0.40843, -0.41296,-0.41638,-0.41863,-0.41964,-0.41936,-0.41772,-0.41465,-0.41009, -0.40397,-0.39622,-0.38677,-0.37554,-0.36247,-0.34747,-0.33048,-0.31141, -0.29018,-0.26671,-0.24092,-0.21272,-0.18203,-0.14877,-0.11283,-0.07414, -0.0326,0.01188,0.05941,0.11007,0.16396,0.22119,0.28185,0.34605,0.41389, 0.48549,0.56094,0.64036,0.72387,0.81156,0.90357,1) } if (std == "270"){ std <- 270 a0 <- rep(1, times = 270) a1 <- 
c(-1,-0.99257,-0.98513,-0.9777,-0.97026,-0.96283,-0.95539,-0.94796,-0.94052, -0.93309,-0.92565,-0.91822,-0.91078,-0.90335,-0.89591,-0.88848,-0.88104, -0.87361,-0.86617,-0.85874,-0.8513,-0.84387,-0.83643,-0.829,-0.82156, -0.81413,-0.80669,-0.79926,-0.79182,-0.78439,-0.77695,-0.76952,-0.76208, -0.75465,-0.74721,-0.73978,-0.73234,-0.72491,-0.71747,-0.71004,-0.7026, -0.69517,-0.68773,-0.6803,-0.67286,-0.66543,-0.65799,-0.65056,-0.64312, -0.63569,-0.62825,-0.62082,-0.61338,-0.60595,-0.59851,-0.59108,-0.58364, -0.57621,-0.56877,-0.56134,-0.5539,-0.54647,-0.53903,-0.5316,-0.52416, -0.51673,-0.50929,-0.50186,-0.49442,-0.48699,-0.47955,-0.47212,-0.46468, -0.45725,-0.44981,-0.44238,-0.43494,-0.42751,-0.42007,-0.41264,-0.4052, -0.39777,-0.39033,-0.3829,-0.37546,-0.36803,-0.36059,-0.35316,-0.34572, -0.33829,-0.33086,-0.32342,-0.31599,-0.30855,-0.30112,-0.29368,-0.28625, -0.27881,-0.27138,-0.26394,-0.25651,-0.24907,-0.24164,-0.2342,-0.22677, -0.21933,-0.2119,-0.20446,-0.19703,-0.18959,-0.18216,-0.17472,-0.16729, -0.15985,-0.15242,-0.14498,-0.13755,-0.13011,-0.12268,-0.11524,-0.10781, -0.10037,-0.09294,-0.0855,-0.07807,-0.07063,-0.0632,-0.05576,-0.04833, -0.04089,-0.03346,-0.02602,-0.01859,-0.01115,-0.00372,0.00372,0.01115, 0.01859,0.02602,0.03346,0.04089,0.04833,0.05576,0.0632,0.07063,0.07807, 0.0855,0.09294,0.10037,0.10781,0.11524,0.12268,0.13011,0.13755,0.14498, 0.15242,0.15985,0.16729,0.17472,0.18216,0.18959,0.19703,0.20446,0.2119, 0.21933,0.22677,0.2342,0.24164,0.24907,0.25651,0.26394,0.27138,0.27881, 0.28625,0.29368,0.30112,0.30855,0.31599,0.32342,0.33086,0.33829,0.34572, 0.35316,0.36059,0.36803,0.37546,0.3829,0.39033,0.39777,0.4052,0.41264, 0.42007,0.42751,0.43494,0.44238,0.44981,0.45725,0.46468,0.47212,0.47955, 0.48699,0.49442,0.50186,0.50929,0.51673,0.52416,0.5316,0.53903,0.54647, 0.5539,0.56134,0.56877,0.57621,0.58364,0.59108,0.59851,0.60595,0.61338, 0.62082,0.62825,0.63569,0.64312,0.65056,0.65799,0.66543,0.67286,0.6803, 0.68773,0.69517,0.7026,0.71004,0.71747,0.72491,0.73234,0.73978,0.74721, 0.75465,0.76208,0.76952,0.77695,0.78439,0.79182,0.79926,0.80669,0.81413, 0.82156,0.829,0.83643,0.84387,0.8513,0.85874,0.86617,0.87361,0.88104, 0.88848,0.89591,0.90335,0.91078,0.91822,0.92565,0.93309,0.94052,0.94796, 0.95539,0.96283,0.97026,0.9777,0.98513,0.99257,1) a2 <- c(1,0.97778,0.95572,0.93383,0.91211,0.89055,0.86916,0.84793,0.82687,0.80597, 0.78524,0.76468,0.74428,0.72405,0.70398,0.68408,0.66435,0.64478,0.62538, 0.60614,0.58707,0.56817,0.54943,0.53085,0.51244,0.4942,0.47613,0.45822, 0.44047,0.42289,0.40548,0.38823,0.37115,0.35424,0.33749,0.3209,0.30449, 0.28824,0.27215,0.25623,0.24047,0.22489,0.20946,0.19421,0.17912,0.16419, 0.14943,0.13484,0.12041,0.10615,0.09205,0.07812,0.06436,0.05076,0.03733, 0.02406,0.01096,-0.00198,-0.01475,-0.02735,-0.03979,-0.05206,-0.06416, -0.0761,-0.08788,-0.09949,-0.11093,-0.12221,-0.13332,-0.14426,-0.15504, -0.16566,-0.1761,-0.18638,-0.1965,-0.20645,-0.21624,-0.22585,-0.23531, -0.24459,-0.25371,-0.26267,-0.27146,-0.28008,-0.28854,-0.29683,-0.30496, -0.31292,-0.32071,-0.32834,-0.3358,-0.3431,-0.35023,-0.3572,-0.36399, -0.37063,-0.3771,-0.3834,-0.38953,-0.3955,-0.40131,-0.40695,-0.41242, -0.41773,-0.42287,-0.42784,-0.43265,-0.43729,-0.44177,-0.44608,-0.45023, -0.45421,-0.45802,-0.46167,-0.46515,-0.46847,-0.47162,-0.47461,-0.47743, -0.48008,-0.48257,-0.48489,-0.48704,-0.48903,-0.49086,-0.49252,-0.49401, -0.49534,-0.4965,-0.49749,-0.49832,-0.49898,-0.49948,-0.49981,-0.49998, -0.49998,-0.49981,-0.49948,-0.49898,-0.49832,-0.49749,-0.4965,-0.49534, 
-0.49401,-0.49252,-0.49086,-0.48903,-0.48704,-0.48489,-0.48257,-0.48008, -0.47743,-0.47461,-0.47162,-0.46847,-0.46515,-0.46167,-0.45802,-0.45421, -0.45023,-0.44608,-0.44177,-0.43729,-0.43265,-0.42784,-0.42287,-0.41773, -0.41242,-0.40695,-0.40131,-0.3955,-0.38953,-0.3834,-0.3771,-0.37063, -0.36399,-0.3572,-0.35023,-0.3431,-0.3358,-0.32834,-0.32071,-0.31292, -0.30496,-0.29683,-0.28854,-0.28008,-0.27146,-0.26267,-0.25371,-0.24459, -0.23531,-0.22585,-0.21624,-0.20645,-0.1965,-0.18638,-0.1761,-0.16566, -0.15504,-0.14426,-0.13332,-0.12221,-0.11093,-0.09949,-0.08788,-0.0761, -0.06416,-0.05206,-0.03979,-0.02735,-0.01475,-0.00198,0.01096,0.02406, 0.03733,0.05076,0.06436,0.07812,0.09205,0.10615,0.12041,0.13484,0.14943, 0.16419,0.17912,0.19421,0.20946,0.22489,0.24047,0.25623,0.27215,0.28824, 0.30449,0.3209,0.33749,0.35424,0.37115,0.38823,0.40548,0.42289,0.44047, 0.45822,0.47613,0.4942,0.51244,0.53085,0.54943,0.56817,0.58707,0.60614, 0.62538,0.64478,0.66435,0.68408,0.70398,0.72405,0.74428,0.76468,0.78524, 0.80597,0.82687,0.84793,0.86916,0.89055,0.91211,0.93383,0.95572,0.97778, 1) a3 <- c(-1,-0.9558,-0.91243,-0.86987,-0.82813,-0.78719,-0.74705,-0.70769,-0.66913, -0.63135,-0.59433,-0.55809,-0.52261,-0.48788,-0.4539,-0.42067,-0.38817, -0.3564,-0.32536,-0.29504,-0.26542,-0.23651,-0.20831,-0.18079,-0.15397, -0.12782,-0.10235,-0.07755,-0.05341,-0.02993,-0.0071,0.01509,0.03664, 0.05756,0.07785,0.09752,0.11658,0.13503,0.15288,0.17014,0.1868,0.20289, 0.2184,0.23333,0.24771,0.26152,0.27479,0.28751,0.29968,0.31133,0.32245, 0.33305,0.34313,0.3527,0.36177,0.37035,0.37844,0.38604,0.39316,0.39981, 0.406,0.41173,0.417,0.42183,0.42621,0.43017,0.43369,0.43679,0.43947, 0.44175,0.44362,0.44509,0.44618,0.44687,0.44719,0.44714,0.44671,0.44593, 0.44479,0.44331,0.44148,0.43932,0.43682,0.43401,0.43087,0.42742,0.42367, 0.41962,0.41528,0.41065,0.40574,0.40056,0.3951,0.38939,0.38342,0.3772, 0.37073,0.36403,0.3571,0.34994,0.34257,0.33498,0.32718,0.31919,0.311, 0.30262,0.29406,0.28532,0.27642,0.26735,0.25812,0.24875,0.23923,0.22957, 0.21977,0.20985,0.19981,0.18966,0.1794,0.16904,0.15858,0.14803,0.1374, 0.12669,0.11591,0.10507,0.09416,0.08321,0.07221,0.06117,0.05009,0.03899, 0.02786,0.01673,0.00558,-0.00558,-0.01673,-0.02786,-0.03899,-0.05009, -0.06117,-0.07221,-0.08321,-0.09416,-0.10507,-0.11591,-0.12669,-0.1374, -0.14803,-0.15858,-0.16904,-0.1794,-0.18966,-0.19981,-0.20985,-0.21977, -0.22957,-0.23923,-0.24875,-0.25812,-0.26735,-0.27642,-0.28532,-0.29406, -0.30262,-0.311,-0.31919,-0.32718,-0.33498,-0.34257,-0.34994,-0.3571, -0.36403,-0.37073,-0.3772,-0.38342,-0.38939,-0.3951,-0.40056,-0.40574, -0.41065,-0.41528,-0.41962,-0.42367,-0.42742,-0.43087,-0.43401,-0.43682, -0.43932,-0.44148,-0.44331,-0.44479,-0.44593,-0.44671,-0.44714,-0.44719, -0.44687,-0.44618,-0.44509,-0.44362,-0.44175,-0.43947,-0.43679,-0.43369, -0.43017,-0.42621,-0.42183,-0.417,-0.41173,-0.406,-0.39981,-0.39316, -0.38604,-0.37844,-0.37035,-0.36177,-0.3527,-0.34313,-0.33305,-0.32245, -0.31133,-0.29968,-0.28751,-0.27479,-0.26152,-0.24771,-0.23333,-0.2184, -0.20289,-0.1868,-0.17014,-0.15288,-0.13503,-0.11658,-0.09752,-0.07785, -0.05756,-0.03664,-0.01509,0.0071,0.02993,0.05341,0.07755,0.10235,0.12782, 0.15397,0.18079,0.20831,0.23651,0.26542,0.29504,0.32536,0.3564,0.38817, 0.42067,0.4539,0.48788,0.52261,0.55809,0.59433,0.63135,0.66913,0.70769, 0.74705,0.78719,0.82813,0.86987,0.91243,0.9558,1) a4 <- c(1,0.92689,0.85622,0.78795,0.72205,0.65846,0.59714,0.53806,0.48118,0.42644, 0.37382,0.32327,0.27476,0.22823,0.18366,0.14101,0.10023,0.06129,0.02415, 
-0.01123,-0.04488,-0.07685,-0.10716,-0.13585,-0.16297,-0.18854,-0.2126, -0.23519,-0.25634,-0.27609,-0.29446,-0.3115,-0.32723,-0.34169,-0.35491, -0.36693,-0.37777,-0.38748,-0.39607,-0.40358,-0.41004,-0.41549,-0.41995, -0.42344,-0.42602,-0.42769,-0.42849,-0.42845,-0.42759,-0.42595,-0.42355, -0.42042,-0.41659,-0.41208,-0.40692,-0.40113,-0.39474,-0.38778,-0.38027, -0.37224,-0.36371,-0.3547,-0.34524,-0.33535,-0.32505,-0.31437,-0.30333, -0.29196,-0.28026,-0.26828,-0.25601,-0.2435,-0.23075,-0.21779,-0.20464, -0.19132,-0.17784,-0.16423,-0.1505,-0.13668,-0.12277,-0.1088,-0.09479, -0.08075,-0.0667,-0.05266,-0.03864,-0.02465,-0.01072,0.00315,0.01693, 0.03062,0.04419,0.05764,0.07095,0.08411,0.09711,0.10993,0.12256,0.13499, 0.14721,0.1592,0.17096,0.18247,0.19373,0.20473,0.21545,0.22588,0.23602, 0.24586,0.25539,0.2646,0.27348,0.28203,0.29025,0.29811,0.30562,0.31277, 0.31956,0.32597,0.33201,0.33766,0.34294,0.34782,0.35231,0.3564,0.36009, 0.36338,0.36627,0.36874,0.37081,0.37246,0.3737,0.37453,0.37495,0.37495, 0.37453,0.3737,0.37246,0.37081,0.36874,0.36627,0.36338,0.36009,0.3564, 0.35231,0.34782,0.34294,0.33766,0.33201,0.32597,0.31956,0.31277,0.30562, 0.29811,0.29025,0.28203,0.27348,0.2646,0.25539,0.24586,0.23602,0.22588, 0.21545,0.20473,0.19373,0.18247,0.17096,0.1592,0.14721,0.13499,0.12256, 0.10993,0.09711,0.08411,0.07095,0.05764,0.04419,0.03062,0.01693,0.00315, -0.01072,-0.02465,-0.03864,-0.05266,-0.0667,-0.08075,-0.09479,-0.1088, -0.12277,-0.13668,-0.1505,-0.16423,-0.17784,-0.19132,-0.20464,-0.21779, -0.23075,-0.2435,-0.25601,-0.26828,-0.28026,-0.29196,-0.30333,-0.31437, -0.32505,-0.33535,-0.34524,-0.3547,-0.36371,-0.37224,-0.38027,-0.38778, -0.39474,-0.40113,-0.40692,-0.41208,-0.41659,-0.42042,-0.42355,-0.42595, -0.42759,-0.42845,-0.42849,-0.42769,-0.42602,-0.42344,-0.41995,-0.41549, -0.41004,-0.40358,-0.39607,-0.38748,-0.37777,-0.36693,-0.35491,-0.34169, -0.32723,-0.3115,-0.29446,-0.27609,-0.25634,-0.23519,-0.2126,-0.18854, -0.16297,-0.13585,-0.10716,-0.07685,-0.04488,-0.01123,0.02415,0.06129, 0.10023,0.14101,0.18366,0.22823,0.27476,0.32327,0.37382,0.42644,0.48118, 0.53806,0.59714,0.65846,0.72205,0.78795,0.85622,0.92689,1) a5 <- c(-1,-0.89135,-0.78833,-0.69078,-0.59853,-0.51141,-0.42927,-0.35195,-0.2793, -0.21116,-0.14739,-0.08783,-0.03235,0.01919,0.06694,0.11103,0.15159, 0.18875,0.22264,0.25339,0.28111,0.30594,0.32798,0.34736,0.36418,0.37855, 0.39059,0.4004,0.40809,0.41375,0.41748,0.41939,0.41956,0.41809,0.41507, 0.41059,0.40472,0.39757,0.3892,0.37969,0.36913,0.35759,0.34514,0.33186, 0.3178,0.30305,0.28766,0.27171,0.25524,0.23833,0.22102,0.20337,0.18545, 0.16729,0.14896,0.1305,0.11195,0.09337,0.07479,0.05627,0.03783,0.01952, 0.00137,-0.01658,-0.03429,-0.05173,-0.06888,-0.08569,-0.10216,-0.11823, -0.13391,-0.14915,-0.16393,-0.17824,-0.19206,-0.20537,-0.21814,-0.23037, -0.24204,-0.25313,-0.26364,-0.27355,-0.28286,-0.29155,-0.29961,-0.30705, -0.31386,-0.32003,-0.32555,-0.33044,-0.33467,-0.33827,-0.34122,-0.34352, -0.34519,-0.34622,-0.34662,-0.3464,-0.34555,-0.34409,-0.34202,-0.33936, -0.3361,-0.33227,-0.32787,-0.32292,-0.31742,-0.31139,-0.30484,-0.29778, -0.29024,-0.28221,-0.27373,-0.2648,-0.25545,-0.24568,-0.23552,-0.22498, -0.21408,-0.20285,-0.19129,-0.17943,-0.16729,-0.15488,-0.14224,-0.12937, -0.11629,-0.10304,-0.08963,-0.07608,-0.0624,-0.04864,-0.0348,-0.0209, -0.00697,0.00697,0.0209,0.0348,0.04864,0.0624,0.07608,0.08963,0.10304, 0.11629,0.12937,0.14224,0.15488,0.16729,0.17943,0.19129,0.20285,0.21408, 0.22498,0.23552,0.24568,0.25545,0.2648,0.27373,0.28221,0.29024,0.29778, 
0.30484,0.31139,0.31742,0.32292,0.32787,0.33227,0.3361,0.33936,0.34202, 0.34409,0.34555,0.3464,0.34662,0.34622,0.34519,0.34352,0.34122,0.33827, 0.33467,0.33044,0.32555,0.32003,0.31386,0.30705,0.29961,0.29155,0.28286, 0.27355,0.26364,0.25313,0.24204,0.23037,0.21814,0.20537,0.19206,0.17824, 0.16393,0.14915,0.13391,0.11823,0.10216,0.08569,0.06888,0.05173,0.03429, 0.01658,-0.00137,-0.01952,-0.03783,-0.05627,-0.07479,-0.09337,-0.11195, -0.1305,-0.14896,-0.16729,-0.18545,-0.20337,-0.22102,-0.23833,-0.25524, -0.27171,-0.28766,-0.30305,-0.3178,-0.33186,-0.34514,-0.35759,-0.36913, -0.37969,-0.3892,-0.39757,-0.40472,-0.41059,-0.41507,-0.41809,-0.41956, -0.41939,-0.41748,-0.41375,-0.40809,-0.4004,-0.39059,-0.37855,-0.36418, -0.34736,-0.32798,-0.30594,-0.28111,-0.25339,-0.22264,-0.18875,-0.15159, -0.11103,-0.06694,-0.01919,0.03235,0.08783,0.14739,0.21116,0.2793,0.35195, 0.42927,0.51141,0.59853,0.69078,0.78833,0.89135,1) } # Create list of cows: cows <- unique(dat[,1]) rval <- as.data.frame(matrix(rep(0, times = 9), nrow = 1)) names(rval) <- c("ckey", "lact", "llen", "vltd", "fltd", "pltd", "vstd", "fstd", "pstd") # Convert fat and protein yields to kilograms (1 litre milk = 0.970264 kg): dat$fat <- dat$fat * (dat$vol * 0.970264) dat$pro <- dat$pro * (dat$vol * 0.970264) # Loop through each cow's records: for(i in 1:length(cows)){ # Select herd test records for this cow: id <- dat[,1] == cows[i] dat.tmp <- dat[id, 1:8] # Take each lactation in turn: lacts <- unique(dat.tmp[,3]) for(j in 1:length(lacts)){ id <- dat.tmp[,3] == lacts[j] dat.ump <- dat.tmp[id,1:8] # How many herd tests were there? ntest <- length(dat.ump[,4]) # If there are fewer than 4 herd test events, don't calculate anything: if (ntest < 4){ vstd <- 0; fstd <- 0; pstd <- 0 vltd <- 0; fltd <- 0; pltd <- 0 } if (ntest >= 4){ # Extract appropriate values of x on the basis of herd test days in milk: x0 <- a0[dat.ump[,4]] x1 <- a1[dat.ump[,4]] x2 <- a2[dat.ump[,4]] x3 <- a3[dat.ump[,4]] xmat <- cbind(x0, x1, x2, x3) txmat <- t(xmat) Xx <- txmat %*% xmat Xy.vol <- txmat %*% dat.ump[,6] Xy.fat <- txmat %*% dat.ump[,7] Xy.pro <- txmat %*% dat.ump[,8] # Regression coefficients: a.vol <- solve(Xx, Xy.vol) a.fat <- solve(Xx, Xy.fat) a.pro <- solve(Xx, Xy.pro) # Variable "flag" equals TRUE if there is no dry off date and FALSE otherwise: flag <- is.na(unique(dat.ump[,5])) # Lactation length and days in milk at last herd test: llen <- unique(dat.ump[,5]) last.ht <- max(dat.ump[,4]) # If dry (i.e. flag == FALSE) and llen <= std, calculate yield to dry off date: if (flag == FALSE & llen <= std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a3[1:std] * a.vol[4])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a3[1:std] * a.fat[4])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a3[1:std] * a.pro[4])) vltd <- sum((a0[1:llen] * a.vol[1]) + (a1[1:llen] * a.vol[2]) + (a2[1:llen] * a.vol[3]) + (a3[1:llen] * a.vol[4])) fltd <- sum((a0[1:llen] * a.fat[1]) + (a1[1:llen] * a.fat[2]) + (a2[1:llen] * a.fat[3]) + (a3[1:llen] * a.fat[4])) pltd <- sum((a0[1:llen] * a.pro[1]) + (a1[1:llen] * a.pro[2]) + (a2[1:llen] * a.pro[3]) + (a3[1:llen] * a.pro[4])) } # If dry (i.e. 
flag == FALSE) and llen > std, calculate yield to dry off date: if (flag == FALSE & llen > std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a3[1:std] * a.vol[4])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a3[1:std] * a.fat[4])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a3[1:std] * a.pro[4])) d.xs <- llen - std v.xs <- (sum((a0[std]*a.vol[1])+(a1[std]*a.vol[2]) + (a2[std]*a.vol[3])+(a3[std]*a.vol[4]))) + (dat.ump[,6][dat.ump[,4]==last.ht]/2) f.xs <- (sum((a0[std]*a.fat[1])+(a1[std]*a.fat[2]) + (a2[std]*a.fat[3])+(a3[std]*a.fat[4]))) + (dat.ump[,7][dat.ump[,4]==last.ht]/2) p.xs <- (sum((a0[std]*a.pro[1])+(a1[std]*a.pro[2]) + (a2[std]*a.pro[3])+(a3[std]*a.pro[4]))) + (dat.ump[,8][dat.ump[,4]==last.ht]/2) vltd <- vstd + (v.xs * d.xs) fltd <- fstd + (f.xs * d.xs) pltd <- pstd + (p.xs * d.xs) } # If lactating (i.e. flag == TRUE) and dim at last herd test <= std, calculate yield to last herd test: if (flag == TRUE & last.ht <= std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a3[1:std] * a.vol[4])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a3[1:std] * a.fat[4])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a3[1:std] * a.pro[4])) vltd <- sum((a0[1:last.ht] * a.vol[1]) + (a1[1:last.ht] * a.vol[2]) + (a2[1:last.ht] * a.vol[3]) + (a3[1:last.ht] * a.vol[4])) fltd <- sum((a0[1:last.ht] * a.fat[1]) + (a1[1:last.ht] * a.fat[2]) + (a2[1:last.ht] * a.fat[3]) + (a3[1:last.ht] * a.fat[4])) pltd <- sum((a0[1:last.ht] * a.pro[1]) + (a1[1:last.ht] * a.pro[2]) + (a2[1:last.ht] * a.pro[3]) + (a3[1:last.ht] * a.pro[4])) } # If lactating (i.e. 
flag == TRUE) and dim at last herd test > std, calculate yield to last herd test: if (flag == TRUE & last.ht > std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a3[1:std] * a.vol[4])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a3[1:std] * a.fat[4])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a3[1:std] * a.pro[4])) d.xs <- last.ht - std v.xs <- (sum((a0[std]*a.vol[1])+(a1[std]*a.vol[2]) + (a2[std]*a.vol[3])+(a3[std]*a.vol[4]))) + (dat.ump[,6][dat.ump[,4]==last.ht]/2) f.xs <- (sum((a0[std]*a.fat[1])+(a1[std]*a.fat[2]) + (a2[std]*a.fat[3])+(a3[std]*a.fat[4]))) + (dat.ump[,7][dat.ump[,4]==last.ht]/2) p.xs <- (sum((a0[std]*a.pro[1])+(a1[std]*a.pro[2]) + (a2[std]*a.pro[3])+(a3[std]*a.pro[4]))) + (dat.ump[,8][dat.ump[,4]==last.ht]/2) vltd <- vstd + (v.xs * d.xs) fltd <- fstd + (f.xs * d.xs) pltd <- pstd + (p.xs * d.xs) } } ckey <- unique(dat.ump[,1]) lact <- unique(dat.ump[,3]) rval.tmp <- round(as.data.frame(cbind(ckey, lact, llen, vltd, fltd, pltd, vstd, fstd, pstd)), digits = 0) rval <- rbind(rval, rval.tmp) } } rval <- as.data.frame(rval[-1,], row.names = NULL) print(rval) }epiR/R/epi.nomogram.R0000644000176200001440000000272012601641614014072 0ustar liggesusersepi.nomogram <- function(se, sp, lr, pre.pos, verbose = FALSE){ # If likelihood ratios are known: if(is.na(se) & is.na(sp) & !is.na(lr[1])& !is.na(lr[2])){ lr.pos <- lr[1] lr.neg <- lr[2] } # If likelihood ratios are not known: if(!is.na(se) & !is.na(sp) & is.na(lr[1]) & is.na(lr[2])){ # se <- ifelse(se == 1.0, 1 - 1E-04, se) # sp <- ifelse(sp == 1.0, 1 - 1E-04, sp) lr.pos <- se / (1 - sp) lr.neg <- (1 - se) / sp } pre.odds <- pre.pos / (1 - pre.pos) post.odds.pos <- pre.odds * lr.pos post.odds.neg <- pre.odds * lr.neg post.prob.pos <- post.odds.pos / (1 + post.odds.pos) post.prob.neg <- post.odds.neg / (1 + post.odds.neg) lr <- as.data.frame(cbind(pos = lr.pos, neg = lr.neg)) prob <- as.data.frame(cbind(pre.pos = pre.pos, post.pos = post.prob.pos, post.neg = post.prob.neg)) rval <- list(lr = lr, prob = prob) if(verbose == TRUE){ return(rval) } if(verbose == FALSE){ post.prob.pos <- ifelse(post.prob.pos < 0.01, round(post.prob.pos, digits = 4), round(post.prob.pos, digits = 2)) post.prob.neg <- ifelse(post.prob.neg < 0.01, round(post.prob.neg, digits = 4), round(post.prob.neg, digits = 2)) cat("The post-test probability of being disease positive is", post.prob.pos, "\n") cat("The post-test probability of being disease negative is", post.prob.neg, "\n") } }epiR/R/epi.smd.R0000644000176200001440000000526112601641614013041 0ustar liggesusers"epi.smd" <- function(mean.trt, sd.trt, n.trt, mean.ctrl, sd.ctrl, n.ctrl, names, method = "cohens", conf.level = 0.95) { # Declarations: N <- 1 - ((1 - conf.level) / 2) z <- qnorm(N, mean = 0, sd = 1) k <- length(names) N.i <- n.trt + n.ctrl # Pooled standard deviation of the two groups: s.i <- sqrt((((n.trt - 1) * sd.trt^2) + ((n.ctrl - 1) * sd.ctrl^2)) / (N.i - 2)) if(method == "cohens") { # Standardised mean difference method using Cohen's d: MD.i <- (mean.trt - mean.ctrl) / s.i SE.MD.i <- sqrt((N.i / (n.trt * n.ctrl)) + (MD.i^2 / (2 * (N.i - 2)))) lower.MD.i <- MD.i - (z * SE.MD.i) upper.MD.i <- MD.i + (z * SE.MD.i) } if(method == "hedges") { # Standardised mean difference method using Hedges' adjusted g: MD.i <- ((mean.trt - mean.ctrl) / s.i) * (1 - (3/ ((4 * N.i) - 9))) SE.MD.i <- sqrt((N.i / ((n.trt * n.ctrl)) + (MD.i^2 / (2 * (N.i - 3.94))))) lower.MD.i <- MD.i
- (z * SE.MD.i) upper.MD.i <- MD.i + (z * SE.MD.i) } else if(method == "glass") { # Standardised mean difference method using Glass's delta: MD.i <- (mean.trt - mean.ctrl) / sd.ctrl SE.MD.i <- sqrt((N.i / ((n.trt * n.ctrl)) + (MD.i^2 / (2 * (n.ctrl - 1))))) lower.MD.i <- MD.i - (z * SE.MD.i) upper.MD.i <- MD.i + (z * SE.MD.i) } # IV pooled standardised mean difference: w.i <- 1 / (SE.MD.i)^2 MD.iv <- sum(w.i * MD.i) / sum(w.i) SE.MD.iv <- 1/sqrt((sum(w.i))) lower.MD.iv <- MD.iv - (z * SE.MD.iv) upper.MD.iv <- MD.iv + (z * SE.MD.iv) # Heterogeneity statistic: Q <- sum(w.i * (MD.i - MD.iv)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) tau.sq <- (Q - (k - 1)) / (sum(w.i) - (sum((w.i)^2) / sum(w.i))) # If Q is less than (k - 1) tau.sq equals zero: tau.sq <- ifelse(Q < (k - 1), 0, tau.sq) w.dsl.i <- 1 / (SE.MD.i^2 + tau.sq) MD.dsl <- sum(w.dsl.i * MD.i) / sum(w.dsl.i) SE.MD.dsl <- 1 / sqrt(sum(w.dsl.i)) lower.MD.dsl <- MD.dsl - (z * SE.MD.dsl) upper.MD.dsl <- MD.dsl + (z * SE.MD.dsl) # Results: md <- as.data.frame(cbind(MD.i, SE.MD.i, lower.MD.i, upper.MD.i)) names(md) <- c("est", "se", "lower", "upper") md.invar <- as.data.frame(cbind(MD.iv, SE.MD.iv, lower.MD.iv, upper.MD.iv)) names(md.invar) <- c("est", "se", "lower", "upper") md.dsl <- as.data.frame(cbind(MD.dsl, lower.MD.dsl, upper.MD.dsl)) names(md.dsl) <- c("est", "lower", "upper") heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity) rval <- list(md = md, md.invar = md.invar, md.dsl = md.dsl, heterogeneity = heterogeneity) return(rval) } epiR/R/epi.ccc.r0000644000176200001440000000455712601641614013055 0ustar liggesusersepi.ccc = function(x, y, ci = "z-transform", conf.level = 0.95){ dat <- data.frame(x, y) id <- complete.cases(dat) nmissing <- sum(!complete.cases(dat)) dat <- dat[id,] N. <- 1 - ((1 - conf.level) / 2) zv <- qnorm(N., mean = 0, sd = 1) lower <- "lower" upper <- "upper" k <- length(dat$y) yb <- mean(dat$y) sy2 <- var(dat$y) * (k - 1) / k sd1 <- sd(dat$y) xb <- mean(dat$x) sx2 <- var(dat$x) * (k - 1) / k sd2 <- sd(dat$x) r <- cor(dat$x, dat$y) sl <- r * sd1 / sd2 sxy <- r * sqrt(sx2 * sy2) p <- 2 * sxy / (sx2 + sy2 + (yb - xb)^2) delta <- (dat$x - dat$y) rmean <- apply(dat, MARGIN = 1, FUN = mean) blalt <- data.frame(mean = rmean, delta) # Scale shift: v <- sd1 / sd2 # Location shift relative to the scale: u <- (yb - xb) / ((sx2 * sy2)^0.25) # Variable C.b is a bias correction factor that measures how far the best-fit line deviates from a line at 45 degrees (a measure of accuracy). No deviation from the 45 degree line occurs when C.b = 1. See Lin (1989 page 258). 
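# Illustrative check (not in the original source): if the two methods agree
# in scale and location (v = 1, u = 0) the commented expression below gives
# C.b = (((1 + 1) / (1 + 0)) / 2)^-1 = 1, i.e. no accuracy penalty.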
# C.b <- (((v + 1) / (v + u^2)) / 2)^-1 # The following taken from the Stata code for function "concord" (changed 290408): C.b <- p / r # Variance, test, and CI for asymptotic normal approximation (per Lin (2000) Biometrics 56:324-325): sep = sqrt(((1 - ((r)^2)) * (p)^2 * (1 - ((p)^2)) / (r)^2 + (2 * (p)^3 * (1 - p) * (u)^2 / r) - 0.5 * (p)^4 * (u)^4 / (r)^2 ) / (k - 2)) ll = p - zv * sep ul = p + zv * sep # Statistic, variance, test, and CI for inverse hyperbolic tangent transform to improve asymptotic normality: t <- log((1 + p) / (1 - p)) / 2 set = sep / (1 - ((p)^2)) llt = t - zv * set ult = t + zv * set llt = (exp(2 * llt) - 1) / (exp(2 * llt) + 1) ult = (exp(2 * ult) - 1) / (exp(2 * ult) + 1) if(ci == "asymptotic"){ rho.c <- as.data.frame(cbind(p, ll, ul)) names(rho.c) <- c("est", lower, upper) rval <- list(rho.c = rho.c, s.shift = v, l.shift = u, C.b = C.b, blalt = blalt, nmissing = nmissing) } else if(ci == "z-transform"){ rho.c <- as.data.frame(cbind(p, llt, ult)) names(rho.c) <- c("est", lower, upper) rval <- list(rho.c = rho.c, s.shift = v, l.shift = u, C.b = C.b, blalt = blalt, nmissing = nmissing) } return(rval) } epiR/R/epi.about.R0000644000176200001440000000072412601641614013367 0ustar liggesusers"epi.about" <- function() { cat("\n") cat("-----------------------------------------------------------\n") ver <- packageDescription("epiR", lib.loc = NULL, fields = "Version") cat(paste("epiR version", ver)) cat("\n") cat("Tools for the Analysis of Epidemiological Data") cat("\n") cat("See http://fvas.unimelb.edu.au/veam for details.") cat("\n") cat("-----------------------------------------------------------\n") invisible() } epiR/R/epi.dms.R0000644000176200001440000000251412601641614013037 0ustar liggesusers"epi.dms" <- function(dat){ # If matrix is comprised of one column, assume conversion FROM decimal degrees TO degree, minutes, seconds: if(dim(dat)[2] == 1){ dat. <- abs(dat) deg <- floor(dat.) ms <- (dat. - deg) * 60 min <- floor(ms) sec <- (ms - min) * 60 rval <- as.matrix(cbind(deg, min, sec), dimnames = NULL) id <- dat[,1] < 0 id <- ifelse(id == TRUE, -1, 1) rval[,1] <- rval[,1] * id # names(rval) <- c("deg", "min", "sec") } # If matrix is comprised of two columns, assume conversion is FROM degrees and decimal minutes TO decimal degrees: else if(dim(dat)[2] == 2){ deg <- abs(dat[,1]) min <- dat[,2] / 60 rval <- as.matrix(deg + min, dimnames = NULL) id <- dat[,1] < 0 id <- ifelse(id == TRUE, -1, 1) rval <- rval * id # names(rval) <- "ddeg" } # If matrix is comprised of three columns, assume conversion FROM degrees, minutes, seconds TO decimal degrees: else if(dim(dat)[2] == 3){ deg <- abs(dat[,1]) min <- dat[,2] / 60 sec <- dat[,3] / (60 * 60) rval <- as.matrix(deg + min + sec, dimnames = NULL) id <- dat[,1] < 0 id <- ifelse(id == TRUE, -1, 1) rval <- rval * id # names(rval) <- "ddeg" } return(rval) }epiR/R/epi.betabuster.r0000644000176200001440000000377412601641614014463 0ustar liggesusersepi.betabuster <- function(mode, conf, greaterthan, x, conf.level = 0.95, max.shape1 = 100, step = 0.001){ shape1 <- seq(from = 1, to = max.shape1, by = step) shape2 <- 2 - shape1 + (shape1 - 1) / mode p.vec <- pbeta(q = x, shape1 = shape1, shape2 = shape2) # Which value of shape1 minimises abs(p.vec - (1 - conf))? if(greaterthan){ index <- which((abs(p.vec - (1 - conf))) == min(abs(p.vec - (1 - conf)))) } else{ index <- which((abs(p.vec - conf)) == min(abs(p.vec - conf))) } shape1 <- shape1[index] shape2 <- shape2[index] # In general, if an experiment resulted in 's' successes (e.g. no. 
test-positive animals) # recorded in 'n' trials (e.g. number of truly infected animals), # use of a beta (a, b) distribution with a = s+1 and b = n-s+1 is an appropriate choice to model the uncertainty in that parameter. s <- shape1 - 1 n <- shape1 + shape2 - 2 .mode <- (shape1 - 1) / (shape1 + shape2 - 2) .mean <- shape1 / (shape1 + shape2) .var <- shape1 * shape2 / (((shape1 + shape2)^2) * (shape1 + shape2 + 1)) .median <- qbeta(p = 0.5, shape1 = shape1, shape2 = shape2) lower <- qbeta(p = (1 - conf.level) / 2, shape1 = shape1, shape2 = shape2) upper <- qbeta(p = 1 - ((1 - conf.level) / 2), shape1 = shape1, shape2 = shape2) # dens <- dbeta(x = seq(from = 0, to = 1, by = 0.001), shape1 = shape1, shape2 = shape2) # beta.plot <- plot(x = seq(from = 0, to = 1, by = 0.001), y = dens, type = 'l', xlab = "Proportion", ylab = "Density") rval <- list(shape1 = shape1, shape2 = shape2, mode = .mode, mean = .mean, median = .median, lower = lower, upper = upper, variance = .var) rval # Example: # fred <- epi.betabuster(mode = 0.25, conf = 0.95, greaterthan = FALSE, x = 0.30, max.shape1 = 100, step = 0.001); fred$shape1; fred$shape2 # plot(seq(from = 0, to = 1, by = 0.001), dbeta(x = seq(from = 0, to = 1, by = 0.001), shape1 = fred$shape1, shape2 = fred$shape2), type = 'l', xlab = "Proportion", ylab = "Density") } epiR/R/epi.prcc.R0000644000176200001440000000635712601641614013210 0ustar liggesusersepi.prcc <- function(dat, sided.test = 2){ # Calculate mu and number of parameters: N <- dim(dat)[1] K <- dim(dat)[2] - 1 # Return an error message if the number of parameters is greater than the number of model replications: if(K > N) stop("Error: the number of replications of the model must be greater than the number of model parameters") mu <- (1 + N) / 2 # Compute ranks: for(i in 1:(K + 1)){ dat[,i] <- rank(dat[,i]) } # Create a K + 1 by K + 1 matrix: C <- matrix(rep(0, times = (K + 1)^2), nrow = (K + 1)) # Correlations for each parameter pair: for(i in 1:(K + 1)){ for(j in 1:(K + 1)){ r.it <- dat[,i] r.jt <- dat[,j] r.js <- r.jt c.ij <- sum((r.it - mu) * (r.jt - mu)) / sqrt(sum((r.it - mu)^2) * sum((r.js - mu)^2)) C[i,j] <- c.ij } } # Fudge to deal with response variables that are all the same: if(is.na(C[K + 1,K + 1])) {gamma.iy <- rep(0, times = K) t.iy <- gamma.iy * sqrt((N - 2) / (1 - gamma.iy^2)) p <- rep(1, times = K) df <- rep((N - 2), times = K) # Results: rval <- as.data.frame(cbind(gamma = gamma.iy, test.statistic = t.iy, df = df, p.value = p)) return(rval) } else { # Matrix B is defined as the inverse of c.ij: B <- solve(C) # PRCC (gamma.iy) between the ith input parameter and the yth outcome is defined by Kendall and Stuart (1979) as follows: gamma.iy <- c() for(i in 1:K){ num <- -B[i,(K + 1)] den <- sqrt(B[i,i] * B[(K + 1),(K + 1)]) gamma.iy <- c(gamma.iy, num/den) } # Email Andrew Hill (mailto:fyu7@cdc.gov) 14 August 2009. # I think there may be an error in the epi.prcc function. Looking at the example in the package documentation, I note that the p-values all close to 0 yet if we switch the sign of y to force negative correlation with the x's we get p-values near 1. Backtracking, I think the problem is in the definition of the test statistic. There is a typo in the Blower-Dowlatabadi paper. I believe they misstate the test statistic at the end of the Appendix. It should be (dropping their subscripts): t = gamma * sqrt((N-2) / (1 - gamma^2)). # Equivalently, the square of the PRCC gamma^2 is asymptotically Beta(1/2,(N-2)/2). (ref. Muirhead's book 'Aspects of Multivariate Statistical Theory'). 
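# Worked example (illustrative only): with N = 100 model replications and a
# PRCC of gamma = 0.3, the corrected statistic is
# t = 0.3 * sqrt((100 - 2) / (1 - 0.3^2)), approximately 3.11, on N - 2 = 98
# degrees of freedom, which is then referred to the t distribution as coded
# below.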
# Blower and Dowlatabadi appendix: # t.iy <- gamma.iy * sqrt((N - 2) / (1 - gamma.iy)) # Andrew Hill's correction: t.iy <- gamma.iy * sqrt((N - 2) / (1 - gamma.iy^2)) df <- rep((N - 2), times = K) # Blower and Dowlatabadi appendix: # p <- 1 - pt(q = t.iy, df = (N - 2)) if(sided.test == 2){ # Andrew Hill's correction: p <- 2 * pt(abs(t.iy), df = N - 2, lower.tail = FALSE) # p <- pbeta(t.iy^2, shape1 = 1/2, shape2 = (N - 2)/2, lower.tail = FALSE) } if(sided.test == 1){ # Andrew Hill's correction: p <- pt(abs(t.iy), df = N - 2,lower.tail = FALSE) } # Results: rval <- as.data.frame(cbind(gamma = gamma.iy, test.statistic = t.iy, df = df, p.value = p)) return(rval) } }epiR/R/epi.edr.R0000644000176200001440000000666212601641614013036 0ustar liggesusersepi.edr <- function(dat, n = 4, conf.level = 0.95, nsim = 99, na.zero = TRUE){ N. <- 1 - ((1 - conf.level) / 2) alpha <- 1 - conf.level z <- qnorm(N., mean = 0, sd = 1) num.sum <- 0; num.sd <- 0; num.n <- 0 den.sum <- 0; den.sd <- 0; den.n <- 0 start <- 2 * n for (i in start:length(dat)){ top.start <- (i - (n - 1)) top.finish <- i bot.start <- (i - (2 * n)) + 1 bot.finish <- i - n # Vector of outbreak counts for numerator and denominator: num.tmp <- dat[top.start:top.finish] den.tmp <- dat[bot.start:bot.finish] num.sum <- c(num.sum, sum(num.tmp)) num.sd <- c(num.sd, sd(num.tmp)) num.n <- c(num.n, length(num.tmp)) den.sum <- c(den.sum, sum(den.tmp)) den.sd <- c(den.sd, sd(den.tmp)) den.n <- c(den.n, length(den.tmp)) } # Remove the initiating zero and add a vector of zeroes to the start: num.sum <- c(rep(0, times = (start - 1)), num.sum[-1]) num.sd <- c(rep(0, times = (start - 1)), num.sd[-1]) num.n <- c(rep(0, times = (start - 1)), num.n[-1]) den.sum <- c(rep(0, times = (start - 1)), den.sum[-1]) den.sd <- c(rep(0, times = (start - 1)), den.sd[-1]) den.n <- c(rep(0, times = (start - 1)), den.n[-1]) # Work out the standard error of numerator and denominator: # SE_total = (n * SE_mean): # num.se <- num.n * (num.sd / sqrt(num.n)) # den.se <- den.n * (den.sd / sqrt(den.n)) num.mat <- matrix(rep(0, times = length(num.sum) * nsim), nrow = length(num.sum)) den.mat <- matrix(rep(0, times = length(num.sum) * nsim), nrow = length(num.sum)) for(i in 1:nsim){ num.mat[,i] <- rpois(n = length(num.sum), lambda = num.sum) den.mat[,i] <- rpois(n = length(den.sum), lambda = den.sum) } edr.p <- num.sum / den.sum edr.mat <- num.mat / den.mat edr.mat[is.na(edr.mat)] <- 0 quant <- function(x, probs) quantile(x, probs, na.rm = TRUE) edr.l <- apply(edr.mat, MARGIN = 1, FUN = quant, probs = alpha/2) edr.u <- apply(edr.mat, MARGIN = 1, FUN = quant, probs = 1 - alpha/2) # Work out EDR and confidence intervals of EDR: # Source: http://www.agron.missouri.edu/mnl/55/34kowles.html # edr.sed <- sqrt(num.se^2 + den.se^2) # edr.var <- (num.se^2 / den.sum^2) + (num.sum^2 / den.sum^4) * den.se^2 # Method 1 - use of extremes: # edr.l <- (num.sum - (z * num.se)) / (den.sum + (z * den.se)) # edr.u <- (num.sum + (z * num.se)) / (den.sum - (z * den.se)) # Method 2 - standard error of the difference between means: # edr.l <- 1 + ((num.sum - den.sum) - edr.sed) / (den.sum - (z * den.se)) # edr.u <- 1 + ((num.sum - den.sum) - edr.sed) / (den.sum + (z * den.se)) # Method 3 - approximate variance of the error of the ratios: # edr.l <- edr.p - (z * sqrt(edr.var)) # edr.l[edr.l < 0] <- 0 # edr.u <- edr.p + (z * sqrt(edr.var)) if(na.zero == FALSE) { rval <- as.data.frame(cbind(edr.p, edr.l, edr.u)) names(rval) <- c("est", "lower", "upper") } else if(na.zero == TRUE) { id <- is.na(edr.p) edr.p[id] 
<- 0 edr.l[id] <- 0 edr.u[id] <- 0 id <- is.infinite(edr.p) edr.p[id] <- 0 edr.l[id] <- 0 edr.u[id] <- 0 rval <- as.data.frame(cbind(edr.p, edr.l, edr.u)) names(rval) <- c("est", "lower", "upper") } rval } epiR/R/epi.studysize.R0000644000176200001440000002674412601641614014332 0ustar liggesusers"epi.studysize" <- function(treat, control, n, sigma, power, r = 1, design = 1, sided.test = 2, conf.level = 0.95, method = "means") { alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if(method == "means" & !is.na(treat) & !is.na(control) & is.na(n) & !is.na(sigma) & !is.na(power)){ # Sample size. From Woodward p 398: z.beta <- qnorm(power, mean = 0, sd = 1) delta <- abs(treat - control) n <- ((r + 1)^2 * (z.alpha + z.beta)^2 * sigma^2) / (delta^2 * r) # Account for the design effect: n <- n * design n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control rval <- list(n.crude = n.crude, n.total = n.total, n.treat = n.treat, n.control = n.control) } else if(method == "means" & !is.na(treat) & !is.na(control) & !is.na(n) & !is.na(sigma) & is.na(power)){ # Study power. From Woodward p 401: delta <- abs(treat - control) # Account for the design effect: n <- n / design z.beta <- ((delta * sqrt(n * r)) / ((r + 1) * sigma)) - z.alpha power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(power = power) } else if(method == "means" & is.na(treat) & is.na(control) & !is.na(n) & !is.na(sigma) & !is.na(power)){ # Maximum detectable difference. From Woodward p 401: z.beta <- qnorm(power, mean = 0, sd = 1) # Account for the design effect: n <- n / design delta <- ((r + 1) * (z.alpha + z.beta) * sigma) / (sqrt(n * r)) rval <- list(delta = delta) } else if (method == "proportions" & !is.na(treat) & !is.na(control) & is.na(n) & !is.na(power)) { # Sample size. z.beta <- qnorm(power, mean = 0, sd = 1) # delta <- abs(treat - control) # n <- (1/delta^2) * ((z.alpha * sqrt(treat * (1 - treat))) + (z.beta * sqrt(control * (1 - control))))^2 # From Woodward's spreadsheet. Changed 130814: lambda <- treat / control Pc <- control * (r * lambda + 1) / (r + 1) T1 <- (r + 1) / (r * (lambda - 1)^2 * control^2) T2 <- (r + 1) * Pc *(1 - Pc) T3 <- lambda * control * (1 - lambda * control) + r * control * (1 - control) n <- T1 * (z.alpha * sqrt(T2) + z.beta * sqrt(T3))^2 # Account for the design effect: n <- n * design # n.total <- 2 * ceiling(0.5 * n) # rval <- list(n.total = n.total) n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control rval <- list(n.crude = n.crude, n.total = n.total, n.treat = n.treat, n.control = n.control) } else if (method == "proportions" & !is.na(treat) & !is.na(control) & !is.na(n) & is.na(power)) { # Power. # Account for the design effect: n <- n / design # From Woodward's spreadsheet. Changed 130814: lambda <- control / treat Pc <- treat * (r * lambda + 1) / (r + 1) T1 <- ifelse(lambda >= 1, treat * (lambda - 1) * sqrt(n * r), treat * (1 - lambda) * sqrt(n * r)) T2 <- z.alpha * (r + 1) * sqrt(Pc * (1 - Pc)) T3 <- (r + 1) * (lambda * treat * (1 - lambda * treat) + r * treat * (1 - treat)) z.beta <- (T1 - T2) / sqrt(T3) # z.beta <- ((delta * sqrt(n)) - (z.alpha * sqrt(treat * (1 - treat))))/(sqrt(control * (1 - control))) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(power = power) } else if (method == "proportions" & !is.na(treat) & !is.na(control) & !is.na(n) & !is.na(power)) { # Maximum detectable difference. 
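# Here delta is the smallest absolute difference between the two proportions
# detectable with the stated n and power. A rough check of the formula below,
# using hypothetical inputs treat = 0.6, control = 0.4, n = 100, power = 0.80,
# design = 1 and a two-sided conf.level = 0.95:
# delta = 1/sqrt(100) * (1.96 * sqrt(0.24) + 0.84 * sqrt(0.24)) =~ 0.14.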
z.beta <- qnorm(power, mean = 0, sd = 1) # Account for the design effect: n <- n / design delta <- 1/sqrt(n) * ((z.alpha * sqrt(treat * (1 - treat))) + (z.beta * sqrt(control * (1 - control)))) rval <- list(delta = delta) } # else # if(method == "proportions" & is.na(n)){ # # Sample size estimate. From Fleiss (1981). # z.beta <- qnorm(power, mean = 0, sd = 1) # delta <- abs(treat - control) # n <- (z.alpha + z.beta)^2 * (((treat * (1 - treat)) + (control * (1 - control))) / delta^2) + (2 / delta) + 2 # n <- ceiling(2 * n) # rval <- list(n = n) # } # else # if(method == "proportions" & !is.na(treat) & !is.na(control) & !is.na(n) & !is.na(power)){ # # Maximum detectable difference. From Fleiss (1981). # z.beta <- qnorm(power, mean = 0, sd = 1) # C <- (z.alpha + z.beta)^2 # p <- ((treat * (1 - treat)) + (control * (1 - control))) # delta <- 2 / ((n - 2) - (C * p)) # rval <- list(delta = delta) # } # else # if(method == "proportions" & is.na(power)){ # # Study power. From Fleiss (1981). # delta <- abs(treat - control) # s1 <- delta^2 * (n - 2 - (2 / delta)) # s2 <- ((treat * (1 - treat)) + (control * (1 - control))) # z.beta <- sqrt(s1/s2) - z.alpha # power <- pnorm(z.beta, mean = 0, sd = 1) # rval <- list(power = power) # } else if(method == "survival" & !is.na(treat) & !is.na(control) & is.na(n) & !is.na(power)){ # Sample size. # From: Therneau TM and Grambsch PM 2000. Modelling Survival Data - Extending the Cox Model. Springer, London, p 61 - 65. z.beta <- qnorm(power, mean = 0, sd = 1) p <- r / (r + 1); q <- 1 - p # p <- 0.5; q <- 1 - p exp.beta <- log(treat) / log(control) n <- ((z.alpha + z.beta)^2) / (p * q * log(exp.beta)^2) # Account for the design effect: n <- n * design n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control rval <- list(n.crude = n.crude, n.total = n.total, n.treat = n.treat, n.control = n.control) } else if(method == "survival" & !is.na(treat) & !is.na(control) & !is.na(n) & is.na(power)){ # Power. # From: Therneau TM and Grambsch PM 2000. Modelling Survival Data - Extending the Cox Model. Springer, London, p 61 - 65. beta <- log(treat / control) p <- r / (r + 1); q <- 1 - p # Account for the design effect: n <- n / design z.beta <- sqrt(n * p * q * beta^2) - z.alpha power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(power = power) } else if(method == "survival" & is.na(treat) & is.na(control) & !is.na(n) & !is.na(power)){ # Maximum detectable difference. # From: Therneau TM and Grambsch PM 2000. Modelling Survival Data - Extending the Cox Model. Springer, London, p 61 - 65. p <- r / (r + 1); q <- 1 - p z.beta <- qnorm(power, mean = 0, sd = 1) # Account for the design effect: n <- n / design beta <- sqrt(((z.alpha + z.beta)^2) / (n * p * q)) delta <- exp(beta) rval <- list(hazard = sort(c(delta, 1/delta))) } else if(method == "cohort.count" & !is.na(treat) & !is.na(control) & is.na(n) & !is.na(power)){ # Sample size estimate. 
From Woodward p 405: z.beta <- qnorm(power, mean = 0, sd = 1) lambda <- treat / control pi <- control pc <- (pi * ((r * lambda) + 1)) / (r + 1) p1 <- (r + 1) / (r * (lambda - 1)^2 * pi^2) p2 <- z.alpha * sqrt((r + 1) * pc * (1 - pc)) p3 <- z.beta * sqrt((lambda * pi * (1 - (lambda * pi))) + (r * pi * (1 - pi))) n <- p1 * (p2 + p3)^2 # Account for the design effect: n <- n * design n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control rval <- list(n.crude = n.crude, n.total = n.total, n.treat = n.treat, n.control = n.control) } else if(method == "cohort.count" & !is.na(treat) & !is.na(control) & !is.na(n) & is.na(power)){ # Study power. From Woodward p 409: lambda <- treat / control pi <- control pc <- (pi * ((r * lambda) + 1)) / (r + 1) # Account for the design effect: n <- n / design t1 <- ifelse(lambda >= 1, (pi * (lambda - 1) * sqrt(n * r)), (pi * (1 - lambda) * sqrt(n * r))) t2 <- z.alpha * (r + 1) * sqrt(pc * (1 - pc)) t3 <- (r + 1) * (lambda * pi * (1 - lambda * pi) + r * pi * (1 - pi)) z.beta <- (t1 - t2) / sqrt(t3) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(power = power) } else if(method == "cohort.count" & is.na(treat) & !is.na(control) & !is.na(n) & !is.na(power)){ # Risk ratio to be detected - requires a value for control. From Woodward p 409: z.beta <- qnorm(power, mean = 0, sd = 1) pi <- control # Account for the design effect: n <- n / design Y <- r * n * pi^2 Z <- (r + 1) * pi * (z.alpha + z.beta)^2 a <- Y + (pi * Z) b <- (2 * Y) + Z c <- Y - (r * (1 - pi) * Z) lambda.pos <- (1 / (2 * a)) * (b + sqrt(b^2 - 4 * a * c)) lambda.neg <- (1 / (2 * a)) * (b - sqrt(b^2 - 4 * a * c)) rval <- list(lambda = sort(c(lambda.neg, lambda.pos))) } else if(method == "case.control" & !is.na(treat) & !is.na(control) & is.na(n) & !is.na(power)){ # Sample size. From Woodward p 412: z.beta <- qnorm(power, mean = 0, sd = 1) lambda <- treat / control # For this function, 'sigma' is the proportion of study subjects exposed: Pc <- (sigma / (r + 1)) * ((r * lambda / (1 + (lambda - 1) * sigma)) + 1) T1 <- (r + 1) * (1 + (lambda - 1) * sigma)^2 T2 <- r * sigma^2 * (sigma - 1)^2 * (lambda - 1)^2 T3 <- z.alpha * sqrt((r + 1) * Pc * (1 - Pc)) T4 <- lambda * sigma * (1 - sigma) T5 <- 1 + (lambda - 1) * sigma T6 <- T4 / (T5^2) T7 <- r * sigma * (1 - sigma) T8 <- z.beta * sqrt(T6 + T7) n <- (T1 / T2) * (T3 + T8)^2 # P <- sigma # pc. <- (P / (r + 1)) * ((r * lambda) / (1 + ((lambda - 1) * P)) + 1) # p1 <- (r + 1) * (1 + (lambda - 1) * P)^2 / (r * P^2 * (P - 1)^2 * (lambda - 1)^2) # p2 <- z.alpha * sqrt((r + 1) * pc. * (1 - pc.)) # p3 <- z.beta * sqrt(((lambda * P * (1 - P)) / ((1 + (lambda - 1) * P)^2)) + (r * P * (1 - P))) # n <- p1 * (p2 + p3)^2 # Account for the design effect: n <- n * design n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control rval <- list(n.crude = n.crude, n.total = n.total, n.treat = n.treat, n.control = n.control) } else if(method == "case.control" & !is.na(treat) & !is.na(control) & !is.na(n) & is.na(power)){ # Study power. From Woodward p 413: lambda <- treat / control # For this function, 'sd' is the proportion of study subjects exposed: P <- sigma # In this function "r" is input as the ratio of cases to controls. The formulae in Woodward assumes "r" is the ratio of controls to cases. r <- 1 /r # Account for the design effect: n <- n / design pc. 
<- (P / (r + 1)) * ((r * lambda) / (1 + ((lambda - 1) * P)) + 1) M <- abs(((lambda - 1) * (P - 1)) / (1 + (lambda - 1) * P)) term.n1 <- (M * P * sqrt(n * r)) / sqrt(r + 1) term.n2 <- z.alpha * sqrt((r + 1) * pc. * (1 - pc.)) term.d1 <- lambda * P * (1 - P) / (1 + (lambda - 1) * P)^2 term.d2 <- r * P * (1 - P) z.beta <- (term.n1 - term.n2) / sqrt(term.d1 + term.d2) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(power = power) } else if(method == "case.control" & is.na(treat) & is.na(control) & !is.na(n) & !is.na(power)){ # Risk ratio to be detected. From Woodward p 409: z.beta <- qnorm(power, mean = 0, sd = 1) P <- sigma # Account for the design effect: n <- n / design a <- (r * P^2) - (n * r * P * (1 - P)) / ((z.alpha + z.beta)^2 * (r + 1)) b <- 1 + (2 * r * P) lambda.pos <- 1 + ((-b + sqrt(b^2 - (4 * a * (r + 1)))) / (2 * a)) lambda.neg <- 1 + ((-b - sqrt(b^2 - (4 * a * (r + 1)))) / (2 * a)) rval <- list(lambda = sort(c(lambda.neg, lambda.pos))) } rval } epiR/R/epi.insthaz.R0000644000176200001440000000131512601641614013732 0ustar liggesusers"epi.insthaz" <- function(survfit.obj, conf.level = 0.95){ N <- 1 - ((1 - conf.level) / 2) z <- qnorm(N, mean = 0, sd = 1) time <- survfit.obj$time time0 <- c(0, time[-length(time)]) interval <- (time - time0) a <- survfit.obj$n.event n <- survfit.obj$n.risk p <- a/n a. <- n/(n + z^2) b. <- a/n c. <- z^2/(2 * n) d. <- (a * (n - a)) / n^3 e. <- z^2 / (4 * n^2) est <- p / interval low <- (a. * (b. + c. - (z * sqrt(d. + e.)))) / interval up <- (a. * (b. + c. + (z * sqrt(d. + e.)))) / interval rval <- as.data.frame(cbind(time, est, low, up)) names(rval) <- c("time", "est", "lower", "upper") return(rval) }epiR/R/epi.empbayes.R0000644000176200001440000000150512601641614014060 0ustar liggesusers"epi.empbayes" <- function(obs, pop){ # gamma: mean of rate # phi: variance of rate gamma <- (sum(obs)) / (sum(pop)) rate <- obs / pop sum.pop <- sum(pop) phi.left <- sum(pop * (rate - gamma)^2) / sum.pop phi.right <- gamma / mean(pop) phi <- phi.left - phi.right # The convention is that phi = 0 whenever the above expression is negative. phi <- ifelse(phi < 0, 0, phi) emp <- ((phi * (rate - gamma)) / (phi + (gamma / pop))) + gamma # gamma = nu / alpha # phi = nu / alpha^2 alpha <- gamma / phi nu <- gamma^2 / phi inv.nu <- 1 / nu rval <- data.frame(gamma, phi, alpha, nu, inv.nu) names(rval) <- c("gamma (mean)", "phi (variance)", "alpha (shape)", "nu (scale)", "inv.nu (rate)") unlist(rval) } epiR/R/epi.simplesize.R0000644000176200001440000000215712601641614014443 0ustar liggesusersepi.simplesize <- function(N = 1E+06, Vsq, Py, epsilon.r, method = "mean", conf.level = 0.95) { N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) # 280414. Removed the population size corrections because the exact formulae corrects for this. 
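# Worked check of equation 3.16 (the "proportion" branch below) with
# hypothetical inputs: N = 1000000, Py = 0.15, epsilon.r = 0.20 and
# conf.level = 0.95 (so z = 1.96):
# n = (1.96^2 * N * 0.85 * 0.15) / ((N - 1) * 0.20^2 * 0.15^2 + 1.96^2 * 0.15 * 0.85)
# which works out to approximately 544 subjects.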
if (method == "total") { # Page 74 Levy and Lemeshow (equation 3.14): n <- (z^2 * N * Vsq) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) # f <- n / N # if(f > 0.10){n <- n / (1 + n/N)} rval <- round(n, digits = 0) } if (method == "mean") { # Page 74 Levy and Lemeshow (equation 3.15): n <- (z^2 * N * Vsq) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) # f <- n / N # if(f > 0.10){n <- n / (1 + n/N)} rval <- round(n, digits = 0) } if (method == "proportion") { # Page 74 Levy and Lemeshow (equation 3.16): n <- (z^2 * N * (1 - Py) * Py) / (((N - 1) * (epsilon.r^2) * Py^2) + (z^2 * Py * (1 - Py))) # f <- n / N # if(f > 0.10){n <- n / (1 + n/N)} rval <- round(n, digits = 0) } return(rval) } epiR/R/epi.occc.R0000644000176200001440000000455612601641614013173 0ustar liggesusers## x is a matrix line object, rows are cases, columns are raters ## na.rm: logical, if NAs should be excluded ## pairs: logical, if pairwise statistic values should be returned as ## part of the return value epi.occc <- function(dat, na.rm = FALSE, pairs = FALSE){ ## Create a list to hold all variables: elements <- list() ## Do all data manipulation within the list: elements <- within(elements, { if (!na.rm) { m <- apply(dat, 2, mean) s <- apply(dat, 2, sd) COV <- cov(dat) } else { m <- apply(dat, 2, mean, na.rm = TRUE) s <- apply(dat, 2, sd, na.rm = TRUE) COV <- cov(dat, use = "pairwise.complete.obs") } J <- ncol(dat) j <- col(matrix(0,J,J))[lower.tri(matrix(0,J,J))] k <- row(matrix(0,J,J))[lower.tri(matrix(0,J,J))] n <- (J * J - J) / 2 v <- numeric(n) u <- numeric(n) ksi <- numeric(n) ccc <- numeric(n) for (i in seq_len(n)) { v[i] <- s[j[i]] / s[k[i]] u[i] <- (m[j[i]] - m[k[i]]) / sqrt(s[j[i]] * s[k[i]]) ksi[i] <- s[j[i]]^2 + s[k[i]]^2 + (m[j[i]] - m[k[i]])^2 ccc[i] <- (2 * COV[j[i], k[i]]) / ksi[i] } accu <- ((v + 1/v + u^2) / 2)^-1 prec <- ccc / accu occc <- sum(ksi * ccc) / sum(ksi) oaccu <- sum(ksi * accu) / sum(ksi) oprec <- occc / oaccu prs <- if (pairs) { list(ccc = ccc, prec = prec, accu = accu, ksi = ksi, scale = v, location = u) } else NULL }) rval <- list(occc = elements$occc, oprec = elements$oprec, oaccu = elements$oaccu, pairs = elements$prs, data.name = deparse(substitute(dat))) class(rval) <- "epi.occc" return(rval) } # https://cran.r-project.org/web/packages/knitr/vignettes/knit_print.html print.epi.occc <- function(x, ...) { # cat("Overall concordance correlation coefficients\n") cat(sprintf("\nOverall CCC %.4f", x$occc)) cat(sprintf("\nOverall precision %.4f", x$oprec)) cat(sprintf("\nOverall accuracy %.4f", x$oaccu)) cat("\n") # print(data.frame(Value = c("Overall CCC" = x$occc, "Overall precision" = x$oprec, "Overall accuracy" = x$oaccu)), ...) } ## Summary method for epi.occc: summary.epi.occc <- function(object, ...) { out <- data.frame(occc = object$occc, oprec = object$oprec, oaccu = object$oaccu) return(out) }epiR/R/epi.iv.R0000644000176200001440000001521712601641614012676 0ustar liggesusers"epi.iv" <- function(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) { # Declarations: k <- length(names) a.i <- ev.trt b.i <- n.trt - ev.trt c.i <- ev.ctrl d.i <- n.ctrl - ev.ctrl N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) # Test each strata for zero values. 
Add 0.5 to all cells if any cell has a zero value: for(i in 1:k){ if(a.i[i] < 1 | b.i[i] < 1 | c.i[i] < 1 | d.i[i] < 1){ a.i[i] <- a.i[i] + 0.5; b.i[i] <- b.i[i] + 0.5; c.i[i] <- c.i[i] + 0.5; d.i[i] <- d.i[i] + 0.5 } } n.1i <- a.i + b.i n.2i <- c.i + d.i N.i <- a.i + b.i + c.i + d.i if(method == "odds.ratio") { # Individual study odds ratios: OR.i <- (a.i * d.i)/(b.i * c.i) lnOR.i <- log(OR.i) SE.lnOR.i <- sqrt(1/a.i + 1/b.i + 1/c.i + 1/d.i) SE.OR.i <- exp(SE.lnOR.i) lower.lnOR.i <- lnOR.i - (z * SE.lnOR.i) upper.lnOR.i <- lnOR.i + (z * SE.lnOR.i) lower.OR.i <- exp(lower.lnOR.i) upper.OR.i <- exp(upper.lnOR.i) # Weights: w.i <- 1 / (1/a.i + 1/b.i + 1/c.i + 1/d.i) w.iv.i <- 1/(SE.lnOR.i)^2 # IV pooled odds ratios: lnOR.iv <- sum(w.i * lnOR.i)/sum(w.iv.i) OR.iv <- exp(lnOR.iv) SE.lnOR.iv <- 1/sqrt((sum(w.iv.i))) SE.OR.iv <- exp(SE.lnOR.iv) lower.lnOR.iv <- lnOR.iv - (z * SE.lnOR.iv) upper.lnOR.iv <- lnOR.iv + (z * SE.lnOR.iv) lower.OR.iv <- exp(lower.lnOR.iv) upper.OR.iv <- exp(upper.lnOR.iv) # Test of heterogeneity: Q <- sum(w.iv.i * (lnOR.i - lnOR.iv)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. Code for p-value taken from z.test function in TeachingDemos package: effect.z <- lnOR.iv/SE.lnOR.iv alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: OR <- as.data.frame(cbind(OR.i, SE.OR.i, lower.OR.i, upper.OR.i)) names(OR) <- c("est", "se", "lower", "upper") OR.summary <- as.data.frame(cbind(OR.iv, SE.OR.iv, lower.OR.iv, upper.OR.iv)) names(OR.summary) <- c("est", "se", "lower", "upper") weights <- as.data.frame(cbind(w.i, w.iv.i)) names(weights) <- c("raw", "inv.var") Hsq <- as.data.frame(cbind(Hsq, Hsq.l, Hsq.u)) names(Hsq) <- c("est", "lower", "upper") Isq <- as.data.frame(cbind(Isq, Isq.l, Isq.u)) names(Isq) <- c("est", "lower", "upper") rval <- list(OR = OR, OR.summary = OR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect)) } else if(method == "risk.ratio") { # Individual study risk ratios: RR.i <- (a.i/n.1i)/(c.i/n.2i) lnRR.i <- log(RR.i) SE.lnRR.i <- sqrt(1/a.i + 1/c.i - 1/n.1i - 1/n.2i) SE.RR.i <- exp(SE.lnRR.i) lower.lnRR.i <- lnRR.i - (z * SE.lnRR.i) upper.lnRR.i <- lnRR.i + (z * SE.lnRR.i) lower.RR.i <- exp(lower.lnRR.i) upper.RR.i <- exp(upper.lnRR.i) # Weights: w.i <- (c.i * n.1i) / N.i w.iv.i <- 1/(SE.lnRR.i)^2 # IV pooled risk ratios: lnRR.iv <- sum(w.iv.i * lnRR.i)/sum(w.iv.i) RR.iv <- exp(lnRR.iv) SE.lnRR.iv <- 1/sqrt((sum(w.iv.i))) SE.RR.iv <- exp(SE.lnRR.iv) lower.lnRR.iv <- lnRR.iv - (z * SE.lnRR.iv) upper.lnRR.iv <- lnRR.iv + (z * SE.lnRR.iv) lower.RR.iv <- exp(lower.lnRR.iv) upper.RR.iv <- exp(upper.lnRR.iv) # Test of heterogeneity: Q <- sum(w.iv.i * (lnRR.i - lnRR.iv)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q 
> k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. Code for p-value taken from z.test function in TeachingDemos package: effect.z <- lnRR.iv/SE.lnRR.iv alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: RR <- as.data.frame(cbind(RR.i, SE.RR.i, lower.RR.i, upper.RR.i)) names(RR) <- c("est", "se", "lower", "upper") RR.summary <- as.data.frame(cbind(RR.iv, SE.RR.iv, lower.RR.iv, upper.RR.iv)) names(RR.summary) <- c("est", "se", "lower", "upper") weights <- as.data.frame(cbind(w.i, w.iv.i)) names(weights) <- c("raw", "inv.var") Hsq <- as.data.frame(cbind(Hsq, Hsq.l, Hsq.u)) names(Hsq) <- c("est", "lower", "upper") Isq <- as.data.frame(cbind(Isq, Isq.l, Isq.u)) names(Isq) <- c("est", "lower", "upper") rval <- list(RR = RR, RR.summary = RR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect)) } return(rval) } epiR/R/epi.directadj.r0000644000176200001440000000750212601641614014247 0ustar liggesusers"epi.directadj" <- function(obs, pop, std, units = 1, conf.level = 0.95){ # How many strata (rows) are there? n.strata <- dim(obs)[1] # How many adjustment variables (columns) are there? n.cov <- dim(obs)[2] N. <- 1 - ((1 - conf.level) / 2) alpha <- 1 - conf.level z <- qnorm(N., mean = 0, sd = 1) # Note inclusion of variable sindex (strata index) to make sure strata estimates are sorted in the right order: tmp <- data.frame(sindex = rep(1:nrow(pop), times = n.cov), strata = rep(rownames(pop), times = n.cov), cov = rep(colnames(pop), each = n.strata), obs = as.vector(obs), pop = as.vector(pop), std = as.vector(std)) # Crude incidence risk by strata-covariate: # tmp <- data.frame(tmp, (epi.conf(as.matrix(cbind(tmp$obs, tmp$pop)), ctype = "inc.risk", method = method, # design = 1, conf.level = conf.level) * units)) # Expected events (equals observed incidence risk multiplied by standard population size): tmp$exp <- (tmp$obs / tmp$pop) * tmp$std # Crude (for all strata-covariate combinations): t.obs <- tmp$obs t.pop <- tmp$pop # Confidence interval for crude incidence risk estimates corrected following email from Gillian Raab: crude.p <- t.obs / t.pop # crude.se <- crude.p / sqrt(t.pop) ## Incorrect. crude.se <- crude.p / sqrt(t.obs) ## replaced pop by obs crude.l <- qchisq(alpha / 2, 2 * t.obs) / 2 / t.pop ## next 2 lines changed crude.u <- qchisq(1 - alpha / 2, 2 * (t.obs + 1)) / 2 / t.pop crude <- data.frame(strata = tmp$strata, cov = tmp$cov, est = as.numeric(crude.p * units), lower = as.numeric(crude.l * units), upper = as.numeric(crude.u * units)) # Crude strata: t.obs <- by(data = tmp$obs, INDICES = tmp$sindex, FUN = sum) t.pop <- by(data = tmp$pop, INDICES = tmp$sindex, FUN = sum) t.strata <- rownames(pop) # Confidence interval for crude incidence risk estimates corrected following email from Gillian Raab: crude.p <- as.numeric(t.obs / t.pop) # crude.se <- crude.p / sqrt(t.pop) ## Incorrect. 
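# Rationale for the correction (brief note, not part of the original source):
# if obs is treated as a Poisson count then var(obs) = obs, so
# se(p) = se(obs/pop) = sqrt(obs)/pop = p/sqrt(obs), not p/sqrt(pop).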
crude.se <- as.numeric(crude.p / sqrt(t.obs)) ## replaced pop by obs crude.l <- as.numeric(qchisq(alpha / 2, 2 * t.obs) / 2 / t.pop) ## next 2 lines changed crude.u <- as.numeric(qchisq(1 - alpha / 2, 2 * (t.obs + 1)) / 2 / t.pop) crude.strata <- data.frame(strata = t.strata, est = as.numeric(crude.p * units), lower = as.numeric(crude.l * units), upper = as.numeric(crude.u * units)) # Adjusted incidence risk, by strata. Confidence intervals based on Fay and Feuer (1997): t.obs <- by(data = tmp$obs, INDICES = tmp$sindex, FUN = sum) t.pop <- by(data = tmp$pop, INDICES = tmp$sindex, FUN = sum) t.exp <- by(data = tmp$exp, INDICES = tmp$sindex, FUN = sum) t.strata <- rownames(pop) t.std <- by(data = tmp$std, INDICES = tmp$sindex, FUN = sum) tstd <- matrix(rep(std, time = n.strata), byrow = TRUE, nrow = n.strata) stdwt <- tstd / apply(X = tstd, MARGIN = 1, FUN = sum) adj.p <- apply(X = stdwt * (obs / pop), MARGIN = 1, FUN = sum) adj.var <- apply((stdwt^2) * (obs / pop^2), MARGIN = 1, FUN = sum) wm <- apply(stdwt / pop, MARGIN = 1, FUN = max) adj.l <- qgamma(alpha / 2, shape = (adj.p^2) / adj.var, scale = adj.var / adj.p) adj.u <- qgamma(1 - alpha/2, shape = ((adj.p + wm)^2) / (adj.var + wm^2), scale = (adj.var + wm^2) / (adj.p + wm)) adj.strata <- data.frame(strata = t.strata, est = as.numeric(adj.p * units), lower = as.numeric(adj.l * units), upper = as.numeric(adj.u * units)) rval <- list(crude = crude, crude.strata = crude.strata, adj.strata = adj.strata) return(rval) }epiR/R/epi.cluster2size.R0000644000176200001440000000563312601641614014717 0ustar liggesusers"epi.cluster2size" <- function(nbar, R = NA, n, mean, sigma2.x, sigma2.y = NA, sigma2.xy = NA, epsilon.r, method = "mean", conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) if (method == "total") { if (length(n) != 2) stop("Error: n must be of length 2") if (length(mean) != 2) stop("Error: mean must be of length 2") if (length(sigma2.x) != 2) stop("Error: sigma2.x must be of length 2") sigma2.1x <- sigma2.x[1]; sigma2.2x <- sigma2.x[2] M <- n[1]; N <- n[2] Xbar <- mean[1]; xbar <- mean[2] # Equation 10.6, page 292 Levy and Lemeshow: numerator <- (sigma2.1x / Xbar^2) * (M / (M - 1)) + (1 / nbar) * (sigma2.2x / xbar^2) * ((N - nbar) / (N - 1)) denominator <- (epsilon.r^2 / z^2) + (sigma2.1x / (Xbar^2 * (M - 1))) rval <- round(numerator/denominator, digits = 0) } if (method == "mean") { if (length(n) != 2) stop("Error: n must be of length 2") if (length(mean) != 2) stop("Error: mean must be of length 2") if (length(sigma2.x) != 2) stop("Error: sigma2.x must be of length 2") sigma2.1x <- sigma2.x[1]; sigma2.2x <- sigma2.x[2] M <- n[1]; N <- n[2] Xbar <- mean[1]; xbar <- mean[2] # Equation 10.6, page 292 Levy and Lemeshow: numerator <- ((sigma2.1x / Xbar^2) * (M / (M - 1))) + ((1 / nbar) * (sigma2.2x / xbar^2) * ((N - nbar) / (N - 1))) denominator <- (epsilon.r^2 / z^2) + (sigma2.1x / (Xbar^2 * (M - 1))) rval <- round(numerator/denominator, digits = 0) } if (method == "proportion") { if (length(n) != 2) stop("Error: n must be of length 2") if (length(mean) != 2) stop("Error: mean must be of length 2") if (length(sigma2.x) != 2) stop("Error: sigma2.x must be of length 2") if (length(sigma2.y) != 2) stop("Error: sigma2.y must be of length 2") if (length(sigma2.xy) != 2) stop("Error: sigma2.xy must be of length 2") sigma2.1x <- sigma2.x[1]; sigma2.2x <- sigma2.x[2] sigma2.1y <- sigma2.y[1]; sigma2.2y <- sigma2.y[2] sigma.1xy <- sigma2.xy[1]; sigma.2xy <- sigma2.xy[2] M <- n[1]; N <- n[2] Xbar <- mean[1]; xbar 
<- mean[2] sigmasq.r1 <- sigma2.1y + (R^2 * sigma2.1x) - (2 * R * sigma.1xy) sigmasq.r2 <- sigma2.2y + (R^2 * sigma2.2x) - (2 * R * sigma.2xy) # Equation 10.7, page 292 Levy and Lemeshow: numerator <- ((sigmasq.r1 / Xbar^2) * (M / (M - 1))) + ((1 / nbar) * (sigmasq.r2 / xbar^2) * ((N - nbar) / (N - 1))) denominator <- (epsilon.r^2 / z^2) + (sigmasq.r1 / (Xbar^2 * (M - 1))) rval <- round(numerator/denominator, digits = 0) } return(rval) } epiR/R/epi.tests.R0000644000176200001440000003351312601641614013421 0ustar liggesusers"epi.tests" <- function(dat, conf.level = 0.95) { ## Create a list to hold all variables: elements <- list() ## Do all data manipulation within the list: elements <- within(elements, { N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) ## Exact binomial confidence limits from D. Collett (1999) Modelling binary data. Chapman & Hall/CRC, Boca Raton Florida, p. 24. .funincrisk <- function(cdat, conf.level){ N. <- 1 - ((1 - conf.level) / 2) a <- cdat[,1] n <- cdat[,2] b <- n - a p <- a / n a. <- ifelse(a == 0, a + 1, a); b. <- ifelse(b == 0, b + 1, b) low <- a. /(a. + (b. + 1) * (1 / qf(1 - N., 2 * a., 2 * b. + 2))) up <- (a. + 1) / (a. + 1 + b. / (1 / qf(1 - N., 2 * b., 2 * a. + 2))) low <- ifelse(a == 0, 0, low) up <- ifelse(a == n, 1, up) rval <- data.frame(est = p, lower = low, upper = up) rval } ## From Greg Snow, R-sig-Epi, 3 Mar 2008: ## My prefered approach (not the only one), is to use the Bayesian interval using a uniform prior (beta(1,1) distribution) ## with the binomial (it is easier to do than it looks). Basically find the HPD interval from a beta distribution with parameters s+1 and f+1, ## where s and f are successes (correct test results) and failures (incorrect test results). ## I use the hpd function from the TeachingDemos package, but there are others as well (I'm a bit biased towards that package). ## For example, to calculate the 95% confidence interval for sensitivity when you have 95 true positives and 5 false negatives you would just ## type (after installing and loading the package): ## hpd(qbeta, shape1 = 96, shape2 = 6) ## And the 2 numbers are limits of a 95% confidence interval. I like this approach because it still gives sensible results when you ## have no false negatives (or false positives for specificity). ## hpd. <- function(posterior.icdf, conf = conf.level, tol = 1e-08, ...){ ## conf <- min(conf, 1 - conf) ## f <- function(x, posterior.icdf, conf, ...) { ## posterior.icdf(1 - conf + x, ...) - posterior.icdf(x, ...) ## } ## out <- optimize(f, c(0, conf), posterior.icdf = posterior.icdf, conf = conf, tol = tol, ...) 
## return(c(posterior.icdf(out$minimum, ...), posterior.icdf(1 - conf + out$minimum, ...))) ## } ## ================= ## DECLARE VARIABLES ## ================= ## --------| D+ --| D- --| Total ## Test + | a | b | N1 ## Test - | c | d | N0 ## --------|------|------|------ ## Total | M1 | M0 | total a <- dat[1] b <- dat[3] c <- dat[2] d <- dat[4] ## Total disease pos: M1 <- a + c ## Total disease neg: M0 <- b + d ## Total test pos: N1 <- a + b ## Total test neg: N0 <- c + d ## Total subjects: total <- a + b + c + d ## True prevalence: tdat <- as.matrix(cbind(M1, total)) trval <- .funincrisk(tdat, conf.level) tp <- trval$est; tp.low <- trval$lower; tp.up <- trval$upper ## Greg Snow: ## r <- M1; n <- total ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## tp <- r/n ## tp.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## tp.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## tp <- p ## tp.low <- (A - B) / C ## tp.up <- (A + B) / C tprev <- data.frame(est = tp, lower = tp.low, upper = tp.up) ## Apparent prevalence: tdat <- as.matrix(cbind(N1, total)) trval <- .funincrisk(tdat, conf.level) ap <- trval$est; ap.low <- trval$lower; ap.up <- trval$upper ## Greg Snow: ## r <- N1; n <- total ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## ap <- r/n ## ap.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## ap.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## ap <- p ## ap.low <- (A - B) / C ## ap.up <- (A + B) / C aprev <- data.frame(est = ap, lower = ap.low, upper = ap.up) ## Sensitivity: tdat <- as.matrix(cbind(a, M1)) trval <- .funincrisk(tdat, conf.level) se <- trval$est; se.low <- trval$lower; se.up <- trval$upper ## Greg Snow: ## r <- a; n <- M1 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## se <- r/n ## se.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## se.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## se <- p ## se.low <- (A - B) / C ## se.up <- (A + B) / C sensitivity <- data.frame(est = se, lower = se.low, upper = se.up) ## Specificity: tdat <- as.matrix(cbind(d, M0)) trval <- .funincrisk(tdat, conf.level) sp <- trval$est; sp.low <- trval$lower; sp.up <- trval$upper ## Greg Snow: ## r <- d; n <- M0 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## sp <- r/n ## sp.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## sp.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## sp <- p ## sp.low <- (A - B) / C ## sp.up <- (A + B) / C specificity <- data.frame(est = sp, lower = sp.low, upper = sp.up) ## Positive predictive value: tdat <- as.matrix(cbind(a, N1)) trval <- .funincrisk(tdat, conf.level) ppv <- trval$est; ppv.low <- trval$lower; ppv.up <- trval$upper ## Greg Snow: ## r <- a; n <- N1 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## ppv <- r/n ## ppv.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## ppv.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## ppv <- p ## ppv.low <- (A - B) / C ## ppv.up <- (A + B) / C pv.positive <- data.frame(est = ppv, lower = 
ppv.low, upper = ppv.up) ## Negative predictive value: tdat <- as.matrix(cbind(d, N0)) trval <- .funincrisk(tdat, conf.level) npv <- trval$est; npv.low <- trval$lower; npv.up <- trval$upper ## Greg Snow: ## r <- d; n <- N0 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## npv <- r/n ## npv.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## npv.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## npv <- p ## npv.low <- (A - B) / C ## npv.up <- (A + B) / C pv.negative <- data.frame(est = npv, lower = npv.low, upper = npv.up) ## Likelihood ratio of a positive test. Confidence intervals from Simel et al. (1991) ## lrpos <- se / (1 - sp) lrpos <- (a/M1) / (1 - (d/M0)) lrpos.low <- exp(log(lrpos) - z * sqrt((1 - se) / (M1 * se) + (sp)/(M0 * (1 - sp)))) lrpos.up <- exp(log(lrpos) + z * sqrt((1 - se) / (M1 * se) + (sp)/(M0 * (1 - sp)))) lr.positive <- data.frame(est = lrpos, lower = lrpos.low, upper = lrpos.up) ## Likelihood ratio of a negative test. Confidence intervals from Simel et al. (1991) ## lrpos <- se / (1 - sp) lrneg <- (1 - (a/M1)) / (d/M0) lrneg.low <- exp(log(lrneg) - z * sqrt((se)/(M1 * (1 - se)) + (1 - sp)/(M0 * (sp)))) lrneg.up <- exp(log(lrneg) + z * sqrt((se)/(M1 * (1 - se)) + (1 - sp)/(M0 * (sp)))) lr.negative <- data.frame(est = lrneg, lower = lrneg.low, upper = lrneg.up) ## Diagnostic accuracy (from Scott et al. (2008)): tdat <- as.matrix(cbind((a + d), total)) trval <- .funincrisk(tdat, conf.level) da <- trval$est; da.low <- trval$lower; da.up <- trval$upper ## Greg Snow: ## r <- (a + d); n <- total ## p <- r/n ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## da <- r/n ## da.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## da.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## da <- p ## da.low <- (A - B) / C ## da.up <- (A + B) / C diag.acc <- data.frame(est = da, lower = da.low, upper = da.up) ## Diagnostic odds ratio (from Scott et al. (2008)): dOR.p <- (a * d) / (b * c) lndOR <- log(dOR.p) lndOR.var <- 1/a + 1/b + 1/c + 1/d lndOR.se <- sqrt(1/a + 1/b + 1/c + 1/d) lndOR.l <- lndOR - (z * lndOR.se) lndOR.u <- lndOR + (z * lndOR.se) dOR.se <- exp(lndOR.se) dOR.low <- exp(lndOR.l) dOR.up <- exp(lndOR.u) diag.or <- data.frame(est = dOR.p, lower = dOR.low, upper = dOR.up) ## Number needed to diagnose (from Scott et al. (2008)): ndx <- 1 / (se - (1 - sp)) ndx.1 <- 1 / (se.low - (1 - sp.low)) ndx.2 <- 1 / (se.up - (1 - sp.up)) ndx.low <- min(ndx.1, ndx.2) ndx.up <- max(ndx.1, ndx.2) nnd <- data.frame(est = ndx, lower = ndx.low, upper = ndx.up) ## Youden's index (from Bangdiwala et al. 
(2008)): c.p <- se - (1 - sp) c.1 <- se.low - (1 - sp.low) c.2 <- se.up - (1 - sp.up) c.low <- min(c.1, c.2) c.up <- max(c.1, c.2) youden <- data.frame(est = c.p, lower = c.low, upper = c.up) }) rval <- list( aprev = elements$aprev, tprev = elements$tprev, se = elements$sensitivity, sp = elements$specificity, diag.acc = elements$diag.acc, diag.or = elements$diag.or, nnd = elements$nnd, youden = elements$youden, ppv = elements$pv.positive, npv = elements$pv.negative, plr = elements$lr.positive, nlr = elements$lr.negative) ## Define tab: r1 <- with(elements, c(a, b, N1)) r2 <- with(elements, c(c, d, N0)) r3 <- with(elements, c(M1, M0, M0 + M1)) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Disease +", " Disease -", " Total") rownames(tab) <- c("Test +", "Test -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") out <- list(conf.level = conf.level, elements = elements, rval = rval, tab = tab) class(out) <- "epi.tests" return(out) } ## Print method for epi.tests: print.epi.tests <- function(x, ...) { print(x$tab, ...) cat("\nPoint estimates and", x$conf.level * 100, "%", "CIs:") cat("\n---------------------------------------------------------") with(x$rval, { cat(sprintf("\nApparent prevalence %.2f (%.2f, %.2f)", aprev$est, aprev$lower, aprev$upper )) cat(sprintf("\nTrue prevalence %.2f (%.2f, %.2f)", tprev$est, tprev$lower, tprev$upper )) cat(sprintf("\nSensitivity %.2f (%.2f, %.2f)", se$est, se$lower, se$upper )) cat(sprintf("\nSpecificity %.2f (%.2f, %.2f)", sp$est, sp$lower, sp$upper )) cat(sprintf("\nPositive predictive value %.2f (%.2f, %.2f)", ppv$est, ppv$lower, ppv$upper )) cat(sprintf("\nNegative predictive value %.2f (%.2f, %.2f)", npv$est, npv$lower, npv$upper )) cat(sprintf("\nPositive likelihood ratio %.2f (%.2f, %.2f)", plr$est, plr$lower, plr$upper )) cat(sprintf("\nNegative likelihood ratio %.2f (%.2f, %.2f)", nlr$est, nlr$lower, nlr$upper )) }) cat("\n---------------------------------------------------------") cat("\n") } ## Summary method for epi.tests: summary.epi.tests <- function(object, ...) { ## Create a data frame: out <- do.call(rbind, object$rval) ## Return it: return(out) }epiR/R/epi.dgamma.R0000644000176200001440000000046312601641614013503 0ustar liggesusers"epi.dgamma" <- function(rr, quantiles = c(0.05, 0.95)){ fold.variation <- rr[2]/rr[1] low.p <- abs(qnorm(quantiles[1], mean = 0, sd = 1)) up.p <- abs(qnorm(quantiles[2], mean = 0, sd = 1)) p <- low.p + up.p tau <- (p^2) / (log(fold.variation) * log(fold.variation)) return(tau) } epiR/R/epi.stratasize.R0000644000176200001440000001012512601641614014442 0ustar liggesusersepi.stratasize <- function (strata.n, strata.mean, strata.var, strata.Py, epsilon.r, method = "mean", conf.level = 0.95) { N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) if (method == "total") { N <- sum(strata.n) mean <- sum(strata.n * strata.mean) / N sigma.bx <- sum(strata.n * (strata.mean - mean)^2) / N sigma.wx <- sum(strata.n * strata.var) / N sigma.x <- sigma.bx + sigma.wx V <- sigma.x/mean^2 gamma <- sigma.bx/sigma.wx # Equation 6.25 Levy and Lemeshow. Example on p 177 gives 9 for z^2 which equates to an alpha of a bit less than 0.01. # Emailed Stan Lemeshow re this on 9 Feb 2008 - he confirms this is true. 
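# Illustration of the proportional allocation applied below (hypothetical
# figures): with strata.n = c(500, 300, 200), so N = 1000, and a computed
# total.sample of 200, strata.sample = round(strata.n * (200/1000)),
# i.e. 100, 60 and 40 units from the three strata.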
total.sample <- round((((z^2 * N)/(1 + gamma)) * V) / (((z^2 * V) / (1 + gamma)) + N * (epsilon.r^2)), digits = 0) strata.sample <- round(strata.n * (total.sample/N), digits = 0) result.01 <- c(strata.sample) result.02 <- c(total.sample) result.03 <- cbind(mean = mean, sigma.bx = sigma.bx, sigma.wx = sigma.wx, sigma.x = sigma.x, rel.var = V, gamma = gamma) rval <- list(strata.sample = result.01, total.sample = result.02, stats = result.03) } if (method == "mean") { N <- sum(strata.n) mean <- sum(strata.n * strata.mean) / N sigma.bx <- sum(strata.n * (strata.mean - mean)^2) / N sigma.wx <- sum(strata.n * strata.var) / N sigma.x <- sigma.bx + sigma.wx V <- sigma.x/mean^2 gamma <- sigma.bx/sigma.wx # Equation 6.25 Levy and Lemeshow. Example on p 177 gives 9 for z^2. Suspect this is an error. I use 1.96^2 =~ 4 total.sample <- round((((z^2 * N)/(1 + gamma)) * V) / (((z^2 * V) / (1 + gamma)) + N * (epsilon.r^2)), digits = 0) strata.sample <- round(strata.n * (total.sample/N), digits = 0) result.01 <- c(strata.sample) result.02 <- c(total.sample) result.03 <- cbind(mean = mean, sigma.bx = sigma.bx, sigma.wx = sigma.wx, sigma.x = sigma.x, rel.var = V, gamma = gamma) rval <- list(strata.sample = result.01, total.sample = result.02, stats = result.03) } if (method == "proportion") { # Where method == "proportion" the estimated proportions for each strata are entered into the vector strata.Py: N <- sum(strata.n) mean <- sum(strata.n * strata.Py) / N # The vector strata.var is ignored (variance of proportion calculated as follows): strata.var = (strata.Py * (1 - strata.Py)) phi <- (strata.n * sqrt(strata.var))/sum(strata.n * sqrt(strata.var)) sigma.bx <- sum((strata.n^2 * strata.var)/((phi) * (mean^2))) sigma.bxd <- sum((strata.n * strata.var)/mean^2) # Equation 6.23 Levy and Lemeshow. Note the similarity between 6.23 and 6.22: total.sample <- round(((z^2/N^2) * sigma.bx)/((epsilon.r^2) + ((z^2/N^2) * sigma.bxd)), digits = 0) strata.sample <- round(strata.n * (total.sample/N), digits = 0) result.01 <- c(strata.sample) result.02 <- c(total.sample) result.03 <- cbind(mean = mean, sigma.bx = sigma.bx, sigma.bxd = sigma.bxd, phi = phi) rval <- list(strata.sample = result.01, total.sample = result.02, stats = result.03) } if (method == "pps") { N <- sum(strata.n) mean <- sum(strata.n * strata.mean)/N strata.var = (strata.mean * (1 - strata.mean)) sigma.bx <- sum((strata.n * strata.var)/mean^2) total.sample <- round(((z^2/N) * sigma.bx)/(epsilon.r^2 + ((z^2/N^2) * sigma.bx)), digits = 0) strata.sample <- round(strata.n * (total.sample/N), digits = 0) result.01 <- c(strata.sample) result.02 <- c(total.sample) result.03 <- cbind(mean = mean, sigma.bx = sigma.bx) rval <- list(strata.sample = result.01, total.sample = result.02, stats = result.03) } return(rval) } epiR/R/epi.pooled.R0000644000176200001440000000051412601641614013534 0ustar liggesusersepi.pooled <- function(se, sp, P, m, r){ # Herd specificity: PlSp <- sp^m HSp <- (PlSp)^r # Herd sensitivity: HSe <- 1 - ((1 - (1 - P)^m) * (1 - se) + (1 - P)^m * PlSp)^r # Herd level apparent prevalence: HAPneg <- 1 - HSp rval <- list(HAPneg = HAPneg, HSe = HSe, HSp = HSp) rval } epiR/R/epi.RtoBUGS.R0000644000176200001440000000246612601641614013507 0ustar liggesusers# Source: Terry Elrod (Terry.Elrod@UAlberta.ca). 
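# Usage sketch (hypothetical data and file name): write an R list in the
# data format read by WinBUGS/OpenBUGS.
# dat <- list(n = 10, y = c(1, 0, 1, 1, 0, 0, 1, 0, 1, 1))
# epi.RtoBUGS(datalist = dat, towhere = "bugsdat.txt")
# The resulting file then contains the single line:
# list(n=10,y=c(1,0,1,1,0,0,1,0,1,1))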
"epi.RtoBUGS" <- function(datalist, towhere) { if(!is.list(datalist)) stop("First argument to writeDatafile must be a list.") cat(.formatData(datalist), file = towhere) } ".formatData" <- function(datalist) { if(!is.list(datalist)) stop("Argument to formatData must be a list.") n <- length(datalist) datalist.string <- as.list(rep(NA, n)) for(i in 1.:n) { if(is.numeric(datalist[[i]]) & length(datalist[[i]]) == 1.) datalist.string[[i]] <- paste(names(datalist)[i], "=", as.character(datalist[[i]]), sep = "") if(is.vector(datalist[[i]]) & length(datalist[[i]]) > 1.) datalist.string[[i]] <- paste(names(datalist)[i], "=c(", paste(as.character(datalist[[ i]]), collapse = ","), ")", sep = "") if(is.array(datalist[[i]])) datalist.string[[i]] <- paste(names(datalist)[i], "=structure(.Data=c(", paste( as.character(as.vector(aperm(datalist[[i]]))), collapse = ","), "),.Dim=c(", paste(as.character(dim(datalist[[i]])), collapse = ","), "))", sep = "") } datalist.tofile <- paste("list(", paste(unlist(datalist.string), collapse = ","), ")", sep = "") return(datalist.tofile) } epiR/R/epi.cpresids.r0000644000176200001440000000477212601641614014140 0ustar liggesusersepi.cpresids <- function(obs, fit, covpattern){ # Covariate pattern identifiers: cpid <- covpattern$cov.pattern$id # Number of observations that comprise each covariate pattern: n <- covpattern$cov.pattern$n # Number of outcome-positive observations observed for each covariate pattern: nY <- obs # Number of outcome-positive observations predicted for each covariate pattern: np <- fit * n # Predicted probability of outcome for each covariate pattern: pi. <- fit den <- rep(1, times = nrow(covpattern$cov.pattern)) # Turn factors into dummy variables: X <- den for(i in 3:dim(covpattern$cov.pattern)[2]){ ifelse(is.factor(covpattern$cov.pattern[,i]), # The function model.matrix returns the dummy variables for each factor. Remove the first column to return treatment contrasts. # That is, if you have a factor comprised of three levels, we return two columns to represent the treatment contrasts (i.e. 00, 01, and 10). 
X <- cbind(X, model.matrix(~covpattern$cov.pattern[,i] - 1)[,-1]), X <- cbind(X, covpattern$cov.pattern[,i])) } colnames(X) <- 1:dim(X)[2] # X <- as.matrix(cbind(den, covpattern$cov.pattern[3:dim(covpattern$cov.pattern)[2]])) V <- diag(np * (1 - pi.)) xvx <- solve(t(X) %*% V %*% X) sV <- sqrt(V) H <- sV %*% X %*% xvx %*% t(X) * sV leverage <- diag(H) # Raw residuals: raw <- (nY - np) # Standardised raw residuals: sraw <- raw /sd(np) # Pearson residuals: pearson <- (nY - np)/sqrt(np * (1 - pi.)) # Standardised Pearson residuals: spearson <- pearson / sqrt(1 - leverage) # Deviance residuals: sign <- ifelse(nY - np > 0, 1, -1) dev <- sign * sqrt(2 * ((nY * log(nY/np)) + ((n - nY) * log((n - nY)/(n * (1 - pi.)))))) dev[nY == 0] <- -sqrt(2 * n[nY == 0] * abs(log(1 - pi.[nY == 0]))) dev[nY == n] <- sqrt(2 * n[nY == n] * abs(log(pi.[nY == n]))) # Delta beta: deltabeta <- (pearson^2 * leverage) / (1 - leverage) # Standardised delta beta: sdeltabeta <- (spearson^2 * leverage) / (1 - leverage) # Delta chi-square (used to detect ill-fitting covariate patterns): deltachi <- pearson^2 / (1 - leverage) rval <- data.frame(cpid = cpid, n = n, obs = nY, pred = np, raw = raw, sraw = sraw, pearson = pearson, spearson = spearson, deviance = dev, leverage = leverage, deltabeta = deltabeta, sdeltabeta = sdeltabeta, deltachi = deltachi) return(rval) } epiR/R/zzz.R0000644000176200001440000000046312601641614012336 0ustar liggesusers.onAttach <- function(libname, pkgname) { ver <- as.character(read.dcf(file.path(libname, pkgname, "DESCRIPTION"), "Version")) packageStartupMessage("Package epiR ", ver, " is loaded", appendLF = TRUE) packageStartupMessage("Type help(epi.about) for summary information") packageStartupMessage("\n") }epiR/R/epi.offset.R0000644000176200001440000000061212601641614013537 0ustar liggesusers"epi.offset" <- function(id.names) { total <- length(id.names) counts <- as.vector(table(id.names)) offset <- c(1) for (i in 2:length(counts)-1) {var <- counts[i] + offset[i] offset <- c(offset, var) } offset <- c(offset, total) return(offset) } epiR/MD50000644000176200001440000001121012602004033011442 0ustar liggesusers3d7587b74f41d82fe67fbc84014b64fb *DESCRIPTION 32e358410a5194a4225a03827644b396 *NAMESPACE de0e859224af9995bd8cb034c63f8e0f *R/epi.2by2.r 90797cfa53b823b298af4c32eb3435bf *R/epi.RtoBUGS.R a9a4deaaec9cd9435529455159385225 *R/epi.about.R 35dd233e3feeceefc29040363acdbadd *R/epi.asc.R 88fc1fad29654a3b8cff28d58bd332d6 *R/epi.betabuster.r 89e1ade6b7a25729a3e7db833abd2739 *R/epi.bohning.R b0409b337ea6dd9275f69b8c9374f616 *R/epi.ccc.r 52927695dc6e0d0002c6ab70a12ac926 *R/epi.cluster1size.R aed994d4e585e092089481c32bc12835 *R/epi.cluster2size.R c3ff42af84bb01e927867d54b4ac4334 *R/epi.clustersize.R f4fd3d1b84ce96671b0e0c1c08433990 *R/epi.conf.R d535d0aaae250f95c38e00ccbba0b7ca *R/epi.convgrid.R 4eb1a96f7427ea81382d37754d717f49 *R/epi.cp.R 0bce821540346b1e58e640d7334c043b *R/epi.cpresids.r 34276075081f890bda9ae5d151d138e5 *R/epi.descriptives.R 7ca563951950b4476e001ce3dba64043 *R/epi.detectsize.R da3832a85a8085b34f75455e6defbf24 *R/epi.dgamma.R 4788877af77c92892dc43e324ccd05c5 *R/epi.directadj.r 85be50061d94e87a31c52f3287e8e492 *R/epi.dms.R 4a8af6911e7c482c1235fdca46584d68 *R/epi.dsl.R 276cc7e048b9b12cad3c16fe60190600 *R/epi.edr.R 658381258fe39d7cdbe6253c66a49034 *R/epi.empbayes.R e935ed017215d39b2225f61b23de3bfd *R/epi.herdtest.R 566a1b7408b9ecb76c20bb182fb11d18 *R/epi.indirectadj.R 9c2fdb99c5701dca5359dca3e651e5a2 *R/epi.insthaz.R 8198e262aafd2a83bba29777a1189a99 *R/epi.interaction.r 
2a952ef6d504f5a1d6d1e39fd3d42666 *R/epi.iv.R 098e035a862a53f70d3c0490384bc8e1 *R/epi.kappa.R dd5a6b971d1652170e84fa8a709215a9 *R/epi.ltd.R 4c9176c6dbfbb04dac7159ca7527b888 *R/epi.mh.R 3d98611d0dba6b2b3d0fc12fac16b800 *R/epi.nomogram.R dd89f9509954970ef09f36b06192b96c *R/epi.occc.R 2c6852d882ea0b6808350b3fd47bae1b *R/epi.offset.R b1c0bdf50b56a4b9df09ac89f2de0b98 *R/epi.pooled.R 3a35fac64c1bcda32c3dcabe7b7d14b2 *R/epi.popsize.R 9984eb80ee76e03e982e401c2c3a2490 *R/epi.prcc.R d388e24a2baea682359c4f07b71ee689 *R/epi.prev.R b149e431ad851995742d356767b1a1ab *R/epi.simplesize.R 42ff19ebe373602acc39ab1a1dc7d244 *R/epi.smd.R 32b646ecabba4110b2566b44d1879d49 *R/epi.stratasize.R be94c9555d8cf256dbf189436ad76df8 *R/epi.studysize.R ba5e4d58f79b6d16659814f1f82b65c6 *R/epi.tests.R 00c26a92f3e827fd67862cf8434ecad2 *R/zzz.R e21ab6f2a8b2c8ed12468ce39757977e *data/epi.SClip.RData fd9920dce74d1e6cce1932570a9e85f4 *data/epi.epidural.RData 64af3548130a5495e0aed3c2b58f864f *data/epi.incin.RData f939059c30ca7a1ffcea51c4b999fe3a *man/epi.2by2.Rd bf7ef9dbf069d206b52f03ccf25f4744 *man/epi.RtoBUGS.Rd 0c9b04bc2d19935d734fa660831e0aff *man/epi.SClip.Rd c0207f2c0a218b3c799cfd6e5b0df220 *man/epi.about.Rd 5779280680d12e1355f64bf0984401fe *man/epi.asc.Rd ebbb59b1d73bff580541aec8b587b077 *man/epi.betabuster.Rd 7e06c95f246ea6a933847b73c2c0288a *man/epi.bohning.Rd f2590484d82cca9cf53f382b1c2321f7 *man/epi.ccc.Rd 2758ba80c5bde26752e8b5de29e03161 *man/epi.cluster1size.Rd b86f88dbb6fdfca830a2afbc9a764f30 *man/epi.cluster2size.Rd 572fe905ca4759a69445fda1de10d48e *man/epi.clustersize.Rd a467d56a87e4ccd1ca274827bc261b76 *man/epi.conf.Rd 07e702fd65b9cc6710d27c133b94f89f *man/epi.convgrid.Rd b8c620a7bf19cdff691279488646ab37 *man/epi.cp.Rd 11dc595f8d992cd9b43c0c1eb83879e7 *man/epi.cpresids.Rd fa9f1e4c1fc141ea16dbcc5d5f941342 *man/epi.descriptives.Rd 067584ac89bc7d866982bb30ba474502 *man/epi.detectsize.Rd 8b3b0014ef1fd0e3fc8da5c53b70e8ff *man/epi.dgamma.Rd 3833bd94037f590a646d6d566d7b0b96 *man/epi.directadj.Rd 9424e6c2bec3dea4d79f1551b8080901 *man/epi.dms.Rd 7b5172ac33a3a2452fcde4a664f95ffc *man/epi.dsl.Rd b561e5757a9fba546bc37cbfa5a6d6bf *man/epi.edr.Rd 8df7a37faf9dd8ef2d3cbe6f6ff92d9a *man/epi.empbayes.Rd 623bf229ccd13149e887c7760b3baf44 *man/epi.epidural.Rd 823c79c884aada4064fd2e901eb30958 *man/epi.herdtest.Rd 8bb745f9e835adcd99f19ffbbf48d8ed *man/epi.incin.Rd bbfcecb93c8519e9ccbef8d92991e42b *man/epi.indirectadj.Rd c7d89bc6c96dc8dde6c4793982960c85 *man/epi.insthaz.Rd f5eb9a339ceb32787522e1b10cfd2693 *man/epi.interaction.Rd 35859ed56cd79a40bd8f87855160ad9a *man/epi.iv.Rd 09edff0d4a987682013d795fab061873 *man/epi.kappa.Rd 85eb0f47054239387557a52af03a674f *man/epi.ltd.Rd bc96a2d036ef70851bb08581300677ed *man/epi.mh.Rd bc1165287dfa87662f653457a7a5aac6 *man/epi.nomogram.Rd a146d6ea97b4e86cd0c87fc7740305a2 *man/epi.occc.Rd cc7f75a85351f5542916421fa4b7e648 *man/epi.offset.Rd 2bd23171751550cdd1912e0d0244748d *man/epi.pooled.Rd 94d5f2ced08ba7556c21e6360d5f54b8 *man/epi.popsize.Rd d1796a40a7f4d23429458f0f8f489274 *man/epi.prcc.Rd 9c9de8405822d52a347f629ec8c6ed66 *man/epi.prev.Rd 2e02d145c1188e3270a93fd251ba46fa *man/epi.simplesize.Rd b6389f885f3fdf71f58c115334f230d4 *man/epi.smd.Rd 96e8daea0c1e36beb684ebbd85b7f2c6 *man/epi.stratasize.Rd 52c429fe19bfb0f07ab5a579a5874490 *man/epi.studysize.Rd dc1ebcea28fa5dddb4140d6f209f0c77 *man/epi.tests.Rd epiR/DESCRIPTION0000644000176200001440000000223312602004033012645 0ustar liggesusersPackage: epiR Version: 0.9-69 Date: 2015-09-27 Title: Tools for the Analysis of Epidemiological Data Author: Mark 
Stevenson with contributions from Telmo Nunes, Cord Heuer, Jonathon Marshall, Javier Sanchez, Ron Thornton, Jeno Reiczigel, Jim Robison-Cox, Paola Sebastiani, Peter Solymos, Kazuki Yoshida, Geoff Jones, Sarah Pirikahu and Simon Firestone Maintainer: Mark Stevenson <mark.stevenson1@unimelb.edu.au> Description: Tools for the analysis of epidemiological data. Contains functions for directly and indirectly adjusting measures of disease frequency, quantifying measures of association on the basis of single or multiple strata of count data presented in a contingency table, and computing confidence intervals around incidence risk and incidence rate estimates. Miscellaneous functions for use in meta-analysis, diagnostic test interpretation, and sample size calculations. Depends: R (>= 3.0.0), survival Imports: BiasedUrn, methods Suggests: MASS (>= 3.1-20) License: GPL (>= 2) URL: http://fvas.unimelb.edu.au/veam Packaged: 2015-09-27 01:17:03 UTC; Mark NeedsCompilation: no Repository: CRAN Date/Publication: 2015-09-27 17:15:39 epiR/man/0000755000176200001440000000000012601641614011725 5ustar liggesusersepiR/man/epi.indirectadj.Rd0000644000176200001440000001323612601641614015255 0ustar liggesusers\name{epi.indirectadj} \alias{epi.indirectadj} \title{Indirectly adjusted incidence risk estimates} \description{ Compute indirectly adjusted incidence risks and standardised mortality (incidence) ratios. } \usage{ epi.indirectadj(obs, pop, std, units, conf.level = 0.95) } \arguments{ \item{obs}{a one column matrix representing the observed number of events in each stratum. The dimensions of \code{obs} must be named (see the examples, below).} \item{pop}{a matrix representing population size. Rows represent strata (e.g. region); columns represent the levels of the covariate to be adjusted for (e.g. age class, gender). The sum of each row will equal the total population size within each stratum. If there are no covariates \code{pop} will be a one column matrix. The dimensions of the \code{pop} matrix must be named (see the examples, below).} \item{std}{a one row matrix specifying the standard incidence risks to be applied to each level of the covariate to be adjusted for. The length of \code{std} should be one plus the number of covariates to be adjusted for (the additional value represents the incidence risk in the entire population). If there are no covariates to adjust for \code{std} is a single number representing the incidence risk in the entire population.} \item{units}{multiplier for the incidence risk estimates.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Indirect standardisation can be performed whenever the stratum-specific incidence risk estimates are either unknown or unreliable. If the stratum-specific incidence risk estimates are known, direct standardisation is preferred. Confidence intervals for the standardised mortality ratio estimates are based on the Poisson distribution (see Breslow and Day 1987, p 69 - 71 for details). } \value{ A list containing the following: \item{crude.strata}{the crude incidence risk estimates for each stratum.} \item{adj.strata}{the indirectly adjusted incidence risk estimates for each stratum.} \item{smr.strata}{the standardised mortality (incidence) ratios for each stratum.} } \references{ Breslow NE, Day NE (1987). Statistical Methods in Cancer Research: Volume II - The Design and Analysis of Cohort Studies. Lyon: International Agency for Research on Cancer. Dohoo I, Martin W, Stryhn H (2009).
Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 85 - 89. Rothman KJ, Greenland S (1998). Modern Epidemiology, second edition. Lippincott Williams & Wilkins, Philadelphia. Sahai H, Khurshid A (1993). Confidence intervals for the mean of a Poisson distribution: A review. Biometrical Journal 35: 857 - 867. Sahai H, Khurshid A (1996). Statistics in Epidemiology. Methods, Techniques and Applications. CRC Press, Boca Raton. } \author{ Thanks to Dr. Telmo Nunes (UISEE/DETSA, Faculdade de Medicina Veterinaria - UTL, Rua Prof. Cid dos Santos, 1300-477 Lisboa Portugal) for details and code for the confidence interval calculations. } \seealso{ \code{\link{epi.directadj}} } \examples{ ## EXAMPLE 1 (without covariates): ## Adapted from Dohoo, Martin and Stryhn (2009). In this example the frequency ## of tuberculosis is expressed as incidence risk (i.e. the number of ## tuberculosis positive herds divided by the size of the herd population at ## risk). In their text, Dohoo et al. present the data as incidence rate (the ## number of tuberculosis positive herds per herd-year at risk). ## Data have been collected on the incidence of tuberculosis in two ## areas ("A" and "B"). Provided are the counts of (new) incident cases and ## counts of the herd population at risk. The standard incidence risk for ## the total population is 0.060 (6 cases per 100 herds at risk): obs <- matrix(data = c(58, 130), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), "")) pop <- matrix(data = c(1000, 2000), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), "")) std <- 0.060 epi.indirectadj(obs = obs, pop = pop, std = std, units = 100, conf.level = 0.95) ## EXAMPLE 2 (with covariates): ## We now have, for each area, data stratified by herd type (dairy, beef). ## The standard incidence risks for beef herds, dairy herds, and the total ## population are 0.025, 0.085, and 0.060 cases per herd, respectively: obs <- matrix(data = c(58, 130), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), "")) pop <- matrix(data = c(550, 450, 500, 1500), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), c("Beef", "Dairy"))) std <- matrix(data = c(0.025, 0.085, 0.060), nrow = 1, byrow = TRUE, dimnames = list("", c("Beef", "Dairy", "Total"))) epi.indirectadj(obs = obs, pop = pop, std = std, units = 100, conf.level = 0.95) ## > $crude.strata ## > est lower upper ## > A 5.8 4.404183 7.497845 ## > B 6.5 5.430733 7.718222 ## > $adj.strata ## > est lower upper ## > A 6.692308 5.076923 8.423077 ## > B 5.571429 4.628571 6.557143 ## > $smr.strata ## > obs exp est lower upper ## > A 58 52 1.1153846 0.8461538 1.403846 ## > B 130 140 0.9285714 0.7714286 1.092857 ## The crude incidence risk of tuberculosis in area A was 5.8 ## (95\% CI 4.4 to 7.5) cases per 100 herds at risk. The crude incidence ## risk of tuberculosis in area B was 6.5 (95\% CI 5.4 to 7.7) cases ## per 100 herds at risk. ## The indirectly adjusted incidence risk of tuberculosis in area A was 6.7 ## (95\% CI 5.1 to 8.4) cases per 100 herds at risk. The indirectly ## adjusted incidence risk of tuberculosis in area B was 5.6 ## (95\% CI 4.6 to 6.6) cases per 100 herds at risk. } \keyword{univar} epiR/man/epi.asc.Rd0000644000176200001440000000265112601641614013542 0ustar liggesusers\name{epi.asc} \alias{epi.asc} \title{ Write matrix to an ASCII raster file } \description{ Writes a matrix to an ASCII raster file, suitable for display in a Geographic Information System.
} \usage{ epi.asc(dat, file, xllcorner, yllcorner, cellsize, na = -9999) } \arguments{ \item{dat}{a matrix with data suitable for plotting using the \code{image} function.} \item{file}{character string specifying the name and path of the ASCII raster output file.} \item{xllcorner}{the easting coordinate corresponding to the lower left hand corner of the matrix.} \item{yllcorner}{the northing coordinate corresponding to the lower left hand corner of the matrix.} \item{cellsize}{number, defining the size of each matrix cell.} \item{na}{scalar, defines null values in the matrix. NAs are converted to this value.} } \value{ Writes an ASCII raster file (typically with \code{*.asc} extension), suitable for display in a Geographic Information System. } \note{ The \code{image} function in R rotates tabular data counter clockwise by 90 degrees for display. A matrix of the form: \tabular{ll}{ 1 \tab 3 \cr 2 \tab 4 \cr } is displayed (using \code{image}) as: \tabular{ll}{ 3 \tab 4 \cr 1 \tab 2 \cr } It is recommended that the source data for this function is a matrix. Replacement of \code{NA}s in a data frame extends processing time for this function. } \keyword{univar} epiR/man/epi.directadj.Rd0000644000176200001440000001170112601641614014721 0ustar liggesusers\name{epi.directadj} \alias{epi.directadj} \title{Directly adjusted incidence rate estimates} \description{ Compute directly adjusted incidence rates.} \usage{ epi.directadj(obs, pop, std, units = 1, conf.level = 0.95) } \arguments{ \item{obs}{a matrix representing the observed number of events. Rows represent strata (e.g. region); columns represent the covariates to be adjusted for (e.g. age class, gender). The sum of each row will equal the total number of events for each stratum. If there are no covariates to be adjusted for \code{obs} will be a one column matrix. The rows of the \code{obs} matrix must be named with the appropriate strata names. The columns of \code{obs} must be named with the appropriate level identifiers for the covariate. See the example, below.} \item{pop}{a matrix representing population time at risk. Rows represent strata (e.g. region); columns represent the covariates to be adjusted for (e.g. age class, gender). The sum of each row will equal the total population time at risk within each stratum. If there are no covariates \code{pop} will be a one column matrix. The rows of the \code{pop} matrix must be named with the appropriate strata names. The columns of \code{pop} must be named with the appropriate level identifiers for the covariate. See the example, below.} \item{std}{a matrix representing the standard population size for the different levels of the covariate to be adjusted for. The columns of \code{std} must be named with the appropriate level identifiers for the covariate(s).} \item{units}{multiplier for the incidence risk estimates.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ This function returns unadjusted (crude) and directly adjusted incidence rate estimates for each of the specified population strata. The term `covariate' is used here to refer to the factors we want to control (i.e. adjust) for when calculating the directly adjusted incidence rate estimates. When the outcome of interest is rare, the confidence intervals returned by this function (based on Fay and Feuer, 1997) are appropriate for incidence risk data.
In this situation the argument \code{pop} represents the size of the population at risk (instead of population time at risk). } \value{ A list containing the following: \item{crude}{the crude incidence rate estimates for each stratum-covariate combination.} \item{crude.strata}{the crude incidence rate estimates for each stratum.} \item{adj.strata}{the directly adjusted incidence rate estimates for each stratum.} } \references{ Fay M, Feuer E (1997). Confidence intervals for directly standardized rates: A method based on the gamma distribution. Statistics in Medicine 16: 791 - 801. Fleiss JL (1981). Statistical Methods for Rates and Proportions, Wiley, New York, USA, p 240. Greenland S, Rothman KJ. Introduction to stratified analysis. In: Rothman KJ, Greenland S (1998). Modern Epidemiology. Lippincott Williams, & Wilkins, Philadelphia, pp. 260 - 265. Thrusfield M (2007). Veterinary Epidemiology, Blackwell Publishing, London, UK, pp. 63 - 64. } \author{ Thanks to Karl Ove Hufthammer for helpful suggestions to improve the execution and documentation of this function. } \seealso{ \code{\link{epi.indirectadj}} } \examples{ ## EXAMPLE 1 (from Thrusfield 2007 pp. 63 - 64): ## A study was conducted to estimate the seroprevalence of leptospirosis ## in dogs in Glasgow and Edinburgh, Scotland. For the matrix titled pop ## the numbers represent dog-years at risk. The following data were ## obtained for male and female dogs: obs <- matrix(data = c(15,46,53,16), nrow = 2, byrow = TRUE, dimnames = list(c("ED","GL"), c("M","F"))) pop <- matrix(data = c(48,212,180,71), nrow = 2, byrow = TRUE, dimnames = list(c("ED","GL"), c("M","F"))) ## Compute directly adjusted seroprevalence estimates, using a standard ## population with equal numbers of male and female dogs: std <- matrix(data = c(250,250), nrow = 1, byrow = TRUE, dimnames = list("", c("M","F"))) epi.directadj(obs, pop, std, units = 1, conf.level = 0.95) ## > $crude ## > strata cov est lower upper ## > 1 ED M 0.3125000 0.1749039 0.5154212 ## > 2 GL M 0.2944444 0.2205591 0.3851406 ## > 3 ED F 0.2169811 0.1588575 0.2894224 ## > 4 GL F 0.2253521 0.1288082 0.3659577 ## > $crude.strata ## > strata est lower upper ## > 1 ED 0.2346154 0.1794622 0.3013733 ## > 2 GL 0.2749004 0.2138889 0.3479040 ## > $adj.strata ## > strata est lower upper ## > 1 ED 0.2647406 0.1866047 0.3692766 ## > 2 GL 0.2598983 0.1964162 0.3406224 ## The confounding effect of sex has been removed by the gender-adjusted ## incidence rate estimates. } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{univar} \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.about.Rd0000644000176200001440000000176212601641614014110 0ustar liggesusers\name{epi.about} \alias{epi.about} \title{The library epiR: summary information} \description{ Tools for the analysis of epidemiological data. } \usage{ epi.about() } \details{ The most recent version of the \code{epiR} package can be obtained from: \url{http://fvas.unimelb.edu.au/veam} } \author{ Mark Stevenson (\email{mark.stevenson1@unimelb.edu.au}), Faculty of Veterinary and Agricultural Sciences, University of Melbourne, Parkville, Victoria 3010, Australia. Simon Firestone, Faculty of Veterinary and Agricultural Sciences, University of Melbourne, Parkville, Victoria 3010, Australia. Telmo Nunes, UISEE/DETSA, Faculdade de Medicina Veterinaria --- UTL, Rua Prof. Cid dos Santos, 1300 - 477 Lisboa Portugal. 
Javier Sanchez, Atlantic Veterinary College, University of Prince Edward Island, Charlottetown, Prince Edward Island, C1A 4P3, Canada. Ron Thornton, Ministry for Primary Industries New Zealand, PO Box 2526 Wellington, New Zealand. } \keyword{univar} epiR/man/epi.empbayes.Rd0000644000176200001440000000447712601641614014605 0ustar liggesusers\name{epi.empbayes} \alias{epi.empbayes} \title{Empirical Bayes estimates} \description{ Computes empirical Bayes estimates of observed event counts using the method of moments. } \usage{ epi.empbayes(obs, pop) } \arguments{ \item{obs}{a vector representing the observed event counts in each unit of interest.} \item{pop}{a vector representing the population count in each unit of interest.} } \details{ The gamma distribution is parameterised in terms of a shape parameter (\eqn{\nu}) and a rate (inverse scale) parameter (\eqn{\alpha}). The mean of a given gamma distribution equals \eqn{\nu / \alpha}. The variance equals \eqn{\nu / \alpha^{2}}. The empirical Bayes estimate of event risk in each unit of interest equals \eqn{(obs + \nu) / (pop + \alpha)}. This technique performs poorly when your data contains large numbers of zero event counts. In this situation a Bayesian approach for estimating \eqn{\alpha} and \eqn{\nu} would be advised. } \value{ A data frame with four elements: \code{gamma} the mean event risk across all units, \code{phi} the variance of event risk across all units, \code{alpha} the estimated rate parameter of the gamma distribution, and \code{nu} the estimated shape parameter of the gamma distribution. } \references{ Bailey TC, Gatrell AC (1995). Interactive Spatial Data Analysis. Longman Scientific & Technical. London, pp. 303 - 308. Langford IH (1994). Using empirical Bayes estimates in the geographical analysis of disease risk. Area 26: 142 - 149. Meza J (2003). Empirical Bayes estimation smoothing of relative risks in disease mapping. Journal of Statistical Planning and Inference 112: 43 - 62. } \examples{ data(epi.SClip) obs <- epi.SClip$cases; pop <- epi.SClip$population est <- epi.empbayes(obs, pop) empbayes.prop <- (obs + est[4]) / (pop + est[3]) raw.prop <- (obs) / (pop) rank <- rank(raw.prop) dat <- data.frame(rank, raw.prop, empbayes.prop) plot(dat$rank, dat$raw.prop, type = "n", xlab = "Rank", ylab = "Risk") points(dat$rank, dat$raw.prop, pch = 16, col = "red") points(dat$rank, dat$empbayes.prop, pch = 16, col = "blue") legend(x = "topleft", legend = c("Raw estimate", "Bayes adjusted estimate"), col = c("red","blue"), pch = c(16,16), bty = "n") } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.dms.Rd0000644000176200001440000000177012601641614013560 0ustar liggesusers\name{epi.dms} \alias{epi.dms} \title{Decimal degrees and degrees, minutes and seconds conversion } \description{ Converts decimal degrees to degrees, minutes and seconds. Converts degrees, minutes and seconds to decimal degrees. } \usage{ epi.dms(dat) } \arguments{ \item{dat}{the data. A one-column matrix is assumed when converting decimal degrees to degrees, minutes, and seconds. A two-column matrix is assumed when converting degrees and decimal minutes to decimal degrees.
A three-column matrix is assumed when converting degrees, minutes and seconds to decimal degrees.} } \examples{ ## EXAMPLE 1: ## Degrees, minutes, seconds to decimal degrees: dat <- matrix(c(41, 38, 7.836, -40, 40, 27.921), byrow = TRUE, nrow = 2) epi.dms(dat) ## EXAMPLE 2: ## Decimal degrees to degrees, minutes, seconds: dat <- matrix(c(41.63551, -40.67442), nrow = 2) epi.dms(dat) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.prcc.Rd0000644000176200001440000000404012601641614013715 0ustar liggesusers\name{epi.prcc} \alias{epi.prcc} \title{ Partial rank correlation coefficients } \description{ Compute partial rank correlation coefficients. } \usage{ epi.prcc(dat, sided.test = 2) } \arguments{ \item{dat}{a data frame comprised of \code{K + 1} columns and \code{N} rows, where \code{K} represents the number of model parameters being evaluated and \code{N} represents the number of replications of the model. The last column of the data frame (i.e. column \code{K + 1}) provides the model output.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the partial rank correlation coefficient is greater than or less than zero. Use a one-sided test to evaluate whether or not the partial rank correlation coefficient is greater than zero.} } \details{ If the number of parameters \code{K} is greater than the number of model replications \code{N} an error will be returned. } \value{ A data frame with three elements: \code{gamma} the partial rank correlation coefficient between each input parameter and the outcome, \code{test.statistic} the test statistic used to determine the significance of non-zero values of \code{gamma}, and \code{p.value} the associated P-value. } \references{ Blower S, Dowlatabadi H (1994). Sensitivity and uncertainty analysis of complex models of disease transmission: an HIV model, as an example. International Statistical Review 62: 229 - 243. Sanchez M, Blower S (1997) Uncertainty and sensitivity analysis of the basic reproductive rate. American Journal of Epidemiology, 145: 1127 - 1137. } \author{ Jonathon Marshall, J.C.Marshall@massey.ac.nz. } \examples{ ## Create a matrix of simulation results: x1 <- data.frame(rnorm(n = 10, mean = 120, sd = 10)) x2 <- data.frame(rnorm(n = 10, mean = 80, sd = 5)) x3 <- data.frame(rnorm(n = 10, mean = 40, sd = 20)) y <- 2 + (0.5 * x1) + (0.7 * x2) + (0.2 * x3) dat <- data.frame(cbind(X1 = x1, X2 = x2, X3 = x3, Y = y)) epi.prcc(dat, sided.test = 2) } \keyword{univar} epiR/man/epi.tests.Rd0000644000176200001440000001054312601641614014135 0ustar liggesusers\name{epi.tests} \alias{epi.tests} \alias{print.epi.tests} \alias{summary.epi.tests} \title{Sensitivity, specificity and predictive value of a diagnostic test} \description{ Computes true and apparent prevalence, sensitivity, specificity, positive and negative predictive values, and positive and negative likelihood ratios from count data provided in a 2 by 2 table. } \usage{ epi.tests(dat, conf.level = 0.95) \method{print}{epi.tests}(x, ...) \method{summary}{epi.tests}(object, ...) } \arguments{ \item{dat}{an object of class \code{table} containing the individual cell frequencies (see below).} \item{conf.level}{magnitude of the returned confidence interval.
Must be a single number between 0 and 1.} \item{x, object}{an object of class \code{epi.tests}.} \item{\dots}{Ignored.} } \details{ Exact binomial confidence limits are calculated for test sensitivity, specificity, and positive and negative predictive value (see Collett 1999 for details). Confidence intervals for positive and negative likelihood ratios are based on formulae provided by Simel et al. (1991). Diagnostic accuracy is defined as the proportion of all tests that give a correct result. The diagnostic odds ratio is defined as the odds of the test making a correct diagnosis relative to an incorrect diagnosis in patients with the disease (Scott et al. 2008). The number needed to diagnose is defined as the number of patients that need to be tested to give one correct positive test. Youden's index is the difference between the true positive rate and the false positive rate. Youden's index ranges from -1 to +1 with values closer to 1 if both sensitivity and specificity are high (i.e. close to 1). } \value{ An object of class \code{epi.tests} containing the following: \item{aprev}{apparent prevalence.} \item{tprev}{true prevalence.} \item{se}{test sensitivity.} \item{sp}{test specificity.} \item{diag.acc}{diagnostic accuracy.} \item{diag.or}{diagnostic odds ratio.} \item{nnd}{number needed to diagnose.} \item{youden}{Youden's index.} \item{ppv}{positive predictive value.} \item{npv}{negative predictive value.} \item{plr}{likelihood ratio of a positive test.} \item{nlr}{likelihood ratio of a negative test.} } \references{ Altman DG, Machin D, Bryant TN, and Gardner MJ (2000). Statistics with Confidence, second edition. British Medical Journal, London, pp. 28 - 29. Bangdiwala SI, Haedo AS, Natal ML (2008). The agreement chart as an alternative to the receiver-operating characteristic curve for diagnostic tests. Journal of Clinical Epidemiology 61: 866 - 874. Collett D (1999). Modelling Binary Data. Chapman & Hall/CRC, Boca Raton Florida, p. 24. Scott IA, Greenburg PB, Poole PJ (2008). Cautionary tales in the clinical interpretation of studies of diagnostic tests. Internal Medicine Journal 38: 120 - 129. Simel D, Samsa G, Matchar D (1991). Likelihood ratios with confidence: Sample size estimation for diagnostic test studies. Journal of Clinical Epidemiology 44: 763 - 770. Greg Snow (2008) Need help in calculating confidence intervals for sensitivity, specificity, PPV & NPV. R-sig-Epi Digest 23(1): 3 March 2008. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). } \note{ \tabular{llll}{ \tab Disease + \tab Disease - \tab Total\cr Test +\tab a\tab b \tab a + b\cr Test - \tab c\tab d \tab c + d\cr Total\tab a + c\tab b + d \tab a + b + c + d\cr } } \examples{ ## Scott et al. 2008, Table 1: ## A new diagnostic test was trialled on 1586 patients. Of 744 patients ## that were disease positive, 670 tested positive. Of 842 patients that ## were disease negative, 640 tested negative. What is the likelihood ## ratio of a positive test? What is the number needed to diagnose?
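## Before calling epi.tests it is worth computing the headline quantities
## by hand from the 2 by 2 table. This is a cross-check using the standard
## definitions (not output from epi.tests itself):

se <- 670 / 744            ## test sensitivity: 0.90
sp <- 640 / 842            ## test specificity: 0.76
plr <- se / (1 - sp)       ## positive likelihood ratio: 3.75
nnd <- 1 / (se - (1 - sp)) ## number needed to diagnose: 1.51
se; sp; plr; nnd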
Around 15 persons need to be tested ## to return 10 positive tests. } \keyword{univar} epiR/man/epi.betabuster.Rd0000644000176200001440000000777512601641614015140 0ustar liggesusers\name{epi.betabuster} \alias{epi.betabuster} \title{An R version of Wes Johnson and Chun-Lung Su's Betabuster} \description{ A function to return shape1 and shape2 parameters for a beta distribution, based on expert elicitation. } \usage{ epi.betabuster(mode, conf, greaterthan, x, conf.level = 0.95, max.shape1 = 100, step = 0.001) } \arguments{ \item{mode}{scalar, the mode of the variable of interest. Must be a number between 0 and 1.} \item{conf}{level of confidence (expressed on a 0 to 1 scale) that the true value of the variable of interest is greater or less than argument \code{x}.} \item{greaterthan}{logical, if \code{TRUE} you are making the statement that you are \code{conf} confident that the true value of the variable of interest is greater than \code{x}. If \code{FALSE} you are making the statement that you are \code{conf} confident that the true value of the variable of interest is less than \code{x}.} \item{x}{scalar, value of the variable of interest (see above).} \item{conf.level}{magnitude of the returned confidence interval for the estimated beta distribution. Must be a single number between 0 and 1.} \item{max.shape1}{scalar, maximum value of the shape1 parameter for the beta distribution.} \item{step}{scalar, step value for the shape1 parameter. See details.} } \value{ A list containing the following: \item{shape1}{the \code{shape1} parameter for the estimated beta distribution.} \item{shape2}{the \code{shape2} parameter for the estimated beta distribution.} \item{mode}{the mode of the estimated beta distribution.} \item{mean}{the mean of the estimated beta distribution.} \item{median}{the median of the estimated beta distribution.} \item{lower}{the lower bound of the confidence interval of the estimated beta distribution.} \item{upper}{the upper bound of the confidence interval of the estimated beta distribution.} \item{variance}{the variance of the estimated beta distribution.} } \details{ The beta distribution has two parameters: \code{shape1} and \code{shape2}, corresponding to \code{a} and \code{b} in the original version of BetaBuster. If \code{r} equals the number of times an event has occurred after \code{n} trials, \code{shape1} = \code{(r + 1)} and \code{shape2} = \code{(n - r + 1)}. BetaBuster can be downloaded from \url{http://www.ics.uci.edu/~wjohnson/BIDA/betabuster.zip}. } \references{ Christensen R, Johnson W, Branscum A, Hanson TE (2010). Bayesian Ideas and Data Analysis: An Introduction for Scientists and Statisticians. Chapman and Hall, Boca Raton. } \author{ Simon Firestone (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia) with acknowledgements to Wes Johnson and Chun-Lung Su for the original standalone software. } \examples{ ## EXAMPLE 1: ## If a scientist is asked for their best guess for the diagnostic sensitivity ## of a particular test and the answer is 0.90, and if they are also willing ## to assert that they are 80\% certain that the sensitivity is greater than ## 0.75, what are the shape1 and shape2 parameters for a beta distribution ## satisfying these constraints?
rval <- epi.betabuster(mode = 0.90, conf = 0.80, greaterthan = TRUE, x = 0.75, conf.level = 0.95, max.shape1 = 100, step = 0.001) rval$shape1; rval$shape2 ## The shape1 and shape2 parameters for the beta distribution that satisfy the ## constraints listed above are 9.875 and 1.986, respectively. ## This beta distribution reflects the probability distribution ## obtained if there were 9 successes, r: r <- rval$shape1 - 1; r ## from 10 trials, n: n <- rval$shape2 + rval$shape1 - 2; n ## Density plot of the estimated beta distribution: plot(seq(from = 0, to = 1, by = 0.001), dbeta(x = seq(from = 0, to = 1,by = 0.001), shape1 = rval$shape1, shape2 = rval$shape2), type = "l", xlab = "Test sensitivity", ylab = "Density") } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.popsize.Rd0000644000176200001440000000322212601641614014460 0ustar liggesusers\name{epi.popsize} \alias{epi.popsize} \title{ Estimate population size } \description{ Estimates population size on the basis of capture-recapture sampling. } \usage{ epi.popsize(T1, T2, T12, conf.level = 0.95, verbose = FALSE) } \arguments{ \item{T1}{an integer representing the number of individuals tested in the first round.} \item{T2}{an integer representing the number of individuals tested in the second round.} \item{T12}{an integer representing the number of individuals tested in both the first and second round.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} \item{verbose}{logical indicating whether detailed or summary results are to be returned.} } \value{ Returns the estimated population size and an estimate of the numbers of individuals that remain untested. } \references{ Cannon RM, Roe RT (1982). Livestock Disease Surveys: A Field Manual for Veterinarians. Australian Government Publishing Service, Canberra, pp. 34. } \examples{ ## In a field survey 400 feral pigs are captured, marked and then released. ## On a second occasion 40 of the original capture are found when another 400 ## pigs are captured. Estimate the size of this feral pig population. Estimate ## the number of feral pigs that have not been tested. epi.popsize(T1 = 400, T2 = 400, T12 = 40, conf.level = 0.95, verbose = FALSE) ## Estimated population size: 4000 (95\% CI 3125 - 5557) ## Estimated number of untested pigs: 3240 (95\% CI 2365 - 4797) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.dgamma.Rd0000644000176200001440000000322012601641614014213 0ustar liggesusers\name{epi.dgamma} \alias{epi.dgamma} \title{Estimate the precision of a [structured] heterogeneity term } \description{ Returns the precision of a [structured] heterogeneity term after one has specified the amount of variation a priori. } \usage{ epi.dgamma(rr, quantiles = c(0.05, 0.95)) } \arguments{ \item{rr}{the lower and upper limits of relative risk, estimated \emph{a priori}.} \item{quantiles}{a vector of length two defining the quantiles of the lower and upper relative risk estimates.} } \value{ Returns the precision (the inverse variance) of the heterogeneity term. } \references{ Best, NG. WinBUGS 1.3.1 Short Course, Brisbane, November 2000. } \examples{ ## Suppose we are expecting the lower 5\% and upper 95\% confidence interval ## of relative risk in a data set to be 0.5 and 3.0, respectively.
## A prior guess at the precision of the heterogeneity term would be: tau <- epi.dgamma(rr = c(0.5, 3.0), quantiles = c(0.05, 0.95)) tau ## This can be translated into a gamma distribution. We set the mean of the ## distribution as tau and specify a large variance (that is, we are not ## certain about tau). mean <- tau var <- 1000 shape <- mean^2 / var inv.scale <- mean / var ## In WinBUGS the precision of the heterogeneity term may be parameterised ## as tau ~ dgamma(shape, inv.scale). Plot the probability density function ## of tau: z <- seq(0.01, 10, by = 0.01) fz <- dgamma(z, shape = shape, scale = 1/inv.scale) plot(z, fz, type = "l", ylab = "Probability density of tau") } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.smd.Rd0000644000176200001440000000671312601641614013562 0ustar liggesusers\name{epi.smd} \alias{epi.smd} \title{Fixed-effect meta-analysis of continuous outcomes using the standardised mean difference method } \description{ Computes the standardised mean difference and confidence intervals of the standardised mean difference for continuous outcome data. } \usage{ epi.smd(mean.trt, sd.trt, n.trt, mean.ctrl, sd.ctrl, n.ctrl, names, method = "cohens", conf.level = 0.95) } \arguments{ \item{mean.trt}{a vector, defining the mean outcome in the treatment group.} \item{sd.trt}{a vector, defining the standard deviation of the outcome in the treatment group.} \item{n.trt}{a vector, defining the number of subjects in the treatment group.} \item{mean.ctrl}{a vector, defining the mean outcome in the control group.} \item{sd.ctrl}{a vector, defining the standard deviation of the outcome in the control group.} \item{n.ctrl}{a vector, defining the number of subjects in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{cohens}, \code{hedges} or \code{glass}.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \value{ A list containing the following: \item{md}{standardised mean difference and its confidence interval computed for each trial.} \item{md.invar}{the inverse variance (fixed effects) summary standardised mean difference.} \item{md.dsl}{the DerSimonian and Laird (random effects) summary standardised mean difference.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, pp. 290 - 291. } \note{ The standardised mean difference method is used when trials assess the same outcome, but measure it in a variety of ways. For example: a set of trials might measure depression scores in psychiatric patients but use different methods to quantify depression. In this circumstance it is necessary to standardise the results of the trials to a uniform scale before they can be combined. The standardised mean difference method expresses the size of the treatment effect in each trial relative to the variability observed in that trial.
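As an illustration, under Cohen's method the standardised mean difference for a single trial takes the form \deqn{d = (\bar{x}_{trt} - \bar{x}_{ctrl}) / s_{pooled}}{d = (mean.trt - mean.ctrl) / s.pooled} where \eqn{s_{pooled}}{s.pooled} is the pooled standard deviation of the two groups. This is a sketch of the usual definitions rather than a statement of the exact estimators coded in this function: Glass's method replaces the pooled standard deviation with the standard deviation of the control group, and Hedges' method applies a small-sample bias correction to the pooled estimate.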
} \seealso{ \code{\link{epi.dsl}, \link{epi.iv}, \link{epi.mh}} } \examples{ ## EXAMPLE 1: ## Assertive community treatment (ACT) for the severely mentally ill was ## compared to standard care. A systematic review comparing ACT to standard ## care found three trials that assessed mental status after 12 months. ## All three trials used a different scoring system, ## so standardisation is required before they can be compared. names <- c("Audini", "Morse", "Lehman") mean.trt <- c(41.4, 0.95, -4.10) mean.ctrl <- c(42.3, 0.89, -3.80) sd.trt <- c(14, 0.76, 0.83) sd.ctrl <- c(12.4, 0.65, 0.87) n.trt <- c(30, 37, 67) n.ctrl <- c(28, 35, 58) epi.smd(mean.trt, sd.trt, n.trt, mean.ctrl, sd.ctrl, n.ctrl, names, method = "cohens", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.occc.Rd0000644000176200001440000000605212601641614013702 0ustar liggesusers\name{epi.occc} \alias{epi.occc} \alias{print.epi.occc} \alias{summary.epi.occc} \title{ Overall concordance correlation coefficient (OCCC) } \description{ Overall concordance correlation coefficient (OCCC) for agreement on a continuous measure based on Lin (1989, 2000) and Barnhart et al. (2002). } \usage{ epi.occc(dat, na.rm = FALSE, pairs = FALSE) \method{print}{epi.occc}(x, ...) \method{summary}{epi.occc}(object, ...) } \arguments{ \item{dat}{a matrix, or a matrix like object. Rows correspond to cases/observations, columns correspond to raters/variables.} \item{na.rm}{logical. Should missing values (including \code{NaN}) be removed?} \item{pairs}{logical. Should the return object contain pairwise statistics? See Details.} \item{x, object}{an object of class \code{epi.occc}.} \item{\dots}{further arguments passed to \code{print} methods.} } \details{ The index proposed by Barnhart et al. (2002) is the same as the index suggested by Lin (1989) in the section on future studies, with a correction of a typographical error given in Lin (2000). } \value{ An object of class \code{epi.occc} with the following list elements (notation follows Barnhart et al. 2002): \itemize{ \item{\code{occc}: }{the value of the overall concordance correlation coefficient (\eqn{\rho_{o}^{c}}{rho.o^c}),} \item{\code{oprec}: }{overall precision (\eqn{\rho}{rho}),} \item{\code{oaccu}: }{overall accuracy (\eqn{\chi^{a}}{chi^a}),} \item{\code{pairs}: }{a list with following elements (only if \code{pairs = TRUE}, otherwise \code{NULL}; column indices for the pairs (j,k) follow lower-triangle column-major rule based on a \code{ncol(x)} times \code{ncol(x)} matrix), \itemize{ \item{\code{ccc}: }{pairwise CCC values (\eqn{\rho_{jk}^{c}}{rho_jk^c}),} \item{\code{prec}: }{pairwise precision values (\eqn{\rho_{jk}}{rho_jk}),} \item{\code{accu}: }{pairwise accuracy values \eqn{\chi_{jk}^{a}}{chi_jk^a}),} \item{\code{ksi}: }{pairwise weights (\eqn{\xi_{jk}}{ksi_jk}),} \item{\code{scale}: }{pairwise scale values (\eqn{v_{jk}}{v_jk}),} \item{\code{location}: }{pairwise location values (\eqn{u_{jk}}{u_jk}),} } } \item{\code{data.name}: }{name of the input data \code{dat}.} } } \references{ Barnhart HX, Haber M, Song J (2002). Overall concordance correlation coefficient for evaluating agreement among multiple observers. Biometrics 58: 1020 - 1027. Lin L (1989). A concordance correlation coefficient to evaluate reproducibility. Biometrics 45: 255 - 268. Lin L (2000). A note on the concordance correlation coefficient. Biometrics 56: 324 - 325.
} \seealso{ \code{\link[epiR]{epi.ccc}} } \author{ Peter Solymos, solymos@ualberta.ca. } \examples{ ## Generate some artificial ratings data: set.seed(1234) p <- runif(10, 0, 1) x <- replicate(n = 5, expr = rbinom(10, 4, p) + 1) rval <- epi.occc(dat = x, pairs = TRUE) print(rval); summary(rval) } \keyword{htest} epiR/man/epi.simplesize.Rd0000644000176200001440000001122612601641614015156 0ustar liggesusers\name{epi.simplesize} \alias{epi.simplesize} \title{ Sample size under simple random sampling } \description{ Estimates the required sample size under simple random sampling. } \usage{ epi.simplesize(N = 1E+06, Vsq, Py, epsilon.r, method = "mean", conf.level = 0.95) } \arguments{ \item{N}{scalar, representing the population size.} \item{Vsq}{scalar, if method is \code{total} or \code{mean} this is the relative variance of the variable to be estimated (i.e. \code{var/mean^2}).} \item{Py}{scalar, if method is \code{proportion} this is an estimate of the unknown population proportion.} \item{epsilon.r}{the maximum relative difference between our estimate and the unknown population value.} \item{method}{a character string indicating the method to be used. Options are \code{total}, \code{mean}, or \code{proportion}.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \value{ Returns an integer defining the size of the sample is required. } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 70 - 75. Scheaffer RL, Mendenhall W, Lyman Ott R (1996). Elementary Survey Sampling. Duxbury Press, New York, pp. 95. Otte J, Gumm I (1997). Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys. Preventive Veterinary Medicine 31: 147 - 150. } \note{ \code{epsilon.r} defines the maximum relative difference between our estimate and the unknown population value. The sample estimate should not differ in absolute value from the true unknown population parameter \code{d} by more than \code{epsilon.r * d}. } \examples{ ## EXAMPLE 1: ## A city contains 20 neighbourhood health clinics and it is desired to take a ## sample of clinics to estimate the total number of persons from all these ## clinics who have been given, during the past 12 month period, prescriptions ## for a recently approved antidepressant. If we assume that the average number ## of people seen at these clinics is 1500 per year with the standard deviation ## equal to 300, and that approximately 5\% of patients (regardless of clinic) ## are given this drug, how many clinics need to be sampled to yield an estimate ## that is within 20\% of the true population value? pmean <- 1500 * 0.05; pvar <- (300 * 0.05)^2 epi.simplesize(N = 20, Vsq = (pvar / pmean^2), Py = NA, epsilon.r = 0.20, method = "total", conf.level = 0.95) ## Three clinics need to be sampled to meet the survey requirements. ## EXAMPLE 2: ## We want to estimate the mean bodyweight of deer on a farm. There are 278 ## animals present. We anticipate the mean body weight to be around 200 kg ## and the standard deviation of body weight to be 30 kg. We would like to ## be 95\% certain that our estimate is within 10 kg of the true mean. How ## many deer should be sampled? epi.simplesize(N = 278, Vsq = 30^2 / 200^2, Py = NA, epsilon.r = 10/200, method = "mean", conf.level = 0.95) ## A total of 31 deer need to be sampled to meet the survey requirements.
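## As a cross-check, the sample size returned for EXAMPLE 2 can be
## reproduced by hand using the finite population formula for estimating a
## mean given by Levy and Lemeshow (1999). This is a sketch of the textbook
## expression, not necessarily the exact code used inside epi.simplesize:

z <- qnorm(0.975)              ## critical value for 95\% confidence
N <- 278                       ## population size
Vsq <- 30^2 / 200^2            ## relative variance
epsilon.r <- 10 / 200          ## maximum relative error
n <- (N * z^2 * Vsq) / ((N - 1) * epsilon.r^2 + z^2 * Vsq)
ceiling(n)                     ## returns 31, matching the result above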
## EXAMPLE 3: ## We want to estimate the seroprevalence of Brucella abortus in a population ## of cattle. An estimate of the unknown prevalence of B. abortus in this ## population is 0.15. We would like to be 95\% certain that our estimate is ## within 20\% of the true proportion of the population that is seropositive ## to B. abortus. Calculate the required sample size. n.crude <- epi.simplesize(N = 1E+06, Vsq = NA, Py = 0.15, epsilon.r = 0.20, method = "proportion", conf.level = 0.95) n.crude ## A total of 544 cattle need to be sampled to meet the survey requirements. ## EXAMPLE 3 (continued): ## Being seropositive to brucellosis is likely to cluster within herds. ## Otte and Gumm (1997) cite the intraclass correlation coefficient (rho) of ## Brucella abortus to be in the order of 0.09. Adjust the sample size ## estimate to account for clustering at the herd level. Assume that, on ## average, 20 animals will be sampled per herd: ## Let D equal the design effect and nbar equal the average number of ## individuals per cluster: ## rho = (D - 1) / (nbar - 1) ## Solving for D: ## D <- rho * (nbar - 1) + 1 rho <- 0.09; nbar <- 20 D <- rho * (nbar - 1) + 1 n.adj <- ceiling(n.crude * D) n.adj ## After accounting for the presence of clustering at the herd level we ## estimate that a total of 1475 cattle need to be sampled to meet ## the requirements of the survey. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.stratasize.Rd0000644000176200001440000001126412601641614015165 0ustar liggesusers\name{epi.stratasize} \alias{epi.stratasize} \title{Sample size under stratified random sampling } \description{ Estimates the required sample size under stratified random sampling. } \usage{ epi.stratasize(strata.n, strata.mean, strata.var, strata.Py, epsilon.r, method = "mean", conf.level = 0.95) } \arguments{ \item{strata.n}{vector, defining the size of each strata.} \item{strata.mean}{vector, representing the expected means in each strata. Only used when \code{method = "mean"}, \code{"total"} or \code{"pps"}.} \item{strata.var}{vector, representing the expected variance in each strata. Only used when \code{method = "mean"}, \code{"total"} or \code{"pps"}.} \item{strata.Py}{vector, representing the expected proportions in each strata. Only used when \code{method = "proportion"}.} \item{epsilon.r}{the maximum relative difference between our estimate and the unknown population value.} \item{method}{a character string indicating the method to be used. Options are \code{mean}, \code{total}, \code{proportion}, or \code{pps}.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \value{ A list containing the following: \item{strata.sample}{the estimated sample size for each strata.} \item{strata.total}{the estimated total size.} \item{strata.stats}{\code{mean} the mean across all strata, \code{sigma.bx} the among-strata variance, \code{sigma.wx} the within-strata variance, \code{sigma.x} the among-strata variance plus the within-strata variance, \code{rel.var} the within-strata variance divided by the square of the mean, and \code{gamma} the ratio of among-strata variance to within-strata variance.} } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 175 - 179. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia).
Javier Sanchez (Atlantic Veterinary College, University of Prince Edward Island, Charlottetown Prince Edward Island, C1A 4P3, Canada). } \note{ Use method \code{proportion} to estimate sample size using stratified random sampling with equal weights (see Levy and Lemeshow, page 176). Use method \code{pps} to estimate sample size using proportional stratified random sampling with proportional allocation (see Levy and Lemeshow, page 179). When \code{method = "proportion"} the vectors \code{strata.mean} and \code{strata.var} are ignored. } \examples{ ## EXAMPLE 1: ## Hospital episodes (Levy and Lemeshow 1999, page 176 -- 178) ## We plan to take a sample of the members of a health maintenance ## organisation (HMO) for purposes of estimating the average number ## of hospital episodes per person per year. The sample will be selected ## from membership lists according to age (under 45 years, 45 -- 64 years, ## 65 years and over). The number of members in each strata are 600, 500, ## and 400 (respectively). Previous data estimates the mean number of ## hospital episodes per year for each strata as 0.164, 0.166, and 0.236 ## (respectively). The variances of these estimates are 0.245, 0.296, and ## 0.436 (respectively). How many from each strata should be sampled to be ## 95\% certain that the sample estimate of hospital episodes is within 20\% of the ## true value? strata.n <- c(600, 500, 400) strata.mean <- c(0.164, 0.166, 0.236) strata.var <- c(0.245, 0.296, 0.436) epi.stratasize(strata.n, strata.mean, strata.var, strata.Py, epsilon.r = 0.20, method = "mean", conf.level = 0.95) ## The number allocated to the under 45 years, 45 -- 64 years, and 65 years ## and over strata should be 223, 186, and 149 (a total of 558). These ## results differ from the worked example provided in Levy and Lemeshow where ## certainty is set to approximately 99\%. ## EXAMPLE 2: ## Dairies are to be sampled to determine the proportion of herd managers ## using foot baths. Herds are stratified according to size (small, medium, ## and large). The number of herds in each strata are 1500, 2500, and 4000 ## (respectively). A review of the literature indicates that use of foot baths ## on farms is in the order of 0.50, with the probability of usage increasing ## as herds get larger. How many dairies should be sampled? strata.n <- c(1500, 2500, 4000) strata.Py <- c(0.50, 0.60, 0.70) epi.stratasize(strata.n, strata.mean, strata.var, strata.Py, epsilon.r = 0.20, method = "proportion", conf.level = 0.95) ## A total of 54 herds should be sampled: 10 small, 17 medium, and 27 large. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.interaction.Rd0000644000176200001440000001302312601641614015306 0ustar liggesusers\name{epi.interaction} \alias{epi.interaction} \title{Relative excess risk due to interaction in a case-control study} \description{ Computes the relative excess risk due to interaction, the proportion of disease among those with both exposures attributable to interaction, and the synergy index for case-control data. Confidence interval calculations are based on those described by Hosmer and Lemeshow (1992). } \usage{ epi.interaction(model, coeff, type = c("RERI", "APAB", "S"), conf.level = 0.95) } \arguments{ \item{model}{an object of class \code{glm}, \code{coxph} or \code{mle2}.} \item{coeff}{a vector specifying the position of the two coefficients and their interaction term in the model.} \item{type}{character string defining the type of analysis to be run.
Options are \code{RERI} the relative excess risk due to interaction, \code{APAB} the proportion of disease among those with both exposures that is attributable to interaction of the two exposures, and \code{S} the synergy index.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Interaction is defined as a departure from additivity of effects in epidemiologic studies. This function calculates three indices defined by Rothman (1998): (1) the relative excess risk due to interaction (RERI), (2) the proportion of disease among those with both exposures that is attributable to their interaction (AP[AB]), and (3) the synergy index (S). In terms of the odds ratios for the separate and joint exposures these indices are \eqn{RERI = OR_{11} - OR_{10} - OR_{01} + 1}{RERI = OR11 - OR10 - OR01 + 1}, \eqn{AP[AB] = RERI / OR_{11}}{AP[AB] = RERI / OR11} and \eqn{S = (OR_{11} - 1) / [(OR_{10} - 1) + (OR_{01} - 1)]}{S = (OR11 - 1) / [(OR10 - 1) + (OR01 - 1)]}, where \eqn{OR_{11}}{OR11} denotes the odds ratio for joint exposure and \eqn{OR_{10}}{OR10} and \eqn{OR_{01}}{OR01} the odds ratios for each exposure alone. The synergy index measures the interaction between two risk factors expressed as the ratio of the relative excess risk for the combined effect of the risk factors and the sum of the relative excess risks for each separate effect of the two risk factors. In the absence of interaction both RERI and AP[AB] = 0 and S = 1. This function uses the delta method to calculate the confidence intervals for each of the interaction measures, as described by Hosmer and Lemeshow (1992). An error will be returned if the point estimate of the synergy index is less than one. In this situation a warning is issued advising the user to re-parameterise their model as a linear odds model. See Skrondal (2003) for further details. RERI, APAB and S can be used to assess additive interaction when the odds ratio estimates the risk ratio. However, it is recognised that odds ratios from case-control studies are not designed to directly estimate the risk or rate ratio (and only do so well when the outcome of interest is rare). } \value{ A data frame listing: \item{est}{the point estimate of the requested interaction measure.} \item{lower}{the lower bound of the confidence interval of the requested interaction measure.} \item{upper}{the upper bound of the confidence interval of the requested interaction measure.} } \references{ Chen S-C, Wong R-H, Shiu L-J, Chiou M-C, Lee H (2008). Exposure to mosquito coil smoke may be a risk factor for lung cancer in Taiwan. Journal of Epidemiology 18: 19 - 25. Hosmer DW, Lemeshow S (1992). Confidence interval estimation of interaction. Epidemiology 3: 452 - 456. Kalilani L, Atashili J (2006). Measuring additive interaction using odds ratios. Epidemiologic Perspectives & Innovations doi:10.1186/1742-5573-3-5. Rothman K, Greenland S (1998). Modern Epidemiology. Lippincott - Raven Philadelphia, USA. Rothman K, Keller AZ (1972). The effect of joint exposure to alcohol and tobacco on risk of cancer of the mouth and pharynx. Journal of Chronic Diseases 23: 711 - 716. Skrondal A (2003). Interaction as departure from additivity in case-control studies: A cautionary note. American Journal of Epidemiology 158: 251 - 258.
} \examples{ ## Data from Rothman and Keller (1972) evaluating the effect of joint exposure ## to alcohol and tobacco on risk of cancer of the mouth and pharynx (cited in ## Hosmer and Lemeshow, 1992): can <- c(rep(1, times = 231), rep(0, times = 178), rep(1, times = 11), rep(0, times = 38)) smk <- c(rep(1, times = 225), rep(0, times = 6), rep(1, times = 166), rep(0, times = 12), rep(1, times = 8), rep(0, times = 3), rep(1, times = 18), rep(0, times = 20)) alc <- c(rep(1, times = 409), rep(0, times = 49)) dat <- data.frame(alc, smk, can) ## Table 2 of Hosmer and Lemeshow (1992): dat.glm01 <- glm(can ~ alc + smk + alc:smk, family = binomial, data = dat) summary(dat.glm01) ## Rothman defines an alternative coding scheme to be employed for ## parameterising an interaction term. Using this approach, instead of using ## two risk factors and one product term to represent the interaction (as ## above) the risk factors are combined into one variable with (in this case) ## four levels: ## a.neg b.neg: 0 0 0 ## a.pos b.neg: 1 0 0 ## a.neg b.pos: 0 1 0 ## a.pos b.pos: 0 0 1 dat$d <- rep(NA, times = nrow(dat)) dat$d[dat$alc == 0 & dat$smk == 0] <- 0 dat$d[dat$alc == 1 & dat$smk == 0] <- 1 dat$d[dat$alc == 0 & dat$smk == 1] <- 2 dat$d[dat$alc == 1 & dat$smk == 1] <- 3 dat$d <- factor(dat$d) ## Table 3 of Hosmer and Lemeshow (1992): dat.glm02 <- glm(can ~ d, family = binomial, data = dat) summary(dat.glm02) epi.interaction(model = dat.glm02, coeff = c(2,3,4), type = "RERI", conf.level = 0.95) epi.interaction(model = dat.glm02, coeff = c(2,3,4), type = "APAB", conf.level = 0.95) epi.interaction(model = dat.glm02, coeff = c(2,3,4), type = "S", conf.level = 0.95) ## Page 455 of Hosmer and Lemeshow (1992): ## RERI: 3.73 (95\% CI -1.84 -- 9.32). ## AP[AB]: 0.41 (95\% CI -0.07 -- 0.90). ## S: 1.87 (95\% CI 0.64 -- 5.41). } \keyword{univar} epiR/man/epi.offset.Rd0000644000176200001440000000233212601641614014256 0ustar liggesusers\name{epi.offset} \alias{epi.offset} \title{Create offset vector } \description{ Creates an offset vector based on a list. } \usage{ epi.offset(id.names) } \arguments{ \item{id.names}{a list identifying the [location] of each case. This must be a factor.} } \details{ This function is useful for supplying spatial data to WinBUGS. } \value{ A vector of length (1 + length of \code{id}). The first element of the offset vector is 1, corresponding to the position at which data for the first factor appears in id. The second element of the offset vector corresponds to the position at which the second factor appears in \code{id} and so on. The last element of the offset vector corresponds to the length of the \code{id} list. } \references{ Bailey TC, Gatrell AC (1995). Interactive Spatial Data Analysis. Longman Scientific & Technical. London. Langford IH (1994). Using empirical Bayes estimates in the geographical analysis of disease risk. Area 26: 142 - 149. } \examples{ dat <- c(1,1,1,2,2,2,2,3,3,3) dat <- as.factor(dat) offset <- epi.offset(dat) offset ## [1] 1 4 8 10 } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.bohning.Rd0000644000176200001440000000214512601641614014416 0ustar liggesusers\name{epi.bohning} \alias{epi.bohning} \title{Bohning's test for overdispersion of Poisson data} \description{ A test for overdispersion of Poisson data.
} \usage{ epi.bohning(obs, exp, alpha = 0.05) } \arguments{ \item{obs}{the observed number of cases in each area.} \item{exp}{the expected number of cases in each area.} \item{alpha}{alpha level to be used for the test of significance. Must be a single number between 0 and 1.} } \value{ A data frame with two elements: \code{test.statistic}, Bohning's test statistic, and \code{p.value}, the associated P-value. } \references{ Bohning D (2000). Computer-assisted Analysis of Mixtures and Applications. Chapman and Hall, Boca Raton. Ugarte MD, Ibanez B, Militino AF (2006). Modelling risks in disease mapping. Statistical Methods in Medical Research 15: 21 - 35. } \examples{ data(epi.SClip) obs <- epi.SClip$cases pop <- epi.SClip$population exp <- (sum(obs) / sum(pop)) * pop epi.bohning(obs, exp, alpha = 0.05) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.conf.Rd0000644000176200001440000002674412601641614013726 0ustar liggesusers\name{epi.conf} \alias{epi.conf} \title{Confidence intervals for means, proportions, incidence, and standardised mortality ratios } \description{ Computes confidence intervals for means, proportions, incidence, and standardised mortality ratios. } \usage{ epi.conf(dat, ctype = "mean.single", method, N, design = 1, conf.level = 0.95) } \arguments{ \item{dat}{the data, either a vector or a matrix depending on the method chosen.} \item{ctype}{a character string indicating the type of confidence interval to calculate. Options are \code{mean.single}, \code{mean.unpaired}, \code{mean.paired}, \code{prop.single}, \code{prop.unpaired}, \code{prop.paired}, \code{prevalence}, \code{inc.risk}, \code{inc.rate}, \code{odds} and \code{smr}.} \item{method}{a character string indicating the method to use. Where \code{ctype = "inc.risk"} or \code{ctype = "prevalence"} options are \code{exact}, \code{wilson} and \code{fleiss}. Where \code{ctype = "inc.rate"} options are \code{exact} and \code{byar}.} \item{N}{scalar, representing the population size.} \item{design}{scalar, representing the design effect.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Method \code{mean.single} requires a vector as input. Method \code{mean.unpaired} requires a two-column data frame; the first column defining the groups must be a factor. Method \code{mean.paired} requires a two-column data frame; one column for each group. Method \code{prop.single} requires a two-column matrix; the first column specifies the number of positives, the second column specifies the number of negatives. Methods \code{prop.unpaired} and \code{prop.paired} require a four-column matrix; columns 1 and 2 specify the number of positives and negatives for the first group, columns 3 and 4 specify the number of positives and negatives for the second group. Methods \code{prevalence} and \code{inc.risk} require a two-column matrix; the first column specifies the number of positives, the second column specifies the total number tested. Method \code{inc.rate} requires a two-column matrix; the first column specifies the number of positives, the second column specifies individual time at risk. Method \code{odds} requires a two-column matrix; the first column specifies the number of positives, the second column specifies the number of negatives. Method \code{smr} requires a two-column matrix; the first column specifies the total number of positives, the second column specifies the total number tested.
The methodology implemented here follows Altman, Machin, Bryant, and Gardner (2000). Where method is \code{inc.risk}, \code{prevalence} or \code{inc.rate}, if the numerator equals zero the lower bound of the confidence interval estimate is set to zero. Where method is \code{smr} the method of Dobson et al. (1991) is used. A summary of the methods used for each of the confidence interval calculations in this function is as follows: \tabular{ll}{ \code{ctype-method} \tab Reference \cr \code{mean.single} \tab Altman et al. (2000) \cr \code{mean.unpaired} \tab Altman et al. (2000) \cr \code{mean.paired} \tab Altman et al. (2000) \cr \code{prop.single} \tab Altman et al. (2000) \cr \code{prop.unpaired} \tab Altman et al. (2000) \cr \code{prop.paired} \tab Altman et al. (2000) \cr \code{inc.risk, exact} \tab Collett (1999) \cr \code{inc.risk, wilson} \tab Rothman (2002) \cr \code{inc.risk, fleiss} \tab Fleiss (1981) \cr \code{prevalence, exact} \tab Collett (1999) \cr \code{prevalence, wilson} \tab Rothman (2002) \cr \code{prevalence, fleiss} \tab Fleiss (1981) \cr \code{inc.rate, exact} \tab Collett (1999) \cr \code{inc.rate, byar} \tab Rothman (2002) \cr \code{odds} \tab Ederer and Mantel (1974) \cr \code{smr} \tab Dobson et al. (1991) \cr } The design effect is used to adjust the confidence interval around a prevalence or incidence risk estimate in the presence of clustering. The design effect is a measure of the variability between clusters and is calculated as the ratio of the variance calculated assuming a complex sample design to the variance calculated assuming simple random sampling. Adjustment for the effect of clustering can only be done on those prevalence and incidence risk methods that return a standard error (i.e. \code{method = "wilson"} or \code{method = "fleiss"}). } \references{ Altman DG, Machin D, Bryant TN, and Gardner MJ (2000). Statistics with Confidence, second edition. British Medical Journal, London, pp. 28 - 29 and pp. 45 - 56. Collett D (1999). Modelling Binary Data. Chapman & Hall/CRC, Boca Raton Florida, p. 24. Dobson AJ, Kuulasmaa K, Eberle E, and Scherer J (1991). Confidence intervals for weighted sums of Poisson parameters. Statistics in Medicine 10: 457 - 462. Ederer F, and Mantel N (1974). Confidence limits on the ratio of two Poisson variables. American Journal of Epidemiology 100: 165 - 167. Fleiss JL (1981). Statistical Methods for Rates and Proportions. 2nd edition. John Wiley & Sons, New York. Killip S, Mahfoud Z, Pearce K (2004). What is an intracluster correlation coefficient? Crucial concepts for primary care researchers. Annals of Family Medicine 2: 204 - 208. Otte J, Gumm I (1997). Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys. Preventive Veterinary Medicine 31: 147 - 150. Rothman KJ (2002). Epidemiology: An Introduction. Oxford University Press, London, pp. 130 - 143. } \examples{ ## EXAMPLE 1: dat <- rnorm(n = 100, mean = 0, sd = 1) epi.conf(dat, ctype = "mean.single") ## EXAMPLE 2: group <- c(rep("A", times = 5), rep("B", times = 5)) val <- round(c(rnorm(n = 5, mean = 10, sd = 5), rnorm(n = 5, mean = 7, sd = 5)), digits = 0) dat <- data.frame(group = group, val = val) epi.conf(dat, ctype = "mean.unpaired") ## EXAMPLE 3: ## Two paired samples (Altman et al. 2000, page 31): ## Systolic blood pressure levels were measured in 16 middle-aged men ## before and after a standard exercise test. The mean rise in systolic ## blood pressure was 6.6 mmHg.
The standard deviation of the difference ## was 6.0 mm Hg. The standard error of the mean difference was 1.49 mm Hg. before <- c(148,142,136,134,138,140,132,144,128,170,162,150,138,154,126,116) after <- c(152,152,134,148,144,136,144,150,146,174,162,162,146,156,132,126) dat <- data.frame(before, after) epi.conf(dat, ctype = "mean.paired", conf.level = 0.95) ## The 95\% confidence interval for the population value of the mean ## systolic blood pressure increase after standard exercise was 3.4 to 9.8 ## mm Hg. ## EXAMPLE 4: ## Single sample (Altman et al. 2000, page 47): ## Out of 263 giving their views on the use of personal computers in ## general practice, 81 thought that the privacy of their medical file ## had been reduced. pos <- 81 neg <- (263 - 81) dat <- as.matrix(cbind(pos, neg)) round(epi.conf(dat, ctype = "prop.single"), digits = 3) ## The 95\% confidence interval for the population value of the proportion ## of patients thinking their privacy was reduced was from 0.255 to 0.366. ## EXAMPLE 5: ## Two samples, unpaired (Altman et al. 2000, page 49): ## Goodfield et al. report adverse effects in 85 patients receiving either ## terbinafine or placebo treatment for dermatophyte onychomycosis. ## Out of 56 patients receiving terbinafine, 5 patients experienced ## adverse effects. Out of 29 patients receiving a placebo, none experienced ## adverse effects. grp1 <- matrix(cbind(5, 51), ncol = 2) grp2 <- matrix(cbind(0, 29), ncol = 2) dat <- as.matrix(cbind(grp1, grp2)) round(epi.conf(dat, ctype = "prop.unpaired"), digits = 3) ## The 95\% confidence interval for the difference between the two groups is ## from -0.038 to +0.193. ## EXAMPLE 6: ## Two samples, paired (Altman et al. 2000, page 53): ## In a reliability exercise, 41 patients were randomly selected from those ## who had undergone a thallium-201 stress test. The 41 sets of images were ## classified as normal or not by the core thallium laboratory and, ## independently, by clinical investigators from different centres. ## Of the 19 samples identified as ischaemic by clinical investigators, ## 5 were identified as normal by the laboratory. Of the 22 samples ## identified as normal by clinical investigators 0 were identified as ## ischaemic by the laboratory. ## Clinic | Laboratory | | ## | Ischaemic | Normal | Total ## --------------------------------------------------------- ## Ischaemic | 14 | 5 | 19 ## Normal | 0 | 22 | 22 ## --------------------------------------------------------- ## Total | 14 | 27 | 41 ## --------------------------------------------------------- dat <- as.matrix(cbind(14, 5, 0, 22)) round(epi.conf(dat, ctype = "prop.paired", conf.level = 0.95), digits = 3) ## The 95\% confidence interval for the population difference in ## proportions is 0.011 to 0.226 or approximately +1\% to +23\%. ## EXAMPLE 7: ## A herd of 1000 cattle were tested for brucellosis. Four samples out of 200 ## tested returned a positive result. Assuming 100\% test sensitivity and ## specificity, what is the estimated prevalence of brucellosis in this ## group of animals? pos <- 4; pop <- 200 dat <- as.matrix(cbind(pos, pop)) epi.conf(dat, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 ## The estimated prevalence of brucellosis in this herd is 2 cases ## per 100 cattle (95\% CI 0.54 -- 5.0 cases per 100 cattle). ## EXAMPLE 8: ## The observed disease counts and population size in four areas are provided ## below.
What are the standardised morbidity ratios of disease for each ## area and their 95\% confidence intervals? obs <- c(5, 10, 12, 18); pop <- c(234, 189, 432, 812) dat <- as.matrix(cbind(obs, pop)) round(epi.conf(dat, ctype = "smr"), digits = 2) ## EXAMPLE 9: ## A survey has been conducted to determine the proportion of broilers ## protected from a given disease following vaccination. We assume that ## the intra-cluster correlation coefficient for protection (also known as the ## rate of homogeneity, rho) is 0.4 and the average number of birds per ## flock is 30. A total of 5898 birds from a total of 10363 were identified ## as protected. What proportion of birds are protected and what is the 95\% ## confidence interval for this estimate? ## Calculate the design effect, given rho = (design - 1) / (nbar - 1), where ## nbar equals the average number of individuals sampled per cluster: D <- 0.4 * (30 - 1) + 1 ## The design effect is 12.6. Now calculate the proportion protected: dat <- as.matrix(cbind(5898, 10363)) epi.conf(dat, ctype = "prevalence", method = "fleiss", N = 1000000, design = D, conf.level = 0.95) ## The estimated proportion of the population protected is 0.57 (95\% CI ## 0.53 -- 0.60). If we had mistakenly assumed that data were a simple random ## sample the confidence interval would have been 0.56 -- 0.58. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.clustersize.Rd0000644000176200001440000000644512601641614015345 0ustar liggesusers\name{epi.clustersize} \alias{epi.clustersize} \title{Sample size for cluster-sample surveys } \description{ Estimates the number of clusters to be sampled using a cluster-sample design. } \usage{ epi.clustersize(p, b, rho, epsilon.r, conf.level = 0.95) } \arguments{ \item{p}{the estimated prevalence of the outcome in the population.} \item{b}{the number of units sampled per cluster.} \item{rho}{the intra-cluster correlation, a measure of the variation between clusters compared with the variation within clusters.} \item{epsilon.r}{scalar, the acceptable relative error.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \value{ A list containing the following: \item{clusters}{the estimated number of clusters to be sampled.} \item{units}{the total number of units to be sampled.} \item{design}{the design effect.} } \references{ Bennett S, Woods T, Liyanage WM, Smith DL (1991). A simplified general method for cluster-sample surveys of health in developing countries. World Health Statistics Quarterly 44: 98 - 106. Otte J, Gumm I (1997). Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys. Preventive Veterinary Medicine 31: 147 - 150. } \note{ The intra-cluster correlation (\code{rho}) will be higher for those situations where the between-cluster variation is greater than within-cluster variation. The design effect depends on \code{rho} and \code{b} (the number of units sampled per cluster). Note that \code{b} is the number of units sampled per cluster, not the total number of units per cluster. \code{rho = (D - 1) / (b - 1)}. Design effects of 2, 4, and 7 can be used to estimate \code{rho} when intra-cluster correlation is low, medium, and high (respectively). A design effect of 7.5 should be used when the intra-cluster correlation is unknown. } \examples{ ## EXAMPLE 1: ## The expected prevalence of disease in a population of cattle is 0.10.
## We wish to conduct a survey, sampling 50 animals per farm. No data ## are available to provide an estimate of rho, though we suspect ## the intra-cluster correlation for this disease to be moderate. ## We wish to be 95\% certain of being within 10\% of the true population ## prevalence of disease. How many herds should be sampled? p <- 0.10; b <- 50; D <- 4 rho <- (D - 1) / (b - 1) epi.clustersize(p = 0.10, b = 50, rho = rho, epsilon.r = 0.10, conf.level = 0.95) ## We need to sample 278 herds (13900 samples in total). ## EXAMPLE 2 (from Bennett et al. 1991): ## A cross-sectional study is to be carried out to determine the prevalence ## of a given disease in a population using a two-stage cluster design. We ## estimate prevalence to be 0.20 and we expect rho to be in the order of 0.02. ## We want to take sufficient samples to be 95\% certain that our estimate of ## prevalence is within 5\% of the true population value (that is, a relative ## error of 0.05 / 0.20 = 0.25). Assuming 20 responses from each cluster, ## how many clusters do we need to sample? epi.clustersize(p = 0.20, b = 20, rho = 0.02, epsilon.r = 0.25, conf.level = 0.95) ## We need to sample 18 clusters (360 samples in total). } \keyword{univar}% at least one, from doc/KEYWORDS epiR/man/epi.edr.Rd0000644000176200001440000000562712601641614013552 0ustar liggesusers\name{epi.edr} \alias{epi.edr} \title{ Estimated dissemination ratio } \description{ Computes estimated dissemination ratio on the basis of a vector of numbers (usually counts of incident cases identified on each day of an epidemic). } \usage{ epi.edr(dat, n = 4, conf.level = 0.95, nsim = 99, na.zero = TRUE) } \arguments{ \item{dat}{a numeric vector listing the number of incident cases for each day of an epidemic.} \item{n}{scalar, defining the number of days to be used when computing the estimated dissemination ratio.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} \item{nsim}{scalar, defining the number of simulations to be used for the confidence interval calculations.} \item{na.zero}{logical, replace \code{NaN} or \code{Inf} values with zeros?} } \details{ In infectious disease epidemics the \emph{n}-day estimated dissemination ratio (EDR) at day \emph{i} equals the total number of incident cases between day \code{i} and day \code{[i - (n - 1)]} (inclusive) divided by the total number of incident cases between day \code{(i - n)} and day \code{(i - 2n) + 1} (inclusive). EDR values are often calculated for each day of an epidemic and presented as a time series analysis. If the EDR is consistently less than unity, the epidemic is said to be `under control'. A simulation approach is used to calculate confidence intervals around each daily EDR estimate. The numerator and denominator of the EDR estimate for each day is taken in turn and a random number drawn from a Poisson distribution, using the calculated numerator and denominator value as the mean. EDR is then calculated for these simulated values and the process repeated \code{nsim} times. Confidence intervals are then derived from the vector of simulated values for each day. } \value{ Returns the point estimate of the EDR and the lower and upper bounds of the confidence interval of the EDR. } \references{ Miller W (1976). A state-transition model of epidemic foot-and-mouth disease. In: Proceedings of an International Symposium: New Techniques in Veterinary Epidemiology and Economics, University of Reading, Reading, 56 - 72.
Morris R, Sanson R, Stern M, Stevenson M, Wilesmith J (2002). Decision-support tools for foot-and-mouth disease control. Revue Scientifique et Technique de l'Office International des Epizooties 21, 557 - 567. } \examples{ set.seed(123) dat <- rpois(n = 50, lambda = 2) edr.04 <- epi.edr(dat, n = 4, conf.level = 0.95, nsim = 99, na.zero = TRUE) ## Plot: plot(1:50, 1:50, xlim = c(0,25), ylim = c(0, 10), xlab = "Days", ylab = "Estimated dissemination ratio", type = "n", main = "") lines(1:50, edr.04[,1], type = "l", lwd = 2, lty = 1, col = "blue") lines(1:50, edr.04[,2], type = "l", lwd = 1, lty = 2, col = "blue") lines(1:50, edr.04[,3], type = "l", lwd = 1, lty = 2, col = "blue") } \keyword{univar} epiR/man/epi.mh.Rd0000644000176200001440000001026612601641614013401 0ustar liggesusers\name{epi.mh} \alias{epi.mh} \title{Fixed-effects meta-analysis of binary outcomes using the Mantel-Haenszel method} \description{ Computes individual study odds or risk ratios for binary outcome data. Computes the summary odds or risk ratio using the Mantel-Haenszel method. Performs a test of heterogeneity among trials. Performs a test for the overall difference between groups (that is, after pooling the studies, do treated groups differ significantly from controls?). } \usage{ epi.mh(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{ev.trt}{observed number of events in the treatment group.} \item{n.trt}{number in the treatment group.} \item{ev.ctrl}{observed number of events in the control group.} \item{n.ctrl}{number in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{odds.ratio} or \code{risk.ratio}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}. } \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ \code{alternative = "greater"} tests the hypothesis that the Mantel-Haenszel summary measure of association is greater than 1. 
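As an illustrative sketch of the standard Mantel-Haenszel calculation (stated here for orientation and not taken verbatim from the references below): for stratum \emph{i} of a 2 by 2 table, with \emph{a} and \emph{b} the event and non-event counts in the treatment group, \emph{c} and \emph{d} the corresponding counts in the control group, and \emph{n} the stratum total, the summary odds ratio is \code{sum(a * d / n) / sum(b * c / n)}, the sums being taken across strata. The summary risk ratio replaces \code{a * d} with \code{a * (c + d)} and \code{b * c} with \code{c * (a + b)}.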
} \value{ A list containing the following: \item{OR}{the odds ratio for each trial, the standard error of the odds ratio for each trial, and the lower and upper bounds of the confidence interval of the odds ratio for each trial.} \item{RR}{the risk ratio for each trial, the standard error of the risk ratio for each trial, and the lower and upper bounds of the confidence interval of the risk ratio for each trial.} \item{OR.summary}{the Mantel-Haenszel summary odds ratio, the standard error of the Mantel-Haenszel summary odds ratio, the lower and upper bounds of the confidence interval of the Mantel-Haenszel summary odds ratio.} \item{RR.summary}{the Mantel-Haenszel summary risk ratio, the standard error of the Mantel-Haenszel summary risk ratio, the lower and upper bounds of the confidence interval of the Mantel-Haenszel summary risk ratio.} \item{weights}{the raw and inverse variance weights assigned to each trial.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} \item{Hsq}{the relative excess of the heterogeneity test statistic \code{Q} over the degrees of freedom \code{df}.} \item{Isq}{the percentage of total variation in study estimates that is due to heterogeneity rather than chance.} \item{effect}{a vector containing \code{z} the test statistic for overall treatment effect and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, 2001, pp. 291 - 299. Higgins JP, Thompson SG (2002). Quantifying heterogeneity in a meta-analysis. Statistics in Medicine 21: 1539 - 1558. } \note{ Using this method, the pooled odds and risk ratios are computed using the raw individual study weights. The methodology for computing the Mantel-Haenszel summary odds ratio follows the approach described in Deeks, Altman and Bradburn (2001, pp 291 - 299). The function checks each stratum for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within that stratum. } \seealso{ \code{\link{epi.dsl}, \link{epi.iv}, \link{epi.smd}} } \examples{ data(epi.epidural) epi.mh(ev.trt = epi.epidural$ev.trt, n.trt = epi.epidural$n.trt, ev.ctrl = epi.epidural$ev.ctrl, n.ctrl = epi.epidural$n.ctrl, names = as.character(epi.epidural$trial), method = "odds.ratio", alternative = "two.sided", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.cpresids.Rd0000644000176200001440000000353212601641614014607 0ustar liggesusers\name{epi.cpresids} \alias{epi.cpresids} \title{ Covariate pattern residuals from a logistic regression model } \description{ Returns covariate pattern residuals and delta betas from a logistic regression model. } \usage{ epi.cpresids(obs, fit, covpattern) } \arguments{ \item{obs}{a vector of observed values (i.e. counts of `successes') for each covariate pattern.} \item{fit}{a vector defining the predicted (i.e.
fitted) probability of success for each covariate pattern.} \item{covpattern}{an \code{\link{epi.cp}} object.} } \value{ A data frame with 13 elements: \code{cpid} the covariate pattern identifier, \code{n} the number of subjects in this covariate pattern, \code{obs} the observed number of successes, \code{pred} the predicted number of successes, \code{raw} the raw residuals, \code{sraw} the standardised raw residuals, \code{pearson} the Pearson residuals, \code{spearson} the standardised Pearson residuals, \code{deviance} the deviance residuals, \code{leverage} leverage, \code{deltabeta} the delta-betas, \code{sdeltabeta} the standardised delta-betas, and \code{deltachi} delta chi statistics. } \references{ Hosmer DW, Lemeshow S (1989). Applied Logistic Regression. John Wiley & Sons, New York, USA, pp. 137 - 138. } \seealso{ \code{\link{epi.cp}} } \examples{ infert.glm <- glm(case ~ spontaneous + induced, data = infert, family = binomial()) infert.mf <- model.frame(infert.glm) infert.cp <- epi.cp(infert.mf[-1]) infert.obs <- as.vector(by(infert$case, as.factor(infert.cp$id), FUN = sum)) infert.fit <- as.vector(by(fitted(infert.glm), as.factor(infert.cp$id), FUN = min)) infert.res <- epi.cpresids(obs = infert.obs, fit = infert.fit, covpattern = infert.cp) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.nomogram.Rd0000644000176200001440000000530612601641614014613 0ustar liggesusers\name{epi.nomogram} \alias{epi.nomogram} \title{Post-test probability of disease given sensitivity and specificity of a test} \description{ Computes the post-test probability of disease given sensitivity and specificity of a test. } \usage{ epi.nomogram(se, sp, lr, pre.pos, verbose = FALSE) } \arguments{ \item{se}{test sensitivity (0 - 1).} \item{sp}{test specificity (0 - 1).} \item{lr}{a vector of length 2 listing the positive and negative likelihood ratio (respectively) of the test. Ignored if \code{se} and \code{sp} are not null.} \item{pre.pos}{the pre-test probability of the outcome.} \item{verbose}{logical, indicating whether detailed or summary results are to be returned.} } \value{ A list containing the following: \item{lr}{the likelihood ratio of a positive and negative test.} \item{prob}{the post-test probability of the outcome given a positive and negative test.} } \references{ Hunink M, Glasziou P (2001). Decision Making in Health and Medicine - Integrating Evidence and Values. Cambridge University Press, pp. 128 - 156. } \examples{ ## EXAMPLE 1: ## You are presented with a dog with lethargy, exercise intolerance, ## weight gain and bilaterally symmetric truncal alopecia. You are ## suspicious of hypothyroidism and take a blood sample to measure ## basal serum thyroxine (T4). ## You believe that around 5\% of dogs presented to your clinic with ## a signalment of general debility have hypothyroidism. The serum T4 ## has a sensitivity of 0.89 and specificity of 0.85 for diagnosing ## hypothyroidism in the dog. The laboratory reports a serum T4 ## concentration of 22.0 nmol/L (reference range 19.0 to 58.0 nmol/L). ## What is the post-test probability that this dog is hypothyroid? epi.nomogram(se = 0.89, sp = 0.85, lr = NA, pre.pos = 0.05, verbose = FALSE) ## The post-test probability that this dog is hypothyroid is 24\%. ## EXAMPLE 2: ## A dog is presented to you with severe pruritus. You suspect sarcoptic ## mange and decide to take a skin scraping (LR+ 9000; LR- 0.1). The scrape ## returns a negative result (no mites are seen).
What is the post-test ## probability that your patient has sarcoptic mange? You recall that you ## diagnose around 3 cases of sarcoptic mange per year in a clinic that ## sees approximately 2 -- 3 dogs per week presented with pruritic skin disease. pre.pos <- 3 / (3 * 52) epi.nomogram(se = NA, sp = NA, lr = c(9000, 0.1), pre.pos = pre.pos, verbose = FALSE) ## If the skin scraping is negative the post-test probability that this dog ## has sarcoptic mange is 0.2\%. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.cp.Rd0000644000176200001440000000364512601641614013402 0ustar liggesusers\name{epi.cp} \alias{epi.cp} \title{ Extract unique covariate patterns from a data set } \description{ Extract the set of unique patterns from a set of covariates. } \usage{ epi.cp(dat) } \arguments{ \item{dat}{an \emph{i} row by \emph{j} column data frame where the \emph{i} rows represent individual observations and the \emph{j} columns represent covariates.} } \details{ A covariate pattern is a unique combination of values of predictor variables. For example, if a model contains two dichotomous predictors, there will be four covariate patterns possible: \code{(1,1)}, \code{(1,0)}, \code{(0,1)}, and \code{(0,0)}. This function extracts the \emph{n} unique covariate patterns from a data set comprising \emph{i} observations, labelling them from 1 to \emph{n}. A vector of length \emph{i} is also returned, listing the covariate pattern identifier for each observation. } \value{ A list containing the following: \item{cov.pattern}{a data frame with columns: \code{id} the unique covariate patterns, \code{n} the number of occasions each of the listed covariate patterns appears in the data, and the unique covariate combinations.} \item{id}{a vector listing the covariate pattern identifier for each observation.} } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada. } \examples{ ## Generate a set of covariates: set.seed(seed = 1234) obs <- round(runif(n = 100, min = 0, max = 1), digits = 0) v1 <- round(runif(n = 100, min = 0, max = 4), digits = 0) v2 <- round(runif(n = 100, min = 0, max = 4), digits = 0) dat <- data.frame(obs, v1, v2) dat.glm <- glm(obs ~ v1 + v2, family = binomial, data = dat) dat.mf <- model.frame(dat.glm) ## Covariate pattern: epi.cp(dat.mf[-1]) ## There are 25 covariate patterns in this data set. Subject 100 has ## covariate pattern 21. } \keyword{univar} epiR/man/epi.ltd.Rd0000644000176200001440000000515612601641614013562 0ustar liggesusers\name{epi.ltd} \alias{epi.ltd} \title{Lactation to date and standard lactation milk yields } \description{ Calculate lactation to date and standard lactation (that is, 305 or 270 day) milk yields. } \usage{ epi.ltd(dat, std = "305") } \arguments{ \item{dat}{an eight column data frame listing (in order) cow identifier, herd test identifier, lactation number, herd test days in milk, lactation length (\code{NA} if lactation incomplete), herd test milk yield (litres), herd test fat (percent), and herd test protein (percent).} \item{std}{\code{std = "305"} returns 305-day milk volume, fat, and protein yield. \code{std = "270"} returns 270-day milk volume, fat, and protein yield.} } \details{ Lactation to date yields will only be calculated if there are four or more herd test events.
} \value{ A data frame with nine elements: \code{ckey} cow identifier, \code{lact} lactation number, \code{llen} lactation length, \code{vltd} milk volume (litres) to last herd test or dry off date (computed on the basis of lactation length), \code{fltd} fat yield (kilograms) to last herd test or dry off date (computed on the basis of lactation length), \code{pltd} protein yield (kilograms) to last herd test or dry off date (computed on the basis of lactation length), \code{vstd} 305-day or 270-day milk volume yield (litres), \code{fstd} 305-day or 270-day milk fat yield (kilograms), and \code{pstd} 305-day or 270-day milk protein yield (kilograms). } \author{ Nicolas Lopez-Villalobos (IVABS, Massey University, Palmerston North New Zealand) and Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). } \references{ Kirkpatrick M, Lofsvold D, Bulmer M (1990). Analysis of the inheritance, selection and evolution of growth trajectories. Genetics 124: 979 - 993. } \examples{ ## Generate some herd test data: ckey <- rep(1, times = 12) pkey <- 1:12 lact <- rep(1:2, each = 6) dim <- c(25, 68, 105, 145, 200, 240, 30, 65, 90, 130, 190, 220) llen <- c(280, 280, 280, 280, 280, 280, NA, NA, NA, NA, NA, NA) vol <- c(18, 30, 25, 22, 18, 12, 20, 32, 27, 24, 20, 14) fat <- c(4.8, 4.3, 4.5, 4.7, 4.8, 4.9, 4.8, 4.3, 4.5, 4.7, 4.8, 4.9)/100 pro <- c(3.7, 3.5, 3.6, 3.7, 3.8, 3.9, 3.7, 3.5, 3.6, 3.7, 3.8, 3.9)/100 dat <- data.frame(ckey, pkey, lact, dim, llen, vol, fat, pro) ## Lactation to date and 305-day milk, fat, and protein yield: epi.ltd(dat, std = "305") ## Lactation to date and 270-day milk, fat, and protein yield: epi.ltd(dat, std = "270") } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.SClip.Rd0000644000176200001440000000420612601641614014004 0ustar liggesusers\name{epi.SClip} \docType{data} \alias{epi.SClip} \title{Lip cancer in Scotland 1975 - 1980} \description{ This data set provides counts of lip cancer diagnoses made in Scottish districts from 1975 to 1980. In addition to district-level counts of disease events and estimates of the size of the population at risk, the data set contains (for each district) an estimate of the percentage of the population involved in outdoor industry (agriculture, fishing, and forestry). It is known that exposure to sunlight is a risk factor for cancer of the lip and high counts are to be expected in districts where there is a high proportion of the workforce involved in outdoor industry. } \usage{data(epi.SClip)} \format{ A data frame with 56 observations on the following 6 variables. \describe{ \item{gridcode}{alternative district identifier.} \item{id}{numeric district identifier (1 to 56).} \item{district}{district name.} \item{cases}{number of lip cancer cases diagnosed 1975 - 1980.} \item{population}{total person years at risk 1975 - 1980.} \item{prop.ag}{percent of the population engaged in outdoor industry.} } } \source{ This data set has been analysed by a number of authors including Clayton and Kaldor (1987), Conlon and Louis (1999), Stern and Cressie (1999), and Carlin and Louis (2000, p 270). } \references{ Clayton D, Kaldor J (1987). Empirical Bayes estimates of age-standardized relative risks for use in disease mapping. Biometrics, 43: 671 - 681. Conlon EM, Louis TA (1999). Addressing multiple goals in evaluating region-specific risk using Bayesian methods. In: Lawson AB (Editor), Disease Mapping and Risk Assessment for Public Health.
John Wiley & Sons, Ltd, Chichester, pp. 31 - 47. Stern H, Cressie N (1999). Inference in extremes in disease mapping. In: Lawson AB (Editor), Disease Mapping and Risk Assessment for Public Health. John Wiley & Sons, Ltd, Chichester, pp. 63 - 84. Carlin BP, Louis TA (2000). Bayes and Empirical Bayes Methods for Data Analysis - Monographs on Statistics and Applied Probability 69. Chapman and Hall, London, p. 270. } \keyword{datasets}epiR/man/epi.iv.Rd0000644000176200001440000001075112601641614013412 0ustar liggesusers\name{epi.iv} \alias{epi.iv} \title{Fixed-effect meta-analysis of binary outcomes using the inverse variance method } \description{ Computes individual study odds or risk ratios for binary outcome data. Computes the summary odds or risk ratio using the inverse variance method. Performs a test of heterogeneity among trials. Performs a test for the overall difference between groups (that is, after pooling the studies, do treated groups differ significantly from controls?). } \usage{ epi.iv(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{ev.trt}{observed number of events in the treatment group.} \item{n.trt}{number in the treatment group.} \item{ev.ctrl}{observed number of events in the control group.} \item{n.ctrl}{number in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{odds.ratio} or \code{risk.ratio}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}. } \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Using this method, the inverse variance weights are used to compute the pooled odds ratios and risk ratios. The inverse variance weights should be used to indicate the weight each trial contributes to the meta-analysis. \code{alternative = "greater"} tests the hypothesis that the inverse variance summary measure of association is greater than 1.
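As an illustrative sketch of the general inverse variance approach (an assumption about the standard method, not a description of this function's internals): each trial receives weight \code{w = 1 / SE^2}, where \code{SE} is the standard error of the trial's log odds (or log risk) ratio, and the pooled log ratio is \code{sum(w * log(ratio)) / sum(w)} with standard error \code{1 / sqrt(sum(w))}.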
} \value{ A list containing: \item{OR}{the odds ratio for each trial, the standard error of the odds ratio for each trial, and the lower and upper bounds of the confidence interval of the odds ratio for each trial.} \item{RR}{the risk ratio for each trial, the standard error of the risk ratio for each trial, and the lower and upper bounds of the confidence interval of the risk ratio for each trial.} \item{OR.summary}{the inverse variance summary odds ratio, the standard error of the inverse variance summary odds ratio, the lower and upper bounds of the confidence interval of the inverse variance summary odds ratio.} \item{RR.summary}{the inverse variance summary risk ratio, the standard error of the inverse variance summary risk ratio, the lower and upper bounds of the confidence interval of the inverse variance summary risk ratio.} \item{weights}{the raw and inverse variance weights assigned to each trial.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} \item{Hsq}{the relative excess of the heterogeneity test statistic \code{Q} over the degrees of freedom \code{df}.} \item{Isq}{the percentage of total variation in study estimates that is due to heterogeneity rather than chance.} \item{effect}{a vector containing \code{z} the test statistic for overall treatment effect and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, 2001, pp. 291 - 299. Higgins JP, Thompson SG (2002). Quantifying heterogeneity in a meta-analysis. Statistics in Medicine 21: 1539 - 1558. } \note{ The inverse variance method performs poorly when data are sparse, both in terms of event rates being low and trials being small. The Mantel-Haenszel method (\code{\link{epi.mh}}) is more robust when data are sparse. Using this method, the inverse variance weights are used to compute the pooled odds ratios and risk ratios. The function checks each stratum for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within that stratum. } \seealso{ \code{\link{epi.dsl}}, \code{\link{epi.mh}}, \code{\link{epi.smd}} } \examples{ data(epi.epidural) epi.iv(ev.trt = epi.epidural$ev.trt, n.trt = epi.epidural$n.trt, ev.ctrl = epi.epidural$ev.ctrl, n.ctrl = epi.epidural$n.ctrl, names = as.character(epi.epidural$trial), method = "odds.ratio", alternative = "two.sided", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.studysize.Rd0000644000176200001440000003643212601641614015041 0ustar liggesusers\name{epi.studysize} \alias{epi.studysize} \title{ Estimate the sample size to compare means, proportions, and survival } \description{ Computes the sample size, power, and minimum detectable difference for cohort studies (using count data), case-control studies, and studies comparing means or survival. } \usage{ epi.studysize(treat, control, n, sigma, power, r = 1, design = 1, sided.test = 2, conf.level = 0.95, method = "means") } \arguments{ \item{treat}{the expected value for the treatment group (see below).} \item{control}{the expected value for the control group (see below).} \item{n}{scalar, defining the total number of subjects in the study (i.e.
the number in the treatment and control group).} \item{sigma}{when \code{method = "means"} this is the expected standard deviation of the variable of interest for both treatment and control groups. When \code{method = "case.control"} this is the expected proportion of study subjects exposed to the risk factor of interest. This argument is ignored when \code{method = "proportions"}, \code{method = "survival"}, or \code{method = "cohort.count"}.} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the treatment group divided by the number in the control group. This argument is ignored when \code{method = "proportions"}.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the treatment group is better or worse than the control group. Use a one-sided test to evaluate whether or not the treatment group is better than the control group.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} \item{method}{a character string indicating the method to be used. Options are \code{means}, \code{proportions}, \code{survival}, \code{cohort.count}, or \code{case.control}.} } \details{ The methodologies adopted in this function follow closely the approach described in Chapter 8 of Woodward (2005). When \code{method = "means"} the argument \code{treat} defines the mean outcome for the treatment group, \code{control} defines the mean outcome for the control group, and \code{sigma} defines the standard deviation of the outcome, assumed to be the same across the treatment and control groups (see Woodward pp 397 - 403). When \code{method = "proportions"} the argument \code{treat} defines the proportion in the treatment group and \code{control} defines the proportion in the control group. The arguments \code{sigma} and \code{r} are ignored. When \code{method = "survival"} the argument \code{treat} is the proportion of treated subjects that will not have experienced the event of interest at the end of the study period and \code{control} is the proportion of control subjects that will not have experienced the event of interest at the end of the study period. The argument \code{sigma} is ignored (see Therneau and Grambsch pp 61 - 65). When \code{method = "cohort.count"} the argument \code{treat} defines the estimated incidence risk (cumulative incidence) of the event of interest in the treatment group and \code{control} defines the estimated incidence risk of the event of interest in the control group. The argument \code{sigma} is ignored (see Woodward pp 405 - 410). When \code{method = "case.control"} the argument \code{treat} defines the estimated incidence risk (cumulative incidence) of the event of interest in the treatment group and \code{control} defines the estimated incidence risk of the event of interest in the control group. The argument \code{sigma} is the expected proportion of study subjects exposed to the risk factor of interest (see Woodward pp 410 - 412). In case-control studies sample size estimates are worked out on the basis of an expected odds (or risk) ratio. When \code{method = "case.control"} the estimated incidence risk estimates in the \code{treat} and \code{control} groups are used to define the expected risk ratio. See example 8 below, taken from Woodward p 412.
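To illustrate, the expected risk ratio is the ratio of the two incidence risk estimates, \code{treat / control}: in example 8 below, \code{treat = 2/100} and \code{control = 1/100}, giving an expected risk ratio of (2/100) / (1/100) = 2.0.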
For \code{method = "proportions"} it is assumed that one of the two proportions is known and we want to test the null hypothesis that the second proportion is equal to the first. In contrast, \code{method = "cohort.count"} relates to the two-sample problem where neither proportion is known (or assumed, at least). Thus, there is much more uncertainty in the \code{method = "cohort.count"} situation (compared with \code{method = "proportions"}) and correspondingly a requirement for a much larger sample size. Generally, \code{method = "cohort.count"} is more useful in practice. \code{method = "proportions"} is used in special situations, such as when a politician claims that at least 90\% of the population use seatbelts and we want to see if the data supports this claim. } \value{ A list containing one or more of the following: \item{n.crude}{the crude estimated total number of subjects required for the specified level of confidence and power.} \item{n.total}{the total estimated number of subjects required for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{delta}{the minimum detectable difference given the specified level of confidence and power.} \item{lambda}{the minimum detectable risk ratio >1 and the maximum detectable risk ratio <1.} \item{power}{the power of the study given the specified number of study subjects and power.} } \references{ Fleiss JL (1981). Statistical Methods for Rates and Proportions. Wiley, New York. Kelsey JL, Thompson WD, Evans AS (1986). Methods in Observational Epidemiology. Oxford University Press, London, pp. 254 - 284. Therneau TM, Grambsch PM (2000). Modelling Survival Data - Extending the Cox Model. Springer, London, pp. 61 - 65. Woodward M (2005). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 381 - 426. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. The odds ratio and the risk ratio are approximately equal when the event of interest is rare. In this function \code{method = "case.control"} returns the sample size required to detect an approximate risk ratio in a case-control study (see Woodward p 412). When \code{method = "proportions"} values need to be entered for \code{control}, \code{n}, and \code{power} to return a value for \code{delta}. When \code{method = "cohort.count"} values need to be entered for \code{control}, \code{n}, and \code{power} to return a value for \code{lambda} (see example 6 below). } \examples{ ## EXAMPLE 1 (from Woodward 2005 p. 399): ## Supposed we wish to test, at the 5\% level of significance, the hypothesis ## that cholesterol means in a population are equal in two study years against ## the one-sided alternative that the mean is higher in the second of the ## two years. Suppose that equal sized samples will be taken in each year, ## but that these will not necessarily be from the same individuals (i.e. the ## two samples are drawn independently). Our test is to have a power of 0.95 ## at detecting a difference of 0.5 mmol/L. The standard deviation of serum ## cholesterol in humans is assumed to be 1.4 mmol/L. epi.studysize(treat = 5, control = 4.5, n = NA, sigma = 1.4, power = 0.95, r = 1, design = 1, sided.test = 1, conf.level = 0.95, method = "means") ## To satisfy the study requirements 340 individuals need to be tested: 170 in ## the first year and 170 in the second year. 
## EXAMPLE 2 (from Woodward 2005 pp. 399 - 400): ## Women taking oral contraceptives sometimes experience anaemia due to ## impaired iron absorption. A study is planned to compare the use of iron ## tablets against a course of placebos. Oral contraceptive users are ## randomly allocated to one of the two treatment groups and mean serum ## iron concentration compared after 6 months. Data from previous studies ## indicates that the standard deviation of the increase in iron ## concentration will be around 4 micrograms\% over a 6-month period. ## The average increase in serum iron concentration without supplements is ## also thought to be 4 micrograms\%. The investigators wish to be 90\% sure ## of detecting when the supplement doubles the serum iron concentration using ## a two-sided 5\% significance test. It is decided to allocate 4 times as many ## women to the treatment group so as to obtain a better idea of its effect. ## How many women should be enrolled in this study? epi.studysize(treat = 8, control = 4, n = NA, sigma = 4, power = 0.90, r = 4, design = 1, sided.test = 2, conf.level = 0.95, method = "means") ## The estimated sample size is 66. We round this up to the nearest multiple ## of 5, to 70. We allocate 70/5 = 14 women to the placebo group and four ## times as many (56) to the iron treatment group. ## EXAMPLE 3 (from Woodward 2005 pp. 403 - 404): ## A government initiative has decided to reduce the prevalence of male ## smoking to, at most, 30\%. A sample survey is planned to test, at the ## 0.05 level, the hypothesis that the percentage of smokers in the male ## population is 30\% against the one-sided alternative that it is greater. ## The survey should be able to find a prevalence of 32\%, when it is true, ## with 0.90 power. How many men need to be sampled? epi.studysize(treat = 0.30, control = 0.32, n = NA, sigma = NA, power = 0.90, r = 1, design = 1, sided.test = 1, conf.level = 0.95, method = "proportions") ## A total of 18,315 men should be sampled: 9158 in the treatment group and ## 9158 in the control group. ## EXAMPLE 4 (from Therneau and Grambsch 2000 p. 63): ## The 5-year survival probability of patients receiving a standard treatment ## is 0.30 and we anticipate that a new treatment will increase it to 0.45. ## Assume that a study will use a two-sided test at the 0.05 level with 0.90 ## power to detect this difference. How many events are required? epi.studysize(treat = 0.45, control = 0.30, n = NA, sigma = NA, power = 0.90, r = 1, design = 1, sided.test = 2, conf.level = 0.95, method = "survival") ## A total of 250 events are required. Assuming one event per individual, ## assign 125 individuals to the treatment group and 125 to the control group. ## EXAMPLE 5 (from Therneau and Grambsch 2000 p. 63): ## What is the minimum detectable hazard ratio in a study involving 500 subjects where ## the treatment to control ratio is 1:1, assuming a power of 0.90 and a ## 2-sided test at the 0.05 level? epi.studysize(treat = NA, control = NA, n = 500, sigma = NA, power = 0.90, r = 1, design = 1, sided.test = 2, conf.level = 0.95, method = "survival") ## Assuming treatment increases time to event (compared with controls), the ## minimum detectable hazard ratio of a study involving 500 subjects (250 in the ## treatment group and 250 in the controls) is 1.33. ## EXAMPLE 6 (from Woodward 2005 p. 406): ## A cohort study of smoking and coronary heart disease (CHD) in middle-aged men ## is planned.
A sample of men will be selected at random from the population ## and those that agree to participate will be asked to complete a ## questionnaire. The follow-up period will be 5 years. The investigators would ## like to be 0.90 sure of being able to detect when the risk ratio of CHD ## is 1.4 for smokers, using a 0.05 significance test. Previous evidence ## suggests that the death rate from CHD in non-smokers is 413 per ## 100,000 per year. Assuming equal numbers of smokers and non-smokers are ## sampled, how many men should be sampled overall? treat = 1.4 * (5 * 413)/100000 control = (5 * 413)/100000 epi.studysize(treat = treat, control = control, n = NA, sigma = NA, power = 0.90, r = 1, design = 1, sided.test = 1, conf.level = 0.95, method = "cohort.count") ## A total of 12,130 men need to be sampled (6065 smokers and 6065 non-smokers). ## EXAMPLE 7 (from Woodward 2005 p. 406): ## Say, for example, we are only able to enrol 5000 subjects into the study ## described above. What are the minimum and maximum detectable risk ratios? control = (5 * 413)/100000 epi.studysize(treat = NA, control = control, n = 5000, sigma = NA, power = 0.90, r = 1, design = 1, sided.test = 1, conf.level = 0.95, method = "cohort.count") ## The minimum detectable risk ratio >1 is 1.65. The maximum detectable ## risk ratio <1 is 0.50. ## EXAMPLE 8 (from Woodward 2005 p. 412): ## A case-control study of the relationship between smoking and CHD is ## planned. A sample of men with newly diagnosed CHD will be compared for ## smoking status with a sample of controls. Assuming an equal number of ## cases and controls, how many are needed to detect an approximate risk ## ratio of 2.0 with 0.90 power using a two-sided 0.05 test? Previous surveys ## have shown that around 0.30 of the male population are smokers. epi.studysize(treat = 2/100, control = 1/100, n = NA, sigma = 0.30, power = 0.90, r = 1, design = 1, sided.test = 2, conf.level = 0.95, method = "case.control") ## A total of 376 men need to be sampled: 188 cases and 188 controls. ## EXAMPLE 9 (from Woodward 2005 p. 414): ## Suppose we wish to determine the power to detect an approximate risk ## ratio of 2.0 using a two-sided 0.05 test when 188 cases and 940 controls ## are available (that is, the ratio of cases to controls is 1:5). Assume ## the prevalence of smoking in the male population is 0.30. n <- 188 + 940 epi.studysize(treat = 2/100, control = 1/100, n = n, sigma = 0.30, power = NA, r = 0.2, design = 1, sided.test = 2, conf.level = 0.95, method = "case.control") ## The power of this study, with the given sample size allocation, is 0.99. ## EXAMPLE 10: ## A study is to be carried out to assess the effect of a new treatment for ## anoestrus in dairy cattle. What is the required sample size if we expect ## the proportion of cows responding in the treatment group to be 0.30 and the ## proportion of cows responding in the control group to be 0.15? The required ## power for this study is 0.80 using a two-sided 0.05 test. epi.studysize(treat = 0.30, control = 0.15, n = NA, sigma = NA, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95, method = "cohort.count") ## A total of 242 cows are required: 121 in the treatment group and 121 in ## the control group. ## Assume now that this study is going to be carried out using animals from a ## number of herds. What is the required sample size when you account for the ## observation that response to treatment is likely to cluster across herds.
## For the exercise, assume that the intra-cluster correlation coefficient ## (the rate of homogeneity, rho) is 0.05 and the average number of cows per ## herd is 30. Calculate the design effect, given ## rho = (design - 1) / (nbar - 1), where nbar equals the average number of ## individuals per cluster: design <- 0.05 * (30 - 1) + 1 epi.studysize(treat = 0.30, control = 0.15, n = NA, sigma = NA, power = 0.80, r = 1, design = design, sided.test = 2, conf.level = 0.95, method = "cohort.count") ## A total of 592 cows are required for this study: 296 in the treatment group ## and 296 in the control group. } \keyword{univar} epiR/man/epi.herdtest.Rd0000644000176200001440000000472412601641614014621 0ustar liggesusers\name{epi.herdtest} \alias{epi.herdtest} \title{ Estimate herd test characteristics } \description{ When tests are applied to individuals within a group we may wish to designate the group as being either diseased or non-diseased on the basis of the individual test results. This function estimates sensitivity and specificity of this testing regime at the group (or herd) level. } \usage{ epi.herdtest(se, sp, P, N, n, k) } \arguments{ \item{se}{a vector of length one defining the sensitivity of the individual test used.} \item{sp}{a vector of length one defining the specificity of the individual test used.} \item{P}{scalar, defining the estimated true prevalence.} \item{N}{scalar, defining the herd size.} \item{n}{scalar, defining the number of individuals to be tested per group (or herd).} \item{k}{scalar, defining the critical number of individuals testing positive that will denote the group as test positive.} } \value{ A data frame with four elements: \code{APpos} the probability of obtaining a positive test, \code{APneg} the probability of obtaining a negative test, \code{HSe} the estimated group (herd) sensitivity, and \code{HSp} the estimated group (herd) specificity. } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 113 - 115. } \author{ Ron Thornton, MAF New Zealand, PO Box 2526 Wellington, New Zealand. } \note{ The method implemented in this function is based on the hypergeometric distribution. } \examples{ ## EXAMPLE 1: ## We wish to estimate the herd-level sensitivity and specificity of ## a testing regime using an individual animal test of sensitivity 0.391 ## and specificity 0.964. The estimated true prevalence of disease is 0.12. ## Assume that 60 individuals will be tested per herd and we have ## specified that two or more positive test results identify the herd ## as positive. epi.herdtest(se = 0.391, sp = 0.964, P = 0.12, N = 1E06, n = 60, k = 2) ## This testing regime gives a herd sensitivity of 0.95 and a herd ## specificity of 0.36. With a herd sensitivity of 0.95 we can be ## confident that we will declare a herd infected if it is infected. ## With a herd specificity of only 0.36, we will declare 0.64 of disease ## negative herds as infected, so false positives are a problem. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.convgrid.Rd0000644000176200001440000000131212601641614014600 0ustar liggesusers\name{epi.convgrid} \alias{epi.convgrid} \title{Convert British National Grid georeferences to easting and northing coordinates} \description{ Convert British National Grid georeferences to easting and northing coordinates.
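A georeference such as \code{"SJ505585"} encodes a 100 km grid square (the two-letter prefix) followed by the easting and northing digits within that square. As a hedged worked example (an illustration of the grid system, not output from this function): the SJ square has its origin at easting 300000 and northing 300000, so \code{"SJ505585"} should correspond, approximately, to easting 350500 and northing 358500.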
} \usage{ epi.convgrid(os.refs) } \arguments{ \item{os.refs}{a vector of character strings listing the British National Grid georeferences to be converted.} } \note{ If an invalid georeference is encountered in the vector \code{os.refs} the method returns \code{NA}. } \examples{ os.refs <- c("SJ505585","SJ488573","SJ652636") epi.convgrid(os.refs) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.incin.Rd0000644000176200001440000000352412601641614014074 0ustar liggesusers\name{epi.incin} \docType{data} \alias{epi.incin} \title{Laryngeal and lung cancer cases in Lancashire 1974 - 1983} \description{ Between 1972 and 1980 an industrial waste incinerator operated at a site about 2 kilometres southwest of the town of Coppull in Lancashire, England. Addressing community concerns that there were greater than expected numbers of laryngeal cancer cases in close proximity to the incinerator, Diggle et al. (1990) conducted a study investigating risks for laryngeal cancer, using recorded cases of lung cancer as controls. The study area is 20 km x 20 km in size and includes the location of residence of patients diagnosed with each cancer type from 1974 to 1983. The site of the incinerator was at easting 354500 and northing 413600. } \usage{data(epi.incin)} \format{ A data frame with 974 observations on the following 3 variables. \describe{ \item{xcoord}{easting coordinate (in metres) of each residence.} \item{ycoord}{northing coordinate (in metres) of each residence.} \item{status}{disease status: 0 = lung cancer, 1 = laryngeal cancer.} } } \source{ Bailey TC and Gatrell AC (1995). Interactive Spatial Data Analysis. Longman Scientific & Technical. London. } \references{ Diggle P, Gatrell A, and Lovett A (1990). Modelling the prevalence of cancer of the larynx in Lancashire: A new method for spatial epidemiology. In: Thomas R (Editor), Spatial Epidemiology. Pion Limited, London, pp. 35 - 47. Diggle P (1990). A point process modelling approach to raised incidence of a rare phenomenon in the vicinity of a prespecified point. Journal of the Royal Statistical Society, A, 153: 349 - 362. Diggle P, Rowlingson B (1994). A conditional approach to point process modelling of elevated risk. Journal of the Royal Statistical Society, A, 157: 433 - 440. } \keyword{datasets} epiR/man/epi.descriptives.Rd0000644000176200001440000000301012601641614015470 0ustar liggesusers\name{epi.descriptives} \alias{epi.descriptives} \title{Descriptive statistics } \description{ Computes descriptive statistics from a vector of numbers. } \usage{ epi.descriptives(dat, conf.level = 0.95) } \arguments{ \item{dat}{vector for which descriptive statistics will be calculated.} \item{conf.level}{magnitude of the returned confidence intervals.
Must be a single number between 0 and 1.} } \value{ A list containing the following: \item{arithmetic}{\code{n} number of observations, \code{mean} arithmetic mean, \code{sd} arithmetic standard deviation, \code{q25} 25th quantile, \code{q75} 75th quantile, \code{lower} lower bound of the confidence interval, \code{upper} upper bound of the confidence interval, \code{min} minimum value, \code{max} maximum value, and \code{na} number of missing values.} \item{geometric}{\code{n} number of observations, \code{mean} geometric mean, \code{sd} geometric standard deviation, \code{q25} 25th quantile, \code{q75} 75th quantile, \code{lower} lower bound of the confidence interval, \code{upper} upper bound of the confidence interval, \code{min} minimum value, \code{max} maximum value, and \code{na} number of missing values.} \item{symmetry}{\code{skewness} and \code{kurtosis}. } } \examples{ id <- 1:1000 tmp <- rnorm(1000, mean = 0, sd = 1) id <- sample(id, size = 20) tmp[id] <- NA epi.descriptives(tmp, conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.ccc.Rd0000644000176200001440000001215312601641614013522 0ustar liggesusers\name{epi.ccc} \alias{epi.ccc} \title{ Concordance correlation coefficient } \description{ Calculates Lin's (1989, 2000) concordance correlation coefficient for agreement on a continuous measure. } \usage{ epi.ccc(x, y, ci = "z-transform", conf.level = 0.95) } \arguments{ \item{x}{a vector, representing the first set of measurements.} \item{y}{a vector, representing the second set of measurements.} \item{ci}{a character string, indicating the method to be used. Options are \code{z-transform} or \code{asymptotic}.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Computes Lin's (1989, 2000) concordance correlation coefficient for agreement on a continuous measure obtained by two methods. The concordance correlation coefficient combines measures of both precision and accuracy to determine how far the observed data deviate from the line of perfect concordance (that is, the line at 45 degrees on a square scatter plot). Lin's coefficient increases in value as a function of the nearness of the data's reduced major axis to the line of perfect concordance (the accuracy of the data) and of the tightness of the data about its reduced major axis (the precision of the data). Both \code{x} and \code{y} values need to be present for a measurement pair to be included in the analysis. If either or both values are missing (i.e. coded \code{NA}) then the measurement pair is deleted before analysis. } \value{ A list containing the following: \item{rho.c}{the concordance correlation coefficient.} \item{s.shift}{the scale shift.} \item{l.shift}{the location shift.} \item{C.b}{a bias correction factor that measures how far the best-fit line deviates from a line at 45 degrees. No deviation from the 45 degree line occurs when C.b = 1. See Lin (1989, page 258).} \item{blalt}{a data frame with two columns: \code{mean} the mean of each pair of measurements, \code{delta} vector \code{y} minus vector \code{x}.} \item{nmissing}{a count of the number of measurement pairs ignored due to missingness.} } \references{ Bland J, Altman D (1986). Statistical methods for assessing agreement between two methods of clinical measurement. The Lancet 327: 307 - 310. Bradley E, Blackwood L (1989). Comparing paired data: a simultaneous test for means and variances. 
American Statistician 43: 234 - 235. Dunn G (2004). Statistical Evaluation of Measurement Errors: Design and Analysis of Reliability Studies. London: Arnold. Hsu C (1940). On samples from a normal bivariate population. Annals of Mathematical Statistics 11: 410 - 426. Krippendorff K (1970). Bivariate agreement coefficients for reliability of data. In: Borgatta E, Bohrnstedt G (eds) Sociological Methodology. San Francisco: Jossey-Bass, pp. 139 - 150. Lin L (1989). A concordance correlation coefficient to evaluate reproducibility. Biometrics 45: 255 - 268. Lin L (2000). A note on the concordance correlation coefficient. Biometrics 56: 324 - 325. Pitman E (1939). A note on normal correlation. Biometrika 31: 9 - 12. Reynolds M, Gregoire T (1991). Comment on Bradley and Blackwood. American Statistician 45: 163 - 164. Snedecor G, Cochran W (1989). Statistical Methods. Ames: Iowa State University Press. } \seealso{ \code{\link[epiR]{epi.occc}} } \examples{ ## Concordance correlation plot: set.seed(seed = 1234) method1 <- rnorm(n = 100, mean = 0, sd = 1) method2 <- method1 + runif(n = 100, min = 0, max = 1) ## Introduce some missing values: method1[50] <- NA method2[75] <- NA tmp.ccc <- epi.ccc(method1, method2, ci = "z-transform", conf.level = 0.95) lab <- paste("CCC: ", round(tmp.ccc$rho.c[,1], digits = 2), " (95\% CI ", round(tmp.ccc$rho.c[,2], digits = 2), " - ", round(tmp.ccc$rho.c[,3], digits = 2), ")", sep = "") z <- lm(method2 ~ method1) par(pty = "s") plot(method1, method2, xlim = c(0, 5), ylim = c(0,5), xlab = "Method 1", ylab = "Method 2", pch = 16) abline(a = 0, b = 1, lty = 2) abline(z, lty = 1) legend(x = "topleft", legend = c("Line of perfect concordance", "Reduced major axis"), lty = c(2,1), lwd = c(1,1), bty = "n") text(x = 1.55, y = 3.8, labels = lab) ## Bland and Altman plot (Figure 2 from Bland and Altman 1986): x <- c(494,395,516,434,476,557,413,442,650,433,417,656,267, 478,178,423,427) y <- c(512,430,520,428,500,600,364,380,658,445,432,626,260, 477,259,350,451) tmp.ccc <- epi.ccc(x, y, ci = "z-transform", conf.level = 0.95) tmp.mean <- mean(tmp.ccc$blalt$delta) tmp.sd <- sqrt(var(tmp.ccc$blalt$delta)) plot(tmp.ccc$blalt$mean, tmp.ccc$blalt$delta, pch = 16, xlab = "Average PEFR by two meters (L/min)", ylab = "Difference in PEFR (L/min)", xlim = c(0,800), ylim = c(-140,140)) abline(h = tmp.mean, lty = 1, col = "gray") abline(h = tmp.mean - (2 * tmp.sd), lty = 2, col = "gray") abline(h = tmp.mean + (2 * tmp.sd), lty = 2, col = "gray") legend(x = "topleft", legend = c("Mean difference", "Mean difference +/- 2SD"), lty = c(1,2), bty = "n") legend(x = 0, y = 125, legend = c("Difference"), pch = 16, bty = "n") } \keyword{univar} epiR/man/epi.cluster2size.Rd0000644000176200001440000002515312601641614015434 0ustar liggesusers\name{epi.cluster2size} \alias{epi.cluster2size} \title{Sample size under two-stage cluster sampling } \description{ Returns the required number of clusters to be sampled using a two-stage cluster sampling strategy. } \usage{ epi.cluster2size(nbar, R, n, mean, sigma2.x, sigma2.y, sigma2.xy, epsilon.r, method = "mean", conf.level = 0.95) } \arguments{ \item{nbar}{integer, representing the total number of listing units to be selected from each cluster.} \item{R}{scalar, representing an estimate of the unknown population prevalence to be estimated.
Only used when \code{method = "proportion"}.} \item{n}{vector of length two, specifying the total number of clusters in the population and the total number of listing units within each cluster, respectively.} \item{mean}{vector of length two, specifying the mean of the variable of interest at the cluster level and listing unit level, respectively.} \item{sigma2.x}{vector of length two, specifying the variance of the denominator variable of interest at the cluster level and listing unit level, respectively.} \item{sigma2.y}{vector of length two, specifying the variance of the numerator variable of interest at the cluster level and listing unit level, respectively. See details. Only used when \code{method = "proportion"}.} \item{sigma2.xy}{vector of length two, specifying the covariance at the cluster level and listing unit level, respectively. Only used when \code{method = "proportion"}.} \item{epsilon.r}{the maximum relative difference between the estimate and the unknown population value.} \item{method}{a character string indicating the method to be used. Options are \code{total}, \code{mean} or \code{proportion}.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ In simple two-stage cluster sampling the number of listing units to be selected from each cluster is determined on the basis of cost and on the basis of the relative sizes of the first- and second-stage variance components. Once the number of listing units is fixed we might then wish to determine the total number of clusters to be sampled to be confident of obtaining estimates that reflect the true population value. } \value{ Returns an integer defining the required number of clusters to be sampled. } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, p. 292. } \examples{ ## EXAMPLE 1 (from Levy and Lemeshow p 292): ## We intend to conduct a survey of nurse practitioners to estimate the ## average number of patients seen by each nurse. There are five health ## centres in the study area, each with three nurses. We intend to sample ## two nurses from each health centre. We would like to be 95\% confident ## that our estimate is within 30\% of the true population value. We expect ## that the mean number of patients seen at the health centre level ## is 84 (var 567) and the mean number of patients seen at the nurse ## level is 28 (var 160). How many health centres should be sampled? tn <- c(5, 3); tmean <- c(84, 28); tsigma2.x <- c(567, 160) epi.cluster2size(nbar = 2, n = tn, mean = tmean, sigma2.x = tsigma2.x, sigma2.y = NA, sigma2.xy = NA, epsilon.r = 0.3, method = "mean", conf.level = 0.95) ## Three health centres need to be sampled to meet the survey ## specifications. ## EXAMPLE 2 (from Levy and Lemeshow p 294): ## Same scenario as above, but this time we want to estimate the proportion ## of patients referred to a general practitioner from each clinic. As before, ## we want to be 95\% confident that our estimate of the proportion of referred ## patients is within 30\% of the true population value. We expect that ## approximately 36\% of patients are referred. ## On page 295 Levy and Lemeshow state that the parameters sigma2.x, sigma2.y ## and sigma2.xy are rarely known in advance and must be either estimated ## or guessed from experience or intuition. In this example (for ## demonstration) we use the actual patient data to calculate sigma2.x, ## sigma2.y and sigma2.xy.
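## As a worked aside (a check on the inputs, not text from Levy and Lemeshow):
## the value R = 0.36 used in the call below is the ratio estimate of the
## referral proportion, that is, total referrals divided by total patients
## seen across the 15 nurses defined below:
## sum(Yij) / sum(Xij) = 152 / 422 = 0.36 (approximately).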
## Nurse-level data. The following code reproduces Table 10.4 of Levy and ## Lemeshow (page 293). clinic <- rep(1:5, each = 3) nurse <- 1:15 Xij <- c(58,44,18,42,53,10,13,18,37,16,32,10,25,23,23) Yij <- c(5,6,6,3,19,2,12,6,30,5,14,4,17,9,14) ssudat <- data.frame(clinic, nurse, Xij, Yij) Xbar <- by(data = ssudat$Xij, INDICES = ssudat$clinic, FUN = mean) ssudat$Xbar <- rep(Xbar, each = 3) Ybar <- by(data = ssudat$Yij, INDICES = ssudat$clinic, FUN = mean) ssudat$Ybar <- rep(Ybar, each = 3) ssudat$Xij.Xbar <- (ssudat$Xij - ssudat$Xbar)^2 ssudat$Yij.Ybar <- (ssudat$Yij - ssudat$Ybar)^2 ssudat$XY <- (ssudat$Xij - ssudat$Xbar) * (ssudat$Yij - ssudat$Ybar) ## Collapse the nurse-level data (created above) to the clinic level. ## The following code reproduces Table 10.3 of Levy and Lemeshow (page 292). clinic <- as.vector(by(ssudat$clinic, INDICES = ssudat$clinic, FUN = min)) Xi <- as.vector(by(ssudat$Xij, INDICES = ssudat$clinic, FUN = sum)) Yi <- as.vector(by(ssudat$Yij, INDICES = ssudat$clinic, FUN = sum)) psudat <- data.frame(clinic, Xi, Yi) psudat$Xi.Xbar <- (psudat$Xi - mean(psudat$Xi))^2 psudat$Yi.Ybar <- (psudat$Yi - mean(psudat$Yi))^2 psudat$XY <- (psudat$Xi - mean(psudat$Xi)) * (psudat$Yi - mean(psudat$Yi)) ## Number of primary and secondary sampling units: npsu <- nrow(psudat) nssu <- mean(by(ssudat$nurse, INDICES = ssudat$clinic, FUN = length)) tn <- c(npsu, nssu) ## Mean of X at primary sampling unit and secondary sampling unit level: tmean <- c(mean(psudat$Xi), mean(ssudat$Xij)) ## Variance of number of patients seen: tsigma2.x <- c(mean(psudat$Xi.Xbar), mean(ssudat$Xij.Xbar)) ## Variance of number of patients referred: tsigma2.y <- c(mean(psudat$Yi.Ybar), mean(ssudat$Yij.Ybar)) tsigma2.xy <- c(mean(psudat$XY), mean(ssudat$XY)) epi.cluster2size(nbar = 2, R = 0.36, n = tn, mean = tmean, sigma2.x = tsigma2.x, sigma2.y = tsigma2.y, sigma2.xy = tsigma2.xy, epsilon.r = 0.3, method = "proportion", conf.level = 0.95) ## Two health centres need to be sampled to meet the survey ## specifications. ## EXAMPLE 3: ## We want to determine the prevalence of brucellosis in dairy cattle in a ## country comprised of 20 provinces. The number of dairy herds per province ## ranges from 50 to 1200. Herd size ranges from 25 to 900. We suspect that ## the prevalence of brucellosis-positive herds across the entire country ## is around 10\%. We suspect that there are a small number of provinces ## with a relatively high individual cow-level prevalence of disease ## (thought to be between 40\% and 80\%). How many herds should be sampled ## from each province if we want our estimate of prevalence to be within ## 30\% of the true population value? epi.simplesize(N = 1200, Vsq = NA, Py = 0.10, epsilon.r = 0.30, method = "proportion", conf.level = 0.95) ## A total of 234 herds should be sampled from each province. ## Next we work out the number of provinces that need to be sampled. ## Again, we would like to be 95\% confident that our estimate is within ## 30\% of the true population value. Simulate some data to derive appropriate ## estimates of sigma2.x, sigma2.y and sigma2.xy. ## Number of herds per province: npsu <- 20 nherds.p <- as.integer(runif(n = npsu, min = 50, max = 1200)) ## Mean herd size per province: hsize.p <- as.integer(runif(n = npsu, min = 25, max = 900)) ## Simulate estimates of the cow-level prevalence of brucellosis in each ## province. 
Here we generate an equal mix of `low' and `high' brucellosis ## prevalence provinces: prev.p <- c(runif(n = 15, min = 0, max = 0.05), runif(n = 5, min = 0.40, max = 0.80)) ## Generate some data: prov <- c(); herd <- c(); Xij <- c(); Yij <- c(); Xbar <- c(); Ybar <- c(); Xij.Xbar <- c(); Yij.Ybar <- c() for(i in 1:npsu){ ## Province identifiers: tprov <- rep(i, times = nherds.p[i]) prov <- c(prov, tprov) ## Herd identifiers: therd <- 1:nherds.p[i] herd <- c(herd, therd) ## Number of cows in each of the herds in this province: tXij <- as.integer(rlnorm(n = nherds.p[i], meanlog = log(hsize.p[i]), sdlog = 0.5)) tXbar <- mean(tXij) tXij.Xbar <- (tXij - tXbar)^2 Xij <- c(Xij, tXij) Xbar <- c(Xbar, rep(tXbar, times = nherds.p[i])) Xij.Xbar <- c(Xij.Xbar, tXij.Xbar) ## Number of brucellosis-positive cows in each herd: tYij <- c() for(j in 1:nherds.p[i]){ ttYij <- rbinom(n = 1, size = tXij[j], prob = prev.p[i]) tYij <- c(tYij, ttYij) } tYbar <- mean(tYij) tYij.Ybar <- (tYij - tYbar)^2 Yij <- c(Yij, tYij) Ybar <- c(Ybar, rep(tYbar, times = nherds.p[i])) Yij.Ybar <- c(Yij.Ybar, tYij.Ybar) } ssudat <- data.frame(prov, herd, Xij, Yij, Xbar, Ybar, Xij.Xbar, Yij.Ybar) ssudat$XY <- (ssudat$Xij - ssudat$Xbar) * (ssudat$Yij - ssudat$Ybar) ## Collapse the herd-level data (created above) to the province level: prov <- as.vector(by(ssudat$prov, INDICES = ssudat$prov, FUN = min)) Xi <- as.vector(by(ssudat$Xij, INDICES = ssudat$prov, FUN = sum)) Yi <- as.vector(by(ssudat$Yij, INDICES = ssudat$prov, FUN = sum)) psudat <- data.frame(prov, Xi, Yi) psudat$Xi.Xbar <- (psudat$Xi - mean(psudat$Xi))^2 psudat$Yi.Ybar <- (psudat$Yi - mean(psudat$Yi))^2 psudat$XY <- (psudat$Xi - mean(psudat$Xi)) * (psudat$Yi - mean(psudat$Yi)) ## Number of primary and secondary sampling units: npsu <- nrow(psudat) nssu <- round(mean(by(ssudat$herd, INDICES = ssudat$prov, FUN = length)), digits = 0) tn <- c(npsu, nssu) ## Mean of X at primary sampling unit and secondary sampling unit level: tmean <- c(mean(psudat$Xi), mean(ssudat$Xij)) ## Variance of herd size: tsigma2.x <- c(mean(psudat$Xi.Xbar), mean(ssudat$Xij.Xbar)) ## Variance of number of brucellosis-positive cows: tsigma2.y <- c(mean(psudat$Yi.Ybar), mean(ssudat$Yij.Ybar)) tsigma2.xy <- c(mean(psudat$XY), mean(ssudat$XY)) ## Finally, calculate the number of provinces to be sampled: tR <- sum(psudat$Yi) / sum(psudat$Xi) epi.cluster2size(nbar = 234, R = tR, n = tn, mean = tmean, sigma2.x = tsigma2.x, sigma2.y = tsigma2.y, sigma2.xy = tsigma2.xy, epsilon.r = 0.3, method = "proportion", conf.level = 0.95) ## Four provinces (sampling 234 herds from each) are required to be 95\% ## confident that our estimate of the individual animal prevalence of ## brucellosis is within 30\% of the true population value. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.2by2.Rd0000644000176200001440000005746712601641614013571 0ustar liggesusers\name{epi.2by2} \alias{epi.2by2} \alias{print.epi.2by2} \alias{summary.epi.2by2} \title{ Summary measures for count data presented in a 2 by 2 table } \description{ Computes summary measures of risk and a chi-squared test for difference in the observed proportions from count data presented in a 2 by 2 table. Multiple strata may be represented by additional rows of count data and in this case crude and Mantel-Haenszel adjusted measures of association are calculated and chi-squared tests of homogeneity are returned. 
} \usage{ epi.2by2(dat, method = "cohort.count", conf.level = 0.95, units = 100, homogeneity = "breslow.day", outcome = "as.columns") \method{print}{epi.2by2}(x, ...) \method{summary}{epi.2by2}(object, ...) } \arguments{ \item{dat}{an object of class \code{table} containing the individual cell frequencies.} \item{method}{a character string indicating the experimental design on which the tabular data has been based. Options are \code{cohort.count}, \code{cohort.time}, \code{case.control}, or \code{cross.sectional}.} \item{conf.level}{magnitude of the returned confidence intervals. Must be a single number between 0 and 1.} \item{units}{multiplier for prevalence and incidence estimates.} \item{homogeneity}{a character string indicating the type of homogeneity test to perform. Options are \code{breslow.day} or \code{woolf}.} \item{outcome}{a character string indicating how the outcome variable is represented in the contingency table. Options are \code{as.columns} (outcome as columns) or \code{as.rows} (outcome as rows).} \item{x, object}{an object of class \code{epi.2by2}.} \item{...}{Ignored.} } \details{ Where method is \code{cohort.count}, \code{case.control}, or \code{cross.sectional} and \code{outcome = as.columns} the required 2 by 2 table format is: \tabular{lll}{ \tab Disease + \tab Disease - \cr Expose + \tab a \tab b \cr Expose - \tab c \tab d \cr } Where method is \code{cohort.time} and \code{outcome = as.columns} the required 2 by 2 table format is: \tabular{lll}{ \tab Disease + \tab Time at risk \cr Expose + \tab a \tab b \cr Expose - \tab c \tab d \cr } A summary of the methods used for each of the confidence interval calculations in this function is as follows: \tabular{lll}{ Name \tab Type \tab Reference \cr \code{wRR.} \tab Wald \tab Wald (1943) \cr \code{scRR.} \tab Score \tab Miettinen and Nurminen (1985) \cr \code{IRR.} \tab - \tab Kirkwood and Sterne (2003, 240 - 248) \cr \code{wOR.} \tab Wald \tab Wald (1943) \cr \code{cfOR.} \tab Cornfield \tab Cornfield (1956) \cr \code{scOR.} \tab Score \tab Miettinen and Nurminen (1985) \cr \code{mOR.} \tab MLE \tab Fleiss et al. (2003) \cr \code{wARisk.} \tab Wald \tab Wald (1943) \cr \code{scARisk.} \tab Score \tab Miettinen and Nurminen (1985) \cr \code{ARate.} \tab - \tab Rothman (2002) p 137 \cr \code{AFRisk.} \tab - \tab Hanley (2001) \cr \code{AFRate.} \tab - \tab Hanley (2001) \cr \code{AFest.} \tab - \tab Hanley (2001) \cr \code{wPARisk.} \tab Wald \tab Wald (1943) \cr \code{pPARisk.} \tab Pirikahu \tab Pirikahu (2014) \cr \code{PARate.} \tab - \tab Rothman (2002) p 137 \cr \code{PAFRisk.} \tab - \tab Jewell (2004) p 84 \cr \code{PAFRate.} \tab - \tab Sullivan (2009) \cr \code{PAFest.} \tab - \tab Jewell (2004) p 84 \cr } } \value{ An object of class \code{epi.2by2} comprised of: \item{method}{character string specifying the experimental design on which the tabular data has been based.} \item{n.strata}{number of strata.} \item{conf.level}{magnitude of the returned confidence intervals.} \item{massoc}{a list comprised of the computed measures of association. See below for details.} \item{tab}{a data frame comprised of the contingency table data.} When method equals \code{cohort.count} the following measures of association and effect are returned: \code{RR.strata.wald}, \code{RR.strata.score}: incidence risk ratios for each strata (Wald and score confidence intervals, respectively).
\code{RR.crude.wald}, \code{RR.crude.score}, \code{RR.mh}: incidence risk ratio (Wald and score confidence intervals, respectively) across all strata and Mantel-Haenszel adjusted incidence risk ratio. \code{OR.strata.wald}, \code{OR.strata.cfield}, \code{OR.strata.score}, \code{OR.strata.mle}: odds ratios for each strata (Wald, Cornfield, score and maximum likelihood confidence intervals, respectively). \code{OR.crude.wald}, \code{OR.crude.cfield}, \code{OR.crude.score}, \code{OR.crude.mle}, \code{OR.mh}: odds ratio (Wald, Cornfield, score and maximum likelihood confidence intervals, respectively) across all strata and Mantel-Haenszel adjusted odds ratio. \code{ARe.strata.wald}, \code{ARe.strata.score}: attributable risks in the exposed for each strata (Wald and score confidence intervals, respectively). \code{ARe.crude.wald}, \code{ARe.crude.score}, \code{AR.mh}: attributable risk (Wald and score confidence intervals, respectively) across all strata and Mantel-Haenszel adjusted attributable risk. \code{ARp.strata.wald}, \code{ARp.strata.piri}: population attributable risks for each strata (Wald and Pirikahu confidence intervals, respectively). \code{AFe.strata}: attributable fractions in the exposed for each strata. \code{AFp.strata}: attributable fractions in the population for each strata. \code{chisq.strata}: chi-squared test for difference in exposed and non-exposed proportions for each strata. \code{chisq.crude}: chi-squared test for difference in exposed and non-exposed proportions across all strata. \code{chisq.mh}: Mantel-Haenszel chi-squared test. \code{RR.homog}, \code{OR.homog}: tests of homogeneity of the individual strata incidence risk ratios and odds ratios. When method equals \code{cohort.time} the following measures of association and effect are returned: \code{IRR.strata}: incidence rate ratios for each strata. \code{IRR.crude}, \code{IRR.mh}: incidence rate ratio across all strata and Mantel-Haenszel adjusted incidence rate ratio. \code{AR.strata}: attributable rates in the exposed for each strata. \code{AR.crude}, \code{AR.mh}: attributable rate in the exposed across all strata and Mantel-Haenszel adjusted attributable rate in the exposed. \code{ARp.strata}: population attributable rates for each strata. \code{AFp.strata}: attributable fractions in the population for each strata. \code{chisq.strata}: chi-squared test for difference in exposed and non-exposed proportions for each strata. \code{chisq.crude}: chi-squared test for difference in exposed and non-exposed proportions across all strata. \code{chisq.mh}: Mantel-Haenszel chi-squared test. When method equals \code{case.control} the following measures of association and effect are returned: \code{OR.strata.wald}, \code{OR.strata.cfield}, \code{OR.strata.score}, \code{OR.strata.mle}: odds ratios for each strata (Wald, Cornfield, score and maximum likelihood confidence intervals, respectively). \code{OR.crude.wald}, \code{OR.crude.cfield}, \code{OR.crude.score}, \code{OR.crude.mle}, \code{OR.mh}: odds ratio (computed using Wald, Cornfield, score and maximum likelihood confidence intervals, respectively) across all strata and Mantel-Haenszel adjusted odds ratio. \code{ARe.strata.wald}, \code{ARe.strata.score}: attributable risks in the exposed for each strata (Wald and score confidence intervals, respectively).
\code{ARe.crude.wald}, \code{ARe.crude.score}, \code{AR.mh}: attributable prevalence in the exposed across all strata (Wald and score confidence intervals, respectively) and Mantel-Haenszel attributable prevalence in the exposed. \code{ARp.strata.wald}, \code{ARp.strata.piri}: attributable prevalence in the population for each strata (Wald and Pirikahu confidence intervals, respectively). \code{ARp.crude.wald}, \code{ARp.crude.piri}: attributable prevalence in the population (Wald and Pirikahu confidence intervals, respectively). \code{AFest.strata}: estimated attributable fractions in the exposed for each strata. \code{AFpest.strata}: estimated attributable fractions in the population for each strata. \code{chisq.strata}: chi-squared test for difference in exposed and non-exposed proportions for each strata. \code{chisq.crude}: chi-squared test for difference in exposed and non-exposed proportions across all strata. \code{chisq.mh}: Mantel-Haenszel chi-squared test. \code{OR.homog}: tests of homogeneity of the individual strata odds ratios. When method equals \code{cross.sectional} the following measures of association and effect are returned: \code{PR.strata.wald}, \code{PR.strata.score}: prevalence ratios for each strata (Wald and score confidence intervals, respectively). \code{PR.crude.wald}, \code{PR.crude.score}, \code{PR.mh}: prevalence ratio (Wald and score confidence intervals, respectively) across all strata and Mantel-Haenszel adjusted prevalence ratio. \code{OR.strata.wald}, \code{OR.strata.cfield}, \code{OR.strata.score}, \code{OR.strata.mle}: odds ratios for each strata (Wald, Cornfield, score and maximum likelihood confidence intervals, respectively). \code{OR.crude.wald}, \code{OR.crude.cfield}, \code{OR.crude.score}, \code{OR.crude.mle}, \code{OR.mh}: odds ratio (computed using Wald, Cornfield, score and maximum likelihood confidence intervals, respectively) across all strata and Mantel-Haenszel adjusted odds ratio. \code{ARe.strata.wald}, \code{ARe.strata.score}: attributable risks in the exposed for each strata (Wald and score confidence intervals, respectively). \code{ARe.crude.wald}, \code{ARe.crude.score}, \code{AR.mh}: attributable prevalence in the exposed across all strata (Wald and score confidence intervals, respectively) and Mantel-Haenszel attributable prevalence in the exposed. \code{ARp.strata.wald}, \code{ARp.strata.piri}: attributable prevalence in the population for each strata (Wald and Pirikahu confidence intervals, respectively). \code{AFe.strata}: attributable fractions in the exposed for each strata. \code{AFp.strata}: attributable fractions in the population for each strata. \code{chisq.strata}: chi-squared test for difference in exposed and non-exposed proportions for each strata. \code{chisq.crude}: chi-squared test for difference in exposed and non-exposed proportions across all strata. \code{chisq.mh}: Mantel-Haenszel chi-squared test. \code{PR.homog}, \code{OR.homog}: tests of homogeneity of the individual strata prevalence and odds ratios. } \references{ Altman D, Machin D, Bryant T, Gardner M (2000). Statistics with Confidence. British Medical Journal, London, pp. 69. Cornfield J (1956). A statistical problem arising from retrospective studies. In: Proceedings of the Third Berkeley Symposium on Mathematical Statistics and Probability, University of California Press, Berkeley California 4: 135 - 148. Elwood JM (2007). Critical Appraisal of Epidemiological Studies and Clinical Trials. Oxford University Press, London. Feinstein AR (2002).
Principles of Medical Statistics. Chapman Hall/CRC, London, pp. 332 - 336. Feychting M, Osterlund B, Ahlbom A (1998). Reduced cancer incidence among the blind. Epidemiology 9: 490 - 494. Fisher RA (1962). Confidence limits for a cross-product ratio. Australian Journal of Statistics 4: 41. Fleiss JL, Levin B, Paik MC (2003). Statistical Methods for Rates and Proportions. John Wiley and Sons, New York. Hanley JA (2001). A heuristic approach to the formulas for population attributable fraction. Journal of Epidemiology and Community Health 55: 508 - 514. Jewell NP (2004). Statistics for Epidemiology. Chapman & Hall/CRC, London, pp. 84 - 85. Juul S (2004). Epidemiologi og evidens. Munksgaard, Copenhagen. Kirkwood BR, Sterne JAC (2003). Essential Medical Statistics. Blackwell Science, Malden, MA, USA. Lancaster H (1961). Significance tests in discrete distributions. Journal of the American Statistical Association 56: 223 - 234. Lawson R (2004). Small sample confidence intervals for the odds ratio. Communications in Statistics Simulation and Computation 33: 1095 - 1113. Martin SW, Meek AH, Willeberg P (1987). Veterinary Epidemiology Principles and Methods. Iowa State University Press, Ames, Iowa, pp. 130. McNutt L, Wu C, Xue X, Hafner JP (2003). Estimating the relative risk in cohort studies and clinical trials of common outcomes. American Journal of Epidemiology 157: 940 - 943. Miettinen OS, Nurminen M (1985). Comparative analysis of two rates. Statistics in Medicine 4: 213 - 226. Pirikahu S (2014). Confidence Intervals for Population Attributable Risk. Unpublished MSc thesis. Massey University, Palmerston North, New Zealand. Robbins AS, Chao SY, Fonseca VP (2002). What's the relative risk? A method to directly estimate risk ratios in cohort studies of common outcomes. Annals of Epidemiology 12: 452 - 454. Rothman KJ (2002). Epidemiology An Introduction. Oxford University Press, London, pp. 130 - 143. Rothman KJ, Greenland S (1998). Modern Epidemiology. Lippincott Williams & Wilkins, Philadelphia, pp. 271. Sullivan KM, Dean A, Soe MM (2009). OpenEpi: A Web-based Epidemiologic and Statistical Calculator for Public Health. Public Health Reports 124: 471 - 474. Wald A (1943). Tests of statistical hypotheses concerning several parameters when the number of observations is large. Transactions of the American Mathematical Society 54: 426 - 482. Willeberg P (1977). Animal disease information processing: Epidemiologic analyses of the feline urologic syndrome. Acta Veterinaria Scandinavica. Suppl. 64: 1 - 48. Woodward MS (2005). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 163 - 214. Zhang J, Yu KF (1998). What's the relative risk? A method for correcting the odds ratio in cohort studies of common outcomes. Journal of the American Medical Association 280: 1690 - 1691. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia), Cord Heuer (EpiCentre, IVABS, Massey University, Palmerston North, New Zealand), Jim Robison-Cox (Department of Math Sciences, Montana State University, Montana, USA) and Kazuki Yoshida (Brigham and Women's Hospital, Boston, Massachusetts, USA). Thanks to Ian Dohoo for numerous helpful suggestions to improve the documentation for this function. } \note{Measures of strength of association include the prevalence ratio, the incidence risk ratio, the incidence rate ratio and the odds ratio.
The incidence risk ratio is the ratio of the incidence risk of disease in the exposed group to the incidence risk of disease in the unexposed group. The odds ratio (also known as the cross-product ratio) is an estimate of the incidence risk ratio. When the incidence of an outcome in the study population is low (say, less than 5\%) the odds ratio will provide a reliable estimate of the incidence risk ratio. The more frequent the outcome becomes, the more the odds ratio will overestimate the incidence risk ratio when it is greater than 1 or underestimate the incidence risk ratio when it is less than 1. Measures of effect include the attributable risk (or prevalence) and the attributable fraction. The attributable risk is the risk of disease in the exposed group minus the risk of disease in the unexposed group. The attributable risk provides a measure of the absolute increase or decrease in risk associated with exposure. The attributable fraction is the proportion of disease in the exposed group attributable to exposure. Measures of total effect include the population attributable risk (or prevalence) and the population attributable fraction (also known as the aetiologic fraction). The population attributable risk is the risk of disease in the population that may be attributed to exposure. The population attributable fraction is the proportion of the disease in the population that is attributable to exposure. Point estimates and confidence intervals for the prevalence ratio and incidence risk ratio are calculated using the Wald (Wald 1943) and score methods (Miettinen and Nurminen 1985). Point estimates and confidence intervals for the incidence rate ratio are calculated using the exact method described by Kirkwood and Sterne (2003) and Juul (2004). Point estimates and confidence intervals for the odds ratio are calculated using Wald (Wald 1943), score (Miettinen and Nurminen 1985) and maximum likelihood methods (Fleiss et al. 2003). Point estimates and confidence intervals for the population attributable fraction are calculated using formulae provided by Jewell (2004, p 84 - 85). Point estimates and confidence intervals for the summary risk differences are calculated using formulae provided by Rothman and Greenland (1998, p 271) and Pirikahu (2014). The function checks each strata for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within the strata. The Mantel-Haenszel adjusted measures of association are valid when the measures of association across the different strata are similar (homogeneous), that is when the test of homogeneity of the odds (risk) ratios is not significant. The tests of homogeneity of the odds (risk) ratio where \code{homogeneity = "breslow.day"} and \code{homogeneity = "woolf"} are based on Jewell (2004, p 152 - 158). Thanks to Jim Robison-Cox for sharing his implementation of these functions. } \examples{ ## EXAMPLE 1: ## A cross sectional study investigating the relationship between dry cat ## food (DCF) and feline urologic syndrome (FUS) was conducted (Willeberg ## 1977).
Counts of individuals in each group were as follows: ## DCF-exposed cats (cases, non-cases) 13, 2163 ## Non DCF-exposed cats (cases, non-cases) 5, 3349 ## Outcome variable (FUS) as columns: dat <- matrix(c(13,2163,5,3349), nrow = 2, byrow = TRUE) rownames(dat) <- c("DF+", "DF-"); colnames(dat) <- c("FUS+", "FUS-"); dat epi.2by2(dat = as.table(dat), method = "cross.sectional", conf.level = 0.95, units = 100, homogeneity = "breslow.day", outcome = "as.columns") ## Outcome variable (FUS) as rows: dat <- matrix(c(13,5,2163,3349), nrow = 2, byrow = TRUE) rownames(dat) <- c("FUS+", "FUS-"); colnames(dat) <- c("DF+", "DF-"); dat epi.2by2(dat = as.table(dat), method = "cross.sectional", conf.level = 0.95, units = 100, homogeneity = "breslow.day", outcome = "as.rows") ## Prevalence ratio: ## The prevalence of FUS in DCF exposed cats is 4.01 (95\% CI 1.43 to 11.23) ## times greater than the prevalence of FUS in non-DCF exposed cats. ## Attributable fraction: ## In DCF exposed cats, 75\% of FUS is attributable to DCF (95\% CI 30\% to ## 91\%). ## Population attributable fraction: ## Fifty-four percent of FUS cases in the cat population are attributable ## to DCF (95\% CI 4\% to 78\%). ## EXAMPLE 2: ## This example shows how the table function can be used to pass data to ## epi.2by2. Here we use the birthwt data from the MASS package. library(MASS) dat1 <- birthwt; head(dat1) ## Generate a table of cell frequencies. First set the levels of the outcome ## and the exposure so the frequencies in the 2 by 2 table come out in the ## conventional format: dat1$low <- factor(dat1$low, levels = c(1,0)) dat1$smoke <- factor(dat1$smoke, levels = c(1,0)) dat1$race <- factor(dat1$race, levels = c(1,2,3)) ## Generate the 2 by 2 table. Exposure (rows) = smoke. Outcome (columns) = low. tab1 <- table(dat1$smoke, dat1$low, dnn = c("Smoke", "Low BW")) print(tab1) ## Compute the incidence risk ratio and other measures of association: epi.2by2(dat = tab1, method = "cohort.count", conf.level = 0.95, units = 100, homogeneity = "breslow.day", outcome = "as.columns") ## Odds ratio: ## The odds of having a low birth weight child for smokers is 2.02 ## (95\% CI 1.08 to 3.78) times greater than the odds of having ## a low birth weight child for non-smokers. ## Now stratify by race: tab2 <- table(dat1$smoke, dat1$low, dat1$race, dnn = c("Smoke", "Low BW", "Race")) print(tab2) ## Compute the crude odds ratio, the Mantel-Haenszel adjusted odds ratio ## and other measures of association: epi.2by2(dat = tab2, method = "cohort.count", conf.level = 0.95, units = 100, homogeneity = "breslow.day", outcome = "as.columns") ## After adjusting for the confounding effect of race, the odds of ## having a low birth weight child for smokers is 2.15 (95\% CI 1.29 to 3.58) ## times that of non-smokers. ## Now turn tab2 into a data frame where the frequencies of individuals in ## each exposure-outcome category are provided. Often your data will be ## presented in this summary format: dat2 <- data.frame(tab2) print(dat2) ## Re-format dat2 (a summary count data frame) into tabular format using the ## xtabs function: tab3 <- xtabs(Freq ~ Smoke + Low.BW + Race, data = dat2) print(tab3) ## tab3 can now be passed to epi.2by2: rval <- epi.2by2(dat = tab3, method = "cohort.count", conf.level = 0.95, units = 100, homogeneity = "breslow.day", outcome = "as.columns") print(rval) ## The Mantel-Haenszel adjusted odds ratio is 3.09 (95\% CI 1.49 to 6.39). The ## ratio of the crude odds ratio to the Mantel-Haenszel adjusted odds ratio is ## 0.66.
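## As an illustrative sketch (not part of the original example), the crude
## to Mantel-Haenszel odds ratio comparison quoted above can be reproduced
## directly from the returned object. The element names are those listed in
## the Value section; the est column is assumed to be present, as it is for
## OR.mh in the plotting code further below:
or.crude <- rval$massoc$OR.crude.wald$est
or.mh <- rval$massoc$OR.mh$est
round(or.crude / or.mh, digits = 2)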
## What are the Cornfield confidence limits, the maximum likelihood ## confidence limits and the score confidence limits for the crude odds ratio? rval$massoc$OR.crude.cfield rval$massoc$OR.crude.mle rval$massoc$OR.crude.score ## Cornfield: 2.20 (95\% CI 1.07 to 3.79) ## Maximum likelihood: 2.01 (1.03 to 3.96) ## Score: 2.20 (95\% CI 2.84 to 5.17) ## Plot the individual strata-level odds ratios and compare them with the ## Mantel-Haenszel adjusted odds ratio. \dontrun{ library(ggplot2); library(scales) nstrata <- 1:dim(tab3)[3] strata.lab <- paste("Strata ", nstrata, sep = "") y.at <- c(nstrata, max(nstrata) + 1) y.lab <- c("M-H", strata.lab) x.at <- c(0.25, 0.5, 1, 2, 4, 8, 16, 32) or.l <- c(rval$massoc$OR.mh$lower, rval$massoc$OR.strata.cfield$lower) or.u <- c(rval$massoc$OR.mh$upper, rval$massoc$OR.strata.cfield$upper) or.p <- c(rval$massoc$OR.mh$est, rval$massoc$OR.strata.cfield$est) dat <- data.frame(y.at, y.lab, or.p, or.l, or.u) p <- ggplot(dat, aes(or.p, y.at)) p + geom_point() + geom_errorbarh(aes(xmin = or.l, xmax = or.u, height = 0.2)) + labs(x = "Odds ratio", y = "Strata") + scale_x_continuous(trans = log2_trans(), breaks = x.at, limits = c(0.25,32)) + scale_y_continuous(breaks = y.at, labels = y.lab) + geom_vline(xintercept = 1, lwd = 1) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0)) } ## EXAMPLE 3: ## A study was conducted by Feychting et al. (1998) comparing cancer occurrence ## among the blind with occurrence among those who were not blind but had ## severe visual impairment. From these data we calculate a cancer rate of ## 136/22050 person-years among the blind compared with 1709/127650 person- ## years among those who were visually impaired but not blind. dat <- as.table(matrix(c(136,22050,1709,127650), nrow = 2, byrow = TRUE)) rval <- epi.2by2(dat = dat, method = "cohort.time", conf.level = 0.90, units = 1000, homogeneity = "breslow.day", outcome = "as.columns") summary(rval)$ARe.strata ## The incidence rate of cancer was 7.22 cases per 1000 person-years less in the ## blind, compared with those who were not blind but had severe visual impairment ## (90\% CI 6.20 to 8.24 cases per 1000 person-years). Confidence intervals ## for this attributable risk estimate are from Rothman (2002, p 137). summary(rval)$IRR round(summary(rval)$IRR.strata, digits = 2) ## The incidence rate of cancer in the blind group was less than half that of the ## comparison group (incidence rate ratio 0.46, 90\% CI 0.40 to 0.53). } \keyword{univar} epiR/man/epi.cluster1size.Rd0000644000176200001440000000360212601641614015426 0ustar liggesusers\name{epi.cluster1size} \alias{epi.cluster1size} \title{ Sample size under one-stage cluster sampling } \description{ Returns the required number of clusters to be sampled using a one-stage cluster sampling strategy. } \usage{ epi.cluster1size(n, mean, var, epsilon.r, method = "mean", conf.level = 0.95) } \arguments{ \item{n}{integer, representing the total number of clusters in the population.} \item{mean}{number, representing the population mean of the variable of interest.} \item{var}{number, representing the population variance of the variable of interest.} \item{epsilon.r}{the maximum relative difference between our estimate and the unknown population value.} \item{method}{a character string indicating the method to be used.
Options are \code{total}, \code{mean} or \code{mean.per.unit}.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \value{ Returns an integer defining the required number of clusters to be sampled. } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 258. } \examples{ ## A survey to estimate the total number of residents over 65 years of ## age that require the services of a nurse is to be carried out. There are ## five housing complexes in the study area and we expect that there might ## be a total of around 34 residents meeting this criterion (variance 6.8). ## We would like the estimated sample size to provide us with an estimate ## that is within 10\% of the true value. How many housing complexes (clusters) ## should be sampled? epi.cluster1size(n = 5, mean = 34, var = 6.8, epsilon.r = 0.10, method = "total", conf.level = 0.999) ## We would need to sample 3 housing complexes to meet the specifications ## for this study. } \keyword{univar} epiR/man/epi.prev.Rd0000644000176200001440000001072612601641614013752 0ustar liggesusers\name{epi.prev} \alias{epi.prev} \title{ Estimate true prevalence } \description{ Computes the true prevalence of a disease in a population on the basis of an imperfect test. } \usage{ epi.prev(pos, tested, se, sp, method = "wilson", conf.level = 0.95) } \arguments{ \item{pos}{the number of positives.} \item{tested}{the number tested.} \item{se}{test sensitivity (0 - 1).} \item{sp}{test specificity (0 - 1).} \item{method}{a character string indicating the method to use. Options are \code{"c-p"} (Clopper-Pearson), \code{"sterne"} (Sterne), \code{"blaker"} (Blaker) and \code{"wilson"} (Wilson).} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Appropriate confidence intervals for the adjusted prevalence estimate are provided, accounting for the change in variance that arises from imperfect test sensitivity and specificity (see Reiczigel et al. 2010 for details). The Clopper-Pearson method is known to be too conservative for two-sided intervals (Blaker 2000, Agresti and Coull 1998). Blaker's and Sterne's methods (Blaker 2000, Sterne 1954) provide smaller exact two-sided confidence interval estimates. } \value{ A list containing the following: \item{ap}{the point estimate of apparent prevalence and the lower and upper bounds of the confidence interval around the apparent prevalence estimate.} \item{tp}{the point estimate of the true prevalence and the lower and upper bounds of the confidence interval around the true prevalence estimate.} } \references{ Abel U (1993). Die Bewertung Diagnostischer Tests. Hippokrates, Stuttgart. Agresti A, Coull BA (1998). Approximate is better than 'exact' for interval estimation of binomial proportions. American Statistician 52: 119 - 126. Blaker H (2000). Confidence curves and improved exact confidence intervals for discrete distributions. Canadian Journal of Statistics 28: 783 - 798. Clopper CJ, Pearson ES (1934). The use of confidence of fiducial limits illustrated in the case of the binomial. Biometrika 26: 404 - 413. Gardner IA, Greiner M (1999). Advanced Methods for Test Validation and Interpretation in Veterinary Medicine. Freie Universitat Berlin, ISBN 3-929619-22-9; 80 pp. Messam L, Branscum A, Collins M, Gardner I (2008). Frequentist and Bayesian approaches to prevalence estimation using examples from Johne's disease.
Animal Health Research Reviews 9: 1 - 23. Reiczigel J, Foldi J, Ozsvari L (2010). Exact confidence limits for prevalence of disease with an imperfect diagnostic test. Epidemiology and Infection 138: 1674 - 1678. Rogan W, Gladen B (1978). Estimating prevalence from results of a screening test. American Journal of Epidemiology 107: 71 - 76. Sterne TE (1954). Some remarks on confidence or fiducial limits. Biometrika 41: 275 - 278. } \note{This function uses apparent prevalence, test sensitivity and test specificity to estimate true prevalence (after Rogan and Gladen, 1978). Confidence intervals for the apparent and true prevalence estimates are based on code provided by Reiczigel et al. (2010). } \examples{ ## A simple random sample of 150 cows from a herd of 2560 is taken. ## Each cow is given a screening test for brucellosis which has a ## sensitivity of 96\% and a specificity of 89\%. Of the 150 cows tested ## 23 were positive to the screening test. What is the estimated prevalence ## of brucellosis in this herd (and its 95\% confidence interval)? epi.prev(pos = 23, tested = 150, se = 0.96, sp = 0.89, method = "blaker", conf.level = 0.95) ## The estimated true prevalence of brucellosis in this herd is 5.1 cases per ## 100 cows (95\% CI 0 -- 13 cases per 100 cows). ## Moujaber et al. (2008) analysed the seroepidemiology of Helicobacter pylori ## infection in Australia. They reported seroprevalence rates together with ## 95\% confidence intervals by age group using the Clopper-Pearson exact ## method (Clopper and Pearson, 1934). The ELISA test they applied had 96.4\% ## sensitivity and 92.7\% specificity. A total of 151 subjects 1 -- 4 years ## of age were tested. Of this group 6 were positive. What is the estimated ## true prevalence of Helicobacter pylori in this age group? epi.prev(pos = 6, tested = 151, se = 0.964, sp = 0.927, method = "c-p", conf.level = 0.95) ## The estimated true prevalence of Helicobacter pylori in 1 -- 4 year olds is ## 0 cases per 100 (95\% CI 0 -- 1.3 cases per 100). } \keyword{univar} epiR/man/epi.kappa.Rd0000644000176200001440000001772312601641614014076 0ustar liggesusers\name{epi.kappa} \alias{epi.kappa} \title{Kappa statistic} \description{ Computes the kappa statistic and its confidence interval. } \usage{ epi.kappa(dat, method = "fleiss", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{dat}{an object of class table with the individual cell frequencies.} \item{method}{a character string indicating the method to use. Options are \code{fleiss}, \code{watson} or \code{altman}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Kappa is a measure of agreement beyond the level of agreement expected by chance alone. The observed agreement is the proportion of samples for which both methods (or observers) agree. The bias and prevalence adjusted kappa (Byrt et al. 1993) provides a measure of observed agreement, an index of the bias between observers, and an index of the differences between the overall proportion of `yes' and `no' assessments. Common interpretations for the kappa statistic are as follows: < 0.2 slight agreement, 0.2 - 0.4 fair agreement, 0.4 - 0.6 moderate agreement, 0.6 - 0.8 substantial agreement, > 0.8 almost perfect agreement.
The argument \code{alternative = "greater"} tests the hypothesis that kappa is greater than 0. } \value{ A list containing the following: \item{prop.agree}{a data frame with \code{obs} the observed proportion of agreement and \code{exp} the expected proportion of agreement.} \item{pindex}{a data frame with the prevalence index, the standard error of the prevalence index and the lower and upper bounds of the confidence interval for the prevalence index.} \item{bindex}{a data frame with the bias index, the standard error of the bias index and the lower and upper bounds of the confidence interval for the bias index.} \item{pabak}{a data frame with the prevalence and bias corrected kappa statistic and the lower and upper bounds of the confidence interval for the prevalence and bias corrected kappa statistic.} \item{kappa}{a data frame with the kappa statistic, the standard error of the kappa statistic and the lower and upper bounds of the confidence interval for the kappa statistic.} \item{z}{a data frame containing the z test statistic for kappa and its associated P-value.} \item{mcnemar}{a data frame containing the McNemar test statistic for kappa and its associated P-value.} } \references{ Altman DG, Machin D, Bryant TN, Gardner MJ (2000). Statistics with Confidence, second edition. British Medical Journal, London, pp. 116 - 118. Byrt T, Bishop J, Carlin JB (1993). Bias, prevalence and kappa. Journal of Clinical Epidemiology 46: 423 - 429. Dohoo I, Martin W, Stryhn H (2010). Veterinary Epidemiologic Research, second edition. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 98 - 99. Fleiss JL, Levin B, Paik MC (2003). Statistical Methods for Rates and Proportions, third edition. John Wiley & Sons, London, pp. 598 - 626. Rothman KJ (2002). Epidemiology An Introduction. Oxford University Press, London, pp. 130 - 143. Silva E, Sterry RA, Kolb D, Mathialagan N, McGrath MF, Ballam JM, Fricke PM (2007). Accuracy of a pregnancy-associated glycoprotein ELISA to determine pregnancy status of lactating dairy cows twenty-seven days after timed artificial insemination. Journal of Dairy Science 90: 4612 - 4622. Sim J, Wright CC (2005). The kappa statistic in reliability studies: Use, interpretation, and sample size requirements. Physical Therapy 85: 257 - 268. Watson PF, Petrie A (2010). Method agreement analysis: A review of correct methodology. Theriogenology 73: 1167 - 1179. } \note{ \tabular{llll}{ \tab Obs 1 + \tab Obs 1 - \tab Total \cr Obs 2 + \tab \code{a} \tab \code{b} \tab \code{(a+b)} \cr Obs 2 - \tab \code{c} \tab \code{d} \tab \code{(c+d)} \cr Total \tab \code{(a+c)} \tab \code{(b+d)} \tab \code{(a+b+c+d=N)}\cr } The kappa coefficient is influenced by the prevalence of the condition being assessed. A prevalence effect exists when the proportion of agreements on the positive classification differs from that of the negative classification. If the prevalence index is high (that is, the prevalence of a positive rating is very high or very low) chance agreement is also high and the value of kappa is reduced accordingly. The effect of prevalence on kappa is greater for large values of kappa than for small values (Byrt et al. 1993). Using the notation above, the prevalence index is calculated as \code{((a/N) - (d/N))}. Confidence intervals for the prevalence index are based on methods used for a difference in two proportions. See Rothman (2002, p 135 equation 7-2) for details. Bias is the extent to which raters disagree on the proportion of positive (or negative) cases.
Bias affects interpretation of the kappa coefficient. When there is a large amount of bias, kappa is higher than when bias is low or absent. In contrast to prevalence, the effect of bias is greater when kappa is small than when it is large (Byrt et al. 1993). Using the notation above, the bias index is calculated as \code{((a + b)/N - (a + c)/N)}. Confidence intervals for the bias index are based on methods used for a difference in two proportions. See Rothman (2002, p 135 equation 7-2) for details. The McNemar test is used to test for the presence of bias. A statistically significant McNemar test (generally if P < 0.05) shows that there is evidence of a systematic difference between the proportion of `positive' responses from the two methods. If one method provides the `true values' (i.e. it is regarded as the gold standard method) the absence of a systematic difference implies that there is no bias. However, a non-significant result indicates only that there is no evidence of a systematic effect. A systematic effect may be present, but the power of the test may be inadequate to determine its presence. } \examples{ ## EXAMPLE 1: ## Kidney samples from 291 salmon were split with one half of the ## samples sent to each of two laboratories where an IFAT test ## was run on each sample. The following results were obtained: ## Lab 1 positive, lab 2 positive: 19 ## Lab 1 positive, lab 2 negative: 10 ## Lab 1 negative, lab 2 positive: 6 ## Lab 1 negative, lab 2 negative: 256 dat <- as.table(matrix(c(19,10,6,256), nrow = 2, byrow = TRUE)) colnames(dat) <- c("L1-pos","L1-neg") rownames(dat) <- c("L2-pos","L2-neg") epi.kappa(dat, method = "fleiss", alternative = "greater", conf.level = 0.95) ## The z test statistic is 11.53 (P < 0.01). We accept the alternative ## hypothesis that the kappa statistic is greater than zero. ## The proportion of agreements after chance has been excluded is ## 0.67 (95\% CI 0.56 to 0.79). We conclude, on the basis of ## this sample, that there is substantial agreement between the two ## laboratories. ## EXAMPLE 2 (from Watson and Petrie 2010, page 1170): ## Silva et al. (2007) compared an early pregnancy enzyme-linked immunosorbent ## assay test for pregnancy associated glycoprotein on blood samples collected ## from lactating dairy cows at day 27 after artificial insemination with ## transrectal ultrasound (US) diagnosis of pregnancy at the same stage. ## The results were as follows: ## ELISA positive, US positive: 596 ## ELISA positive, US negative: 61 ## ELISA negative, US positive: 29 ## ELISA negative, US negative: 987 dat <- as.table(matrix(c(596,61,29,987), nrow = 2, byrow = TRUE)) colnames(dat) <- c("US-pos","US-neg") rownames(dat) <- c("ELISA-pos","ELISA-neg") epi.kappa(dat, method = "watson", alternative = "greater", conf.level = 0.95) ## The proportion of agreements after chance has been excluded is ## 0.89 (95\% CI 0.86 to 0.91). We conclude that there is substantial ## agreement between the two pregnancy diagnostic methods. } \keyword{univar} epiR/man/epi.pooled.Rd0000644000176200001440000000420112601641614014250 0ustar liggesusers\name{epi.pooled} \alias{epi.pooled} \title{ Estimate herd test characteristics when pooled sampling is used } \description{ We may wish to designate a group of individuals (e.g. a herd) as being either diseased or non-diseased on the basis of pooled samples. This function estimates sensitivity and specificity of this testing regime at the group (or herd) level.
} \usage{ epi.pooled(se, sp, P, m, r) } \arguments{ \item{se}{a vector of length one defining the sensitivity of the individual test used.} \item{sp}{a vector of length one defining the specificity of the individual test used.} \item{P}{scalar, defining the estimated true prevalence.} \item{m}{scalar, defining the number of individual samples to make up a pooled sample.} \item{r}{scalar, defining the number of pooled samples per group (or herd).} } \value{ A list containing the following: \item{HAPneg}{the apparent prevalence in a disease negative herd.} \item{HSe}{the estimated group (herd) level sensitivity.} \item{HSp}{the estimated group (herd) level specificity.} } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 115 - 117. Christensen J, Gardner IA (2000). Herd-level interpretation of test results for epidemiologic studies of animal diseases. Preventive Veterinary Medicine 45: 83 - 106. } \examples{ ## We want to test dairy herds for Johne's disease using faecal culture ## which has a sensitivity and specificity of 0.647 and 0.981, respectively. ## Suppose we pool faecal samples from five cows together and use six pooled ## samples per herd. What is the herd level sensitivity and specificity ## based on this approach (assuming homogeneous mixing)? epi.pooled(se = 0.647, sp = 0.981, P = 0.12, m = 5, r = 6) ## Herd level sensitivity is 0.927, herd level specificity is 0.562. ## Sensitivity at the herd level is increased using the pooled sampling ## approach; herd level specificity is decreased. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.insthaz.Rd0000644000176200001440000000306212601641614014451 0ustar liggesusers\name{epi.insthaz} \alias{epi.insthaz} \title{Instantaneous hazard computed on the basis of a Kaplan-Meier survival function } \description{ Compute the instantaneous hazard on the basis of a Kaplan-Meier survival function. } \usage{ epi.insthaz(survfit.obj, conf.level = 0.95) } \arguments{ \item{survfit.obj}{a \code{survfit} object, computed using the \code{survival} package.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Computes the instantaneous hazard of failure, equivalent to the proportion of the population failing per unit time. } \value{ A data frame with four elements: \code{time} the observed failure times, \code{est} the proportion of the population failing per unit time, \code{lower} the lower bounds of the confidence interval, and \code{upper} the upper bounds of the confidence interval. } \references{ Venables W, Ripley B (2002). Modern Applied Statistics with S, fourth edition. Springer, New York, pp. 353 - 385. Singer J, Willett J (2003). Applied Longitudinal Data Analysis: Modeling Change and Event Occurrence. Oxford University Press, London, pp. 348.
} \examples{ require(survival) ovarian.km <- survfit(Surv(futime,fustat) ~ 1, data = ovarian) ovarian.haz <- epi.insthaz(ovarian.km, conf.level = 0.95) plot(ovarian.haz$time, ovarian.haz$est, xlab = "Days", ylab = "Instantaneous hazard", type = "b", pch = 16) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.RtoBUGS.Rd0000644000176200001440000000163212601641614014217 0ustar liggesusers\name{epi.RtoBUGS} \alias{epi.RtoBUGS} \title{R to WinBUGS data conversion} \description{ Writes data from an R list to a text file in WinBUGS-compatible format. } \usage{ epi.RtoBUGS(datalist, towhere) } \arguments{ \item{datalist}{a list (normally, with named elements) which may include scalars, vectors, matrices, arrays of any number of dimensions, and data frames.} \item{towhere}{a character string identifying where the file is to be written.} } \details{ Does not check to ensure that only numbers are being produced. In particular, factor labels in a data frame will be output to the file, which normally won't be desired. } \references{ Best, NG. WinBUGS 1.3.1 Short Course, Brisbane, November 2000. } \author{ Terry Elrod (Terry.Elrod@UAlberta.ca), Kenneth Rice. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.dsl.Rd0000644000176200001440000001071312601641614013554 0ustar liggesusers\name{epi.dsl} \alias{epi.dsl} \title{Mixed-effects meta-analysis of binary outcomes using the DerSimonian and Laird method } \description{ Computes individual study odds or risk ratios for binary outcome data. Computes the summary odds or risk ratio using the DerSimonian and Laird method. Performs a test of heterogeneity among trials. Performs a test for the overall difference between groups (that is, after pooling the studies, do treated groups differ significantly from controls?). } \usage{ epi.dsl(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{ev.trt}{observed number of events in the treatment group.} \item{n.trt}{number in the treatment group.} \item{ev.ctrl}{observed number of events in the control group.} \item{n.ctrl}{number in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{odds.ratio} or \code{risk.ratio}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}. } \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ \code{alternative = "greater"} tests the hypothesis that the DerSimonian and Laird summary measure of association is greater than 1. 
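As background to the weights reported in the output (a summary of the standard DerSimonian and Laird approach, not a description unique to this implementation): under the random-effects model each trial is weighted by \code{1 / (vi + tau.sq)}, where \code{vi} is the within-trial variance of the log odds (or log risk) ratio and \code{tau.sq} is the among-trial variance returned by this function (DerSimonian and Laird 1986).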
} \value{ A list containing the following: \item{OR}{the odds ratio for each trial, the standard error of the odds ratio for each trial, and the lower and upper bounds of the confidence interval of the odds ratio for each trial.} \item{RR}{the risk ratio for each trial, the standard error of the risk ratio for each trial, and the lower and upper bounds of the confidence interval of the risk ratio for each trial.} \item{OR.summary}{the DerSimonian and Laird summary odds ratio, the standard error of the DerSimonian and Laird summary odds ratio, the lower and upper bounds of the confidence interval of the DerSimonian and Laird summary odds ratio.} \item{RR.summary}{the DerSimonian and Laird summary risk ratio, the standard error of the DerSimonian and Laird summary risk ratio, the lower and upper bounds of the confidence interval of the DerSimonian and Laird summary risk ratio.} \item{weights}{the inverse variance and DerSimonian and Laird weights for each trial.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} \item{Hsq}{the relative excess of the heterogeneity test statistic \code{Q} over the degrees of freedom \code{df}.} \item{Isq}{the percentage of total variation in study estimates that is due to heterogeneity rather than chance.} \item{tau.sq}{the variance of the treatment effect among trials.} \item{effect}{a vector containing \code{z} the test statistic for overall treatment effect and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, 2001, pp. 291 - 299. DerSimonian R, Laird N (1986). Meta-analysis in clinical trials. Controlled Clinical Trials 7: 177 - 188. Higgins J, Thompson S (2002). Quantifying heterogeneity in a meta-analysis. Statistics in Medicine 21: 1539 - 1558. } \note{ Under the random-effects model, the assumption of a common treatment effect is relaxed, and the effect sizes are assumed to have a normal distribution with variance \code{tau.sq}. Using this method, the DerSimonian and Laird weights are used to compute the pooled odds ratio. The function checks each strata for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within the strata. } \seealso{ \code{\link{epi.iv}}, \code{\link{epi.mh}}, \code{\link{epi.smd}} } \examples{ data(epi.epidural) epi.dsl(ev.trt = epi.epidural$ev.trt, n.trt = epi.epidural$n.trt, ev.ctrl = epi.epidural$ev.ctrl, n.ctrl = epi.epidural$n.ctrl, names = as.character(epi.epidural$trial), method = "odds.ratio", alternative = "two.sided", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.epidural.Rd0000644000176200001440000000252712601641614014603 0ustar liggesusers\name{epi.epidural} \docType{data} \alias{epi.epidural} \title{Rates of use of epidural anaesthesia in trials of caregiver support} \description{ This data set provides results of six trials investigating rates of use of epidural anaesthesia during childbirth. Each trial is made up of a group where a caregiver (midwife, nurse) provided support intervention and a group where standard care was provided. 
The objective was to determine if there were higher rates of epidural use when a caregiver was present at birth. } \usage{data(epi.epidural)} \format{ A data frame with 6 observations on the following 5 variables. \describe{ \item{trial}{the name and year of the trial.} \item{ev.trt}{number of births in the caregiver group where an epidural was used.} \item{n.trt}{number of births in the caregiver group.} \item{ev.ctrl}{number of births in the standard care group where an epidural was used.} \item{n.ctrl}{number of births in the standard care group.} } } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, pp. 291 - 299. } \keyword{datasets} epiR/man/epi.detectsize.Rd0000644000176200001440000001237512601641614015143 0ustar liggesusers\name{epi.detectsize} \alias{epi.detectsize} \title{ Sample size to detect disease } \description{ Estimates the required sample size to detect disease. The method adjusts sample size estimates on the basis of test sensitivity and specificity and can account for series and parallel test interpretation. } \usage{ epi.detectsize(N, prev, se, sp, interpretation = "series", covar = c(0,0), conf.level = 0.95, finite.correction = TRUE) } \arguments{ \item{N}{a vector of length one or two defining the size of the population. The first element of the vector defines the number of clusters, the second element defines the mean number of sampling units per cluster.} \item{prev}{a vector of length one or two defining the prevalence of disease in the population. The first element of the vector defines the between-cluster prevalence, the second element defines the within-cluster prevalence.} \item{se}{a vector of length one or two defining the sensitivity of the test(s) used.} \item{sp}{a vector of length one or two defining the specificity of the test(s) used.} \item{interpretation}{a character string indicating how test results should be interpreted. Options are \code{series} or \code{parallel}.} \item{covar}{a vector of length two defining the covariance between test results for disease positive and disease negative groups. The first element of the vector is the covariance between test results for disease positive subjects. The second element of the vector is the covariance between test results for disease negative subjects. Use \code{covar = c(0,0)} (the default) if these values are not known.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} \item{finite.correction}{logical, should a finite correction factor be applied?} } \value{ A list containing the following: \item{performance}{The sensitivity and specificity of the testing strategy.} \item{sample.size}{The number of clusters, units, and total number of units to be sampled.} } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 47 and pp. 102 - 103. } \note{ The finite correction factor reduces the variance of the sample as the sample size approaches the population size. As a rule of thumb, set \code{finite.correction = TRUE} when the sample size is greater than 5\% of the population size.
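One widely used form of this adjustment, given here as a guide only (it may differ from the exact expression used internally), is \code{n.adj = n / (1 + (n / N))}, where \code{n} is the unadjusted sample size and \code{N} is the population size.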
Define \code{se1} and \code{se2} as the sensitivity for the first and second test, \code{sp1} and \code{sp2} as the specificity for the first and second test, \code{p111} as the proportion of disease-positive subjects with a positive test result to both tests and \code{p000} as the proportion of disease-negative subjects with a negative test result to both tests. The covariance between test results for the disease-positive group is \code{p111 - se1 * se2}. The covariance between test results for the disease-negative group is \code{p000 - sp1 * sp2}. } \examples{ ## EXAMPLE 1: ## We would like to confirm the absence of disease in a single 1000-cow ## dairy herd. We expect the prevalence of disease in the herd to be 5\%. ## We intend to use a single test with a sensitivity of 0.90 and a ## specificity of 0.80. How many samples should we take to be 95\% certain ## that, if all tests are negative, the disease is not present? epi.detectsize(N = 1000, prev = 0.05, se = 0.90, sp = 0.80, interpretation = "series", covar = c(0,0), conf.level = 0.95, finite.correction = TRUE) ## We need to sample 59 cows. ## EXAMPLE 2: ## We would like to confirm the absence of disease in a study area. If the ## disease is present we expect the between-herd prevalence to be 8\% and the ## within-herd prevalence to be 5\%. We intend to use two tests: the first has ## a sensitivity and specificity of 0.90 and 0.80, respectively. The second ## has a sensitivity and specificity of 0.95 and 0.85, respectively. The two ## tests will be interpreted in parallel. How many herds and cows within herds ## should we sample to be 95\% certain that the disease is not present in the ## study area if all tests are negative? The area is comprised of ## approximately 5000 herds and the average number of cows per herd is 100. epi.detectsize(N = c(5000, 100), prev = c(0.08, 0.05), se = c(0.90, 0.95), sp = c(0.80, 0.85), interpretation = "parallel", covar = c(0,0), conf.level = 0.95, finite.correction = TRUE) ## We need to sample 31 cows from 38 herds (a total of 1178 samples). ## The sensitivity of this testing regime is 99\%. The specificity of this ## testing regime is 68\%. ## EXAMPLE 3: ## You want to document the absence of Mycoplasma from a 200-sow pig herd. ## Based on your experience and the literature, a minimum of 20\% of sows ## would have seroconverted if Mycoplasma were present in the herd. How many ## sows do you need to sample? epi.detectsize(N = 200, prev = 0.20, se = 1.00, sp = 1.00, conf.level = 0.95, finite.correction = TRUE) ## If you test 12 sows and all test negative you can state that you are 95\% ## confident that the prevalence rate of Mycoplasma in the herd is less than ## 20\%. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line