epiR/0000755000176200001440000000000014166031357011157 5ustar liggesusersepiR/NAMESPACE0000644000176200001440000000710514166006603012375 0ustar liggesusers# Export all names. Internal (undocumented functions are not included) export(epi.2by2) export(epi.about) export(epi.asc) export(epi.betabuster) export(epi.blcm.paras) export(epi.bohning) export(epi.ccc) export(epi.conf) export(epi.convgrid) export(epi.cp) export(epi.cpresids) export(epi.descriptives) export(epi.dgamma) export(epi.directadj) export(epi.dms) export(epi.dsl) export(epi.edr) export(epi.empbayes) export(epi.herdtest) export(epi.indirectadj) export(epi.insthaz) export(epi.interaction) export(epi.iv) export(epi.kappa) export(epi.ltd) export(epi.mh) export(epi.nomogram) export(epi.occc) export(epi.offset) export(epi.pooled) export(epi.popsize) export(epi.prcc) export(epi.prev) export(epi.psi) export(epi.RtoBUGS) export(epi.smd) export(epi.smr) export(epi.sscc) export(epi.ssclus1estb) export(epi.ssclus1estc) export(epi.ssclus2estb) export(epi.ssclus2estc) export(epi.sscohortc) export(epi.sscohortt) export(epi.sscompb) export(epi.sscompc) export(epi.sscomps) export(epi.ssdetect) export(epi.ssdxsesp) export(epi.ssequb) export(epi.ssequc) export(epi.ssninfb) export(epi.ssninfc) export(epi.sssimpleestb) export(epi.sssimpleestc) export(epi.ssstrataestb) export(epi.ssstrataestc) export(epi.sssupb) export(epi.sssupc) export(epi.ssxsectn) export(epi.ssdxtest) export(epi.tests) # ------------------------------------------------------------------------------------ # Representative sampling - sample size probability of disease freedom: export(rsu.sspfree.rs) # Representative sampling - sample size surveillance system sensitivity: export(rsu.sssep.rs) export(rsu.sssep.rs2st) export(rsu.sssep.rsfreecalc) export(rsu.sssep.rspool) # Representative sampling - calculate surveillance system sensitivity: export(rsu.sep.rs) export(rsu.sep.rs2st) export(rsu.sep.rsmult) export(rsu.sep.rsfreecalc) export(rsu.sep.rspool) export(rsu.sep.rsvarse) # Representative sampling - calculate surveillance system specificity: export(rsu.spp.rs) # Representative sampling - probability of disease freedom: export(rsu.pfree.rs) export(rsu.pfree.equ) # ------------------------------------------------------------------------------------ # Risk-based sampling - sample size surveillance system sensitivity: export(rsu.sssep.rbsrg) export(rsu.sssep.rbmrg) export(rsu.sssep.rb2st1rf) export(rsu.sssep.rb2st2rf) # Risk-based sampling - calculate surveillance system sensitivity: export(rsu.sep.rb) export(rsu.sep.rb1rf) export(rsu.sep.rb2rf) export(rsu.sep.rbvarse) export(rsu.sep.rb2st) # Census: export(rsu.sep.cens) # Passive: export(rsu.sep.pass) # Miscellaneous functions: export(rsu.adjrisk) export(rsu.dxtest) export(rsu.epinf) export(rsu.pstar) export(rsu.sep) S3method(print, epi.2by2) S3method(summary, epi.2by2) S3method(print, epi.tests) S3method(summary, epi.tests) S3method(print, epi.occc) S3method(summary, epi.occc) # Import all packages listed as Imports or Depends import(survival) import(pander) import(sf) import(lubridate) importFrom(BiasedUrn, dFNCHypergeo) importFrom("graphics", "hist") importFrom("methods", "slot") importFrom("stats", "xtabs", "anova", "aov", "addmargins", "complete.cases", "chisq.test", "cor", "cov", "fisher.test", "mantelhaen.test", "model.matrix", "pbeta", "pbinom", "pchisq", "phyper", "pnorm", "pt", "qbeta", "qbinom", "qchisq", "qf", "qgamma", "qnorm", "qpois", "qt", "quantile", "rpois", "sd", "uniroot", "var", "vcov", "confint.default") 
importFrom("utils", "packageDescription", "write.table")epiR/data/0000755000176200001440000000000014165760076012076 5ustar liggesusersepiR/data/epi.incin.RData0000644000176200001440000001207613117711244014662 0ustar liggesusers g#g*%M`&ttP:w+Zݒ:{l$K2KZrӒai9z0uFUw{ޯΙч{YA <N {teRa_ˍh|/<'H\t}6صwR>ث)pksw+b-bV{ lo لCsy]O:k'Ho6ېc|%HEn=3-;Z^̝uخ܌w>H:7Ŏ\xMgk|lke=<_??H\8;%oetuy7{kX+W7<7 p/͋1ج}b=?51Ogq`Bc?wʕaGtR#Q=O/X~qc]Al0&^X+wqt枩|_v5/qZdi=]d?|W~PE=?!ՈiQY7j gf7)eUZm@z8Hz?_wOqϬ\GZ$4<</Liaw_|ƻgqϗurb詵yVnKTe/~ɽ*,_ 5I\:*½]zԚZgepE75L9oO2[<"86mkhn(py_|)w%9D~&'Cf5ͩKË%q-\CKrޠ9O'+1 ?u\9K创|"-+wC˯e3~"A5z+#2 ;<C.Κ+ψzXP[ĚI8^>8?$i4|\AWG=+"i9aXjN4?8:{M]=iїGỦN4f̫"CmXU>M⾩<'埁XTazԯ-bx3gݲm5kiVFS lW̎J4kagu5.5ړeyq:;?xF|̊DkF͒9}yLLf;U΋+taӫ h1i]Tr.b>LIPuN-MF~ qԝIgԡX}}Syخ9O,ȧkC[p_ԗ-37m}|m뗔\'Ũ%\M1WἊm|ݢ3FXsIm\ ZT:RG<+牖`+zp>)VkWqz|¾F|nt^_-̛4fqwpSl7eƬ}ܡ-Yӟ|-Vsbn`W7׳!}9.1gG$̼>3q2:Si׀C뫌- ԣmظGܧQ9m.XzQպ6g/Y3^S-aբw<33O[0G*.;kc^fAj;~W)\S{wLIWik\4yhq/yo -JӖQIa\/h^iL37ն]ë8QAMQ7+2w4%sm;+tVқKp\l 7a_0}N ,[V~?q} {A,V |i<3k7 ٱnK,;֯ OI6y(;-N VnGwM}o} T;_3d \Uœgw=v7A8jf_oa( n=~w7jO%__`Ŕs݈faA L==}|(\ϝ xc$WYyv>#?w9\VQ †WM|M60N|\}ދ֛`p{x<8n )]Į|7w>v$ .Ŝqv?aWV׊Zn0IE Aa턜&fCrwj}|S|~eա'ύyGwjp3EεHyD{1}.;A8+Vz,3?wasJw9{}{v6&n1&"jyK1G 8 oocyz}^49TβYoybӑYgw/aqe+0x.fWOn3x6Ow#pȈaƾ8є\MF&wSK'?3+jyԾyTQ}M_G:(@=bLҒŭMϏk+QwkԿ8Q\srX9u]{I|;%ep)4F>_GקME7Ο>Iy]joո29|¸xq3 IXn(i6'g@;Ĝ@=ܧ Ӫ~/*.ЁSr`\ҮWhDWΰ772{?_!'8>">{ >{V3~Kܳyc #t;gv")ybsj7x8Ի|͎]}g#%9GQ);2P]f$ս[z.JuWE6qNRlr=٪u 8{C޷ k5gkS6Jx#r-MRCгT_LY6b]Ғm4e,aMZ:wѓUu.'ti;Ŝaw`ZDiM0r vOx9y^__ҏV1sk/]E_ Q;SsxL{r2Cwql色A4cﰩj5DkIᲞO˶8(ۧ/EMUV/\L;\Yᰂe7c-^Q}hH2%rS{nTѨ~Um;t`-Qginf*YqwꝂs8o_{t mASx3۔ZտfŎZ{7^ >7{y]~Wp~W>4}/:7C 5k7b*;m{i]l'~fTzoifٻ\Mw;$ESV`ֹb4~e~t8C9pTwԡe<0{lʷ==Sr6Y\XgWʚ玬j ֻgA[WŸ圵agk}t^%\(G=Oa r:[|b#'󬆯Czz'v^kIcT>~Vgo5g8twpS?n$xOuO_tkz7Y^c l¯F`G3E|( z'I-k]=+>oZЦi:K.U}f?EؕW::Ltֺ,Wkj^kHym\,}UtyVǢ<)NxdͬT܏)}ihEmo~sH^~~/džz^rоI6qmF䜓bˎ+8ӯsh's|K;.p.{[5,SxR#=5~˽Sww&~~g(ܔz4qMfcLz?cl:gva{>OzNUϷ̉Uwc&#C>|>s9|>9w[^qqvכaP.׊WVzcr+WY\=wLo9Wx:tDӡC.epiR/data/epi.SClip.RData0000644000176200001440000000241613117711244014571 0ustar liggesusers eVMlE~k{כҦ?mH4ik7M8i+%p;W^FKHTHp8 !TN 8 NB o7M8|ޝ7{73o:cAG3| 2>D݋H1 "1؏A<FD<88D"A@CC5ʽ2!"1]0k>p,)'=vfY-_:lqjG2*[oa|5#n--3:Z!Mj7Sjō &\;w{s8Zh{{p:JQ0FOK̦@ڔ& v. |gt_GCgs]>CƵM\[H<;JT*@E0+}C1RE} %?iz$ɟi*\epiR/man/0000755000176200001440000000000014166006636011734 5ustar liggesusersepiR/man/epi.pooled.Rd0000644000176200001440000000422514164036762014265 0ustar liggesusers\name{epi.pooled} \alias{epi.pooled} \title{ Estimate herd test characteristics when pooled sampling is used } \description{ We may wish to designate a group of individuals (e.g., a herd) as being either diseased or non-diseased on the basis of pooled samples. This function estimates sensitivity and specificity of this testing regime at the group (or herd) level. } \usage{ epi.pooled(se, sp, P, m, r) } \arguments{ \item{se}{a vector of length one defining the sensitivity of the individual test used.} \item{sp}{a vector of length one defining the specificity of the individual test used.} \item{P}{scalar, defining the estimated true prevalence.} \item{m}{scalar, defining the number of individual samples to make up a pooled sample.} \item{r}{scalar, defining the number of pooled samples per group (or herd).} } \value{ A list containing the following: \item{HAPneg}{the apparent prevalence in a disease negative herd.} \item{HSe}{the estimated group (herd) level sensitivity.} \item{HSp}{the estimated group (herd) level specificity.} } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. 
AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 115 - 117 . Christensen J, Gardner IA (2000). Herd-level interpretation of test results for epidemiologic studies of animal diseases. Preventive Veterinary Medicine 45: 83 - 106. } \examples{ ## EXAMPLE 1: ## We want to test dairy herds for Johne's disease using faecal culture ## which has a sensitivity and specificity of 0.647 and 0.981, respectively. ## Suppose we pool faecal samples from five cows together and collect six ## pooled samples per herd. What is the herd level sensitivity and specificity ## based on this approach (assuming homogenous mixing)? epi.pooled(se = 0.647, sp = 0.981, P = 0.12, m = 5 , r = 6) ## Herd level sensitivity is 0.927, herd level specificity is 0.562. ## Sensitivity at the herd level is increased using the pooled sampling ## approach. Herd level specificity is decreased. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.sscomps.Rd0000644000176200001440000001031414164036763014467 0ustar liggesusers\name{epi.sscomps} \alias{epi.sscomps} \title{ Sample size, power and minimum detectable hazard when comparing time to event } \description{ Sample size, power and minimum detectable hazard when comparing time to event. } \usage{ epi.sscomps(treat, control, n, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{treat}{the expected value for the treatment group (see below).} \item{control}{the expected value for the control group (see below).} \item{n}{scalar, defining the total number of subjects in the study (i.e., the number in the treatment and control group).} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the treatment group divided by the number in the control group. This argument is ignored when \code{method = "proportions"}.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the outcome hazard in the exposed (treatment) group is greater than or less than the outcome hazard in the unexposed (control) group. Use a one-sided test to evaluate whether or not the outcome hazard in the exposed (treatment) group is greater than the outcome hazard in the unexposed (control) group.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ The argument \code{treat} is the proportion of treated subjects that will have not experienced the event of interest at the end of the study period and \code{control} is the proportion of control subjects that will have not experienced the event of interest at the end of the study period. See Therneau and Grambsch pp 61 - 65. } \value{ A list containing one or more of the following: \item{n.crude}{the crude estimated total number of events required for the specified level of confidence and power.} \item{n.total}{the total estimated number of events required for the specified level of confidence and power, respecting the requirement for \code{r} times as many events in the treatment group compared with the control group.} \item{hazard}{the minimum detectable hazard ratio >1 and the maximum detectable hazard ratio <1.} \item{power}{the power of the study given the number of events, the expected hazard ratio and level of confidence.} } \references{ Therneau TM, Grambsch PM (2000). Modelling Survival Data - Extending the Cox Model. 
Springer, London, pp. 61 - 65. Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 295 - 329. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. See the documentation for \code{\link{epi.sscohortc}} for an example using the \code{design} facility implemented in this function. } \examples{ ## EXAMPLE 1 (from Therneau and Grambsch 2000 p. 63): ## The 5-year survival probability of patients receiving a standard treatment ## is 0.30 and we anticipate that a new treatment will increase it to 0.45. ## Assume that a study will use a two-sided test at the 0.05 level with 0.90 ## power to detect this difference. How many events are required? epi.sscomps(treat = 0.45, control = 0.30, n = NA, power = 0.90, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) ## A total of 250 events are required. Assuming one event per individual, ## assign 125 individuals to the treatment group and 125 to the control group. ## EXAMPLE 2 (from Therneau and Grambsch 2000 p. 63): ## What is the minimum detectable hazard in a study involving 500 subjects where ## the treatment to control ratio is 1:1, assuming a power of 0.90 and a ## 2-sided test at the 0.05 level? epi.sscomps(treat = NA, control = NA, n = 500, power = 0.90, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) ## Assuming treatment increases time to event (compared with controls), the ## minimum detectable hazard of a study involving 500 subjects (250 in the ## treatment group and 250 in the controls) is 1.33. } \keyword{univar}
epiR/man/epi.directadj.Rd0000644000176200001440000002223214164036762014732 0ustar liggesusers
\name{epi.directadj} \alias{epi.directadj} \title{Directly adjusted incidence rate estimates} \description{ Compute directly adjusted incidence rate estimates.} \usage{ epi.directadj(obs, tar, std, units = 1, conf.level = 0.95) } \arguments{ \item{obs}{a matrix representing the observed number of events. Rows represent strata (e.g., region); columns represent the variables to be adjusted for (e.g., age class, gender). The sum of each row will equal the total number of events for each stratum. The rows of the \code{obs} matrix must be named with the appropriate strata names and the columns of \code{obs} must be named with the appropriate level identifiers for each explanatory variable. See the example, below.} \item{tar}{a matrix representing population time at risk. Rows represent strata (e.g., region); columns represent the variables to be adjusted for (e.g., age class, gender). The sum of each row will equal the total population time at risk for each stratum. The rows of the \code{tar} matrix must be named with the appropriate strata names and the columns of \code{tar} must be named with the appropriate level identifiers for each explanatory variable. See the example, below.} \item{std}{a matrix representing the standard population size for the different levels of the covariate to be adjusted for. The columns of \code{std} must be named with the appropriate level identifiers for each explanatory variable.} \item{units}{multiplier for the incidence rate estimates.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ This function returns unadjusted (crude) and directly adjusted incidence rate estimates for each of the specified population strata.
The term `covariate' is used here to refer to the factors we want to control (i.e., adjust) for when calculating the directly adjusted incidence rate estimates. When the outcome of interest is rare, the confidence intervals for the adjusted incidence rates returned by this function (based on Fay and Feuer, 1997) will be appropriate for incidence risk data. In this situation the argument \code{tar} is assumed to represent the size of the population at risk (instead of population time at risk). Example 4 (below) provides an approach if you are working with incidence risk data and the outcome of interest is not rare. } \value{ A list containing the following: \item{crude}{the crude incidence rate estimates for each stratum-covariate combination.} \item{crude.strata}{the crude incidence rate estimates for each stratum.} \item{adj.strata}{the directly adjusted incidence rate estimates for each stratum.} } \references{ Fay M, Feuer E (1997). Confidence intervals for directly standardized rates: A method based on the gamma distribution. Statistics in Medicine 16: 791 - 801. Fleiss JL (1981). Statistical Methods for Rates and Proportions, Wiley, New York, USA, pp. 240. Frome E, Checkoway H (1985). Use of Poisson regression models in estimating incidence rates and ratios. American Journal of Epidemiology 121: 309 - 323. Haneuse S, Rothman KJ. Stratification and Standardization. In: Lash TL, VanderWeele TJ, Haneuse S, Rothman KJ (2021). Modern Epidemiology. Lippincott - Raven Philadelphia, USA, pp. 415 - 445. Thrusfield M (2007). Veterinary Epidemiology, Blackwell Publishing, London, UK, pp. 63 - 64. Wilcosky T, Chambless L (1985). A comparison of direct adjustment and regression adjustment of epidemiologic measures. Journal of Chronic Diseases 38: 849 - 956. } \author{ Thanks to Karl Ove Hufthammer for helpful suggestions to improve the execution and documentation of this function. } \seealso{ \code{\link{epi.indirectadj}} } \examples{ ## EXAMPLE 1 (from Thrusfield 2007 pp. 63 - 64): ## A study was conducted to estimate the seroprevalence of leptospirosis in ## dogs in Glasgow and Edinburgh, Scotland. Data frame dat.df01 lists counts ## of leptospirosis cases and the number of dog years at risk for male and ## female dogs: dat.df01 <- data.frame(obs = c(15,46,53,16), tar = c(48,212,180,71), sex = c("M","F","M","F"), city = c("ED","ED","GL","GL")) obs01 <- matrix(dat.df01$obs, nrow = 2, byrow = TRUE, dimnames = list(c("ED","GL"), c("M","F"))) tar01 <- matrix(dat.df01$tar, nrow = 2, byrow = TRUE, dimnames = list(c("ED","GL"), c("M","F"))) ## Create a standard population with equal numbers of male and female dogs: std01 <- matrix(data = c(250,250), nrow = 1, byrow = TRUE, dimnames = list("", c("M","F"))) ## Directly adjusted incidence rates: epi.directadj(obs01, tar01, std01, units = 1, conf.level = 0.95) ## $crude ## strata cov obs tar est lower upper ## ED M 15 48 0.3125000 0.1749039 0.5154212 ## GL M 53 180 0.2944444 0.2205591 0.3851406 ## ED F 46 212 0.2169811 0.1588575 0.2894224 ## GL F 16 71 0.2253521 0.1288082 0.3659577 ## $crude.strata ## strata obs tar est lower upper ## ED 61 260 0.2346154 0.1794622 0.3013733 ## GL 69 251 0.2749004 0.2138889 0.3479040 ## $adj.strata ## strata obs tar est lower upper ## ED 61 260 0.2647406 0.1866047 0.3692766 ## GL 69 251 0.2598983 0.1964162 0.3406224 ## The adjusted incidence rate of leptospirosis in Glasgow dogs is 26 (95\% ## CI 20 to 34) cases per 100 dog-years at risk.
The confounding effect of ## gender has been removed by the adjusted incidence rate estimates. ## EXAMPLE 2: ## Here we provide a more flexible approach for calculating ## adjusted incidence rate estimates using Poisson regression. See Frome and ## Checkoway (1985) for details. dat.glm02 <- glm(obs ~ city, offset = log(tar), family = poisson, data = dat.df01) summary(dat.glm02) ## To obtain adjusted incidence rate estimates, use the predict method on a ## new data set with the time at risk (tar) variable set to 1 (which means ## log(tar) = 0). This will return the predicted number of cases per one unit ## of individual time, i.e., the incidence rate. dat.pred02 <- predict(object = dat.glm02, newdata = data.frame(city = c("ED","GL"), tar = c(1,1)), type = "link", se = TRUE) conf.level <- 0.95 critval <- qnorm(p = 1 - ((1 - conf.level) / 2), mean = 0, sd = 1) est <- dat.glm02$family$linkinv(dat.pred02$fit) lower <- dat.glm02$family$linkinv(dat.pred02$fit - (critval * dat.pred02$se.fit)) upper <- dat.glm02$family$linkinv(dat.pred02$fit + (critval * dat.pred02$se.fit)) round(x = data.frame(est, lower, upper), digits = 3) ## est lower upper ## 0.235 0.183 0.302 ## 0.275 0.217 0.348 ## Results identical to the crude incidence rate estimates from epi.directadj. ## EXAMPLE 3: ## Now adjust for the effect of gender and city and report the adjusted ## incidence rate estimates for each city: dat.glm03 <- glm(obs ~ city + sex, offset = log(tar), family = poisson, data = dat.df01) dat.pred03 <- predict(object = dat.glm03, newdata = data.frame(sex = c("F","F"), city = c("ED","GL"), tar = c(1,1)), type = "link", se.fit = TRUE) conf.level <- 0.95 critval <- qnorm(p = 1 - ((1 - conf.level) / 2), mean = 0, sd = 1) est <- dat.glm03$family$linkinv(dat.pred03$fit) lower <- dat.glm03$family$linkinv(dat.pred03$fit - (critval * dat.pred03$se.fit)) upper <- dat.glm03$family$linkinv(dat.pred03$fit + (critval * dat.pred03$se.fit)) round(x = data.frame(est, lower, upper), digits = 3) ## est lower upper ## 0.220 0.168 0.287 ## 0.217 0.146 0.323 ## Using Poisson regression the gender adjusted incidence rate of leptospirosis ## in Glasgow dogs was 22 (95\% CI 15 to 32) cases per 100 dog-years at risk. ## These results won't be the same as those using direct adjustment because ## for direct adjustment we use a contrived standard population. ## EXAMPLE 4 --- Logistic regression to return adjusted incidence risk ## estimates: ## Say, for argument's sake, that we are now working with incidence risk data. ## Here we'll re-label the variable 'tar' (time at risk) as 'pop' ## (population size). We adjust for the effect of gender and city and ## report the adjusted incidence risk of canine leptospirosis estimates for ## each city: dat.df01$pop <- dat.df01$tar dat.glm04 <- glm(cbind(obs, pop - obs) ~ city + sex, family = "binomial", data = dat.df01) dat.pred04 <- predict(object = dat.glm04, newdata = data.frame(sex = c("F","F"), city = c("ED","GL")), type = "link", se.fit = TRUE) conf.level <- 0.95 critval <- qnorm(p = 1 - ((1 - conf.level) / 2), mean = 0, sd = 1) est <- dat.glm04$family$linkinv(dat.pred04$fit) lower <- dat.glm04$family$linkinv(dat.pred04$fit - (critval * dat.pred04$se.fit)) upper <- dat.glm04$family$linkinv(dat.pred04$fit + (critval * dat.pred04$se.fit)) round(x = data.frame(est, lower, upper), digits = 3) ## est lower upper ## 0.220 0.172 0.276 ## 0.217 0.150 0.304 ## The adjusted incidence risk of leptospirosis in Glasgow dogs is 22 (95\% ## CI 15 to 30) cases per 100 dogs at risk. 
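## An informal arithmetic check of the direct adjustment in EXAMPLE 1 (a
## sketch only, using the sex-specific crude estimates listed above): each
## directly adjusted rate is the average of the sex-specific crude rates
## weighted by the standard population, here 250 males and 250 females
## (i.e., equal weights):
(0.3125000 * 250 + 0.2169811 * 250) / 500 ## 0.2647, matches adj.strata for ED
(0.2944444 * 250 + 0.2253521 * 250) / 500 ## 0.2599, matches adj.strata for GL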
} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{univar} \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.about.Rd0000644000176200001440000003404114164036761014113 0ustar liggesusers\name{epi.about} \alias{epi.about} \title{The library epiR: summary information} \description{ Tools for the analysis of epidemiological data. } \usage{ epi.about() } \details{ More information about the \code{epiR} package can be found at \url{https://fvas.unimelb.edu.au/research/groups/veterinary-epidemiology-melbourne} and \url{https://www.ausvet.com.au/}. } \section{FUNCTIONS AND DATASETS}{ The following is a summary of the main functions and datasets in the \pkg{epiR} package. An alphabetical list of all functions and datasets is available by typing \code{library(help = epiR)}. For further information on any of these functions, type \code{help(name)} or \code{?name} where \code{name} is the name of the function or dataset. For details on how to use \pkg{epiR} for routine epidemiological work start R, type \code{help.start()} to open the help browser and navigate to \code{Packages > epiR > Vignettes}. } \section{CONTENTS:}{ The functions in \pkg{epiR} can be categorised into two main groups: tools for epidemioliological analysis and tools for the analysis of surveillance data. A summary of the package functions is as follows: } \section{I. EPIDEMIOLOGY}{ \subsection{1. Descriptive statistics}{ \tabular{ll}{ \code{\link{epi.conf}} \tab Confidence intervals. \cr \code{\link{epi.descriptives}} \tab Descriptive statistics. \cr } } \subsection{2. Measures of health and measures of association}{ \tabular{ll}{ \code{\link{epi.directadj}} \tab Directly adjusted incidence rate estimates. \cr \code{\link{epi.edr}} \tab Compute estimated dissemination ratios from outbreak event data. \cr \code{\link{epi.empbayes}} \tab Empirical Bayes estimates of observed event counts. \cr \code{\link{epi.indirectadj}} \tab Indirectly adjusted incidence risk estimates. \cr \code{\link{epi.insthaz}} \tab Instantaneous hazard estimates based on Kaplan-Meier survival estimates. \cr \code{\link{epi.2by2}} \tab Measures of association from data presented in a 2 by 2 table. \cr } } \subsection{3. Diagnostic tests}{ \tabular{ll}{ \code{\link{epi.betabuster}} \tab An R version of Wes Johnson and Chun-Lung Su's Betabuster. \cr \code{\link{epi.herdtest}} \tab Estimate the characteristics of diagnostic tests applied at the herd (group) level. \cr \code{\link{epi.nomogram}} \tab Compute the post-test probability of disease given characteristics of a diagnostic test. \cr \code{\link{epi.pooled}} \tab Estimate herd test characteristics when samples are pooled. \cr \code{\link{epi.prev}} \tab Compute the true prevalence of a disease in a population on the basis of an imperfect test. \cr \code{\link{epi.tests}} \tab Sensitivity, specificity and predictive value of a diagnostic test. \cr } } \subsection{4. Meta-analysis}{ \tabular{ll}{ \code{\link{epi.dsl}} \tab Mixed-effects meta-analysis of binary outcome data using the DerSimonian and Laird method. \cr \code{\link{epi.iv}} \tab Fixed-effects meta-analysis of binary outcome data using the inverse variance method. \cr \code{\link{epi.mh}} \tab Fixed-effects meta-analysis of binary outcome data using the Mantel-Haenszel method. \cr \code{\link{epi.smd}} \tab Fixed-effects meta-analysis of continuous outcome data using the standardised mean difference method. \cr } } \subsection{5. 
Regression analysis tools}{ \tabular{ll}{ \code{\link{epi.cp}} \tab Extract unique covariate patterns from a data set. \cr \code{\link{epi.cpresids}} \tab Compute covariate pattern residuals from a logistic regression model. \cr \code{\link{epi.interaction}} \tab Relative excess risk due to interaction in a case-control study. \cr } } \subsection{6. Data manipulation tools}{ \tabular{ll}{ \code{\link{epi.asc}} \tab Write matrix to an ASCII raster file. \cr \code{\link{epi.convgrid}} \tab Convert British National Grid georeferences to easting and northing coordinates. \cr \code{\link{epi.dms}} \tab Convert decimal degrees to degrees, minutes and seconds and vice versa. \cr \code{\link{epi.ltd}} \tab Calculate lactation to date and standard lactation (that is, 305 or 270 day) milk yields. \cr \code{\link{epi.offset}} \tab Create an offset vector based on a list suitable for WinBUGS. \cr \code{\link{epi.RtoBUGS}} \tab Write data from an R list to a text file in WinBUGS-compatible format. \cr } } \subsection{7. Sample size calculations}{ The naming convention for the sample size functions in \pkg{epiR} is: \code{epi.ss} (sample size) + an abbreviation to represent the sampling design (e.g., \code{simple}, \code{strata}, \code{clus1}, \code{clus2}) + an abbreviation of the objectives of the study (\code{est} when you want to estimate a population parameter or \code{comp} when you want to compare two groups) + a single letter defining the outcome variable type (\code{b} for binary, \code{c} for continuous and \code{s} for survival data). \tabular{ll}{ \code{\link{epi.sssimpleestb}} \tab Sample size to estimate a binary outcome using simple random sampling. \cr \code{\link{epi.sssimpleestc}} \tab Sample size to estimate a continuous outcome using simple random sampling. \cr \tab \cr \code{\link{epi.ssstrataestb}} \tab Sample size to estimate a binary outcome using stratified random sampling. \cr \code{\link{epi.ssstrataestc}} \tab Sample size to estimate a continuous outcome using stratified random sampling. \cr \tab \cr \code{\link{epi.ssclus1estb}} \tab Sample size to estimate a binary outcome using one-stage cluster sampling. \cr \code{\link{epi.ssclus1estc}} \tab Sample size to estimate a continuous outcome using one-stage cluster sampling. \cr \tab \cr \code{\link{epi.ssclus2estb}} \tab Sample size to estimate a binary outcome using two-stage cluster sampling. \cr \code{\link{epi.ssclus2estc}} \tab Sample size to estimate a continuous outcome using two-stage cluster sampling. \cr \tab \cr \code{\link{epi.ssxsectn}} \tab Sample size, power or detectable prevalence ratio for a cross-sectional study. \cr \code{\link{epi.sscohortc}} \tab Sample size, power or detectable risk ratio for a cohort study using count data. \cr \code{\link{epi.sscohortt}} \tab Sample size, power or detectable risk ratio for a cohort study using time at risk data. \cr \code{\link{epi.sscc}} \tab Sample size, power or detectable odds ratio for case-control studies. \cr \tab \cr \code{\link{epi.sscompb}} \tab Sample size, power and detectable risk ratio when comparing binary outcomes. \cr \code{\link{epi.sscompc}} \tab Sample size, power and detectable risk ratio when comparing continuous outcomes. \cr \code{\link{epi.sscomps}} \tab Sample size, power and detectable hazard when comparing time to event. \cr \tab \cr \code{\link{epi.ssequb}} \tab Sample size for a parallel equivalence trial, binary outcome. \cr \code{\link{epi.ssequc}} \tab Sample size for a parallel equivalence trial, continuous outcome. 
\cr \tab \cr \code{\link{epi.sssupb}} \tab Sample size for a parallel superiority trial, binary outcome. \cr \code{\link{epi.sssupc}} \tab Sample size for a parallel superiority trial, continuous outcome. \cr \tab \cr \code{\link{epi.ssninfb}} \tab Sample size for a non-inferiority trial, binary outcome. \cr \code{\link{epi.ssninfc}} \tab Sample size for a non-inferiority trial, continuous outcome. \cr \tab \cr \code{\link{epi.ssdetect}} \tab Sample size to detect an event. \cr \code{\link{epi.ssdxsesp}} \tab Sample size to estimate the sensitivity or specificity of a diagnostic test. \cr \code{\link{epi.ssdxtest}} \tab Sample size to validate a diagnostic test in the absence of a gold standard. \cr } } \subsection{8. Miscellaneous functions}{ \tabular{ll}{ \code{\link{epi.prcc}} \tab Compute partial rank correlation coefficients. \cr \code{\link{epi.psi}} \tab Compute proportional similarity indices. \cr } } \subsection{9. Data sets}{ \tabular{ll}{ \code{\link{epi.epidural}} \tab Rates of use of epidural anaesthesia in trials of caregiver support. \cr \code{\link{epi.incin}} \tab Laryngeal and lung cancer cases in Lancashire 1974 - 1983. \cr \code{\link{epi.SClip}} \tab Lip cancer in Scotland 1975 - 1980. \cr } } } \section{II. SURVEILLANCE}{ Below, SSe stands for surveillance system sensitivity. That is, the average probability that a surveillance system (as a whole) will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than a specified design prevalence. \subsection{1. Representative sampling --- sample size}{ \tabular{ll}{ \code{\link{rsu.sspfree.rs}} \tab Defined probability of disease freedom.\cr \code{\link{rsu.sssep.rs}} \tab SSe, perfect test specificity. \cr \code{\link{rsu.sssep.rs2st}} \tab SSe, two stage sampling. \cr \code{\link{rsu.sssep.rsfreecalc}} \tab SSe, imperfect test specificity. \cr \code{\link{rsu.sssep.rspool}} \tab SSe, pooled sampling. \cr } } \subsection{2. Representative sampling --- surveillance system sensitivity and specificity}{ \tabular{ll}{ \code{\link{rsu.sep.rs}} \tab SSe, representative sampling. \cr \code{\link{rsu.sep.rs2st}} \tab SSe, representative two-stage sampling. \cr \code{\link{rsu.sep.rsmult}} \tab SSe, representative multiple surveillance components. \cr \code{\link{rsu.sep.rsfreecalc}} \tab SSe, imperfect test specificity. \cr \code{\link{rsu.sep.rspool}} \tab SSe, representative pooled sampling. \cr \code{\link{rsu.sep.rsvarse}} \tab SSe, varying surveillance unit sensitivity. \cr \code{\link{rsu.spp.rs}} \tab Surveillance system specificity. \cr } } \subsection{3. Representative sampling --- probability of disease freedom}{ \tabular{ll}{ \code{\link{rsu.pfree.rs}} \tab Probability of disease freedom for a single or multiple time periods. \cr \code{\link{rsu.pfree.equ}} \tab Equilibrium probability of disease freedom. \cr } } \subsection{4. Risk-based sampling --- sample size}{ \tabular{ll}{ \code{\link{rsu.sssep.rbsrg}} \tab SSe, single sensitivity for each risk group. \cr \code{\link{rsu.sssep.rbmrg}} \tab SSe, multiple sensitivities within risk groups. \cr \code{\link{rsu.sssep.rb2st1rf}} \tab SSe, 2 stage sampling, 1 risk factor. \cr \code{\link{rsu.sssep.rb2st2rf}} \tab SSe, 2 stage sampling, 2 risk factors. \cr } } \subsection{5. Risk-based sampling --- surveillance system sensitivity and specificity}{ \tabular{ll}{ \code{\link{rsu.sep.rb}} \tab SSe, risk-based sampling. \cr \code{\link{rsu.sep.rb1rf}} \tab SSe, risk-based sampling, 1 risk factor. 
\cr \code{\link{rsu.sep.rb2rf}} \tab SSe, risk-based sampling, 2 risk factors. \cr \code{\link{rsu.sep.rbvarse}} \tab SSe, risk-based sampling, varying unit sensitivity. \cr \code{\link{rsu.sep.rb2st}} \tab SSe, 2-stage risk-based sampling. \cr } } \subsection{6. Risk-based sampling --- probability of disease freedom}{ \tabular{ll}{ \code{\link{rsu.pfree.equ}} \tab Equilibrium probability of disease freedom. \cr } } \subsection{7. Census sampling --- surveillance system sensitivity}{ \tabular{ll}{ \code{\link{rsu.sep.cens}} \tab SSe, census sampling. \cr } } \subsection{8. Passive surveillance --- surveillance system sensitivity}{ \tabular{ll}{ \code{\link{rsu.sep.pass}} \tab SSe, passive surveillance. \cr } } \subsection{9. Miscellaneous functions}{ \tabular{ll}{ \code{\link{rsu.adjrisk}} \tab Adjusted risk values. \cr \code{\link{rsu.dxtest}} \tab Series and parallel diagnostic test interpretation. \cr \code{\link{rsu.epinf}} \tab Effective probability of disease. \cr \code{\link{rsu.pstar}} \tab Design prevalence back calculation. \cr \code{\link{rsu.sep}} \tab Probability disease is less than specified design prevalence. \cr } } } \author{ Mark Stevenson (\email{mark.stevenson1@unimelb.edu.au}), Faculty of Veterinary and Agricultural Sciences, University of Melbourne, Parkville Victoria 3010, Australia. Evan Sergeant (\email{evansergeant@gmail.com}), Ausvet Pty Ltd, Level 1 34 Thynne St, Bruce ACT 2617, Australia. Simon Firestone, Faculty of Veterinary and Agricultural Sciences, University of Melbourne, Parkville Victoria 3010, Australia. Telmo Nunes, UISEE/DETSA, Faculdade de Medicina Veterinaria --- UTL, Rua Prof. Cid dos Santos, 1300 - 477 Lisboa Portugal. Javier Sanchez, Atlantic Veterinary College, University of Prince Edward Island, Charlottetown Prince Edward Island, C1A 4P3, Canada. Ron Thornton, Ministry for Primary Industries New Zealand, PO Box 2526 Wellington, New Zealand. } \keyword{univar} epiR/man/epi.ssclus2estb.Rd0000644000176200001440000001154214164044605015252 0ustar liggesusers\name{epi.ssclus2estb} \alias{epi.ssclus2estb} \title{Number of clusters to be sampled to estimate a binary outcome using two-stage cluster sampling } \description{ Number of clusters to be sampled to estimate a binary outcome using two-stage cluster sampling. } \usage{ epi.ssclus2estb(b, Py, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{b}{scalar integer or vector of length two, the number of individual listing units in each cluster to be sampled. See details, below.} \item{Py}{scalar number, an estimate of the unknown population proportion.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{rho}{scalar number, the intracluster correlation.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ In many situations it is common for sampling units to be aggregated into clusters. Typical examples include individuals within households, children within classes (within schools) and cows within herds. We use the term primary sampling unit (PSU) to refer to what gets sampled first (clusters) and secondary sampling unit (SSU) to refer to what gets sampled second (individual listing units within each cluster). 
In this documentation the terms primary sampling unit and cluster are used interchangeably. Similarly, the terms secondary sampling unit and individual listing units are used interchangeably. \code{b} as a scalar integer represents the total number of individual listing units from each cluster to be sampled. If \code{b} is a vector of length two the first element represents the mean number of individual listing units to be sampled from each cluster and the second element represents the standard deviation of the number of individual listing units to be sampled from each cluster. The methodology used in this function closely follows the approach described by Bennett et al. (1991). At least 25 PSUs are recommended for two-stage cluster sampling designs. If fewer than 25 PSUs are returned by the function a warning is issued. As a rule of thumb, around 30 PSUs will provide good estimates of the true population value with an acceptable level of precision (Binkin et al. 1992) when: (1) the true population value is between 10\% and 90\%; and (2) the desired absolute error is around 5\%. For a fixed number of individual listing units selected per cluster (e.g., 10 individuals per cluster or 30 individuals per cluster), collecting information on more than 30 clusters can improve the precision of the final population estimate; however, beyond around 60 clusters the improvement in precision is minimal. } \value{ A list containing the following: \item{n.psu}{the total number of primary sampling units (clusters) to be sampled for the specified level of confidence and relative error.} \item{n.ssu}{the total number of secondary sampling units to be sampled for the specified level of confidence and relative error.} \item{DEF}{the design effect.} \item{rho}{the intracluster correlation, as entered by the user.} } \references{ Bennett S, Woods T, Liyanage W, Smith D (1991). A simplified general method for cluster-sample surveys of health in developing countries. World Health Statistics Quarterly 44: 98 - 106. Binkin N, Sullivan K, Staehling N, Nieburg P (1992). Rapid nutrition surveys: How many clusters are enough? Disasters 16: 97 - 103. Machin D, Campbell MJ, Tan SB, Tan SH (2018). Sample Sizes for Clinical, Laboratory and Epidemiological Studies, Fourth Edition. Wiley Blackwell, London, pp. 195 - 214. } \examples{ ## EXAMPLE 1 (from Bennett et al. 1991 p 102): ## We intend to conduct a cross-sectional study to determine the prevalence ## of disease X in a given country. The expected prevalence of disease is ## thought to be around 20\%. Previous studies report an intracluster ## correlation coefficient for this disease to be 0.02. Suppose that we want ## to be 95\% certain that our estimate of the prevalence of disease is ## within 5\% of the true population value and that we intend to sample 20 ## individuals per cluster. How many clusters should be sampled to meet ## the requirements of the study? epi.ssclus2estb(b = 20, Py = 0.20, epsilon = 0.05, error = "absolute", rho = 0.02, nfractional = FALSE, conf.level = 0.95) ## A total of 17 clusters need to be sampled to meet the specifications ## of this study. epi.ssclus2estb returns a warning message that the number of ## clusters is less than 25.
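## A sketch of the vector form of the b argument (the study parameters from
## EXAMPLE 1 are re-used here purely for illustration): if the number of
## individuals sampled per cluster is expected to vary, say a mean of 20
## individuals per cluster with a standard deviation of 5, b can be supplied
## as a vector of length two:
epi.ssclus2estb(b = c(20,5), Py = 0.20, epsilon = 0.05,
   error = "absolute", rho = 0.02, nfractional = FALSE, conf.level = 0.95)
## Allowing for variation in the number of individuals sampled per cluster
## increases the design effect, so the number of clusters returned will
## generally be larger than in EXAMPLE 1.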
} \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.dgamma.Rd0000644000176200001440000000340314156505274014226 0ustar liggesusers\name{epi.dgamma} \alias{epi.dgamma} \title{Estimate the precision of a [structured] heterogeneity term } \description{ Returns the precision of a [structured] heterogeneity term after one has specified the amount of variation a priori. } \usage{ epi.dgamma(rr, quantiles = c(0.05, 0.95)) } \arguments{ \item{rr}{the lower and upper limits of relative risk, estimated \emph{a priori}.} \item{quantiles}{a vector of length two defining the quantiles of the lower and upper relative risk estimates.} } \value{ Returns the precision (the inverse variance) of the heterogeneity term. } \references{ Best, NG. WinBUGS 1.3.1 Short Course, Brisbane Australia, November 2000. } \examples{ ## EXAMPLE 1: ## Suppose we are expecting the lower 5\% and upper 95\% confidence interval ## of relative risk in a data set to be 0.5 and 3.0, respectively. ## A prior estimate of the precision of the heterogeneity term would be: tau <- epi.dgamma(rr = c(0.5, 3.0), quantiles = c(0.05, 0.95)) tau ## The estimate of the precision of the heterogeneity term (tau) is 3.37. ## This can be re-expressed using the gamma distribution. We set the mean of the ## distribution as tau and specify a large variance (that is, we are not ## certain about tau). mean <- tau; var <- 1000 shape <- mean^2 / var inv.scale <- mean / var ## In WinBUGS the precision of the heterogeneity term is parameterised ## as tau ~ dgamma(shape, inv.scale). Plot the probability density function ## of tau: z <- seq(0.01, 10, by = 0.01) fz <- dgamma(z, shape = shape, scale = 1 / inv.scale) plot(x = z, y = fz, type = "l", ylab = "Probability density of tau") } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sep.Rd0000644000176200001440000000653614164037175013634 0ustar liggesusers\name{rsu.sep} \alias{rsu.sep} \title{ Probability that the prevalence of disease in a population is less than or equal to a specified design prevalence } \description{ Calculates the probability that the prevalence of disease in a population is less than or equal to a specified design prevalence following return of a specified number of negative test results. } \usage{ rsu.sep(N, n, pstar, se.u) } \arguments{ \item{N}{scalar or vector, integer representing the population size.} \item{n}{scalar or vector, integer representing the number of units sampled.} \item{pstar}{scalar or vector of the same length as \code{n} representing the desired design prevalence.} \item{se.u}{scalar or vector of the same length as \code{n} representing the unit sensitivity.} } \value{ A vector of the estimated probability that the prevalence of disease in the population is less than or equal to the specified design prevalence. } \references{ MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \examples{ ## EXAMPLE 1: ## The population size in a provincial area is 193,000. In a given two- ## week period 7764 individuals have been tested for COVID-19 using an ## approved PCR test which is believed to have a diagnostic sensitivity of ## 0.85. All individuals have returned a negative result. 
What is the ## probability that the prevalence of COVID-19 in this population is less ## than or equal to 100 cases per 100,000? rsu.sep(N = 193000, n = 7764, pstar = 100 / 100000, se.u = 0.85) ## If all of the 7764 individuals returned a negative test we can be more than ## 99\% confident that the prevalence of COVID-19 in the province is less ## than 100 per 100,000. ## EXAMPLE 2: ## What is the probability that the prevalence of COVID-19 is less than or ## equal to 10 cases per 100,000? rsu.sep(N = 193000, n = 7764, pstar = 10 / 100000, se.u = 0.85) ## If all of the 7764 individuals returned a negative test we can be 49\% ## confident that the prevalence of COVID-19 in the province is less ## than 10 per 100,000. ## EXAMPLE 3: ## In a population of 1000 individuals 474 have been tested for disease X ## using a test with diagnostic sensitivity of 0.95. If all individuals tested ## have returned a negative result what is the maximum prevalence expected ## if disease is actually present in the population (i.e., what is the design ## prevalence)? pstar <- rsu.pstar(N = 1000, n = 474, se.p = 0.95, se.u = 0.95) pstar ## If 474 individuals are tested from a population of 1000 and each returns a ## negative result we can be 95\% confident that the maximum prevalence (if ## disease is actually present in the population) is 0.005. ## Confirm these calculations using function rsu.sep. If 474 individuals out ## of a population of 1000 are tested using a test with diagnostic sensitivity ## 0.95 and all return a negative result how confident can we be that the ## prevalence of disease in this population is 0.005 or less? rsu.sep(N = 1000, n = 474, pstar = pstar, se.u = 0.95) ## The surveillance system sensitivity is 0.95. } \keyword{methods} epiR/man/epi.ssdxtest.Rd0000644000176200001440000001343314164045054014657 0ustar liggesusers\name{epi.ssdxtest} \alias{epi.ssdxtest} \title{ Sample size to validate a diagnostic test in the absence of a gold standard } \description{ Sample size to validate a diagnostic test in the absence of a gold standard. 
} \usage{ epi.ssdxtest(pi, se, sp, epsilon.api, epsilon.ase, epsilon.asp, epsilon.asesp, r = 1, nfractional = FALSE, verbose = FALSE, conf.level = 0.95) } \arguments{ \item{pi}{vector of length two, the expected prevalence of the outcome of interest in the two populations (0 to 1), respectively.} \item{se}{vector of length two, the expected diagnostic sensitivity of the first and second test (0 to 1), respectively.} \item{sp}{vector of length two, the expected diagnostic specificity of the first and second test (0 to 1), respectively.} \item{epsilon.api}{vector of length two, the absolute error for the prevalence of the outcome of interest in the first and second study populations.} \item{epsilon.ase}{vector of length two, the absolute error for the sensitivity estimate of the first and second test.} \item{epsilon.asp}{vector of length two, the absolute error for the specificity estimate of the first and second test.} \item{epsilon.asesp}{vector of length two, the absolute error for the difference in the two sensitivity and specificity estimates.} \item{r}{scalar, the required number in the second population divided by the number in the first population.} \item{nfractional}{logical, return fractional sample size.} \item{verbose}{logical, return sample size estimates for \code{se}, \code{sp}, and \code{pi}?} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ Hui and Walter (1980) describe an approach for estimating the sensitivity and specificity of a diagnostic test in the absence of a gold standard. Their method involves testing individuals from two populations with two conditionally independent diagnostic tests (neither of which is a gold standard). With such data, all six parameters of interest (two sensitivities, two specificities and two prevalences) can be estimated since there are six degrees of freedom available. The methodology in this function follows the sample size calculation methods described by Georgiadis et al. (2005). In their paper Georgiadis et al. (2005) parameterise the uncertainty in the prevalence, sensitivity and specificity estimates in terms of the width of the confidence interval. For consistency with the other sample size calculation functions in \pkg{epiR} the amount of uncertainty in the prevalence, sensitivity and specificity is parameterised in absolute terms. Using this approach, if we set \code{se = c(0.80,0.90)} and \code{epsilon.ase = c(0.05,0.10)} the number of subjects to return an estimate of \code{se1} that is between \code{0.75} and \code{0.85} and \code{se2} that is between \code{0.80} and \code{1.0} will be returned. } \value{ When \code{verbose = TRUE} a data frame listing the required sample size to estimate: \item{p1}{the prevalence of the outcome of interest in population 1.} \item{p2}{the prevalence of the outcome of interest in population 2.} \item{se1}{the sensitivity of the first diagnostic test.} \item{se2}{the sensitivity of the second diagnostic test.} \item{sp1}{the specificity of the first diagnostic test.} \item{sp2}{the specificity of the second diagnostic test.} \item{se1.se2}{the difference in the sensitivities of the two diagnostic tests.} \item{sp1.sp2}{the difference in the specificities of the two diagnostic tests.} When \code{verbose = FALSE} a data frame listing the maximum of the sample size estimates listed when \code{verbose = TRUE}. 
} \references{ Georgiadis M, Johnson W, Gardner I (2005) Sample size determination for estimation of the accuracy of two conditionally independent tests in the absence of a gold standard. Preventive Veterinary Medicine 71, 1 - 10. DOI: 10.1016/j.prevetmed.2005.04.004. Hui SL, Walter SD (1980) Estimating the error rates of diagnostic tests. Biometrics 36, 167 - 171. Nielsen SS, Gronbaek C, Agger JF, Houe H (2002) Maximum-likelihood estimation of sensitivity and specificity of ELISAs and faecal culture for diagnosis of paratuberculosis. Preventive Veterinary Medicine 53, 191 - 204. DOI: 10.1016/s0167-5877(01)00280-x. } \examples{ ## EXAMPLE 1 (from Georgiadis et al. 2005, pp. 5): ## Nielsen et al. (2002) present data from the evaluation of a milk ## antibody ELISA and faecal culture for the diagnosis of Mycobacterium avium ## subsp. paratuberculosis infection in cattle. Because the ELISA detects ## antibodies and culture is based on isolation of the bacterium in faeces ## we can reasonably assume that the two tests are conditionally independent. ## How many cattle need to be sampled if we wanted to be 95\% certain that ## our estimate of diagnostic sensitivity and specificity of the two tests ## is within 0.05 of the true population value assuming the number sampled ## in the second population divided by the number sampled in the first ## population is 0.817? The prevalence of Mycobacterium avium subsp. ## paratuberculosis is thought to be 0.093 and 0.204, respectively. Assume ## the sensitivity of the the ELISA and faecal culture is 0.349 and 0.534, ## respectively. Assume the specificity of the ELISA and faecal culture is ## 0.995 and 0.894, respectively. epi.ssdxtest(pi = c(0.093,0.204), se = c(0.349,0.534), sp = c(0.995,0.894), epsilon.api = c(0.05,0.05), epsilon.ase = c(0.05,0.05), epsilon.asp = c(0.05,0.05), epsilon.asesp = c(0.05,0.05), r = 0.817, nfractional = FALSE, verbose = FALSE, conf.level = 0.95) ## A total of 63,887 cattle need to be sampled (35,161 from population 1 and ## 28,726 from population 2) to meet the requirements of the study. } \keyword{univar} epiR/man/epi.sssimpleestc.Rd0000644000176200001440000000655714075470644015535 0ustar liggesusers\name{epi.sssimpleestc} \alias{epi.sssimpleestc} \title{ Sample size to estimate a continuous outcome using simple random sampling } \description{ Sample size to estimate a continuous outcome using simple random sampling. } \usage{ epi.sssimpleestc(N = 1E+06, xbar, sigma, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{N}{scalar integer, representing the total number of individual listing units in the population.} \item{xbar}{scalar number, the expected mean of the continuous variable to be estimated.} \item{sigma}{scalar number, the expected standard deviation of the continuous variable to be estimated.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \value{ Returns an integer defining the required sample size. } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 70 - 75. Scheaffer RL, Mendenhall W, Lyman Ott R (1996). 
Elementary Survey Sampling. Duxbury Press, New York, pp. 95. Otte J, Gumm I (1997). Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys. Preventive Veterinary Medicine 31: 147 - 150. } \note{ If \code{error = "relative"} the sample estimate should not differ in absolute value from the true unknown population mean \code{xbar} by more than \code{epsilon * xbar}. } \examples{ ## EXAMPLE 1: ## A city contains 20 neighbourhood health clinics and it is desired to take a ## sample of clinics to estimate the total number of persons from all these ## clinics who have been given, during the past 12 month period, prescriptions ## for a recently approved antidepressant. If we assume that the average number ## of people seen at these clinics is 1500 per year with the standard deviation ## equal to 300, and that approximately 5\% of patients (regardless of clinic) ## are given this drug, how many clinics need to be sampled to yield an estimate ## that is within 20\% of the true population value? pmean <- 1500 * 0.05; psigma <- (300 * 0.05) epi.sssimpleestc(N = 20, xbar = pmean, sigma = psigma, epsilon = 0.20, error = "relative", nfractional = FALSE, conf.level = 0.95) ## Four clinics need to be sampled to meet the requirements of the survey. ## EXAMPLE 2: ## We want to estimate the mean bodyweight of deer on a farm. There are 278 ## animals present. We anticipate the mean body weight to be around 200 kg ## and the standard deviation of body weight to be 30 kg. We would like to ## be 95\% certain that our estimate is within 10 kg of the true mean. How ## many deer should be sampled? epi.sssimpleestc(N = 278, xbar = 200, sigma = 30, epsilon = 10, error = "absolute", nfractional = FALSE, conf.level = 0.95) ## A total of 31 deer need to be sampled to meet the requirements of the survey. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line
epiR/man/epi.ssclus2estc.Rd0000644000176200001440000001026514164044624015255 0ustar liggesusers
\name{epi.ssclus2estc} \alias{epi.ssclus2estc} \title{Number of clusters to be sampled to estimate a continuous outcome using two-stage cluster sampling } \description{ Number of clusters to be sampled to estimate a continuous outcome using two-stage cluster sampling. } \usage{ epi.ssclus2estc(b, N, xbar, xsigma, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{b}{scalar integer or vector of length two, the number of individual listing units in each cluster to be sampled. See details, below.} \item{N}{scalar integer, representing the total number of individual listing units in the population.} \item{xbar}{scalar number, the expected mean of the continuous variable to be estimated.} \item{xsigma}{scalar number, the expected standard deviation of the continuous variable to be estimated.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{rho}{scalar number, the intracluster correlation.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \details{ In many situations it is common for sampling units to be aggregated into clusters.
Typical examples include individuals within households, children within classes (within schools) and cows within herds. We use the term primary sampling unit (PSU) to refer to what gets sampled first (clusters) and secondary sampling unit (SSU) to refer to what gets sampled second (individual listing units within each cluster). In this documentation the terms primary sampling unit and cluster are used interchangeably. Similarly, the terms secondary sampling unit and individual listing units are used interchangeably. \code{b} as a scalar integer represents the total number of individual listing units from each cluster to be sampled. If \code{b} is a vector of length two the first element represents the mean number of individual listing units to be sampled from each cluster and the second element represents the standard deviation of the number of individual listing units to be sampled from each cluster. } \value{ A list containing the following: \item{n.psu}{the total number of primary sampling units (clusters) to be sampled for the specified level of confidence and relative error.} \item{n.ssu}{the total number of secondary sampling units to be sampled for the specified level of confidence and relative error.} \item{DEF}{the design effect.} \item{rho}{the intracluster correlation, as entered by the user.} } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 292. Machin D, Campbell MJ, Tan SB, Tan SH (2018). Sample Sizes for Clinical, Laboratory ad Epidemiological Studies, Fourth Edition. Wiley Blackwell, London, pp. 195 - 214. } \examples{ ## EXAMPLE 1 (from Levy and Lemeshow p 292): ## We intend to conduct a survey of nurse practitioners to estimate the ## average number of patients seen by each nurse. There are five health ## centres in the study area, each with three nurses. We intend to sample ## two nurses from each health centre. We would like to be 95\% confident ## that our estimate is within 30\% of the true population value. We expect ## that the mean number of patients seen at the health centre level ## is 84 (var 567) and the mean number of patients seen at the nurse ## level is 28 (var 160). Previous studies report an intracluster ## correlation for the number of patients seen per nurse to be 0.02. ## How many health centres should be sampled? epi.ssclus2estc(b = 2, N = 15, xbar = 28, xsigma = sqrt(160), epsilon = 0.30, error = "relative", rho = 0.02, nfractional = FALSE, conf.level = 0.95) ## A total of 3 health centres need to be sampled to meet the specifications ## of this study. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.blcm.paras.Rd0000644000176200001440000000705214074732464015030 0ustar liggesusers\name{epi.blcm.paras} \alias{epi.blcm.paras} \title{ Number of parameters to be inferred and number of informative priors required for a Bayesian latent class model } \description{ Returns the number of unknown parameters to be inferred and the number of informative priors likely to be needed for an identifiable Bayesian latent class model to estimate diagnostic sensitivity and specificity in the absence of a gold standard. 
} \usage{ epi.blcm.paras(ntest.dep = 2, ntest.indep = 1, npop = 2) } \arguments{ \item{ntest.dep}{scalar, the number of conditionally dependent tests evaluated.} \item{ntest.indep}{scalar, the number of conditionally independent tests evaluated.} \item{npop}{scalar, the number of populations with a distinct prevalence investigated.} } \value{ A list containing the following: \item{df}{scalar, the degrees of freedom in the available data.} \item{npars}{scalar, the number of unknown parameters to be inferred.} \item{ninf.priors}{scalar, the number of informative priors likely to be needed for an identifiable model.} } \author{ Simon Firestone and Allison Cheung (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Parkville Victoria, Australia), Nagendra Singanallur (Australian Centre for Disease Preparedness, Newcomb Victoria, Australia). } \note{ A model may still be useful for inference if it has less informative priors, though cautious interpretation is warranted, typically with a sensitivity analysis of the influence of the priors on the findings. } \examples{ ## EXAMPLE 1 --- Two conditionally dependent tests, 1 population: epi.blcm.paras(ntest.dep = 2, ntest.indep = 0, npop = 1) ## This model has 3 degrees of freedom. The model has 7 unknown parameters to ## be inferred. At least 4 informative priors are required. ## EXAMPLE 2 --- Two conditionally dependent tests, 2 populations: epi.blcm.paras(ntest.dep = 2, ntest.indep = 0, npop = 2) ## This model has 6 degrees of freedom. The model has 8 unknown parameters to ## be inferred. At least 2 informative priors are required. ## EXAMPLE 3 --- Two conditionally dependent tests, 3 populations: epi.blcm.paras(ntest.dep = 2, ntest.indep = 0, npop = 3) ## This model has 9 degrees of freedom. The model has 9 unknown parameters to ## be inferred. This model may be able to proceed without informative priors. ## EXAMPLE 4 --- Two conditionally dependent tests, 1 independent test, 1 ## population: epi.blcm.paras(ntest.dep = 2, ntest.indep = 1, npop = 1) ## This model has 7 degrees of freedom. The model has 9 unknown parameters to ## be inferred. At least 2 informative priors are required. ## EXAMPLE 5 --- Two conditionally dependent tests, 1 independent test, 2 ## populations: epi.blcm.paras(ntest.dep = 2, ntest.indep = 1, npop = 2) ## This model has 14 degrees of freedom. The model has 10 unknown parameters to ## be inferred. This model may be able to proceed without informative priors. ## EXAMPLE 6 --- Three conditionally dependent tests, 1 population: epi.blcm.paras(ntest.dep = 3, ntest.indep = 0, npop = 1) ## This model has 7 degrees of freedom. The model has 13 unknown parameters to ## be inferred. At least 6 informative priors are required. ## EXAMPLE 7 --- Three conditionally dependent tests, 2 populations: epi.blcm.paras(ntest.dep = 3, ntest.indep = 0, npop = 2) ## This model has 14 degrees of freedom. The model has 14 unknown parameters to ## be inferred. This model may be able to proceed without informative priors. } \keyword{univar} epiR/man/epi.sscohortt.Rd0000644000176200001440000001316314164044706015031 0ustar liggesusers\name{epi.sscohortt} \alias{epi.sscohortt} \title{ Sample size, power or minimum detectable incidence rate ratio for a cohort study using person or animal time data } \description{ Sample size, power or minimum detectable incidence rate ratio for a cohort study using person or animal time data. 
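Besides returning a required sample size, the value set documented below suggests the function can also be run with a fixed sample size to recover study power. The call below is a hedged illustration based on the argument list shown in the usage section that follows; for the scenario of Example 1 (further down) it should return a power of approximately 0.80.

## Illustrative call (output not verified here): solve for power given a
## total of 46 subjects, the scenario of EXAMPLE 1 below:
epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = NA, n = 46, power = NA,
   r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95)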
} \usage{ epi.sscohortt(irexp1, irexp0, FT = NA, n, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{irexp1}{the expected incidence rate of the outcome in the exposed group (0 to 1).} \item{irexp0}{the expected incidence rate of the outcome in the non-exposed group (0 to 1).} \item{FT}{the follow-up period (in years) for the study.} \item{n}{scalar, defining the total number of subjects in the study (i.e., the number in both the exposed and unexposed groups).} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the exposed group divided by the number in the unexposed group.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the outcome incidence rate in the exposed group is greater than or less than the outcome incidence rate in the unexposed group. Use a one-sided test to evaluate whether or not the outcome incidence rate in the exposed group is greater than the outcome incidence rate in the unexposed group.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ The methodology in this function follows the methodology described in Lwanga and Lemeshow (1991). } \value{ A list containing the following: \item{n.total}{the total number of subjects required for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{n.exp1}{the total number of subjects in the treatment group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{n.exp0}{the total number of subjects in the control group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{power}{the power of the study given the number of study subjects, the expected effect size and level of confidence.} \item{irr}{the incidence rate of the outcome in the exposed group divided by the incidence rate in the unexposed group (the incidence rate ratio).} } \references{ Lemeshow S, Hosmer D, Klar J, Lwanga S (1990). Adequacy of Sample Size in Health Studies. John Wiley and Sons, New York. Lwanga S, Lemeshow S (1991). Sample Size Determination in Health Studies. World Health Organization, Geneva. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. Values need to be entered for \code{irexp0}, \code{n}, and \code{power} to return a value for \code{irr}. In this situation, the lower value of \code{irr} represents the maximum detectable incidence rate ratio that is less than 1; the upper value of \code{irr} represents the minimum detectable incidence rate ratio greater than 1. See the documentation for \code{\link{epi.sscohortc}} for an example using the \code{design} facility implemented in this function. } \examples{ ## EXAMPLE 1 (from Lwanga and Lemeshow 1991 p. 
19): ## As part of a study of the long-term effect of noise on workers in a ## particularly noisy industry, it is planned to follow up a cohort of people ## who were recruited into the industry during a given period of time and to ## compare them with a similar cohort of individuals working in a much ## quieter industry. Subjects will be followed up for the rest of their lives or ## until their hearing is impaired. The results of a previous small-scale survey ## suggest that the annual incidence rate of hearing impairment in the noisy ## industry may be as high as 25\%. How many people should be followed up ## in each of the groups (which are to be of equal size) to test the hypothesis ## that the incidence rates for hearing impairment in the two groups are the ## same, at the 5\% level of significance and with a power of 80\%? The ## alternative hypothesis is that the annual incidence rate for hearing ## impairment in the quieter industry is not more than the national average of ## about 10\% (for people in the same age range), whereas in the noisy ## industry it differs from this. ## An annuual incidence rate of 25\% is equivalent to 25 cases of hearing ## impairment per 100 individuals per year. epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = NA, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) ## A total of 46 subjects are required for this study: 23 in the exposed ## group and 23 in the unexposed group. ## EXAMPLE 2 (from Lwanga and Lemeshow 1991 p. 19): ## A study similar to that described above is to be undertaken, but the ## duration of the study will be limited to 5 years. How many subjects should ## be followed up in each group? epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = 5, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) ## A total of 130 subjects are required for this study: 65 in the exposed ## group and 65 in the unexposed group. } \keyword{univar} epiR/man/rsu.sep.rb1rf.Rd0000644000176200001440000000651513742445324014644 0ustar liggesusers\name{rsu.sep.rb1rf} \alias{rsu.sep.rb1rf} \title{ Surveillance system sensitivity assuming risk-based sampling on one risk factor } \description{ Calculates risk-based surveillance system (population-level) sensitivity with a single risk factor, assuming one-stage risk-based sampling and allowing unit sensitivity to vary among risk strata. } \usage{ rsu.sep.rb1rf(N, n, rr, ppr, pstar, se.u, method = "binomial") } \arguments{ \item{N}{scalar or vector of the same length as that vector of \code{rr} defining the population size per risk strata. Ignored if \code{method = "binomial"}.} \item{n}{scalar or vector of the same length as that vector of \code{rr} defining the sample size per risk strata.} \item{rr}{scalar or vector of the same length as that vector of \code{ppr} defining the relative risk values.} \item{ppr}{scalar or vector of the same length as that vector of \code{rr} defining the population proportions in each risk strata. Ignored if \code{method = "hypergeometric"}.} \item{pstar}{scalar, defining the design prevalence.} \item{se.u}{scalar or vector of the same length as that vector of \code{rr} defining the unit sensitivity (which can vary across strata).} \item{method}{character string indicating the method to be used. Options are \code{binomial} or \code{hypergeometric}. See details, below.} } \details{ If \code{method = binomial} \code{N} is ignored and values for \code{ppr} need to be entered. 
Conversely, if \code{method = hypergeometric}, \code{ppr} is ignored and calculated from \code{N}. } \value{ A list comprised of two elements: \item{se.p}{scalar, surveillance system (population-level) sensitivity estimates.} \item{epi}{vector, effective probability of infection estimates.} \item{adj.risk}{vector, adjusted relative risk estimates.} } \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to confirm the absence of ## disease using one-stage risk based sampling. Assume a design prevalence of ## 0.10 at the cluster (herd) level and the total number of clusters in ## the population is unknown. Clusters are categorised as being either high, ## medium or low risk with the probability of disease for clusters in the ## high and medium risk area 5 and 3 times the probability of disease in the ## low risk area. The proportions of clusters in the high, medium and low risk ## area are 0.10, 0.10 and 0.80, respectively and you elect to sample five ## clusters from each of the three areas using a test with diagnostic ## sensitivity of 0.90. What is the surveillance system sensitivity? rsu.sep.rb1rf(N = NA, n = c(5,5,5), rr = c(5,3,1), ppr = c(0.10,0.10,0.80), pstar = 0.10, se.u = 0.90, method = "binomial") ## The surveillance system sensitivity is 0.94. ## EXAMPLE 2: ## Same scenario as above, but this time assume we know how many clusters are ## in the high, medium and low risk areas: 10, 10 and 80, respectively. What is ## the surveillance system sensitivity? rsu.sep.rb1rf(N = c(10,10,80), n = c(5,5,5), rr = c(5,3,1), ppr = NA, pstar = 0.10, se.u = 0.90, method = "hypergeometric") ## The surveillance system sensitivity is 0.96, almost identical to that ## calculated above where the binomial distribution was used to account for ## not knowing the size of the cluster population at risk. } \keyword{methods} epiR/man/epi.prcc.Rd0000644000176200001440000000750314164036762013734 0ustar liggesusers\name{epi.prcc} \alias{epi.prcc} \title{ Partial rank correlation coefficients } \description{ Compute partial rank correlation coefficients. } \usage{ epi.prcc(dat, sided.test = 2, conf.level = 0.95) } \arguments{ \item{dat}{a data frame comprised of \code{K + 1} columns and \code{N} rows, where \code{K} represents the number of model parameters being evaluated and \code{N} represents the number of replications of the model. The last column of the data frame (i.e., column \code{K + 1}) provides the model output.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the partial rank correlation coefficient is greater than or less than zero. Use a one-sided test to evaluate whether or not the partial rank correlation coefficient is greater than zero.} \item{conf.level}{magnitude of the returned confidence intervals. Must be a single number between 0 and 1.} } \details{ Calculation of the PRCC enables the determination of the statistical relationships between each input parameter and the outcome variable while keeping all of the other input parameters constant at their expected value (Conover, 1980). This procedure enables the independent effects of each parameter to be determined, even when the parameters are correlated. A PRCC indicates the degree of monotonicity between a specific input variable and an outcome; therefore only outcome variables that are monotonically related to the input parameters should be chosen for this analysis (Conover, 1980; Iman and Conover 1980). 
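The PRCC itself has a simple construction: every column is rank transformed, and the coefficient for a given input is the correlation between (a) the residuals from regressing that input's ranks on the ranks of all other inputs and (b) the residuals from regressing the outcome's ranks on the ranks of all other inputs. The sketch below illustrates this residual-correlation definition for a single input; it is for intuition only and the function's internal calculations (in particular the confidence interval and test statistic) may differ in detail.

## Illustrative sketch (not package code): PRCC of x1 with the outcome y,
## controlling for x2 and x3, via the residual-correlation definition.
set.seed(42)
x1 <- rnorm(50); x2 <- rnorm(50); x3 <- rnorm(50)
y  <- 2 + 0.5 * x1 - 1.7 * x2 + 0.2 * x3 + rnorm(50, sd = 0.1)

rdat <- data.frame(r1 = rank(x1), r2 = rank(x2), r3 = rank(x3), ry = rank(y))

## Remove the (rank) effect of the other inputs from x1 and from y:
res.x1 <- residuals(lm(r1 ~ r2 + r3, data = rdat))
res.y  <- residuals(lm(ry ~ r2 + r3, data = rdat))

## The PRCC point estimate is the correlation of the two sets of residuals:
cor(res.x1, res.y)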
Monotonicity can be assessed by examining scatterplots where each input variable is plotted as a function of the outcome variable. The sign of the PRCC indicates the qualitative relationship between each input variable and the outcome variable. The magnitude of the PRCC indicates the importance of the uncertainty in the input variable in contributing to the imprecision in predicting the value of the outcome variable. The relative importance of the input variables can be directly evaluated by comparing the values of the PRCC. If the number of parameters \code{K} is greater than the number of model replications \code{N} an error will be returned. }
\value{ A data frame with five elements: \code{est} the point estimate of the partial rank correlation coefficient between each input parameter and the outcome, \code{lower} the lower bound of the confidence interval of the partial rank correlation coefficient, \code{upper} the upper bound of the confidence interval of the partial rank correlation coefficient, \code{test.statistic} the test statistic used to determine the significance of non-zero values of the partial rank correlation coefficient, and \code{p.value} the associated P-value. }
\references{ Blower S, Dowlatabladi H (1994). Sensitivity and uncertainty analysis of complex models of disease transmission: an HIV model, as an example. International Statistical Review 62: 229 - 243. Conover WJ (1980). Practical Nonparametric Statistics, 2nd edition, John Wiley and Sons Inc., New York, NY. Iman RL, Conover WJ (1982). A distribution-free approach to inducing rank correlation among input variables. Communication in Statistics --- Simulation and Computation 11: 311 - 334. Sanchez M, Blower S (1997) Uncertainty and sensitivity analysis of the basic reproductive rate. American Journal of Epidemiology 145: 1127 - 1137. }
\author{ Jonathon Marshall, J.C.Marshall@massey.ac.nz. }
\examples{ ## EXAMPLE 1:
## Create a matrix of simulation results:
x1 <- rnorm(n = 10, mean = 120, sd = 130)
x2 <- rnorm(n = 10, mean = 80, sd = 5)
x3 <- rnorm(n = 10, mean = 40, sd = 20)
y <- 2 + (0.5 * x1) - (1.7 * x2) + (0.2 * x3)
dat.df01 <- data.frame(x1 = x1, x2 = x2, x3 = x3, y = y)
epi.prcc(dat.df01, sided.test = 2, conf.level = 0.95) } \keyword{univar}
epiR/man/rsu.sspfree.rs.Rd0000644000176200001440000000561013754667562015134 0ustar liggesusers\name{rsu.sspfree.rs} \alias{rsu.sspfree.rs} \title{ Sample size to achieve a desired probability of disease freedom assuming representative sampling } \description{ Calculates the required sample size to achieve a desired (posterior) probability of disease freedom assuming representative sampling, imperfect test sensitivity and perfect test specificity. } \usage{ rsu.sspfree.rs(N = NA, prior, p.intro, pstar, pfree, se.u) } \arguments{ \item{N}{scalar integer or vector of integers the same length as \code{n}, representing the population size.
Use \code{NA} if unknown.} \item{prior}{scalar probability (0 to 1), representing the prior probability that the population is free of disease.} \item{p.intro}{scalar or vector of the same length as \code{pfree}, representing the probability of disease introduction during the next time period.} \item{pstar}{scalar numeric or vector of numbers the same length as \code{pfree} representing the design prevalence.} \item{pfree}{scalar numeric or vector of numbers the same length as \code{pfree} representing the desired probability of disease freedom.} \item{se.u}{scalar (0 to 1) representing the sensitivity of the diagnostic test at the surveillance unit level.} } \value{ A list comprised of three elements: \item{n}{a vector listing the required sample sizes.} \item{sep}{a vector listing the population sensitivity estimates.} \item{adj.prior}{a vector listing the adjusted priors.} } \references{ Martin P, Cameron A, Greiner M (2007). Demonstrating freedom from disease using multiple complex data sources 1: A new methodology based on scenario trees. Preventive Veterinary Medicine 79: 71 - 97. Martin P, Cameron A, Barfod K, Sergeant E, Greiner M (2007). Demonstrating freedom from disease using multiple complex data sources 2: Case study - Classical swine fever in Denmark. Preventive Veterinary Medicine 79: 98 - 115. } \note{ This function returns the sample size to achieve a desired (posterior) probability of disease freedom. Function \code{\link{rsu.sssep.rs}} returns the sample size to achieve a desired surveillance system sensitivity. } \examples{ ## EXAMPLE 1: ## Prior surveillance activities and expert opinion lead you to believe that ## there's a 75\% chance that your country is free of disease X. To confirm ## your country's disease freedom status you intend to use a test at the herd ## level which has a diagnostic sensitivity of 0.95. The probability of ## disease introduction during the time period of interest is relatively ## low, say 0.01. How many herds need to be sampled to be 95\% confident ## that the country is free of disease X assuming a design prevalence of ## 0.01? rsu.sspfree.rs(N = NA, prior = 0.75, p.intro = 0.01, pstar = 0.01, pfree = 0.95, se.u = 0.95) ## A total of 198 herds need to be sampled to meet the requirements of the ## study. } \keyword{methods} epiR/man/rsu.epinf.Rd0000644000176200001440000000365013701522454014133 0ustar liggesusers\name{rsu.epinf} \alias{rsu.epinf} \title{ Effective probability of disease } \description{ Calculates the effective probability of disease (adjusted design prevalence) for each risk group within a population. } \usage{ rsu.epinf(pstar, rr, ppr) } \arguments{ \item{pstar}{scalar, the design prevalence.} \item{rr}{vector, defining the relative risk values for each strata in the population.} \item{ppr}{vector of length \code{rr} defining the population proportions in each strata.} } \value{ A list of comprised of two elements: \item{epinf}{a vector listing the effective probability of infection listed in order of \code{rr}.} \item{adj.risk}{a vector listing the adjusted risk values listed in order of \code{rr}.} } \examples{ ## EXAMPLE 1: ## For a given disease of interest you believe that there is a 'high risk' ## and 'low risk' area in your country. The risk of disease in the high risk ## area compared with the low risk area is 5. A recent census shows that ## 10\% of the population are resident in the high risk area and 90\% ## are resident in the low risk area. You elect to set a design prevalence ## of 0.10. 
## Calculate the effective probability of infection for each area.
rsu.epinf(pstar = 0.1, rr = c(5,1), ppr = c(0.10,0.90))
## The effective probabilities of infection for the high and low risk areas
## are 0.36 and 0.07, respectively.

## EXAMPLE 2:
## Re-calculate the effective probabilities of infection assuming there are
## 'high', 'medium' and 'low' risk areas. The risk of disease in the
## medium risk area compared with the low risk area is 3. Population
## proportions for each area are 0.10, 0.10 and 0.80, respectively.
rsu.epinf(pstar = 0.10, rr = c(5,3,1), ppr = c(0.10,0.10,0.80))
## The effective probabilities of infection for the high, medium and low
## risk areas are 0.31, 0.19 and 0.06, respectively. } \keyword{methods}
epiR/man/epi.interaction.Rd0000644000176200001440000002144414165775260015324 0ustar liggesusers\name{epi.interaction} \alias{epi.interaction} \title{Relative excess risk due to interaction in a case-control study}
\description{ For two binary explanatory variables included in a logistic regression as an interaction term, computes the relative excess risk due to interaction, the proportion of outcomes among those with both exposures attributable to interaction, and the synergy index. Confidence interval calculations are based on the delta method described by Hosmer and Lemeshow (1992). }
\usage{ epi.interaction(model, coef, param = c("product", "dummy"), conf.level = 0.95) }
\arguments{ \item{model}{an object of class \code{glm}, \code{clogit} or \code{coxph}.} \item{coef}{a vector listing the positions of the coefficients of the interaction terms in the model. What row numbers of the regression table summary list the coefficients for the interaction terms included in the model? } \item{param}{character string specifying the type of coding used for the variables included in the interaction term. Options are \code{product} where two risk factors and one product term are used to represent the interaction and \code{dummy} where the two risk factors are combined into a single variable with four levels. See the examples, below, for details.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} }
\details{ Interaction on an additive scale means that the combined effect of two exposures is greater (or less) than the sum of the individual effects of two exposures. Interaction on a multiplicative scale means that the combined effect of the two exposures is greater (or less) than the product of the individual effects of the two exposures.

This function calculates three indices to assess the presence of additive interaction, as defined by Rothman (1998): (1) the relative excess risk due to interaction (RERI, sometimes called the interaction contrast ratio), (2) the proportion of disease among those with both exposures that is attributable to their interaction (AP[AB]), and (3) the synergy index (S). In addition, if at least one of the two exposures is preventive (i.e., ORs of less than one) then estimates of RERI and AP are invalid (the SI remains unaffected). In this situation the function issues an appropriate warning. Exposures need to be recoded so the stratum with the lowest outcome risk becomes the new reference category when the two exposures are considered together.

A RERI of zero means no additive interaction. A RERI of greater than zero means positive interaction or more than additivity. A RERI of less than zero means negative interaction or less than additivity. RERI ranges from minus infinity to plus infinity.
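Each of the three additive interaction indices, and the multiplicative measure discussed later in this section, is a simple function of the stratum specific odds ratios. The sketch below shows the point estimate arithmetic only, using arbitrary illustrative odds ratios; it is not package code, and confidence intervals require the delta method calculations described by Hosmer and Lemeshow (1992).

## Illustrative sketch (not package code). Suppose a model returns odds
## ratios for exposure A only (or10), exposure B only (or01) and both
## exposures (or11), each relative to the doubly unexposed stratum:
or10 <- 3.33; or01 <- 2.96; or11 <- 9.04

## Relative excess risk due to interaction:
reri <- or11 - or10 - or01 + 1

## Proportion of disease among those with both exposures attributable to
## their interaction:
apab <- reri / or11

## Synergy index:
s <- (or11 - 1) / ((or10 - 1) + (or01 - 1))

## Measure of interaction on the multiplicative scale:
mult <- or11 / (or10 * or01)

round(c(RERI = reri, APAB = apab, S = s, multiplicative = mult), digits = 2)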
An AP[AB] of zero means no interaction or exactly additivity. An AP[AB] greater than zero means positive interaction or more than additivity. An AP[AB] of less than zero means negative interaction or less than additivity. AP[AB] ranges from -1 to +1. The synergy index is the ratio of the combined effects and the individual effects. An S of one means no interaction or exactly additivity. An S of greater than one means positive interaction or more than additivity. An S of less than one means negative interaction or less than additivity. S ranges from zero to infinity. In the absence of interaction AP[AB] = 0 and RERI and S = 1. Skrondal (2003) advocates for use of the synergy index as a summary measure of additive interaction, showing that when regression models adjust for the effect of confounding variables (as in the majority of cases) RERI and AP may be biased, while S remains unbiased. This function uses the delta method to calculate the confidence intervals for each of the interaction measures, as described by Hosmer and Lemeshow (1992). An error will be returned if the point estimate of the synergy index is less than one. In this situation a warning is issued advising the user to re-parameterise their model as a linear odds model. See Skrondal (2003) for details. A measure of multiplicative interaction is \code{RR11 / (RR10 * RR01)}. If \code{RR11 / (RR10 * RR01)} equals one multiplicative interaction is said to be absent. If \code{RR11 / (RR10 * RR01)} is greater than one multiplicative interaction is said to be positive. If \code{RR11 / (RR10 * RR01)} is less than one multiplicative interaction is said to be negative. } \value{ A list containing: \item{reri}{the point estimate and lower and upper bounds of the confidence interval for the relative excess risk due to interaction, RERI.} \item{apab}{the point estimate and lower and upper bounds of the confidence interval for the proportion of disease among those with both exposures that is attributable to their interaction, APAB.} \item{s}{the point estimate and lower and upper bounds of the confidence interval for the synergy index.} \item{multiplicative}{the point estimate and lower and upper bounds of the confidence interval for the odds ratio for multiplicative interaction.} } \references{ Chen S-C, Wong R-H, Shiu L-J, Chiou M-C, Lee H (2008). Exposure to mosquito coil smoke may be a risk factor for lung cancer in Taiwan. Journal of Epidemiology 18: 19 - 25. Hosmer DW, Lemeshow S (1992). Confidence interval estimation of interaction. Epidemiology 3: 452 - 456. Kalilani L, Atashili J (2006). Measuring additive interaction using odds ratios. Epidemiologic Perspectives & Innovations doi:10.1186/1742-5573-3-5. Knol MJ, VanderWeele TJ (2012). Recommendations for presenting analyses of effect modification and interaction. International Journal of Epidemiology 41: 514 - 520. Lash TL, VanderWeele TJ, Haneuse S, Rothman KJ (2021). Modern Epidemiology. Lippincott - Raven Philadelphia, USA, pp. 621 - 623. Rothman K, Keller AZ (1972). The effect of joint exposure to alcohol and tabacco on risk of cancer of the mouth and pharynx. Journal of Chronic Diseases 23: 711 - 716. Skrondal A (2003). Interaction as departure from additivity in case-control studies: A cautionary note. American Journal of Epidemiology 158: 251 - 258. VanderWeele TJ, Knol MJ (2014). A tutorial on interaction. Epidemiologic Methods 3: 33 - 72. 
} \examples{ ## EXAMPLE 1: ## Data from Rothman and Keller (1972) evaluating the effect of joint exposure ## to alcohol and tabacco on risk of cancer of the mouth and pharynx (cited in ## Hosmer and Lemeshow, 1992): can <- c(rep(1, times = 231), rep(0, times = 178), rep(1, times = 11), rep(0, times = 38)) smk <- c(rep(1, times = 225), rep(0, times = 6), rep(1, times = 166), rep(0, times = 12), rep(1, times = 8), rep(0, times = 3), rep(1, times = 18), rep(0, times = 20)) alc <- c(rep(1, times = 409), rep(0, times = 49)) dat.df01 <- data.frame(alc, smk, can) ## Table 2 of Hosmer and Lemeshow (1992): dat.glm01 <- glm(can ~ alc + smk + alc:smk, family = binomial, data = dat.df01) summary(dat.glm01) ## What is the measure of effect modification on the additive scale? epi.interaction(model = dat.glm01, param = "product", coef = c(2,3,4), conf.level = 0.95) ## Measure of interaction on the additive scale: RERI 3.73 ## (95\% CI -1.84 to 9.32), page 453 of Hosmer and Lemeshow (1992). ## What is the measure of effect modification on the multiplicative scale? ## See VanderWeele and Knol (2014) page 36 and Knol and Vanderweele (2012) ## for details. beta1 <- as.numeric(dat.glm01$coefficients[2]) beta2 <- as.numeric(dat.glm01$coefficients[3]) beta3 <- as.numeric(dat.glm01$coefficients[4]) exp(beta3) / (exp(beta1) * exp(beta2)) ## Measure of interaction on the multiplicative scale: 0.093. ## EXAMPLE 2: ## Rothman defines an alternative coding scheme to be employed for ## parameterising an interaction term. Using this approach, instead of using ## two risk factors and one product term to represent the interaction (as ## above) the risk factors are combined into one variable comprised of ## (in this case) four levels: ## a.neg b.neg: 0 0 0 ## a.pos b.neg: 1 0 0 ## a.neg b.pos: 0 1 0 ## a.pos b.pos: 0 0 1 dat.df01$d <- rep(NA, times = nrow(dat.df01)) dat.df01$d[dat.df01$alc == 0 & dat.df01$smk == 0] <- 0 dat.df01$d[dat.df01$alc == 1 & dat.df01$smk == 0] <- 1 dat.df01$d[dat.df01$alc == 0 & dat.df01$smk == 1] <- 2 dat.df01$d[dat.df01$alc == 1 & dat.df01$smk == 1] <- 3 dat.df01$d <- factor(dat.df01$d) ## Table 3 of Hosmer and Lemeshow (1992): dat.glm02 <- glm(can ~ d, family = binomial, data = dat.df01) summary(dat.glm02) ## What is the measure of effect modification on the additive scale? epi.interaction(model = dat.glm02, param = "dummy", coef = c(2,3,4), conf.level = 0.95) ## Measure of interaction on the additive scale: RERI 3.74 ## (95\% CI -1.84 to 9.32), page 455 of Hosmer and Lemeshow (1992). } \keyword{univar} epiR/man/epi.edr.Rd0000644000176200001440000000735314100645450013550 0ustar liggesusers\name{epi.edr} \alias{epi.edr} \title{ Estimated dissemination ratio } \description{ Computes estimated dissemination ratios on the basis of a vector of count data (usually incident cases identified on each day of an epidemic). } \usage{ epi.edr(dat, n = 4, conf.level = 0.95, nsim = 99, na.zero = TRUE) } \arguments{ \item{dat}{a numeric vector listing the number of incident cases for each day of an epidemic.} \item{n}{scalar, defining the number of days to be used when computing the estimated dissemination ratio.} \item{conf.level}{magnitude of the returned confidence interval. 
Must be a single number between 0 and 1.} \item{nsim}{scalar, defining the number of simulations to be used for the confidence interval calculations.} \item{na.zero}{logical, replace \code{NaN} or \code{Inf} values with zeros?} } \details{ In infectious disease epidemics the \emph{n}-day estimated dissemination ratio (EDR) at day \emph{i} equals the total number of incident cases between day \code{i} and day \code{[i - (n - 1)]} (inclusive) divided by the total number of incident cases between day \code{(i - n)} and day \code{(i - 2n) + 1} (inclusive). EDR values are often calculated for each day of an epidemic and presented as a time series analysis. If the EDR is consistently less than unity, the epidemic is said to be `under control'. A simulation approach is used to calculate confidence intervals around each daily EDR estimate. The numerator and denominator of the EDR estimate for each day is taken in turn and a random number drawn from a Poisson distribution, using the calculated numerator and denominator value as the mean. EDR is then calculated for these simulated values and the process repeated \code{nsim} times. Confidence intervals are then derived from the vector of simulated values for each day. } \value{ Returns the point estimate of the EDR and the lower and upper bounds of the confidence interval of the EDR. } \references{ Miller W (1976). A state-transition model of epidemic foot-and-mouth disease. In: Proceedings of an International Symposium: New Techniques in Veterinary Epidemiology and Economics, University of Reading, Reading, pp. 56 - 72. Morris R, Sanson R, Stern M, Stevenson M, Wilesmith J (2002). Decision-support tools for foot-and-mouth disease control. Revue Scientifique et Technique de l'Office International des Epizooties 21, 557 - 567. Perez-Reche FJ, Taylor N, McGuigan C, Conaglen P, Forbes K, Strachan N, Honhold N (2021) Estimated Dissemination Ratio --- A practical alternative to the reproduction number for infectious diseases. Frontiers in Public Health 9. DOI: 10.3389/fpubh.2021.675065. } \examples{ ## EXAMPLE 1: set.seed(1234) dat <- rpois(n = 50, lambda = 2) dat.edr01 <- epi.edr(dat, n = 4, conf.level = 0.95, nsim = 99, na.zero = TRUE) sdate <- as.Date(x = "31/12/2015", format = "\%d/\%m/\%Y") + 1:50 dat.df01 <- data.frame(sdate = sdate, est = dat.edr01$est, low = dat.edr01$lower, upp = dat.edr01$upper) ## Line plot of EDR (and its 95\% confidence interval) as a function of ## calendar time: \dontrun{ library(ggplot2); library(scales) ggplot() + geom_line(data = dat.df01, aes(x = sdate, y = est)) + geom_line(dat = dat.df01, aes(x = sdate, y = upp), lty = 3, size = 0.5) + geom_line(dat = dat.df01, aes(x = sdate, y = low), lty = 3, size = 0.5) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("\%d \%b"), name = "Date") + scale_y_continuous(trans = "log2", breaks = c(0.25,0.5,1,2,4,8,16), limits = c(0.25,16),name = "Estimated disemination ratio (EDR)") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 10)) + geom_hline(yintercept = 1, lty = 2) } } \keyword{univar} epiR/man/epi.ssninfc.Rd0000644000176200001440000001313714164035574014450 0ustar liggesusers\name{epi.ssninfc} \alias{epi.ssninfc} \title{ Sample size for a non-inferiority trial, continuous outcome } \description{ Sample size for a non-inferiority trial, continuous outcome. 
} \usage{ epi.ssninfc(treat, control, sd, delta, n, r = 1, power, nfractional = FALSE, alpha) }
\arguments{ \item{treat}{the expected mean of the outcome of interest in the treatment group.} \item{control}{the expected mean of the outcome of interest in the control group.} \item{sd}{the expected population standard deviation of the outcome of interest.} \item{delta}{the equivalence limit, expressed as the absolute change in the outcome of interest that represents a clinically meaningful difference. For a non-inferiority trial the value entered for \code{delta} must be greater than or equal to zero.} \item{n}{scalar, the total number of study subjects in the trial.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{power}{scalar, the required study power.} \item{nfractional}{logical, return fractional sample size.} \item{alpha}{scalar, defining the desired alpha level.} }
\value{ A list containing the following: \item{n.total}{the total number of study subjects required.} \item{n.treat}{the required number of study subjects in the treatment group.} \item{n.control}{the required number of study subjects in the control group.} \item{delta}{the equivalence limit, as entered by the user.} \item{power}{the specified or calculated study power.} }
\references{ Blackwelder WC (1982). Proving the null hypothesis in clinical trials. Controlled Clinical Trials 3: 345 - 353. Ewald B (2013). Making sense of equivalence and non-inferiority trials. Australian Prescriber 36: 170 - 173. Julious SA (2004). Sample sizes for clinical trials with normal data. Statistics in Medicine 23: 1921 - 1986. Julious SA (2009). Estimating Samples Sizes in Clinical Trials. CRC, New York. Machin D, Campbell MJ, Tan SB, Tan SH (2009). Sample Size Tables for Clinical Studies. Wiley Blackwell, New York. Scott IA (2009). Non-inferiority trials: determining whether alternative treatments are good enough. Medical Journal of Australia 190: 326 - 330. Wang B, Wang H, Tu X, Feng C (2017). Comparisons of superiority, non-inferiority, and equivalence trials. Shanghai Archives of Psychiatry 29, 385 - 388. DOI: 10.11919/j.issn.1002-0829.217163. Zhong B (2009). How to calculate sample size in randomized controlled trial? Journal of Thoracic Disease 1: 51 - 54. }
\author{ Many thanks to Aniko Szabo (Medical College of Wisconsin, Wisconsin USA) for improvements to the power calculations for this function and suggestions to improve the documentation. }
\note{ Consider a clinical trial comparing two groups, a standard treatment (\eqn{s}) and a new treatment (\eqn{n}). The mean of the outcome of interest for subjects receiving the standard treatment is \eqn{N_{s}} and the mean of the outcome of interest for subjects receiving the new treatment is \eqn{N_{n}}. We specify the absolute value of the maximum acceptable difference between \eqn{N_{n}} and \eqn{N_{s}} as \eqn{\delta}. For a non-inferiority trial the value entered for \code{delta} must be greater than or equal to zero.

For a non-inferiority trial the null hypothesis is: \eqn{H_{0}: N_{s} - N_{n} \ge \delta} The alternative hypothesis is: \eqn{H_{1}: N_{s} - N_{n} < \delta}

The aim of a non-inferiority trial is to show that a new treatment is not (much) inferior to a standard treatment.
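Under the usual normal approximation these hypotheses lead to a closed form per-group sample size. The hand calculation below is illustrative only: it assumes a formula of the standard form (one-sided alpha, equal allocation) rather than reproducing the function's internal code, and is included because it recovers the 50 subjects per group reported in Example 1 further down.

## Illustrative hand calculation (not package code) for the scenario of
## EXAMPLE 1 below: treat = 0.20, control = 0.20, sd = 0.10, delta = 0.05,
## power = 0.80, alpha = 0.05 (one-sided), equal allocation:
sigma <- 0.10; delta <- 0.05; eps <- 0.20 - 0.20
z.alpha <- qnorm(1 - 0.05); z.beta <- qnorm(1 - 0.20)

## Per-group sample size under the normal approximation:
ceiling(2 * sigma^2 * (z.alpha + z.beta)^2 / (delta - abs(eps))^2)
## Returns 50, in agreement with EXAMPLE 1.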
Showing non-inferiority can be of interest because: (a) it is often not ethically possible to do a placebo-controlled trial; (b) the new treatment is not expected to be better than the standard treatment on primary efficacy endpoints, but is safer; (c) the new treatment is not expected to be better than the standard treatment on primary efficacy endpoints, but is cheaper to produce or easier to administer; and (d) the new treatment is not expected to be better than the standard treatment on primary efficacy endpoints in clinical trial, but compliance will be better outside the clinical trial and hence efficacy better outside the trial. When calculating the power of a study, the argument \code{n} refers to the total study size (that is, the number of subjects in the treatment group plus the number in the control group). For a comparison of the key features of superiority, equivalence and non-inferiority trials, refer to the documentation for \code{\link{epi.ssequb}}. } \examples{ ## EXAMPLE 1 (from Chow S, Shao J, Wang H 2008, p. 61 - 62): ## A pharmaceutical company is interested in conducting a clinical trial ## to compare two cholesterol lowering agents for treatment of patients with ## congestive heart disease using a parallel design. The primary efficacy ## parameter is the LDL. In what follows, we will consider the situation ## where the intended trial is for testing non-inferiority of mean responses ## in LDL. Assume that 80\% power is required at a 5\% level of significance. ## In this example we assume a -0.05 unit change in LDL is a clinically ## meaningful difference. Assume the standard deviation of LDL is 0.10 units ## and the LDL concentration in the treatment group is 0.20 units and the ## LDL concentration in the control group is 0.20 units. epi.ssninfc(treat = 0.20, control = 0.20, sd = 0.10, delta = 0.05, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05) ## A total of 100 subjects need to be enrolled in the trial, 50 in the ## treatment group and 50 in the control group. } \keyword{univar} epiR/man/epi.convgrid.Rd0000644000176200001440000000147614074765156014631 0ustar liggesusers\name{epi.convgrid} \alias{epi.convgrid} \title{Convert British National Grid georeferences to easting and northing coordinates} \description{ Convert British National Grid georeferences to Brtish National Grid (EPSG 27700) easting and northing coordinates. } \usage{ epi.convgrid(osref) } \arguments{ \item{osref}{a vector of character strings listing the British National Grid georeferences to be converted.} } \note{ If an invalid georeference is encountered in the vector \code{os.ref} the method returns a \code{NA}. } \examples{ ## EXAMPLE 1: os.ref <- c("SJ505585","SJ488573","SJ652636") epi.convgrid(os.ref) os.ref <- c("SJ505585","SJ488573","ZZ123456") epi.convgrid(os.ref) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sssep.rs.Rd0000644000176200001440000001027113777226764014631 0ustar liggesusers\name{rsu.sssep.rs} \alias{rsu.sssep.rs} \title{ Sample size to achieve a desired surveillance system sensitivity assuming representative sampling } \description{ Calculates the sample size to achieve a desired surveillance system sensitivity assuming representative sampling for a single risk factor and varying unit sensitivity using the binomial method. 
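The sample sizes returned by this function can be approximated by hand. The sketch below is illustrative only and assumes the standard freedom-from-disease approximations: a binomial formula when the population size is unknown and a finite population approximation when it is known. It reproduces the sample sizes in Examples 1 and 3 further down, although the function's internal rules (for example rounding) may differ slightly.

## Illustrative sketch (not package code): sample size for a desired
## surveillance system sensitivity se.p, design prevalence pstar and unit
## sensitivity se.u.

## Binomial approximation (population size unknown):
n.binom <- function(pstar, se.p, se.u){
  ceiling(log(1 - se.p) / log(1 - pstar * se.u))
}

## Finite population approximation (population size N known):
n.hypergeom <- function(N, pstar, se.p, se.u){
  d <- ceiling(N * pstar)
  ceiling((N / se.u) * (1 - (1 - se.p)^(1 / d)))
}

n.hypergeom(N = 1000, pstar = 0.05, se.p = 0.95, se.u = 0.90)
## Returns 65, matching EXAMPLE 1 below.

n.hypergeom(N = 200, pstar = 0.20, se.p = 0.95, se.u = 1.00)
## Returns 15, matching EXAMPLE 3 below.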
} \usage{ rsu.sssep.rs(N, pstar, se.p = 0.95, se.u) } \arguments{ \item{N}{scalar integer or vector of same length as \code{pstar}, representing the population size.} \item{pstar}{a scalar or vector of either proportions (0 to 1) or a positive integers representing the design prevalence. If \code{pstar} is an integer represents the number of positive units in the population, and \code{N} must be provided.} \item{se.p}{scalar or vector of same length as \code{pstar} representing the desired surveillance system (population-level) sensitivity.} \item{se.u}{scalar (0 to 1) or vector of the same length as \code{pstar} representing the sensitivity of the diagnostic test at the surveillance unit level.} } \value{ A vector of required sample sizes. } \references{ MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \note{ This function calculates the required sample size using the hypergeometric distribution if \code{N} is provided and the binomial distribution otherwise. This function returns the sample size to achieve a desired surveillance system sensitivity. Function \code{\link{rsu.sspfree.rs}} returns the sample size to achieve a desired (posterior) probability of disease freedom. } \examples{ ## EXAMPLE 1: ## You would like to confirm the absence of disease in a single 1000-cow ## dairy herd. You expect the prevalence of disease in the herd to be 0.05. ## You intend to use a single test with a sensitivity of 0.90 and a ## specificity of 1.00. How many herds need to be sampled if you want to ## be 95\% certain that the prevalence of brucellosis in dairy herds is ## less than the design prevalence if all tests are negative? rsu.sssep.rs(N = 1000, pstar = 0.05, se.p = 0.95, se.u = 0.90) ## We need to sample 65 cows. ## EXAMPLE 2: ## You would like to confirm the absence of disease in a study area comprised ## of 5000 herds. If the disease is present you expect the between-herd ## prevalence to be 0.08. You intend to use two tests: the first has a ## sensitivity and specificity of 0.90 and 0.80, respectively. The second has ## a sensitivity and specificity of 0.95 and 0.85, respectively. The two tests ## will be interpreted in parallel. How many herds should be sampled to be ## 95\% certain that the disease would be detected if it is present in the ## study area? ## Calculate the sensitivity and specificity of the diagnostic test regime: test <- rsu.dxtest(se = c(0.90, 0.95), sp = c(0.80, 0.85), interpretation = "parallel", covar = c(0,0)) ## Interpretation of these tests in parallel returns a diagnostic sensitivity ## of 0.995 and a diagnostic specificity of 0.68. ## How many herds should be sampled? rsu.sssep.rs(N = 5000, pstar = 0.08, se.p = 0.95, se.u = test$se) ## If you test 38 herds and all return a negative test you can state that ## you are 95\% confident that the disease is absent from the study area. ## The sensitivity of this testing regime is 99\%. ## EXAMPLE 3: ## You want to document the absence of Mycoplasma from a 200-sow pig herd. ## Based on your experience and the literature, a minimum of 20\% of sows ## would have seroconverted if Mycoplasma were present in the herd. How ## many herds should we sample to be 95\% certain that Mycoplasma would ## be detected if it is present if you use a test with perfect sensitivity? 
rsu.sssep.rs(N = 200, pstar = 0.20, se.p = 0.95, se.u = 1.00) ## If you test 15 sows and all of them test negative you can be 95\% ## confident that the prevalence rate of Mycoplasma in the herd is less than ## 20\%. } \keyword{methods} epiR/man/epi.ssequc.Rd0000644000176200001440000001512714164036763014312 0ustar liggesusers\name{epi.ssequc} \alias{epi.ssequc} \title{ Sample size for a parallel equivalence trial, continuous outcome } \description{ Sample size for a parallel equivalence trial, continuous outcome. } \usage{ epi.ssequc(treat, control, sd, delta, n, r = 1, power, nfractional = FALSE, alpha) } \arguments{ \item{treat}{the expected mean of the outcome of interest in the treatment group.} \item{control}{the expected mean of the outcome of interest in the control group.} \item{sd}{the expected population standard deviation of the outcome of interest.} \item{delta}{the equivalence limit, expressed as the absolute change in the outcome of interest that represents a clinically meaningful difference. For an equivalence trial the value entered for \code{delta} must be greater than zero.} \item{n}{scalar, the total number of study subjects in the trial.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{power}{scalar, the required study power.} \item{nfractional}{logical, return fractional sample size.} \item{alpha}{scalar, defining the desired alpha level.} } \value{ A list containing the following: \item{n.total}{the total number of study subjects required.} \item{n.treat}{the required number of study subject in the treatment group.} \item{n.control}{the required number of study subject in the control group.} \item{delta}{the equivalence limit, as entered by the user.} \item{power}{the specified or calculated study power.} } \references{ Bennett JE, Dismukes WE, Duma RJ, Medoff G, Sande MA, Gallis H, Leonard J, Fields BT, Bradshaw M, Haywood H, McGee Z, Cate TR, Cobbs CG, Warner JF and Alling DW (1979). A comparison of amphotericin B alone and combined flucytosine in the treatment of cryptococcal meningitis. New England Journal of Medicine 301: 126 - 131. Chow S, Shao J, Wang H (2008). Sample Size Calculations in Clinical Research. Chapman & Hall/CRC Biostatistics Series, pp. 91. Ewald B (2013). Making sense of equivalence and non-inferiority trials. Australian Prescriber 36: 170 - 173. Julious SA (2004). Sample sizes for clinical trials with normal data. Statistics in Medicine 23: 1921 - 1986. Julious SA (2009). Estimating Samples Sizes in Clinical Trials. CRC, New York. Machin D, Campbell MJ, Tan SB, Tan SH (2009). Sample Size Tables for Clinical Studies. Wiley Blackwell, New York. Wang B, Wang H, Tu X, Feng C (2017). Comparisons of superiority, non-inferiority, and equivalence trials. Shanghai Archives of Psychiatry 29, 385 - 388. DOI: 10.11919/j.issn.1002-0829.217163. } \note{ Consider a clinical trial comparing two groups, a standard treatment (\eqn{s}) and a new treatment (\eqn{n}). In each group, the mean of the outcome of interest for subjects receiving the standard treatment is \eqn{N_{s}} and the mean of the outcome of interest for subjects receiving the new treatment is \eqn{N_{n}}. We specify the absolute value of the maximum acceptable difference between \eqn{N_{n}} and \eqn{N_{s}} as \eqn{\delta}. For a superiority trial the value entered for \code{delta} must be greater than or equal to zero. 
For an equivalence trial the null hypothesis is: \eqn{H_{0}: |N_{s} - N_{n}| \ge \delta} The alternative hypothesis is: \eqn{H_{1}: |N_{s} - N_{n}| < \delta} An equivalence trial is used if want to prove that two treatments produce the same clinical outcomes. In bioequivalence trials, a 90\% confidence interval is often used. The value of the maximum acceptable difference \eqn{\delta} is chosen so that a patient will not detect any change in effect when replacing the standard treatment with the new treatment. Note that when: \deqn{sign(P_n - P_s - \delta) \neq sign(z_{1-\alpha} + z_{1-\beta})} there is no solution for study power. For typical values of \eqn{\alpha} and \eqn{\beta} this would occur if \eqn{P_{n} - P_{s} - \delta < 0}. That is, when the targeted alternative is within the null hypothesis. The function issues a warning if these conditions are met. When calculating the power of a study, the argument \code{n} refers to the total study size (that is, the number of subjects in the treatment group plus the number in the control group). For a comparison of the key features of superiority, equivalence and non-inferiority trials, refer to the documentation for \code{\link{epi.ssequb}}. } \examples{ ## EXAMPLE 1 (from Machin, Campbell, Tan and Tan 2009 p. 113): ## It is anticipated that patients on a particular drug have a mean diastolic ## blood pressure of 96 mmHg, as against 94 mmHg on an alternative. It is also ## anticipated that the standard deviation of diastolic BP is approximately ## 8 mmHg. If one wishes to confirm that the difference is likely to be less ## than 5 mmHg, that is, one wishes to show equivalence, how many patients ## are needed to be enrolled in the trial? Assume 80\% power and ## 95\% significance. epi.ssequc(treat = 94, control = 96, sd = 8, delta = 5, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05) ## A total of 244 subjects need to be enrolled in the trial, 122 in the ## treatment group and 122 in the control group. ## EXAMPLE 2 (from Chow S, Shao J, Wang H 2008, p. 64): ## A pharmaceutical company is interested in conducting a clinical trial ## to compare two cholesterol lowering agents for treatment of patients with ## congestive heart disease using a parallel design. The primary efficacy ## parameter is the LDL. In what follows, we will consider the situation ## where the intended trial is for testing equivalence of mean responses ## in LDL. Assume that 80\% power is required at a 5\% level of significance. ## In this example, we assume a 5 unit (i.e., delta = 5) change of LDL is ## considered of clinically meaningful difference. Assume the standard ## of LDL is 10 units and the LDL concentration in the treatment group is 20 ## units and the LDL concentration in the control group is 21 units. epi.ssequc(treat = 20, control = 21, sd = 10, delta = 5, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05) ## A total of 216 subjects need to be enrolled in the trial, 108 in the ## treatment group and 108 in the control group. ## EXAMPLE 2 (cont.): ## Suppose only 150 subjects were enrolled in the trial, 75 in the treatment ## group and 75 in the control group. What is the estimated study power? epi.ssequc(treat = 20, control = 21, sd = 10, delta = 5, n = 150, r = 1, power = NA, nfractional = FALSE, alpha = 0.05) ## With only 150 subjects enrolled the estimated study power is 0.58. 
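## The per-group sample size in EXAMPLE 1 can also be checked by hand using
## the usual two one-sided tests (TOST) normal approximation. This is an
## illustrative check only and assumes the function implements a formula of
## this form (one-sided alpha, with beta split across the two tails):
sigma <- 8; delta <- 5; eps <- abs(94 - 96)
z.alpha <- qnorm(1 - 0.05); z.beta <- qnorm(1 - 0.20 / 2)
ceiling(2 * sigma^2 * (z.alpha + z.beta)^2 / (delta - eps)^2)
## Returns 122 subjects per group, in agreement with EXAMPLE 1 above.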
} \keyword{univar} epiR/man/epi.epidural.Rd0000644000176200001440000000252713117711412014600 0ustar liggesusers\name{epi.epidural} \docType{data} \alias{epi.epidural} \title{Rates of use of epidural anaesthesia in trials of caregiver support} \description{ This data set provides results of six trials investigating rates of use of epidural anaesthesia during childbirth. Each trial is made up of a group where a caregiver (midwife, nurse) provided support intervention and a group where standard care was provided. The objective was to determine if there were higher rates of epidural use when a caregiver was present at birth. } \usage{data(epi.epidural)} \format{ A data frame with 6 observations on the following 5 variables. \describe{ \item{trial}{the name and year of the trial.} \item{ev.trt}{number of births in the caregiver group where an epidural was used.} \item{n.trt}{number of births in the caregiver group.} \item{ev.ctrl}{number of births in the standard care group where an epidural was used.} \item{n.ctrl}{number of births in the standard care group.} } } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, pp. 291 - 299. } \keyword{datasets} epiR/man/epi.cpresids.Rd0000644000176200001440000000361314164036762014617 0ustar liggesusers\name{epi.cpresids} \alias{epi.cpresids} \title{ Covariate pattern residuals from a logistic regression model } \description{ Returns covariate pattern residuals and delta betas from a logistic regression model. } \usage{ epi.cpresids(obs, fit, covpattern) } \arguments{ \item{obs}{a vector of observed values (i.e., counts of `successes') for each covariate pattern).} \item{fit}{a vector defining the predicted (i.e., fitted) probability of success for each covariate pattern.} \item{covpattern}{a \code{\link{epi.cp}} object.} } \value{ A data frame with 13 elements: \code{cpid} the covariate pattern identifier, \code{n} the number of subjects in this covariate pattern, \code{obs} the observed number of successes, \code{pred} the predicted number of successes, \code{raw} the raw residuals, \code{sraw} the standardised raw residuals, \code{pearson} the Pearson residuals, \code{spearson} the standardised Pearson residuals, \code{deviance} the deviance residuals, \code{leverage} leverage, \code{deltabeta} the delta-betas, \code{sdeltabeta} the standardised delta-betas, and \code{deltachi} delta chi statistics. } \references{ Hosmer DW, Lemeshow S (1989). Applied Logistic Regression. John Wiley & Sons, New York, USA, pp. 137 - 138. 
} \seealso{ \code{\link{epi.cp}} } \examples{ ## EXAMPLE 1: dat.glm01 <- glm(case ~ spontaneous + induced, family = binomial(), data = infert) ## Covariate patterns: dat.mf01 <- model.frame(dat.glm01) dat.cp01 <- epi.cp(dat.mf01[-1]) dat.obs01 <- as.vector(by(infert$case, as.factor(dat.cp01$id), FUN = sum)) dat.fit01 <- as.vector(by(fitted(dat.glm01), as.factor(dat.cp01$id), FUN = min)) dat.cpr01 <- epi.cpresids(obs = dat.obs01, fit = dat.fit01, covpattern = dat.cp01) head(dat.cpr01) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sep.rb2st.Rd0000644000176200001440000001337614011615550014655 0ustar liggesusers\name{rsu.sep.rb2st} \alias{rsu.sep.rb2st} \title{ Surveillance system sensitivity assuming risk based, two-stage sampling } \description{ Calculates the surveillance system sensitivity for detection of disease assuming risk based, two-stage sampling (sampling of clusters and sampling of units within clusters), imperfect test sensitivity and perfect test specificity. The method allows for a single risk factor at each stage. } \usage{ rsu.sep.rb2st(H = NA, N = NA, n, rr.c, ppr.c, pstar.c, rr.u, ppr.u, pstar.u, rg, se.u) } \arguments{ \item{H}{scalar, integer representing the total number of clusters in the population. Use \code{NA} if unknown.} \item{N}{vector, integer representing the number of surveillance units within each cluster. Use \code{NA} if unknown.} \item{n}{vector, integer representing the number of surveillance units tested within each cluster.} \item{rr.c}{cluster level relative risks (vector of length corresponding to the number of risk strata), use \code{rr.c = c(1,1)} if a risk factor does not apply.} \item{ppr.c}{vector listing the cluster level population proportions for each risk category. Use \code{NA} if there are no cluster level risk factors.} \item{pstar.c}{scalar, numeric (0 to 1) the cluster-level design prevalence.} \item{rr.u}{surveillance unit level relative risks (vector of length corresponding to the number of risk strata), \code{use rr.u = c(1,1)} if a risk factor does not apply.} \item{ppr.u}{matrix providing the surveillance unit level population proportions for each risk group. One row for each cluster, columns = unit level risk groups, not required if \code{N} is provided.} \item{pstar.u}{scalar, numeric (0 to 1) the unit-level design prevalence.} \item{rg}{vector, listing the risk group (index) for each cluster.} \item{se.u}{scalar, numeric (0 to 1), representing the sensitivity of the diagnostic test at the individual surveillance unit level.} } \value{ A list comprised of: \item{se.p}{the surveillance system (population-level) sensitivity of detection.} \item{se.c}{the cluster-level sensitivity of detection.} } \examples{ ## EXAMPLE 1: ## You have been asked to provide an assessment of a surveillance program ## for Actinobacillus hyopneumoniae in pigs. It is known that there are ## high risk and low risk areas for A. hypopneumoniae in your country with ## the estimated probability of disease in the high risk area thought to ## be around 3.5 times that of the probability of disease in the low risk area. ## It is known that 10\% of the 1784 pig herds in the study area are in the ## high risk area and 90\% are in the low risk area. ## The risk of A. hypopneumoniae is dependent on age, with adult pigs around ## five times more likely to be A. hypopneumoniae positive compared with ## younger (grower) pigs. 
## Pigs from 20 herds have been sampled: 5 from the low-risk area and 15 from ## the high-risk area. All of the tested pigs were adults: no grower pigs ## were tested. ## The ELISA for A. hypopneumoniae in pigs has a diagnostic sensitivity ## of 0.95. ## What is the surveillance system sensitivity if we assume a design ## prevalence of 1 per 100 at the cluster (herd) level and 5 per 100 ## at the surveillance system unit (pig) level? # There are 1784 herds in the study area: H <- 1784 # Twenty of the 1784 herds are sampled. Generate 20 herds of varying size: set.seed(1234) hsize <- rlnorm(n = 20, meanlog = log(10), sdlog = log(8)) hsize <- round(hsize + 20, digits = 0) # Generate a matrix listing the number of growers and finishers in each of ## the 20 sampled herds. Anywhere between 80\% and 95\% of the animals in ## each herd are growers: set.seed(1234) pctg <- runif(n = 20, min = 0.80, max = 0.95) ngrow <- round(pctg * hsize, digits = 0) nfini <- hsize - ngrow N <- cbind(ngrow, nfini) # Generate a matrix listing the number of grower and finisher pigs sampled ## from each herd: nsgrow <- rep(0, times = 20) nsfini <- ifelse(nfini <= 15, nfini, 15) n <- cbind(nsgrow, nsfini) # The herd-level design prevalence is 0.01 and the individual pig-level design ## prevalence is 0.05: pstar.c <- 0.01 pstar.u <- 0.05 # For herds in the high-risk area the probability being A. hyopneumoniae ## positive is 3.5 times that of herds in the low-risk area. Ninety ## percent of herds are in the low risk area and 10\% are in the high risk area: rr.c <- c(1,3.5) ppr.c <- c(0.9,0.1) ## We've sampled 5 herds from the low risk area and 15 herds from the ## high risk area: rg <- c(rep(1, times = 5), rep(2, times = 15)) ## For finishers the probability being A. hyopneumoniae positive is 5 times ## that of growers: rr.u <- c(1,5) ## The diagnostic sensitivity of the A. hyopneumoniae ELISA is 0.95: se.u <- 0.95 rsu.sep.rb2st(H = H, N = N, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = NA, se.u = se.u) ## The estimated surveillance system sensitivity of this program is 0.31. ## EXAMPLE 2: ## Repeat these analyses assuming we don't know the total number of pig herds ## in the population and we have only an estimate of the proportions of ## growers and finishers in each herd. ## Generate a matrix listing the proportion of growers and finishers in each ## of the 20 sampled herds: ppr.u <- cbind(rep(0.9, times = 20), rep(0.1, times = 20)) # Set H (the number of clusters) and N (the number of surveillance units ## within each cluster) to NA: rsu.sep.rb2st(H = NA, N = NA, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = ppr.u, se.u = se.u) ## The estimated surveillance system sensitivity is 0.20. } \keyword{methods} epiR/man/epi.SClip.Rd0000644000176200001440000000420313117711436014004 0ustar liggesusers\name{epi.SClip} \docType{data} \alias{epi.SClip} \title{Lip cancer in Scotland 1975 - 1980} \description{ This data set provides counts of lip cancer diagnoses made in Scottish districts from 1975 to 1980. In addition to district-level counts of disease events and estimates of the size of the population at risk, the data set contains (for each district) an estimate of the percentage of the population involved in outdoor industry (agriculture, fishing, and forestry). 
It is known that exposure to sunlight is a risk factor for cancer of the lip and high counts are to be expected in districts where there is a high proportion of the workforce involved in outdoor industry. } \usage{data(epi.SClip)} \format{ A data frame with 56 observations on the following 6 variables. \describe{ \item{gridcode}{alternative district identifier.} \item{id}{numeric district identifier (1 to 56).} \item{district}{district name.} \item{cases}{number of lip cancer cases diagnosed 1975 - 1980.} \item{population}{total person years at risk 1975 - 1980.} \item{prop.ag}{percent of the population engaged in outdoor industry.} } } \source{ This data set has been analysed by a number of authors including Clayton and Kaldor (1987), Conlon and Louis (1999), Stern and Cressie (1999), and Carlin and Louis (2000, p 270). } \references{ Clayton D, Kaldor J (1987). Empirical Bayes estimates of age-standardized relative risks for use in disease mapping. Biometrics 43: 671 - 681. Conlon EM, Louis TA (1999). Addressing multiple goals in evaluating region-specific risk using Bayesian methods. In: Lawson AB (Editor), Disease Mapping and Risk Assessment for Public Health. John Wiley & Sons, Ltd, Chichester, pp. 31 - 47. Stern H, Cressie N (1999). Inference in extremes in disease mapping. In: Lawson AB (Editor), Disease Mapping and Risk Assessment for Public Health. John Wiley & Sons, Ltd, Chichester, pp. 63 - 84. Carlin BP, Louis TA (2000). Bayes and Empirical Bayes Methods for Data Analysis - Monographs on Statistics and Applied Probability 69. Chapman and Hall, London, pp. 270. } \keyword{datasets}epiR/man/epi.2by2.Rd0000644000176200001440000007675114164036761013575 0ustar liggesusers\name{epi.2by2} \alias{epi.2by2} \alias{print.epi.2by2} \alias{summary.epi.2by2} \title{ Summary measures for count data presented in a 2 by 2 table } \description{ Computes summary measures of risk and a chi-squared test for difference in the observed proportions from count data presented in a 2 by 2 table. With multiple strata the function returns crude and Mantel-Haenszel adjusted measures of association and chi-squared tests of homogeneity. } \usage{ epi.2by2(dat, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") \method{print}{epi.2by2}(x, ...) \method{summary}{epi.2by2}(object, ...) } \arguments{ \item{dat}{a vector of length four, an object of class \code{table} or an object of class \code{grouped_df} from package \code{dplyr} containing the individual cell frequencies (see below).} \item{method}{a character string indicating the study design on which the tabular data has been based. Options are \code{cohort.count}, \code{cohort.time}, \code{case.control}, or \code{cross.sectional}. Based on the study design specified by the user, appropriate measures of association, measures of effect in the exposed and measures of effect in the population are returned by the function.} \item{digits}{scalar, number of digits to be reported for \code{print} output. Must be an integer of either 2, 3 or 4.} \item{conf.level}{magnitude of the returned confidence intervals. Must be a single number between 0 and 1.} \item{units}{multiplier for prevalence and incidence (risk or rate) estimates.} \item{interpret}{logical. If \code{TRUE} interpretive statements are appended to the \code{print}\code{epi.2by2} object.} \item{outcome}{a character string indicating how the outcome variable is represented in the contingency table. 
Options are \code{as.columns} (outcome as columns) or \code{as.rows} (outcome as rows).} \item{x, object}{an object of class \code{epi.2by2}.} \item{...}{Ignored.} } \details{ Where method is \code{cohort.count}, \code{case.control}, or \code{cross.sectional} and \code{outcome = as.columns} the required 2 by 2 table format is: \tabular{llll}{ -----------\tab ----------\tab ---------- \tab ----------\cr \tab Disease + \tab Disease - \tab Total \cr -----------\tab ----------\tab ---------- \tab ----------\cr Expose + \tab a \tab b \tab a+b \cr Expose - \tab c \tab d \tab c+d \cr -----------\tab ----------\tab ---------- \tab ----------\cr Total \tab a+c \tab b+d \tab a+b+c+d \cr -----------\tab ----------\tab ---------- \tab ----------\cr } Where method is \code{cohort.time} and \code{outcome = as.columns} the required 2 by 2 table format is: \tabular{llll}{ -----------\tab ----------\tab ------------- \cr \tab Disease + \tab Time at risk \cr -----------\tab ----------\tab ------------- \cr Expose + \tab a \tab b \cr Expose - \tab c \tab d \cr -----------\tab ----------\tab ------------- \cr Total \tab a+c \tab b+d \cr -----------\tab ----------\tab ------------- \cr } A summary of the methods used for each of the confidence interval calculations in this function is as follows: } \value{ An object of class \code{epi.2by2} comprised of: \item{method}{character string returning the study design specified by the user.} \item{n.strata}{number of strata.} \item{conf.level}{magnitude of the returned confidence intervals.} \item{interp}{logical. Are interpretative statements included?} \item{units}{character string listing the outcome measure units.} \item{tab}{a data frame comprised of of the contingency table data.} \item{massoc.summary}{a data frame listing the computed measures of association, measures of effect in the exposed and measures of effect in the population and their confidence intervals.} \item{massoc.interp}{a data frame listing the interpretive statements for each computed measure of association.} \item{massoc.detail}{a list comprised of the computed measures of association, measures of effect in the exposed and measures of effect in the population. See below for details.} When method equals \code{cohort.count} the following measures of association, measures of effect in the exposed and measures of effect in the population are returned: \item{\code{RR}}{Wald, Taylor and score confidence intervals for the incidence risk ratios for each strata. Wald, Taylort and score confidence intervals for the crude incidence risk ratio. Wald confidence interval for the Mantel-Haenszel adjusted incidence risk ratio.} \item{\code{OR}}{Wald, score, Cornfield and maximum likelihood confidence intervals for the odds ratios for each strata. Wald, score, Cornfield and maximum likelihood confidence intervals for the crude odds ratio. Wald confidence interval for the Mantel-Haenszel adjusted odds ratio.} \item{\code{ARisk}}{Wald and score confidence intervals for the attributable risk (risk difference) for each strata. Wald and score confidence intervals for the crude attributable risk. Wald, Sato and Greenland-Robins confidence intervals for the Mantel-Haenszel adjusted attributable risk.} \item{\code{PARisk}}{Wald and Pirikahu confidence intervals for the population attributable risk for each strata. Wald and Pirikahu confidence intervals for the crude population attributable risk. 
The Pirikahu confidence intervals are calculated using the delta method.}
\item{\code{AFRisk}}{Wald confidence intervals for the attributable fraction for each strata. Wald confidence intervals for the crude attributable fraction.}
\item{\code{PAFRisk}}{Wald confidence intervals for the population attributable fraction for each strata. Wald confidence intervals for the crude population attributable fraction.}
\item{\code{chisq.strata}}{chi-squared test for difference in exposed and non-exposed proportions for each strata.}
\item{\code{chisq.crude}}{chi-squared test for difference in exposed and non-exposed proportions across all strata.}
\item{\code{chisq.mh}}{Mantel-Haenszel chi-squared test that the combined odds ratio estimate is equal to 1.}
\item{\code{RR.homog}}{Mantel-Haenszel (Woolf) test of homogeneity of the individual strata incidence risk ratios.}
\item{\code{OR.homog}}{Mantel-Haenszel (Woolf) test of homogeneity of the individual strata odds ratios.}
When method equals \code{cohort.time} the following measures of association and effect are returned:
\item{\code{IRR}}{Wald confidence interval for the incidence rate ratios for each strata. Wald confidence interval for the crude incidence rate ratio. Wald confidence interval for the Mantel-Haenszel adjusted incidence rate ratio.}
\item{\code{ARate}}{Wald confidence interval for the attributable rate for each strata. Wald confidence interval for the crude attributable rate. Wald confidence interval for the Mantel-Haenszel adjusted attributable rate.}
\item{\code{PARate}}{Wald confidence interval for the population attributable rate for each strata. Wald confidence intervals for the crude population attributable rate.}
\item{\code{AFRate}}{Wald confidence interval for the attributable fraction for each strata. Wald confidence interval for the crude attributable fraction.}
\item{\code{PAFRate}}{Wald confidence interval for the population attributable fraction for each strata. Wald confidence interval for the crude population attributable fraction.}
\item{\code{chisq.strata}}{chi-squared test for difference in exposed and non-exposed proportions for each strata.}
\item{\code{chisq.crude}}{chi-squared test for difference in exposed and non-exposed proportions across all strata.}
\item{\code{chisq.mh}}{Mantel-Haenszel chi-squared test that the combined odds ratio estimate is equal to 1.}
When method equals \code{case.control} the following measures of association and effect are returned:
\item{\code{OR}}{Wald, score, Cornfield and maximum likelihood confidence intervals for the odds ratios for each strata. Wald, score, Cornfield and maximum likelihood confidence intervals for the crude odds ratio. Wald confidence interval for the Mantel-Haenszel adjusted odds ratio.}
\item{\code{ARisk}}{Wald and score confidence intervals for the attributable risk for each strata. Wald and score confidence intervals for the crude attributable risk. Wald, Sato and Greenland-Robins confidence intervals for the Mantel-Haenszel adjusted attributable risk.}
\item{\code{PARisk}}{Wald and Pirikahu confidence intervals for the population attributable risk for each strata. Wald and Pirikahu confidence intervals for the crude population attributable risk.}
\item{\code{AFest}}{Wald confidence intervals for the estimated attributable fraction for each strata. Wald confidence intervals for the crude estimated attributable fraction.}
\item{\code{PAFest}}{Wald confidence intervals for the population estimated attributable fraction for each strata.
Wald confidence intervals for the crude population estimated attributable fraction.} \item{\code{chisq.strata}}{chi-squared test for difference in exposed and non-exposed proportions for each strata.} \item{\code{chisq.crude}}{chi-squared test for difference in exposed and non-exposed proportions across all strata.} \item{\code{chisq.mh}}{Mantel-Haenszel chi-squared test that the combined odds ratio estimate is equal to 1.} \item{\code{OR.homog}}{Mantel-Haenszel (Woolf) test of homogeneity of the individual strata odds ratios.} When method equals \code{cross.sectional} the following measures of association and effect are returned: \item{\code{PR}}{Wald, Taylor and score confidence intervals for the prevalence ratios for each strata. Wald, Taylor and score confidence intervals for the crude prevalence ratio. Wald confidence interval for the Mantel-Haenszel adjusted prevalence ratio.} \item{\code{OR}}{Wald, score, Cornfield and maximum likelihood confidence intervals for the odds ratios for each strata. Wald, score, Cornfield and maximum likelihood confidence intervals for the crude odds ratio. Wald confidence interval for the Mantel-Haenszel adjusted odds ratio.} \item{\code{ARisk}}{Wald and score confidence intervals for the attributable risk for each strata. Wald and score confidence intervals for the crude attributable risk. Wald, Sato and Greenland-Robins confidence intervals for the Mantel-Haenszel adjusted attributable risk.} \item{\code{PARisk}}{Wald and Pirikahu confidence intervals for the population attributable risk for each strata. Wald and Pirikahu confidence intervals for the crude population attributable risk.} \item{\code{AFRisk}}{Wald confidence intervals for the attributable fraction for each strata. Wald confidence intervals for the crude attributable fraction.} \item{\code{PAFRisk}}{Wald confidence intervals for the population attributable fraction for each strata. Wald confidence intervals for the crude population attributable fraction.} \item{\code{chisq.strata}}{chi-squared test for difference in exposed and non-exposed proportions for each strata.} \item{\code{chisq.crude}}{chi-squared test for difference in exposed and non-exposed proportions across all strata.} \item{\code{chisq.mh}}{Mantel-Haenszel chi-squared test that the combined odds ratio estimate is equal to 1.} \item{\code{PR.homog}}{Mantel-Haenszel (Woolf) test of homogeneity of the individual strata prevalence ratios.} \item{\code{OR.homog}}{Mantel-Haenszel (Woolf) test of homogeneity of the individual strata odds ratios.} The point estimates of the \code{wald}, \code{score} and \code{cfield} odds ratios are calculated using the cross product method. Method \code{mle} computes the conditional maximum likelihood estimate of the odds ratio. Confidence intervals for the Cornfield (\code{cfield}) odds ratios are computed using the hypergeometric distribution and computation times are slow when the cell frequencies are large. For this reason, Cornfield confidence intervals are only calculated if the total number of event frequencies is less than 500. Maximum likelihood estimates of the odds ratio and Fisher's exact test are only calculated when the total number of observations is less than 2E09. If the Haldane-Anscombe (Haldane 1940, Anscombe 1956) correction is applied (i.e., addition of 0.5 to each cell of the 2 by 2 table when at least one of the cell frequencies is zero) Cornfield (\code{cfield}) odds ratios are not computed. Variable \code{phi.coef} equals the phi coefficient (Fleiss et al. 
2003, Equation 6.2, p. 98) and is included with the output for each of the uncorrected chi-squared tests. This value can be used for argument \code{rho.cc} in \code{epi.sscc}. Refer to the documentation for \code{\link{epi.sscc}} for details. The Mantel-Haenszel chi-squared test that the combined odds ratio estimate is equal to 1 uses a two-sided test without continuity correction. Intepretive statements for the number needed to treat to benefit (NNTB) and number needed to treat to harm (NNTH) follow the approach described by Altman (1998). See the examples for details. } \references{ Altman D (1998). British Medical Journal 317, 1309 - 1312. Altman D, Machin D, Bryant T, Gardner M (2000). Statistics with Confidence. British Medical Journal, London, pp. 69. Anscombe F (1956). On estimating binomial response relations. Biometrika 43, 461 - 464. Cornfield, J (1956). A statistical problem arising from retrospective studies. In: Proceedings of the Third Berkeley Symposium on Mathematical Statistics and Probability, University of California Press, Berkeley California 4: 135 - 148. Elwood JM (2007). Critical Appraisal of Epidemiological Studies and Clinical Trials. Oxford University Press, London. Feinstein AR (2002). Principles of Medical Statistics. Chapman Hall/CRC, London, pp. 332 - 336. Fisher RA (1962). Confidence limits for a cross-product ratio. Australian Journal of Statistics 4: 41. Feychting M, Osterlund B, Ahlbom A (1998). Reduced cancer incidence among the blind. Epidemiology 9: 490 - 494. Fleiss JL, Levin B, Paik MC (2003). Statistical Methods for Rates and Proportions. John Wiley and Sons, New York. Haldane J (1940). The mean and variance of the moments of chi square, when used as a test of homogeneity, when expectations are small. Biometrika 29, 133 - 143. Hanley JA (2001). A heuristic approach to the formulas for population attributable fraction. Journal of Epidemiology and Community Health 55: 508 - 514. Hightower AW, Orenstein WA, Martin SM (1988) Recommendations for the use of Taylor series confidence intervals for estimates of vaccine efficacy. Bulletin of the World Health Organization 66: 99 - 105. Jewell NP (2004). Statistics for Epidemiology. Chapman & Hall/CRC, London, pp. 84 - 85. Juul S (2004). Epidemiologi og evidens. Munksgaard, Copenhagen. Kirkwood BR, Sterne JAC (2003). Essential Medical Statistics. Blackwell Science, Malden, MA, USA. Klingenberg B (2014). A new and improved confidence interval for the Mantel-Haenszel risk difference. Statistics in Medicine 33: 2968 - 2983. Lancaster H (1961) Significance tests in discrete distributions. Journal of the American Statistical Association 56: 223 - 234. Lash TL, VanderWeele TJ, Haneuse S, Rothman KJ (2021). Modern Epidemiology. Lippincott - Raven Philadelphia, USA, pp. 79 - 103. Lawson R (2004). Small sample confidence intervals for the odds ratio. Communications in Statistics Simulation and Computation 33: 1095 - 1113. Martin SW, Meek AH, Willeberg P (1987). Veterinary Epidemiology Principles and Methods. Iowa State University Press, Ames, Iowa, pp. 130. McNutt L, Wu C, Xue X, Hafner JP (2003). Estimating the relative risk in cohort studies and clinical trials of common outcomes. American Journal of Epidemiology 157: 940 - 943. Miettinen OS, Nurminen M (1985). Comparative analysis of two rates. Statistics in Medicine 4: 213 - 226. Pirikahu S (2014). Confidence Intervals for Population Attributable Risk. Unpublished MSc thesis. Massey University, Palmerston North, New Zealand. Robbins AS, Chao SY, Fonesca VP (2002). 
What's the relative risk? A method to directly estimate risk ratios in cohort studies of common outcomes. Annals of Epidemiology 12: 452 - 454. Sullivan KM, Dean A, Soe MM (2009). OpenEpi: A Web-based Epidemiologic and Statistical Calculator for Public Health. Public Health Reports 124: 471 - 474. Wald A (1943). Tests of statistical hypotheses concerning several parameters when the number of observations is large. Transactions of the American Mathematical Society 54: 426 - 482. Willeberg P (1977). Animal disease information processing: Epidemiologic analyses of the feline urologic syndrome. Acta Veterinaria Scandinavica. Suppl. 64: 1 - 48. Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 89 - 124. Zhang J, Yu KF (1998). What's the relative risk? A method for correcting the odds ratio in cohort studies of common outcomes. Journal of the American Medical Association 280: 1690 - 1691. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia), Cord Heuer (EpiCentre, IVABS, Massey University, Palmerston North, New Zealand), Jim Robison-Cox (Department of Math Sciences, Montana State University, Montana, USA), Kazuki Yoshida (Brigham and Women's Hospital, Boston Massachusetts, USA) and Simon Firestone (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). Thanks to Ian Dohoo for numerous helpful suggestions to improve the documentation for this function. } \note{Measures of association include the prevalence ratio, the incidence risk ratio, the incidence rate ratio and the odds ratio. The incidence risk ratio is the ratio of the incidence risk of disease in the exposed group to the incidence risk of disease in the unexposed group. The odds ratio (also known as the cross-product ratio) is an estimate of the incidence risk ratio. When the incidence of an outcome in the study population is low (say, less than 5\%) the odds ratio will provide a reliable estimate of the incidence risk ratio. The more frequent the outcome becomes, the more the odds ratio will overestimate the incidence risk ratio when it is greater than than 1 or understimate the incidence risk ratio when it is less than 1. Measures of effect in the exposed include the attributable risk (or prevalence) and the attributable fraction. The attributable risk is the risk of disease in the exposed group minus the risk of disease in the unexposed group. The attributable risk provides a measure of the absolute increase or decrease in risk associated with exposure. The attributable fraction is the proportion of study outcomes in the exposed group that is attributable to exposure. Measures of effect in the population include the population attributable risk (or prevalence) and the population attributable fraction (also known as the aetiologic fraction). The population attributable risk is the risk of the study outcome in the population that may be attributed to exposure. The population attributable fraction is the proportion of the study outcomes in the population that is attributable to exposure. Point estimates and confidence intervals for the prevalence ratio and incidence risk ratio are calculated using Wald (Wald 1943) and score methods (Miettinen and Nurminen 1985). Point estimates and confidence intervals for the incidence rate ratio are calculated using the exact method described by Kirkwood and Sterne (2003) and Juul (2004). 
Point estimates and confidence intervals the odds ratio are calculated using Wald (Wald 1943), score (Miettinen and Nurminen 1985) and maximum likelihood methods (Fleiss et al. 2003). Point estimates and confidence intervals for the population attributable risk are calculated using formulae provided by Lash et al (2021) and Pirikahu (2014). Point estimates and confidence intervals for the population attributable fraction are calculated using formulae provided by Jewell (2004, p 84 - 85). Point estimates and confidence intervals for the Mantel-Haenszel adjusted attributable risk are calculated using formulae provided by Klingenberg (2014). Wald confidence intervals are provided in the summary table simply because they are widely used and would be familiar to most users. The Mantel-Haenszel adjusted measures of association are valid when the measures of association across the different strata are similar (homogenous), that is when the test of homogeneity of the odds (risk) ratios is not significant. The Mantel-Haenszel (Woolf) test of homogeneity of the odds ratio are based on Jewell (2004, p 152 - 158). Thanks to Jim Robison-Cox for sharing his implementation of these functions. } \examples{ ## EXAMPLE 1: ## A cross sectional study investigating the relationship between dry cat ## food (DCF) and feline urologic syndrome (FUS) was conducted (Willeberg ## 1977). Counts of individuals in each group were as follows: ## DCF-exposed cats (cases, non-cases) 13, 2163 ## Non DCF-exposed cats (cases, non-cases) 5, 3349 ## Outcome variable (FUS) as columns: dat.v01 <- c(13,2163,5,3349); dat.v01 epi.2by2(dat = dat.v01, method = "cross.sectional", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") ## Outcome variable (FUS) as rows: dat.v01 <- c(13,5,2163,3349); dat.v01 epi.2by2(dat = dat.v01, method = "cross.sectional", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.rows") ## The prevalence of FUS in DCF exposed cats was 4.01 (95\% CI 1.43 to 11.23) ## times greater than the prevalence of FUS in non-DCF exposed cats. ## In DCF exposed cats, 75\% (95\% CI 30\% to 91\%) of the FUS cases were ## attributable to DCF. ## Fifty-four percent of FUS cases in the population was attributable ## to DCF (95\% CI 4\% to 78\%). ## EXAMPLE 2: ## This example shows how the table function in base R can be used to pass ## data to epi.2by2. Here we use the birthwt data set from the MASS package. library(MASS) dat.df02 <- birthwt; head(dat.df02) ## Generate a table of cell frequencies. First, set the outcome and exposure ## as factors and set their levels appropriately so the frequencies in the ## 2 by 2 table come out in the conventional format: dat.df02$low <- factor(dat.df02$low, levels = c(1,0)) dat.df02$smoke <- factor(dat.df02$smoke, levels = c(1,0)) dat.df02$race <- factor(dat.df02$race, levels = c(1,2,3)) dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")) print(dat.tab02) ## Compute the odds ratio and other measures of association: epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") ## The odds of having a low birth weight child for smokers was 2.02 ## (95\% CI 1.08 to 3.78) times greater than the odds of having a low birth ## weight child for non-smokers. 
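## As a brief aside, the printed output is not the only way to read off the
## results: the elements listed in the Value section above can be saved and
## inspected directly. A short sketch (the object name dat.epi02a is used
## here only to avoid clashing with objects created later in this example):
dat.epi02a <- epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2,
   conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")

## Point estimates and confidence intervals returned as a data frame:
dat.epi02a$massoc.summary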
## Stratify by race: dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dat.df02$race, dnn = c("Smoke", "Low BW", "Race")) print(dat.tab02) ## Compute the crude odds ratio, the Mantel-Haenszel adjusted odds ratio ## and other measures of association: dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") print(dat.epi02) ## The Mantel-Haenszel test of homogeneity of the strata odds ratios is not ## significant (chi square test statistic 2.800; df 2; p-value = 0.25). ## We accept the null hypothesis and conclude that the odds ratios for ## each strata of race are the same. ## After accounting for the confounding effect of race, the odds of ## having a low birth weight child for smokers was 3.09 (95\% CI 1.49 to 6.39) ## times that of non-smokers. ## Compare the Greenland-Robins confidence intervals for the Mantel-Haenszel ## adjusted attributable risk with the Wald confidence intervals for the ## Mantel-Haenszel adjusted attributable risk: dat.epi02$massoc.detail$ARisk.mh.green dat.epi02$massoc.detail$ARisk.mh.wald ## How many mothers need to stop smoking to avoid one low birth weight baby? dat.epi02$massoc.interp$text[dat.epi02$massoc.interp$var == "NNTB NNTH (crude)"] ## If we don't account for confounding the number of mothers that need to ## stop smoking to avoid one low birth weight baby (NNTB) is ## 7 (95\% CI 3 to 62). dat.epi02$massoc.interp$text[dat.epi02$massoc.interp$var == "NNTB NNTH (M-H)"] ## After accounting for the confounding effect of race the number of mothers ## that need to stop smoking to avoid one low birth weight baby (NNTB) is ## 5 (95\% CI 2 to 71). ## Now turn dat.tab02 into a data frame where the frequencies of individuals in ## each exposure-outcome category are provided. Often your data will be ## presented in this summary format: dat.df02 <- data.frame(dat.tab02); head(dat.df02) ## Re-format dat.df02 (a summary count data frame) into tabular format using ## the xtabs function: dat.tab02 <- xtabs(Freq ~ Smoke + Low.BW + Race, data = dat.df02) print(dat.tab02) # dat02.tab can now be passed to epi.2by2: dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") print(dat.epi02) ## The Mantel-Haenszel adjusted odds ratio is 3.09 (95\% CI 1.49 to 6.39). The ## ratio of the crude odds ratio to the Mantel-Haensel adjusted odds ratio is ## 0.66. ## What are the Cornfield confidence limits, the maximum likelihood ## confidence limits and the score confidence limits for the crude odds ratio? dat.epi02$massoc.detail$OR.crude.cfield dat.epi02$massoc.detail$OR.crude.mle dat.epi02$massoc.detail$OR.crude.score ## Cornfield: 2.02 (95\% CI 1.07 to 3.79) ## Maximum likelihood: 2.01 (1.03 to 3.96) # Score: 2.02 (95\% CI 1.08 to 3.77) ## Plot the individual strata-level odds ratios and compare them with the ## Mantel-Haenszel adjusted odds ratio. 
\dontrun{ library(ggplot2); library(scales) nstrata <- 1:dim(dat.tab02)[3] strata.lab <- paste("Strata ", nstrata, sep = "") y.at <- c(nstrata, max(nstrata) + 1) y.lab <- c("M-H", strata.lab) x.at <- c(0.25,0.5,1,2,4,8,16,32) or.p <- c(dat.epi02$massoc.detail$OR.mh$est, dat.epi02$massoc.detail$OR.strata.cfield$est) or.l <- c(dat.epi02$massoc.detail$OR.mh$lower, dat.epi02$massoc.detail$OR.strata.cfield$lower) or.u <- c(dat.epi02$massoc.detail$OR.mh$upper, dat.epi02$massoc.detail$OR.strata.cfield$upper) dat.df02 <- data.frame(y.at, y.lab, or.p, or.l, or.u) ggplot(data = dat.df02, aes(x = or.p, y = y.at)) + geom_point() + geom_errorbarh(aes(xmax = or.l, xmin = or.u, height = 0.2)) + labs(x = "Odds ratio", y = "Strata") + scale_x_continuous(trans = log2_trans(), breaks = x.at, limits = c(0.25,32)) + scale_y_continuous(breaks = y.at, labels = y.lab) + geom_vline(xintercept = 1, lwd = 1) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0)) } ## EXAMPLE 3: ## Sometimes you'll have only event count data for a stratified analysis. This ## example shows how to coerce a three column matrix listing (in order) counts ## of outcome positive individuals, counts of outcome negative individuals (or ## total time at risk, as in the example below) and strata number into a three ## dimensional array. We assume that two rows are recorded for each strata. ## The first for those exposed and the second for those unexposed: dat.m03 <- matrix(c(1308,884,200,190,4325264,13142619,1530342,5586741,1,1,2,2), nrow = 4, ncol = 3, byrow = FALSE) colnames(dat.m03) <- c("obs","tar","grp") dat.df03 <- data.frame(dat.m03) ## Here we use the apply function to coerce the two rows for each strata into ## tabular format. An array is created of with the length of the third ## dimension of the array equal to the number of strata: dat.tab03 <- sapply(1:length(unique(dat.df03$grp)), function(x) as.matrix(dat.df03[dat.df03$grp == x,1:2], ncol = 2, byrow = TRUE), simplify = "array") dat.tab03 epi.2by2(dat = dat.tab03, method = "cohort.time", digits = 2, conf.level = 0.95, units = 1000 * 365.25, interpret = FALSE, outcome = "as.columns") ## The Mantel-Haenszel adjusted incidence rate ratio was 4.49 (95\% CI 4.15 ## to 4.86). ## EXAMPLE 4: ## Same as Example 2 but showing how a 2 by 2 contingency table can be prepared ## using tidyverse: \dontrun{ library(MASS); library(tidyverse) dat.df04 <- birthwt; head(dat.df04) dat.tab04 <- dat.df04 \%>\% mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) \%>\% mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) \%>\% mutate(race = factor(race)) \%>\% group_by(race, smoke, low) \%>\% summarise(n = n()) dat.tab04 ## View the data in conventional 2 by 2 table format: pivot_wider(dat.tab04, id_cols = c(race, smoke), names_from = low, values_from = n) dat.epi04 <- epi.2by2(dat = dat.tab04, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi04 } ## The Mantel-Haenszel test of homogeneity of the strata odds ratios is not ## significant (chi square test statistic 2.800; df 2; p-value = 0.25). ## We accept the null hypothesis and conclude that the odds ratios for ## each strata of race are the same. ## After accounting for the confounding effect of race, the odds of ## having a low birth weight child for smokers was 3.09 (95\% CI 1.49 to 6.39) ## times that of non-smokers. 
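## As an aside, setting the interpret argument (see the Arguments section
## above) to TRUE appends plain-language interpretive statements to the
## printed output. A minimal sketch re-using the data from Example 1; the
## exact wording of the statements may differ between package versions:
dat.v01 <- c(13,2163,5,3349)
epi.2by2(dat = dat.v01, method = "cross.sectional", digits = 2,
   conf.level = 0.95, units = 100, interpret = TRUE, outcome = "as.columns")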
## EXAMPLE 5: ## A study was conducted by Feychting et al (1998) comparing cancer occurrence ## among the blind with occurrence among those who were not blind but had ## severe visual impairment. From these data we calculate a cancer rate of ## 136/22050 person-years among the blind compared with 1709/127650 person- ## years among those who were visually impaired but not blind. dat.v05 <- c(136,22050,1709,127650) dat.epi05 <- epi.2by2(dat = dat.v05, method = "cohort.time", digits = 2, conf.level = 0.95, units = 1000, interpret = FALSE, outcome = "as.columns") summary(dat.epi05)$massoc.detail$ARate.strata.wald ## The incidence rate of cancer was 7.22 (95\% CI 6.00 to 8.43) cases per ## 1000 person-years less in the blind, compared with those who were not ## blind but had severe visual impairment. round(summary(dat.epi05)$massoc.detail$IRR.strata.wald, digits = 2) ## The incidence rate of cancer in the blind group was less than half that ## of the comparison group (incidence rate ratio 0.46, 95\% CI 0.38 to 0.55). ## EXAMPLE 6: ## A study has been conducted to assess the effect of a new treatment for ## mastitis in dairy cows. Eight herds took part in the study. The following ## data were obtained. The vectors ai, bi, ci and di list (for each herd) the ## number of cows in the E+D+, E+D-, E-D+ and E-D- groups, respectively. \dontrun{ hid <- 1:8 ai <- c(23,10,20,5,14,6,10,3) bi <- c(10,2,1,2,2,2,3,0) ci <- c(3,2,3,2,1,3,3,2) di <- c(6,4,3,2,6,3,1,1) dat.df06 <- data.frame(hid, ai, bi, ci, di) head(dat.df06) ## Re-format data into a format suitable for epi.2by2: hid <- rep(1:8, times = 4) exp <- factor(rep(c(1,1,0,0), each = 8), levels = c(1,0)) out <- factor(rep(c(1,0,1,0), each = 8), levels = c(1,0)) dat.df06 <- data.frame(hid, exp, out, n = c(ai,bi,ci,di)) dat.tab06 <- xtabs(n ~ exp + out + hid, data = dat.df06) print(dat.tab06) epi.2by2(dat = dat.tab06, method = "cohort.count", digits = 2, conf.level = 0.95, units = 1000, interpret = FALSE, outcome = "as.columns") ## The Mantel-Haenszel test of homogeneity of the strata odds ratios is not ## significant (chi square test statistic 5.276; df 7; p-value = 0.63). ## We accept the null hypothesis and conclude that the odds ratios for each ## strata of herd are the same. ## After adjusting for the effect of herd, compared to untreated cows, treatment ## increased the odds of recovery by a factor of 5.97 (95\% CI 2.72 to 13.13). } } \keyword{univar} epiR/man/epi.indirectadj.Rd0000644000176200001440000001335014164036762015262 0ustar liggesusers\name{epi.indirectadj} \alias{epi.indirectadj} \title{Indirectly adjusted incidence risk estimates} \description{ Compute indirectly adjusted incidence risks and standardised mortality (incidence) ratios. } \usage{ epi.indirectadj(obs, pop, std, units, conf.level = 0.95) } \arguments{ \item{obs}{a one column matrix representing the number of observed number of events in each strata. The dimensions of \code{obs} must be named (see the examples, below).} \item{pop}{a matrix representing population size. Rows represent strata (e.g., region); columns represent the levels of the explanatory variable to be adjusted for (e.g., age class, gender). The sum of each row will equal the total population size within each stratum. If there are no covariates \code{pop} will be a one column matrix. The dimensions of the \code{pop} matrix must be named (see the examples, below).} \item{std}{a one row matrix specifying the standard incidence risks to be applied to each level of the covariate to be adjusted for. 
The length of \code{std} should be one plus the number of covariates to be adjusted for (the additional value represents the incidence risk in the entire population). If there are no explanatory variables to adjust-for \code{std} is a single number representing the incidence risk in the entire population.} \item{units}{multiplier for the incidence risk estimates.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Indirect standardisation can be performed whenever the stratum-specific incidence risk estimates are either unknown or unreliable. If the stratum-specific incidence risk estimates are known, direct standardisation is preferred. Confidence intervals for the standardised mortality ratio estimates are based on the Poisson distribution (see Breslow and Day 1987, p 69 - 71 for details). } \value{ A list containing the following: \item{crude.strata}{the crude incidence risk estimates for each stratum.} \item{adj.strata}{the indirectly adjusted incidence risk estimates for each stratum.} \item{smr}{the standardised mortality (incidence) ratios for each stratum.} } \references{ Breslow NE, Day NE (1987). Statistical Methods in Cancer Reasearch: Volume II - The Design and Analysis of Cohort Studies. Lyon: International Agency for Cancer Research. Dohoo I, Martin W, Stryhn H (2009). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 85 - 89. Lash TL, VanderWeele TJ, Haneuse S, Rothman KJ (2021). Modern Epidemiology. Lippincott - Raven Philadelphia, USA, pp. 75. Sahai H, Khurshid A (1993). Confidence intervals for the mean of a Poisson distribution: A review. Biometrical Journal 35: 857 - 867. Sahai H, Khurshid A (1996). Statistics in Epidemiology. Methods, Techniques and Applications. CRC Press, Baton Roca. } \author{ Thanks to Dr. Telmo Nunes (UISEE/DETSA, Faculdade de Medicina Veterinaria - UTL, Rua Prof. Cid dos Santos, 1300-477 Lisboa Portugal) for details and code for the confidence interval calculations. } \seealso{ \code{\link{epi.directadj}} } \examples{ ## EXAMPLE 1 (without covariates): ## Adapted from Dohoo, Martin and Stryhn (2009). In this example the frequency ## of tuberculosis is expressed as incidence risk (i.e., the number of ## tuberculosis positive herds divided by the size of the herd population at ## risk). In their text Dohoo et al. present the data as incidence rate (the ## number of tuberculosis positive herds per herd-year at risk). ## Data have been collected on the incidence of tuberculosis in two ## areas ("A" and "B"). Provided are the counts of (new) incident cases and ## counts of the herd population at risk. The standard incidence risk for ## the total population is 0.060 (6 cases per 100 herds at risk): obs.m01 <- matrix(data = c(58,130), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), "")) pop.m01 <- matrix(data = c(1000,2000), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), "")) std.m01 <- 0.060 epi.indirectadj(obs = obs.m01, pop = pop.m01, std = std.m01, units = 100, conf.level = 0.95) ## EXAMPLE 2 (with covariates): ## We now have, for each area, data stratified by herd type (dairy, beef). 
## The standard incidence risks for beef herds, dairy herds, and the total ## population are 0.025, 0.085, and 0.060 cases per herd, respectively: obs.m02 <- matrix(data = c(58,130), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), "")) pop.m02 <- matrix(data = c(550,450,500,1500), nrow = 2, byrow = TRUE, dimnames = list(c("A", "B"), c("Beef", "Dairy"))) std.m02 <- matrix(data = c(0.025,0.085,0.060), nrow = 1, byrow = TRUE, dimnames = list("", c("Beef", "Dairy", "Total"))) epi.indirectadj(obs = obs.m02, pop = pop.m02, std = std.m02, units = 100, conf.level = 0.95) ## > $crude.strata ## > est lower upper ## > A 5.8 4.404183 7.497845 ## > B 6.5 5.430733 7.718222 ## > $adj.strata ## > est lower upper ## > A 6.692308 5.076923 8.423077 ## > B 5.571429 4.628571 6.557143 ## > $smr.strata ## > obs exp est lower upper ## > A 58 52 1.1153846 0.8461538 1.403846 ## > B 130 140 0.9285714 0.7714286 1.092857 ## The crude incidence risk of tuberculosis in area A was 5.8 ## (95\% CI 4.0 to 7.5) cases per 100 herds at risk. The crude incidence ## risk of tuberculosis in area B was 6.5 (95\% CI 5.4 to 7.7) cases ## per 100 herds at risk. ## The indirectly adjusted incidence risk of tuberculosis in area A was 6.7 ## (95\% CI 5.1 to 8.4) cases per 100 herds at risk. The indirectly ## adjusted incidence risk of tuberculosis in area B was 5.6 ## (95\% CI 4.6 to 6.6) cases per 100 herds at risk. } \keyword{univar} epiR/man/rsu.sep.rspool.Rd0000644000176200001440000000524414164037175015144 0ustar liggesusers\name{rsu.sep.rspool} \alias{rsu.sep.rspool} \title{ Surveillance system sensitivity assuming representative sampling, imperfect pooled sensitivity and perfect pooled specificity } \description{ Calculates the surveillance system (population-level) sensitivity and specificity for detection of disease assuming representative sampling and allowing for imperfect sensitivity and specificity of the pooled test. } \usage{ rsu.sep.rspool(r, k, pstar, pse, psp = 1) } \arguments{ \item{r}{scalar or vector representing the number of pools.} \item{k}{scalar or vector of the same length as \code{r} representing the number of individual units that contribute to each pool (i.e., the pool size).} \item{pstar}{scalar or vector of the same length as \code{r} representing the design prevalence.} \item{pse}{scalar or vector of the same length as \code{r} representing the pool-level sensitivity.} \item{psp}{scalar or vector of the same length as \code{r} representing the pool-level specificity.} } \value{ A list comprised of two elements: \item{se.p}{scalar or vector, the surveillance system (population-level) sensitivity estimates.} \item{sp.p}{scalar or vector, the surveillance system (population-level) specificity estimates.} } \references{ Christensen J, Gardner I (2000). Herd-level interpretation of test results for epidemiologic studies of animal diseases. Preventive Veterinary Medicine 45: 83 - 106. } \examples{ ## EXAMPLE 1: ## To confirm your country's disease freedom status you intend to use a test ## applied at the herd level. The test is expensive so you decide to pool the ## samples taken from individual herds. If you decide to collect 60 pools, ## each comprised of samples from five herds what is the sensitivity of ## disease detection assuming a design prevalence of 0.01 and the sensitivity ## and specificity of the pooled test equals 1.0? rsu.sep.rspool(r = 60, k = 5, pstar = 0.01, pse = 1, psp = 1) ## This testing regime returns a population-level sensitivity of disease ## detection of 0.95. 
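## Because the arguments of rsu.sep.rspool accept vectors, the effect of the
## number of pools on the population-level sensitivity can be examined in a
## single call. A short sketch retaining the pool size, design prevalence and
## pooled test performance assumptions used in Example 1:
rval.rsp01 <- rsu.sep.rspool(r = seq(from = 30, to = 90, by = 10), k = 5,
   pstar = 0.01, pse = 1, psp = 1)

## Population-level sensitivity increases as the number of pools increases:
rval.rsp01$se.p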
## EXAMPLE 2: ## Repeat these calculations assuming the sensitivity of the pooled test ## equals 0.90. rsu.sep.rspool(r = 60, k = 5, pstar = 0.01, pse = 0.90, psp = 1) ## If the sensitivity of the pooled test equals 0.90 the population-level ## sensitivity of disease detection is 0.93. How can we improve population- ## level sensitivity? Answer: include more pools in the study. rsu.sep.rspool(r = 70, k = 5, pstar = 0.01, pse = 0.90, psp = 1) ## Testing 70 pools, each comprised of samples from 5 herds returns a ## population-level sensitivity of disease detection of 0.95. } \keyword{methods} epiR/man/epi.ccc.Rd0000644000176200001440000002405714164036761013537 0ustar liggesusers\name{epi.ccc} \alias{epi.ccc} \title{ Concordance correlation coefficient } \description{ Calculates Lin's (1989, 2000) concordance correlation coefficient for agreement on a continuous measure. } \usage{ epi.ccc(x, y, ci = "z-transform", conf.level = 0.95, rep.measure = FALSE, subjectid) } \arguments{ \item{x}{a vector, representing the first set of measurements.} \item{y}{a vector, representing the second set of measurements.} \item{ci}{a character string, indicating the method to be used. Options are \code{z-transform} or \code{asymptotic}.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} \item{rep.measure}{logical. If \code{TRUE} there are repeated observations across \code{subject}.} \item{subjectid}{a factor providing details of the observer identifier if \code{rep.measure == TRUE}.} } \details{ Computes Lin's (1989, 2000) concordance correlation coefficient for agreement on a continuous measure obtained by two methods. The concordance correlation coefficient combines measures of both precision and accuracy to determine how far the observed data deviate from the line of perfect concordance (that is, the line at 45 degrees on a square scatter plot). Lin's coefficient increases in value as a function of the nearness of the data's reduced major axis to the line of perfect concordance (the accuracy of the data) and of the tightness of the data about its reduced major axis (the precision of the data). Both \code{x} and \code{y} values need to be present for a measurement pair to be included in the analysis. If either or both values are missing (i.e., coded \code{NA}) then the measurement pair is deleted before analysis. } \value{ A list containing the following: \item{rho.c}{the concordance correlation coefficient.} \item{s.shift}{the scale shift.} \item{l.shift}{the location shift.} \item{C.b}{a bias correction factor that measures how far the best-fit line deviates from a line at 45 degrees. No deviation from the 45 degree line occurs when C.b = 1. See Lin (1989, page 258).} \item{blalt}{a data frame with two columns: \code{mean} the mean of each pair of measurements, \code{delta} vector \code{y} minus vector \code{x}.} \item{sblalt}{a data frame listing the average difference between the two sets of measurements, the standard deviation of the difference between the two sets of measurements and the lower and upper confidence limits of the difference between the two sets of measurements. If \code{rep.measure == TRUE} the confidence interval of the difference is adjusted to account for repeated observations across individual subjects.} \item{nmissing}{a count of the number of measurement pairs ignored due to missingness.} } \references{ Bland J, Altman D (1986). Statistical methods for assessing agreement between two methods of clinical measurement. 
The Lancet 327: 307 - 310. Bland J, Altman D (1999). Measuring agreement in method comparison studies. Statistical Methods in Medical Research 8: 135 - 160. Bland J, Altman D (2007). Agreement between methods of measurement with multiple observations per individual. Journal of Biopharmaceutical Statistics 17: 571 - 582. (Corrects the formula quoted in the 1999 paper). Bradley E, Blackwood L (1989). Comparing paired data: a simultaneous test for means and variances. American Statistician 43: 234 - 235. Burdick RK, Graybill FA (1992). Confidence Intervals on Variance Components. New York: Dekker. Dunn G (2004). Statistical Evaluation of Measurement Errors: Design and Analysis of Reliability Studies. London: Arnold. Euser AM, Dekker FW, le Cessie S (2008). A practical approach to Bland-Altman plots and variation coefficients for log transformed variables. Journal of Clinical Epidemiology 61: 978 - 982. Hsu C (1940). On samples from a normal bivariate population. Annals of Mathematical Statistics 11: 410 - 426. Krippendorff K (1970). Bivariate agreement coefficients for reliability of data. In: Borgatta E, Bohrnstedt G (eds) Sociological Methodology. San Francisco: Jossey-Bass, pp. 139 - 150. Lin L (1989). A concordance correlation coefficient to evaluate reproducibility. Biometrics 45: 255 - 268. Lin L (2000). A note on the concordance correlation coefficient. Biometrics 56: 324 - 325. Pitman E (1939). A note on normal correlation. Biometrika 31: 9 - 12. Rashid M, Stevenson M, Waenga S, Mirams G, Campbell A, Vaughan J, Jabbar A (2018). Comparison of McMaster and FECPAK methods for counting nematode eggs in the faeces of alpacas. Parasites & Vectors 11, 278. DOI: 10.1186/s13071-018-2861-1. Reynolds M, Gregoire T (1991). Comment on Bradley and Blackwood. American Statistician 45: 163 - 164. Snedecor G, Cochran W (1989). Statistical Methods. Ames: Iowa State University Press. } \seealso{ \code{\link[epiR]{epi.occc}} } \examples{ ## EXAMPLE 1: set.seed(seed = 1234) method1 <- rnorm(n = 100, mean = 0, sd = 1) method2 <- method1 + runif(n = 100, min = -0.25, max = 0.25) ## Add some missing values: method1[50] <- NA method2[75] <- NA dat.df01 <- data.frame(method1, method2) rval.ccc01 <- epi.ccc(method1, method2, ci = "z-transform", conf.level = 0.95, rep.measure = FALSE) rval.lab01 <- data.frame(lab = paste("CCC: ", round(rval.ccc01$rho.c[,1], digits = 2), " (95\% CI ", round(rval.ccc01$rho.c[,2], digits = 2), " - ", round(rval.ccc01$rho.c[,3], digits = 2), ")", sep = "")) z <- lm(method2 ~ method1) alpha <- summary(z)$coefficients[1,1] beta <- summary(z)$coefficients[2,1] rval.lm01 <- data.frame(alpha, beta) ## Concordance correlation plot: \dontrun{ library(ggplot2) ggplot(data = dat.df01, aes(x = method1, y = method2)) + geom_point() + geom_abline(intercept = 0, slope = 1) + geom_abline(data = rval.lm01, aes(intercept = alpha, slope = beta), linetype = "dashed") + scale_x_continuous(limits = c(0,3), name = "Method 1") + scale_y_continuous(limits = c(0,3), name = "Method 2") + geom_text(data = rval.lab01, x = 0.5, y = 2.95, label = rval.lab01$lab) + coord_fixed(ratio = 1 / 1) ## In this plot the dashed line represents the line of perfect concordance. ## The solid line represents the reduced major axis. 
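## The components of the concordance correlation coefficient listed in the
## Value section above (the scale shift, the location shift and the bias
## correction factor) can be inspected directly. A brief sketch using the
## documented return elements:
rval.ccc01$s.shift
rval.ccc01$l.shift
rval.ccc01$C.b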
} ## EXAMPLE 2: ## Bland and Altman plot (Figure 2 from Bland and Altman 1986): x <- c(494,395,516,434,476,557,413,442,650,433,417,656,267, 478,178,423,427) y <- c(512,430,520,428,500,600,364,380,658,445,432,626,260, 477,259,350,451) rval.ccc02 <- epi.ccc(x, y, ci = "z-transform", conf.level = 0.95, rep.measure = FALSE) rval.df02 <- data.frame(mean = rval.ccc02$blalt[,1], delta = rval.ccc02$blalt[,2]) \dontrun{ library(ggplot2) ggplot(data = rval.ccc02$blalt, aes(x = mean, y = delta)) + geom_point() + geom_hline(data = rval.ccc02$sblalt, aes(yintercept = lower), linetype = 2) + geom_hline(data = rval.ccc02$sblalt, aes(yintercept = upper), linetype = 2) + geom_hline(data = rval.ccc02$sblalt, aes(yintercept = est), linetype = 1) + scale_x_continuous(limits = c(0,800), name = "Average PEFR by two meters (L/min)") + scale_y_continuous(limits = c(-150,150), name = "Difference in PEFR (L/min)") } ## EXAMPLE 3: ## Setting limits of agreement when your data are skewed. See Euser et al. ## (2008) for details and Rashid et al. (2018) for an applied example. x <- c(0,210,15,90,0,0,15,0,0,0,15,0,15,0,0,0,0,15,0,0,15,135,0,0,15, 120,30,15,30,0,0,5235,780,1275,10515,1635,1905,1830,720,450,225,420, 300,15,285,0,225,525,675,5280,465,270,0,1485,15,420,0,60,0,0,0,750, 570,0) y <- c(0,70,0,0,0,0,35,0,0,0,0,0,0,35,0,0,0,0,0,0,35,35,70,0,0,140,35, 105,0,0,0,1190,385,1190,6930,560,1260,700,840,0,105,385,245,35,105, 0,140,350,350,3640,385,350,0,1505,0,630,70,0,0,140,0,420,490,0) rval.ccc03 <- epi.ccc(x, y, ci = "z-transform", conf.level = 0.95, rep.measure = FALSE) \dontrun{ library(ggplot2) ggplot(data = rval.ccc03$blalt, aes(x = mean, y = delta)) + geom_point() + geom_hline(data = rval.ccc03$sblalt, aes(yintercept = lower), linetype = 2) + geom_hline(data = rval.ccc03$sblalt, aes(yintercept = upper), linetype = 2) + geom_hline(data = rval.ccc03$sblalt, aes(yintercept = est), linetype = 1) + scale_x_continuous(limits = c(0,8000), name = "Average of the two measurements") + scale_y_continuous(limits = c(-8000,8000), name = "Difference in the two measurements") } ## In the above plot the spread of the differences increases with increasing ## mean of the observations. The Bland Altman limits of agreement should be ## calculated on a log scale. logx <- log(x + 50, base = 10) logy <- log(y + 50, base = 10) log10.ccc03 <- epi.ccc(x = logx, y = logy, ci = "z-transform", conf.level = 0.95, rep.measure = FALSE) ## Transform the limits of agreement back to the original scale by taking ## anti-logs. If the limits of agreement for Z = log10(x) are between -a ## and +a, with a = 1.96 * s, the ratio between two measures on the original ## scale is between 10^-a and 10^a. See page 979 of Euser et al. (2008). 
a <- 1.96 * log10.ccc03$sblalt$delta.sd ## For a given value for the mean Xbar, it can be shown that x - y is between ## -2Xbar(10^a - 1) / (10^a + 1) and +2Xbar(10^a - 1) / (10^a + 1): Xbar = seq(from = 0, to = 8000, by = 100) Xbar.low <- (-2 * Xbar * (10^a - 1)) / (10^a + 1) Xbar.upp <- (+2 * Xbar * (10^a - 1)) / (10^a + 1) limits <- data.frame(mean = Xbar, lower = Xbar.low, upper = Xbar.upp) \dontrun{ library(ggplot2) ggplot(data = rval.ccc03$blalt, aes(x = mean, y = delta)) + geom_point() + geom_line(data = limits, aes(x = mean, y = lower), linetype = 2) + geom_line(data = limits, aes(x = mean, y = upper), linetype = 2) + geom_line(data = limits, aes(x = mean, y = 0), linetype = 1) + scale_x_continuous(limits = c(0,8000), name = "Average of the two measurements") + scale_y_continuous(limits = c(-8000,8000), name = "Difference in the two measurements") } } \keyword{univar} epiR/man/epi.ltd.Rd0000644000176200001440000000512514074732660013566 0ustar liggesusers\name{epi.ltd} \alias{epi.ltd} \title{Lactation to date and standard lactation milk yields } \description{ Calculate lactation to date and standard lactation (that is, 305 or 270 day) milk yields. } \usage{ epi.ltd(dat, std = "305") } \arguments{ \item{dat}{an eight column data frame listing (in order) cow identifier, herd test identifier, lactation number, herd test days in milk, lactation length (\code{NA} if lactation incomplete), herd test milk yield (litres), herd test fat (percent), and herd test protein (percent).} \item{std}{\code{std = "305"} returns 305-day milk volume, fat, and protein yield. \code{std = "270"} returns 270-day milk volume, fat, and protein yield.} } \details{ Lactation to date yields will only be calculated if there are four or more herd test events. } \value{ A data frame with nine elements: \code{ckey} cow identifier, \code{lact} lactation number, \code{llen} lactation length, \code{vltd} milk volume (litres) to last herd test or dry off date (computed on the basis of lactation length, \code{fltd} fat yield (kilograms) to last herd test or dry off date (computed on the basis of lactation length, \code{pltd} protein yield (kilograms) to last herd test or dry off date (computed on the basis of lactation length, \code{vstd} 305-day or 270-day milk volume yield (litres), \code{fstd} 305-day or 270-day milk fat yield (kilograms), and \code{pstd} 305-day or 270-day milk protein yield (kilograms). } \author{ Nicolas Lopez-Villalobos (IVABS, Massey University, Palmerston North New Zealand) and Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). } \references{ Kirkpatrick M, Lofsvold D, Bulmer M (1990). Analysis of the inheritance, selection and evolution of growth trajectories. Genetics 124: 979 - 993. 
} \examples{ ## EXAMPLE 1: ## Generate some herd test data: ckey <- rep(1, times = 12) pkey <- 1:12 lact <- rep(1:2, each = 6) dim <- c(25,68,105,145,200,240,30,65,90,130,190,220) llen <- c(280,280,280,280,280,280,NA,NA,NA,NA,NA,NA) vol <- c(18,30,25,22,18,12,20,32,27,24,20,14) fat <- c(4.8,4.3,4.5,4.7,4.8,4.9,4.8,4.3,4.5,4.7,4.8,4.9)/100 pro <- c(3.7,3.5,3.6,3.7,3.8,3.9,3.7,3.5,3.6,3.7,3.8,3.9)/100 dat.df01 <- data.frame(ckey, pkey, lact, dim, llen, vol, fat, pro) ## Lactation to date and 305-day milk, fat, and protein yield: epi.ltd(dat.df01, std = "305") ## Lactation to date and 270-day milk, fat, and protein yield: epi.ltd(dat.df01, std = "270") } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.pstar.Rd0000644000176200001440000000504714164037175014172 0ustar liggesusers\name{rsu.pstar} \alias{rsu.pstar} \title{ Design prevalence back calculation } \description{ Calculates design prevalence required for given sample size and desired surveillance system (population-level) sensitivity, assuming representative sampling, imperfect test sensitivity and perfect test specificity. } \usage{ rsu.pstar(N = NA, n, se.p, se.u) } \arguments{ \item{N}{scalar or vector, integer representing the population size. Use \code{NA} if unknown.} \item{n}{scalar or vector, integer representing the number of units sampled.} \item{se.p}{scalar or vector of the same length as \code{n} representing the desired surveillance system (population-level) sensitivity.} \item{se.u}{scalar or vector of the same length as \code{n} representing the unit sensitivity.} } \value{ A vector of design prevalence estimates. } \references{ MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \examples{ ## EXAMPLE 1: ## In a study to provide evidence that your country is free of a given disease ## a total of 280 individuals are sampled. Assume a desired surveillance system ## sensitivity of 0.95 and an individual unit diagnostic sensitivity of 0.98. ## If all unit tests return a negative result, what is the maximum prevalence ## if disease is actually present in the population (i.e., what is the design ## prevalence)? rsu.pstar(N = NA, n = 280, se.p = 0.95, se.u = 0.98) ## If 280 individuals are sampled and tested and each returns a negative test ## result we can be 95\% confident that the maximum prevalence (if disease is ## actually present in the population) is 0.011. ## EXAMPLE 2: ## In a study to provide evidence disease freedom a total of 30 individuals ## are sampled from a set of cattle herds. Assume cattle herds in the study ## region range from 100 to 5000 cows. As above, assume a desired surveillance ## system sensitivity of 0.95 and an individuals unit diagnostic sensitivity ## of 0.98. If all 30 unit tests return a negative result, what is the expected ## design prevalence for each herd? round(rsu.pstar(N = c(100, 500, 1000, 5000), n = 30, se.p = 0.95, se.u = 0.98), digits = 3) ## The expected herd level design prevalence ranges from 0.086 (for a 100 ## cow herd) to 0.102 (for a 5000 cow herd). 
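## Because n accepts a vector (see the Arguments section above), the effect
## of sample size on the design prevalence can be examined in a single call.
## A short sketch retaining the sensitivity assumptions used in Example 1;
## the design prevalence falls as the number of individuals sampled increases:
round(rsu.pstar(N = NA, n = seq(from = 100, to = 400, by = 100),
   se.p = 0.95, se.u = 0.98), digits = 3)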
} \keyword{methods} epiR/man/epi.sscompb.Rd0000644000176200001440000001277714164200421014445 0ustar liggesusers\name{epi.sscompb} \alias{epi.sscompb} \title{ Sample size, power and minimum detectable risk ratio when comparing binary outcomes } \description{ Sample size, power and minimum detectable risk ratio when comparing binary outcomes. } \usage{ epi.sscompb(treat, control, n, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{treat}{the expected proportion for the treatment group (see below).} \item{control}{the expected proportion for the control group (see below).} \item{n}{scalar, defining the total number of subjects in the study (i.e., the number in the treatment plus the number in the control group).} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the outcome proportion in the exposed (treatment) group is greater than or less than the outcome proportion in the unexposed (control) group. Use a one-sided test to evaluate whether or not the outcome proportion in the exposed (treatment) group is greater than the outcome proportion in the unexposed (control) group.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ The methodology in this function follows the approach described in Chapter 8 of Woodward (2014), pp. 295 - 329. With this function it is assumed that one of the two proportions is known and we want to test the null hypothesis that the second proportion is equal to the first. Users are referred to the \code{\link{epi.sscohortc}} function which relates to the two-sample problem where neither proportion is known (or assumed, at least). Because there is much more uncertainty in the two sample problem where neither proportion is known, \code{epi.sscohortc} returns much larger sample size estimates. This function (\code{epi.sscompb}) should be used in particular situations such as when a politician claims that at least 90\% of the population use seatbelts and we want to see if the data supports this claim. } \value{ A list containing the following: \item{n.total}{the total number of subjects required for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{n.treat}{the total number of subjects in the treatment group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{n.control}{the total number of subjects in the control group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{power}{the power of the study given the number of study subjects, the expected effect size and level of confidence.} \item{lambda}{the proportion in the treatment group divided by the proportion in the control group (a risk ratio).} } \references{ Fleiss JL (1981). Statistical Methods for Rates and Proportions. Wiley, New York. Kelsey JL, Thompson WD, Evans AS (1986). Methods in Observational Epidemiology. 
Oxford University Press, London, pp. 254 - 284.

Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 295 - 329.
}

\note{
The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists.

Values need to be entered for \code{control}, \code{n}, and \code{power} to return a value for \code{lambda}. In this situation, the lower value of lambda represents the maximum detectable risk ratio that is less than 1; the upper value of lambda represents the minimum detectable risk ratio greater than 1.

See the documentation for \code{\link{epi.sscohortc}} for an example using the \code{design} facility implemented in this function.
}

\examples{
## EXAMPLE 1 (from Woodward 2014 Example 8.12 p. 312):
## A government initiative has decided to reduce the prevalence of male 
## smoking to, at most, 30\%. A sample survey is planned to test, at the 
## 0.05 level, the hypothesis that the percentage of smokers in the male 
## population is 30\% against the one-sided alternative that it is greater. 
## The survey should be able to find a prevalence of 32\%, when it is true, 
## with 0.90 power. How many men need to be sampled?

epi.sscompb(treat = 0.30, control = 0.32, n = NA, power = 0.90, r = 1, 
   design = 1, sided.test = 1, nfractional = FALSE, conf.level = 0.95)

## A total of 4568 men should be sampled: 2284 in the treatment group and 
## 2284 in the control group. The risk ratio (that is, the prevalence of 
## smoking in males post government initiative divided by the prevalence of 
## smoking in males pre initiative) is 0.94.

## EXAMPLE 2:
## If we sample only 1000 men (500 in the treatment group and 500 in the 
## control group) what is the maximum detectable risk ratio that is less 
## than 1?

epi.sscompb(treat = NA, control = 0.32, n = 1000, power = 0.90, r = 1, 
   design = 1, sided.test = 1, nfractional = FALSE, conf.level = 0.95)

## If we sample only 1000 men the maximum detectable risk ratio will be 0.88.
}

\keyword{univar}
epiR/man/epi.ssstrataestb.Rd0000644000176200001440000000546514075464152015527 0ustar liggesusers\name{epi.ssstrataestb}
\alias{epi.ssstrataestb}

\title{Sample size to estimate a binary outcome using stratified random sampling
}

\description{
Sample size to estimate a binary outcome using stratified random sampling.
}

\usage{
epi.ssstrataestb(strata.n, strata.Py, epsilon, error = "relative", 
   nfractional = FALSE, conf.level = 0.95)
}

\arguments{
  \item{strata.n}{vector of integers, the number of individual listing units in each strata.}
  \item{strata.Py}{vector of numbers, the expected proportion of individual listing units with the outcome of interest for each strata.}
  \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.}
  \item{error}{character string.
Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \value{ A list containing the following: \item{strata.sample}{the estimated sample size for each strata.} \item{strata.total}{the estimated total size.} \item{strata.stats}{\code{mean} the mean across all strata, \code{sigma.bx} the among-strata variance, \code{sigma.wx} the within-strata variance, and \code{sigma.x} the among-strata variance plus the within-strata variance, \code{rel.var} the within-strata variance divided by the square of the mean, and \code{gamma} the ratio of among-strata variance to within-strata variance.} } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 175 - 179. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). Javier Sanchez (Atlantic Veterinary College, University of Prince Edward Island, Charlottetown Prince Edward Island, C1A 4P3, Canada). } \examples{ ## EXAMPLE 1: ## Dairies are to be sampled to determine the proportion of herd managers ## using foot bathes. Herds are stratified according to size (small, medium, ## and large). The number of herds in each strata are 1500, 2500, and 4000 ## (respectively). A review of the literature indicates that use of foot bathes ## on farms is in the order of 0.50, with the probability of usage increasing ## as herds get larger. How many dairies should be sampled? strata.n <- c(1500, 2500, 4000) strata.Py <- c(0.50, 0.60, 0.70) epi.ssstrataestb(strata.n, strata.Py, epsilon = 0.20, error = "relative", nfractional = FALSE, conf.level = 0.95) ## A total of 55 herds should be sampled: 11 small, 18 medium, and 28 large. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sep.rbvarse.Rd0000644000176200001440000000451713742463310015267 0ustar liggesusers\name{rsu.sep.rbvarse} \alias{rsu.sep.rbvarse} \title{ Surveillance system sensitivity assuming risk based sampling and varying unit sensitivity } \description{ Calculates the surveillance system (population-level) sensitivity for detection of disease assuming risk based sampling and varying unit sensitivity. } \usage{ rsu.sep.rbvarse(N, rr, df, pstar) } \arguments{ \item{N}{scalar integer or vector of integers the same length as \code{rr}, representing the population size. Use \code{NA} if unknown.} \item{rr}{relative risk values (vector of values corresponding to the number of risk strata).} \item{df}{dataframe of values for each combination of risk stratum and sensitivity level, column 1 = risk group index, column 2 = unit sensitivity, column 3 = n (sample size for risk group and unit sensitivity).} \item{pstar}{scalar representing the design prevalence.} } \value{ A list comprised of five elements: \item{sep}{scalar, the population-level sensitivity estimate.} \item{epi}{vector, effective probability of infection estimates.} \item{adj.risk}{vector, adjusted risks.} \item{n}{vector, sample size by risk group} \item{se.u}{a vector of the mean sensitivity for each risk group.} } \references{ MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). 
Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43.
}

\examples{
## EXAMPLE 1:
## A study has been carried out to detect Johne's disease in a population of 
## cattle. There are two risk groups ('high' and 'low') with the risk of 
## disease in the high risk group five times that of the low risk group. 
## The number of animals sampled and unit sensitivity varies by risk group, as 
## detailed below. Assume the number of cattle in the high risk and low risk 
## group is 200 and 1800, respectively.

## Calculate the surveillance system sensitivity assuming a design prevalence 
## of 0.01.

rg <- c(1,1,2,2)
se.u <- c(0.92,0.85,0.92,0.85)
n <- c(80,30,20,30)
df <- data.frame(rg = rg, se.u = se.u, n = n)

rsu.sep.rbvarse(N = c(200,1800), rr = c(5,1), df = df, pstar = 0.01)

## The surveillance system sensitivity is 0.99.
}
\keyword{methods}epiR/man/epi.bohning.Rd0000644000176200001440000000300714074765134014426 0ustar liggesusers\name{epi.bohning}
\alias{epi.bohning}

\title{Bohning's test for overdispersion of Poisson data}

\description{
A test for overdispersion of Poisson data.
}

\usage{
epi.bohning(obs, exp, alpha = 0.05)
}

\arguments{
  \item{obs}{the observed number of cases in each area.}
  \item{exp}{the expected number of cases in each area.}
  \item{alpha}{alpha level to be used for the test of significance. Must be a single number between 0 and 1.}
}

\value{
A data frame with two elements: \code{test.statistic}, Bohning's test statistic and \code{p.value} the associated P-value.
}

\references{
Bohning D (2000). Computer-assisted Analysis of Mixtures and Applications. Chapman and Hall, Boca Raton.

Ugarte MD, Ibanez B, Militino AF (2006). Modelling risks in disease mapping. Statistical Methods in Medical Research 15: 21 - 35.
}

\examples{
## EXAMPLE 1:
data(epi.SClip)
obs <- epi.SClip$cases
pop <- epi.SClip$population
exp <- (sum(obs) / sum(pop)) * pop

epi.bohning(obs, exp, alpha = 0.05)

## Bohning's test was used to determine if there was statistically significant 
## overdispersion in lip cancer cases across 56 Scottish districts for the 
## period 1975 to 1980.

## The test statistic was 53.33. The associated P value was <0.01. We reject 
## the null hypothesis of no overdispersion and accept the alternative 
## hypothesis, concluding that the lip cancer data are overdispersed.
}

\keyword{univar}% at least one, from doc/KEYWORDS
\keyword{univar}% __ONLY ONE__ keyword per line
epiR/man/epi.nomogram.Rd0000644000176200001440000000670614164036762014620 0ustar liggesusers\name{epi.nomogram}
\alias{epi.nomogram}

\title{Post-test probability of disease given sensitivity and specificity of a test}

\description{
Compute the post-test probability of disease given sensitivity and specificity of a test.
}

\usage{
epi.nomogram(se, sp, lr, pre.pos, verbose = FALSE)
}

\arguments{
  \item{se}{test sensitivity (0 - 1).}
  \item{sp}{test specificity (0 - 1).}
  \item{lr}{a vector of length 2 listing the positive and negative likelihood ratio (respectively) of the test.
Ignored if \code{se} and \code{sp} are not null.} \item{pre.pos}{the pre-test probability of the outcome.} \item{verbose}{logical, indicating whether detailed or summary results are to be returned.} } \value{ A list containing the following: \item{lr}{a data frame listing the likelihood ratio of a positive and negative test.} \item{prior}{a data frame listing the pre-test probability of being outcome (i.e., disease) positive, as entered by the user.} \item{post}{a data frame listing: \code{opos.tpos} the post-test probability of being outcome (i.e., disease) positive given a positive test result and \code{opos.tneg} the post-test probability of being outcome (i.e., disease) positive given a negative test result.} } \references{ Caraguel C, Vanderstichel R (2013). The two-step Fagan's nomogram: ad hoc interpretation of a diagnostic test result without calculation. Evidence Based Medicine 18: 125 - 128. Hunink M, Glasziou P (2001). Decision Making in Health and Medicine - Integrating Evidence and Values. Cambridge University Press, pp. 128 - 156. } \examples{ ## EXAMPLE 1: ## You are presented with a dog with lethargy, exercise intolerance, ## weight gain and bilaterally symmetric truncal alopecia. You are ## suspicious of hypothyroidism and take a blood sample to measure ## basal serum thyroxine (T4). ## You believe that around 5\% of dogs presented to your clinic with ## a signalment of general debility have hypothyroidism. The serum T4 ## has a sensitivity of 0.89 and specificity of 0.85 for diagnosing ## hypothyroidism in the dog. The laboratory reports a serum T4 ## concentration of 22.0 nmol/L (reference range 19.0 to 58.0 nmol/L). ## What is the post-test probability that this dog is hypothyroid? epi.nomogram(se = 0.89, sp = 0.85, lr = NA, pre.pos = 0.05, verbose = FALSE) ## If the test is positive the post-test probability that this dog is ## hypothyroid is 0.24. If the test is negative the post-test probability ## that this dog is hypothyroid is 0.0068. ## EXAMPLE 2: ## A dog is presented to you with severe pruritis. You suspect sarcoptic ## mange and decide to take a skin scraping (LR+ 9000; LR- 0.1). The scrape ## returns a negative result (no mites are seen). What is the post-test ## probability that your patient has sarcoptic mange? You recall that you ## diagnose around 3 cases of sarcoptic mange per year in a clinic that ## sees approximately 2 -- 3 dogs per week presented with pruritic skin disease. ## Calculate the pre-test probability of sarcoptes: pre.pos <- 3 / (3 * 52) ## The pre-test probability that this dog is sarcoptes positive is 0.019. epi.nomogram(se = NA, sp = NA, lr = c(9000, 0.1), pre.pos = pre.pos, verbose = FALSE) ## If the skin scraping is negative the post-test probability that this dog ## has sarcoptic mange is 0.002. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.conf.Rd0000644000176200001440000003442414164036762013734 0ustar liggesusers\name{epi.conf} \alias{epi.conf} \title{Confidence intervals for means, proportions, incidence, and standardised mortality ratios } \description{ Computes confidence intervals for means, proportions, incidence, and standardised mortality ratios. } \usage{ epi.conf(dat, ctype = "mean.single", method, N, design = 1, conf.level = 0.95) } \arguments{ \item{dat}{the data, either a vector or a matrix depending on the method chosen.} \item{ctype}{a character string indicating the type of confidence interval to calculate. 
Options are \code{mean.single}, \code{mean.unpaired}, \code{mean.paired}, \code{prop.single}, \code{prop.unpaired}, \code{prop.paired}, \code{prevalence}, \code{inc.risk}, \code{inc.rate}, \code{odds}, \code{ratio} and \code{smr}.} \item{method}{a character string indicating the method to use. Where \code{ctype = "inc.risk"} or \code{ctype = "prevalence"} options are \code{exact}, \code{wilson}, \code{fleiss}, \code{agresti}, \code{clopper-pearson} and \code{jeffreys}. Where \code{ctype = "inc.rate"} options are \code{exact} and \code{byar}.} \item{N}{scalar, representing the population size.} \item{design}{scalar, representing the design effect.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Method mean.single requires a vector as input. Method \code{mean.unpaired} requires a two-column data frame; the first column defining the groups must be a factor. Method \code{mean.paired} requires a two-column data frame; one column for each group. Method \code{prop.single} requires a two-column matrix; the first column specifies the number of positives, the second column specifies the number of negatives. Methods \code{prop.unpaired} and \code{prop.paired} require a four-column matrix; columns 1 and 2 specify the number of positives and negatives for the first group, columns 3 and 4 specify the number of positives and negatives for the second group. Method \code{prevalence} and \code{inc.risk} require a two-column matrix; the first column specifies the number of positives, the second column specifies the total number tested. Method \code{inc.rate} requires a two-column matrix; the first column specifies the number of positives, the second column specifies individual time at risk. Method \code{odds} requires a two-column matrix; the first column specifies the number of positives, the second column specifies the number of negatives. Method \code{ratio} requires a two-column matrix; the first column specifies the numerator, the second column specifies the denominator. Method \code{smr} requires a two-colum matrix; the first column specifies the total number of positives, the second column specifies the total number tested. The methodology implemented here follows Altman, Machin, Bryant, and Gardner (2000). Where method is \code{inc.risk} or \code{prevalence} if the numerator equals zero the lower bound of the confidence interval estimate is set to zero. Where method is \code{smr} the method of Dobson et al. (1991) is used. A summary of the methods used for each of the confidence interval calculations in this function is as follows: \tabular{ll}{ ----------------------------------- \tab ------------------------ \cr \code{ctype-method} \tab Reference \cr ----------------------------------- \tab ------------------------ \cr \code{mean.single} \tab Altman et al. (2000) \cr \code{mean.unpaired} \tab Altman et al. (2000) \cr \code{mean.paired} \tab Altman et al. (2000) \cr \code{prop.single} \tab Altman et al. (2000) \cr \code{prop.unpaired} \tab Altman et al. (2000) \cr \code{prop.paired} \tab Altman et al. (2000) \cr \code{inc.risk, exact} \tab Collett (1999) \cr \code{inc.risk, wilson} \tab Rothman (2012) \cr \code{inc.risk, fleiss} \tab Fleiss (1981) \cr \code{inc.risk, agresti} \tab Agresti and Coull (1998) \cr \code{inc.risk, clopper-pearson} \tab Clopper and Pearson (1934)\cr \code{inc.risk, jeffreys} \tab Brown et al. 
(2001) \cr \code{prevalence, exact} \tab Collett (1999) \cr \code{prevalence, wilson} \tab Wilson (1927) \cr \code{prevalence, fleiss} \tab Fleiss (1981) \cr \code{prevalence, agresti} \tab Agresti and Coull (1998) \cr \code{prevalence, clopper-pearson} \tab Clopper and Pearson (1934)\cr \code{prevalence, jeffreys} \tab Brown et al. (2001) \cr \code{inc.rate, exact} \tab Ulm (1990) \cr \code{inc.rate, byar} \tab Rothman (2012) \cr \code{odds} \tab Ederer and Mantel (1974) \cr \code{ratio} \tab Ederer and Mantel (1974) \cr \code{smr} \tab Dobson et al. (1991) \cr ----------------------------------- \tab ------------------------ \cr } The Wald interval often has inadequate coverage, particularly for small sample sizes and proportion estimates close to 0 or 1. Conversely, the Clopper-Pearson Exact method is conservative and tends to produce wider intervals than necessary. Brown et al. recommends the Wilson or Jeffreys methods when sample sizes are small and Agresti-Coull, Wilson, or Jeffreys methods for larger sample sizes. The Clopper-Pearson interval is an early and very common method for calculating binomial confidence intervals. The Clopper-Pearson interval is sometimes called an 'exact' method because it is based on the cumulative probabilities of the binomial distribution (i.e., exactly the correct distribution rather than an approximation). The design effect is used to adjust the confidence interval around a prevalence or incidence risk estimate in the presence of clustering. The design effect is a measure of the variability between clusters and is calculated as the ratio of the variance calculated assuming a complex sample design divided by the variance calculated assuming simple random sampling. Adjustment for the effect of clustering can only be made on those prevalence and incidence risk methods that return a standard error (i.e., \code{method = "wilson"} or \code{method = "fleiss"}). } \references{ Agresti A, Coull B (1998). Approximate is better than 'exact' for interval estimation of binomial proportions. The American Statistician 52. DOI: 10.2307/2685469. Altman DG, Machin D, Bryant TN, and Gardner MJ (2000). Statistics with Confidence, second edition. British Medical Journal, London, pp. 28 - 29 and pp. 45 - 56. Brown L, Cai T, Dasgupta A (2001). Interval estimation for a binomial proportion. Statistical Science 16: 101 - 133. Clopper C, Pearson E (1934) The use of confidence or fiducial limits illustrated in the case of the binomial. Biometrika 26: 404 - 413. DOI: 10.1093/biomet/26.4.404. Collett D (1999). Modelling Binary Data. Chapman & Hall/CRC, Boca Raton Florida, pp. 24. Dobson AJ, Kuulasmaa K, Eberle E, and Scherer J (1991). Confidence intervals for weighted sums of Poisson parameters. Statistics in Medicine 10: 457 - 462. Ederer F, and Mantel N (1974). Confidence limits on the ratio of two Poisson variables. American Journal of Epidemiology 100: 165 - 167 Fleiss JL (1981). Statistical Methods for Rates and Proportions. 2nd edition. John Wiley & Sons, New York. Killip S, Mahfoud Z, Pearce K (2004). What is an intracluster correlation coefficient? Crucial concepts for primary care researchers. Annals of Family Medicine 2: 204 - 208. Otte J, Gumm I (1997). Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys. Preventive Veterinary Medicine 31: 147 - 150. Rothman KJ (2012). Epidemiology An Introduction. Oxford University Press, London, pp. 164 - 175. Ulm K (1990). 
A simple method to calculate the confidence interval of a standardized mortality ratio. American Journal of Epidemiology 131: 373 - 375. Wilson EB (1927) Probable inference, the law of succession, and statistical inference. Journal of the American Statistical Association 22: 209 - 212. } \examples{ ## EXAMPLE 1: dat.v01 <- rnorm(n = 100, mean = 0, sd = 1) epi.conf(dat = dat.v01, ctype = "mean.single") ## EXAMPLE 2: group <- c(rep("A", times = 5), rep("B", times = 5)) val = round(c(rnorm(n = 5, mean = 10, sd = 5), rnorm(n = 5, mean = 7, sd = 5)), digits = 0) dat.df02 <- data.frame(group = group, val = val) epi.conf(dat = dat.df02, ctype = "mean.unpaired") ## EXAMPLE 3: ## Two paired samples (Altman et al. 2000, page 31): ## Systolic blood pressure levels were measured in 16 middle-aged men ## before and after a standard exercise test. The mean rise in systolic ## blood pressure was 6.6 mmHg. The standard deviation of the difference ## was 6.0 mm Hg. The standard error of the mean difference was 1.49 mm Hg. before <- c(148,142,136,134,138,140,132,144,128,170,162,150,138,154,126,116) after <- c(152,152,134,148,144,136,144,150,146,174,162,162,146,156,132,126) dat.df03 <- data.frame(before, after) epi.conf(dat = dat.df03, ctype = "mean.paired", conf.level = 0.95) ## The 95\% confidence interval for the population value of the mean ## systolic blood pressure increase after standard exercise was 3.4 to 9.8 ## mm Hg. ## EXAMPLE 4: ## Single sample (Altman et al. 2000, page 47): ## Out of 263 giving their views on the use of personal computers in ## general practice, 81 thought that the privacy of their medical file ## had been reduced. pos <- 81 neg <- (263 - 81) dat.m04 <- as.matrix(cbind(pos, neg)) round(epi.conf(dat = dat.m04, ctype = "prop.single"), digits = 3) ## The 95\% confidence interval for the population value of the proportion ## of patients thinking their privacy was reduced was from 0.255 to 0.366. ## EXAMPLE 5: ## Two samples, unpaired (Altman et al. 2000, page 49): ## Goodfield et al. report adverse effects in 85 patients receiving either ## terbinafine or placebo treatment for dermatophyte onchomychois. ## Out of 56 patients receiving terbinafine, 5 patients experienced ## adverse effects. Out of 29 patients receiving a placebo, none experienced ## adverse effects. grp1 <- matrix(cbind(5, 51), ncol = 2) grp2 <- matrix(cbind(0, 29), ncol = 2) dat.m05 <- as.matrix(cbind(grp1, grp2)) round(epi.conf(dat = dat.m05, ctype = "prop.unpaired"), digits = 3) ## The 95\% confidence interval for the difference between the two groups is ## from -0.038 to +0.193. ## EXAMPLE 6: ## Two samples, paired (Altman et al. 2000, page 53): ## In a reliability exercise, 41 patients were randomly selected from those ## who had undergone a thalium-201 stress test. The 41 sets of images were ## classified as normal or not by the core thalium laboratory and, ## independently, by clinical investigators from different centres. ## Of the 19 samples identified as ischaemic by clinical investigators ## 5 were identified as ischaemic by the laboratory. Of the 22 samples ## identified as normal by clinical investigators 0 were identified as ## ischaemic by the laboratory. 
## Clinic | Laboratory | | ## | Ischaemic | Normal | Total ## --------------------------------------------------------- ## Ischaemic | 14 | 5 | 19 ## Normal | 0 | 22 | 22 ## --------------------------------------------------------- ## Total | 14 | 27 | 41 ## --------------------------------------------------------- dat.m06 <- as.matrix(cbind(14, 5, 0, 22)) round(epi.conf(dat = dat.m06, ctype = "prop.paired", conf.level = 0.95), digits = 3) ## The 95\% confidence interval for the population difference in ## proportions is 0.011 to 0.226 or approximately +1\% to +23\%. ## EXAMPLE 7: ## A herd of 1000 cattle were tested for brucellosis. Four samples out of 200 ## test returned a positive result. Assuming 100\% test sensitivity and ## specificity, what is the estimated prevalence of brucellosis in this ## group of animals? pos <- 4; pop <- 200 dat.m07 <- as.matrix(cbind(pos, pop)) epi.conf(dat = dat.m07, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 ## The estimated prevalence of brucellosis in this herd is 2.0 (95\% CI 0.54 to ## 5.0) cases per 100 cattle at risk. ## EXAMPLE 8: ## The observed disease counts and population size in four areas are provided ## below. What are the the standardised morbidity ratios of disease for each ## area and their 95\% confidence intervals? obs <- c(5, 10, 12, 18); pop <- c(234, 189, 432, 812) dat.m08 <- as.matrix(cbind(obs, pop)) round(epi.conf(dat = dat.m08, ctype = "smr"), digits = 2) ## EXAMPLE 9: ## A survey has been conducted to determine the proportion of broilers ## protected from a given disease following vaccination. We assume that ## the intra-cluster correlation coefficient for protection (also known as the ## rate of homogeneity, rho) is 0.4 and the average number of birds per ## flock is 30. A total of 5898 birds from a total of 10363 were identified ## as protected. What proportion of birds are protected and what is the 95\% ## confidence interval for this estimate? ## Calculate the design effect, given rho = (design - 1) / (nbar - 1), where ## nbar equals the average number of individuals sampled per cluster: D <- 0.4 * (30 - 1) + 1; D ## The design effect is 12.6. Now calculate the proportion protected. We set ## N to large number. dat.m09 <- as.matrix(cbind(5898, 10363)) epi.conf(dat = dat.m09, ctype = "prevalence", method = "fleiss", N = 1000000, design = D, conf.level = 0.95) ## The estimated proportion of the population protected is 0.57 (95\% CI ## 0.53 to 0.60). Recalculate this estimate assuming the data were from a ## simple random sample (i.e., where the design effect is one): epi.conf(dat = dat.m09, ctype = "prevalence", method = "fleiss", N = 1000000, design = 1, conf.level = 0.95) ## If we had mistakenly assumed that data were a simple random sample the ## confidence interval for the proportion of birds protect would have been ## 0.56 -- 0.58. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sssep.rb2st1rf.Rd0000644000176200001440000000716214074746110015636 0ustar liggesusers\name{rsu.sssep.rb2st1rf} \alias{rsu.sssep.rb2st1rf} \title{ Sample size to achieve a desired surveillance system sensitivity assuming risk-based 2-stage sampling on one risk factor at the cluster level } \description{ Calculates the sample size to achieve a desired surveillance system sensitivity assuming risk-based 2-stage sampling on one risk factor at the cluster level, imperfect test sensitivity and perfect test specificity. 
} \usage{ rsu.sssep.rb2st1rf(rr, ppr, spr, pstar.c, se.c, pstar.u, se.u, se.p) } \arguments{ \item{rr}{vector, defining the relative risk values for each strata in the population.} \item{ppr}{vector of length \code{rr} defining the population proportions in each strata.} \item{spr}{vector of length \code{rr} defining the planned number of units to be sampled from each strata.} \item{pstar.c}{scalar (either a proportion or integer) defining the cluster level design prevalence.} \item{se.c}{scalar proportion, defining the desired cluster level sensitivity.} \item{pstar.u}{scalar (either a proportion or integer) defining the surveillance unit level design prevalence.} \item{se.u}{scalar (0 to 1) representing the sensitivity of the diagnostic test at the surveillance unit level.} \item{se.p}{scalar (0 to 1) representing the desired surveillance system (population-level) sensitivity.} } \value{ A list comprised of seven elements: \item{n.clusters}{scalar, the total number of clusters to be sampled.} \item{n.clusters.per.strata}{a vector of the same length as \code{rr} listing the numbers of clusters to be sampled from each risk stratum.} \item{n.units}{scalar, the total number of units to be sampled.} \item{n.units.per.strata}{a vector of the same length of \code{rr} listing the total numbers of units to be sampled from each risk stratum.} \item{n.units.per.cluster}{scalar, the number of units to be sampled from each cluster.} \item{epinf}{a vector of the same length of \code{rr} listing the effective probability of infection for each risk stratum.} \item{adj.risk}{a vector of the same length of \code{rr} listing the adjusted risk values for each risk stratum.} } \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to confirm the absence of ## disease using risk based sampling. The population of interest is comprised ## of individual sampling units managed within clusters. ## Clusters are stratified into 'high', 'medium' and 'low' risk areas ## where the cluster-level risk of disease in the high risk area compared ## with the low risk area is 5 and the cluster-level risk of disease in ## the medium risk area compared with the low risk area is 3. ## The proportions of the population at risk in the high, medium and low ## risk area are 0.10, 0.20 and 0.70, respectively. The proportion of samples ## taken from the high, medium and low risk areas will be 0.40, 0.40 and ## 0.20, respectively. ## You intend to use a test with diagnostic sensitivity of 0.90 and you'd ## like to take a sufficient number of samples to return a cluster-level ## sensitivity of 0.80 and a population-level (system) sensitivity of 0.95. ## How many units need to be sampled to meet the requirements of the study? rr <- c(5,3,1) ppr <- c(0.10,0.20,0.70) spr <- c(0.40,0.40,0.20) rsu.sssep.rb2st1rf(rr, ppr, spr, pstar.c = 0.01, se.c = 0.80, pstar.u = 0.10, se.u = 0.90, se.p = 0.95) ## A total of 197 clusters needs to be sampled, 79 from the high risk area, ## 79 from the medium risk area and 39 from the low risk area. A total of ## 18 units should be sampled from each cluster, 3546 units in total. 
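
## EXAMPLE 2 (sketch):
## The calculation in Example 1 can be repeated with the sampling effort 
## allocated in proportion to the population (that is, with spr set equal to 
## ppr) while all other inputs are left unchanged. The values below are 
## illustrative only:

rsu.sssep.rb2st1rf(rr = c(5,3,1), ppr = c(0.10,0.20,0.70), 
   spr = c(0.10,0.20,0.70), pstar.c = 0.01, se.c = 0.80, 
   pstar.u = 0.10, se.u = 0.90, se.p = 0.95)

## Comparing n.clusters and n.units with the output from Example 1 shows how 
## preferentially sampling the high risk area changes the total sampling 
## effort required to achieve the same surveillance system sensitivity.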
}
\keyword{methods}
epiR/man/rsu.sep.cens.Rd0000644000176200001440000000357014164037175014556 0ustar liggesusers\name{rsu.sep.cens}
\alias{rsu.sep.cens}

\title{
Surveillance system sensitivity assuming data from a population census
}

\description{
Calculates the surveillance system (population-level) sensitivity for disease detection assuming imperfect test sensitivity, perfect test specificity and when every unit in the population is tested (a census).
}

\usage{
rsu.sep.cens(d = 1, se.u)
}

\arguments{
\item{d}{scalar integer defining the expected number of infected units in the population (that is, the population size multiplied by the design prevalence).}
\item{se.u}{scalar or vector of numbers between 0 and 1 defining the unit sensitivity of the test.}
}

\value{
A vector of surveillance system (population-level) sensitivities.
}

\examples{
## EXAMPLE 1:
## Every animal in a population is to be sampled and tested using a test 
## with a diagnostic sensitivity of 0.80. What is the probability that 
## disease will be detected if we expect that there are five infected animals 
## in the population?

rsu.sep.cens(d = 5, se.u = 0.80)

## The probability that disease will be detected (i.e., the surveillance 
## system sensitivity) is 0.99 (i.e., quite high, even though the sensitivity 
## of the test is relatively low).


## EXAMPLE 2:
## Calculate the surveillance system sensitivity assuming every animal in 
## populations of size 10, 50, 100, 250 and 500 will be sampled and tested, 
## assuming a design prevalence in each population of 0.01 and use of a test 
## with a diagnostic sensitivity of 0.92.

rsu.sep.cens(d = ceiling(0.01 * c(10, 50, 100, 250, 500)), se.u = 0.92)

## For the populations comprised of 100 animals or less the surveillance 
## system sensitivity is 0.92. For the populations comprised of greater than 
## or equal to 250 animals the surveillance system sensitivity is greater 
## than 0.99.
}
\keyword{methods}
epiR/man/rsu.sssep.rbmrg.Rd0000644000176200001440000000676214074746064015303 0ustar liggesusers\name{rsu.sssep.rbmrg}
\alias{rsu.sssep.rbmrg}

\title{
Sample size to achieve a desired surveillance system sensitivity assuming risk-based sampling and multiple sensitivity values within risk groups
}

\description{
Calculates the sample size to achieve a desired surveillance system (population-level) sensitivity assuming risk-based sampling, multiple sensitivity values within each risk group and perfect test specificity.
} \usage{ rsu.sssep.rbmrg(pstar, rr, ppr, spr, spr.rg, se.p, se.u) } \arguments{ \item{pstar}{scalar, the design prevalence.} \item{rr}{vector of length equal to the number of risk strata, the relative risk values.} \item{ppr}{vector of the same length as \code{rr}, population proportions for each risk group.} \item{spr}{vector of the same length as \code{rr}, the planned surveillance proportions for each risk group.} \item{spr.rg}{matrix with rows equal to the number of risk groups and columns equal to the number of sensitivity values (row sums must equal 1), the proportions of samples for each sensitivity value in each risk group.} \item{se.p}{scalar (0 to 1) representing the desired surveillance system (population-level) sensitivity.} \item{se.u}{vector (0 to 1) representing the sensitivity of the diagnostic test at the surveillance unit level.} } \value{ A list comprised of three elements: \item{n}{matrix of sample sizes for each risk and sensitivity group.} \item{epi}{a vector of effective probability of infection estimates.} \item{mean.se}{a vector of the mean sensitivity for each risk group.} } \examples{ ## EXAMPLE 1: ## You are working with a disease of cattle where the prevalence is believed ## to vary according to herd type. The risk of disease is 5 times greater ## in dairy herds and 3 times greater in mixed herds compared with the ## reference category, beef herds. The distribution of dairy, mixed and beef ## herds in the population of interest is 0.10, 0.10 and 0.80, respectively. ## You intend to distribute your sampling effort 0.4, 0.4 and 0.2 across dairy, ## mixed and beef herds, respectively. ## Within each of the three risk groups a single test with a diagnostic ## sensitivity of 0.95 will be used. How many herds need to be sampled if ## you want to be 95\% certain of detecting disease if it is present in the ## population at a prevalence of 1\% or greater? ## Generate a matrix listing the proportions of samples for each test in ## each risk group (the number of rows equal the number of risk groups, ## the number of columns equal the number of tests): m <- rbind(1,1,1) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = 0.95) ## A total of 147 herds need to be sampled: 59 dairy, 59 mixed and 29 ## beef herds. ## EXAMPLE 2: ## Now assume that one of two tests will be used for each herd. The first ## test has a diagnostic sensitivity of 0.92. The second test has a diagnostic ## sensitivity of 0.80. The proportion of dairy, mixed and beef herds receiving ## the first test is 0.80, 0.50 and 0.70, respectively (which means that 0.20, ## 0.50 and 0.30 receive the second test, respectively). ## Recalculate the sample size. m <- rbind(c(0.8,0.2), c(0.5,0.5), c(0.7,0.3)) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = c(0.92,0.80)) ## A total of 159 herds need to be sampled: 64 dairy, 64 mixed and 31 ## beef herds. } \keyword{methods} epiR/man/epi.insthaz.Rd0000644000176200001440000001117714074731452014465 0ustar liggesusers\name{epi.insthaz} \alias{epi.insthaz} \title{Event instantaneous hazard based on Kaplan-Meier survival estimates } \description{ Compute event instantaneous hazard on the basis of a Kaplan-Meier survival function. 
} \usage{ epi.insthaz(survfit.obj, conf.level = 0.95) } \arguments{ \item{survfit.obj}{a \code{survfit} object, computed using the \code{survival} package.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Computes the instantaneous hazard of the event of interest, equivalent to the proportion of the population failing per unit time. } \value{ A data frame with three or four elements: \code{strata} the strata identifier, \code{time} the observed failure times, \code{sest} the observed Kaplan-Meier survival function, \code{slow} the lower bound of the confidence interval for the observed Kaplan-Meier survival function, \code{supp} the upper bound of the confidence interval for the observed Kaplan-Meier survival function, \code{hest} the observed instantaneous hazard (the proportion of the population at risk experiencing the event of interest per unit time), \code{hlow} the lower bound of the confidence interval for the observed instantaneous hazard, and \code{hupp} the upper bound of the confidence interval for the observed instantaneous hazard. } \references{ Venables W, Ripley B (2002). Modern Applied Statistics with S, fourth edition. Springer, New York, pp. 353 - 385. Singer J, Willett J (2003). Applied Longitudinal Data Analysis Modeling Change and Event Occurrence. Oxford University Press, London, pp. 348. } \examples{ ## EXAMPLE 1: library(survival) dat.df01 <- lung dat.df01$status <- ifelse(dat.df01$status == 1, 0, dat.df01$status) dat.df01$status <- ifelse(dat.df01$status == 2, 1, dat.df01$status) dat.df01$sex <- factor(dat.df01$sex, levels = c(1,2), labels = c("Male","Female")) lung.km01 <- survfit(Surv(time = time, event = status) ~ 1, data = dat.df01) lung.haz01 <- epi.insthaz(lung.km01, conf.level = 0.95) lung.shaz01 <- data.frame( time = lowess(lung.haz01$time, lung.haz01$hlow, f = 0.20)$x, hest = lowess(lung.haz01$time, lung.haz01$hest, f = 0.20)$y, hlow = lowess(lung.haz01$time, lung.haz01$hlow, f = 0.20)$y, hupp = lowess(lung.haz01$time, lung.haz01$hupp, f = 0.20)$y) plot(x = lung.haz01$time, y = lung.haz01$hest, xlab = "Time (days)", ylab = "Daily probability of event", type = "s", col = "grey", ylim = c(0, 0.05)) lines(x = lung.shaz01$time, y = lung.shaz01$hest, lty = 1, lwd = 2, col = "black") lines(x = lung.shaz01$time, y = lung.shaz01$hlow, lty = 2, lwd = 1, col = "black") lines(x = lung.shaz01$time, y = lung.shaz01$hupp, lty = 2, lwd = 1, col = "black") \dontrun{ library(ggplot2) ggplot() + theme_bw() + geom_step(data = lung.haz01, aes(x = time, y = hest), colour = "grey") + geom_smooth(data = lung.haz01, aes(x = time, y = hest), method = "loess", colour = "black", size = 0.75, linetype = "solid", se = FALSE, span = 0.20) + geom_smooth(data = lung.haz01, aes(x = time, y = hlow), method = "loess", colour = "black", size = 0.5, linetype = "dashed", se = FALSE, span = 0.20) + geom_smooth(data = lung.haz01, aes(x = time, y = hupp), method = "loess", colour = "black", size = 0.5, linetype = "dashed", se = FALSE, span = 0.20) + scale_x_continuous(limits = c(0,1000), name = "Time (days)") + scale_y_continuous(limits = c(0,0.05), name = "Daily probability of event") } ## EXAMPLE 2: ## Now stratify by gender: lung.km02 <- survfit(Surv(time = time, event = status) ~ sex, data = dat.df01) lung.haz02 <- epi.insthaz(lung.km02, conf.level = 0.95) \dontrun{ library(ggplot2) ggplot() + theme_bw() + geom_step(data = lung.haz02, aes(x = time, y = hest), colour = "grey") + facet_grid(strata ~ .) 
+ geom_smooth(data = lung.haz02, aes(x = time, y = hest), method = "loess", colour = "black", size = 0.75, linetype = "solid", se = FALSE, span = 0.20) + geom_smooth(data = lung.haz02, aes(x = time, y = hlow), method = "loess", colour = "black", size = 0.5, linetype = "dashed", se = FALSE, span = 0.20) + geom_smooth(data = lung.haz02, aes(x = time, y = hupp), method = "loess", colour = "black", size = 0.5, linetype = "dashed", se = FALSE, span = 0.20) + scale_x_continuous(limits = c(0,1000), name = "Time (days)") + scale_y_continuous(limits = c(0,0.05), name = "Daily probability of event") } } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.cp.Rd0000644000176200001440000000423514074731572013407 0ustar liggesusers\name{epi.cp} \alias{epi.cp} \title{ Extract unique covariate patterns from a data set } \description{ Extract the set of unique patterns from a set of covariates (explanatory variables). } \usage{ epi.cp(dat) } \arguments{ \item{dat}{an \emph{i} row by \emph{j} column data frame where the \emph{i} rows represent individual observations and the \emph{m} columns represent a set of \emph{m} covariates. The function allows for one or more covariates for each observation.} } \details{ This function extracts the \emph{k} unique covariate patterns in a data set comprised of \emph{i} observations, labelling them from 1 to \emph{k}. The frequency of occurrence of each covariate pattern is listed. A vector of length \emph{i} is also returned, listing the 1:\emph{k} covariate pattern identifier for each observation. } \value{ A list containing the following: \item{cov.pattern}{a data frame with columns: \code{id} the unique covariate pattern identifier (labelled 1 to \emph{k}), \code{n} the number of occasions each of the listed covariate pattern appears in the data, and the unique covariate combinations.} \item{id}{a vector of length \emph{i} listing the 1:\emph{k} covariate pattern identifier for each observation.} } \author{ Thanks to Johann Popp and Mathew Jay for providing code and suggestions to enhance the utility of this function. } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada. } \examples{ ## EXAMPLE 1: ## Generate a set of covariates: set.seed(seed = 1234) obs <- round(runif(n = 100, min = 0, max = 1), digits = 0) v1 <- round(runif(n = 100, min = 0, max = 4), digits = 0) v2 <- round(runif(n = 100, min = 0, max = 4), digits = 0) dat.df01 <- data.frame(obs, v1, v2) dat.glm01 <- glm(obs ~ v1 + v2, family = binomial, data = dat.df01) dat.mf01 <- model.frame(dat.glm01) ## Covariate pattern. Drop the first column of dat.mf01 (since column 1 is the ## outcome variable: epi.cp(dat.mf01[,2:3]) ## There are 25 covariate patterns in this data set. Subject 100 has ## covariate pattern 21. } \keyword{univar} epiR/man/epi.incin.Rd0000644000176200001440000000352013117711412014065 0ustar liggesusers\name{epi.incin} \docType{data} \alias{epi.incin} \title{Laryngeal and lung cancer cases in Lancashire 1974 - 1983} \description{ Between 1972 and 1980 an industrial waste incinerator operated at a site about 2 kilometres southwest of the town of Coppull in Lancashire, England. Addressing community concerns that there were greater than expected numbers of laryngeal cancer cases in close proximity to the incinerator Diggle et al. (1990) conducted a study investigating risks for laryngeal cancer, using recorded cases of lung cancer as controls. 
The study area is 20 km x 20 km in size and includes the location of residence of patients diagnosed with each cancer type from 1974 to 1983. The site of the incinerator was at easting 354500 and northing 413600.
}

\usage{data(epi.incin)}

\format{
A data frame with 974 observations on the following 3 variables.
  \describe{
    \item{xcoord}{easting coordinate (in metres) of each residence.}
    \item{ycoord}{northing coordinate (in metres) of each residence.}
    \item{status}{disease status: 0 = lung cancer, 1 = laryngeal cancer.}
  }
}

\source{
Bailey TC and Gatrell AC (1995). Interactive Spatial Data Analysis. Longman Scientific & Technical, London.
}

\references{
Diggle P, Gatrell A, and Lovett A (1990). Modelling the prevalence of cancer of the larynx in Lancashire: A new method for spatial epidemiology. In: Thomas R (Editor), Spatial Epidemiology. Pion Limited, London, pp. 35 - 47.

Diggle P (1990). A point process modelling approach to raised incidence of a rare phenomenon in the vicinity of a prespecified point. Journal of the Royal Statistical Society A 153: 349 - 362.

Diggle P, Rowlingson B (1994). A conditional approach to point process modelling of elevated risk. Journal of the Royal Statistical Society A 157: 433 - 440.
}

\keyword{datasets}
epiR/man/epi.psi.Rd0000644000176200001440000001102714075133040013560 0ustar liggesusers\name{epi.psi}
\alias{epi.psi}

\title{
Proportional similarity index
}

\description{
Compute the proportional similarity index.
}

\usage{
epi.psi(dat, itno = 99, conf.level = 0.95)
}

\arguments{
  \item{dat}{a data frame providing details of the distributions to be compared (in columns). The first column (either a character or factor) lists the levels of each distribution. Additional columns list the number of events for each factor level for each distribution to be compared.}
  \item{itno}{scalar, numeric defining the number of bootstrap simulations to be run to generate a confidence interval around the proportional similarity index estimate.}
  \item{conf.level}{scalar, numeric defining the magnitude of the returned confidence interval for each proportional similarity index estimate.}
}

\details{
The proportional similarity or Czekanowski index is an objective and simple measure of the area of intersection between two non-parametric frequency distributions (Feinsinger et al. 1981). PSI values range from 1 for identical frequency distributions to 0 for distributions with no common types. Bootstrap confidence intervals for this measure are estimated based on the approach developed by Garrett et al. (2007).
}

\value{
A five column data frame listing: \code{v1} the name of the reference column, \code{v2} the name of the comparison column, \code{est} the estimated proportional similarity index, \code{lower} the lower bound of the estimated proportional similarity index, and \code{upper} the upper bound of the estimated proportional similarity index.
}

\references{
Feinsinger P, Spears EE, Poole RW (1981) A simple measure of niche breadth. Ecology 62: 27 - 32.

Garrett N, Devane M, Hudson J, Nicol C, Ball A, Klena J, Scholes P, Baker M, Gilpin B, Savill M (2007) Statistical comparison of Campylobacter jejuni subtypes from human cases and environmental sources. Journal of Applied Microbiology 103: 2113 - 2121. DOI: 10.1111/j.1365-2672.2007.03437.x.

Mullner P, Collins-Emerson J, Midwinter A, Carter P, Spencer S, van der Logt P, Hathaway S, French NP (2010). Molecular epidemiology of Campylobacter jejuni in a geographically isolated country with a uniquely structured poultry industry.
Applied Environmental Microbiology 76: 2145 - 2154. DOI: 10.1128/AEM.00862-09. Rosef O, Kapperud G, Lauwers S, Gondrosen B (1985) Serotyping of Campylobacter jejuni, Campylobacter coli, and Campylobacter laridis from domestic and wild animals. Applied and Environmental Microbiology, 49: 1507 - 1510. } \examples{ ## EXAMPLE 1: ## A cross-sectional study of Australian thoroughbred race horses was ## carried out. The sampling frame for this study comprised all horses ## registered with Racing Australia in 2017 -- 2018. A random sample of horses ## was selected from the sampling frame and the owners of each horse ## invited to take part in the study. Counts of source population horses ## and study population horses are provided below. How well did the geographic ## distribution of study population horses match the source population? state <- c("NSW","VIC","QLD","WA","SA","TAS","NT","Abroad") srcp <- c(11372,10722,7371,4200,2445,1029,510,101) stup <- c(622,603,259,105,102,37,22,0) dat.df01 <- data.frame(state, srcp, stup) epi.psi(dat.df01, itno = 99, conf.level = 0.95) ## The proportional similarity index for these data was 0.88 (95\% CI 0.86 to ## 0.90). We conclude that the distribution of sampled horses by state ## was consistent with the distribution of the source population by state. \dontrun{ ## Compare the relative frequencies of the source and study populations ## by state graphically: library(ggplot2) dat.df01$psrcp <- dat.df01$srcp / sum(dat.df01$srcp) dat.df01$pstup <- dat.df01$stup / sum(dat.df01$stup) dat.df01 <- dat.df01[sort.list(dat.df01$psrcp),] dat.df01$state <- factor(dat.df01$state, levels = dat.df01$state) ## Data frame for ggplot2: gdat.df01 <- data.frame(state = rep(dat.df01$state, times = 2), pop = c(rep("Source", times = nrow(dat.df01)), rep("Study", times = nrow(dat.df01))), pfreq = c(dat.df01$psrcp, dat.df01$pstup)) gdat.df01$state <- factor(gdat.df01$state, levels = dat.df01$state) ## Bar chart of relative frequencies by state faceted by population: ggplot(data = gdat.df01, aes(x = state, y = pfreq)) + geom_bar(stat = "identity", position = position_dodge(), color = "grey") + facet_grid(~ pop) + scale_x_discrete(name = "State") + scale_y_continuous(limits = c(0,0.50), name = "Proportion") } } \keyword{univar} epiR/man/rsu.sssep.rspool.Rd0000644000176200001440000000366313754670150015515 0ustar liggesusers\name{rsu.sssep.rspool} \alias{rsu.sssep.rspool} \title{ Sample size to achieve a desired surveillance system sensitivity using pooled samples assuming representative sampling } \description{ Calculates the required sample size to achieve a desired surveilance system sensitivity assuming representative sampling, imperfect pooled test sensitivity and imperfect pooled test specificity. } \usage{ rsu.sssep.rspool(k, pstar, pse, psp, se.p) } \arguments{ \item{k}{scalar or vector of the same length as \code{sep} representing the number of individual units that contribute to each pool (i.e the pool size).} \item{pstar}{scalar or vector of the same length as \code{sep} representing the design prevalence.} \item{pse}{scalar or vector of the same length as \code{sep} representing the pool-level sensitivity.} \item{psp}{scalar or vector of the same length as \code{sep} representing the pool-level specificity.} \item{se.p}{scalar or vector (0 to 1) representing the desired surveillance system (population-level) sensitivity.} } \value{ A vector of required sample sizes. } \references{ Christensen J, Gardner I (2000). 
Herd-level interpretation of test results for epidemiologic studies of animal diseases. Preventive Veterinary Medicine 45: 83 - 106. } \examples{ ## EXAMPLE 1: ## To confirm your country's disease freedom status you intend to use a test ## applied at the herd level. The test is expensive so you decide to pool the ## samples taken from individual herds. How many pooled samples of size 5 are ## required to be 95\% confident that you will have detected disease if ## 1\% of herds are disease-positive? Assume a diagnostic sensitivity and ## specificity of 0.90 and 0.95 for the pooled testing regime. rsu.sssep.rspool(k = 5, pstar = 0.01, pse = 0.90, psp = 0.95, se.p = 0.95) ## A total of 32 pools (each comprised a samples from 5 herds) need to be ## tested. } \keyword{methods} epiR/man/rsu.adjrisk.Rd0000644000176200001440000000777113761705262014477 0ustar liggesusers\name{rsu.adjrisk} \alias{rsu.adjrisk} \title{ Adjusted risk values } \description{ Calculates adjusted risk estimates for given relative risk and population proportions. This is an intermediate calculation in the calculation of effective probability of infection for risk-based surveillance activities. } \usage{ rsu.adjrisk(rr, ppr) } \arguments{ \item{rr}{vector or matrix, defining the relative risk values for each strata in the population. See details.} \item{ppr}{vector of length \code{rr} defining the population proportions in each strata.} } \details{ On some occasions there is interest in calculating adjusted risk values for a series of relative risk estimates drawn from (for example) a probability distribution. In this situation a matrix is passed to argument \code{rr} with the columns of the matrix corresponding to the number of risk strata and the rows corresponding to the number of iterations for simulation. When data are entered in this format \code{rsu.adjrisk} returns a matrix of adjusted risk values of the same dimension. See Example 3, below. } \value{ A vector of adjusted risk values listed in order of \code{rr}.) } \references{ Martin P, Cameron A, Greiner M (2007). Demonstrating freedom from disease using multiple complex data sources 1: A new methodology based on scenario trees. Preventive Veterinary Medicine 79: 71 - 97. } \examples{ ## EXAMPLE 1: ## The relative risk of a given disease in an area of your country is 5 ## compared with a known reference 'low risk' area. A recent census shows that ## 10\% of the population are resident in the high risk area and 90\% ## are resident in the low risk area. . ## Calculate the adjusted relative risks for each area. rsu.adjrisk(rr = c(5,1), ppr = c(0.10,0.90)) ## The adjusted relative risks for the high and low risk areas are 3.6 and ## 0.7, respectively. ## EXAMPLE 2: ## Re-calculate the adjusted relative risks assuming there are 'high', ## 'medium' and 'low' risk areas. The relative risks for the high, medium ## and low risk areas are 5, 3 and 1, respectively. Population proportions for ## each area are 0.10, 0.10 and 0.80, respectively. rsu.adjrisk(rr = c(5,3,1), ppr = c(0.10,0.10,0.80)) ## The adjusted relative risks for the high, medium and low risk areas are ## 3.1, 1.9 and 0.6, respectively. ## EXAMPLE 3: ## Consider now the situation where we are not certain of our relative risk ## estimates for the high, medium and low risk areas described in Example 2 ## so we ask a group of experts for their opinion. Minimum, mode and maximum ## relative risk estimates for the high and medium risk areas are defined ## using a PERT distribution. 
For the high risk area the mode of the 
## relative risk is 5 with a minimum of 3 and a maximum of 20. For the medium 
## risk area the mode of the relative risk is 3 with a minimum of 2 and a 
## maximum of 5. As before, the population proportions for each area are 
## 0.10, 0.10 and 0.80, respectively. Take 10 random draws from a PERT 
## distribution (using the rpert function in package mc2d) and calculate 
## the adjusted relative risks for each draw:

\dontrun{
## Set up an empty matrix to collect the simulated relative risk values:
nsims <- 10; nrcat <- 3
rr <- matrix(NA, nrow = nsims, ncol = nrcat)

## Use the mc2d package to take nsims random draws from the PERT distribution:
rr[,1] <- mc2d::rpert(n = nsims, min = 3, mode = 5, max = 20)
rr[,2] <- mc2d::rpert(n = nsims, min = 2, mode = 3, max = 5)

## The low risk area is the reference, so its relative risk values are 1:
rr[,3] <- 1

## Population proportions:
ppr <- c(0.10,0.10,0.80)

rval.df <- rsu.adjrisk(rr, ppr)
summary(rval.df)

## The median adjusted relative risks for the high, medium and low risk area 
## are 3.6, 1.6 and 0.5, respectively. The minimum adjusted relative risks 
## are 2.5, 1.3 and 0.39, respectively. The maximum adjusted relative risks 
## are 5.5, 2.3 and 0.72, respectively.
}
}
\keyword{methods}
epiR/man/epi.smr.Rd0000644000176200001440000001057114164036762013605 0ustar liggesusers\name{epi.smr}
\alias{epi.smr}

\title{Confidence intervals and tests of significance of the standardised mortality [morbidity] ratio
}

\description{
Computes confidence intervals and tests of significance of the standardised mortality [morbidity] ratio.
}

\usage{
epi.smr(obs, exp, method = "byar", conf.level = 0.95)
}

\arguments{
  \item{obs}{scalar integer, defining the observed number of events.}
  \item{exp}{scalar number, defining the expected number of events.}
  \item{method}{character string, defining the method used. Options are \code{chi2}, \code{mid.p}, \code{fisher}, \code{byar}, \code{rothman.greenland}, \code{ury.wiggins} and \code{vandenbroucke}. See details, below.}
  \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.}
}

\details{
This function calculates the standardised mortality [morbidity] ratio based on scalars defining the observed and expected number of [disease] events.

The hypothesis that the SMR equals one is tested using the Chi square test, the Mid-P exact test, the Fisher exact test and Byar's approximation. Confidence intervals for the SMR are calculated using the Mid-P exact test, the Fisher exact test, Byar's approximation, Rothman and Greenland's method, Ury and Wiggins' method and the Vandenbroucke method.

Exact confidence intervals and p-values should be used when the number of observed events is less than or equal to five. For greater numbers of observed events, the approximation methods (Byar's, Rothman and Greenland, Ury and Wiggins and Vandenbroucke) should be used.

A two-sided test of significance is returned, using the test statistic appropriate for the method used.
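
As a guide to the general form of Byar's method (the exact computations carried out by this function may differ in detail), the approximate limits are commonly written in terms of the observed count \eqn{O}, the expected count \eqn{E} and the standard normal deviate \eqn{z} (Rothman and Boice 1979):

\deqn{SMR_{low} = \frac{O}{E}\left(1 - \frac{1}{9O} - \frac{z}{3\sqrt{O}}\right)^{3}}

\deqn{SMR_{upp} = \frac{O + 1}{E}\left(1 - \frac{1}{9(O + 1)} + \frac{z}{3\sqrt{O + 1}}\right)^{3}}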
} \value{ A data frame listing: \item{obs}{the observed number of events, as entered by the user.} \item{exp}{the expected number of events, as entered by the user.} \item{est}{the point estimate of the SMR.} \item{lower}{the lower bound of the confidence interval of the SMR.} \item{upper}{the upper bound of the confidence interval of the SMR.} \item{test.statistic}{test statistic of the significance of the SMR.} \item{p.value}{the probability that the null hypothesis (i.e., the number of observed events divided by the expected number of events equals 1) is true.} } \note{ Only 90\%, 95\% and 99\% confidence limits are computed using the Ury and Wiggins method. If \code{conf.level} does not equal 0.90, 0.95 or 0.99 \code{NAs} are returned for the lower and upper bound of the SMR confidence interval. Only 95\% confidence limits are computed using Vandenbroucke's method. If \code{conf.level} does not equal 0.95 \code{NAs} are returned for the lower and upper bound of the SMR confidence interval. } \references{ Armitage P, Berry G, Mathews J (2002). Statistical Methods in Medical Research. Blackwell Publications London. Lash TL, VanderWeele TJ, Haneuse S, Rothman KJ (2021). Modern Epidemiology. Lippincott - Raven Philadelphia, USA, pp. 99. Miettinen OS (1974). Comment. Journal of the American Statistical Association 69: 380 - 382. Rothman K, Boice J (1979). Epidemiologic Analysis with a Programmable Calculator. U.S. Department of Health, Education, and Welfare, Public Health Service, National Institutes of Health, Washington, USA. Snedecor G, Cochran W (1989). Statistical Methods. Iowa University Press Ames, Iowa. Ury H, Wiggins A (1985). Another shortcut method for calculating the confidence interval of a Poisson variable (or of a standardized mortality ratio). American Journal of Epidemiology 122, 197 - 198. Vandenbroucke J, (1982). A shortcut method for calculating the 95 percent confidence interval of the standardized mortality ratio (Letter). American Journal of Epidemiology 115, 303 - 304. } \examples{ ## EXAMPLE 1: ## The observed number of disease events in a province is 4; the expected ## number of disease events is 3.3. What is the standardised morbidity ratio ## and its 95\% confidence interval? Test the hypothesis that the SMR equals ## one. epi.smr(obs = 4, exp = 3.3, method = "mid.p", conf.level = 0.95) ## The standardised morbidity ratio is 1.2 (95\% CI 0.38 to 2.9). We accept ## the null hypothesis and conclude that the SMR does not significantly ## differ from one (p = 0.657). } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.sscohortc.Rd0000644000176200001440000001734414164045770015017 0ustar liggesusers\name{epi.sscohortc} \alias{epi.sscohortc} \title{ Sample size, power or minimum detectable incidence risk ratio for a cohort study using individual count data } \description{ Sample size, power or minimum detectable incidence risk ratio for a cohort study using individual count data. 
} \usage{ epi.sscohortc(irexp1, irexp0, pexp = NA, n = NA, power = 0.80, r = 1, N, design = 1, sided.test = 2, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{irexp1}{the expected incidence risk of the outcome in the exposed group (0 to 1).} \item{irexp0}{the expected incidence risk of the outcome in the non-exposed group (0 to 1).} \item{pexp}{the expected prevalence of exposure to the hypothesised risk factor in the population (0 to 1).} \item{n}{scalar, defining the total number of subjects in the study (i.e., the number in both the exposed and unexposed groups).} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the exposed group divided by the number in the unexposed group.} \item{N}{scalar, the estimated number of individuals in the population.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the outcome incidence risk in the exposed group is greater than or less than the outcome incidence risk in the unexposed group. Use a one-sided test to evaluate whether or not the outcome incidence risk in the exposed group is greater than the outcome incidence risk in the unexposed group.} \item{finite.correction}{logical, apply a finite correction factor?} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ The methodology in this function follows the methodology described in Chapter 8 of Woodward (2014), pp. 295 - 329. } \value{ A list containing the following: \item{n.total}{the total number of subjects required for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the exposed (treatment) group compared with the non-exposed (control) group.} \item{n.exp1}{the total number of subjects in the exposed (treatment) group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the exposed (treatment) group compared with the non-exposed (control) group.} \item{n.exp0}{the total number of subjects in the non-exposed (control) group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the exposed (treatment) group compared with the non-exposed (control) group.} \item{power}{the power of the study given the number of study subjects, the expected effect size and level of confidence.} \item{irr}{the incidence risk of the outcome in the exposed group divided by the incidence risk of the outcome in the unexposed group (the incidence risk ratio).} \item{or}{the odds of the outcome in the exposed group divided by the odds of the outcome in the unexposed group (the odds ratio).} } \references{ Kelsey JL, Thompson WD, Evans AS (1986). Methods in Observational Epidemiology. Oxford University Press, London, pp. 254 - 284. Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 295 - 329. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. Values need to be entered for \code{irexp0}, \code{pexp}, \code{n}, and \code{power} to return a value for \code{irr}. 
In this situation, the lower value of \code{irr} represents the maximum detectable incidence risk ratio that is less than 1; the upper value of \code{irr} represents the minimum detectable incidence risk ratio greater than 1. A value for \code{pexp} doesn't need to be entered if you want to calculate sample size or study power. When calculating study power or minimum detectable incidence risk ratio when \code{finite.correction = TRUE} the function takes the values of \code{n} and \code{N} entered by the user and back-calculates a value of \code{n} assuming an infinite population. Values for \code{power}, \code{irr} and \code{or} are then returned, assuming the back-calculated value of \code{n} is equivalent to the value of \code{n} entered by the user. } \examples{ ## EXAMPLE 1 (from Woodward 2014 Example 8.13 p. 314): ## A cohort study of smoking and coronary heart disease (CHD) in middle aged men ## is planned. A sample of men will be selected at random from the population ## and those that agree to participate will be asked to complete a ## questionnaire. The follow-up period will be 5 years. The investigators would ## like to be 0.90 sure of being able to detect when the risk ratio of CHD ## is 1.4 for smokers, using a 0.05 significance test. Previous evidence ## suggests that the incidence risk of death in non-smokers is 413 per ## 100,000 per year. Assuming equal numbers of smokers and non-smokers are ## sampled, how many men should be sampled overall? irexp1 = 1.4 * (5 * 413) / 100000; irexp0 = (5 * 413) / 100000 epi.sscohortc(irexp1 = irexp1, irexp0 = irexp0, pexp = NA, n = NA, power = 0.90, r = 1, N = NA, design = 1, sided.test = 1, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95) ## A total of 12,130 men need to be sampled (6065 smokers and 6065 non-smokers). ## EXAMPLE 2: ## Say, for example, we are only able to enrol 5000 subjects into the study ## described above. What is the minimum and maximum detectable risk ratio? irexp0 = (5 * 413)/100000 epi.sscohortc(irexp1 = NA, irexp0 = irexp0, pexp = NA, n = 5000, power = 0.90, r = 1, N = NA, design = 1, sided.test = 1, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95) ## The minimum detectable risk ratio >1 is 1.65. The maximum detectable ## risk ratio <1 is 0.50. ## EXAMPLE 3: ## A study is to be carried out to assess the effect of a new treatment for ## anoestrus in dairy cattle. What is the required sample size if we expect ## the proportion of cows responding in the treatment (exposed) group to be ## 0.30 and the proportion of cows responding in the control (unexposed) group ## to be 0.15? The required power for this study is 0.80 using a two-sided ## 0.05 test. epi.sscohortc(irexp1 = 0.30, irexp0 = 0.15, pexp = NA, n = NA, power = 0.80, r = 1, N = NA, design = 1, sided.test = 2, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95) ## A total of 242 cows are required: 121 in the treatment (exposed) group and ## 121 in the control (unexposed) group. ## Assume now that this study is going to be carried out using animals from a ## number of herds. What is the required sample size when you account for the ## observation that response to treatment is likely to cluster within herds? ## For the exercise, assume that the intra-cluster correlation coefficient ## (the rate of homogeneity, rho) for this treatment is 0.05 and the ## average number of cows sampled per herd will be 30. 
## Calculate the design effect, given rho = (design - 1) / (nbar - 1), ## where nbar equals the average number of individuals per cluster: design <- 0.05 * (30 - 1) + 1; design ## The design effect is 2.45. epi.sscohortc(irexp1 = 0.30, irexp0 = 0.15, pexp = NA, n = NA, power = 0.80, r = 1, N = NA, design = design, sided.test = 2, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95) ## A total of 592 cows are required for this study: 296 in the treatment group ## and 296 in the control group. } \keyword{univar} epiR/man/epi.ssdetect.Rd0000644000176200001440000001360714164045023014613 0ustar liggesusers\name{epi.ssdetect} \alias{epi.ssdetect} \title{ Sample size to detect an event } \description{ Sample size to detect at least one event (e.g., a disease-positive individual) in a population. The method adjusts sample size estimates on the basis of test sensitivity and can account for series and parallel test interpretation. } \usage{ epi.ssdetect(N, prev, se, sp, interpretation = "series", covar = c(0,0), finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{N}{a vector of length one or two defining the size of the population. The first element of the vector defines the number of clusters, the second element defining the mean number of sampling units per cluster.} \item{prev}{a vector of length one or two defining the prevalence of disease in the population. The first element of the vector defines the between-cluster prevalence, the second element defines the within-cluster prevalence.} \item{se}{a vector of length one or two defining the sensitivity of the test(s) used.} \item{sp}{a vector of length one or two defining the specificity of the test(s) used.} \item{interpretation}{a character string indicating how test results should be interpreted. Options are \code{series} or \code{parallel}.} \item{covar}{a vector of length two defining the covariance between test results for disease positive and disease negative groups. The first element of the vector is the covariance between test results for disease positive subjects. The second element of the vector is the covariance between test results for disease negative subjects. Use \code{covar = c(0,0)} (the default) if these values are not known.} \item{finite.correction}{logical, apply finite correction? See details, below.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \value{ A list containing the following: \item{performance}{The sensitivity and specificity of the testing strategy.} \item{sample.size}{The number of clusters, units, and total number of units to be sampled.} } \references{ Cannon RM (2001). Sense and sensitivity --- designing surveys based on an imperfect test. Preventive Veterinary Medicine 49: 141 - 163. Dohoo I, Martin W, Stryhn H (2009). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 54. MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36, 39 - 42. DOI: 10.1080/00480169.1988.35472. } \note{ Sample size calculations are carried out using the binomial distribution and an approximation of the hypergeometric distribution (MacDiarmid 1988). Because the hypergeometric distribution takes into account the size of the population being sampled \code{finite.correction = TRUE} is only applied to the binomial sample size estimates. 
Define \code{se1} and \code{se2} as the sensitivity for the first and second test, \code{sp1} and \code{sp2} as the specificity for the first and second test, \code{p111} as the proportion of disease-positive subjects with a positive test result to both tests and \code{p000} as the proportion of disease-negative subjects with a negative test result to both tests. The covariance between test results for the disease-positive group is \code{p111 - se1 * se2}. The covariance between test results for the disease-negative group is \code{p000 - sp1 * sp2}.} \examples{ ## EXAMPLE 1: ## We would like to confirm the absence of disease in a single 1000-cow ## dairy herd. We expect the prevalence of disease in the herd to be 5\%. ## We intend to use a single test with a sensitivity of 0.90 and a ## specificity of 1.00. How many samples should we take to be 95\% certain ## that, if all tests are negative, the disease is not present? epi.ssdetect(N = 1000, prev = 0.05, se = 0.90, sp = 1.00, interpretation = "series", covar = c(0,0), finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95) ## Using the hypergeometric distribution, we need to sample 65 cows. ## EXAMPLE 2: ## We would like to confirm the absence of disease in a study area. If the ## disease is present we expect the between-herd prevalence to be 8\% and the ## within-herd prevalence to be 5\%. We intend to use two tests: the first has ## a sensitivity and specificity of 0.90 and 0.80, respectively. The second ## has a sensitivity and specificity of 0.95 and 0.85, respectively. The two ## tests will be interpreted in parallel. How many herds and cows within herds ## should we sample to be 95\% certain that the disease is not present in the ## study area if all tests are negative? The area is comprised of ## approximately 5000 herds and the average number of cows per herd is 100. epi.ssdetect(N = c(5000, 100), prev = c(0.08, 0.05), se = c(0.90, 0.95), sp = c(0.80, 0.85), interpretation = "parallel", covar = c(0,0), finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95) ## We need to sample 46 cows from 40 herds (a total of 1840 samples). ## The sensitivity of this testing regime is 99\%. The specificity of this ## testing regime is 68\%. ## EXAMPLE 3: ## You want to document the absence of Mycoplasma from a 200-sow pig herd. ## Based on your experience and the literature, a minimum of 20\% of sows ## would have seroconverted if Mycoplasma were present in the herd. How many ## sows do you need to sample? epi.ssdetect(N = 200, prev = 0.20, se = 1.00, sp = 1.00, interpretation = "series", covar = c(0,0), finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95) ## If you test 15 sows and all test negative you can state that you are 95\% ## confident that the prevalence rate of Mycoplasma in the herd is less than ## 20\%. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per lineepiR/man/rsu.sep.rs2st.Rd0000644000176200001440000000501413745671726014676 0ustar liggesusers\name{rsu.sep.rs2st} \alias{rsu.sep.rs2st} \title{ Surveillance system sensitivity assuming representative two-stage sampling } \description{ Calculates the surveillance system sensitivity for detection of disease assuming two-stage sampling (sampling of clusters and sampling of units within clusters), imperfect test sensitivity and perfect test specificity.
} \usage{ rsu.sep.rs2st(H = NA, N = NA, n, pstar.c, pstar.u, se.u = 1) } \arguments{ \item{H}{scalar, integer representing the total number of clusters in the population. Use \code{NA} if unknown.} \item{N}{vector, integer representing the number of units within each cluster. Use \code{NA} if unknown.} \item{n}{vector, integer representing the number of units tested within each cluster.} \item{pstar.c}{scalar, numeric (0 to 1) representing the cluster-level design prevalence.} \item{pstar.u}{scalar, numeric (0 to 1) representing the unit-level design prevalence.} \item{se.u}{scalar, numeric (0 to 1), representing the sensitivity of the diagnostic test at the individual unit level.} } \value{ A list comprised of: \item{se.p}{the surveillance system (population-level) sensitivity of detection.} \item{se.c}{the cluster-level sensitivity of detection.} \item{se.u}{the unit-level sensitivity of detection.} \item{N}{the number of units within each cluster, as entered by the user.} \item{n}{the number of units tested within each cluster, as entered by the user.} } \note{ If \code{pstar.c} is not a proportion \code{N} must be provided and \code{N} must be greater than \code{n}. } \examples{ ## EXAMPLE 1: ## A study is to be conducted to confirm the absence of enzootic bovine ## leukosis disease in your country. Four herds are to be sampled from a ## population of 500 herds. There are 550, 250, 700 and 200 cows in each of ## the four herds. From each of the four herds 30 animals are to be sampled. ## The design prevalence for this study is set to 0.01 at the herd level ## and if a herd is positive for leukosis the individual animal level ## design prevalence is set to 0.10. Assuming a test with diagnostic ## sensitivity of 0.98 will be used, what is the sensitivity of ## disease detection at the population and cluster (herd) level? rsu.sep.rs2st(H = 500, N = c(550,250,700,200), n = rep(30, times = 4), pstar.c = 0.01, pstar.u = 0.10, se.u = 0.98) ## The population level sensitivity of detection is 0.037. The cluster level ## sensitivity of detection ranges from 0.950 to 0.958. } \keyword{methods} epiR/man/epi.sssimpleestb.Rd0000644000176200001440000001275314164036763015526 0ustar liggesusers\name{epi.sssimpleestb} \alias{epi.sssimpleestb} \title{ Sample size to estimate a binary outcome using simple random sampling } \description{ Sample size to estimate a binary outcome using simple random sampling. } \usage{ epi.sssimpleestb(N = 1E+06, Py, epsilon, error = "relative", se, sp, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{N}{scalar integer, the total number of individual listing units in the population.} \item{Py}{scalar number, an estimate of the population proportion to be estimated.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{se}{the diagnostic sensitivity of the method used to detect positive outcomes (0 - 1).} \item{sp}{the diagnostic specificity of the method used to detect positive outcomes (0 - 1).} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \value{ Returns an integer defining the required sample size. } \references{ Getachew T, Getachew G, Sintayehu G, Getenet M, Fasil A (2016). 
Bayesian estimation of sensitivity and specificity of Rose Bengal, complement fixation, and indirect ELISA tests for the diagnosis of bovine brucellosis in Ethiopia. Veterinary Medicine International. DOI: 10.1155/2016/8032753 Humphry RW, Cameron A, Gunn GJ (2004). A practical approach to calculate sample size for herd prevalence surveys. Preventive Veterinary Medicine 65: 173 - 188. Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 70 - 75. Scheaffer RL, Mendenhall W, Lyman Ott R (1996). Elementary Survey Sampling. Duxbury Press, New York, pp. 95. Otte J, Gumm I (1997). Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys. Preventive Veterinary Medicine 31: 147 - 150. } \note{ The sample size calculation method implemented in this function follows the approach described by Humphry et al. (2004) accounting for imperfect diagnostic sensitivity and specificity. If \code{epsilon} is expressed as a relative error (\code{error = "relative"}) the sample estimate should not differ in absolute value from the true unknown population parameter \code{d} by more than \code{epsilon * d}. } \examples{ ## EXAMPLE 1: ## We want to estimate the seroprevalence of Brucella abortus in a population ## of cattle. An estimate of the unknown prevalence of B. abortus in this ## population is 0.15. We would like to be 95\% certain that our estimate is ## within 20\% of the true proportion of the population seropositive to ## B. abortus. Calculate the required sample size assuming use of a test ## with perfect diagnostic sensitivity and specificity. n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1.00, sp = 1.00, nfractional = FALSE, conf.level = 0.95) n.crude ## A total of 545 cattle need to be sampled to meet the requirements of the ## survey. ## EXAMPLE 1 (continued): ## Why don't I get the same results as other sample size calculators? The ## most likely reason is misspecification of epsilon. Other sample size ## calculators (e.g., OpenEpi) require you to enter the absolute ## error (as opposed to relative error). For the example above the absolute ## error is 0.20 * 0.15 = 0.03. Re-run epi.sssimpleestb: n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.03, error = "absolute", se = 1.00, sp = 1.00, nfractional = FALSE, conf.level = 0.95) n.crude ## A total of 545 cattle need to be sampled to meet the requirements of the ## survey. ## EXAMPLE 1 (continued): ## The OIE recommends that the complement fixation test (CFT) is used for ## bovine brucellosis prevalence estimation. Assume the diagnostic sensitivity ## and specificity of the bovine brucellosis CFT to be used is 0.94 and 0.88, ## respectively (Getachew et al. 2016). Re-calculate the required sample size ## accounting for imperfect diagnostic test performance. n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 0.94, sp = 0.88, nfractional = FALSE, conf.level = 0.95) n.crude ## A total of 1168 cattle need to be sampled to meet the requirements of the ## survey. ## EXAMPLE 1 (continued): ## Being seropositive to brucellosis is likely to cluster within herds. ## Otte and Gumm (1997) cite the intraclass correlation coefficient (rho) of ## Brucella abortus to be in the order of 0.09. Adjust the sample size ## estimate to account for clustering at the herd level.
Assume that, on ## average, 20 animals will be sampled per herd: ## Let D equal the design effect and nbar equal the average number of ## individuals per cluster: ## rho = (D - 1) / (nbar - 1) ## Solving for D: ## D <- rho * (nbar - 1) + 1 rho <- 0.09; nbar <- 20 D <- rho * (nbar - 1) + 1 n.adj <- ceiling(n.crude * D) n.adj ## After accounting for use of an imperfect diagnostic test and the presence ## of clustering of brucellosis positivity at the herd level we estimate that ## a total of 3166 cattle need to be sampled to meet the requirements of ## the survey. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.empbayes.Rd0000644000176200001440000000546414074732132014610 0ustar liggesusers\name{epi.empbayes} \alias{epi.empbayes} \title{Empirical Bayes estimates of observed event counts} \description{ Computes empirical Bayes estimates of observed event counts using the method of moments. } \usage{ epi.empbayes(obs, pop) } \arguments{ \item{obs}{a vector representing the observed event counts in each unit of interest.} \item{pop}{a vector representing the population count in each unit of interest.} } \details{ The gamma distribution is parameterised in terms of shape (\eqn{\alpha}) and scale (\eqn{\nu}) parameters. The mean of a given gamma distribution equals \eqn{\nu / \alpha}. The variance equals \eqn{\nu / \alpha^{2}}. The empirical Bayes estimate of event risk in each unit of interest equals \eqn{(obs + \nu) / (pop + \alpha)}. This technique performs poorly when your data contains large numbers of zero event counts. In this situation a Bayesian approach for estimating \eqn{\alpha} and \eqn{\nu} would be advised. } \value{ A data frame with four elements: \code{gamma} the mean event risk across all units, \code{phi} the variance of event risk across all units, \code{alpha} the estimated shape parameter of the gamma distribution, and \code{nu} the estimated scale parameter of the gamma distribution. } \references{ Bailey TC, Gatrell AC (1995). Interactive Spatial Data Analysis. Longman Scientific & Technical. London, pp. 303 - 308. Langford IH (1994). Using empirical Bayes estimates in the geographical analysis of disease risk. Area 26: 142 - 149. Meza J (2003). Empirical Bayes estimation smoothing of relative risks in disease mapping. Journal of Statistical Planning and Inference 112: 43 - 62. 
} \examples{ ## EXAMPLE 1: data(epi.SClip) obs <- epi.SClip$cases; pop <- epi.SClip$population est <- epi.empbayes(obs, pop) crude.p <- ((obs) / (pop)) * 100000 crude.r <- rank(crude.p) ebay.p <- ((obs + est[4]) / (pop + est[3])) * 100000 dat.df01 <- data.frame(rank = c(crude.r, crude.r), Method = c(rep("Crude", times = length(crude.r)), rep("Empirical Bayes", times = length(crude.r))), est = c(crude.p, ebay.p)) ## Scatter plot showing the crude and empirical Bayes adjusted lip cancer ## incidence rates as a function of district rank for the crude lip ## cancer incidence rates: \dontrun{ library(ggplot2) ggplot(dat = dat.df01, aes(x = rank, y = est, colour = Method)) + geom_point() + scale_x_continuous(name = "District rank", breaks = seq(from = 0, to = 60, by = 10), labels = seq(from = 0, to = 60, by = 10), limits = c(0,60)) + scale_y_continuous(limits = c(0,30), name = "Lip cancer incidence rates (cases per 100,000 person years)") } } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.dsl.Rd0000644000176200001440000001035414074732166013566 0ustar liggesusers\name{epi.dsl} \alias{epi.dsl} \title{Mixed-effects meta-analysis of binary outcomes using the DerSimonian and Laird method } \description{ Computes individual study odds or risk ratios for binary outcome data. Computes the summary odds or risk ratio using the DerSimonian and Laird method. Performs a test of heterogeneity among trials. Performs a test for the overall difference between groups (that is, after pooling the studies, do treated groups differ significantly from controls?). } \usage{ epi.dsl(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{ev.trt}{observed number of events in the treatment group.} \item{n.trt}{number in the treatment group.} \item{ev.ctrl}{observed number of events in the control group.} \item{n.ctrl}{number in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{odds.ratio} or \code{risk.ratio}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}. } \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ \code{alternative = "greater"} tests the hypothesis that the DerSimonian and Laird summary measure of association is greater than 1. 
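Under the DerSimonian and Laird random-effects model the weight assigned to the \eqn{i}th trial is the reciprocal of the sum of the within-trial variance and the estimated between-trial variance of the treatment effect (\code{tau.sq}), that is \eqn{w_i = 1 / (v_i + \tau^2)}{w.i = 1 / (v.i + tau^2)}, where \eqn{v_i}{v.i} is the variance of the log odds (or log risk) ratio for the \eqn{i}th trial. The summary measure of association is obtained by exponentiating the weighted mean of the individual trial log odds (or log risk) ratios computed using these weights.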
} \value{ A list containing the following: \item{OR}{the odds ratio for each trial and the lower and upper bounds of the confidence interval of the odds ratio for each trial.} \item{RR}{the risk ratio for each trial and the lower and upper bounds of the confidence interval of the risk ratio for each trial.} \item{OR.summary}{the DerSimonian and Laird summary odds ratio and the lower and upper bounds of the confidence interval of the DerSimonian and Laird summary odds ratio.} \item{RR.summary}{the DerSimonian and Laird summary risk ratio and the lower and upper bounds of the confidence interval of the DerSimonian and Laird summary risk ratio.} \item{weights}{the inverse variance and DerSimonian and Laird weights for each trial.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} \item{Hsq}{the relative excess of the heterogeneity test statistic \code{Q} over the degrees of freedom \code{df}.} \item{Isq}{the percentage of total variation in study estimates that is due to heterogeneity rather than chance.} \item{tau.sq}{the variance of the treatment effect among trials.} \item{effect}{a vector containing \code{z} the test statistic for overall treatment effect and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, 2001, pp. 291 - 299. DerSimonian R, Laird N (1986). Meta-analysis in clinical trials. Controlled Clinical Trials 7: 177 - 188. Higgins J, Thompson S (2002). Quantifying heterogeneity in a meta-analysis. Statistics in Medicine 21: 1539 - 1558. } \note{ Under the random-effects model, the assumption of a common treatment effect is relaxed, and the effect sizes are assumed to have a normal distribution with variance \code{tau.sq}. Using this method, the DerSimonian and Laird weights are used to compute the pooled odds ratio. The function checks each strata for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within the strata. } \seealso{ \code{\link{epi.iv}}, \code{\link{epi.mh}}, \code{\link{epi.smd}} } \examples{ ## EXAMPLE 1: data(epi.epidural) epi.dsl(ev.trt = epi.epidural$ev.trt, n.trt = epi.epidural$n.trt, ev.ctrl = epi.epidural$ev.ctrl, n.ctrl = epi.epidural$n.ctrl, names = as.character(epi.epidural$trial), method = "odds.ratio", alternative = "two.sided", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.ssdxsesp.Rd0000644000176200001440000000452414120401602014636 0ustar liggesusers\name{epi.ssdxsesp} \alias{epi.ssdxsesp} \title{ Sample size to estimate the sensitivity or specificity of a diagnostic test } \description{ Sample size to estimate the sensitivity or specificity of a diagnostic test. } \usage{ epi.ssdxsesp(test, type = "se", Py, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{test}{scalar number, the prior estimate of diagnostic test performance (0 to 1).} \item{type}{character string. 
Options are \code{se} to estimate a sample size to determine diagnostic sensitivity and \code{sp} to estimate a sample size to determine diagnostic specificity.} \item{Py}{scalar number, an estimate of the prevalence of the outcome in the study population.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \value{ Returns an integer defining the required sample size. } \references{ Hajian-Tilaki K (2014). Sample size estimation in diagnostic test studies of biomedical informatics. Journal of Biomedical Informatics 48: 193 - 204. DOI: 10.1016/j.jbi.2014.02.013. } \note{ The sample size calculation method implemented in this function follows the approach described by Hajian-Tilaki (2014). } \examples{ ## EXAMPLE 1 (from Hajian-Tilaki 2014, p 195): ## A new diagnostic test has been developed and we'd like to conduct a study ## to determine its diagnostic sensitivity which we believe should be in the ## order of 0.80. How many subjects should be enrolled if the prevalence of ## the disease outcome of interest is 0.10 and we'd like to be 95\% confident ## that our estimate of sensitivity is within 0.07 of the true population ## value? epi.ssdxsesp(test = 0.80, type = "se", Py = 0.10, epsilon = 0.07, error = "absolute", nfractional = FALSE, conf.level = 0.95) ## A total of 1255 subjects need to be enrolled to meet the requirements of the ## study. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sep.rs.Rd0000644000176200001440000000461013742461352014245 0ustar liggesusers\name{rsu.sep.rs} \alias{rsu.sep.rs} \title{ Surveillance system sensitivity assuming representative sampling } \description{ Calculates the surveillance system (population-level) sensitivity for detection of disease assuming representative sampling, imperfect test sensitivity and perfect test specificity using the hypergeometric method if \code{N} is known and the binomial method if \code{N} is unknown. } \usage{ rsu.sep.rs(N = NA, n, pstar, se.u = 1) } \arguments{ \item{N}{scalar integer or vector of integers the same length as \code{n}, representing the population size. Use \code{NA} if unknown.} \item{n}{scalar integer or vector of integers representing the number of units tested.} \item{pstar}{scalar numeric or vector of numbers the same length as \code{n} representing the design prevalence. See details, below.} \item{se.u}{scalar numeric or vector of numbers the same length as \code{n} representing the unit sensitivity.} } \value{ A vector of surveillance system (population-level) sensitivity estimates. } \references{ MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \examples{ ## EXAMPLE 1: ## Three hundred samples are to be tested from a population of animals to ## confirm the absence of a disease. The total size of the population is ## unknown. 
Assuming a design prevalence of 0.01 and a test with ## diagnostic sensitivity of 0.95 will be used what is the sensitivity of ## disease detection at the population level? rsu.sep.rs(N = NA, n = 300, pstar = 0.01, se.u = 0.95) ## The sensitivity of disease detection at the population level is 0.943. ## EXAMPLE 2: ## Thirty animals from five herds ranging in size from 80 to 100 head are to be ## sampled to confirm the absence of a disease. Assuming a design prevalence ## of 0.01 and a test with diagnostic sensitivity of 0.95 will be used, what ## is the sensitivity of disease detection for each herd? N <- seq(from = 80, to = 100, by = 5) n <- rep(30, times = length(N)) rsu.sep.rs(N = N, n = n, pstar = 0.01, se.u = 0.95) ## The sensitivity of disease detection for each herd ranges from 0.28 to ## 0.36. } \keyword{methods}epiR/man/epi.iv.Rd0000644000176200001440000001042714074731370013417 0ustar liggesusers\name{epi.iv} \alias{epi.iv} \title{Fixed-effects meta-analysis of binary outcomes using the inverse variance method } \description{ Computes individual study odds or risk ratios for binary outcome data. Computes the summary odds or risk ratio using the inverse variance method. Performs a test of heterogeneity among trials. Performs a test for the overall difference between groups (that is, after pooling the studies, do treated groups differ significantly from controls?). } \usage{ epi.iv(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{ev.trt}{observed number of events in the treatment group.} \item{n.trt}{number in the treatment group.} \item{ev.ctrl}{observed number of events in the control group.} \item{n.ctrl}{number in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{odds.ratio} or \code{risk.ratio}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}. } \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Using this method, the inverse variance weights are used to compute the pooled odds ratios and risk ratios. The inverse variance weights should be used to indicate the weight each trial contributes to the meta-analysis. \code{alternative = "greater"} tests the hypothesis that the inverse variance summary measure of association is greater than 1. 
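Expressed as a formula, the inverse variance pooled estimate is obtained by exponentiating the weighted mean of the individual trial log odds (or log risk) ratios: \deqn{\log \hat{\theta}_{IV} = \sum_{i} w_i \log \hat{\theta}_i / \sum_{i} w_i}{log(theta.iv) = sum(w.i * log(theta.i)) / sum(w.i)} where \eqn{w_i = 1 / v_i}{w.i = 1 / v.i} and \eqn{v_i}{v.i} is the variance of the log odds (or log risk) ratio in the \eqn{i}th trial.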
} \value{ A list containing: \item{OR}{the odds ratio for each trial and the lower and upper bounds of the confidence interval of the odds ratio for each trial.} \item{RR}{the risk ratio for each trial and the lower and upper bounds of the confidence interval of the risk ratio for each trial.} \item{OR.summary}{the inverse variance summary odds ratio and the lower and upper bounds of the confidence interval of the inverse variance summary odds ratio.} \item{RR.summary}{the inverse variance summary risk ratio and the lower and upper bounds of the confidence interval of the inverse variance summary risk ratio.} \item{weights}{the raw and inverse variance weights assigned to each trial.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} \item{Hsq}{the relative excess of the heterogeneity test statistic \code{Q} over the degrees of freedom \code{df}.} \item{Isq}{the percentage of total variation in study estimates that is due to heterogeneity rather than chance.} \item{effect}{a vector containing \code{z} the test statistic for overall treatment effect and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, 2001, pp. 291 - 299. Higgins JP, Thompson SG (2002). Quantifying heterogeneity in a meta-analysis. Statistics in Medicine 21: 1539 - 1558. } \note{ The inverse variance method performs poorly when data are sparse, both in terms of event rates being low and trials being small. The Mantel-Haenszel method (\code{\link{epi.mh}}) is more robust when data are sparse. Using this method, the inverse variance weights are used to compute the pooled odds ratios and risk ratios. The function checks each strata for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within the strata. } \seealso{ \code{\link{epi.dsl}}, \code{\link{epi.mh}}, \code{\link{epi.smd}} } \examples{ ## EXAMPLE 1: data(epi.epidural) epi.iv(ev.trt = epi.epidural$ev.trt, n.trt = epi.epidural$n.trt, ev.ctrl = epi.epidural$ev.ctrl, n.ctrl = epi.epidural$n.ctrl, names = as.character(epi.epidural$trial), method = "odds.ratio", alternative = "two.sided", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sep.rb.Rd0000644000176200001440000000611613754452224014230 0ustar liggesusers\name{rsu.sep.rb} \alias{rsu.sep.rb} \title{ Surveillance system sensitivity assuming risk-based sampling and varying unit sensitivity } \description{ Calculates surveillance system (population-level) sensitivity assuming one-stage, risk-based sampling and varying unit sensitivity using either the binomial or hypergeometric methods. } \usage{ rsu.sep.rb(N, rr, ppr, df, pstar, method = "binomial") } \arguments{ \item{N}{vector of the same length as \code{rr}, population size estimates for each risk group.} \item{rr}{vector of length equal to the number of risk strata, the relative risk values.} \item{ppr}{vector of the same length as \code{rr}, population proportions for each risk group.} \item{df}{a dataframe of values for each combination of risk stratum and sensitivity level. 
Column 1 = risk group index, column 2 = unit sensitivities, column 3 = the sample size for risk group and unit sensitivity).} \item{pstar}{scalar, the design prevalence.} \item{method}{character string indicating the method to be used. Options are \code{binomial} or \code{hypergeometric}. See details, below.} } \details{ If \code{method = binomial} \code{N} is ignored and values for \code{ppr} need to be entered. Conversely, if \code{method = hypergeometric}, \code{ppr} is ignored and calculated from \code{N}. } \value{ A list comprised of five elements: \item{sep}{scalar, the population-level sensitivity estimate.} \item{epi}{vector, effective probability of infection estimates.} \item{adj.risk}{vector, adjusted risks.} \item{n}{vector, sample size by risk group} \item{se.u}{a vector of the mean sensitivity for each risk group.} } \examples{ ## EXAMPLE 1: ## Calculate the surveillance system sensitivity assuming one-stage risk- ## based sampling assuming a population comprised of high risk (n = 200 ## clusters) and low risk (n = 1800 clusters) where the probability of ## disease in the high risk group is 5 times that of the low risk group. ## Four clusters will be sampled with n = 80, 30, 20 and 30 surveillance ## units within each cluster tested using a test with diagnostic sensitivity ## at the surveillance unit level of 0.92, 0.85, 0.92 and 0.85, respectively. ## Assume a design prevalence of 0.01. rg <- c(1,1,2,2) se.u <- c(0.92,0.85,0.92,0.85) n <- c(80,30,20,30) df <- data.frame(rg, se.u, n) rsu.sep.rb(N = c(200,1800), rr = c(5,1), ppr = NA, df = df, pstar = 0.01, method = "hypergeometric") ## The expected surveillance system sensitivity is 0.993. ## EXAMPLE 2: ## Recalculate, assuming that we don't know the size of the cluster population ## at risk. ## When the size of the cluster population at risk is unknown we set N = NA ## and enter values for ppr (the proportion of the population in each risk ## group). Assume (from above) that 0.10 of the cluster population are in the ## high risk group and 0.90 are in the low risk group. rsu.sep.rb(N = NA, rr = c(5,1), ppr = c(0.10,0.90), df = df, pstar = 0.01, method = "binomial") ## The expected surveillance system sensitivity is 0.980. } \keyword{methods} epiR/man/epi.asc.Rd0000644000176200001440000000265113117711400013534 0ustar liggesusers\name{epi.asc} \alias{epi.asc} \title{ Write matrix to an ASCII raster file } \description{ Writes a data frame to an ASCII raster file, suitable for display in a Geographic Information System. } \usage{ epi.asc(dat, file, xllcorner, yllcorner, cellsize, na = -9999) } \arguments{ \item{dat}{a matrix with data suitable for plotting using the \code{image} function.} \item{file}{character string specifying the name and path of the ASCII raster output file.} \item{xllcorner}{the easting coordinate corresponding to the lower left hand corner of the matrix.} \item{yllcorner}{the northing coordinate corresponding to the lower left hand corner of the matrix.} \item{cellsize}{number, defining the size of each matrix cell.} \item{na}{scalar, defines null values in the matrix. NAs are converted to this value.} } \value{ Writes an ASCII raster file (typically with \code{*.asc} extension), suitable for display in a Geographic Information System. } \note{ The \code{image} function in R rotates tabular data counter clockwise by 90 degrees for display. 
A matrix of the form: \tabular{ll}{ 1 \tab 3 \cr 2 \tab 4 \cr } is displayed (using \code{image}) as: \tabular{ll}{ 3 \tab 4 \cr 1 \tab 2 \cr } It is recommended that the source data for this function is a matrix. Replacement of \code{NA}s in a data frame extends processing time for this function. } \keyword{univar} epiR/man/rsu.sssep.rs2st.Rd0000644000176200001440000001010413754670322015241 0ustar liggesusers\name{rsu.sssep.rs2st} \alias{rsu.sssep.rs2st} \title{ Sample size to achieve a desired surveillance system sensitivity assuming two-stage sampling } \description{ Calculates the required sample size to achieve a desired surveillance system sensitivity assuming two-stage sampling (sampling of clusters and sampling of units within clusters), imperfect test sensitivity and perfect test specificity. } \usage{ rsu.sssep.rs2st(H = NA, N = NA, pstar.c, se.c, pstar.u, se.u, se.p) } \arguments{ \item{H}{scalar, integer representing the total number of clusters in the population. Use \code{NA} if unknown.} \item{N}{vector, integer representing the number of units within each cluster. Use \code{NA} if unknown.} \item{pstar.c}{scalar, numeric (0 to 1) representing the cluster level design prevalence.} \item{se.c}{scalar, numeric (0 to 1) representing the required cluster level sensitivity.} \item{pstar.u}{scalar, numeric (0 to 1) representing the surveillance unit level design prevalence.} \item{se.u}{scalar (0 to 1) representing the sensitivity of the diagnostic test at the surveillance unit level.} \item{se.p}{scalar (0 to 1) representing the desired surveillance system (population-level) sensitivity.} } \value{ A list comprised of two data frames: \code{clusters} and \code{units}. Data frame \code{clusters} lists: \item{H}{the total number of clusters in the population, as entered by the user.} \item{nsample}{the number of clusters to be sampled.} Data frame \code{units} lists: \item{N}{the number of units within each cluster, as entered by the user.} \item{nsample}{the number of units to be sampled.} } \references{ Cameron A, Baldock C (1998). A new probability formula for surveys to substantiate freedom from disease. Preventive Veterinary Medicine 34: 1 - 17. Cameron A (1999). Survey Toolbox for Livestock Diseases --- A practical manual and software package for active surveillance of livestock diseases in developing countries. Australian Centre for International Agricultural Research, Canberra, Australia. MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \examples{ ## EXAMPLE 1: ## Sampling is to be carried out to support a claim that a country is free ## of bovine brucellosis. We are not certain of the total number of herds ## in the country and we are not certain of the number of cows within each ## herd. ## The design prevalence for this study is set to 0.01 at the herd level and ## if a herd is positive for brucellosis the individual animal level ## design prevalence is set to 0.10. The sensitivity of the diagnostic ## test to be used is 0.95. ## How many herds and how many animals from within each herd ## need to be sampled to be 95\% confident of detecting disease at the ## herd and individual animal level? 
rsu.sssep.rs2st(H = NA, N = NA, pstar.c = 0.01, se.c = 0.95, pstar.u = 0.10, se.u = 0.95, se.p = 0.95) ## A total of 314 herds need to be sampled, 31 cows from each herd. ## EXAMPLE 2: ## Now lets say we know that there are 500 cattle herds in the country and ## we have the results of a recent livestock census providing counts of the ## number of cattle in each herd. How many herds and how many animals from ## within each herd need to be sampled to be 95\% confident of detecting ## disease at the herd and individual animal level? # Generate a vector of herd sizes. The minimum herd size is 25. set.seed(1234) hsize <- ceiling(rlnorm(n = 500, meanlog = 1.5, sdlog = 2)) + 25 nsample <- rsu.sssep.rs2st(H = 500, N = hsize, pstar.c = 0.01, se.c = 0.95, pstar.u = 0.10, se.u = 0.95, se.p = 0.95) nsample$clusters head(nsample$units) ## A total of 238 of the 500 herds need to be tested. The number of animals ## to sample from the first herd (comprised of 26 animals) is 18. } \keyword{univar} epiR/man/epi.betabuster.Rd0000644000176200001440000001263414133223464015137 0ustar liggesusers\name{epi.betabuster} \alias{epi.betabuster} \title{An R version of Wes Johnson and Chun-Lung Su's Betabuster} \description{ A function to return shape1 and shape2 parameters for a beta distribution, based on expert elicitation. } \usage{ epi.betabuster(mode, conf, greaterthan, x, conf.level = 0.95, max.shape1 = 100, step = 0.001) } \arguments{ \item{mode}{scalar, the mode of the variable of interest. Must be a number between 0 and 1.} \item{conf}{level of confidence (expressed on a 0 to 1 scale) that the true value of the variable of interest is greater or less than argument \code{x}.} \item{greaterthan}{logical, if \code{TRUE} you are making the statement that you are \code{conf} confident that the true value of the variable of interest is greater than \code{x}. If \code{FALSE} you are making the statement that you are \code{conf} confident that the true value of the variable of interest is less than \code{x}.} \item{x}{scalar, value of the variable of interest (see above).} \item{conf.level}{magnitude of the returned confidence interval for the estimated beta distribution. Must be a single number between 0 and 1.} \item{max.shape1}{scalar, maximum value of the shape1 parameter for the beta distribution.} \item{step}{scalar, step value for the shape1 parameter. See details.} } \value{ A list containing the following: \item{shape1}{the \code{shape1} parameter for the estimated beta distribution.} \item{shape2}{the \code{shape2} parameter for the estimated beta distribution.} \item{mode}{the mode of the estimated beta distribution.} \item{mean}{the mean of the estimated beta distribution.} \item{median}{the median of the estimated beta distribution.} \item{lower}{the lower bound of the confidence interval of the estimated beta distribution.} \item{upper}{the upper bound of the confidence interval of the estimated beta distribution.} \item{variance}{the variance of the estimated beta distribution.} } \details{ The beta distribution has two parameters: \code{shape1} and \code{shape2}, corresponding to \code{a} and \code{b} in the original version of BetaBuster. If \code{r} equals the number of times an event has occurred after \code{n} trials, \code{shape1} = \code{(r + 1)} and \code{shape2} = \code{(n - r + 1)}. Take care when you're parameterising probability estimates that are at the extremes of the 0 to 1 bounds. 
If the returned \code{shape1} parameter is equal to the value of \code{max.shape1} (which, by default, is 100) consider increasing the value of the \code{max.shape1} argument. The \code{epi.betabuster} function issues a warning if this condition is met. } \references{ Christensen R, Johnson W, Branscum A, Hanson TE (2010). Bayesian Ideas and Data Analysis: An Introduction for Scientists and Statisticians. Chapman and Hall, Boca Raton. } \author{ Simon Firestone (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia) with acknowledgements to Wes Johnson and Chun-Lung Su for the original standalone software. } \examples{ ## EXAMPLE 1: ## If a scientist is asked for their best guess for the diagnostic sensitivity ## of a particular test and the answer is 0.90, and if they are also willing ## to assert that they are 80\% certain that the sensitivity is greater than ## 0.75, what are the shape1 and shape2 parameters for a beta distribution ## satisfying these constraints? rval.beta01 <- epi.betabuster(mode = 0.90, conf = 0.80, greaterthan = TRUE, x = 0.75, conf.level = 0.95, max.shape1 = 100, step = 0.001) rval.beta01$shape1; rval.beta01$shape2 ## The shape1 and shape2 parameters for the beta distribution that satisfy the ## constraints listed above are 9.875 and 1.986, respectively. ## This beta distribution reflects the probability distribution obtained if ## there were 9 successes, r: r <- rval.beta01$shape1 - 1; r ## from 10 trials, n: n <- rval.beta01$shape2 + rval.beta01$shape1 - 2; n dat.df01 <- data.frame(x = seq(from = 0, to = 1, by = 0.001), y = dbeta(x = seq(from = 0, to = 1, by = 0.001), shape1 = rval.beta01$shape1, shape2 = rval.beta01$shape2)) ## Density plot of the estimated beta distribution: \dontrun{ library(ggplot2) ggplot(data = dat.df01, aes(x = x, y = y)) + geom_line() + scale_x_continuous(name = "Test sensitivity") + scale_y_continuous(name = "Density") } ## EXAMPLE 2: ## The most likely value of the specificity of a PCR for coxiellosis in ## small ruminants is 1.00 and we're 97.5\% certain that this estimate is ## greater than 0.99. What are the shape1 and shape2 parameters for a beta ## distribution satisfying these constraints? epi.betabuster(mode = 1.00, conf = 0.975, greaterthan = TRUE, x = 0.99, conf.level = 0.95, max.shape1 = 100, step = 0.001) ## The shape1 and shape2 parameters for the beta distribution that satisfy the ## constraints listed above are 100 and 1, respectively. epi.betabuster ## issues a warning that the value of shape1 equals max.shape1. We increase ## max.shape1 to 500: epi.betabuster(mode = 1.00, conf = 0.975, greaterthan = TRUE, x = 0.99, conf.level = 0.95, max.shape1 = 500, step = 0.001) ## The shape1 and shape2 parameters for the beta distribution that satisfy the ## constraints listed above are 367.04 and 1, respectively. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sssep.rb2st2rf.Rd0000644000176200001440000001004513754670222015635 0ustar liggesusers\name{rsu.sssep.rb2st2rf} \alias{rsu.sssep.rb2st2rf} \title{ Sample size to achieve a desired surveillance system sensitivity assuming risk-based 2-stage sampling on two risk factors at either the cluster level, unit level, or both } \description{ Calculates the sample size to achieve a desired surveillance system sensitivity assuming risk-based 2-stage sampling on two risk factors at either the cluster level, the unit level or both, imperfect test sensitivity and perfect test specificity.
} \usage{ rsu.sssep.rb2st2rf(rr.c, ppr.c, spr.c, pstar.c, se.c, rr.u, ppr.u, spr.u, pstar.u, se.u, se.p) } \arguments{ \item{rr.c}{vector, corresponding to the number of risk strata defining the relative risk values at the cluster level.} \item{ppr.c}{vector of length equal to that of \code{rr.c} defining the population proportions at the cluster level.} \item{spr.c}{vector of length equal to that of \code{rr.c} defining the planned surveillance proportions at the cluster level.} \item{pstar.c}{scalar (either a proportion or integer) defining the cluster level design prevalence.} \item{se.c}{scalar (proportion), the desired cluster level sensitivity.} \item{rr.u}{vector, corresponding to the number of risk strata defining the relative risk values at the surveillance unit level.} \item{ppr.u}{vector, of length equal to that of \code{rr.u} defining the population proportions at the surveillance unit level.} \item{spr.u}{vector of length equal to that of \code{rr.u} defining the planned surveillance proportions at the surveillance unit level.} \item{pstar.u}{scalar (either a proportion or integer) defining the surveillance unit level design prevalence.} \item{se.u}{scalar (0 to 1) representing the sensitivity of the diagnostic test at the surveillance unit level.} \item{se.p}{scalar (0 to 1) representing the desired surveillance system (population-level) sensitivity.} } \value{ A list comprised of two elements: \item{clusters}{scalar, the total number of clusters to be sampled.} \item{units}{scalar, the total number of units to sample from each cluster.} } \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to confirm the absence of ## disease using risk-based sampling. Assume a design prevalence of 0.02 ## at the cluster (herd) level and a design prevalence of 0.10 at the ## surveillance unit (individual) level. Clusters are categorised as ## being either high, medium or low risk with the probability of disease for ## clusters in the high and medium risk area 5 and 3 times the probability of ## disease in the low risk area. The proportions of clusters in the high, ## medium and low risk area are 0.10, 0.20 and 0.70, respectively. The ## proportion of samples from the high, medium and low risk area will be ## 0.40, 0.40 and 0.20, respectively. ## Surveillance units (individuals) are categorised as being either high or ## low risk with the probability of disease for units in the high risk group ## 4 times the probability of disease in the low risk group. The proportions ## of units in the high and low risk groups are 0.10 and 0.90, respectively. ## All of your samples will be taken from units in the high risk group. ## You intend to use a test with diagnostic sensitivity of 0.90 and you'd ## like to take sufficient samples to be 95\% certain that you've detected ## disease at the population level and 50\% certain that you've detected ## disease at the cluster (herd) level. How many ## clusters and how many units need to be sampled to meet the requirements ## of the study? rsu.sssep.rb2st2rf( rr.c = c(5,3,1), ppr.c = c(0.1,0.2,0.7), spr.c = c(0.4,0.4,0.2), pstar.c = 0.02, se.c = 0.50, rr.u = c(4,1), ppr.u = c(0.1, 0.9), spr.u = c(1,0), pstar.u = 0.10, se.u = 0.90, se.p = 0.95) ## A total of 82 clusters need to be sampled: 33 from the high risk area, ## 33 from the medium risk area and 16 from the low risk area. A total of ## 9 units should be sampled from each cluster.
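## The following additional call is not part of the original documentation.
## It illustrates the effect of the planned surveillance proportions: if
## samples are allocated to the cluster-level risk strata in proportion to
## the population (spr.c = ppr.c) rather than preferentially to the higher
## risk strata, a larger number of clusters will generally be required to
## achieve the same surveillance system sensitivity (output not shown):

rsu.sssep.rb2st2rf(rr.c = c(5,3,1), ppr.c = c(0.1,0.2,0.7),
   spr.c = c(0.1,0.2,0.7), pstar.c = 0.02, se.c = 0.50,
   rr.u = c(4,1), ppr.u = c(0.1,0.9), spr.u = c(1,0),
   pstar.u = 0.10, se.u = 0.90, se.p = 0.95)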
} \keyword{methods} epiR/man/rsu.sep.rsfreecalc.Rd0000644000176200001440000000651314074746166015746 0ustar liggesusers\name{rsu.sep.rsfreecalc} \alias{rsu.sep.rsfreecalc} \title{ Surveillance system sensitivity for detection of disease assuming representative sampling and imperfect test sensitivity and specificity. } \description{ Calculates the surveillance system (population-level) sensitivity for detection of disease assuming representative sampling and imperfect test sensitivity and specificity.} \usage{ rsu.sep.rsfreecalc(N, n, c = 1, pstar, se.u, sp.u) } \arguments{ \item{N}{scalar, integer representing the total number of subjects eligible to be sampled. Use \code{NA} if unknown.} \item{n}{scalar, integer representing the total number of subjects sampled.} \item{c}{scalar, integer representing the cut-point number of positives to classify a cluster as positive. If the number of positives is less than \code{c} the cluster is negative; if the number of positives is greater than or equal to \code{c} the cluster is positive.} \item{pstar}{scalar, numeric, representing the design prevalence, the hypothetical outcome prevalence to be detected. See details, below.} \item{se.u}{scalar, numeric (0 to 1) representing the diagnostic sensitivity of the test at the unit level.} \item{sp.u}{scalar, numeric (0 to 1) representing the diagnostic specificity of the test at the unit level.} } \details{ If a value for \code{N} is entered surveillance system sensitivity is calculated using the hypergeometric distribution. If \code{N} is \code{NA} surveillance system sensitivity is calculated using the binomial distribution. } \value{ A scalar representing the surveillance system (population-level) sensitivity.} \references{ Cameron A, Baldock C (1998a). A new probability formula for surveys to substantiate freedom from disease. Preventive Veterinary Medicine 34: 1 - 17. Cameron A, Baldock C (1998b). Two-stage sampling in surveys to substantiate freedom from disease. Preventive Veterinary Medicine 34: 19 - 30. Cameron A (1999). Survey Toolbox for Livestock Diseases --- A practical manual and software package for active surveillance of livestock diseases in developing countries. Australian Centre for International Agricultural Research, Canberra, Australia. } \examples{ ## EXAMPLE 1: ## Thirty animals from a herd of 150 are to be tested using a test with ## diagnostic sensitivity 0.90 and specificity 0.98. What is the ## surveillance system sensitivity assuming a design prevalence of 0.10 and ## two or more positive tests will be interpreted as a positive result? rsu.sep.rsfreecalc(N = 150, n = 30, c = 2, pstar = 0.10, se.u = 0.90, sp.u = 0.98) ## If a random sample of 30 animals is taken from a population of 150 and ## a positive test result is defined as two or more individuals returning ## a positive test, the probability of detecting disease if the population is ## diseased at a prevalence of 0.10 is 0.87. ## EXAMPLE 2: ## Repeat these calculations assuming herd size is unknown: rsu.sep.rsfreecalc(N = NA, n = 30, c = 2, pstar = 0.10, se.u = 0.90, sp.u = 0.98) ## If a random sample of 30 animals is taken from a population of unknown size ## and a positive test result is defined as two or more individuals returning ## a positive test, the probability of detecting disease if the population is ## diseased at a prevalence of 0.10 is 0.85. 
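
## EXAMPLE 3:
## As an additional (hypothetical) illustration using the survey described in 
## Example 1, suppose a single positive test (rather than two) is sufficient 
## to classify the population as positive:

rsu.sep.rsfreecalc(N = 150, n = 30, c = 1, pstar = 0.10, 
   se.u = 0.90, sp.u = 0.98)

## Lowering the cut-point to one positive test increases the surveillance 
## system sensitivity. The trade-off is a greater chance of falsely declaring 
## an uninfected population positive, because test specificity is less than 1.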
} \keyword{univar} epiR/man/rsu.pfree.equ.Rd0000644000176200001440000000734613757570204014741 0ustar liggesusers\name{rsu.pfree.equ} \alias{rsu.pfree.equ} \title{ Equilibrium probability of disease freedom assuming representative or risk based sampling } \description{ Calculates the long-term equilibrium probability of disease freedom and equilibrium prior probability of freedom, after discounting for the probability that disease has been introduced into the population and assuming population sensitivity and probability of introduction are constant over time. It does not specify how long it might take to reach equilibrium. } \usage{ rsu.pfree.equ(se.p, p.intro) } \arguments{ \item{se.p}{scalar or vector, the surveillance system (population-level) sensitivity for the given time period.} \item{p.intro}{scalar or vector of the same length as \code{sep} representing the probability of disease introduction for time period.} } \value{ A list comprised of two elements: \item{epfree}{a vector listing the equilibrium probability of disease freedom.} \item{depfree}{a vector listing the discounted equilibrium probability of disease freedom.} } \examples{ ## EXAMPLE 1: ## The current (ongoing) surveillance system for a given disease in your ## country has been estimated to have a population sensitivity of 0.60 per ## time period (one year). Assuming the probability of disease introduction ## per unit time is 0.02, what is the eventual plateau level for confidence ## of freedom and how long will it take to reach this level, assuming a # prior (starting) confidence of freedom of 0.50? ## Firstly, estimate the equilibrium (plateau) confidence of freedom: conf.eq <- rsu.pfree.equ(se.p = 0.60, p.intro = 0.02) conf.eq ## The equilibrium discounted probability of disease freedom is 0.986. ## Next, calculate confidence of freedom over 20 time periods for se.p = 0.60 ## and p.intro = 0.02: rval.df <- rsu.pfree.rs (se.p = rep(0.6, times = 20), p.intro = rep(0.02, times = 20), prior = 0.50) head(rval.df) ## When does the confidence of freedom first reach the equilibrium value ## (rounded to 3 digits)? rsep.p <- which(rval.df$pfree >= round(conf.eq$depfree, digits = 3)) rsep.p[1] ## It takes 9 time periods (years) to reach the equilibrium level of 0.986. ## EXAMPLE 2: ## You have been asked to design a surveillance system to detect a given ## disease in your country. If the probability of disease introduction per ## unit time is 0.10, what surveillance system sensitivity do you need to ## be 95\% certain that disease is absent based on the testing carried out as ## part of your program? ## Generate a vector of candidate surveillance system sensitivity estimates ## from 0.1 to 0.99: se.p <- seq(from = 0.10, to = 0.99, by = 0.01) ## Calculate the probability of disease freedom for each of the candidate ## surveillance system sensitivity estimates: rval.df <- rsu.pfree.equ(se.p = se.p, p.intro = 0.10) rval.df <- data.frame(se.p = se.p, depfree = rval.df$depfree) head(rval.df) ## Which of the surveillance system sensitivity estimates returns a ## probability of freedom greater than 0.95? rsep.p <- rval.df$se.p[rval.df$depfree > 0.95] rsep.p[1] ## The required surveillance system sensitivity for this program is 0.69. 
## Plot the results: ## Not run: library(ggplot2) ggplot(data = rval.df, aes(x = se.p, y = depfree)) + geom_point() + geom_line() + scale_x_continuous(limits = c(0,1), name = "Surveillance system sensitivity") + scale_y_continuous(limits = c(0,1), name = "Equilibrium discounted probability of disease freedom") + geom_hline(aes(yintercept = 0.95), linetype = "dashed") + geom_vline(aes(xintercept = rsep.p[1]), linetype = "dashed") + theme_bw() ## End(Not run) } \keyword{methods} epiR/man/epi.occc.Rd0000644000176200001440000000605614123763240013707 0ustar liggesusers\name{epi.occc} \alias{epi.occc} \alias{print.epi.occc} \alias{summary.epi.occc} \title{ Overall concordance correlation coefficient (OCCC) } \description{ Overall concordance correlation coefficient (OCCC) for agreement on a continuous measure based on Lin (1989, 2000) and Barnhart et al. (2002). } \usage{ epi.occc(dat, na.rm = FALSE, pairs = FALSE) \method{print}{epi.occc}(x, ...) \method{summary}{epi.occc}(object, ...) } \arguments{ \item{dat}{a matrix, or a matrix like object. Rows correspond to cases/observations, columns corresponds to raters/variables.} \item{na.rm}{logical. Should missing values (including \code{NaN}) be removed?} \item{pairs}{logical. Should the return object contain pairwise statistics? See Details.} \item{x, object}{an object of class \code{epi.occc}.} \item{\dots}{further arguments passed to \code{print} methods.} } \details{ The index proposed by Barnhart et al. (2002) is the same as the index suggested by Lin (1989) in the section of future studies with a correction of a typographical error in Lin (2000). } \value{ An object of class \code{epi.occc} with the following list elements (notation follows Barnhart et al. 2002): \itemize{ \item{\code{occc}: }{the value of the overall concordance correlation coefficient (\eqn{\rho_{o}^{c}}{rho.o^c}),} \item{\code{oprec}: }{overall precision (\eqn{\rho}{rho}),} \item{\code{oaccu}: }{overall accuracy (\eqn{\chi^{a}}{chi^a}),} \item{\code{pairs}: }{a list with following elements (only if \code{pairs = TRUE}, otherwise \code{NULL}; column indices for the pairs (j,k) follow lower-triangle column-major rule based on a \code{ncol(x)} times \code{ncol(x)} matrix), \itemize{ \item{\code{ccc}: }{pairwise CCC values (\eqn{\rho_{jk}^{c}}{rho_jk^c}),} \item{\code{prec}: }{pairwise precision values (\eqn{\rho_{jk}}{rho_jk}),} \item{\code{accu}: }{pairwise accuracy values (\eqn{\chi_{jk}^{a}}{chi_jk^a}),} \item{\code{ksi}: }{pairwise weights (\eqn{\xi_{jk}}{ksi_jk}),} \item{\code{scale}: }{pairwise scale values (\eqn{v_{jk}}{v_jk}),} \item{\code{location}: }{pairwise location values (\eqn{u_{jk}}{u_jk}),} } } \item{\code{data.name}: }{name of the input data \code{dat}.} } } \references{ Barnhart H X, Haber M, Song J (2002). Overall concordance correlation coefficient for evaluating agreement among multiple observers. Biometrics 58: 1020 - 1027. Lin L (1989). A concordance correlation coefficient to evaluate reproducibility. Biometrics 45: 255 - 268. Lin L (2000). A note on the concordance correlation coefficient. Biometrics 56: 324 - 325. } \seealso{ \code{\link[epiR]{epi.ccc}} } \author{ Peter Solymos, solymos@ualberta.ca. 
} \examples{ ## EXAMPLE 1: ## Generate some rating data: \dontrun{ set.seed(1234) p <- runif(n = 10, min = 0, max = 1) x <- replicate(n = 5, expr = rbinom(n = 10, size = 4, prob = p) + 1) rval.occc01 <- epi.occc(dat = x, pairs = TRUE) print(rval.occc01); summary(rval.occc01) } } \keyword{htest} epiR/man/rsu.sep.rsvarse.Rd0000644000176200001440000000434613742463374015322 0ustar liggesusers\name{rsu.sep.rsvarse} \alias{rsu.sep.rsvarse} \title{ Surveillance system sensitivity assuming representative sampling and varying unit sensitivity } \description{ Calculates the surveillance system (population-level) sensitivity for detection of disease assuming representative sampling and varying unit sensitivity. } \usage{ rsu.sep.rsvarse(N = NA, pstar, se.u) } \arguments{ \item{N}{scalar integer or vector of integers the same length as \code{se.u}, representing the population size. Use \code{NA} if unknown.} \item{pstar}{scalar representing the design prevalence.} \item{se.u}{vector of numbers the same length as \code{N} representing the individual unit sensitivities.} } \value{ A vector of surveillance system (population-level) sensitivity estimates. } \references{ MacDiarmid S (1988). Future options for brucellosis surveillance in New Zealand beef herds. New Zealand Veterinary Journal 36: 39 - 42. Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \examples{ ## EXAMPLE 1: ## A study has been carried out to detect Johne's disease in a population of ## cattle. A random sample of 50 herds from a herd population of unknown size ## has been selected and, from each selected herd, a variable number of animals ## have been tested using faecal culture which is assumed to have a diagnostic ## sensitivity in the order of 0.60. ## The number of animals tested in each of the 50 herds is: set.seed(1234) ntest <- round(runif(n = 50, min = 10, max = 30), digits = 0) ntest ## Calculate the herd level sensitivity of disease detection, assuming we've ## been provided with no details of the number of animals in each of the 50 ## herds. Assume a within-herd design prevalence of 0.05: herd.se <- rsu.sep.rs(N = NA, n = ntest, pstar = 0.05, se.u = 0.60) range(herd.se) ## The herd level sensitivity of detection varies between 0.26 and 0.60. ## Calculate the surveillance system sensitivity assuming a herd-level design ## prevalence of 0.01: rsu.sep.rsvarse(N = NA, pstar = 0.01, se.u = herd.se) ## The surveillance system sensitivity is 0.20. } \keyword{methods}epiR/man/epi.offset.Rd0000644000176200001440000000237714074733454014301 0ustar liggesusers\name{epi.offset} \alias{epi.offset} \title{Create offset vector } \description{ Creates an offset vector based on a list. } \usage{ epi.offset(id.names) } \arguments{ \item{id.names}{a list identifying the [location] of each case. This must be a factor.} } \details{ This function is useful for supplying spatial data to WinBUGS. } \value{ A vector of length (1 + length of \code{id}). The first element of the offset vector is 1, corresponding to the position at which data for the first factor appears in id. The second element of the offset vector corresponds to the position at which the second factor appears in \code{id} and so on. The last element of the offset vector corresponds to the length of the \code{id} list. } \references{ Bailey TC, Gatrell AC (1995). Interactive Spatial Data Analysis. Longman Scientific & Technical. London. Langford IH (1994). 
Using empirical Bayes estimates in the geographical analysis of disease risk. Area 26: 142 - 149.
}
\examples{
## EXAMPLE 1:
dat.v01 <- c(1,1,1,2,2,2,2,3,3,3)
dat.v01 <- as.factor(dat.v01)

dat.ofs01 <- epi.offset(dat.v01)
dat.ofs01
## [1] 1 4 8 10
}
\keyword{univar}% at least one, from doc/KEYWORDS
\keyword{univar}% __ONLY ONE__ keyword per line
epiR/man/rsu.pfree.rs.Rd0000644000176200001440000002162214164037175014562 0ustar liggesusers\name{rsu.pfree.rs}
\alias{rsu.pfree.rs}
\title{
Calculate the probability of freedom for given population sensitivity and probability of introduction
}
\description{
Calculates the posterior probability (confidence) of disease freedom (negative predictive value) for one or more population sensitivity (se.p) estimates, over one or more time periods.
}
\usage{
rsu.pfree.rs(se.p, p.intro = 0, prior = 0.5, by.time = TRUE)
}
\arguments{
\item{se.p}{scalar, vector or matrix representing the population sensitivity estimates. \code{se.p} will be scalar if you're calculating the posterior probability of disease freedom for a single time period. If \code{se.p} is a vector set \code{by.time = TRUE} if the \code{se.p} estimates are for separate time periods. Set \code{by.time = FALSE} if the \code{se.p} estimates are variations (iterations) within a single time period. If \code{se.p} is a matrix, columns represent consecutive time periods and rows represent multiple \code{se.p} estimates per time period.}
\item{p.intro}{scalar, vector or matrix representing the probability of disease introduction per time period. If \code{p.intro} is scalar this value is applied across all \code{se.p} values and time periods. If \code{p.intro} is a vector set \code{by.time = TRUE} if the \code{p.intro} estimates are for separate time periods. Set \code{by.time = FALSE} if the \code{p.intro} estimates are variations (iterations) within a single time period. If \code{p.intro} is a matrix it should have the same dimensions as \code{se.p} with columns representing time periods and rows representing multiple \code{p.intro} estimates per time period.}
\item{prior}{scalar or vector of the same length as the number of rows of \code{se.p} representing the prior probability of disease freedom before surveillance.}
\item{by.time}{logical, representing the type of analysis. See details, below.}
}
\details{
The \code{by.time} argument is used for two specific circumstances.

Use \code{by.time = TRUE} if the \code{se.p} estimates are a vector of values for consecutive time periods. Use \code{by.time = FALSE} if the \code{se.p} estimates are a vector of multiple values (iterations) for a single time period.

Use \code{by.time = TRUE} if \code{se.p} is a matrix and \code{p.intro} is a vector of values representing the probability of disease introduction over consecutive time periods. Use \code{by.time = FALSE} if \code{se.p} is a matrix (with columns for time periods and rows representing estimates of \code{se.p} within each time period) and \code{p.intro} is a vector of values corresponding to multiple values for a single time period that are the same across all periods.
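For example, if \code{se.p = c(0.50,0.60,0.70)} represents population sensitivity estimates for three consecutive years, set \code{by.time = TRUE}. If the same three values represent three iterations of the population sensitivity estimate for a single year, set \code{by.time = FALSE}.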
} \value{ A list comprised of six elements: \item{PFree}{The posterior probability of disease freedom.} \item{SeP}{The population sensitivity.} \item{PIntro}{The probability of disease introduction (as entered by the user).} \item{Discounted prior}{The discounted prior confidence of disease freedom.} \item{Equilibrium PFree}{The equilibrium probability of disease freedom.} \item{Equilibrium prior}{The equilibrium discounted prior probability of disease freedom.} } \references{ Martin P, Cameron A, Greiner M (2007). Demonstrating freedom from disease using multiple complex data sources 1: A new methodology based on scenario trees. Preventive Veterinary Medicine 79: 71 - 97. Martin P, Cameron A, Barfod K, Sergeant E, Greiner M (2007). Demonstrating freedom from disease using multiple complex data sources 2: Case study - classical swine fever in Denmark. Preventive Veterinary Medicine 79: 98 - 115. } \examples{ ## EXAMPLE 1: ## You have estimated herd-sensitivity for 20 herds for a disease of concern, ## all returned negative results. What is the confidence of disease freedom ## for these herds, assuming that based on other data, 20\% of herds in the ## region are estimated to be disease positive? ## Generate 20 herd sensitivity estimates, using random values between 70\% ## and 95\%: herd.sens <- runif(n = 20, min = 0.70, max = 0.95) ## The background herd prevalence is 0.20, so the prior confidence of freedom ## is 1 - 0.2 = 0.8. For this example we assume the prior is applicable at ## the time of sampling so p.intro = 0 (the default) and we are carrying out ## an analysis using multiple estimates of population sensitivities for a ## single time period so we set by.time = FALSE. rval.df <- rsu.pfree.rs(se.p = herd.sens, p.intro = 0, prior = 0.80, by.time = FALSE) rval.df <- data.frame(SeP = rval.df$SeP, PFree = rval.df$PFree) range(rval.df$SeP) ## The herd-level probability of disease freedom ranges from about 0.93 to ## 0.99 depending on individual herd level sensitivity values. ## EXAMPLE 2: ## You have analysed 12 months of surveillance data for disease X, to provide ## 12 monthly estimates of population sensitivity. In addition, based on ## previous data, the monthly probability of the introduction of disease is ## estimated to be in the range of 0.005 (0.5\%) to 0.02 (2\%). The prior ## confidence of disease freedom is assumed to be 0.5 (i.e., uninformed). ## What is your level of confidence of disease freedom at the end of the 12 ## month surveillance period? 
## Generate 12, monthly estimates of se.p and p.intro: pop.sens <- runif(n = 12, min = 0.40, max = 0.70) pintro <- runif(n = 12, min = 0.005, max = 0.020) ## For this example we're analysing a single population over multiple time ## periods, so we set by.time = TRUE: rval.df <- rsu.pfree.rs(se.p = pop.sens, p.intro = pintro, prior = 0.50, by.time = TRUE) rval.df <- data.frame(mnum = 1:12, mchar = seq(as.Date("2020/1/1"), by = "month", length.out = 12), SeP = t(rval.df$SeP), PFree = t(rval.df$PFree)) ## Plot the probability of disease freedom as a function of time: plot(x = rval.df$mnum, y = rval.df$PFree, xlim = c(1,12), ylim = c(0,1), xlab = "Month", ylab = "Probability of disease freedom", pch = 16, type = "b", xaxt = "n") axis(side = 1, at = rval.df$mnum, labels = format(rval.df$mchar, format = "\%b")) abline(h = 0.95, lty = 2) \dontrun{ library(ggplot2); library(scales) ggplot(data = rval.df, aes(x = mchar, y =PFree)) + geom_line(col = "black") + scale_x_date(breaks = date_breaks("1 month"), labels = date_format("\%b"), name = "Month") + scale_y_continuous(limits = c(0,1), name = "Probability of disease freedom") + geom_hline(yintercept = 0.95, linetype = "dashed") + theme_bw() } ## The estimated probability of disease freedom (Pfree) increases over time ## from about 0.70 (or less) to >0.99, depending on the actual se.p values ## generated by simulation. ## EXAMPLE 3: ## Extending the above example, instead of a simple deterministic estimate, ## you decide to use simulation to account for uncertainty in the monthly ## se.p and p.intro estimates. ## For simplicity, we generate 1200 random estimates of se.p and coerce them ## into a matrix with 12 columns and 100 rows: pop.sens <- matrix(runif(n = 1200, min = 0.40, max = 0.70), nrow = 100) ## For p.intro we generate a vector of 100 random values, which will then be ## used across all time periods: pintro <- runif(n = 100, min = 0.005, max = 0.020) ## For this example, because se.p is a matrix and p.intro is a vector matching ## one of the dimensions of se.p, by.time is ignored: rval.df <- rsu.pfree.rs(se.p = pop.sens, p.intro = pintro, prior = 0.5, by.time = TRUE) ## Calculate 95\% confidence intervals for the probability of disease freedom: rval.df <- apply(rval.df$PFree, FUN = quantile, MARGIN = 2, probs = c(0.025,0.5,0.975)) rval.df <- data.frame(mnum = 1:12, mchar = seq(as.Date("2020/1/1"), by = "month", length.out = 12), t(rval.df)) ## Plot the probability of disease freedom as a function of time. Dashed lines ## show the lower and upper bound of the confidence interval around the ## probability of disease freedom estimates: plot(x = rval.df$mnum, y = rval.df$X50., xlim = c(1,12), ylim = c(0,1), xlab = "Month", ylab = "Probability of disease freedom", type = "l", lwd = 2, xaxt = "n") axis(side = 1, at = rval.df$mnum, labels = format(rval.df$mchar, format = "\%b")) lines(x = rval.df$mnum, y = rval.df$X2.5., type = "l", lty = 2) lines(x = rval.df$mnum, y = rval.df$X97.5., type = "l", lty = 2) \dontrun{ library(ggplot2); library(scales) ggplot(data = rval.df, aes(x = mchar, y = X50.)) + geom_line(col = "black") + geom_ribbon(aes(ymin = X2.5., ymax = X97.5.), alpha = 0.25) + scale_x_date(breaks = date_breaks("1 month"), labels = date_format("\%b"), name = "Month") + scale_y_continuous(limits = c(0,1), name = "Probability of disease freedom") + theme_bw() } ## The median probability of disease freedom increases over time from about ## 0.7 (or less) to >0.99, depending on the actual se.p values generated by ## simulation. 
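
## EXAMPLE 4:
## As a simple (hypothetical) illustration for a single time period: one year 
## of surveillance with an estimated population sensitivity of 0.80, a prior 
## confidence of disease freedom of 0.50 and a probability of disease 
## introduction during the year of 0.01:

rsu.pfree.rs(se.p = 0.80, p.intro = 0.01, prior = 0.50, by.time = TRUE)

## The returned element PFree gives the confidence of disease freedom after 
## this single round of (negative) surveillance. It is greater than the prior 
## of 0.50 because no evidence of disease was found.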
} \keyword{methods} epiR/man/epi.sssupc.Rd0000644000176200001440000001040614164036764014323 0ustar liggesusers\name{epi.sssupc} \alias{epi.sssupc} \title{ Sample size for a parallel superiority trial, continuous outcome } \description{ Sample size for a parallel superiority trial, continuous outcome. } \usage{ epi.sssupc(treat, control, sd, delta, n, r = 1, power, nfractional = FALSE, alpha) } \arguments{ \item{treat}{the expected mean of the outcome of interest in the treatment group.} \item{control}{the expected mean of the outcome of interest in the control group.} \item{sd}{the expected population standard deviation of the outcome of interest.} \item{delta}{the equivalence limit, expressed as the absolute change in the outcome of interest that represents a clinically meaningful difference. For a superiority trial the value entered for \code{delta} must be greater than or equal to zero.} \item{n}{scalar, the total number of study subjects in the trial.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{power}{scalar, the required study power.} \item{nfractional}{logical, return fractional sample size.} \item{alpha}{scalar, defining the desired alpha level.} } \value{ A list containing the following: \item{n.total}{the total number of study subjects required.} \item{n.treat}{the required number of study subject in the treatment group.} \item{n.control}{the required number of study subject in the control group.} \item{delta}{the equivalence limit, as entered by the user.} \item{power}{the specified or calculated study power.} } \references{ Chow S, Shao J, Wang H (2008). Sample Size Calculations in Clinical Research. Chapman & Hall/CRC Biostatistics Series, page 61. Julious SA (2004). Sample sizes for clinical trials with normal data. Statistics in Medicine 23: 1921 - 1986. Pocock SJ (1983). Clinical Trials: A Practical Approach. Wiley, New York. Wang B, Wang H, Tu X, Feng C (2017). Comparisons of superiority, non-inferiority, and equivalence trials. Shanghai Archives of Psychiatry 29, 385 - 388. DOI: 10.11919/j.issn.1002-0829.217163. } \note{ Consider a clinical trial comparing two groups, a standard treatment (\eqn{s}) and a new treatment (\eqn{n}). In each group, the mean of the outcome of interest for subjects receiving the standard treatment is \eqn{N_{s}} and the mean of the outcome of interest for subjects receiving the new treatment is \eqn{N_{n}}. We specify the absolute value of the maximum acceptable difference between \eqn{N_{n}} and \eqn{N_{s}} as \eqn{\delta}. For a superiority trial the value entered for \code{delta} must be greater than or equal to zero. For a superiority trial the null hypothesis is: \eqn{H_{0}: N_{s} - N_{n} = 0} The alternative hypothesis is: \eqn{H_{1}: N_{s} - N_{n} != 0} When calculating the power of a study, the argument \code{n} refers to the total study size (that is, the number of subjects in the treatment group plus the number in the control group). For a comparison of the key features of superiority, equivalence and non-inferiority trials, refer to the documentation for \code{\link{epi.ssequb}}.} \examples{ ## EXAMPLE 1: ## A pharmaceutical company is interested in conducting a clinical trial ## to compare two cholesterol lowering agents for treatment of patients with ## congestive heart disease (CHD) using a parallel design. The primary ## efficacy parameter is the concentration of high density lipoproteins ## (HDL). 
We consider the situation where the intended trial is to test ## superiority of the test drug over the active control agent. Sample ## size calculations are to be calculated to achieve 80\% power at the ## 5\% level of significance. ## In this example, we assume that if treatment results in a 5 unit ## (i.e., delta = 5) increase in HDL it is declared to be superior to the ## active control. Assume the standard deviation of HDL is 10 units and ## the HDL concentration in the treatment group is 20 units and the ## HDL concentration in the control group is 20 units. epi.sssupc(treat = 20, control = 20, sd = 10, delta = 5, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05) ## A total of 100 subjects need to be enrolled in the trial, 50 in the ## treatment group and 50 in the control group. } \keyword{univar} epiR/man/rsu.sep.rb2rf.Rd0000644000176200001440000001103614011615320014620 0ustar liggesusers\name{rsu.sep.rb2rf} \alias{rsu.sep.rb2rf} \title{ Surveillance system sensitivity assuming risk-based sampling on two risk factors } \description{ Calculates risk-based surveillance system (population-level) sensitivity with a two risk factors, assuming [one-stage] risk-based sampling and allowing unit sensitivity to vary among risk strata. } \usage{ rsu.sep.rb2rf(N, n, rr1, ppr1, rr2, ppr2, pstar, se.u, method = "binomial") } \arguments{ \item{N}{matrix of population sizes for each risk group. Rows = levels of \code{rr1}, columns = levels of \code{rr2}.} \item{n}{matrix of the number of surveillance units tested in each risk group. Rows = levels of \code{rr1}, columns = levels of \code{rr2}.} \item{rr1}{scalar or vector defining the first set of relative risk values.} \item{ppr1}{scalar or vector of the same length as that vector of \code{rr1} defining the population proportions in each of the first risk strata. Proportions must sum to one. Ignored if \code{method = "hypergeometric"}.} \item{rr2}{matrix defining the relative risks for the second risk factor. Rows = levels of \code{rr1}, columns = levels of \code{rr2}.} \item{ppr2}{matrix defining the population proportions in each of the second risk strata. Row proportions must sum to one. Rows = levels of \code{rr1}, columns = levels of \code{rr2}. Ignored if \code{method = "hypergeometric"}.} \item{pstar}{scalar, defining the design prevalence.} \item{se.u}{scalar or vector of the same length as that vector of \code{rr1} defining the unit sensitivity (which can vary across strata).} \item{method}{character string indicating the method to be used. Options are \code{binomial} or \code{hypergeometric}. See details, below.} } \details{ If \code{method = binomial} \code{N} is ignored and values for \code{ppr} need to be entered. Conversely, if \code{method = hypergeometric}, \code{ppr} is ignored and calculated from \code{N}. } \value{ A list comprised of two elements: \item{se.p}{scalar, surveillance system (population-level) sensitivity estimates.} \item{epi}{vector, effective probability of infection estimates.} \item{adj.risk1}{vector, adjusted relative risk estimates for the first risk factor.} \item{adj.risk2}{vector, adjusted relative risk estimates for the second risk factor.} } \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to confirm the absence of ## disease using risk based sampling. Assume a design prevalence of 0.01 ## at the surveillance unit level. 
Surveillance units are categorised as ## being either high or low risk with the probability of disease for ## high risk surveillance units 3 times the probability of disease for low ## risk units. The proportion of units in each risk group is 0.20 and 0.80, ## respectively. ## Within each of the two risk categories the probability of disease varies ## with age with younger age groups having four times the risk of disease ## as older age groups. In the high risk area 10\% of the population are young ## and 90\% are old. In the low risk area 30\% of the population are young and ## 70\% are old. ## The total number of surveillance units in the population is unknown. The ## numbers of young and old surveillance units tested in the high and low risk ## groups are 40, 20, 20 and 10, respectively. You intend to use a test with ## diagnostic sensitivity of 0.80. What is the surveillance system sensitivity? rsu.sep.rb2rf(N = NA, n = rbind(c(40,20), c(20,10)), rr1 = c(3,1), ppr1 = c(0.20,0.80), rr2 = rbind(c(4,1), c(4,1)), ppr2 = rbind(c(0.10,0.90), c(0.30,0.70)), pstar = 0.01, se.u = 0.80, method = "binomial")$se.p ## The surveillance system sensitivity is 0.93. ## EXAMPLE 2: ## This example shows the importance of sampling high risk groups. Take the ## same scenario as above but switch the relative proportions sampled by ## risk group --- taking a greater number of samples from the low risk group ## compared with the high risk group: rsu.sep.rb2rf(N = NA, n = rbind(c(10,20), c(20,40)), rr1 = c(3,1), ppr1 = c(0.20,0.80), rr2 = rbind(c(4,1), c(4,1)), ppr2 = rbind(c(0.10,0.90), c(0.30,0.70)), pstar = 0.01, se.u = 0.80, method = "binomial")$se.p ## The surveillance system sensitivity is 0.69. Here we've taken exactly the ## same number of samples as Example 1, but there's a substantial decrease ## in surveillance system sensitivity because we've concentrated sampling on ## a low risk group (decreasing our ability to detect disease). } \keyword{methods} epiR/man/epi.herdtest.Rd0000644000176200001440000000500414074722764014625 0ustar liggesusers\name{epi.herdtest} \alias{epi.herdtest} \title{ Estimate the characteristics of diagnostic tests applied at the herd (group) level } \description{ When tests are applied to individuals within a group we may wish to designate the group as being either diseased or non-diseased on the basis of the individual test results. This function estimates sensitivity and specificity of this testing regime at the group (or herd) level. } \usage{ epi.herdtest(se, sp, P, N, n, k) } \arguments{ \item{se}{a vector of length one defining the sensitivity of the individual test used.} \item{sp}{a vector of length one defining the specificity of the individual test used.} \item{P}{scalar, defining the estimated true prevalence.} \item{N}{scalar, defining the herd size.} \item{n}{scalar, defining the number of individuals to be tested per group (or herd).} \item{k}{scalar, defining the critical number of individuals testing positive that will denote the group as test positive.} } \value{ A data frame with four elements: \code{APpos} the probability of obtaining a positive test, \code{APneg} the probability of obtaining a negative test, \code{HSe} the estimated group (herd) sensitivity, and \code{HSp} the estimated group (herd) specificity. } \references{ Dohoo I, Martin W, Stryhn H (2003). Veterinary Epidemiologic Research. AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 113 - 115. } \author{ Ron Thornton, MAF New Zealand, PO Box 2526 Wellington, New Zealand. 
} \note{ The method implemented in this function is based on the hypergeometric distribution. } \examples{ ## EXAMPLE 1: ## We want to estimate the herd-level sensitivity and specificity of ## a testing regime using an individual animal test of sensitivity 0.391 ## and specificity 0.964. The estimated true prevalence of disease is 0.12. ## Assume that 60 individuals will be tested per herd and we have ## specified that two or more positive test results identify the herd ## as positive. epi.herdtest(se = 0.391, sp = 0.964, P = 0.12, N = 1E06, n = 60, k = 2) ## This testing regime gives a herd sensitivity of 0.95 and a herd ## specificity of 0.36. With a herd sensitivity of 0.95 we can be ## confident that we will declare a herd infected if it is infected. ## With a herd specficity of only 0.36, we will declare 0.64 of disease ## negative herds as infected, so false positives are a problem. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.ssclus1estb.Rd0000644000176200001440000001133714164044464015256 0ustar liggesusers\name{epi.ssclus1estb} \alias{epi.ssclus1estb} \title{ Sample size to estimate a binary outcome using one-stage cluster sampling } \description{ Sample size to estimate a binary outcome using one-stage cluster sampling. } \usage{ epi.ssclus1estb(b, Py, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{b}{scalar integer or vector of length two, the number of individual listing units in each cluster to be sampled. See details, below.} \item{Py}{scalar number, an estimate of the unknown population proportion.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{rho}{scalar number, the intracluster correlation.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ In many situations it is common for sampling units to be aggregated into clusters. Typical examples include individuals within households, children within classes (within schools) and cows within herds. We use the term primary sampling unit (PSU) to refer to what gets sampled first (clusters) and secondary sampling unit (SSU) to refer to what gets sampled second (individual listing units within each cluster). In this documentation the terms primary sampling unit and cluster are used interchangeably. Similarly, the terms secondary sampling unit and individual listing units are used interchangeably. \code{b} as a scalar integer represents the total number of individual listing units from each cluster to be sampled. If \code{b} is a vector of length two the first element represents the mean number of individual listing units to be sampled from each cluster and the second element represents the standard deviation of the number of individual listing units to be sampled from each cluster. At least 25 PSUs (clusters) are recommended for one-stage cluster sampling designs. If less than 25 PSUs are returned by the function a warning is issued. 
} \value{ A list containing the following: \item{n.psu}{the total number of primary sampling units (clusters) to be sampled for the specified level of confidence and relative error.} \item{n.ssu}{the total number of secondary sampling units to be sampled for the specified level of confidence and relative error.} \item{DEF}{the design effect.} \item{rho}{the intracluster correlation, as entered by the user.} } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 258. Machin D, Campbell MJ, Tan SB, Tan SH (2018). Sample Sizes for Clinical, Laboratory ad Epidemiological Studies, Fourth Edition. Wiley Blackwell, London, pp. 195 - 214. } \examples{ ## EXAMPLE 1: ## An aid project has distributed cook stoves in a single province in a ## resource-poor country. At the end of three years, the donors would like ## to know what proportion of households are still using their donated ## stove. A cross-sectional study is planned where villages in the province ## will be sampled and all households (approximately 75 per village) will be ## visited to determine whether or not the donated stove is still in use. ## A pilot study of the prevalence of stove usage in five villages ## showed that 0.46 of householders were still using their stove. The ## intracluster correlation for a study of this type is unknown, but thought ## to be relatively high (rho = 0.20). # If the donor wanted to be 90\% confident that the survey estimate of stove ## usage was within 10\% of the true population value, how many villages ## (i.e., clusters) would need to be sampled? epi.ssclus1estb(b = 75, Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, nfractional = FALSE, conf.level = 0.90) ## A total of 67 villages need to be sampled to meet the specifications ## of this study. ## Now imagine the situation where the number of households per village ## varies. We are told that the average number of households per village is ## 75 with the 0.025 quartile 40 households and the 0.975 quartile 180 ## households. The expected standard deviation of the number of households ## per village is (180 - 40) / 4 = 35. How many villages need to be sampled? epi.ssclus1estb(b = c(75,35), Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, nfractional = FALSE, conf.level = 0.90) ## A total of 81 villages need to be sampled to meet the specifications ## of this study. } \keyword{univar} epiR/man/epi.tests.Rd0000644000176200001440000002450514164036764014152 0ustar liggesusers\name{epi.tests} \alias{epi.tests} \alias{print.epi.tests} \alias{summary.epi.tests} \title{Sensitivity, specificity and predictive value of a diagnostic test} \description{ Computes true and apparent prevalence, sensitivity, specificity, positive and negative predictive values and positive and negative likelihood ratios from count data provided in a 2 by 2 table. } \usage{ epi.tests(dat, method = "exact", digits = 2, conf.level = 0.95) \method{print}{epi.tests}(x, ...) \method{summary}{epi.tests}(object, ...) } \arguments{ \item{dat}{a vector of length four, an object of class \code{table} or an object of class \code{grouped_df} from package \code{dplyr} containing the individual cell frequencies (see below).} \item{method}{a character string indicating the method to use. 
Options are \code{method = "exact"}, \code{method = "wilson"}, \code{method = "agresti"}, \code{method = "clopper-pearson"} and \code{method = "jeffreys"}.} \item{digits}{scalar, number of digits to be reported for \code{print} output. Must be an integer of either 2, 3 or 4.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} \item{x, object}{an object of class \code{epi.tests}.} \item{\dots}{Ignored.} } \details{ When \code{method = "exact"} exact binomial confidence limits are calculated for test sensitivity, specificity, and positive and negative predictive value (see Collett 1999 for details). When \code{method = "wilson"} Wilson's confidence limits are calculated for test sensitivity, specificity, and positive and negative predictive value (see Rothman 2012 for details). When \code{method = "agresti"} Agresti's confidence limits are calculated for test sensitivity, specificity, and positive and negative predictive value (see Agresti and Coull 1998 for details). When \code{method = "clopper-pearson"} Clopper-Pearson's confidence limits are calculated for test sensitivity, specificity, and positive and negative predictive value (see Clopper and Pearson 1934 for details). When \code{method = "jeffreys"} Jeffrey's confidence limits are calculated for test sensitivity, specificity, and positive and negative predictive value (see Brown et al., 2001 for details). Confidence intervals for positive and negative likelihood ratios are based on formulae provided by Simel et al. (1991). Diagnostic accuracy is defined as the proportion of all tests that give a correct result. Diagnostic odds ratio is defined as how much more likely will the test make a correct diagnosis than an incorrect diagnosis in patients with the disease (Scott et al. 2008). The number needed to diagnose is defined as the number of paitents that need to be tested to give one correct positive test. Youden's index is the difference between the true positive rate and the false positive rate. Youden's index ranges from -1 to +1 with values closer to 1 if both sensitivity and specificity are high (i.e., close to 1). } \value{ An object of class \code{epi.tests} containing the following: \item{ap}{apparent prevalence.} \item{tp}{true prevalence.} \item{se}{test sensitivity.} \item{sp}{test specificity.} \item{diag.ac}{diagnostic accuracy (the correctly classified proportion).} \item{diag.or}{diagnostic odds ratio.} \item{nndx}{number needed to diagnose.} \item{youden}{Youden's index.} \item{pv.pos}{positive predictive value.} \item{pv.neg}{negative predictive value.} \item{lr.pos}{likelihood ratio of a positive test.} \item{lr.neg}{likelihood ratio of a negative test.} \item{p.rout}{the proportion of subjects with the outcome ruled out.} \item{p.rin}{the proportion of subjects with the outcome ruled in.} \item{p.tpdn}{the proportion of true outcome negative subjects that test positive (false T+ proportion for D-).} \item{p.tndp}{the proportion of true outcome positive subjects that test negative (false T- proportion for D+).} \item{p.dntp}{the proportion of test positive subjects that are outcome negative (false T+ proportion for T+).} \item{p.dptn}{the proportion of test negative subjects that are outcome positive (false T- proportion for T-).} } \references{ Agresti A, Coull B (1998). Approximate is better than 'exact' for interval estimation of binomial proportions. The American Statistician 52. DOI: 10.2307/2685469. Altman DG, Machin D, Bryant TN, Gardner MJ (2000). 
Statistics with Confidence, second edition. British Medical Journal, London, pp. 28 - 29. Bangdiwala SI, Haedo AS, Natal ML (2008). The agreement chart as an alternative to the receiver-operating characteristic curve for diagnostic tests. Journal of Clinical Epidemiology 61: 866 - 874. Brown L, Cai T, Dasgupta A (2001). Interval estimation for a binomial proportion. Statistical Science 16: 101 - 133. Clopper C, Pearson E (1934) The use of confidence or fiducial limits illustrated in the case of the binomial. Biometrika 26: 404 - 413. DOI: 10.1093/biomet/26.4.404. Collett D (1999). Modelling Binary Data. Chapman & Hall/CRC, Boca Raton Florida, pp. 24. Rothman KJ (2012). Epidemiology An Introduction. Oxford University Press, London, pp. 164 - 175. Scott IA, Greenburg PB, Poole PJ (2008). Cautionary tales in the clinical interpretation of studies of diagnostic tests. Internal Medicine Journal 38: 120 - 129. Simel D, Samsa G, Matchar D (1991). Likelihood ratios with confidence: Sample size estimation for diagnostic test studies. Journal of Clinical Epidemiology 44: 763 - 770. Greg Snow (2008) Need help in calculating confidence intervals for sensitivity, specificity, PPV & NPV. R-sig-Epi Digest 23(1): 3 March 2008. Wilson EB (1927) Probable inference, the law of succession, and statistical inference. Journal of the American Statistical Association 22: 209 - 212. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). Charles Reynard (School of Medical Sciences, The University of Manchester, United Kingdom). } \note{ \tabular{llll}{ -----------\tab ----------\tab ---------- \tab ----------\cr \tab Disease + \tab Disease - \tab Total \cr -----------\tab ----------\tab ---------- \tab ----------\cr Test + \tab a \tab b \tab a+b \cr Test - \tab c \tab d \tab c+d \cr -----------\tab ----------\tab ---------- \tab ----------\cr Total \tab a+c \tab b+d \tab a+b+c+d \cr -----------\tab ----------\tab ---------- \tab ----------\cr } } \examples{ ## EXAMPLE 1 (from Scott et al. 2008, Table 1): ## A new diagnostic test was trialled on 1586 patients. Of 744 patients that ## were disease positive, 670 were test positive. Of 842 patients that were ## disease negative, 640 were test negative. What is the likeliood ratio of ## a positive test? What is the number needed to diagnose? dat.v01 <- c(670,202,74,640) rval.tes01 <- epi.tests(dat.v01, method = "exact", digits = 2, conf.level = 0.95) print(rval.tes01) ## Test sensitivity is 0.90 (95\% CI 0.88 to 0.92). Test specificity is ## 0.76 (95\% CI 0.73 to 0.79). The likelihood ratio of a positive test ## is 3.75 (95\% CI 3.32 to 4.24). ## What is the number needed to diagnose? names(rval.tes01$detail) rval.tes01$detail$nndx ## The number needed to diagnose is 1.51 (95\% CI 1.41 to 1.65). Around 15 ## persons need to be tested to return 10 positive tests. 
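
## Continuing Example 1, as an additional (hypothetical) illustration the 
## same data can be re-analysed using Wilson confidence limits instead of 
## exact confidence limits:

rval.tes01b <- epi.tests(dat.v01, method = "wilson", digits = 2, 
   conf.level = 0.95)
print(rval.tes01b)

## The point estimates are unchanged; only the confidence limits differ 
## (Wilson limits are typically slightly narrower than exact limits).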
## EXAMPLE 2: ## Same as Example 1 but showing how a 2 by 2 contingency table can be prepared ## using tidyverse: \dontrun{ library(tidyverse) ## Generate a data set listing test results and true disease status: dis <- c(rep(1, times = 744), rep(0, times = 842)) tes <- c(rep(1, times = 670), rep(0, times = 74), rep(1, times = 202), rep(0, times = 640)) dat.df02 <- data.frame(dis, tes) tmp.df02 <- dat.df02 \%>\% mutate(dis = factor(dis, levels = c(1,0), labels = c("Dis+","Dis-"))) \%>\% mutate(tes = factor(tes, levels = c(1,0), labels = c("Test+","Test-"))) \%>\% group_by(tes, dis) \%>\% summarise(n = n()) tmp.df02 ## View the data in conventional 2 by 2 table format: pivot_wider(tmp.df02, id_cols = c(tes), names_from = dis, values_from = n) rval.tes02 <- epi.tests(tmp.df02, method = "exact", digits = 2, conf.level = 0.95) summary(rval.tes02) } ## Test sensitivity is 0.90 (95\% CI 0.88 to 0.92). Test specificity is ## 0.76 (95\% CI 0.73 to 0.79). The likelihood ratio of a positive test ## is 3.75 (95\% CI 3.32 to 4.24). ## EXAMPLE 3: ## A biomarker assay has been developed to identify patients that are at ## high risk of experiencing myocardial infarction. The assay varies on ## a continuous scale, from 0 to 1. Researchers believe that a biomarker ## assay result of greater than or equal to 0.60 renders a patient test ## positive, that is, at elevated risk of experiencing a heart attack ## over the next 12 months. ## Generate data consistent with the information provided above. Assume the ## prevalence of high risk subjects in your population is 0.35: set.seed(1234) dat.df03 <- data.frame(out = rbinom(n = 200, size = 1, prob = 0.35), bm = runif(n = 200, min = 0, max = 1)) ## Classify study subjects as either test positive or test negative ## according to their biomarker test result: dat.df03$test <- ifelse(dat.df03$bm >= 0.6, 1, 0) ## Generate a two-by-two table: dat.tab03 <- table(dat.df03$test, dat.df03$out)[2:1,2:1] rval.tes03 <- epi.tests(dat.tab03, method = "exact", digits = 2, conf.level = 0.95) print(rval.tes03) ## What proportion of subjects are ruled out as being at high risk of ## myocardial infarction? rval.tes03$detail$p.rout ## Answer: 0.61 (95\% CI 0.54 to 0.68). ## What proportion of subjects are ruled in as being at high risk of ## myocardial infarction? rval.tes03$detail$p.rin # Answer: 0.38 (95\% CI 0.32 to 0.45). ## What is the proportion of false positive results? That is, what is the ## proportion of test positive individuals among those that are disease ## negative, p.tpdn? rval.tes03$detail$p.tpdn # Answer: 0.37 (95\% CI 0.29 to 0.45). ## What is the proportion of false negative results? That is, what is the ## proportion of test negative individuals among those that are disease ## positive, p.tndp? rval.tes03$detail$p.tndp # Answer: 0.58 (95\% CI 0.44 to 0.70). } \keyword{univar} epiR/man/epi.mh.Rd0000644000176200001440000000774314074732714013417 0ustar liggesusers\name{epi.mh} \alias{epi.mh} \title{Fixed-effects meta-analysis of binary outcomes using the Mantel-Haenszel method} \description{ Computes individual study odds or risk ratios for binary outcome data. Computes the summary odds or risk ratio using the Mantel-Haenszel method. Performs a test of heterogeneity among trials. Performs a test for the overall difference between groups (that is, after pooling the studies, do treated groups differ significantly from controls?). 
}
\usage{
epi.mh(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", 
   alternative = c("two.sided", "less", "greater"), conf.level = 0.95)
}
\arguments{
\item{ev.trt}{observed number of events in the treatment group.}
\item{n.trt}{number in the treatment group.}
\item{ev.ctrl}{observed number of events in the control group.}
\item{n.ctrl}{number in the control group.}
\item{names}{character string identifying each trial.}
\item{method}{a character string indicating the method to be used. Options are \code{odds.ratio} or \code{risk.ratio}.}
\item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}. }
\item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.}
}
\details{
\code{alternative = "greater"} tests the hypothesis that the Mantel-Haenszel summary measure of association is greater than 1.
}
\value{
A list containing the following:
\item{OR}{the odds ratio for each trial and the lower and upper bounds of the confidence interval of the odds ratio for each trial.}
\item{RR}{the risk ratio for each trial and the lower and upper bounds of the confidence interval of the risk ratio for each trial.}
\item{OR.summary}{the Mantel-Haenszel summary odds ratio and the lower and upper bounds of the confidence interval of the Mantel-Haenszel summary odds ratio.}
\item{RR.summary}{the Mantel-Haenszel summary risk ratio and the lower and upper bounds of the confidence interval of the Mantel-Haenszel summary risk ratio.}
\item{weights}{the raw and inverse variance weights assigned to each trial.}
\item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.}
\item{Hsq}{the relative excess of the heterogeneity test statistic \code{Q} over the degrees of freedom \code{df}.}
\item{Isq}{the percentage of total variation in study estimates that is due to heterogeneity rather than chance.}
\item{effect}{a vector containing \code{z} the test statistic for overall treatment effect and its associated P-value.}
}
\references{
Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, 2001, pp. 291 - 299.

Higgins JP, Thompson SG (2002). Quantifying heterogeneity in a meta-analysis. Statistics in Medicine 21: 1539 - 1558.
}
\note{
Using this method, the pooled odds and risk ratios are computed using the raw individual study weights. The methodology for computing the Mantel-Haenszel summary odds ratio follows the approach described in Deeks, Altman and Bradburn MJ (2001, pp 291 - 299).

The function checks each stratum for cells with zero frequencies. If a zero frequency is found in any cell, 0.5 is added to all cells within that stratum.
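For example, if a hypothetical trial reported 0 events among 25 treated subjects and 4 events among 27 controls, the four cell frequencies for that trial (0, 25, 4 and 23) would be analysed as 0.5, 25.5, 4.5 and 23.5.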
} \seealso{ \code{\link{epi.dsl}, \link{epi.iv}, \link{epi.smd}} } \examples{ ## EXAMPLE 1: data(epi.epidural) epi.mh(ev.trt = epi.epidural$ev.trt, n.trt = epi.epidural$n.trt, ev.ctrl = epi.epidural$ev.ctrl, n.ctrl = epi.epidural$n.ctrl, names = as.character(epi.epidural$trial), method = "odds.ratio", alternative = "two.sided", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sep.pass.Rd0000644000176200001440000000636113754452056014600 0ustar liggesusers\name{rsu.sep.pass} \alias{rsu.sep.pass} \title{ Surveillance system sensitivity assuming passive surveillance and representative sampling within clusters } \description{ Calculates the surveillance system (population-level) sensitivity for detection of disease for a passive surveillance system assuming comprehensive population coverage and sampling of clinical cases within diseased clusters.} \usage{ rsu.sep.pass(N, n, step.p, pstar.c, p.inf.u, se.u) } \arguments{ \item{N}{scalar or vector of length equal to the number of rows in \code{step.p} representing the population size.} \item{n}{scalar or vector of length equal to the number of rows in \code{step.p} representing the number of units tested per cluster.} \item{step.p}{vector or matrix of detection probabilities (0 to 1) for each step in the detection process. If a vector each value represents a step probability for a single calculation. If a matrix, columns are step probabilities and rows are simulation iterations.} \item{pstar.c}{scalar (0 to 1) or vector of length equal to the number of rows in \code{step.p} representing the cluster-level design prevalence.} \item{p.inf.u}{scalar (0 to 1) or vector of length equal to the number of rows in \code{step.p} representing the probability of disease in sampled and tested units. This is equivalent to the positive predictive value for a given prior probability of infection.} \item{se.u}{scalar (0 to 1) or vector of length equal to the number of rows in \code{step.p}, representing the unit sensitivity.} } \value{ A list comprised of two elements: \item{se.p}{scalar or vector, the estimated surveillance system (population-level) sensitivity of detection.} \item{se.c}{scalar or vector, the estimated cluster-level sensitivity of detection.} If \code{step.p} is a vector, scalars are returned. If \code{step.p} is a matrix, values are vectors of length equal to the number of rows in \code{step.p}. } \references{ Lyngstad T, Hellberg H, Viljugrein H, Bang Jensen B, Brun E, Sergeant E, Tavornpanich S (2016). Routine clinical inspections in Norwegian marine salmonid sites: A key role in surveillance for freedom from pathogenic viral haemorrhagic septicaemia (VHS). Preventive Veterinary Medicine 124: 85 - 95. DOI: 10.1016/j.prevetmed.2015.12.008. } \examples{ ## EXAMPLE 1: ## A passive surveillance system for disease X operates in your country. ## There are four steps to the diagnostic cascade with detection probabilities ## for each process of 0.10, 0.20, 0.90 and 0.99, respectively. Assuming the ## probability that a unit actually has disease if it is submitted for ## testing is 0.98, the sensitivity of the diagnostic test used at the unit ## level is 0.90, the population is comprised of 1000 clusters (herds), ## five animals from each cluster (herd) are tested and the cluster-level ## design prevalence is 0.01, what is the sensitivity of disease detection ## at the cluster (herd) and population level? 
rsu.sep.pass(N = 1000, n = 5, step.p = c(0.10,0.20,0.90,0.99), pstar.c = 0.01, p.inf.u = 0.98, se.u = 0.90) ## The sensitivity of disease detection at the cluster (herd) level is 0.018. ## The sensitivity of disease detection at the population level is 0.16. } \keyword{methods} epiR/man/epi.RtoBUGS.Rd0000644000176200001440000000164513117711440014221 0ustar liggesusers\name{epi.RtoBUGS} \alias{epi.RtoBUGS} \title{R to WinBUGS data conversion} \description{ Writes data from an R list to a text file in WinBUGS-compatible format. } \usage{ epi.RtoBUGS(datalist, towhere) } \arguments{ \item{datalist}{a list (normally, with named elements) which may include scalars, vectors, matrices, arrays of any number of dimensions, and data frames.} \item{towhere}{a character string identifying where the file is to be written.} } \details{ The function doesn't check to ensure that only numbers are being produced. In particular, factor labels in a dataframe will be output to the file, which normally won't be desired. } \references{ Best, NG. WinBUGS 1.3.1 Short Course, Brisbane, November 2000. } \author{ Terry Elrod (terry.elrod@ualberta.ca), Kenneth Rice. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.ssequb.Rd0000644000176200001440000001475314164142054014304 0ustar liggesusers\name{epi.ssequb} \alias{epi.ssequb} \title{ Sample size for a parallel equivalence trial, binary outcome } \description{ Sample size for a parallel equivalence trial, binary outcome. } \usage{ epi.ssequb(treat, control, delta, n, r = 1, power, nfractional = FALSE, alpha) } \arguments{ \item{treat}{the expected proportion of successes in the treatment group.} \item{control}{the expected proportion of successes in the control group.} \item{delta}{the equivalence limit, expressed as the absolute change in the outcome of interest that represents a clinically meaningful difference. For an equivalence trial the value entered for \code{delta} must be greater than zero.} \item{n}{scalar, the total number of study subjects in the trial.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{power}{scalar, the required study power.} \item{nfractional}{logical, return fractional sample size.} \item{alpha}{scalar, defining the desired alpha level.} } \value{ A list containing the following: \item{n.total}{the total number of study subjects required.} \item{n.treat}{the required number of study subject in the treatment group.} \item{n.control}{the required number of study subject in the control group.} \item{delta}{the equivalence limit, as entered by the user.} \item{power}{the specified or calculated study power.} } \references{ Bennett J, Dismukes W, Duma R, Medoff G, Sande M, Gallis H, Leonard J, Fields B, Bradshaw M, Haywood H, McGee Z, Cate T, Cobbs C, Warner J, Alling D (1979). A comparison of amphotericin B alone and combined with flucytosine in the treatment of cryptoccal meningitis. New England Journal of Medicine 301, 126 - 131. DOI: 10.1056/NEJM197907193010303. Chow S, Shao J, Wang H (2008). Sample Size Calculations in Clinical Research. Chapman & Hall/CRC Biostatistics Series, pp. 91. Ewald B (2013). Making sense of equivalence and non-inferiority trials. Australian Prescriber 36: 170 - 173. Julious SA (2004). Sample sizes for clinical trials with normal data. Statistics in Medicine 23: 1921 - 1986. Julious SA (2009). Estimating Samples Sizes in Clinical Trials. CRC, New York. Machin D, Campbell MJ, Tan SB, Tan SH (2009). 
Sample Size Tables for Clinical Studies. Wiley Blackwell, New York. Wang B, Wang H, Tu X, Feng C (2017). Comparisons of superiority, non-inferiority, and equivalence trials. Shanghai Archives of Psychiatry 29, 385 - 388. DOI: 10.11919/j.issn.1002-0829.217163. } \note{ A summary of the key features of superiority, equivalence and non-inferiority trial comparisons is shown in the following table (adapted from Campbell et al., 2018 [page 170] and Wang et al., 2017): \tabular{lllll}{ --------------- \tab ------------------------- \tab ------------------------ \tab ---------------------- \tab ---------------------- \cr Test for \tab Null hypothesis \tab Alt hypothesis \tab Type I error \tab Type II error \cr --------------- \tab ------------------------- \tab ------------------------ \tab ---------------------- \tab ---------------------- \cr Superiority \tab H0: Ps - Pn == 0 \tab H1: Ps - Pn != 0 \tab 2 sided, 0.050 \tab 1 sided, 0.10 or 0.20 \cr Equivalence \tab H0: |Ps - Pn| >= delta \tab H1: |Ps - Pn| < delta \tab 2 sided, 0.050 \tab 2 sided, 0.10 or 0.20 \cr Non-inferiority \tab H0: Ps - Pn >= delta \tab H1: Ps - Pn < delta \tab 1 sided, 0.050 \tab 1 sided, 0.10 or 0.20 \cr --------------- \tab ------------------------- \tab ------------------------ \tab ---------------------- \tab ---------------------- \cr } With a superiority trial the aim is to estimate \eqn{Ps - Pn} with a view to claiming a difference between groups. With an equivalence trial the aim is not to estimate \eqn{Ps - Pn} but to judge if it is within the margins defined by \code{delta}. With a non-inferiority trial the aim is not to estimate \eqn{Ps - Pn} but to judge whether it is less than the margin defined by \code{delta}. Consider a clinical trial comparing two groups, a standard treatment (\eqn{s}) and a new treatment (\eqn{n}). A proportion of subjects in the standard treatment group experience the outcome of interest \eqn{P_{s}} and a proportion of subjects in the new treatment group experience the outcome of interest \eqn{P_{n}}. We specify the absolute value of the maximum acceptable difference between \eqn{P_{n}} and \eqn{P_{s}} as \eqn{\delta}. For an equivalence trial the null hypothesis is: \eqn{H_{0}: |P_{s} - P_{n}| \ge \delta} The alternative hypothesis is: \eqn{H_{1}: |P_{s} - P_{n}| < \delta} An equivalence trial is used if we want to show that two treatments produce the same clinical outcomes. The value of the maximum acceptable difference \eqn{\delta} is chosen so that a patient will not detect any change in effect when replacing the standard treatment with the new treatment. For an equivalence trial the value entered for \code{delta} must be greater than zero. Note that when: \deqn{sign(P_n - P_s - \delta) \neq sign(z_{1-\alpha} + z_{1-\beta})} there is no solution for study power. For typical values of \eqn{\alpha} and \eqn{\beta} this would occur if \eqn{P_{n} - P_{s} - \delta < 0}. That is, when the targeted alternative is within the null hypothesis. The function issues a warning if these conditions are met. When calculating the power of a study, the argument \code{n} refers to the total study size (that is, the number of subjects in the treatment group plus the number in the control group). } \examples{ ## EXAMPLE 1 (from Machin, Campbell, Tan and Tan 2009 p. 113): ## Bennett, Dismukes, Duma et al. (1979) designed a clinical trial to test ## whether combination chemotherapy for a shorter period would be at least ## as good as conventional therapy for patients with cryptococcal meningitis. 
## They recruited 39 patients to each treatment arm and wished to conclude ## that a difference of less than 20\% in response rate between the treatments ## would indicate equivalence. Assuming a one-sided test size of 10\% and a ## power of 80\% what would be a realistic sample size if the trial were ## to be repeated? epi.ssequb(treat = 0.50, control = 0.50, delta = 0.20, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.10) ## A total of 166 subjects need to be enrolled in the trial, 83 in the ## treatment group and 83 in the control group. } \keyword{univar} epiR/man/epi.ssninfb.Rd0000644000176200001440000001513514164036763014450 0ustar liggesusers\name{epi.ssninfb} \alias{epi.ssninfb} \title{ Sample size for a non-inferiority trial, binary outcome } \description{ Sample size for a non-inferiority trial, binary outcome. } \usage{ epi.ssninfb(treat, control, delta, n, r = 1, power, nfractional = FALSE, alpha) } \arguments{ \item{treat}{the expected proportion of successes in the treatment group.} \item{control}{the expected proportion of successes in the control group.} \item{delta}{the equivalence limit, expressed as the absolute change in the outcome of interest that represents a clinically meaningful difference. For a non-inferiority trial the value entered for \code{delta} must be greater than or equal to zero.} \item{n}{scalar, the total number of study subjects in the trial.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{power}{scalar, the required study power.} \item{nfractional}{logical, return fractional sample size.} \item{alpha}{scalar, defining the desired alpha level.} } \value{ A list containing the following: \item{n.total}{the total number of study subjects required.} \item{n.treat}{the required number of study subject in the treatment group.} \item{n.control}{the required number of study subject in the control group.} \item{delta}{the equivalence limit, as entered by the user.} \item{power}{the specified or calculated study power.} } \references{ Blackwelder WC (1982). Proving the null hypothesis in clinical trials. Controlled Clinical Trials 3: 345 - 353. Ewald B (2013). Making sense of equivalence and non-inferiority trials. Australian Prescriber 36: 170 - 173. Julious SA (2004). Sample sizes for clinical trials with normal data. Statistics in Medicine 23: 1921 - 1986. Julious SA (2009). Estimating Samples Sizes in Clinical Trials. CRC, New York. Machin D, Campbell MJ, Tan SB, Tan SH (2009). Sample Size Tables for Clinical Studies. Wiley Blackwell, New York. Scott IA (2009). Non-inferiority trials: determining whether alternative treatments are good enough. Medical Journal of Australia 190: 326 - 330. Wang B, Wang H, Tu X, Feng C (2017). Comparisons of superiority, non-inferiority, and equivalence trials. Shanghai Archives of Psychiatry 29, 385 - 388. DOI: 10.11919/j.issn.1002-0829.217163. Zhong B (2009). How to calculate sample size in randomized controlled trial? Journal of Thoracic Disease 1: 51 - 54. } \author{ Many thanks to Aniko Szabo (Medical College of Wisconsin, Wisconsin USA) for improvements to the power calculations for this function and suggestions to improve the documentation. } \note{ Consider a clinical trial comparing two groups, a standard treatment (\eqn{s}) and a new treatment (\eqn{n}). 
A proportion of subjects in the standard treatment group experience the outcome of interest \eqn{P_{s}} and a proportion of subjects in the new treatment group experience the outcome of interest \eqn{P_{n}}. We specify the absolute value of the maximum acceptable difference between \eqn{P_{n}} and \eqn{P_{s}} as \eqn{\delta}. For a non-inferiority trial the value entered for \code{delta} must be greater than or equal to zero. For a non-inferiority trial the null hypothesis is: \eqn{H_{0}: P_{s} - P_{n} \ge \delta} The alternative hypothesis is: \eqn{H_{1}: P_{s} - P_{n} < \delta} The aim of a non-inferiority trial is to show that a new treatment is not (much) inferior to a standard treatment. Showing non-inferiority can be of interest because: (a) it is often not ethically possible to do a placebo-controlled trial; (b) the new treatment is not expected to be better than the standard treatment on primary efficacy endpoints, but is safer; (c) the new treatment is not expected to be better than the standard treatment on primary efficacy endpoints, but is cheaper to produce or easier to administer; and (d) the new treatment is not expected to be better than the standard treatment on primary efficacy endpoints in a clinical trial, but compliance will be better outside the clinical trial and hence efficacy will be better outside the trial. When calculating the power of a study, note that the argument \code{n} refers to the total study size (that is, the number of subjects in the treatment group plus the number in the control group). For a comparison of the key features of superiority, equivalence and non-inferiority trials, refer to the documentation for \code{\link{epi.ssequb}}. } \examples{ ## EXAMPLE 1 (from Chow S, Shao J, Wang H 2008, p. 90): ## A pharmaceutical company would like to conduct a clinical trial to ## compare the efficacy of two antimicrobial agents when administered orally ## to patients with skin infections. Assume the true mean cure rate of the ## treatment is 0.85 and the true mean cure rate of the control is 0.65. ## We consider a difference of 0.10 or less between the proportion cured in ## the treatment group and the proportion cured in the control group (i.e., ## delta = 0.10) to be of no clinical significance. ## Assuming a one-sided test size of 5\% and a power of 80\% how many ## subjects should be included in the trial? epi.ssninfb(treat = 0.85, control = 0.65, delta = 0.10, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05) ## A total of 50 subjects need to be enrolled in the trial, 25 in the ## treatment group and 25 in the control group. ## EXAMPLE 1 (cont.): ## Suppose only 40 subjects were enrolled in the trial, 20 in the treatment ## group and 20 in the control group. What is the estimated study power? epi.ssninfb(treat = 0.85, control = 0.65, delta = 0.10, n = 40, r = 1, power = NA, nfractional = FALSE, alpha = 0.05) ## With only 40 subjects the estimated study power is 0.73. ## EXAMPLE 2: ## Assume the true mean cure rate for a treatment group to be 0.40 and the true ## mean cure rate for a control group to be the same, 0.40. We consider a ## difference of 0.10 in cured proportions (i.e., delta = 0.10) to be of no ## clinical importance. ## Assuming a one-sided test size of 5\% and a power of 30\% how many ## subjects should be included in the trial? 
n <- epi.ssninfb(treat = 0.4, control = 0.4, delta = 0.10, n = NA, r = 1, power = 0.3, nfractional = TRUE, alpha = 0.05)$n.total n ## A total of 120 subjects need to be enrolled in the trial, 60 in the ## treatment group and 60 in the control group. ## Re-run the function using n = 120 to confirm that power equals 0.30: epi.ssninfb(treat = 0.4, control = 0.4, delta = 0.10, n = n, r = 1, power = NA, nfractional = TRUE, alpha = 0.05)$power ## With 120 subjects the estimated study power is 0.30. } \keyword{univar} epiR/man/epi.sssupb.Rd0000644000176200001440000001002514112362420014277 0ustar liggesusers\name{epi.sssupb} \alias{epi.sssupb} \title{ Sample size for a parallel superiority trial, binary outcome } \description{ Sample size for a parallel superiority trial, binary outcome. } \usage{ epi.sssupb(treat, control, delta, n, r = 1, power, nfractional = FALSE, alpha) } \arguments{ \item{treat}{the expected proportion of successes in the treatment group.} \item{control}{the expected proportion of successes in the control group.} \item{delta}{the equivalence limit, expressed as the absolute change in the outcome of interest that represents a clinically meaningful difference. For a superiority trial the value entered for \code{delta} must be greater than or equal to zero.} \item{n}{scalar, the total number of study subjects in the trial.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{power}{scalar, the required study power.} \item{nfractional}{logical, return fractional sample size.} \item{alpha}{scalar, defining the desired alpha level.} } \value{ A list containing the following: \item{n.total}{the total number of study subjects required.} \item{n.treat}{the required number of study subject in the treatment group.} \item{n.control}{the required number of study subject in the control group.} \item{delta}{the equivalence limit, as entered by the user.} \item{power}{the specified or calculated study power.} } \references{ Chow S, Shao J, Wang H (2008). Sample Size Calculations in Clinical Research. Chapman & Hall/CRC Biostatistics Series, page 90. Julious SA (2004). Sample sizes for clinical trials with normal data. Statistics in Medicine 23: 1921 - 1986. Pocock SJ (1983). Clinical Trials: A Practical Approach. Wiley, New York. Wang B, Wang H, Tu X, Feng C (2017). Comparisons of superiority, non-inferiority, and equivalence trials. Shanghai Archives of Psychiatry 29, 385 - 388. DOI: 10.11919/j.issn.1002-0829.217163. } \note{ Consider a clinical trial comparing two groups, a standard treatment (\eqn{s}) and a new treatment (\eqn{n}). A proportion of subjects in the standard treatment group experience the outcome of interest \eqn{P_{s}} and a proportion of subjects in the new treatment group experience the outcome of interest \eqn{P_{n}}. We specify the absolute value of the maximum acceptable difference between \eqn{P_{n}} and \eqn{P_{s}} as \eqn{\delta}. For a superiority trial the value entered for \code{delta} must be greater than or equal to zero. For a superiority trial the null hypothesis is: \eqn{H_{0}: P_{s} - P_{n} = 0} The alternative hypothesis is: \eqn{H_{1}: P_{s} - P_{n} != 0} When calculating the power of a study, the argument \code{n} refers to the total study size (that is, the number of subjects in the treatment group plus the number in the control group). For a comparison of the key features of superiority, equivalence and non-inferiority trials, refer to the documentation for \code{\link{epi.ssequb}}. 
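As a guide to how the required sample size behaves, and assuming equal group sizes (\code{r = 1}), the number of subjects required in each group can be approximated using the usual normal approximation: \deqn{n = \frac{(z_{1-\alpha} + z_{1-\beta})^{2} [P_{n}(1 - P_{n}) + P_{s}(1 - P_{s})]}{(P_{n} - P_{s} - \delta)^{2}}} where \eqn{z_{1-\alpha}} and \eqn{z_{1-\beta}} are quantiles of the standard normal distribution. This is a sketch of the general approach described by Chow, Shao and Wang (2008), not a restatement of the exact code used by this function. With the values used in the example below (\eqn{P_{n} = 0.85}, \eqn{P_{s} = 0.65}, \eqn{\delta = 0.05}, one-sided \eqn{\alpha = 0.05}, power 0.80) it returns approximately 98 subjects per group, in agreement with the function output. 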
} \examples{ ## EXAMPLE 1 (from Chow S, Shao J, Wang H 2008, p. 91): ## Suppose that a pharmaceutical company is interested in conducting a ## clinical trial to compare the efficacy of two antimicrobial agents ## when administered orally once daily in the treatment of patients ## with skin infections. In what follows, we consider the situation ## where the intended trial is for testing superiority of the ## test drug over the active control drug. For this purpose, the following ## assumptions are made. First, sample size calculation will be performed ## for achieving 80\% power at the 5\% level of significance. ## Assume the true mean cure rates of the treatment agents and the active ## control are 85\% and 65\%, respectively. Assume the superiority ## margin is 5\%. epi.sssupb(treat = 0.85, control = 0.65, delta = 0.05, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05) ## A total of 196 subjects need to be enrolled in the trial, 98 in the ## treatment group and 98 in the control group. } \keyword{univar} epiR/man/epi.sscompc.Rd0000644000176200001440000001327314164044754014455 0ustar liggesusers\name{epi.sscompc} \alias{epi.sscompc} \title{ Sample size, power and minimum detectable difference when comparing continuous outcomes } \description{ Sample size, power and minimum detectable difference when comparing continuous outcomes. } \usage{ epi.sscompc(treat, control, n, sigma, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{treat}{the expected value for the treatment group (see below).} \item{control}{the expected value for the control group (see below).} \item{n}{scalar, defining the total number of subjects in the study (i.e., the number in the treatment and control group).} \item{sigma}{the expected standard deviation of the variable of interest for both treatment and control groups.} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the treatment group divided by the number in the control group.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the outcome in the exposed (treatment) group is greater than or less than the outcome in the unexposed (control) group. Use a one-sided test to evaluate whether or not the outcome in the exposed (treatment) group is greater than the outcome in the unexposed (control) group.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ The methodology in this function follows the approach described in Chapter 8 of Woodward (2014), pp. 295 - 329. 
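As a sketch of the calculations involved (following the general approach of Woodward 2014, and not a line-for-line restatement of the function's code), for a comparison of two means with allocation ratio \code{r} the total number of subjects can be approximated as: \deqn{n = \frac{(r + 1)^{2} (z_{1-\alpha/2} + z_{1-\beta})^{2} \sigma^{2}}{r \delta^{2}}} where \eqn{\delta} is the difference between \code{treat} and \code{control}, \eqn{\sigma} is the common standard deviation and \eqn{z_{1-\alpha/2}} is replaced by \eqn{z_{1-\alpha}} for a one-sided test. For Example 1 below (\eqn{\sigma = 1.4}, \eqn{\delta = 0.5}, one-sided \eqn{\alpha = 0.05}, power 0.95) this expression returns approximately 340 subjects in total, matching the reported result. 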
} \value{ A list containing the following: \item{n.total}{the total number of subjects required for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{n.treat}{the total number of subjects in the treatment group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{n.control}{the total number of subjects in the control group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the treatment group compared with the control group.} \item{power}{the power of the study given the number of study subjects, the expected effect size and level of confidence.} \item{delta}{the minimum detectable difference given the specified level of confidence and power.} } \references{ Kelsey JL, Thompson WD, Evans AS (1986). Methods in Observational Epidemiology. Oxford University Press, London, pp. 254 - 284. Woodward M (1999). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 329 - 365. Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 295 - 329. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. A detailed description of sample size calculations for case-control studies (with numerous worked examples, many of them reproduced below) is provided by Woodward (2014), pages 295 to 329. See the documentation for \code{\link{epi.sscohortc}} for an example using the \code{design} facility implemented in this function. } \examples{ ## EXAMPLE 1 (from Woodward 2014 Example 8.8 p. 308): ## Supposed we wish to test, at the 5\% level of significance, the hypothesis ## that cholesterol means in a population are equal in two study years against ## the one-sided alternative that the mean is higher in the second of the ## two years. Suppose that equal sized samples will be taken in each year, ## but that these will not necessarily be from the same individuals (i.e., the ## two samples are drawn independently). Our test is to have a power of 0.95 ## at detecting a difference of 0.5 mmol/L. The standard deviation of serum ## cholesterol in humans is assumed to be 1.4 mmol/L. epi.sscompc(treat = 5.0, control = 4.5, n = NA, sigma = 1.4, power = 0.95, r = 1, design = 1, sided.test = 1, nfractional = FALSE, conf.level = 0.95) ## To satisfy the study requirements 340 individuals need to be tested: 170 in ## the first year and 170 in the second year. ## EXAMPLE 2 (from Woodward 1999 Example 8.9 pp. 345): ## Women taking oral contraceptives sometimes experience anaemia due to ## impaired iron absorption. A study is planned to compare the use of iron ## tablets against a course of placebos. Oral contraceptive users are ## randomly allocated to one of the two treatment groups and mean serum ## iron concentration compared after 6 months. Data from previous studies ## indicates that the standard deviation of the increase in iron ## concentration will be around 4 micrograms\% over a 6-month period. ## The average increase in serum iron concentration without supplements is ## also thought to be 4 micrograms\%. The investigators want to be 90\% sure ## of detecting when the supplement doubles the serum iron concentration using ## a two-sided 5\% significance test. 
## It is decided to allocate 4 times as many ## women to the treatment group so as to obtain a better estimate of its effect. ## How many women should be enrolled in this study? epi.sscompc(treat = 8, control = 4, n = NA, sigma = 4, power = 0.90, r = 4, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) ## The estimated sample size is 70. We allocate 70/5 = 14 women to the ## placebo group and four times as many (n = 56) to the iron treatment group. } \keyword{univar} epiR/man/rsu.sep.rsmult.Rd0000644000176200001440000000673013733537010015147 0ustar liggesusers\name{rsu.sep.rsmult} \alias{rsu.sep.rsmult} \title{ Surveillance system sensitivity by combining multiple surveillance components } \description{ Calculates surveillance system (population-level) sensitivity for multiple components, accounting for lack of independence (overlap) between components. } \usage{ rsu.sep.rsmult(C = NA, pstar.c, rr, ppr, se.c) } \arguments{ \item{C}{scalar integer or vector of the same length as \code{rr}, representing the population sizes (number of clusters) for each risk group.} \item{pstar.c}{scalar (0 to 1) representing the cluster level design prevalence.} \item{rr}{vector of length equal to the number of risk strata, representing the cluster relative risks.} \item{ppr}{vector of the same length as \code{rr} representing the cluster level population proportions. Ignored if \code{C} is specified.} \item{se.c}{surveillance system sensitivity estimates for clusters in each component and corresponding risk group. A list with multiple elements where each element is a dataframe of population sensitivity values from a separate surveillance system component. The first column equals the clusterid, the second column equals the cluster-level risk group index and the third column equals the population sensitivity values.} } \value{ A list comprised of two elements: \item{se.p}{a matrix (or vector if \code{C} is not specified) of population-level (surveillance system) sensitivities (binomial and hypergeometric and adjusted vs unadjusted).} \item{se.component}{a matrix of adjusted and unadjusted sensitivities for each component.} } \examples{ ## EXAMPLE 1: ## You are working with a population that is comprised of individuals in ## 'high' and 'low' risk areas. There are 300 individuals in the high risk ## area and 1200 individuals in the low risk area. The risk of disease for ## those in the high risk area is assumed to be three times that of the low ## risk area. C <- c(300,1200) pstar.c <- 0.01 rr <- c(3,1) ## Generate population sensitivity values for clusters in each component of ## the surveillance system. Each of the three dataframes below lists id, ## rg (risk group) and cse (component sensitivity): comp1 <- data.frame(id = 1:100, rg = c(rep(1, times = 50), rep(2, times = 50)), cse = rep(0.5, times = 100)) comp2 <- data.frame(id = seq(from = 2, to = 120, by = 2), rg = c(rep(1, times = 25), rep(2, times = 35)), cse = runif(n = 60, min = 0.5, max = 0.8)) comp3 <- data.frame(id = seq(from = 5, to = 120, by = 5), rg = c(rep(1, times = 10), rep(2, times = 14)), cse = runif(n = 24, min = 0.7, max = 1)) ## Combine the three components into a list: se.c <- list(comp1, comp2, comp3) ## What is the overall population-level (surveillance system) sensitivity? rsu.sep.rsmult(C = C, pstar.c = pstar.c, rr = rr, ppr = NA, se.c = se.c) ## The overall adjusted system sensitivity (calculated using the binomial ## distribution) is 0.85. 
## EXAMPLE 2: ## Assume that you don't know exactly how many individuals are in the high ## and low risk areas but you have a rough estimate that the proportion of ## the population in each area is 0.2 and 0.8, respectively. What is the ## population-level (surveillance system) sensitivity? ppr <- c(0.20,0.80) rsu.sep.rsmult(C = NA, pstar.c = pstar.c, rr = rr, ppr = ppr, se.c = se.c) ## The overall adjusted system sensitivity (calculated using the binomial ## distribution) is 0.85. } \keyword{methods} epiR/man/epi.dms.Rd0000644000176200001440000000201214074716312013554 0ustar liggesusers\name{epi.dms} \alias{epi.dms} \title{Decimal degrees and degrees, minutes and seconds conversion } \description{ Converts decimal degrees to degrees, minutes and seconds. Converts degrees, minutes and seconds to decimal degrees. } \usage{ epi.dms(dat) } \arguments{ \item{dat}{the data. A one-column matrix is assumed when converting decimal degrees to degrees, minutes, and seconds. A two-column matrix is assumed when converting degrees and decimal minutes to decimal degrees. A three-column matrix is assumed when converting degrees, minutes and seconds to decimal degrees.} } \examples{ ## EXAMPLE 1: ## Degrees, minutes, seconds to decimal degrees: dat.m01 <- matrix(c(41, 38, 7.836, -40, 40, 27.921), byrow = TRUE, nrow = 2) epi.dms(dat.m01) ## EXAMPLE 2: ## Decimal degrees to degrees, minutes, seconds: dat.m02 <- matrix(c(41.63551, -40.67442), nrow = 2) epi.dms(dat.m02) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.sssep.rbsrg.Rd0000644000176200001440000000524413770243144015310 0ustar liggesusers\name{rsu.sssep.rbsrg} \alias{rsu.sssep.rbsrg} \title{ Sample size to achieve a desired surveillance system sensitivity assuming risk-based sampling and a single sensitivity value for each risk group } \description{ Sample size to achieve a desired population sensitivity assuming risk-based sampling, a single sensitivity value for each risk group and perfect test specificity. } \usage{ rsu.sssep.rbsrg(pstar, rr, ppr, spr, se.p, se.u) } \arguments{ \item{pstar}{scalar, representing the design prevalence.} \item{rr}{vector, defining the relative risk values for each strata in the population.} \item{ppr}{vector of the same length as \code{rr}, defining the population proportions in each strata.} \item{spr}{vector of the same length as \code{rr} representing the planned surveillance proportion for each strata in the population.} \item{se.p}{scalar (0 to 1) representing the desired surveillance system (population-level) sensitivity.} \item{se.u}{scalar (0 to 1) or vector of the same length as \code{rr} representing the sensitivity of the diagnostic test applied at the unit level.} } \value{ A list comprised of four elements: \item{n}{a vector listing the required sample sizes for each (risk) strata.} \item{total}{scalar, representing the total sample size.} \item{epinf}{a vector listing the effective probability of infection estimates.} \item{adj.risk}{a vector listing the adjusted risk estimates.} } \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to confirm the absence of ## disease using risk based sampling. Assume a population level design ## prevalence of 0.10 and there are 'high', 'medium' and 'low' risk areas ## where the risk of disease in the high risk area compared with the low risk ## area is 5 and the risk of disease in the medium risk area compared with ## the low risk area is 3. 
## The proportions of the population at risk in the ## high, medium and low risk areas are 0.10, 0.10 and 0.80, respectively. ## Half of your samples will be taken from individuals in the high risk area, ## 0.30 from the medium risk area and 0.20 from the low risk area. You intend ## to use a test with diagnostic sensitivity of 0.90 and you'd like to take ## sufficient samples to return a population sensitivity of 0.95. How many ## units need to be sampled to meet the requirements of the study? rsu.sssep.rbsrg(pstar = 0.10, rr = c(5,3,1), ppr = c(0.10,0.10,0.80), spr = c(0.50,0.30,0.20), se.p = 0.95, se.u = 0.90) ## A total of 14 units need to be sampled to meet the requirements of the ## study: 7 from the high risk area, 5 from the medium risk area and 2 from ## the low risk area. } \keyword{methods} epiR/man/epi.popsize.Rd0000644000176200001440000000331614074733636014500 0ustar liggesusers\name{epi.popsize} \alias{epi.popsize} \title{ Estimate population size on the basis of capture-recapture sampling } \description{ Estimates population size on the basis of capture-recapture sampling. } \usage{ epi.popsize(T1, T2, T12, conf.level = 0.95, verbose = FALSE) } \arguments{ \item{T1}{an integer representing the number of individuals tested in the first round.} \item{T2}{an integer representing the number of individuals tested in the second round.} \item{T12}{an integer representing the number of individuals tested in both the first and second round.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} \item{verbose}{logical indicating whether detailed or summary results are to be returned.} } \value{ Returns the estimated population size and an estimate of the number of individuals that remain untested. } \references{ Cannon RM, Roe RT (1982). Livestock Disease Surveys: A Field Manual for Veterinarians. Australian Government Publishing Service, Canberra, pp. 34. } \examples{ ## EXAMPLE 1: ## In a field survey 400 feral pigs are captured, marked and then released. ## On a second occasion 40 of the original capture are found when another 400 ## pigs are captured. Estimate the size of this feral pig population. Estimate ## the number of feral pigs that have not been tested. epi.popsize(T1 = 400, T2 = 400, T12 = 40, conf.level = 0.95, verbose = FALSE) ## Estimated population size: 4000 (95\% CI 3125 to 5557) ## Estimated number of untested pigs: 3240 (95\% CI 2365 to 4797) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/rsu.spp.rs.Rd0000644000176200001440000000577514164037175014272 0ustar liggesusers\name{rsu.spp.rs} \alias{rsu.spp.rs} \title{ Surveillance system specificity assuming representative sampling } \description{ Calculates surveillance system (population level) specificity assuming representative sampling and imperfect test specificity. } \usage{ rsu.spp.rs(N, n, c = 1, sp.u) } \arguments{ \item{N}{scalar or vector of the same length as \code{n} defining the [cluster] population size. 
Use \code{NA} if the size of the population is not known, or for a more general application see details, below.} \item{n}{scalar or vector defining the sample size.} \item{c}{scalar or vector of the same length as \code{n} defining the cut-point number of positives to classify a cluster as positive. If the number of positive samples is less than \code{c} the cluster is declared negative; if the number of positive samples is greater than or equal to \code{c} the cluster is declared positive.} \item{sp.u}{scalar (0 to 1) or vector of the same length as \code{n}, the specificity of the diagnostic test at the surveillance unit level.} } \details{ This function calculates population specificity using the hypergeometric distribution if \code{N} and \code{c} are provided and the binomial distribution otherwise. If \code{N} is provided the number of false positives is fixed, based on \code{N} and test specificity \code{sp.u}. This implies that test specificity is a fixed individual-level characteristic (e.g., due to specific cross-reacting infection). If \code{N} is not supplied, cluster (e.g., herd) specificity is a random binomial function based only on the number of samples and test specificity (i.e., specificity is a function of the test and independent of individual characteristics). } \value{ A vector of population specificity estimates. } \references{ Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43. } \examples{ ## EXAMPLE 1: ## Calculate the surveillance system specificity (i.e., the probability that ## an uninfected population will be correctly identified as negative) if 30 ## surveillance units have been tested from a population of 150 using a ## diagnostic test with surveillance unit specificity of 0.90, using a ## cut-point of one or more positives to consider the population positive. ## A specificity of 0.90 means that 9 out of 10 samples from disease-negative ## surveillance units will return a negative result (i.e., one of them will be ## a false positive). rsu.spp.rs(N = 150, n = 30, c = 1, sp.u = 0.90) ## The surveillance system specificity is 0.03. There is a probability of ## 0.03 that all 30 samples will be negative. ## EXAMPLE 2: ## Now assume we set a cut-point of 6. That is, 6 or more samples have to ## return a positive result for us to declare the population positive: rsu.spp.rs(N = 150, n = 30, c = 6, sp.u = 0.90) ## The surveillance system specificity is 0.95. } \keyword{methods} epiR/man/epi.prev.Rd0000644000176200001440000001545114075133040013746 0ustar liggesusers\name{epi.prev} \alias{epi.prev} \title{ Estimate true prevalence } \description{ Compute the true prevalence of a disease in a population on the basis of an imperfect test. } \usage{ epi.prev(pos, tested, se, sp, method = "wilson", units = 100, conf.level = 0.95) } \arguments{ \item{pos}{a vector listing the count of positive test results for each population.} \item{tested}{a vector listing the count of subjects tested for each population.} \item{se}{test sensitivity (0 - 1). \code{se} can either be a single number or a vector of the same length as \code{pos}. See the examples, below, for details.} \item{sp}{test specificity (0 - 1). \code{sp} can either be a single number or a vector of the same length as \code{pos}. See the examples, below, for details.} \item{method}{a character string indicating the confidence interval calculation method to use. 
Options are \code{"c-p"} (Cloppper-Pearson), \code{"sterne"} (Sterne), \code{"blaker"} (Blaker) and \code{"wilson"} (Wilson).} \item{units}{multiplier for the prevalence estimates.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \details{ Appropriate confidence intervals for the adjusted prevalence estimate are provided, accounting for the change in variance that arises from imperfect test sensitivity and specificity (see Reiczigel et al 2010 for details). The Clopper-Pearson method is known to be too conservative for two-sided intervals (Blaker 2000, Agresti and Coull 1998). Blaker's and Sterne's methods (Blaker 2000, Sterne 1954) provide smaller exact two-sided confidence interval estimates. } \value{ A list containing the following: \item{ap}{the point estimate of apparent prevalence and the lower and upper bounds of the confidence interval around the apparent prevalence estimate.} \item{tp}{the point estimate of the true prevalence and the lower and upper bounds of the confidence interval around the true prevalence estimate.} } \references{ Abel U (1993). DieBewertung Diagnostischer Tests. Hippokrates, Stuttgart. Agresti A, Coull BA (1998). Approximate is better than 'exact' for interval estimation of binomial proportions. American Statistician 52: 119 - 126. Blaker H (2000). Confidence curves and improved exact confidence intervals for discrete distributions. Canadian Journal of Statistics 28: 783 - 798. Clopper CJ, Pearson ES (1934). The use of confidence of fiducial limits illustrated in the case of the binomial. Biometrika 26: 404 - 413. Gardener IA, Greiner M (1999). Advanced Methods for Test Validation and Interpretation in Veterinary Medicince. Freie Universitat Berlin, ISBN 3-929619-22-9; 80 pp. Messam L, Branscum A, Collins M, Gardner I (2008) Frequentist and Bayesian approaches to prevalence estimation using examples from Johne's disease. Animal Health Research Reviews 9: 1 - 23. Reiczigel J, Foldi J, Ozsvari L (2010). Exact confidence limits for prevalence of disease with an imperfect diagnostic test. Epidemiology and Infection 138: 1674 - 1678. Rogan W, Gladen B (1978). Estimating prevalence from results of a screening test. American Journal of Epidemiology 107: 71 - 76. Speybroeck N, Devleesschauwer B, Joseph L, Berkvens D (2012). Misclassification errors in prevalence estimation: Bayesian handling with care. International Journal of Public Health DOI:10.1007/s00038-012-0439-9. Sterne TE (1954). Some remarks on confidence or fiducial limits. Biometrika 41: 275 - 278. } \note{This function uses apparent prevalence, test sensitivity and test specificity to estimate true prevalence (after Rogan and Gladen, 1978). Confidence intervals for the apparent and true prevalence estimates are based on code provided by Reiczigel et al. (2010). If apparent prevalence is less than (1 - diagnostic test specificity) the Rogan Gladen estimate of true prevalence will be less than zero (Speybroeck et al. 2012). If the apparent prevalence is greater than the diagnostic test sensitivity the Rogan Gladen estimate of true prevalence will be greater than one. When AP < (1 - Sp) the function issues a warning to alert the user that the estimate of true prevalence is invalid. A similar warning is issued when AP > Se. In either situation a Bayesian approach for estimation of true prevalence is recommended. See Messam et al. (2008) for a concise introduction to this topic. 
} \examples{ ## EXAMPLE 1: ## A simple random sample of 150 cows from a herd of 2560 is taken. ## Each cow is given a screening test for brucellosis which has a ## sensitivity of 96\% and a specificity of 89\%. Of the 150 cows tested ## 45 were positive to the screening test. What is the estimated prevalence ## of brucellosis in this herd (and its 95\% confidence interval)? epi.prev(pos = 45, tested = 150, se = 0.96, sp = 0.89, method = "blaker", units = 100, conf.level = 0.95) ## The estimated true prevalence of brucellosis in this herd is 22 (95\% CI 14 ## to 32) cases per 100 cows at risk. ## EXAMPLE 2: ## Moujaber et al. (2008) analysed the seroepidemiology of Helicobacter pylori ## infection in Australia. They reported seroprevalence rates together with ## 95\% confidence intervals by age group using the Clopper-Pearson exact ## method (Clopper and Pearson, 1934). The ELISA test they applied had 96.4\% ## sensitivity and 92.7\% specificity. A total of 151 subjects 1 -- 4 years ## of age were tested. Of this group 6 were positive. What is the estimated ## true prevalence of Helicobacter pylori in this age group? epi.prev(pos = 6, tested = 151, se = 0.964, sp = 0.927, method = "c-p", units = 100, conf.level = 0.95) ## The estimated true prevalence of Helicobacter pylori in 1 -- 4 year olds is ## -4 (95\% CI -6 to 1) cases per 100. The function issues a warning to alert ## the user that the estimate of true prevalence is invalid. ## EXAMPLE 3: ## Three dairy herds are tested for tuberculosis. In each herd a different test ## regime is used (each with a different diagnostic test sensitivity and ## specificity). The number of animals tested in each herd were 210, 189 and ## 124, respectively. The number of test-positives in each herd were 80, 100 ## and 50. Test sensitivities were 0.60, 0.65 and 0.70 (respectively). Test ## specificities were 0.90, 0.95 and 0.99. What is the estimated true ## prevalence of tuberculosis in the three herds? rval.prev03 <- epi.prev(pos = c(80,100,50), tested = c(210,189,124), se = c(0.60,0.65,0.70), sp = c(0.90,0.95,0.99), method = "blaker", units = 100, conf.level = 0.95) round(rval.prev03$tp, digits = 0) ## True prevalence estimates for each herd: ## Herd 1: 56 (95\% CI 43 to 70) cases per 100 cows. ## Herd 2: 80 (95\% CI 68 to 92) cases per 100 cows. ## Herd 3: 57 (95\% CI 45 to 70) cases per 100 cows. 
Use \code{covar = c(0,0)} (the default) if the tests are known to be independent.} } \value{ A list comprised of two elements: \item{independent}{a data frame listing sensitivity \code{se} and specificity \code{sp} assuming the tests are independent.} \item{dependent}{a data frame listing sensitivity \code{se} and specificity \code{sp} calculated using the values of \code{covar}, as entered by the user.} If \code{covar = c(0,0)} data frames \code{independent} and \code{dependent} will be the same. } \references{ Dohoo I, Martin S, Stryhn H (2009). Veterinary Epidemiologic Research. AVC Inc Charlottetown, Prince Edward Island, Canada. Gardner I, Stryhn H, Lind P, Collins M (2000). Conditional dependence between tests affects the diagnosis and surveillance of animal diseases. Preventive Veterinary Medicine 45: 107 - 122. Martin S, Meek A, Willeberg P (1987). Veterinary Epidemiology Principles and Methods. Iowa State University Press Ames. } \note{ With \code{interpretation = "series"} a subject is declared test positive if both of the tests performed return a positive result. With \code{interpretation = "parallel"} a subject is declared test positive if at least one of the tests performed returns a positive result. Interpreting test results in series increases diagnostic specificity. Interpreting test results in parallel increases diagnostic sensitivity. How do I work out appropriate values for \code{covar}? Assume you have two diagnostic tests --- an indirect fluorescent antibody test (IFAT) and a polymerase chain reaction (PCR). The diagnostic sensitivity and specificity of the IFAT is 0.784 and 0.951, respectively. The diagnostic sensitivity and specificity of the PCR is 0.926 and 0.979, respectively. These tests are used on a group of individuals known to be disease positive and a group of individuals known to be disease negative. Results for the disease positive group are as follows: \tabular{lllll}{ \tab IFAT \tab \tab \cr -----------\tab ----------\tab ---------- \tab ---------- \cr PCR \tab Pos \tab Neg \tab Total \cr -----------\tab ----------\tab ---------- \tab ---------- \cr Pos \tab 134 \tab 29 \tab 163 \cr Neg \tab 4 \tab 9 \tab 13 \cr -----------\tab ----------\tab ---------- \tab ---------- \cr Total \tab 138 \tab 38 \tab 176 \cr -----------\tab ----------\tab ---------- \tab ---------- \cr } Results for the disease negative group are as follows: \tabular{lllll}{ \tab IFAT \tab \tab \cr -----------\tab ----------\tab ---------- \tab ---------- \cr PCR \tab Pos \tab Neg \tab Total \cr -----------\tab ----------\tab ---------- \tab ---------- \cr Pos \tab 0 \tab 12 \tab 12 \cr Neg \tab 28 \tab 534 \tab 562 \cr -----------\tab ----------\tab ---------- \tab ---------- \cr Total \tab 28 \tab 546 \tab 574 \cr -----------\tab ----------\tab ---------- \tab ---------- \cr } We denote the observed proportion of disease positive individuals with a positive test result to both tests as \code{p111}. For this example \code{p111 = 134 / 176 = 0.761}. We denote the observed proportion of disease negative individuals with a negative test result to both tests as \code{p000}. For this example \code{p000 = 534 / 574 = 0.930}. Covariance for the disease positive group: \code{covar[1] = p111 - se[1] * se[2] = 0.761 - 0.784 * 0.926 = 0.035}. Covariance for the disease negative group: \code{covar[2] = p000 - sp[1] * sp[2] = 0.930 - 0.951 * 0.979 = -0.001}. The covariance for the disease positive group is small. The covariance for the disease negative group is negligible. 
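The calculations above can be reproduced with a few lines of R code. This is an illustrative sketch only --- the object names are arbitrary and the counts are those given in the two-by-two tables in this note: \preformatted{
## Proportion of disease positive subjects positive to both tests and
## proportion of disease negative subjects negative to both tests:
p111 <- 134 / 176
p000 <- 534 / 574

## Diagnostic sensitivity and specificity of the IFAT and PCR:
se <- c(0.784,0.926)
sp <- c(0.951,0.979)

## Covariance terms for the disease positive and disease negative groups:
covar.pos <- p111 - se[1] * se[2]
covar.neg <- p000 - sp[1] * sp[2]

round(c(covar.pos, covar.neg), digits = 3)
## Returns 0.035 and -0.001, the values used in Example 2 below.
} 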
} \examples{ ## EXAMPLE 1: ## You would like to confirm the absence of disease in a study area. You ## intend to use two tests: the first has a sensitivity and specificity of ## 0.90 and 0.80, respectively. The second has a sensitivity and specificity ## of 0.95 and 0.85, respectively. You need to make sure that an individual ## that returns a positive test really has disease, so the tests will be ## interpreted in series (to improve specificity). ## What is the diagnostic sensitivity and specificity of this testing ## regime? rsu.dxtest(se = c(0.90,0.95), sp = c(0.80,0.85), interpretation = "series", covar = c(0,0)) ## Interpretation of these tests in series returns a diagnostic sensitivity ## of 0.855 and a diagnostic specificity of 0.970. ## EXAMPLE 2 (from Dohoo, Martin and Stryhn p 113): ## An IFAT and PCR are to be used to diagnose infectious salmon anaemia. ## The diagnostic sensitivity and specificity of the IFAT is 0.784 and 0.951, ## respectively. The diagnostic sensitivity and specificity of the PCR is ## 0.926 and 0.979, respectively. It is known that the two tests are dependent, ## with details of the covariance calculated above. What is the expected ## sensitivity and specificity if the tests are to be interpreted in parallel? rsu.dxtest(se = c(0.784,0.926), sp = c(0.951,0.979), interpretation = "parallel", covar = c(0.035,-0.001)) ## Interpreting test results in parallel and accounting for the lack of ## test independence returns a diagnostic sensitivity of 0.949 and diagnostic ## specificity of 0.930. } \keyword{methods} epiR/man/epi.ssclus1estc.Rd0000644000176200001440000001012614164044502015243 0ustar liggesusers\name{epi.ssclus1estc} \alias{epi.ssclus1estc} \title{ Sample size to estimate a continuous outcome using one-stage cluster sampling } \description{ Sample size to estimate a continuous outcome using one-stage cluster sampling. } \usage{ epi.ssclus1estc(b, N, xbar, xsigma, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{b}{scalar integer or vector of length two, the number of individual listing units in each cluster to be sampled. See details, below.} \item{N}{scalar integer, representing the total number of individual listing units in the population.} \item{xbar}{scalar number, the expected mean of the continuous variable to be estimated.} \item{xsigma}{scalar number, the expected standard deviation of the continuous variable to be estimated.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{rho}{scalar number, the intracluster correlation.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \value{ A list containing the following: \item{n.psu}{the total number of primary sampling units (clusters) to be sampled for the specified level of confidence and relative error.} \item{n.ssu}{the total number of secondary sampling units to be sampled for the specified level of confidence and relative error.} \item{DEF}{the design effect.} \item{rho}{the intracluster correlation, as entered by the user.} } \details{ In many situations it is common for sampling units to be aggregated into clusters. Typical examples include individuals within households, children within classes (within schools) and cows within herds. 
We use the term primary sampling unit (PSU) to refer to what gets sampled first (clusters) and secondary sampling unit (SSU) to refer to what gets sampled second (individual listing units within each cluster). In this documentation the terms primary sampling unit and cluster are used interchangeably. Similarly, the terms secondary sampling unit and individual listing units are used interchangeably. \code{b} as a scalar integer represents the total number of individual listing units from each cluster to be sampled. If \code{b} is a vector of length two the first element represents the mean number of individual listing units to be sampled from each cluster and the second element represents the standard deviation of the number of individual listing units to be sampled from each cluster. } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 258. Machin D, Campbell MJ, Tan SB, Tan SH (2018). Sample Sizes for Clinical, Laboratory ad Epidemiological Studies, Fourth Edition. Wiley Blackwell, London, pp. 195 - 214. } \examples{ ## EXAMPLE 1: ## A survey to estimate the average number of residents over 75 years of ## age that require the services of a nurse in a given retirement village is ## to be carried out using a one-stage cluster sampling strategy. ## There are five housing complexes in the village with 25 residents in each. ## We expect that there might be an average of 34 residents meeting this ## criteria (SD 5.5). We would like the estimated sample size to provide us ## with an estimate that is within 10\% of the true value. Previous studies ## report an intracluster correlation for the number of residents requiring the ## services of a nurse in this retirement village housing complexes to ## be 0.10. How many housing complexes (clusters) should be sampled? epi.ssclus1estc(b = 25, N = 5 * 25, xbar = 34, xsigma = 5.5, epsilon = 0.10, error = "relative", rho = 0.10, nfractional = FALSE, conf.level = 0.95) ## A total of 2 housing complexes need to be sampled to meet the specifications ## of this study. } \keyword{univar} epiR/man/epi.ssxsectn.Rd0000644000176200001440000001517514164045723014660 0ustar liggesusers\name{epi.ssxsectn} \alias{epi.ssxsectn} \title{ Sample size, power or minimum detectable prevalence ratio or odds ratio for a cross-sectional study } \description{ Sample size, power or minimum detectable prevalence ratio or odds ratio for a cross-sectional study. } \usage{ epi.ssxsectn(pdexp1, pdexp0, pexp = NA, n = NA, power = 0.80, r = 1, N, design = 1, sided.test = 2, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{pdexp1}{the expected prevalence of the outcome in the exposed group (0 to 1).} \item{pdexp0}{the expected prevalence of the outcome in the non-exposed group (0 to 1).} \item{pexp}{the expected prevalence of exposure to the hypothesised risk factor in the population (0 to 1).} \item{n}{scalar, defining the total number of subjects in the study (i.e., the number in both the exposed and unexposed groups).} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the exposed group divided by the number in the unexposed group.} \item{N}{scalar, the estimated number of individuals in the population.} \item{design}{scalar, the estimated design effect.} \item{sided.test}{use a one- or two-sided test? 
Use a two-sided test if you wish to evaluate whether or not the outcome incidence risk in the exposed group is greater than or less than the outcome incidence risk in the unexposed group. Use a one-sided test to evaluate whether or not the outcome incidence risk in the exposed group is greater than the outcome incidence risk in the unexposed group.} \item{finite.correction}{logical, apply a finite correction factor?} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, defining the level of confidence in the computed result.} } \details{ The methodology in this function follows the methodology described in Chapter 8 of Woodward (2014), pp. 295 - 329. } \value{ A list containing the following: \item{n.total}{the total number of subjects required for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the exposed (treatment) group compared with the non-exposed (control) group.} \item{n.exp1}{the total number of subjects in the exposed (treatment) group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the exposed (treatment) group compared with the non-exposed (control) group.} \item{n.exp0}{the total number of subjects in the non-exposed (control) group for the specified level of confidence and power, respecting the requirement for \code{r} times as many individuals in the exposed (treatment) group compared with the non-exposed (control) group.} \item{power}{the power of the study given the number of study subjects, the expected effect size and level of confidence.} \item{pr}{the prevalence of the outcome in the exposed group divided by the prevalence of the outcome in the unexposed group (the prevalence ratio).} \item{or}{the odds of the outcome in the exposed group divided by the odds of the outcome in the unexposed group (the odds ratio).} } \references{ Kelsey JL, Thompson WD, Evans AS (1986). Methods in Observational Epidemiology. Oxford University Press, London, pp. 254 - 284. Mittleman MA (1995). Estimation of exposure prevalence in a population at risk using data from cases and an external estimate of the relative risk. Epidemiology 6: 551 - 553. Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 295 - 329. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. Values need to be entered for \code{pdexp0}, \code{pexp}, \code{n}, and \code{power} to return a value for the prevalence ratio \code{pr} and odds ratio \code{or}. In this situation, the lower value of \code{pr} represents the maximum detectable prevalence ratio that is less than 1; the upper value of \code{pr} represents the minimum detectable prevalence ratio greater than 1. A value for \code{pexp} doesn't need to be entered if you want to calculate sample size or study power. When calculating study power or minimum detectable prevalence risk ratio when \code{finite.correction = TRUE} the function takes the values of \code{n} and \code{N} entered by the user and back-calculates a value of \code{n} assuming an infinite population. Values for \code{power}, \code{pr} and \code{or} are then returned, assuming the back-calculated value of \code{n} is equivalent to the value of \code{n} entered by the user. See the documentation for \code{\link{epi.sscohortc}} for an example using the \code{design} facility implemented in this function. 
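When both \code{pdexp1} and \code{pdexp0} are specified, the \code{pr} and \code{or} values returned by this function correspond to simple transformations of those prevalence estimates. The following sketch (object names are illustrative only) reproduces the two ratios for the prevalences used in Example 1 below: \preformatted{
## Expected prevalence of the outcome in the exposed and non-exposed groups:
pdexp1 <- 0.10; pdexp0 <- 0.05

## Prevalence ratio --- prevalence of the outcome in the exposed group
## divided by the prevalence of the outcome in the unexposed group:
pdexp1 / pdexp0
## Returns 2.

## Odds ratio --- odds of the outcome in the exposed group divided by the
## odds of the outcome in the unexposed group:
(pdexp1 / (1 - pdexp1)) / (pdexp0 / (1 - pdexp0))
## Returns approximately 2.11.
} 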
} \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to quantify the association ## between farm management type (intensive, extensive) and evidence of ## Q fever in dairy goat herds. The investigators would like to be 0.80 sure ## of being able to detect when the risk ratio of Q fever is 2.0 for ## intensively managed herds, using a 0.05 significance test. Previous evidence ## suggests that the prevalence of Q fever in extensively managed dairy goat ## herds is 5 per 100 herds at risk and the prevalence of intensively managed ## herds in the population (the prevalence of exposure) is around 0.20. ## Assuming equal numbers of intensively managed and extensively managed ## herds will be sampled, how many herds need to be enrolled into the study? ## You estimate that there are around 60 dairy goat herds in your study area. pdexp1 = 2.0 * (5 / 100); pdexp0 = 5 / 100 epi.ssxsectn(pdexp1 = pdexp1, pdexp0 = pdexp0, pexp = 0.20, n = NA, power = 0.80, r = 1, N = 60, design = 1, sided.test = 2, finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95) ## A total of 58 of the 60 herds need to be enrolled into the study ## (29 intensively managed and 29 extensively managed herds). ## EXAMPLE 2: ## Say, for example, we're only able to enrol 45 herds into the study ## described above. What is the minimum and maximum detectable prevalence ## ratio and minimum and maximum detectable odds ratio? epi.ssxsectn(pdexp1 = NA, pdexp0 = pdexp0, pexp = 0.20, n = 45, power = 0.80, r = 1, N = 60, design = 1, sided.test = 2, finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95) ## The minimum detectable prevalence ratio >1 is 3.64. The maximum detectable ## prevalence ratio <1 is 0. ## The minimum detectable odds ratio >1 is 4.65. The maximum detectable ## odds ratio <1 is 0. } \keyword{univar} epiR/man/epi.ssstrataestc.Rd0000644000176200001440000000711714075464214015527 0ustar liggesusers\name{epi.ssstrataestc} \alias{epi.ssstrataestc} \title{Sample size to estimate a continuous outcome using a stratified random sampling design } \description{ Sample size to estimate a continuous outcome using a stratified random sampling design. } \usage{ epi.ssstrataestc(strata.n, strata.xbar, strata.sigma, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95) } \arguments{ \item{strata.n}{vector of integers, defining the number of individual listing units in each strata.} \item{strata.xbar}{vector of numbers, defining the expected means of the continuous variable to be estimated for each strata.} \item{strata.sigma}{vector of numbers, defining the expected standard deviation of the continous variable to be estimated for each strata.} \item{epsilon}{scalar number, the maximum difference between the estimate and the unknown population value expressed in absolute or relative terms.} \item{error}{character string. 
Options are \code{absolute} for absolute error and \code{relative} for relative error.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar number, the level of confidence in the computed result.} } \value{ A list containing the following: \item{strata.sample}{the estimated sample size for each strata.} \item{strata.total}{the estimated total size.} \item{strata.stats}{\code{mean} the mean across all strata, \code{sigma.bx} the among-strata variance, \code{sigma.wx} the within-strata variance, and \code{sigma.x} the among-strata variance plus the within-strata variance, \code{rel.var} the within-strata variance divided by the square of the mean, and \code{gamma} the ratio of among-strata variance to within-strata variance.} } \references{ Levy PS, Lemeshow S (1999). Sampling of Populations Methods and Applications. Wiley Series in Probability and Statistics, London, pp. 175 - 179. } \author{ Mark Stevenson (Faculty of Veterinary and Agricultural Sciences, The University of Melbourne, Australia). Javier Sanchez (Atlantic Veterinary College, University of Prince Edward Island, Charlottetown Prince Edward Island, C1A 4P3, Canada). } \examples{ ## EXAMPLE 1 (from Levy and Lemeshow 1999, page 176 -- 178): ## We plan to take a sample of the members of a health maintenance ## organisation (HMO) for purposes of estimating the average number ## of hospital episodes per person per year. The sample will be selected ## from membership lists according to age (under 45 years, 45 -- 64 years, ## 65 years and over). The number of members in each strata are 600, 500, ## and 400 (respectively). Previous data estimates the mean number of ## hospital episodes per year for each strata as 0.164, 0.166, and 0.236 ## (respectively). The variances of these estimates are 0.245, 0.296, and ## 0.436 (respectively). How many from each strata should be sampled to be ## 95\% certain that the sample estimate of hospital episodes is within 20\% of the ## true value? strata.n <- c(600,500,400) strata.xbar <- c(0.164,0.166,0.236) strata.sigma <- sqrt(c(0.245,0.296,0.436)) epi.ssstrataestc(strata.n, strata.xbar, strata.sigma, epsilon = 0.20, error = "relative", nfractional = FALSE, conf.level = 0.95) ## The number allocated to the under 45 years, 45 -- 64 years, and 65 years ## and over strata should be 224, 187, and 150 (a total of 561). These ## results differ from the worked example provided in Levy and Lemeshow where ## certainty is set to approximately 99\%. } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.smd.Rd0000644000176200001440000000671413534142776013572 0ustar liggesusers\name{epi.smd} \alias{epi.smd} \title{Fixed-effects meta-analysis of continuous outcomes using the standardised mean difference method } \description{ Computes the standardised mean difference and confidence intervals of the standardised mean difference for continuous outcome data.
} \usage{ epi.smd(mean.trt, sd.trt, n.trt, mean.ctrl, sd.ctrl, n.ctrl, names, method = "cohens", conf.level = 0.95) } \arguments{ \item{mean.trt}{a vector, defining the mean outcome in the treatment group.} \item{sd.trt}{a vector, defining the standard deviation of the outcome in the treatment group.} \item{n.trt}{a vector, defining the number of subjects in the treatment group.} \item{mean.ctrl}{a vector, defining the mean outcome in the control group.} \item{sd.ctrl}{a vector, defining the standard deviation of the outcome in the control group.} \item{n.ctrl}{a vector, defining the number of subjects in the control group.} \item{names}{character string identifying each trial.} \item{method}{a character string indicating the method to be used. Options are \code{cohens}, \code{hedges} or \code{glass}.} \item{conf.level}{magnitude of the returned confidence interval. Must be a single number between 0 and 1.} } \value{ A list containing the following: \item{md}{standardised mean difference and its confidence interval computed for each trial.} \item{md.invar}{the inverse variance (fixed effects) summary standardised mean difference.} \item{md.dsl}{the DerSimonian and Laird (random effects) summary standardised mean difference.} \item{heterogeneity}{a vector containing \code{Q} the heterogeneity test statistic, \code{df} the degrees of freedom and its associated P-value.} } \references{ Deeks JJ, Altman DG, Bradburn MJ (2001). Statistical methods for examining heterogeneity and combining results from several studies in meta-analysis. In: Egger M, Davey Smith G, Altman D (eds). Systematic Review in Health Care Meta-Analysis in Context. British Medical Journal, London, pp. 290 - 291. } \note{ The standardised mean difference method is used when trials assess the same outcome, but measure it in a variety of ways. For example: a set of trials might measure depression scores in psychiatric patients but use different methods to quantify depression. In this circumstance it is necessary to standardise the results of the trials to a uniform scale before they can be combined. The standardised mean difference method expresses the size of the treatment effect in each trial relative to the variability observed in that trial. } \seealso{ \code{\link{epi.dsl}, \link{epi.iv}, \link{epi.mh}} } \examples{ ## EXAMPLE 1: ## Assertive community treatment (ACT) for the severely mentally ill was ## compared to standard care. A systematic review comparing ACT with standard ## care found three trials that assessed mental ## status after 12 months. All three trials used a different scoring system, ## so standardisation is required before they can be compared. names <- c("Audini", "Morse", "Lehman") mean.trt <- c(41.4, 0.95, -4.10) mean.ctrl <- c(42.3, 0.89, -3.80) sd.trt <- c(14, 0.76, 0.83) sd.ctrl <- c(12.4, 0.65, 0.87) n.trt <- c(30, 37, 67) n.ctrl <- c(28, 35, 58) epi.smd(mean.trt, sd.trt, n.trt, mean.ctrl, sd.ctrl, n.ctrl, names, method = "cohens", conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/man/epi.sscc.Rd0000644000176200001440000002723314164036762013742 0ustar liggesusers\name{epi.sscc} \alias{epi.sscc} \title{ Sample size, power or minimum detectable odds ratio for an unmatched or matched case-control study } \description{ Calculates the sample size, power or minimum detectable odds ratio for an unmatched or matched case-control study.
} \usage{ epi.sscc(OR, p1 = NA, p0, n, power, r = 1, phi.coef = 0, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "unmatched", fleiss = FALSE) } \arguments{ \item{OR}{scalar, the expected study odds ratio.} \item{p1}{scalar, the prevalence of exposure amongst the cases.} \item{p0}{scalar, the prevalence of exposure amongst the controls.} \item{n}{scalar, the total number of subjects in the study (i.e., the number of cases plus the number of controls).} \item{power}{scalar, the required study power.} \item{r}{scalar, the number in the control group divided by the number in the case group.} \item{phi.coef}{scalar, the correlation between case and control exposures for matched pairs. Ignored when \code{method = "unmatched"}.} \item{design}{scalar, the design effect.} \item{sided.test}{use a one- or two-sided test? Use a two-sided test if you wish to evaluate whether or not the odds of exposure in cases is greater than or less than the odds of exposure in controls. Use a one-sided test to evaluate whether or not the odds of exposure in cases is greater than the odds of exposure in controls.} \item{nfractional}{logical, return fractional sample size.} \item{conf.level}{scalar, the level of confidence in the computed result.} \item{method}{a character string defining the method to be used. Options are \code{unmatched} or \code{matched}.} \item{fleiss}{logical, indicating whether or not the Fleiss correction should be applied. This argument is ignored when \code{method = "matched"}.} } \details{ This function implements the methodology described by Dupont (1988). A detailed description of sample size calculations for case-control studies (with numerous worked examples, some of them reproduced below) is provided by Woodward (2014), pp. 295 - 329. A value for \code{p1} is only required if Fleiss correction is used. For this reason the default for \code{p1} is set to \code{NA}. The correlation between case and control exposures for matched pairs (\code{phi.coef}) can be estimated from previous studies using Equation (6.2) from Fleiss et al. 2003, p. 98. In the function \code{\link{epi.2by2}} the variable \code{phi.coef} is included with the output for each of the uncorrected chi-squared tests. This value can be used for argument \code{phi.coef} in \code{epi.sscc}. The methodology described in Woodward (2014), pp. 295 - 329 uses the proportion of discordant pairs to determine the sample size for a matched case-control study. Note that the proportion of discordant pairs is likely to vary considerably between different studies since it depends not only on the correlation between case and control exposures but also on the exposure prevalence and the odds ratio. In contrast, estimates of \code{phi.coef} should be more stable between similar studies. When no estimate of \code{phi.coef} is available, investigators may prefer to perform their power calculations under the assumption that \code{phi.coef} equals, say, 0.2 rather than make the questionable independence assumption required by most other methods.
} \value{ A list containing the following: \item{n.total}{the total number of subjects required to estimate the specified odds ratio at the desired level of confidence and power (i.e., the number of cases plus the number of controls).} \item{n.case}{the total number of case subjects required to estimate the specified odds ratio at the desired level of confidence and power.} \item{n.control}{the total number of control subjects required to estimate the specified odds ratio at the desired level of confidence and power.} \item{power}{the power of the study given the number of study subjects, the specified odds ratio and the desired level of confidence.} \item{OR}{the expected detectable odds ratio given the number of study subjects, the desired power and desired level of confidence.} } \references{ Dupont WD (1988) Power calculations for matched case-control studies. Biometrics 44: 1157 - 1168. Fleiss JL, Levin B, Paik MC (2003). Statistical Methods for Rates and Proportions. John Wiley and Sons, New York. Kelsey JL, Thompson WD, Evans AS (1986). Methods in Observational Epidemiology. Oxford University Press, London, pp. 254 - 284. Woodward M (2014). Epidemiology Study Design and Data Analysis. Chapman & Hall/CRC, New York, pp. 295 - 329. } \note{ The power of a study is its ability to demonstrate the presence of an association, given that an association actually exists. See the documentation for \code{\link{epi.sscohortc}} for an example using the \code{design} facility implemented in this function. } \examples{ ## EXAMPLE 1 (from Woodward 2014 Example 8.17 p. 318): ## A case-control study of the relationship between smoking and CHD is ## planned. A sample of men with newly diagnosed CHD will be compared for ## smoking status with a sample of controls. Assuming an equal number of ## cases and controls, how many study subject are required to detect an ## odds ratio of 2.0 with 0.90 power using a two-sided 0.05 test? Previous ## surveys have shown that around 0.30 of males without CHD are smokers. epi.sscc(OR = 2.0, p1 = NA, p0 = 0.30, n = NA, power = 0.90, r = 1, phi.coef = 0, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "unmatched", fleiss = FALSE) ## A total of 376 men need to be sampled: 188 cases and 188 controls. ## EXAMPLE 2 (from Woodward 2014 Example 8.18 p. 320): ## Suppose we wish to determine the power to detect an odds ratio of 2.0 ## using a two-sided 0.05 test when 188 cases and 940 controls ## are available (that is, the ratio of controls to cases is 5:1). Assume ## the prevalence of smoking in males without CHD is 0.30. n <- 188 + 940 epi.sscc(OR = 2.0, p1 = NA, p0 = 0.30, n = n, power = NA, r = 5, phi.coef = 0, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "unmatched", fleiss = TRUE) ## The power of this study, with the given sample size allocation is 0.99. ## EXAMPLE 3: ## The following statement appeared in a study proposal to identify risk ## factors for campylobacteriosis in humans: ## `We will prospectively recruit 300 culture-confirmed Campylobacter cases ## reported under the Public Health Act. We will then recruit one control per ## case from general practices of the enrolled cases, using frequency matching ## by age and sex. With exposure levels of 10\% (thought to be realistic ## given past foodborne disease case control studies) this sample size ## will provide 80\% power to detect an odds ratio of 2 at the 5\% alpha ## level.' 
## Confirm the statement that 300 case subjects will provide 80\% power in ## this study. epi.sscc(OR = 2.0, p1 = NA, p0 = 0.10, n = 600, power = NA, r = 1, phi.coef = 0.01, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "matched", fleiss = TRUE) ## If the true odds ratio for Campylobacter in exposed subjects relative to ## unexposed subjects is 2.0 we will be able to reject the null hypothesis ## that this odds ratio equals 1 with probability (power) 0.826. The Type I # error probability associated with this test of this null hypothesis is 0.05. ## EXAMPLE 4: ## We wish to conduct a case-control study to assess whether bladder cancer ## may be associated with past exposure to cigarette smoking. Cases will be ## patients with bladder cancer and controls will be patients hospitalised ## for injury. It is assumed that 20\% of controls will be smokers or past ## smokers, and we wish to detect an odds ratio of 2 with power 90\%. ## Three controls will be recruited for every case. How many subjects need ## to be enrolled in the study? epi.sscc(OR = 2.0, p1 = NA, p0 = 0.20, n = NA, power = 0.90, r = 3, phi.coef = 0, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "unmatched", fleiss = FALSE) ## A total of 620 subjects need to be enrolled in the study: 155 cases and ## 465 controls. ## An alternative is to conduct a matched case-control study rather than the ## unmatched design outlined above. One case will be matched to one control ## and the correlation between case and control exposures for matched pairs ## (phi.coef) is estimated to be 0.01 (low). Using the same assumptions as ## those described above, how many study subjects will be required? epi.sscc(OR = 2.0, p1 = NA, p0 = 0.20, n = NA, power = 0.90, r = 1, phi.coef = 0.01, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "matched", fleiss = FALSE) ## A total of 456 subjects need to be enrolled in the study: 228 cases and ## 228 controls. ## EXAMPLE 5: ## Code to reproduce the isograph shown in Figure 2 in Dupont (1988): r <- 1 p0 = seq(from = 0.05, to = 0.95, length = 50) OR <- seq(from = 1.05, to = 6, length = 100) dat.df05 <- expand.grid(p0 = p0, OR = OR) dat.df05$n.total <- NA for(i in 1:nrow(dat.df05)){ dat.df05$n.total[i] <- epi.sscc(OR = dat.df05$OR[i], p1 = NA, p0 = dat.df05$p0[i], n = NA, power = 0.80, r = 1, phi.coef = 0, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "unmatched", fleiss = FALSE)$n.total } grid.n <- matrix(dat.df05$n.total, nrow = length(p0)) breaks <- c(22:30,32,34,36,40,45,50,55,60,70,80,90,100,125,150,175, 200,300,500,1000) par(mar = c(5,5,0,5), bty = "n") contour(x = p0, y = OR, z = log10(grid.n), add = FALSE, levels = log10(breaks), labels = breaks, xlim = c(0,1), ylim = c(1,6), las = 1, method = "flattest", xlab = 'Proportion of controls exposed', ylab = "Minimum OR to detect") \dontrun{ ## The same plot using ggplot2: library(ggplot2); library(directlabels) p <- ggplot(data = dat.df05, aes(x = p0, y = OR, z = n.total)) + theme_bw() + geom_contour(aes(colour = ..level..), breaks = breaks) + scale_x_continuous(limits = c(0,1), name = "Proportion of controls exposed") + scale_y_continuous(limits = c(1,6), name = "Minimum OR to detect") print(direct.label(p, list("far.from.others.borders", "calc.boxes", "enlarge.box", hjust = 1, vjust = 1, box.color = NA, fill = "transparent", "draw.rects"))) } ## EXAMPLE 6 (from Dupont 1988, p. 
1164): ## A matched case control study is to be carried out to quantify the ## association between exposure A and an outcome B. Assume the prevalence ## of exposure in controls is 0.60 and the correlation between case and ## control exposures for matched pairs (phi.coef) is 0.20 (moderate). Assuming ## an equal number of cases and controls, how many subjects need to be ## enrolled into the study to detect an odds ratio of 3.0 with 0.80 power ## using a two-sided 0.05 test? epi.sscc(OR = 3.0, p1 = NA, p0 = 0.60, n = NA, power = 0.80, r = 1, phi.coef = 0.2, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "matched", fleiss = FALSE) ## A total of 162 subjects need to be enrolled in the study: 81 cases and ## 81 controls. ## How many cases and controls are required if we select three ## controls per case? epi.sscc(OR = 3.0, p1 = NA, p0 = 0.60, n = NA, power = 0.80, r = 3, phi.coef = 0.2, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "matched", fleiss = FALSE) ## A total of 204 subjects need to be enrolled in the study: 51 cases and ## 153 controls. } \keyword{univar} epiR/man/rsu.sssep.rsfreecalc.Rd0000644000176200001440000001165113754670004016302 0ustar liggesusers\name{rsu.sssep.rsfreecalc} \alias{rsu.sssep.rsfreecalc} \title{ Sample size to achieve a desired surveillance system sensitivity to detect disease at a specified design prevalence assuming representative sampling, imperfect unit sensitivity and specificity } \description{ Calculates the sample size to achieve a desired surveillance system sensitivity to detect disease at a specified design prevalence assuming representative sampling, imperfect unit sensitivity and specificity .} \usage{ rsu.sssep.rsfreecalc(N, pstar, mse.p, msp.p, se.u, sp.u, method = "hypergeometric", max.ss = 32000) } \arguments{ \item{N}{scalar, integer representing the total number of subjects eligible to be sampled.} \item{pstar}{scalar, numeric, representing the design prevalence, the hypothetical outcome prevalence to be detected. See details, below.} \item{mse.p}{scalar, numeric (0 to 1) representing the desired population level sensitivity. See details, below.} \item{msp.p}{scalar, numeric (0 to 1) representing the desired population level specificity. See details, below.} \item{se.u}{scalar (0 to 1) representing the sensitivity of the diagnostic test at the surveillance unit level.} \item{sp.u}{scalar, numeric (0 to 1) representing the specificity of the diagnostic test at the surveillance unit level.} \item{method}{a character string indicating the calculation method to use. Options are \code{binomial} or \code{hypergeometric}.} \item{max.ss}{scalar, integer defining the maximum upper limit for required sample size.} } \details{ Type I error is the probabilty of rejecting the null hypothesis when in reality it is true. In disease freedom studies this is the situation where you declare a population as disease negative when, in fact, it is actually disease positive. Type I error equals \code{1 - SeP}. Type II error is the probabilty of accepting the null hypothesis when in reality it is false. In disease freedom studies this is the situation where you declare a population as disease positive when, in fact, it is actually disease negative. Type II error equals \code{1 - SpP}. Argument \code{pstar} can be expressed as either a proportion or integer. Where the input value for \code{pstar} is between 0 and 1 the function interprets \code{pstar} as a prevalence. 
Where the input value for \code{pstar} is an integer greater than 1 the function interprets \code{pstar} as the number of outcome-positive individuals in the population of individuals at risk. A value for design prevalence is then calculated as \code{pstar / N}. } \value{ A list comprised of two data frames: \code{summary} and \code{details}. Data frame \code{summary} lists: \item{n}{the minimum number of individuals to be sampled.} \item{N}{the total number of individuals eligible to be sampled.} \item{c}{the cut-point number of positives to achieve the specified surveillance system (population-level) sensitivity and specificity.} \item{pstar}{the design prevalence.} \item{p1}{the probability that the population has the outcome of interest at the specified design prevalence.} \item{se.p}{the calculated population level sensitivity.} \item{sp.p}{the calculated population level specificity.} Data frame \code{details} lists: \item{n}{the minimum number of individuals to be sampled.} \item{se.p}{the calculated population level sensitivity.} \item{sp.p}{the calculated population level specificity.} } \references{ Cameron A, Baldock C (1998a). A new probability formula for surveys to substantiate freedom from disease. Preventive Veterinary Medicine 34: 1 - 17. Cameron A, Baldock C (1998b). Two-stage sampling in surveys to substantiate freedom from disease. Preventive Veterinary Medicine 34: 19 - 30. Cameron A (1999). Survey Toolbox for Livestock Diseases --- A practical manual and software package for active surveillance of livestock diseases in developing countries. Australian Centre for International Agricultural Research, Canberra, Australia. } \examples{ ## EXAMPLE 1: ## A cross-sectional study is to be carried out to confirm the absence of ## brucellosis in dairy herds using a bulk milk tank test assuming a design ## prevalence of 0.05. Assume the total number of dairy herds in your study ## area is 5000 and the bulk milk tank test to be used has a diagnostic ## sensitivity of 0.95 and a specificity of 0.98. How many herds need to be ## sampled to be 95\% certain that the prevalence of brucellosis in dairy herds ## is less than the design prevalence if fewer than a specified number of ## tests return a positive result? rsu.sssep.rsfreecalc(N = 5000, pstar = 0.05, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ## A system sensitivity of 95\% is achieved with a total sample size of 194 ## herds, assuming a cut-point of 7 or more positive herds are required to ## return a positive survey result. } \keyword{univar} epiR/man/epi.kappa.Rd0000644000176200001440000002342014164036762014075 0ustar liggesusers\name{epi.kappa} \alias{epi.kappa} \title{Kappa statistic} \description{ Computes the kappa statistic and its confidence interval. } \usage{ epi.kappa(dat, method = "fleiss", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) } \arguments{ \item{dat}{an object of class matrix comprised of \code{n} rows and \code{n} columns listing the individual cell frequencies.} \item{method}{a character string indicating the method to use. Options are \code{fleiss}, \code{watson}, \code{altman} or \code{cohen}.} \item{alternative}{a character string specifying the alternative hypothesis, must be one of \code{two.sided}, \code{greater} or \code{less}.} \item{conf.level}{magnitude of the returned confidence interval.
Must be a single number between 0 and 1.} } \details{ Kappa is a measure of agreement beyond the level of agreement expected by chance alone. The observed agreement is the proportion of samples for which both methods (or observers) agree. The bias and prevalence adjusted kappa (Byrt et al. 1993) provides a measure of observed agreement, an index of the bias between observers, and an index of the differences between the overall proportion of `yes' and `no' assessments. Bias and prevalence adjusted kappa are only returned if the number of rows and columns of argument \code{dat} equal 2. Common interpretations for the kappa statistic are as follows: < 0.2 slight agreement, 0.2 - 0.4 fair agreement, 0.4 - 0.6 moderate agreement, 0.6 - 0.8 substantial agreement, > 0.8 almost perfect agreement (Sim and Wright, 2005). Confidence intervals for the proportion of observations where there is agreement are calculated using the exact method (Collett 1999). The argument \code{alternative = "greater"} tests the hypothesis that kappa is greater than 0. } \value{ Where the number of rows and columns of argument \code{dat} is greater than 2 a list containing the following: \item{prop.agree}{a data frame with \code{obs} the observed proportion of agreement and \code{exp} the expected proportion of agreement.} \item{pabak}{a data frame with the prevalence and bias corrected kappa statistic and the lower and upper bounds of the confidence interval for the prevalence and bias corrected kappa statistic.} \item{kappa}{a data frame with the kappa statistic, the standard error of the kappa statistic and the lower and upper bounds of the confidence interval for the kappa statistic.} \item{z}{a data frame containing the z test statistic for kappa and its associated P-value.} Where the number of rows and columns of argument \code{dat} is equal to 2 a list containing the following: \item{prop.agree}{a data frame with \code{obs} the observed proportion of agreement and \code{exp} the expected proportion of agreement.} \item{pindex}{a data frame with the prevalence index, the standard error of the prevalence index and the lower and upper bounds of the confidence interval for the prevalence index.} \item{bindex}{a data frame with the bias index, the standard error of the bias index and the lower and upper bounds of the confidence interval for the bias index.} \item{pabak}{a data frame with the prevalence and bias corrected kappa statistic and the lower and upper bounds of the confidence interval for the prevalence and bias corrected kappa statistic.} \item{kappa}{a data frame with the kappa statistic, the standard error of the kappa statistic and the lower and upper bounds of the confidence interval for the kappa statistic.} \item{z}{a data frame containing the z test statistic for kappa and its associated P-value.} \item{mcnemar}{a data frame containing the McNemar test statistic for kappa and its associated P-value.} } \references{ Altman DG, Machin D, Bryant TN, Gardner MJ (2000). Statistics with Confidence, second edition. British Medical Journal, London, pp. 116 - 118. Byrt T, Bishop J, Carlin JB (1993). Bias, prevalence and kappa. Journal of Clinical Epidemiology 46: 423 - 429. Cohen J (1960). A coefficient of agreement for nominal scales. Educational and Psychological Measurement 20: 37 - 46. Collett D (1999). Modelling Binary Data. Chapman & Hall/CRC, Boca Raton Florida, pp. 24. Dohoo I, Martin W, Stryhn H (2010). Veterinary Epidemiologic Research, second edition. 
AVC Inc, Charlottetown, Prince Edward Island, Canada, pp. 98 - 99. Fleiss JL, Levin B, Paik MC (2003). Statistical Methods for Rates and Proportions, third edition. John Wiley & Sons, London, 598 - 626. Rothman KJ (2012). Epidemiology An Introduction. Oxford University Press, London, pp. 164 - 175. Silva E, Sterry RA, Kolb D, Mathialagan N, McGrath MF, Ballam JM, Fricke PM (2007) Accuracy of a pregnancy-associated glycoprotein ELISA to determine pregnancy status of lactating dairy cows twenty-seven days after timed artificial insemination. Journal of Dairy Science 90: 4612 - 4622. Sim J, Wright CC (2005) The kappa statistic in reliability studies: Use, interpretation, and sample size requirements. Physical Therapy 85: 257 - 268. Watson PF, Petrie A (2010) Method agreement analysis: A review of correct methodology. Theriogenology 73: 1167 - 1179. } \note{ \tabular{llll}{ --------------- \tab --------------- \tab --------------- \tab ------------------ \cr \tab Obs1 + \tab Obs1 - \tab Total \cr --------------- \tab --------------- \tab --------------- \tab ------------------ \cr Obs 2 + \tab \code{a} \tab \code{b} \tab \code{a+b} \cr Obs 2 - \tab \code{c} \tab \code{d} \tab \code{c+d} \cr --------------- \tab --------------- \tab --------------- \tab ------------------ \cr Total \tab \code{a+c} \tab \code{b+d} \tab \code{a+b+c+d=N} \cr --------------- \tab --------------- \tab --------------- \tab ------------------ \cr } The kappa coefficient is influenced by the prevalence of the condition being assessed. A prevalence effect exists when the proportion of agreements on the positive classification differs from that of the negative classification. If the prevalence index is high (that is, the prevalence of a positive rating is very high or very low) chance agreement is also high and the value of kappa is reduced accordingly. The effect of prevalence on kappa is greater for large values of kappa than for small values (Byrt et al. 1993). Using the notation above, the prevalence index is calculated as \code{((a/N) - (d/N))}. Confidence intervals for the prevalence index are based on methods used for a difference in two proportions. See Rothman (2012, p 167 equation 9-2) for details. Bias is the extent to which raters disagree on the proportion of positive (or negative) cases. Bias affects interpretation of the kappa coefficient. When there is a large amount of bias, kappa is higher than when bias is low or absent. In contrast to prevalence, the effect of bias is greater when kappa is small than when it is large (Byrt et al. 1993). Using the notation above, the bias index is calculated as \code{((a + b)/N - (a + c)/N)}. Confidence intervals for the bias index are based on methods used for a difference in two proportions. See Rothman (2012, p 167 equation 9-2) for details. The McNemar test is used to test for the presence of bias. A statistically significant McNemar test (generally if P < 0.05) shows that there is evidence of a systematic difference between the proportion of `positive' responses from the two methods. If one method provides the `true values' (i.e., it is regarded as the gold standard method) the absence of a systematic difference implies that there is no bias. However, a non-significant result indicates only that there is no evidence of a systematic effect. A systematic effect may be present, but the power of the test may be inadequate to determine its presence. 
} \examples{ ## EXAMPLE 1: ## Kidney samples from 291 salmon were split with one half of the ## samples sent to each of two laboratories where an IFAT test ## was run on each sample. The following results were obtained: ## Lab 1 positive, lab 2 positive: 19 ## Lab 1 positive, lab 2 negative: 10 ## Lab 1 negative, lab 2 positive: 6 ## Lab 1 negative, lab 2 negative: 256 dat.m01 <- matrix(c(19,10,6,256), nrow = 2, byrow = TRUE) colnames(dat.m01) <- c("L1-pos","L1-neg") rownames(dat.m01) <- c("L2-pos","L2-neg") epi.kappa(dat.m01, method = "fleiss", alternative = "greater", conf.level = 0.95) ## The z test statistic is 11.53 (P < 0.01). We accept the alternative ## hypothesis that the kappa statistic is greater than zero. ## The proportion of agreement after chance has been excluded is ## 0.67 (95\% CI 0.56 to 0.79). We conclude, on the basis of ## this sample, that there is substantial agreement between the two ## laboratories. ## EXAMPLE 2 (from Watson and Petrie 2010, page 1170): ## Silva et al. (2007) compared an early pregnancy enzyme-linked immunosorbent ## assay test for pregnancy associated glycoprotein on blood samples collected ## from lactating dairy cows at day 27 after artificial insemination with ## transrectal ultrasound (US) diagnosis of pregnancy at the same stage. ## The results were as follows: ## ELISA positive, US positive: 596 ## ELISA positive, US negative: 61 ## ELISA negative, US positive: 29 ## ELISA negative, US negative: 987 dat.m02 <- matrix(c(596,61,29,987), nrow = 2, byrow = TRUE) colnames(dat.m02) <- c("US-pos","US-neg") rownames(dat.m02) <- c("ELISA-pos","ELISA-neg") epi.kappa(dat.m02, method = "watson", alternative = "greater", conf.level = 0.95) ## The proportion of agreements after chance has been excluded is ## 0.89 (95\% CI 0.86 to 0.91). We conclude that there is substantial ## agreement between the two pregnancy diagnostic methods. } \keyword{univar} epiR/man/epi.descriptives.Rd0000644000176200001440000000456314076572604015510 0ustar liggesusers\name{epi.descriptives} \alias{epi.descriptives} \title{Descriptive statistics } \description{ Computes descriptive statistics for a numeric vector or a table of frequencies for a factor. } \usage{ epi.descriptives(dat, conf.level = 0.95) } \arguments{ \item{dat}{either a numeric vector or a factor.} \item{conf.level}{magnitude of the returned confidence intervals. Must be a single number between 0 and 1.} } \value{ If \code{dat} is numeric a list containing the following: \item{arithmetic}{\code{n} number of observations, \code{mean} arithmetic mean, \code{sd} arithmetic standard deviation, \code{q25} 25th quantile, \code{q50} 50th quantile, \code{q75} 75th quantile, \code{lower} lower bound of the confidence interval, \code{upper} upper bound of the confidence interval, \code{min} minimum value, \code{max} maximum value, and \code{na} number of missing values.} \item{geometric}{\code{n} number of observations, \code{mean} geometric mean, \code{sd} geometric standard deviation, \code{q25} 25th quantile, \code{q50} 50th quantile, \code{q75} 75th quantile, \code{lower} lower bound of the confidence interval, \code{upper} upper bound of the confidence interval, \code{min} minimum value, \code{max} maximum value, and \code{na} number of missing values.} \item{symmetry}{\code{skewness} and \code{kurtosis}.
} If \code{dat} is a factor a data frame listing: \item{level}{The levels of the factor} \item{n}{The frequency of the respective factor level, including the column totals.} } \examples{ ## EXAMPLE 1: ## Generate some data: id <- 1:100 n <- rnorm(100, mean = 0, sd = 1) dat.df01 <- data.frame(id, n) # Add missing values: missing <- dat.df01$id \%in\% sample(dat.df01$id, size = 20) dat.df01$n[missing] <- NA epi.descriptives(dat.df01$n, conf.level = 0.95) ## EXAMPLE 2: ## Generate some data: n <- 1000; p.exp <- 0.50; p.dis <- 0.75 strata <- c(rep("A", times = n / 2), rep("B", times = n / 2)) exp <- rbinom(n = n, size = 1, prob = p.exp) dis <- rbinom(n = n, size = 1, prob = p.dis) dat.df02 <- data.frame(strata, exp, dis) dat.df02$strata <- factor(dat.df02$strata) dat.df02$exp <- factor(dat.df02$exp, levels = c("1", "0")) head(dat.df02) epi.descriptives(dat.df02$exp, conf.level = 0.95) } \keyword{univar}% at least one, from doc/KEYWORDS \keyword{univar}% __ONLY ONE__ keyword per line epiR/DESCRIPTION0000644000176200001440000000353514166031357012673 0ustar liggesusersPackage: epiR Version: 2.0.41 Date: 2022-01-07 Title: Tools for the Analysis of Epidemiological Data Author: Mark Stevenson and Evan Sergeant with contributions from Telmo Nunes, Cord Heuer, Jonathon Marshall, Javier Sanchez, Ron Thornton, Jeno Reiczigel, Jim Robison-Cox, Paola Sebastiani, Peter Solymos, Kazuki Yoshida, Geoff Jones, Sarah Pirikahu, Simon Firestone, Ryan Kyle, Johann Popp, Mathew Jay, Charles Reynard, Allison Cheung, Nagendra Singanallur and Aniko Szabo. Maintainer: Mark Stevenson Description: Tools for the analysis of epidemiological and surveillance data. Contains functions for directly and indirectly adjusting measures of disease frequency, quantifying measures of association on the basis of single or multiple strata of count data presented in a contingency table, computation of confidence intervals around incidence risk and incidence rate estimates and sample size calculations for cross-sectional, case-control and cohort studies. Surveillance tools include functions to calculate an appropriate sample size for 1- and 2-stage representative freedom surveys, functions to estimate surveillance system sensitivity and functions to support scenario tree modelling analyses. 
Depends: R (>= 3.0.0), survival Imports: BiasedUrn, pander, methods, sf, lubridate Suggests: MASS (>= 3.1-20), knitr, rmarkdown, RColorBrewer, ggplot2, plyr, rgdal, scales, spData, spatstat, foreign, maptools, rgeos, mapproj, tidyverse, kableExtra VignetteBuilder: knitr License: GPL (>= 2) Packaged: 2022-01-07 09:54:17 UTC; Mark Stevenson URL: https://fvas.unimelb.edu.au/research/groups/veterinary-epidemiology-melbourne and https://www.ausvet.com.au/ RoxygenNote: 7.1.0 NeedsCompilation: no Repository: CRAN Date/Publication: 2022-01-07 12:32:47 UTC epiR/build/0000755000176200001440000000000014166006711012252 5ustar liggesusersepiR/build/vignette.rds0000644000176200001440000000056014166006711014612 0ustar liggesusersO0;($&F|25>_bL t6ie m"F-#Dz}zBtaøC!}2Xח %)1%V}J5vzoދ݊:ɒ5uIs5hWtӽ^ˍ2:?;`ˈdڈ}sι`ᅗ?s47>,b9U?F df}QU4DpdNepiR/vignettes/0000755000176200001440000000000014166006711013163 5ustar liggesusersepiR/vignettes/epiR_sample_size.bib0000644000176200001440000000160314123600014017120 0ustar liggesusers@article{bennett_et_al:1991, author = {Bennett, S and Woods, T and Liyanage, W and Smith, D}, title = {A simplified general method for cluster-sample surveys of health in developing countries}, journal = {World Health Statistics Quarterly}, volume = {44}, pages = {98 - 106}, year = {1991} } @article{otte_gumm:1997, author = {Otte, JM and Gumm, ID}, title = {Intra-cluster correlation coefficients of 20 infections calculated from the results of cluster-sample surveys}, journal = {Preventive Veterinary Medicine}, volume = {31}, pages = {147 - 150}, year = {1997} } @article{stevenson:2021, author = {Stevenson, MA}, title = {Sample size estimation in veterinary epidemiological research}, journal = {Frontiers in Veterinary Science}, volume = {7}, pages = {539573}, year = {2021}, type = {Journal Article} } epiR/vignettes/population_attributable_risk.png0000644000176200001440000007016414110621512021654 0ustar liggesusersPNG  IHDR1*gAMA asRGB cHRMz&u0`:pQ<bKGD pHYsodoiIDATxwxEwfZz{G@PPEĊE?{C(*MD@{o μĐ(BH{۽ٽwg2DB!v!\? !ćP'BAB|}B! !ćhPR!2B㬴=!v=ґkE*RS'>"0 W0d1APQ'd>*8̀G_dz :3>Y:uB,RGb1=B)ц\T ޞ7h,p8rܜ lŀ)U$3x1 !pC.2)!ا{ǯ8AcN:zh(o64hl3BU}ƹ۰gR) o!@1VaӧOY1f!<شiӧ~ZuTڙF!:4*`ecp(8= 7o^|ԩSn7}Bn$1~¦S){̚U(;p6@@(U ]PϸP-?+JJ.D=RRJo)… TC ̩f;,҅P1T*._5nKِG)}=,!<7{`-д6R 8kk K3eK?U0d*W!K4ZkzfZWRle "ħ! 
[\ٲq3Ȍ?w&~6Bn*7tBO-57ܨ1fD5ʭϜ]6QQQQէՄv GDS[-2{xTTN]J͵_8oWglUʗwhLGLItӝn# nbVYI=QQ:5'*'>>) 4M .M랸81\6c-+Vܵ~Ϲw;NeH ~mID<|W l?y6d2Xsj`@2nlQCCGҁ(cReV@."⇝=5k\Ǐ鸝1AY-&Jm&wK #)ADGӯq1ƘRz`'qx& b׿q'RU|1k!v~s[ =~Ά1&ضKC߀ZڳXI+t52}_&NRۑ?֏8?v~+zH8sNN qa&/uKU6Fj雗JNpA_yܞZlC5k>XמV bz> zn$O^i (3t'M>t].tauv'UHp/bjl?7 j+MwW{?Y=`֮ޱtw|bߛj'l8x6^lXZEGr_s)wT뚝Yv]/}5y@]DŅwt/&fk5]9r<]lw)!*BO!>!PMR%tEXtdate:create2017-05-01T13:30:21+10:00Wd%tEXtdate:modify2017-05-01T13:27:27+10:00ވ[IENDB`epiR/vignettes/epiR_measures_of_association.bib0000644000176200001440000000420214120350772021521 0ustar liggesusers@article{altman:1998, author = {Altman, DG}, title = {Confidence intervals for the number needed to treat}, journal = {British Medical Journal}, volume = {317}, pages = {1309 - 1312}, year = {1998} } @article{grimes_schulz:2008, author = {Grimes, DA and Schulz, KF}, title = {Making sense of odds and odds ratios}, journal = {Obstetrics and Gynecology}, volume = {111}, pages = {423 - 426}, year = {2008} } @book{hosmer_lemeshow:2000, author = {Hosmer, DW and Lemeshow, S}, title = {{Applied Logistic Regression}}, publisher = {Jon Wiley and Sons Inc}, address = {London}, note = {hosmer_lemeshow:1999 ProCite field[38]: 137 - 152}, year = {2000} } @article{kuritz_et_al:1988, author = {Kuritz, SJ and Landis, JR and Koch, GG }, title = {{A general overview of Mantel-Haenszel methods: Applications and recent developments}}, journal = {Annual Reviews in Public Health}, volume = {9}, pages = {123 - 160}, year = {1988} } @book{porta:2014, author = {Porta, M and Greenland, S and Last, JM}, title = {{A Dictionary of Epidemiology}}, publisher = {Oxford University Press}, address = {London}, year = {2014} } @article{prasad_et_al:2007, author = {Prasad, K and Jaeschke, R and Wyer, P and Keitz, S and Guyatt, G}, title = {{Tips for teachers of evidence-based medicine: Understanding odds ratios and their relationship to risk ratios}}, journal = {Journal of General Internal Medicine}, volume = {23}, number = {5}, pages = {635 - 640}, year = {2007} } @article{siegerink_rohmann:2018, author = {Siegerink, B and Rohmann, JL}, title = {{Impact of your results: Beyond the relative risk}}, journal = {Research and Practice in Thrombosis and Haemostasis}, volume = {2}, pages = {653 - 657}, year = {2018} } @article{willeberg:1977, author = {Willeberg, P}, title = {{Animal disease information processing: Epidemiologic analyses of the feline urologic syndrome}}, journal = {Acta Veterinaria Scandinavica}, volume = {64}, pages = {1 - 48}, year = {1977} } epiR/vignettes/epiR_descriptive.Rmd0000644000176200001440000005251014153272044017132 0ustar liggesusers--- title: "Descriptive Epidemiology using epiR" author: "Mark Stevenson" date: "`r Sys.Date()`" bibliography: epiR_descriptive.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Descriptive epidemiology} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` Epidemiology is the study of the frequency, distribution and determinants of health-related states in populations and the application of such knowledge to control health problems [@cdc:2006]. 
This vignette provides instruction on the way R and `epiR` can be used for descriptive epidemiological analyses, that is, to describe how the frequency of disease varies by individual, place and time. ## Individual Descriptions of disease frequency involve reporting either the **prevalence** or **incidence** of disease. Some definitions. Strictly speaking, 'prevalence' equals the number of cases of a given disease or attribute that exists in a population at a specified point in time. Prevalence risk is the proportion of a population that has a specific disease or attribute at a specified point in time. Many authors use the term 'prevalence' when they really mean prevalence risk, and these notes will follow this convention. Two types of prevalence are reported in the literature: (1) **point prevalence** equals the proportion of a population in a diseased state at a single point in time, (2) **period prevalence** equals the proportion of a population with a given disease or condition over a specific period of time (i.e. the number of existing cases at the start of a follow-up period plus the number of incident cases that occur during the follow-up period). Incidence provides a measure of how frequently susceptible individuals become disease cases as they are observed over time. An incident case occurs when an individual changes from being susceptible to being diseased. The count of incident cases is the number of such events that occur in a population during a defined follow-up period. There are two ways to express incidence: **Incidence risk** (also known as cumulative incidence) is the proportion of initially susceptible individuals in a population who become new cases during a defined follow-up period. **Incidence rate** (also known as incidence density) is the number of new cases of disease that occur per unit of individual time at risk during a defined follow-up period. In addition to reporting the point estimate of disease frequency, it is important to provide an indication of the uncertainty around that point estimate. The `epi.conf` function in the `epiR` package allows you to calculate confidence intervals for prevalence, incidence risk and incidence rates. Let's say we're interested in the prevalence of disease X in a population comprised of 1000 individuals. Two hundred are tested and four returned a positive result. Assuming 100% test sensitivity and specificity, what is the estimated prevalence of disease X in this population? ```{r message = FALSE} library(epiR); library(ggplot2); library(scales) ncas <- 4; npop <- 200 tmp <- as.matrix(cbind(ncas, npop)) epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 ``` The estimated prevalence of disease X in this population is 2.0 (95% confidence interval [CI] 0.55 -- 5.0) cases per 100 individuals at risk. Another example. A study was conducted by @feychting_et_al:1998 to report the frequency of cancer among the blind. A total of 136 diagnoses of cancer were made from 22,050 person-years at risk. What was the incidence rate of cancer in this population? ```{r} ncas <- 136; ntar <- 22050 tmp <- as.matrix(cbind(ncas, ntar)) epi.conf(tmp, ctype = "inc.rate", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 1000 ``` The incidence rate of cancer in this population was 6.2 (95% CI 5.2 to 7.3) cases per 1000 person-years at risk. Now let's say we want to compare the frequency of disease across several populations. An effective way to do this is to use a ranked error bar plot.
With a ranked error bar plot the points represent the point estimate of the measure of disease frequency and the error bars indicate the 95% confidence interval around each estimate. The disease frequency estimates are then sorted from lowest to highest. Generate some data. First we'll generate a distribution of disease prevalence estimates. Let's say it has a mode of 0.60 and we're 80% certain that the prevalence is greater than 0.35. Use the `epi.betabuster` function to generate parameters that can be used for a beta distribution to satisfy these constraints: ```{r} tmp <- epi.betabuster(mode = 0.60, conf = 0.80, greaterthan = TRUE, x = 0.35, conf.level = 0.95, max.shape1 = 100, step = 0.001) tmp$shape1; tmp$shape2 ``` Now take 25 draws from a beta distribution using the `shape1` and `shape2` values calculated above and plot them as a frequency histogram: ```{r dfreq01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq01}Frequency histogram of disease prevalence estimates for our simulated population."} dprob <- rbeta(n = 25, shape1 = tmp$shape1, shape2 = tmp$shape2) dat.df <- data.frame(dprob = dprob) ggplot(data = dat.df, aes(x = dprob)) + theme_bw() + geom_histogram(binwidth = 0.01, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_continuous(limits = c(0,1), name = "Prevalence") + scale_y_continuous(limits = c(0,10), name = "Number of draws") ``` Generate a vector of population sizes using the uniform distribution. Calculate the number of diseased individuals in each population using `dprob` (calculated above). Finally, calculate the prevalence of disease in each population and its 95% confidence interval using `epi.conf`. The function `epi.conf` provides several options for confidence interval calculation methods for prevalence. Here we'll use the exact method: ```{r} dat.df$rname <- paste("Region ", 1:25, sep = "") dat.df$npop <- round(runif(n = 25, min = 20, max = 1500), digits = 0) dat.df$ncas <- round(dat.df$dprob * dat.df$npop, digits = 0) tmp <- as.matrix(cbind(dat.df$ncas, dat.df$npop)) tmp <- epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 dat.df <- cbind(dat.df, tmp) head(dat.df) ``` Sort the data in order of variable `est` and assign a 1 to `n` identifier as variable `rank`: ```{r} dat.df <- dat.df[sort.list(dat.df$est),] dat.df$rank <- 1:nrow(dat.df) ``` Now create a ranked error bar plot. Because it's useful to provide the region-area names on the horizontal axis we'll rotate the horizontal axis labels by 90 degrees. ```{r dfreq02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq02}Ranked error bar plot showing the prevalence of disease (and its 95% confidence interval) for 25 population units."} ggplot(data = dat.df, aes(x = rank, y = est)) + theme_bw() + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) + geom_point() + scale_x_continuous(limits = c(0,25), breaks = dat.df$rank, labels = dat.df$rname, name = "Region") + scale_y_continuous(limits = c(0,100), name = "Cases per 100 individuals at risk") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` ## Time Epidemic curve data are often presented in one of two formats: 1. One row for each individual identified as a case with an event date assigned to each. 2. One row for every event date with an integer representing the number of cases identified on that date.
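Data supplied in the first format can be collapsed into the second with a simple aggregation. The chunk below is a minimal sketch of one way to do this; the data frame `cas.df`, its event date column `odate` and the resulting `agg.df` are hypothetical names used for illustration only:

```{r}
# A minimal sketch: collapse one-row-per-case data (format 1) into
# one-row-per-date counts of cases (format 2). The object names used
# here (cas.df, agg.df) are hypothetical:
cas.df <- data.frame(odate = as.Date(c("2004-07-26", "2004-07-26",
   "2004-07-28", "2004-07-29", "2004-07-29", "2004-07-29")))

# Tabulate the number of cases identified on each date:
agg.df <- as.data.frame(table(cas.df$odate), stringsAsFactors = FALSE)
names(agg.df) <- c("odate", "ncas")
agg.df$odate <- as.Date(agg.df$odate)
agg.df
```

Note that dates on which no cases were identified will not appear as zero-count rows in the aggregated data, so you may need to merge the counts onto a complete sequence of dates before plotting.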
Generate some data, with one row for every individual identified as a case: ```{r} n.males <- 100; n.females <- 50 odate <- seq(from = as.Date("2004-07-26"), to = as.Date("2004-12-13"), by = 1) prob <- c(1:100, 41:1); prob <- prob / sum(prob) modate <- sample(x = odate, size = n.males, replace = TRUE, p = prob) fodate <- sample(x = odate, size = n.females, replace = TRUE) dat.df <- data.frame(sex = c(rep("Male", n.males), rep("Female", n.females)), odate = c(modate, fodate)) # Sort the data in order of odate: dat.df <- dat.df[sort.list(dat.df$odate),] ``` Plot the epidemic curve using the `ggplot2` and `scales` packages: ```{r epicurve01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve01}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2004."} ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("7 days"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` Produce a separate epidemic curve for males and females using the `facet_grid` option in `ggplot2`: ```{r epicurve03-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve03}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex."} ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + facet_grid( ~ sex) ``` Let's say an event occurred on 31 October 2004. Mark this date on your epidemic curve using `geom_vline`: ```{r epicurve04-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve04}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex. 
An event that occurred on 31 October 2004 is indicated by the vertical dashed line."} ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + facet_grid( ~ sex) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") ``` Plot the total number of disease events by day, coloured according to sex: ```{r epicurve05-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve05}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex."} ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") + scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + theme(legend.position = c(0.90, 0.80)) ``` It can be difficult to appreciate differences in male and female disease counts as a function of date with the above plot format so dodge the data instead: ```{r epicurve06-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve06}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex."} ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", size = 0.1, position = "dodge") + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") + scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + theme(legend.position = c(0.90, 0.80)) ``` We now provide code to deal with the situation where the data are presented with one row for every case event date and an integer representing the number of cases identified on each date. Simulate some data in this format. In the code below the variable `ncas` represents the number of cases identified on a given date. The variable `dcontrol` is a factor with two levels: `neg` and `pos`. Level `neg` flags dates when no disease control measures were in place; level `pos` flags dates when disease control measures were in place. ```{r} odate <- seq(from = as.Date("1/1/00", format = "%d/%m/%y"), to = as.Date("1/1/05", format = "%d/%m/%y"), by = "1 month") ncas <- round(runif(n = length(odate), min = 0, max = 100), digits = 0) dat.df <- data.frame(odate, ncas) dat.df$dcontrol <- "neg" dat.df$dcontrol[dat.df$odate >= as.Date("1/1/03", format = "%d/%m/%y") & dat.df$odate <= as.Date("1/6/03", format = "%d/%m/%y")] <- "pos" head(dat.df) ``` Generate an epidemic curve.
Note `weight = ncas` in the aesthetics argument for `ggplot2`: ```{r epicurve07-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve07}Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures."} ggplot() + theme_bw() + geom_histogram(dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) + scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), name = "Date") + scale_y_continuous(limits = c(0,200), name = "Number of cases") + scale_fill_manual(values = c("#738ca6","#d46a6a")) + guides(fill = "none") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` Now we'll add a line to the plot to show the cumulative number of cases detected as a function of calendar date. The coding here requires some thought. First question: What was the cumulative number of cases at the end of the follow-up period? Here we use the `cumsum` (cumulative sum) function in base R: ```{r} cumsum(dat.df$ncas) ``` At the end of the follow-up period the cumulative number of cases was in the order of 3100 (exact numbers will vary because we've used a simulation approach to generate this data). What we need to do is to get our 0 to 3100 cumulative cases to 'fit' into the 0 to 200 vertical axis limits of the epidemic curve. A reasonable approach would be to: (1) divide cumulative case numbers by 10; (2) set 350 as the upper limit of the vertical axis; and (3) set `sec.axis = sec_axis(~ . * 10)` to multiply the values that appear on the primary vertical axis by 10 for the labels that appear on the secondary vertical axis: ```{r epicurve08-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve08}Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures. Superimposed on this plot is a line showing cumulative case numbers."} ggplot() + theme_bw() + geom_histogram(data = dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) + geom_line(data = dat.df, mapping = aes(x = odate, y = cumsum(ncas) / 10)) + scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), name = "Date") + scale_y_continuous(limits = c(0,350), name = "Number of cases", sec.axis = sec_axis(~ . * 10, name = "Cumulative number of cases")) + scale_fill_manual(values = c("#738ca6","#d46a6a")) + guides(fill = "none") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` ## Place Two types of maps are often used when describing patterns of disease by place: 1. Choropleth maps. Choropleth mapping involves producing a summary statistic of the outcome of interest (e.g. count of disease events, prevalence, incidence) for each component area within a study region. A map is created by 'filling' (i.e. colouring) each component area with colour, providing an indication of the magnitude of the variable of interest and how it varies geographically. 2. Point maps. **Choropleth maps** For illustration we make a choropleth map of sudden infant death syndrome (SIDS) babies in North Carolina counties for 1974 using the `nc.sids` data provided with the `spData` package. 
```{r message = FALSE, warning = FALSE} library(sf); library(spData); library(rgdal); library(plyr); library(RColorBrewer); library(spatstat) ncsids.sf <- st_read(dsn = system.file("shapes/sids.shp", package = "spData")[1]) ncsids.sf <- ncsids.sf[,c("BIR74","SID74")] head(ncsids.sf) ``` The `ncsids.sf` simple features object lists, for each county in North Carolina, USA, the number of SIDS deaths for 1974. Plot a choropleth map of the counties of North Carolina showing SIDS counts for 1974: ```{r spatial01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial01}Map of North Carolina, USA showing the number of sudden infant death syndrome cases, by county for 1974."} ggplot() + theme_bw() + geom_sf(data = ncsids.sf, aes(fill = SID74), colour = "dark grey") + scale_fill_gradientn(limits = c(0,60), colours = brewer.pal(n = 5, "Reds"), guide = "colourbar") + scale_x_continuous(name = "Longitude") + scale_y_continuous(name = "Latitude") + labs(fill = "SIDS 1974") ``` **Point maps** For this example we will use the `epi.incin` data set included with `epiR`. Between 1972 and 1980 an industrial waste incinerator operated at a site about 2 kilometres southwest of the town of Coppull in Lancashire, England. Addressing community concerns that there were greater than expected numbers of laryngeal cancer cases in close proximity to the incinerator @diggle:1990 conducted a study investigating risks for laryngeal cancer, using recorded cases of lung cancer as controls. The study area is 20 km x 20 km in size and includes the location of residence of patients diagnosed with each cancer type from 1974 to 1983. Load the `epi.incin` data set and create negative and positive labels for each point location. We don't have a boundary map for these data so we'll use `spatstat` to create a convex hull around the points and dilate the convex hull by 1000 metres as a proxy boundary. The point locations in this data are projected using the British National Grid coordinate reference system (EPSG code 27700). Create an observation window for the data as `coppull.ow` and a `ppp` object for plotting: ```{r message = FALSE} data(epi.incin); incin.df <- epi.incin incin.df$status <- factor(incin.df$status, levels = c(0,1), labels = c("Neg", "Pos")) names(incin.df)[3] <- "Status" incin.sf <- st_as_sf(incin.df, coords = c("xcoord","ycoord"), remove = FALSE) st_crs(incin.sf) <- 27700 coppull.ow <- convexhull.xy(x = incin.df[,1], y = incin.df[,2]) coppull.ow <- dilation(coppull.ow, r = 1000) ``` Create a simple features polygon object from `coppull.ow`.
First we convert `coppull.ow` to a `SpatialPolygonsDataFrame` object: ```{r} coords <- matrix(c(coppull.ow$bdry[[1]]$x, coppull.ow$bdry[[1]]$y), ncol = 2, byrow = FALSE) pol <- Polygon(coords, hole = FALSE) pol <- Polygons(list(pol),1) pol <- SpatialPolygons(list(pol)) coppull.spdf <- SpatialPolygonsDataFrame(Sr = pol, data = data.frame(id = 1), match.ID = TRUE) ``` Convert the `SpatialPolygonsDataFrame` to an `sf` object and set the coordinate reference system: ```{r} coppull.sf <- as(coppull.spdf, "sf") st_crs(coppull.sf) <- 27700 ``` The `mformat` function is used to plot the axis labels in kilometres (instead of metres): ```{r} mformat <- function(){ function(x) format(x / 1000, digits = 2) } ``` ```{r spatial02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial02}Point map showing the place of residence of individuals diagnosed with laryngeal cancer (Pos) and lung cancer (Neg), Coppull, Lancashire, UK, 1972 to 1980."} ggplot() + theme_bw() + geom_sf(data = incin.sf, aes(colour = Status, shape = Status)) + geom_sf(data = coppull.sf, fill = "transparent", colour = "black") + coord_sf(datum = st_crs(coppull.sf)) + scale_colour_manual(values = c("grey","red")) + scale_shape_manual(values = c(1,16)) + scale_x_continuous(name = "Easting (km)", labels = mformat()) + scale_y_continuous(name = "Northing (km)", labels = mformat()) + theme(legend.position = c(0.10, 0.15)) ``` ## References
epiR/vignettes/risk_ratio.png0000644000176200001440000006776214110621444016054 0ustar liggesusers
epiR/vignettes/epiR_surveillance.Rmd0000644000176200001440000011271014110616626017305 0ustar liggesusers--- title: "Design and Analysis of Disease Surveillance Programs Using epiR" author: "Mark Stevenson and Evan Sergeant" date: "`r Sys.Date()`" bibliography: epiR_surveillance.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Disease surveillance} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` Surveillance is defined as the on-going systematic collection, collation and interpretation of accurate information about a defined population with respect to disease and/or infection, closely integrated with timely dissemination of that information to those responsible for control and prevention measures [@thacker_berkelman:1988]. The Terrestrial Animal Health Code of the World Organisation for Animal Health [@oie:2021] defines surveillance as the investigation of a given population or subpopulation to detect the presence of a pathogenic agent or disease; the frequency and type of surveillance will be determined by the epidemiology of the pathogenic agent or disease, and the desired outputs. Surveillance is a tool for monitoring changes in health related events in a defined population with specific goals relating to: (1) the detection of disease incursions, both new and emerging, (2) the assessment of progress in terms of control or eradication of selected diseases and pathogens, (3) demonstration of disease freedom for trading partners, and (4) identification of hazards or risk factors for disease outbreaks. This vignette provides instruction on the way R and `epiR` (and specifically the surveillance functions within `epiR`) can be used for: (1) the design of disease surveillance programs; and (2) the design of programs to provide a quantitative basis for claims for disease freedom. ## Definitions **Design prevalence**.
The design prevalence (minimum detectable prevalence, maximum acceptable or permissible prevalence, and minimum expected prevalence) is a fixed value for prevalence used for testing the hypothesis that disease is present in a population of interest. The null hypothesis is that disease is present in the population at a prevalence equal to or greater than the design prevalence. If a sufficient number of samples are collected and all return a negative result we may reject the null hypothesis and accept the alternative hypothesis to conclude that the prevalence is less than the design prevalence. A design prevalence is not related to any actual prevalence of disease in the population under study. It is not subject to uncertainty or variability and therefore doesn't need to be described using a distribution. *Cluster-level design prevalence* refers to a design prevalence assigned at the cluster (e.g. village, herd or household) level. *Unit-level design prevalence* refers to a design prevalence assigned at the individual unit (e.g. cow, sheep, bird) level. The unit-level prevalence of disease can be applied either within clusters (e.g. herds, flocks, villages) or across broader, unclustered populations (e.g. human populations or wildlife). **Surveillance system**. A surveillance system is a set of procedures to collect, collate and interpret information about a defined population with respect to disease. Most surveillance systems are comprised of several activities (e.g. on-farm testing, abattoir surveillance, disease hotlines) called **surveillance system components**. Each surveillance system component is comprised of **surveillance system units**. Surveillance system units are the individual items that get examined within each surveillance system component. For the surveillance system components listed above (on-farm testing, abattoir surveillance, disease hotlines) the corresponding surveillance units would be individual animals, carcasses and individual phone reports, respectively. **Unit sensitivity**. Unit sensitivity is defined as the average probability that a unit (selected from those processed) will return a positive surveillance outcome, given that disease is present in the population at a level equal to or greater than a specified design prevalence. **Component sensitivity**. Component sensitivity (CSe) is defined as the average probability that a surveillance system component will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than the specified design prevalence. **Surveillance system sensitivity**. Surveillance system sensitivity (SSe) is defined as the average probability that a surveillance system (as a whole) will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than a specified design prevalence. ## An approach for thinking about surveillance design and assessment The first thing to consider when you're designing or assessing a surveillance program is the sampling method that will be used. If a surveillance program has been designed to detect a specific pathogen, sampling will usually be either **representative** or **risk-based**. Other options include the situation where you might observe every individual in a population (a **census**) or where you might take no active steps to collect surveillance data but instead rely on stakeholders to report their observations on a voluntary basis (**passive surveillance**).
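To make the sensitivity definitions above concrete: if the sensitivities of the individual components of a surveillance system are known, and we are willing to assume that the components operate independently and that any overlap in the units they cover is negligible, an approximate surveillance system sensitivity can be obtained by combining the component sensitivities. A minimal sketch (the component sensitivity values below are invented for illustration; the `rsu.sep.rsmult` function described later handles multiple components more formally):

```{r}
# Component sensitivities for, say, on-farm testing, abattoir surveillance and a disease hotline:
cse <- c(0.40, 0.25, 0.10)

# Surveillance system sensitivity, assuming independent components with negligible overlap:
sse <- 1 - prod(1 - cse)
sse
```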
Once we have the method of sampling defined we then move on to think about the different tasks that need to be done in terms of design of the actual surveillance system and finally, how we might assess the surveillance system once it has been designed and implemented. In terms of design, once you have specified the sampling method you need to determine how many surveillance system units will be sampled (usually to achieve a defined surveillance system sensitivity). Once samples have been collected and tested, or if you are making an assessment of an existing surveillance system, we might then want to answer the question: if the disease of interest is actually present in the population what is the chance that the surveillance system will actually detect it? This question can be expressed in three other ways: (1) What is the surveillance system sensitivity? or (2) What is the probability that the prevalence of disease is less than the specified design prevalence? or (3) What is the surveillance system's negative predictive value? The remainder of this vignette follows this general structure. For each sampling method (representative, risk-based, census and passive) we provide notes and examples on the use of `epiR` for sample size estimation, estimation of surveillance system sensitivity and estimation of the probability of disease freedom. While 'estimation of the probability of disease freedom' is the name assigned to the last group of analyses a more correct label would be 'estimation of the probability that the prevalence of disease is less than a specified design prevalence' (i.e. the negative predictive value of the surveillance system). Be aware that we can only truly demonstrate disease freedom if every member of the population at risk is assessed using a test with perfect diagnostic sensitivity and perfect diagnostic specificity. ## Representative sampling ### Sample size estimation The surveillance sample size functions for representative sampling in `epiR` fall into two classes: sampling to achieve a defined probability of disease freedom and sampling to achieve a defined surveillance system sensitivity. The surveillance system sensitivity sample size functions include those for simple random sampling and two stage sampling. Two stage sampling is the preferred (indeed, the only practical) approach when a population is organised in clusters (e.g. cows within herds, households within villages). With two stage sampling clusters (herds, villages) are sampled first and then from within each selected cluster individual surveillance units are sampled.
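Most of the representative sampling sample size functions listed in the table below build on the same underlying relationship: with a test of diagnostic sensitivity `se.u`, perfect specificity and a large (effectively infinite) population, the probability of detecting at least one positive when disease is present at the design prevalence `pstar` is 1 - (1 - se.u * pstar)^n. Solving this expression for n gives the sample size required to achieve a target surveillance system sensitivity `se.p`. A minimal sketch (this is not the `epiR` implementation, which also accounts for finite population sizes):

```{r}
# Hypothetical helper: sample size for a disease freedom survey assuming an
# imperfect test, perfect specificity and an effectively infinite population:
n.freedom <- function(pstar, se.p, se.u){
  ceiling(log(1 - se.p) / log(1 - se.u * pstar))
}

# Reproduces the 62 herds returned by rsu.sssep.rs in EXAMPLE 1 below:
n.freedom(pstar = 0.05, se.p = 0.95, se.u = 0.95)
```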
```{r ssrs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} library(pander) panderOptions('table.split.table', Inf) # panderOptions('table.alignment.default', function(df) ifelse(sapply(df, is.numeric), 'right', 'left')) set.caption("Functions to estimate sample size using representative population sampling data.") ssrs.tab <- " Sampling | Outcome | Details | Function Representative | Prob disease freedom | Imperfect Se, perfect Sp | `rsu.sspfree.rs` Representative | SSe | Imperfect Se, perfect Sp | `rsu.sssep.rs` Two stage representative | SSe | Imperfect Se, perfect Sp | `rsu.sssep.rs2st` Representative | SSe | Imperfect Se, imperfect Sp, known N | `rsu.sssep.rsfreecalc` Pooled representative | SSe | Imperfect Se, imperfect Sp | `rsu.sssep.rspool`" ssrs.df <- read.delim(textConnection(ssrs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrs.df) <- unname(as.list(ssrs.df[1,])) # put headers on ssrs.df <- ssrs.df[-1,] # remove first row row.names(ssrs.df) <- NULL pander(ssrs.df, style = 'rmarkdown') ``` *EXAMPLE 1* A cross-sectional study is to be carried out to confirm the absence of brucellosis in dairy herds using a bulk milk tank test assuming a design prevalence of 5%. Assume the total number of dairy herds in your study area is unknown and large and the bulk milk tank test to be used has a diagnostic sensitivity of 0.95 and a specificity of 1.00. How many herds need to be sampled to achieve a system sensitivity of 95%? That is, what is the probability that disease will be detected if it is present in the population at the designated design prevalence? ```{r message = FALSE} library(epiR) rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.95) ``` A total of 62 herds need to be sampled and tested. This question can be asked in another way. If our prior estimate of the probability that the population of herds is free of disease is 0.50 and we believe that there's a 1% chance of disease being introduced into the population during the next time period, how many herds need to be sampled to be 95% confident that disease is absent (i.e. less than the design prevalence) if all tests are negative? ```{r message = FALSE} rsu.sspfree.rs(N = NA, prior = 0.50, p.intro = 0.01, pstar = 0.05, pfree = 0.95, se.u = 0.95) ``` A total of 61 herds need to be sampled (similar to the value calculated above). Note that function `rsu.sssep.rs` returns the sample size to achieve a desired surveillance system sensitivity ('what's the probability that disease will be detected?'). Function `rsu.sspfree.rs` returns the sample size to achieve a desired (posterior) probability of disease freedom. Now assume that it is known that there are 500 dairy herds in your study area. Revise your sample size estimate to achieve the desired surveillance system sensitivity in light of this new information. ```{r message = FALSE} rsu.sssep.rs(N = 500, pstar = 0.05, se.p = 0.95, se.u = 0.95) ``` A total of 60 herds need to be sampled and tested. The sample size calculations presented so far assume the use of a test with perfect specificity (that is, if a sample returns a positive result we can be 100% certain that the herd is positive and disease is actually present in the population). Consider the situation where a test with imperfect specificity is used. Imperfect specificity presents problems for disease freedom surveys. If a positive test result is returned, how sure can we be that it is a true positive as opposed to a false positive? 
The `rsu.sssep.rsfreecalc` function returns the required sample size to confirm the absence of disease using a test with imperfect diagnostic sensitivity and specificity based on the methodology implemented in the standalone software 'Freecalc' [@cameron_baldock:1998a]. *EXAMPLE 2* We'll continue with the brucellosis example introduced above. Imagine the test we're using has a diagnostic sensitivity of 0.95 (as before) but this time it has a specificity of 0.98. How many herds need to be sampled to be 95% certain that the prevalence of brucellosis in dairy herds is less than the design prevalence if fewer than a specified number of tests return a positive result? ```{r message = FALSE} rsu.sssep.rsfreecalc(N = 5000, pstar = 0.05, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ``` A population sensitivity of 95% is achieved with a total sample size of 194 herds, assuming a cut-point of 7 or more positive herds are required to return a positive survey result. Note the substantial increase in sample size when diagnostic specificity is imperfect (194 herds when specificity is 0.98 compared with 63 when specificity is 1.00). The relatively low design prevalence in combination with imperfect specificity means that false positives are more likely to be a problem in this population so the number tested needs to be (substantially) increased. Increase the design prevalence to 0.10 to see its effect on estimated sample size. ```{r message = FALSE} rsu.sssep.rsfreecalc(N = 5000, pstar = 0.10, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ``` The required sample size decreases to 66 and the cut-point to 3 positives due to: (1) the expected reduction in the number of false positives; and (2) the greater difference between true and false positive rates in the second example compared with the first. Now consider the situation where individual surveillance units (e.g. animals) are aggregated within groups called 'clusters' (e.g. herds). With this type of system **two-stage cluster sampling** is a commonly used approach for disease surveillance studies. With two stage cluster sampling herds (clusters) are sampled first and individual surveillance units are then sampled from each sampled cluster. This means that we have two sample sizes to calculate: the number of clusters and the number of surveillance units from within each sampled cluster. *EXAMPLE 3* For this example we assume that there are 20,000 at risk herds in our population and we do not know the number of animals present in each herd. This disease is not very common among herds but if a herd is positive the prevalence is relatively high, so we set the herd-level design prevalence to 0.005 and the within-herd design prevalence to 0.05. The test we will use at the surveillance unit level has a diagnostic sensitivity of 0.90 and a diagnostic specificity of 1.00. The target sensitivity of disease detection at the herd level is 0.95 and the target sensitivity of disease detection at the population level is the same, 0.95. How many herds need to be sampled if you want to be 95% certain of detecting at least one infected herd if the between-herd prevalence of disease is greater than or equal to 0.005? ```{r message = FALSE} rsu.sssep.rs(N = 20000, pstar = 0.005, se.p = 0.95, se.u = 0.95) ``` We need to sample a total of 622 herds.
How many animals need to be sampled from each herd if you want to be 95% certain of detecting at least one infected animal if the within-herd prevalence of disease is greater than or equal to 0.05? ```{r message = FALSE} rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.90) ``` Within each selected herd we need to sample at least 66 animals. As an alternative we can calculate the required number of herds to sample and the required number of animals to sample from each herd in a single step using the function `rsu.sssep.rs2st`: ```{r message = FALSE} rsu.sssep.rs2st(H = 20000, N = NA, pstar.c = 0.005, pstar.u = 0.05, se.p = 0.95, se.c = 0.95, se.u = 0.90) ``` ### Estimation of surveillance system sensitivity and specificity ```{r seprs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.") seprs.tab <- " Sampling | Outcome | Details | Function Representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rs` Two stage representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rs2st` Representative | SSe | Imperfect Se, perfect Sp, multiple components | `rsu.sep.rsmult` Representative | SSe | Imperfect Se, imperfect Sp | `rsu.sep.rsfreecalc` Pooled representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rspool` Representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rsvarse` Representative | SSp | Imperfect Sp | `rsu.spp.rs`" seprs.df <- read.delim(textConnection(seprs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprs.df) <- unname(as.list(seprs.df[1,])) # put headers on seprs.df <- seprs.df[-1,] # remove first row row.names(seprs.df) <- NULL pander(seprs.df, style = 'rmarkdown') ``` *EXAMPLE 4* Three hundred samples are to be tested from a population of animals to confirm the absence of disease. The total size of the population is unknown. Assuming a design prevalence of 0.01 and a test with diagnostic sensitivity of 0.95 will be used, what is the surveillance system sensitivity? That is, what is the probability that disease will be detected if it is present in the population at or above the specified design prevalence? ```{r message = FALSE} rsu.sep.rs(N = NA, n = 300, pstar = 0.01, se.u = 0.95) ``` The probability that this surveillance strategy will detect disease if it is present in the population at or above the specified design prevalence (the surveillance system sensitivity) is 0.943. *EXAMPLE 5* Thirty animals from each of five herds ranging in size from 80 to 100 head are to be sampled to confirm the absence of a disease. Assuming a design prevalence of 0.01 and a test with diagnostic sensitivity of 0.95 will be used, what is the sensitivity of disease detection for each herd? ```{r message = FALSE} N <- seq(from = 80, to = 100, by = 5) n <- rep(30, times = length(N)) herd.sep <- rsu.sep.rs(N = N, n = n, pstar = 0.01, se.u = 0.95) sort(round(herd.sep, digits = 2)) ``` The sensitivity of disease detection for each herd ranges from 0.28 to 0.36. *EXAMPLE 6* Assume 73 samples were tested at two different labs, using different tests. Laboratory 1 tested 50 samples with the standard test which has a diagnostic sensitivity of 0.80. Laboratory 2 tested the remaining 23 samples with a different test which has a diagnostic sensitivity of 0.70. What is the surveillance system sensitivity of disease detection if we set the design prevalence to 0.05?
```{r message = FALSE} # Diagnostic test sensitivities and the number of samples tested at each laboratory: se.t1 <- 0.80; se.t2 <- 0.70 n.lab1 <- 50; n.lab2 <- 23 # Create a vector of test sensitivities for each sample: se.all <- c(rep(se.t1, times = n.lab1), rep(se.t2, times = n.lab2)) rsu.sep.rsvarse(N = n.lab1 + n.lab2, pstar = 0.05, se.u = se.all) ``` If the design prevalence is 0.05 the estimated surveillance system sensitivity is 0.997. ### Estimation of the probability of disease freedom ```{r pfreers.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate the probability of disease freedom using representative population sampling data.") pfreers.tab <- " Sampling | Outcome | Details | Function Representative | Prob disease freedom | Imperfect Se, perfect Sp | `rsu.pfree.rs` Representative | Equilibrium prob of disease freedom | Imperfect Se, perfect Sp | `rsu.pfree.equ`" pfreers.df <- read.delim(textConnection(pfreers.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(pfreers.df) <- unname(as.list(pfreers.df[1,])) # put headers on pfreers.df <- pfreers.df[-1,] # remove first row row.names(pfreers.df) <- NULL pander(pfreers.df, style = 'rmarkdown') ``` *EXAMPLE 7* You are the epidemiologist for a land-locked country in central Asia. You have developed a surveillance program for a given disease which has an estimated system sensitivity of 0.65. The disease of interest is carried by live animals and you know that the frequency of illegal importation of animals into your country (and therefore the likelihood of disease incursion) is higher during the warmer months of the year (June to August). Plot the probability of disease freedom assuming surveillance testing is carried out each month. Include on your plot the probability of disease incursion to show how it changes during the year. Previous surveillance work indicates that the probability that your country is free of disease is 0.50. ```{r message = FALSE} library(ggplot2); library(lubridate); library(scales) # Define a vector of disease incursion probabilities (January to December): p.intro <- c(0.01,0.01,0.01,0.02,0.04,0.10,0.10,0.10,0.08,0.06,0.04,0.02) rval.df <- rsu.pfree.rs(se.p = rep(0.65, times = 12), p.intro = p.intro, prior = 0.50, by.time = TRUE) # Re-format rval.df ready for ggplot2: dat.df <- data.frame(mnum = rep(1:12, times = 2), mchar = rep(seq(as.Date("2020/1/1"), by = "month", length.out = 12), times = 2), class = c(rep("Disease introduction", times = length(p.intro)), rep("Disease freedom", times = length(p.intro))), prob = c(rval.df$PIntro, rval.df$PFree)) # Plot the results: ggplot(data = dat.df, aes(x = mchar, y = prob, group = class, col = class)) + theme_bw() + geom_point() + geom_line() + scale_colour_manual(values = c("red", "dark blue")) + scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b"), name = "Month") + scale_y_continuous(limits = c(0,1), name = "Probability") + geom_hline(aes(yintercept = 0.95), linetype = "dashed", col = "blue") + guides(col = guide_legend(title = "")) + theme(legend.position = c(0.8, 0.5)) ``` ## Risk-based sampling With risk-based sampling we modify the intensity of sampling effort across the population of interest according to risk (as opposed to representative sampling where the probability that an individual unit is sampled is uniform across the population of interest).
When our objective is to detect the presence of disease risk-based sampling makes intuitive sense: we concentrate our search effort on those sections of the population where we believe we are more likely to detect disease (i.e. where the risk of disease is high). ### How many samples do I need? The sample size functions all relate to sampling to achieve a defined surveillance system sensitivity. ```{r ssrb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate sample size using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | Details | Function Risk-based | SSe | Single Se for risk groups, perfect Sp | `rsu.sssep.rbsrg` Risk-based | SSe | Multiple Se within risk groups, perfect Sp | `rsu.sssep.rbmrg` Risk-based | SSe | Two stage sampling, 1 risk factor | `rsu.sssep.rb2st1rf` Risk-based | SSe | Two stage sampling, 2 risk factors | `rsu.sssep.rb2st2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ``` *EXAMPLE 8* You are working with a disease of cattle where the prevalence of disease is believed to vary according to herd type. The risk of disease is 5 times greater in dairy herds and 3 times greater in mixed herds compared with the reference category, beef herds. The distribution of dairy, mixed and beef herds in the population of interest is 0.10, 0.10 and 0.80, respectively. Assume you intend to distribute your sampling effort 0.4, 0.4 and 0.2 across dairy, mixed and beef herds, respectively. Within each of the three risk groups a single test with a diagnostic sensitivity of 0.95 will be used. How many herds need to be sampled if you want to achieve 95% system sensitivity for a prevalence of disease in the population of greater than or equal to 1%? ```{r message = FALSE} # Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests): m <- rbind(1,1,1) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = 0.95) ``` A total of 147 herds need to be sampled: 59 dairy, 59 mixed and 29 beef herds. Now assume that one of two tests will be used for each herd. The first test has a diagnostic sensitivity of 0.92. The second test has a diagnostic sensitivity of 0.80. The proportion of dairy, mixed and beef herds receiving the first test is 0.80, 0.50 and 0.70, respectively (which means that 0.20, 0.50 and 0.30 receive the second test, respectively). Recalculate the sample size. ```{r message = FALSE} # Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests): m <- rbind(c(0.8,0.2), c(0.5,0.5), c(0.7,0.3)) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = c(0.92,0.80)) ``` A total of 159 herds need to be sampled: 64 dairy, 64 mixed and 31 beef herds. *EXAMPLE 9* A cross-sectional study is to be carried out to confirm the absence of disease using risk based sampling. 
Assume a population level design prevalence of 0.01 and there are 'high', 'medium' and 'low' risk areas where the risk of disease in the high risk area compared with the low risk area is 5 and the risk of disease in the medium risk area compared with the low risk area is 3. The proportions of the population at risk in the high, medium and low risk area are 0.10, 0.10 and 0.80, respectively. Half of your samples will be taken from individuals in the high risk area, 0.30 from the medium risk area and 0.20 from the low risk area. You intend to use a test with diagnostic sensitivity of 0.90 and you'd like to take sufficient samples to return a population sensitivity of 0.95. How many units need to be sampled to meet the requirements of the study? ```{r message = FALSE} rsu.sssep.rbsrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.10,0.10,0.80), spr = c(0.50,0.30,0.20), se.p = 0.95, se.u = 0.90) ``` A total of 147 units needs to be sampled to meet the requirements of the study: 74 from the high risk area, 45 from the medium risk area and 28 from the low risk area. *EXAMPLE 10* A cross-sectional study is to be carried out to confirm the absence of disease using risk based sampling. Assume a design prevalence of 0.02 at the cluster (herd) level and a design prevalence of 0.10 at the surveillance unit (individual animal) level. Clusters are categorised as being either high, medium or low risk with the probability of disease for clusters in the high and medium risk area 5 and 3 times the probability of disease in the low risk area. The proportions of clusters in the high, medium and low risk area are 0.10, 0.20 and 0.70, respectively. The proportion of samples from the high, medium and low risk area will be 0.40, 0.40 and 0.20, respectively. Surveillance units (individual animals) are categorised as being either high or low risk with the probability of disease for units in the high risk group 4 times the probability of disease in the low risk group. The proportions of units in the high and low risk groups are 0.10 and 0.90, respectively. All of your samples will be taken from units in the high risk group. You intend to use a test with diagnostic sensitivity of 0.95 and you'd like to take sufficient samples to be 95% certain that you've detected disease at the population level, 95% certain that you've detected disease at the cluster level and 95% at the surveillance unit level. How many clusters and how many units need to be sampled to meet the requirements of the study? ```{r message = FALSE} rsu.sssep.rb2st2rf( rr.c = c(5,3,1), ppr.c = c(0.10,0.20,0.70), spr.c = c(0.40,0.40,0.20), pstar.c = 0.02, rr.u = c(4,1), ppr.u = c(0.1, 0.9), spr.u = c(1,0), pstar.u = 0.10, se.p = 0.95, se.c = 0.95, se.u = 0.95) ``` A total of 82 clusters needs to be sampled: 33 from the high risk area, 33 from the medium risk area and 16 from the low risk area. A total of 9 units should be sampled from each cluster. 
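To see what these risk-based sample size functions are doing, the single-stage calculation in EXAMPLE 9 can be reproduced from first principles: the relative risks are scaled to adjusted risks, each adjusted risk is multiplied by the design prevalence to give an effective probability of infection for each risk group, and the total sample size is the value of n that achieves the required surveillance system sensitivity given the planned allocation of samples across risk groups. A minimal sketch (assuming perfect test specificity, as in EXAMPLE 9; this is a simplified re-derivation, not the `epiR` code):

```{r}
pstar <- 0.01; se.p <- 0.95; se.u <- 0.90
rr <- c(5,3,1); ppr <- c(0.10,0.10,0.80); spr <- c(0.50,0.30,0.20)

# Adjusted risks and the effective probability of infection in each risk group:
ar <- rr / sum(rr * ppr)
epi <- ar * pstar

# Probability that a single sampled unit in each group tests positive, and the
# total sample size required to achieve the target system sensitivity:
p.pos <- se.u * epi
n <- log(1 - se.p) / sum(spr * log(1 - p.pos))
ceiling(n)
```

This returns 147 units in total, matching the output of `rsu.sssep.rbsrg` in EXAMPLE 9.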
### Surveillance system sensitivity ```{r seprb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | Details | Function Risk-based | SSe | Varying Se, perfect Sp | `rsu.sep.rb` Risk-based | SSe | Varying Se, perfect Sp, one risk factor | `rsu.sep.rb1rf` Risk-based | SSe | Varying Se, perfect Sp, two risk factors | `rsu.sep.rb2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ``` *EXAMPLE 11* You have been asked to provide an assessment of a surveillance program for Actinobacillus hyopneumoniae in pigs. It is known that there are high risk and low risk areas for A. hyopneumoniae in your country, with the estimated probability of disease in the high risk area thought to be around 3.5 times that in the low risk area. It is known that 10% of the 1784 pig herds in the study area are in the high risk area and 90% are in the low risk area. The risk of A. hyopneumoniae is dependent on age, with adult pigs around five times more likely to be A. hyopneumoniae positive compared with younger (grower) pigs. Pigs from 20 herds have been sampled: 5 from the low-risk area and 15 from the high-risk area. All of the tested pigs were adults: no grower pigs were tested. The ELISA for A. hyopneumoniae in pigs has a diagnostic sensitivity of 0.95. What is the surveillance system sensitivity if we assume a design prevalence of 1 per 100 at the cluster (herd) level and 5 per 100 at the surveillance system unit (pig) level? ```{r message = FALSE} # There are 1784 herds in the study area: H <- 1784 # Twenty of the 1784 herds are sampled. Generate 20 herds of varying size: set.seed(1234) hsize <- rlnorm(n = 20, meanlog = log(10), sdlog = log(8)) hsize <- round(hsize + 20, digits = 0) # Generate a matrix listing the number of growers and finishers in each of the 20 sampled herds. # Assume that anywhere between 80% and 95% of the pigs in each herd are growers: set.seed(1234) pctg <- runif(n = 20, min = 0.80, max = 0.95) ngrow <- round(pctg * hsize, digits = 0) nfini <- hsize - ngrow N <- cbind(ngrow, nfini) # Generate a matrix listing the number of grower and finisher pigs sampled from each herd. Fifteen pigs from each herd are sampled. If there are fewer than 15 pigs we sample the entire herd: nsgrow <- rep(0, times = 20) nsfini <- ifelse(nfini <= 15, nfini, 15) n <- cbind(nsgrow, nsfini) # The herd-level design prevalence is 0.01 and the individual pig-level design prevalence is 0.05: pstar.c <- 0.01 pstar.u <- 0.05 # For herds in the high-risk area the probability of being A. hyopneumoniae positive is 3.5 times that of herds in the low-risk area. Ninety percent of herds are in the low risk area and 10% are in the high risk area: rr.c <- c(3.5,1) ppr.c <- c(0.1,0.9) # We've sampled 15 herds from the high risk area and 5 herds from the low risk area. Above, for vector rr.c, the relative risk for the high risk group is listed first so the vector rg follows this order: rg <- c(rep(1, times = 15), rep(2, times = 5)) # The probability of being A. hyopneumoniae positive for finishers is 5 times that of growers. For the matrices N and n growers are listed first then finishers.
Vector rr.u follows the same order: rr.u <- c(1,5) # The diagnostic sensitivity of the A. hyopneumoniae ELISA is 0.95: se.u <- 0.95 rsu.sep.rb2st(H = H, N = N, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = NA, se.u = se.u) ``` The estimated surveillance system sensitivity of this program is 0.32. Repeat these analyses assuming we don't know the total number of pig herds in the population and we have only an estimate of the proportions of growers and finishers in each herd. ```{r message = FALSE} # Generate a matrix listing the proportion of growers and finishers in each of the 20 sampled herds: ppr.u <- cbind(rep(0.9, times = 20), rep(0.1, times = 20)) # Set H (the number of clusters) and N (the number of surveillance units within each cluster) to NA: rsu.sep.rb2st(H = NA, N = NA, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = ppr.u, se.u = se.u) ``` The estimated surveillance system sensitivity is 0.21. ## Analysis of passive surveillance data ### Estimation of surveillance system sensitivity and specificity *EXAMPLE 12* There are four 'steps' in a (passive) disease detection process for disease X in your country: (1) an infected animal shows clinical signs of disease; (2) a herd manager observes clinical signs in a diseased animal and calls a veterinarian; (3) a veterinarian responds appropriately to the disease investigation request (taking, for example, appropriate samples for laboratory investigation); and (4) the laboratory conducts appropriate tests on the submitted samples and interprets the results of those tests correctly. The probabilities for each step in the disease detection pathway (in order) are 0.10, 0.20, 0.90 and 0.99, respectively. Assuming the probability that a unit actually has disease if it is submitted for testing is 0.98, the sensitivity of the diagnostic test used at the unit level is 0.90, the population is comprised of 1000 clusters (herds), five animals from each cluster (herd) investigated for disease are tested and the cluster-level design prevalence is 0.01, what is the sensitivity of disease detection at the cluster (herd) and population level? ```{r message = FALSE} rsu.sep.pass(step.p = c(0.10,0.20,0.90,0.99), pstar.c = 0.01, p.inf.u = 0.98, N = 1000, n = 5, se.u = 0.90) ``` The sensitivity of disease detection at the cluster (herd) level is 0.018. The sensitivity of disease detection at the population level is 0.16. ## Miscellaneous functions ### Adjusted relative risks *EXAMPLE 13* For a given disease of interest you believe that there is a 'high risk' and 'low risk' area in your country. The risk of disease in the high-risk area compared with the low-risk area is 5. A recent census shows that 10% of the population are resident in the high-risk area and 90% are resident in the low-risk area. Calculate the adjusted relative risks for each area. ```{r message = FALSE} rsu.adjrisk(rr = c(5,1), ppr = c(0.10,0.90)) ``` The adjusted relative risks for the high and low risk areas are 3.6 and 0.7, respectively. ### Design prevalence back calculation *EXAMPLE 14* The population size in a provincial area in your country is 193,000. In a given two-week period a total of 7764 individuals have been tested for COVID-19 using an approved PCR which is believed to have a diagnostic sensitivity of 0.85. All of the individuals tested have returned a negative result.
What is the maximum prevalence required to provide a surveillance system sensitivity of 0.95 if COVID-19 is actually present in this population (i.e. what is the back-calculated design prevalence)? Express your result as the number of COVID-19 cases per 100,000 head of population.

```{r message = FALSE}
rsu.pstar(N = 193000, n = 7764, se.p = 0.95, se.u = 0.85) * 100000
```

If the 7764 individuals have all returned a negative test result (using a test with 85% sensitivity) we can be 95% confident that COVID-19, if it is present, is present at a prevalence of 44 cases per 100,000 or less.

What is the probability that the prevalence of COVID-19 in this population is less than or equal to 10 cases per 100,000?

```{r message = FALSE}
rsu.sep(N = 193000, n = 7764, pstar = 10 / 100000, se.u = 0.85)
```

If all of the 7764 individuals returned a negative test we can be 48% confident that the prevalence of COVID-19 in the province is less than 10 per 100,000.

How many individuals need to be tested to be 95% confident that the prevalence of COVID-19 is less than or equal to 10 cases per 100,000? We return to the sample size functions covered earlier:

```{r message = FALSE}
rsu.sssep.rs(N = 193000, pstar = 10 / 100000, se.p = 0.95, se.u = 0.85)
```

To be 95% confident that the prevalence of COVID-19 is less than or equal to 10 cases per 100,000 a total of 31,586 individuals need to be tested.

## References

epiR/vignettes/population_attributable_fraction.png [binary PNG image data omitted]
epiR/vignettes/index.html0000644000176200001440000000665514100646226015173 0ustar liggesusers
R: Vignettes and other documentation

Vignettes and other documentation


[Top]

Vignettes from package 'epiR'

epiR::epiR_RSurveillance epiR-RSurveillance function mapping HTML source R code
epiR::epiR_descriptive Descriptive epidemiology HTML source R code
epiR::epiR_measures_of_association Measures of association HTML source R code
epiR::epiR_sample_size Sample size calculations HTML source R code
epiR::epiR_surveillance Disease surveillance HTML source R code
epiR/vignettes/epiR_descriptive.bib0000644000176200001440000000164614110616220017137 0ustar liggesusers@book{cdc:2006, author = {Centers for Disease Control and Prevention}, title = {{Principles of Epidemiology in Public Health Practice: An Introduction to Applied Epidemiology and Biostatistics}}, publisher = {Centers for Disease Control and Prevention}, address = {Atlanta, Georgia}, note = {cdc:2006}, year = {2006} } @article{diggle:1990, author = {Diggle, PJ}, title = {A point process modeling approach to raised incidence of a rare phenomenon in the vicinity of a prespecified point}, journal = {Journal of the Royal Statistical Society Series A}, volume = {153}, pages = {349 - 362}, note = {diggle:1990}, year = {1990} } @article{feychting_et_al:1998, author = {Feychting, M and Osterlund, B and Ahlbom, A}, title = {Reduced cancer incidence among the blind}, journal = {Epidemiology}, volume = {9}, pages = {490 - 494}, year = {1998} } epiR/vignettes/epiR_surveillance.bib0000644000176200001440000000132214120350760017306 0ustar liggesusers@article{cameron_baldock:1998a, author = {Cameron, AR and Baldock, FC}, title = {A new probability formula for surveys to substantiate freedom from disease}, journal = {Preventive Veterinary Medicine}, volume = {34}, pages = {1 - 17}, year = {1998a} } @article{thacker_berkelman:1988, author = {Thacker, SB and Berkelman, RL}, title = {{Public health surveillance in the United States}}, journal = {Epidemiological Reviews}, volume = {10}, pages = {164 - 190}, year = {1988} } @inbook{oie:2021, author = {OIE}, title = {{Terrestrial Animal Health Code}}, publisher = {World Organisation for Animal Health}, address = {Paris}, year = {2021} } epiR/vignettes/epiR_measures_of_association.Rmd0000644000176200001440000005027514135133050021514 0ustar liggesusers--- title: "Measures of Association" author: "Mark Stevenson" date: "`r Sys.Date()`" bibliography: epiR_measures_of_association.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Measures of association} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} library(knitr); library(kableExtra) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` A common task in epidemiology is to quantify the strength of association between exposures ('risk factors') and disease outcomes. In this context the term 'exposure' is taken to mean a variable whose association with the outcome is to be estimated. Exposures can be harmful, beneficial or both harmful and beneficial (e.g. if an immunisable disease is circulating, exposure to immunising agents helps most recipients but may harm those who experience adverse reactions). The term 'outcome' is used to describe all the possible results that may arise from exposure to a causal factor or from preventive or therapeutic interventions [@porta:2014]. In human and animal health an 'outcome-positive' individual is an individual who has experienced a given disease of interest. In this vignette we outline describe how `epiR` can be used to compute the various measures of association used in epidemiology notably the risk ratio, odds ratio, attributable risk in the exposed, attributable fraction in the exposed, attributable risk in the population and attributable fraction in the population. 
Examples are provided to demonstrate how the package can be used to deal with exposure-outcome data presented in various formats. This vignette has been written assuming the reader routinely formats their 2 $\times$ 2 table data with the outcome status as columns and exposure status as rows. If this is not the case the argument `outcome = "as.columns"` (the default) can be changed to `outcome = "as.rows"`.

**Measures of association strength**

***The incidence risk ratio***

Consider a study where subjects are disease free at the start of the study and all are monitored for disease occurrence for a specified time period. At the start of the study period study subjects are classified according to exposure to a hypothesised risk factor. If both exposure and outcome are binary variables (yes or no) we can present the counts of subjects in each of the four exposure-disease categories in a 2 $\times$ 2 table.

```{r echo = FALSE, results = 'asis'}
twobytwo <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), "Total" = c("a+b","c+d","a+b+c+d"))
colnames(twobytwo) <- c("Dis pos","Dis neg","Total")
rownames(twobytwo) <- c("Exp pos","Exp neg","Total")

kbl(twobytwo, caption = "A 2 by 2 table.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em")
  # row_spec(row = 1, bold = TRUE)
```

When our data are in this format we can calculate the incidence risk of the outcome in those that were exposed $R_{E+}$, the incidence risk in those that were not exposed $R_{E-}$ and finally the incidence risk in the total study population $R_{T}$:

```{r echo = FALSE, results = 'asis'}
irr <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), "Total" = c("a+b","c+d","a+b+c+d"), "Risk" = c("RE+ = a/(a+b)","RE- = c/(c+d)", "RT = (a+c)/(a+b+c+d)"))
colnames(irr) <- c("Dis pos","Dis neg","Total","Risk")
rownames(irr) <- c("Exp pos","Exp neg","Total")

kbl(irr, caption = "A 2 by 2 table with incidence risks calculated for the exposed, the unexposed and the total study population.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em") %>%
  column_spec(4, bold = FALSE, width = "10em")
  # row_spec(row = 1, bold = TRUE)
```

The incidence risk ratio is then the incidence risk of the outcome in the exposed divided by the incidence risk of the outcome in the unexposed (Figure 1).

![The incidence risk ratio.](risk_ratio.png)

The incidence risk ratio provides an estimate of how many times more likely exposed individuals are to experience the outcome of interest, compared with non-exposed individuals. If the incidence risk ratio equals 1, the risk of the outcome in the exposed and non-exposed groups is equal. If the incidence risk ratio is greater than 1, exposure increases the outcome risk, with greater departures from 1 indicating a stronger effect. If the incidence risk ratio is less than 1, exposure reduces the outcome risk and exposure is said to be protective.

***The odds ratio --- cohort studies***

In a cohort study definition of exposure status (exposure-positive, exposure-negative) comes first. Subjects are then followed over time to determine their outcome status (outcome-positive, outcome-negative).
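A reminder before working through the tables that follow: an odds is simply a probability re-expressed as a ratio, so if $p$ is the probability of the outcome the corresponding odds are $p / (1 - p)$. The cell-count expressions below (for example $O_{E+} = a/b$) are shorthand for exactly this quantity: the number of outcome-positive subjects divided by the number of outcome-negative subjects in the relevant group.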
The odds of the outcome in the exposed and unexposed populations are calculated as follows:

```{r echo = FALSE, results = 'asis'}
or.cohort <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), "Total" = c("a+b","c+d","a+b+c+d"), "Odds" = c("OE+ = a/b","OE- = c/d", "OT = (a+c)/(b+d)"))
colnames(or.cohort) <- c("Dis pos","Dis neg","Total","Odds")
rownames(or.cohort) <- c("Exp pos","Exp neg","Total")

kbl(or.cohort, caption = "A 2 by 2 table with the odds of disease calculated for the exposed, the unexposed and the total study population.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em") %>%
  column_spec(4, bold = FALSE, width = "10em")
  # row_spec(row = 1, bold = TRUE)
```

The odds ratio for a cohort study is then the odds of the outcome in the exposed divided by the odds of the outcome in the unexposed.

***The odds ratio --- case-control studies***

In a case-control study outcome status (disease-positive, disease-negative) is defined first. The history provided by each study subject then provides information about exposure status. For case-control studies, instead of talking about the odds of *disease* in the exposed and unexposed groups (as we did when we were working with data from a cohort study) we talk about the odds of *exposure* in the case and control groups.

```{r echo = FALSE, results = 'asis'}
or.cc <- data.frame("Case" = c("a","c","a+c","OD+ = a/c"), "Control" = c("b","d","b+d","OD- = b/d"), "Total" = c("a+b","c+d","a+b+c+d","OT = (a+b)/(c+d)"))
colnames(or.cc) <- c("Case","Control","Total")
rownames(or.cc) <- c("Exp pos","Exp neg","Total","Odds")

kbl(or.cc, caption = "A 2 by 2 table with the odds of exposure calculated for cases, controls and the total study population.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em") %>%
  column_spec(4, bold = FALSE, width = "10em")
  # row_spec(row = 1, bold = TRUE)
```

The odds ratio is defined as the odds of exposure in the cases ($O_{D+}$) divided by the odds of exposure in the controls ($O_{D-}$). Note that the numeric estimate of the odds ratio is exactly the same as that calculated for a cohort study; the expression of the result is the only thing that differs. In a cohort study we talk about the odds of disease being $x$ times greater (or less) in the exposed, compared with the unexposed. In a case-control study we talk about the odds of exposure being $x$ times greater (or less) in cases, compared with controls.

**Measures of effect in the exposed**

***The attributable risk in the exposed***

The attributable risk is defined as the increase or decrease in the risk of the outcome in the exposed that is attributable to exposure (Figure 2). Attributable risk (unlike the incidence risk ratio) provides a measure of the absolute frequency of the outcome associated with exposure.

![The attributable risk in the exposed.](attributable_risk.png)

A useful way of expressing attributable risk in a clinical setting is in terms of the number needed to treat, NNT. NNT equals the inverse of the attributable risk. Depending on the outcome of interest we often elect to use different labels for NNT. When dealing with an outcome that is 'desirable' (e.g. treatment success) we call NNT the number needed to treat for benefit, NNTB. NNTB equals the number of subjects who would have to be exposed to result in a single (desirable) outcome.
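As a worked example (the figures here are illustrative only, not taken from the data used elsewhere in this vignette): if exposure increases the incidence risk of a desirable outcome from 0.25 to 0.45, the attributable risk is $0.45 - 0.25 = 0.20$ and NNTB $= 1 / 0.20 = 5$, that is, five subjects would need to be exposed to yield one additional desirable outcome.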
When dealing with an outcome that is 'undesirable' (e.g. death) we call NNT the number needed to treat for harm, NNTH. NNTH equals the number of subjects who would have to be exposed to result in a single (undesirable) outcome. ***The attributable fraction in the exposed*** The attributable fraction in the exposed is the proportion of outcome-positive subjects in the exposed group that is due to exposure (Figure 3). ![The attributable fraction in the exposed.](attributable_fraction.png) **Measures of effect in the population** ***The attributable risk in the population*** The population attributable risk is the increase or decrease in incidence risk of the outcome in the study population that is attributable to exposure (Figure 4). ![The attributable risk in the population](population_attributable_risk.png) ***The attributable fraction in the population*** The population attributable fraction (also known as the aetiologic fraction) is the proportion of outcome-positive subjects in the study population that is due to the exposure (Figure 5). ![The attributable fraction in the population](population_attributable_fraction.png) On the condition that the exposure of interest is a cause of the disease outcome, the population attributable fraction represents the proportional reduction in average disease risk over a specified period of time that would be achieved by eliminating the exposure of interest while the distribution of other risk factors in the population remained unchanged. For this reason, PAFs are particularly useful to guide policy makers when planning public health interventions. If you're going to use PAFs as a means for informing policy, make sure that: (1) the exposure of interest is causally related to the outcome, and (2) the exposure of interest is something amenable to intervention. **Theory to practice: Calculating measures of association using R** ***Direct entry of 2 by 2 table contingency table cell frequencies*** Firstly, a 2 $\times$ 2 table can be created by listing the contingency table cell frequencies in vector format. Take the following example. A cross sectional study investigating the relationship between dry cat food (DCF) and feline lower urinary tract disease (FLUTD) was conducted [@willeberg:1977]. Counts of individuals in each group were as follows. DCF-exposed cats (cases, non-cases) 13, 2163. Non DCF-exposed cats (cases, non-cases) 5, 3349. We can enter these data directly into R as a vector of length four. Check that your counts have been entered in the correct order by viewing the data as a matrix. ```{r} dat.v01 <- c(13,2163,5,3349); dat.v01 # View the data in the usual 2 by 2 table format: matrix(dat.v01, nrow = 2, byrow = TRUE) ``` Calculate the prevalence ratio, odds ratio, attributable prevalence in the exposed, attributable fraction in the exposed, attributable prevalence in the population and the attributable fraction in the population using function `epi.2by2`. Note that we use the term prevalence ratio (instead of incidence risk ratio) here because we're dealing with data from a cross-sectional study --- the frequency of disease is expressed as a prevalence, not an incidence. ```{r} library(epiR) epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") ``` The prevalence of FLUTD in DCF exposed cats was 4.01 (95% CI 1.43 to 11.23) times greater than the prevalence of FLUTD in non-DCF exposed cats. In DCF exposed cats, 75% of FLUTD was attributable to DCF (95% CI 30% to 91%). 
Fifty-four percent of FLUTD cases in this cat population were attributable to DCF (95% CI 4% to 78%).

Need a hand getting the correct wording to explain each of the listed measures of association and measures of effect? Set `interpret = TRUE` in `epi.2by2`:

```{r}
epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, units = 100, interpret = TRUE, outcome = "as.columns")
```

***Data frame with one row per observation***

Here we provide examples where you have exposure status and outcome status listed for each member of your study population. There are two options for contingency table preparation in this situation: (1) using base R's `table` function; or (2) using the `tidyverse` package.

For this example we use the low infant birth weight data presented by @hosmer_lemeshow:2000 and available in the `MASS` package in R. The `birthwt` data frame has 189 rows and 10 columns. The data were collected at Baystate Medical Center, Springfield, Massachusetts USA during 1986.

**Two by two table preparation using the `table` function in base R**

```{r}
library(MASS)

# Load and view the data:
dat.df02 <- birthwt; head(dat.df02)
```

Each row of this data set represents data for one mother. We're interested in the association between `smoke` (the mother's smoking status during pregnancy) and `low` (delivery of a baby less than 2.5 kg bodyweight).

It's important that the table you present to `epi.2by2` is in the correct format: outcome positives in the first column, outcome negatives in the second column, exposure positives in the first row and exposure negatives in the second row. If we run the `table` function on the `birthwt` data the output table is in the wrong format:

```{r}
dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02
```

There are two ways to fix this problem. The quick fix is to simply ask R to switch the order of the rows and columns in the output table:

```{r}
dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02

dat.tab02 <- dat.tab02[2:1,2:1]; dat.tab02
```

The second approach is to set the exposure variable and the outcome variable as a factor and to define the levels of each factor using `levels = c(1,0)`:

```{r}
dat.df02$low <- factor(dat.df02$low, levels = c(1,0))
dat.df02$smoke <- factor(dat.df02$smoke, levels = c(1,0))
dat.df02$race <- factor(dat.df02$race, levels = c(1,2,3))

dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02
```

Now compute the odds ratio for smoking and delivery of a low birth weight baby:

```{r}
dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi02
```

The odds of having a low birth weight child for smokers is 2.02 (95% CI 1.08 to 3.78) times greater than the odds of having a low birth weight child for non-smokers.
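If you want to check the reported odds ratio by hand, the point estimate is just the cross-product of the 2 by 2 table cells. A quick sketch, assuming `dat.tab02` as created above (outcome positives in the first column, exposure positives in the first row):

```{r}
# Cross-product check of the crude odds ratio: (a x d) / (b x c)
or.crude <- (dat.tab02[1,1] * dat.tab02[2,2]) / (dat.tab02[1,2] * dat.tab02[2,1])
or.crude
```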
**Two by two table preparation using `tidyverse`**

The `tidyverse` package can also be used to prepare data in the required format:

```{r}
library(tidyverse)

dat.df03 <- birthwt; head(dat.df03)

# Here we set the factor levels and tabulate the data in a single call using pipe operators:
dat.tab03 <- dat.df03 %>%
  mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>%
  mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>%
  group_by(smoke, low) %>%
  summarise(n = n())

# View the data:
dat.tab03

## View the data in conventional 2 by 2 table format:
pivot_wider(dat.tab03, id_cols = c(smoke), names_from = low, values_from = n)
```

As before, compute the odds ratio for smoking and delivery of a low birth weight baby:

```{r}
dat.epi03 <- epi.2by2(dat = dat.tab03, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi03
```

The odds of having a low birth weight child for smokers is 2.02 (95% CI 1.08 to 3.78) times greater than the odds of having a low birth weight child for non-smokers.

**Confounding**

We're concerned that the mother's race may confound the association between smoking and delivery of a low birth weight baby, so we'll stratify the data by race and compute the Mantel-Haenszel adjusted odds ratio. As before, our tables can be prepared using either base R or `tidyverse`.

***Stratified two by two table preparation using the table function in base R***

```{r}
dat.df04 <- birthwt; head(dat.df04)

dat.tab04 <- table(dat.df04$smoke, dat.df04$low, dat.df04$race, dnn = c("Smoke", "Low BW", "Race")); dat.tab04
```

Compute the Mantel-Haenszel adjusted odds ratio for smoking and delivery of a low birth weight baby, adjusting for the effect of race. Function `epi.2by2` automatically calculates the Mantel-Haenszel odds ratio and risk ratio when it is presented with stratified contingency tables.

```{r}
dat.epi04 <- epi.2by2(dat = dat.tab04, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi04
```

The Mantel-Haenszel test of homogeneity of the strata odds ratios is not significant (chi square test statistic 2.800; df 2; p-value = 0.25). We accept the null hypothesis and conclude that the odds ratios for each stratum of race are the same. Because the stratum specific odds ratios are the same it is appropriate to compute a Mantel-Haenszel adjusted odds ratio. After accounting for the confounding effect of race, the odds of having a low birth weight child for smokers was 3.09 (95% CI 1.49 to 6.39) times that of non-smokers.
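If you need the Mantel-Haenszel estimate programmatically (for example, to tabulate or plot it) rather than reading it from the printed summary, it can be taken from the `massoc.detail` element of the object returned by `epi.2by2`; this is the same element used to build the error bar plot later in this vignette. A short sketch using the object created above:

```{r}
# Mantel-Haenszel adjusted odds ratio with its lower and upper confidence limits:
dat.epi04$massoc.detail$OR.mh
```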
***Stratified two by two table preparation using tidyverse*** ```{r} dat.df05 <- birthwt; head(dat.df05) dat.tab05 <- dat.df05 %>% mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% mutate(race = factor(race)) %>% group_by(race, smoke, low) %>% summarise(n = n()) dat.tab05 ## View the data in conventional 2 by 2 table format: pivot_wider(dat.tab05, id_cols = c(race, smoke), names_from = low, values_from = n) ``` Compute the Mantel-Haenszel adjusted odds ratio for smoking and delivery of a low birth weight baby, adjusting for the effect of race: ```{r} dat.epi05 <- epi.2by2(dat = dat.tab05, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi05 ``` The Mantel-Haenszel test of homogeneity of the strata odds ratios is not significant (chi square test statistic 2.800; df 2; p-value = 0.25) so we accept the null hypothesis and conclude that the odds ratios for each strata of race are the same. After accounting for the confounding effect of race, the odds of having a low birth weight child for smokers is 3.09 (95% CI 1.49 to 6.39) times that of non-smokers. Plot the individual strata odds ratios and the Mantel-Haenszel summary odds ratio as an error bar plot to better understand how the Mantel-Haenszel adjusted odds ratio relates to the individual strata odds ratios: ```{r} library(ggplot2); library(scales) nstrata <- 1:length(unique(dat.tab05$race)) strata.lab <- paste("Strata ", nstrata, sep = "") y.at <- c(nstrata, max(nstrata) + 1) y.lab <- c("M-H", strata.lab) x.at <- c(0.25,0.5,1,2,4,8,16,32) or.p <- c(dat.epi05$massoc.detail$OR.mh$est, dat.epi05$massoc.detail$OR.strata.cfield$est) or.l <- c(dat.epi05$massoc.detail$OR.mh$lower, dat.epi05$massoc.detail$OR.strata.cfield$lower) or.u <- c(dat.epi05$massoc.detail$OR.mh$upper, dat.epi05$massoc.detail$OR.strata.cfield$upper) gdat.df05 <- data.frame(y.at, y.lab, or.p, or.l, or.u) ggplot(data = gdat.df05, aes(x = or.p, y = y.at)) + geom_point() + geom_errorbarh(aes(xmax = or.l, xmin = or.u, height = 0.2)) + labs(x = "Odds ratio", y = "Strata") + scale_x_continuous(trans = log2_trans(), breaks = x.at, limits = c(0.25,32)) + scale_y_continuous(breaks = y.at, labels = y.lab) + geom_vline(xintercept = 1, lwd = 1) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0)) ``` ## References --- nocite: '@*' ---epiR/vignettes/epiR_sample_size.Rmd0000644000176200001440000002265514135130042017122 0ustar liggesusers--- title: "Sample Size Calculations Using epiR" author: "Mark Stevenson" date: "`r Sys.Date()`" bibliography: epiR_sample_size.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Sample size calculations} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` ### Prevalence estimation A review of sample size calculations in (veterinary) epidemiological research is provided by @stevenson:2021. The expected seroprevalence of brucellosis in a population of cattle is thought to be in the order of 15%. 
How many cattle need to be sampled and tested to be 95% certain that our seroprevalence estimate is within 20% of the true population value? That is, from 0.15 - (0.20 $\times$ 0.15) = 0.12 to 0.15 + (0.20 $\times$ 0.15) = 0.18, i.e. from 12% to 18%. Assume the test you will use has perfect sensitivity and specificity. This formula requires the population size to be specified so we set N to a large number, 1,000,000:

```{r message = FALSE}
library(epiR)

epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
```

A total of 545 cows are required to meet the requirements of the study.

### Prospective cohort study

A prospective cohort study of dry food diets and feline lower urinary tract disease (FLUTD) in mature male cats is planned. A sample of cats will be selected at random from the population of cats in a given area and owners who agree to participate in the study will be asked to complete a questionnaire at the time of enrollment. Cats enrolled into the study will be followed for at least 5 years to identify incident cases of FLUTD.

The investigators would like to be 0.80 certain of being able to detect when the risk ratio of FLUTD is 1.4 for cats habitually fed a dry food diet, using a 0.05 significance test. Previous evidence suggests that the incidence risk of FLUTD in cats not on a dry food (i.e. 'other') diet is around 50 per 1000. Assuming equal numbers of cats on dry food and other diets are sampled, how many cats should be sampled to meet the requirements of the study?

```{r message = FALSE}
epi.sscohortt(irexp1 = 50/1000, irexp0 = 70/1000, FT = 5, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95)$n.total
```

A total of 2080 subjects are required (1040 exposed and 1040 unexposed).

### Case-control study

A case-control study of the relationship between white pigmentation around the eyes and ocular squamous cell carcinoma in Hereford cattle is planned. A sample of cattle with newly diagnosed squamous cell carcinoma will be compared for white pigmentation around the eyes with a sample of controls. Assuming an equal number of cases and controls, how many study subjects are required to detect an odds ratio of 2.0 with 0.80 power using a two-sided 0.05 test? Previous surveys have shown that around 0.30 of Hereford cattle without squamous cell carcinoma have white pigmentation around the eyes.

```{r message = FALSE}
epi.sscc(OR = 2.0, p1 = NA, p0 = 0.30, n = NA, power = 0.80, r = 1, phi.coef = 0, design = 1, sided.test = 2, conf.level = 0.95, method = "unmatched", nfractional = FALSE, fleiss = FALSE)$n.total
```

If the true odds for squamous cell carcinoma in exposed subjects relative to unexposed subjects is 2.0, we will need to enroll 141 cases and 141 controls (282 cattle in total) to reject the null hypothesis that the odds ratio equals one with probability (power) 0.80. The Type I error probability associated with this test of the null hypothesis is 0.05.

### Non-inferiority trial

Suppose a pharmaceutical company would like to conduct a clinical trial to compare the efficacy of two antimicrobial agents when administered orally to patients with skin infections. Assume the true mean cure rate of the treatment is 0.85 and the true mean cure rate of the control is 0.65. We consider a difference of less than 0.10 in cure rate to be of no clinical importance (i.e. delta = 0.10).
Assuming a one-sided test size of 5% and a power of 80%, how many subjects should be included in the trial?

```{r message = FALSE}
epi.ssninfb(treat = 0.85, control = 0.65, delta = 0.10, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05)$n.total
```

A total of 50 subjects need to be enrolled in the trial, 25 in the treatment group and 25 in the control group.

### One-stage cluster sampling

An aid project has distributed cook stoves in a single province in a resource-poor country. At the end of three years, the donors would like to know what proportion of households are still using their donated stove. A cross-sectional study is planned where villages in a province will be sampled and all households (approximately 75 per village) will be visited to determine if the donated stove is still in use.

A pilot study of the prevalence of stove usage in five villages showed that 0.46 of householders were still using their stove and the intracluster correlation coefficient (ICC) for stove use within villages is in the order of 0.20. If the donor wanted to be 95% confident that the survey estimate of stove usage was within 10% of the true population value, how many villages (clusters) need to be sampled?

```{r message = FALSE}
epi.ssclus1estb(b = 75, Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu
```

A total of 96 villages need to be sampled to meet the requirements of the study.

### One-stage cluster sampling (continued)

Continuing the example above, we are now told that the number of households per village varies. The average number of households per village is 75, with a 0.025 quantile of 40 households and a 0.975 quantile of 180. Assuming the number of households per village follows a normal distribution, the expected standard deviation of the number of households per village is in the order of (180 - 40) $\div$ 4 = 35. How many villages need to be sampled?

```{r message = FALSE}
epi.ssclus1estb(b = c(75,35), Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu
```

A total of 115 villages need to be sampled to meet the requirements of the study.

### Two-stage cluster sampling

This example is adapted from @bennett_et_al:1991. We intend to conduct a cross-sectional study to determine the prevalence of disease X in a given country. The expected prevalence of disease is thought to be around 20%. Previous studies report an intracluster correlation coefficient for this disease of 0.02. Suppose that we want to be 95% certain that our estimate of the prevalence of disease is within 5% of the true population value and that we intend to sample 20 individuals per cluster. How many clusters should be sampled to meet the requirements of the study?

```{r message = FALSE}
# From first principles:
n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
n.crude

# A total of 246 subjects need to be enrolled into the study. Calculate the design effect:
rho <- 0.02; b <- 20
D <- rho * (b - 1) + 1; D

# The design effect is 1.38. Our crude sample size estimate needs to be increased by a factor of 1.38.
n.adj <- ceiling(n.crude * D)
n.adj

# After accounting for lack of independence in the data a total of 340 subjects need to be enrolled into the study. How many clusters are required?
ceiling(n.adj / b)

# Do all of the above using epi.ssclus2estb:
epi.ssclus2estb(b = 20, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", rho = 0.02, nfractional = FALSE, conf.level = 0.95)
```

A total of 17 clusters need to be sampled to meet the specifications of this study. Function `epi.ssclus2estb` returns a warning message that the number of clusters is less than 25.

### Two-stage cluster sampling (continued)

Continuing the brucellosis prevalence example above, being seropositive to brucellosis is likely to cluster within herds. @otte_gumm:1997 report the intracluster correlation coefficient for Brucella abortus in cattle to be in the order of 0.09. Adjust your sample size of 545 cows to account for lack of independence in the data, i.e. clustering at the herd level. Assume that b = 10 animals will be sampled per herd:

```{r message = FALSE}
n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
n.crude

rho <- 0.09; b <- 10
D <- rho * (b - 1) + 1; D

n.adj <- ceiling(n.crude * D)
n.adj

# Similar to the example above, we can do all of these calculations using epi.ssclus2estb:
epi.ssclus2estb(b = 10, Py = 0.15, epsilon = 0.20, error = "relative", rho = 0.09, nfractional = FALSE, conf.level = 0.95)
```

After accounting for clustering at the herd level we estimate that a total of (545 $\times$ 1.81) $\approx$ 987 cattle need to be sampled to meet the requirements of the survey. If 10 cows are sampled per herd this means that a total of (987 $\div$ 10) $\approx$ 99 herds are required.

### References

epiR/vignettes/epiR_RSurveillance.Rmd0000644000176200001440000002210514100646226017363 0ustar liggesusers
---
title: "epiR - RSurveillance function mapping"
author: "Evan Sergeant and Mark Stevenson"
date: "`r Sys.Date()`"
link-citations: yes
output:
  knitr:::html_vignette:
    toc: true
vignette: >
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteIndexEntry{epiR-RSurveillance function mapping}
  %\usepackage[utf8]{inputenc}
---
\setmainfont{Calibri Light}

```{r, echo = FALSE, message = FALSE}
# If you want to create a PDF document paste the following after line 9 above:
#   pdf_document:
#     toc: true
#     highlight: tango
#     number_sections: no
#     latex_engine: xelatex
# header-includes:
#    - \usepackage{fontspec}

knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(tibble.print_min = 4L, tibble.print_max = 4L)
```

The following tables list each of the functions in `RSurveillance` and their equivalent in `epiR`.
## Representative sampling ### Sample size estimation ```{r ssrs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} library(pander) panderOptions('table.split.table', Inf) set.caption("Functions to estimate sample size using representative population sampling data.") ssrs.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | Prob disease freedom | `n.pfree` | `rsu.sspfree.rs` Representative | SSe | `n.freedom` | `rsu.sssep.rs` Two stage representative | SSe | `n.2stage` | `rsu.sssep.rs2st` Representative | SSe | `n.freecalc` | `rsu.sssep.rsfreecalc` Pooled representative | SSe | `n.pooled` | `rsu.sssep.rspool`" ssrs.df <- read.delim(textConnection(ssrs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrs.df) <- unname(as.list(ssrs.df[1,])) # put headers on ssrs.df <- ssrs.df[-1,] # remove first row row.names(ssrs.df) <- NULL pander(ssrs.df, style = 'rmarkdown') ``` ### Estimation of surveillance system sensitivity ```{r seprs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.") seprs.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | SSe | `sep.binom` | `rsu.sep.rs` Representative | SSe | `sep.hypergeo` | `rsu.sep.rs` Representative | SSe | `sep` | `rsu.sep.rs` Two stage representative | SSe | `sep.sys` | `rsu.sep.rs2st` Representative | SSe | `sse.combined` | `rsu.sep.rsmult` Representative | SSe | `sep.freecalc` | `rsu.sep.rsfreecalc` Representative | SSe | `sep.binom.imperfect`| `rsu.sep.rsfreecalc` Pooled representative | SSe | `sep.pooled` | `rsu.sep.rspool` Representative | SSe | `sep.var.se` | `rsu.sep.rsvarse` Representative | SSp | `spp` | `rsu.spp.rs` Representative | SSp | `sph.hp` | `rsu.spp.rs`" seprs.df <- read.delim(textConnection(seprs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprs.df) <- unname(as.list(seprs.df[1,])) # put headers on seprs.df <- seprs.df[-1,] # remove first row row.names(seprs.df) <- NULL pander(seprs.df, style = 'rmarkdown') ``` ### Estimation of the probability of disease freedom ```{r pfreers.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate the probability of disease freedom using representative population sampling data.") pfreers.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | Prob disease of freedom | `pfree.1` | `rsu.pfree.rs` Representative | Prob disease of freedom | `pfree.calc` | `rsu.pfree.rs` Representative | Equilibrium prob of disease freedom | `pfree.equ` | `rsu.pfree.equ`" pfreers.df <- read.delim(textConnection(pfreers.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(pfreers.df) <- unname(as.list(pfreers.df[1,])) # put headers on pfreers.df <- pfreers.df[-1,] # remove first row row.names(pfreers.df) <- NULL pander(pfreers.df, style = 'rmarkdown') ``` ## Risk based sampling ### Sample size estimation ```{r ssrb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate sample size using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `n.rb` | `rsu.sssep.rbsrg` Risk-based | SSe | `n.rb.varse` | `rsu.sssep.rbmrg` Risk-based | SSe | `n.rb.2stage.1` | `rsu.sssep.rb2st1rf` Risk-based | SSe | `n.rb.2stage.2` | `rsu.sssep.rb2st2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, 
sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ``` ### Estimation of surveillance system sensitivity ```{r seprb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using risk based sampling data.") seprb.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.rb.bin.varse` | `rsu.sep.rb` Risk-based | SSe | `sep.rb.bin` | `rsu.sep.rb1rf` Risk-based | SSe | `sep.rb.hypergeo` | `rsu.sep.rb1rf` Risk-based | SSe | `sep.rb2.bin` | `rsu.sep.rb2rf` Risk-based | SSe | `sep.rb2.hypergeo` | `rsu.sep.rb2rf` Risk-based | SSe | `sep.rb.hypergeo.varse` | `rsu.sep.rbvarse` Risk-based | SSe | `sse.rb2stage` | `rsu.sep.rb2stage`" seprb.df <- read.delim(textConnection(seprb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprb.df) <- unname(as.list(seprb.df[1,])) # put headers on seprb.df <- seprb.df[-1,] # remove first row row.names(seprb.df) <- NULL pander(seprb.df, style = 'rmarkdown') ``` ## Census data ### Estimation of surveillance system sensitivity ```{r sepcen.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using census data.") sepcen.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.exact` | `rsu.sep.cens`" sepcen.df <- read.delim(textConnection(sepcen.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(sepcen.df) <- unname(as.list(sepcen.df[1,])) # put headers on sepcen.df <- sepcen.df[-1,] # remove first row row.names(sepcen.df) <- NULL pander(sepcen.df, style = 'rmarkdown') ``` ## Passive surveillance data ```{r seppas.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using passively collected surveillance data.") sepcen.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.passive` | `rsu.sep.pass`" seppas.df <- read.delim(textConnection(sepcen.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seppas.df) <- unname(as.list(seppas.df[1,])) # put headers on seppas.df <- seppas.df[-1,] # remove first row row.names(seppas.df) <- NULL pander(seppas.df, style = 'rmarkdown') ``` ## Miscellaneous functions ```{r misc.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Miscellaneous functions.") misc.tab <- " Details | RSurveillance | epiR Adjusted risk | `adj.risk` | `rsu.adjrisk` Adjusted risk | `adj.risk.sim` | `rsu.adjrisk` Series test interpretation, Se | `se.series` | `rsu.dxtest` Parallel test interpretation, Se | `se.parallel` | `rsu.dxtest` Series test interpretation, Sp | `sp.series` | `rsu.dxtest` Parallel test interpretation, Sp | `sp.parallel` | `rsu.dxtest` Effective probability of infection | `epi.calc` | `rsu.epinf` Design prevalence back calculation | `pstar.calc` | `rsu.pstar` Prob disease is less than design prevalence | | `rsu.sep`" misc.df <- read.delim(textConnection(misc.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(misc.df) <- unname(as.list(misc.df[1,])) # put headers on misc.df <- misc.df[-1,] # remove first row row.names(misc.df) <- NULL pander(misc.df, style = 'rmarkdown') ``` 
epiR/vignettes/attributable_risk.png [binary PNG image data omitted]
epiR/vignettes/attributable_fraction.png [binary PNG image data omitted]
MНIʞkN1JJ4-/8WR*MWmB`adܹG9􄥿L)ڬW BgרG.B+H%ϟst)W?-9Q+͗B G)T2P0U-[mԨť&Jϱu\P8P)r-(O:VLr Xף*jTFĤ} ̎oXp*(G:j@>"`P1WS\;<8G>v%[6_HY@A`ͯgdLO6B ۂ,+4TͿB{YA3nW}des# ;_T9X:Zu>vEIף}̠_/|yi^S<~q0д0Pzؽ'h ٲkXTjO Uq2F:4ȝτfm?VtMJ@Rxq7Gyic.1"Ҵp 嗂e~ 7xn>/0;߂$*>ѤDͼDC}ӧnӦӧMz``3ʕ+rʯj֬9exn  8p`zz /hZǏ_v[;5iҤvaȑRʗ_~ٜGiݺSfΜvm6۔)S*Wl'nn5~p@'?^YtY~Ew8 5d,N \_7ipwwݜŮg-[iӦgyFucuifι?cvnX,Nan>/[,uݕF~igg;} $Lyx_-z6r~s#ZBC:}zfFnni,4nZm^q!99vbns/q1j ''|Uw8EJu=77QJ9{ۯx<_}1z{IӚ7sr.9fl6TNNNU8ry<QVU_!-c^x jl3Nzbi ~Y7%Yqh._Hսŋ4Qp*{񾜾1fDӂ /1oK f̎h9 F-fB+đ=,܈P[эB1"nYǞi>_a& X,KH@~f݈ !Kg%.߂ .օ KrJP)B j=/X7[uavlGqFq:op*_ /%/ ^t<# Vkhpy]5_ ) ʯ5azb 6+*R{RpgmRJsCJKr0 o}Hj/Ïj%f; ?ƒ;{8/}ćͻ>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BO!>>! C(BOr-PIJ!iZi&_@ !I@)(wSF$'Ù3pY{DpM>T_t,N]9s΅`ER_<2_'Yto0} ,ʂ4[-<|̤)(2q!\)~%xԾzP kb v5nB0U] xٻd"#clf'^~ߜ\}utJ R:\IK @eddyPl3=}\y/(*[ίRqXfがܰybĔ)쵻{%Wɬc x.1s^^/yWz"f/HmD.EXsF2h21!z|Z ms+gLCC@H@"^5C_)>D{^,e9CֽS+ @!p`4P2.R򥙌*y,WbE(r-]o{?+~ܐq]% D4m)q;~/L|\2w}Z\2C*DF%3vhNDԥ]2?2S0/E)h&Q\p'ݽeq_/y=`Ԣ(k<ۋ]/|CEOQIDTax;ૄrE1됗|͐ ܢ[7K~u1MopQu+\{-ģָu+Xˍ_LkmP\)&\`o'n#D.ɝ+Ko١~TLa68@^jhZYԁI٬y[j2&a+=wvսc+J[ƪ-f]ꖿ~O7" q%hm޵_pX_ٍkB&l[4@ҰGaR9۴U1¼Is~* k~tS7t\#A;vl_PG>w9 0x uZmRLuT%v8ڡ$\-)'jnc{+7i*Rpsg]Po߮An[ J{"]}Fv&TUGo;Av*7m]lhn~G:};59G-xU{7ubӺĀ'Xr+ߩ]( q`mG[on秶,_}0Qr7p H=o~n!L`90Ar p-]uNIw}9ϥdgggdegggey$ʳk;FG6%s˛jڝ u:h7dDm 8O]_e~:,9AxlZo.ُ|Lty֖tlU"GGTD D\7  @ =9bh.?wof4`-jvl5 u̧Z@h^KIT'nn@pݞ+Oɯjʣp#@kҥOܝ㥗̼Obs[j\3`{-7j׶W~kly_RM5̿k}\?w.~ڶJMoœmߵKCo"A:tpTv_KlR. $80 ;N>{O :ac~#75tP9}/.ڛydهV1͏>6̳?{r؜݈Y9;}0xDó~3RD9ןm׋"ސ)n2,ޕ~~Õ+DDWW;xv>lu&&Fu=¬_ߎ7l@?̌)DwpfR6|ɂ%I;>+1)_ QNڭTW/ıW+|@pO?]/զ\;3X V9BC/e뭽~GnfԒ>~W>Ȝ/{/qf;c6;gy2ygsUwΏ?Krv#Շ3~;OׄOC3gd>Y`H@CH/.J}`OXϾ|"DD/צv}*}׭ ;NDdDiE b\2u{7{뙭>u,Ϭmhu+z>)sogT4VM8p }K*)3ӨC/Mj(8KFaP5MhlwbžшS*H( <Ǜ.8Wۄ!bgg61M_o~*o~!;Zܙ:g.ej{8]=Oיv2\Rrfk,?8@.]`޹@c5c0LJO7oYޡ~pNZ D|y5SvdDsw}zZWi8hʏ5}!.'.ur*5:*5'[xGr\W\YmAy;"^%i`dG{zRKab%b}c+Y+w2Ѣ7 9nA^{ET|dDWE'-Z'+A +8mϮCC{lh\\P`p{rol6kR< dئ~z;M"T321P0 e%%0&ppgl3бG:?iyhu+Gk%;nC?/._7VGđu׬=5z0 m:rʰ9k%2hX׌ZVR' H=AoKn>E˭btoPDm۴nU>i / y*olfOҒr]NxzN[GR|Șj=ohA6\n3W+U|vrMBn `e 9r[Y[xLsY=7A` r\@S١v*9eu: ݾ|뷃bjۖD8_Hǣc 4Ce)oQϣܹ3oQW*V8f:q2R* %%qzF0_n=yuf{OGDUַ?(bp`v.cV) h[gLy4`cKd:"0P*1ժge\yMer!xv' rL rSPLYmC[PsزX@TȮB~#7;XFR\ZgJF8yrF?$ܫi! زAqmp[9NfSxSvݴi7nڴiӦm{N =:9zWπW*6o]ط^hhg|mU㗶tcr~{ RrU7VL{ĸqㆴ ۳ᐣrNFٺuW7,+ڪHVkӨ0g-[~6u@cu$vg1coMqU룞i07{IW u6oџݡnfVw+nۼpφy|~N}o}t؁$粺n MKq hkӑjoPG3q->蟇3lޝyM68!85iqso<}C#XwoMmӖ-s&M5{MS3oݾm͈Ʀ׾I=tUc,iګT7gսD7a<=ncs7;W]9An5i!%d{,A'&1[bN-[Nu=̘T/귭[ėk;GWԽvHN@1cǎ5$uӖp[8ǿ_c朩ʥ=e[nLwu#?qm۷ճǎ}䮪T}`jI{G3M{xǩSnڲe_|׼fVjeyE~n15uS4h~ z1{=u ׌؟~j|//ڦ$f]}k(t>rcxMDTRI_9:|gaonGks NĴ'7 mg^Mu~󋃻u;j2ܑh ^lz-ޖ̌1"V)o֥:4Ȁ]Xa6ٔP1y1n jt׾͖)~ m-ĝZ@;sPw޸! qɭxZG8WP:$Qv[ޓ"ڶo6ήu~IS=)USַY$keOوxv=[nt 맏_ˇ RK=r!]YIi9Pt=؊ꯀe$'Zꄛ=u#-mY*byG|i`anigPk^{씤CYCly3dvb3"2™ +AADde({PHK˰5&&~aIsZCÃ,-9IZc9VU&DV;+1DPDyG\#$&@HB.t5 @'V%o J*.8JuC(LJYI&%L o~N3hҠ .P?/ _C=i\ .Vp5r.\{%`}[-@IɅ@ -*Θ0r.H2_s-]U.f1åR`%PJdIg@R[f:N}tBiUj (d̼̦ȹ_W BDC%f|7"'@jPyq s '> ĔBz\5ֿ%cH ]2vK_%rup"? JA3yk%<;w8y}7)QP _ [6/tp mBdUZ}[ A %Q``.*HbZ>!*8PR"( CQ. HÐ>vϣ9DZB9}B!'BAB|}B! !ćP'BAB|}B! 
!ćP'BAB|}B!42)-g0nѮQIŸ(~\TR"ށ\#B9}r͙S&2q}1zg|(7 `2=9眛\( 18RJRK̯sPJŅTJ!p!R ZRpTo\;ZTC3yh%WR2 &8;)R.$ZevEsCCBB8.4ȅ!Yi9["\Bh_&`y!4!8"\hZV "oL3yo1@?eZ9篃1M y]6Ź9 !ON"7uxy ?Rxyxm05ѕ}d-֭ zRӨVZ|6AGij/'v>>ѝ9xh;{u܀moUGkժY竎" xx[ԪUl@TooTJkp&k׮ۦc.UDc}uߣo#"⪯=oxh۝֬UӳR(OJ'O4i'~d;n5mO{ao>^x{*9WM;_*1|ڽ{לOwr5r0p);y !S o|ze}Dϱ77 sw8CJƹqׇ{Rɻ>ݒYuGpf#,g5THA1u~[i Ȩ)]tR;/?*tאҡelբE "ؤtVMCfܙSi Z"N'TؾqgC&o-׊lТ_Qc1]יAWV]ԙ%o1mf :%-ٔt׸߷RYiγ NIíU:.pv)ng[8@V#'B;,\\ևEDTp6@4 0`LӬ~E[4)_~]U{ac_yNg!LרT-|ڕ{?◹-)e̲1s_y`;4w<>b[un0 h#^YBضp[ 00OjEDӑ'ߘ"7+#?}wl1AsmQ.szSX,ܽ- T;qO1$RD; c<؂{VD6o=`_/6\WûVrv|fĭX]? 8Zmo0X[{ց\?ҕtbjء~ӧI]87qgtWVjFV<4"~CXu;Rq1#4S)x uN^FjlC 7.Yhs.xZS{lnjg7^=Nu@!$IL>3P;}89!ӭv=Xc ;v{b|raB$UpK͔^v C ejڽg2#[mwex*u⃙1~NC}FQz-;uo׀K֠{U۳_fz JiaunT%f;k>1]9Tr?1 ]l”Oj*cZt۠{zCw~îȎ> Vkdwv_ YjfҾħQ\r=BTq~wn]y2~`h>J.bET(?})1TCJ Q45+)8c`Vq94 ]ZּRJ`QJ`0!8"r!@I?(%TycƄT 44`BpT 34th4b\pLp~)]')3|@ڠz"25r(BMr]üa!W !ćPNB|}B!Bf%tEXtdate:create2017-05-01T13:30:21+10:00Wd%tEXtdate:modify2017-05-01T13:27:27+10:00ވ[IENDB`epiR/NEWS0000644000176200001440000003547514165775650011706 0ustar liggesusersCHANGES IN epiR VERSION 2.0.41 SIGNIFICANT USER-VISIBLE CHANGES o Function for epi.interaction reports RERI, APAB, S and multiplicative interaction as a list. [Suggested by Mark Stevenson] o Numerous tidy ups to the documentation. [Suggested by Mark Stevenson] CHANGES IN epiR VERSION 2.0.40 SIGNIFICANT USER-VISIBLE CHANGES o Error in documentation for epi.interaction corrected. [Spotted by Colleen Reynolds] o Function epi.kappa updated to include option to calculate Cohen's kappa. [Suggested by Victoria Mussemann] o Descriptive epidemiology vignette updated to include code to add the cumulative number of cases of disease as a function of time to an epidemic curve. [Suggested by Ahmad Rabiee] CHANGES IN epiR VERSION 2.0.39 SIGNIFICANT USER-VISIBLE CHANGES o Function epi.2by2 and epi.tests updated to include option to change the precision of measures of association using the print option. [Suggested by Shikha Chugh] o Function epi.tests updated to include option to calculate exact, Wilson, Agresti, Clopper-Pearson and Jeffreys confidence limits for true prevalence, apparent prevalence, diagnostic sensitivity, diagnostic specificity, positive predictive value and negative predictive value. [Suggested by Shikha Chugh] o Function epi.2by2 now reports variable phi.coef (phi coefficient) for each of the uncorrected chi-square tests. This value can be used as an input for function epi.sscc. [Suggested by Mark Stevenson] o Argument rho.cc in function epi.sscc changed to phi.coef so consistent with the literature and function epi.2by2. [Suggested by Mark Stevenson] o epi.sscompb methodology revised to return results consistent with Woodward (2014). [Spotted by Ajith Ramayyan] o Help for functions epi.sscompb, epi.sscompc, epi.ssxsectn and epi.sscohortc updated to cite the 2014 edition of Woodward (Epidemiology Study Design and Analysis). [Suggested by Ajith Ramayyan] o epi.tests now returns the proportion of true outcome negative subjects that test positive (false T+ proportion for D-), the proportion of true outcome positive subjects that test negative (false T- proportion for D+), the proportion of test positive subjects that are outcome negative (false T+ proportion for T+) and the proportion of test negative subjects that are outcome positive (false T- proportion for T-). [Suggested by Ahmad Rabiee] o epi.betabuster returns a warning if the shape1 parameter for the beta distribution equals max.shape1. 

CHANGES IN epiR VERSION 2.0.38

SIGNIFICANT USER-VISIBLE CHANGES

  o rsu.sep corrected to return results consistent with rsu.pstar. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.37

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.2by2 returns NA for Fisher exact test when total number of observations greater than 2E09, removing numeric overflow errors. [Suggested by Stuart Reece]

CHANGES IN epiR VERSION 2.0.36

SIGNIFICANT USER-VISIBLE CHANGES

  o Argument p1 (prevalence of exposure among cases) added to epi.sscc, allowing Fleiss correction to be applied. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.35

SIGNIFICANT USER-VISIBLE CHANGES

  o New function epi.ssdxsesp to compute a sample size to estimate the sensitivity or specificity of a diagnostic test. [Suggested by Anou Dreyfus]

  o Additional example provided in the epi.2by2 documentation showing you how to construct an array suitable for a stratified contingency table analysis using count data. [Suggested by Cieran Harries]

  o Error in power calculation for epi.ssninfb and epi.ssninfc corrected. [Spotted by Aniko Szabo]

  o epi.ssninfb and epi.ssninfc now require the user to specify the absolute value of delta (instead of assigning a negative sign to delta, which was prone to user misinterpretation). [Suggested by Aniko Szabo]

  o Documentation for epi.sssupb, epi.sssupc, epi.ssequb, epi.ssequc, epi.ssninfb and epi.ssninfc revised and improved. [Suggested by Aniko Szabo]

CHANGES IN epiR VERSION 2.0.33

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.bohning now handles missing values. [Suggested by Mark Stevenson]

  o epi.descriptives now reports level frequencies if input data is a factor. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.32

SIGNIFICANT USER-VISIBLE CHANGES

  o All sample size functions now allow user to express error in either relative or absolute terms. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.30

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.2by2 and epi.tests data entry formats simplified. Both functions now accept tables generated from tidyverse. [Suggested by Mark Stevenson]

  o Documentation for all functions reviewed. Objects consistently named. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.29

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.2by2 returns NAs for point estimates of incidence rate ratio when number of events equals zero. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.28

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.directadj now returns zero for the directly adjusted incidence rate estimate when the number of observations in a strata equals zero and the time at risk in the same strata equals zero. [Suggested by Mark Stevenson]

CHANGES IN epiR VERSION 2.0.26

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.2by2 now provides interpretive statements of the number needed to treat and the number needed to harm (with confidence intervals) following the approach described by Altman (1998). [Suggested by Lucas Huggins]

  o Documentation for epi.directadj updated.

CHANGES IN epiR VERSION 2.0.24

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.kappa now handles n by n tables. [Suggested by Anita Tolpinrud]

  o Inconsistency in calculation of test of significance of SMRs using Byar's method corrected. [Spotted by Zhou Weilong]

CHANGES IN epiR VERSION 2.0.23

SIGNIFICANT USER-VISIBLE CHANGES

  o New function epi.blcm.paras to return the number of unknown parameters to be inferred and the number of informative priors likely to be needed for an identifiable Bayesian latent class model to estimate diagnostic sensitivity and specificity in the absence of a gold standard. [Contributed by Simon Firestone, Allison Cheung and Nagendra Singanallur]

  o Inconsistency in calculation of test of significance of SMRs using Byar's method corrected. [Spotted by Zhou Weilong]

CHANGES IN epiR VERSION 2.0.21

SIGNIFICANT USER-VISIBLE CHANGES

  o Major tidy up for epi.2by2. Function includes argument 'interpret' to return interpretive statements for each of the computed measures of association. epi.2by2 object now includes massoc.summary, massoc.interp and massoc.detail objects. [Suggested by Caitlin Pfeiffer and Petra Mullner]

  o print(epi.2by2) now returns Yates corrected chi2 test if any of the cell frequencies are less than 5. [Suggested by Simon Firestone]

CHANGES IN epiR VERSION 2.0.20

SIGNIFICANT USER-VISIBLE CHANGES

  o rsu.dxtest code modified to check that appropriate values for test covariance have been used. Error returned when inappropriate values entered. The function now returns diagnostic sensitivity and specificity assuming tests are independent as well as dependent. Documentation for rsu.dxtest updated with details on how to calculate argument covar. [Suggested by Barbara Moloney]

CHANGES IN epiR VERSION 2.0.19

SIGNIFICANT USER-VISIBLE CHANGES

  o rsu.spp.rs code modified.

  o Documentation for rsu.spp.rs, epi.psi and rsu.sssep.rs updated.

CHANGES IN epiR VERSION 2.0.18

SIGNIFICANT USER-VISIBLE CHANGES

  o Vignette on sample size calculations added.

CHANGES IN epiR VERSION 2.0.17

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.2by2 code modified to allow it to handle the Haldane-Anscombe correction (i.e. addition of 0.5 to each cell of the 2 by 2 table when at least one of the cell frequencies is zero). [Suggested by Mark Stevenson]

  o Error in confidence interval calculation for epi.smr where method = "byar" corrected. [Spotted by Sarah Haile]

  o Vignette for surveillance functions updated.

  o epi.2by2 calculates maximum likelihood confidence intervals for the odds ratio when the total number of events is less than 2E09, removing numeric overflow errors. [Suggested by Stuart Reece]

  o New function epi.psi: Proportional similarity index to compare non-parametric frequency distributions.

  o epi.prcc now includes confidence intervals. Example provided in the documentation for epi.prcc improved.

  o Function epi.ssdetect returns sample size estimates using the binomial and hypergeometric distribution. [Suggested by Manuel Sanchez Vazquez]

CHANGES IN epiR VERSION 2.0.1

SIGNIFICANT USER-VISIBLE CHANGES

  o New function epi.ssdxtest: Sample size to validate a diagnostic test in the absence of a gold standard.

  o Function epi.sssimpleestb modified to adjust the required sample size to account for imperfect diagnostic sensitivity and specificity.

CHANGES IN epiR VERSION 2.0.0

SIGNIFICANT USER-VISIBLE CHANGES

  o Major upgrade with amalgamation of epiR with Evan Sergeant's RSurveillance package.

  o Function epi.insthaz now returns Kaplan-Meier survival estimates as well as instantaneous hazard.

  o Argument nfractional added to each of the sample size functions.

  o Inconsistency in reporting of results of epi.nomogram corrected. [Spotted by Pietro Ravani]

CHANGES IN epiR VERSION 1.0-16

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.ssxsectn updated to calculate minimum detectable odds ratio for a cross-sectional study. Function epi.sscohortc updated to calculate minimum detectable odds ratio for a cohort study. [Suggested by Alex Hou]

CHANGES IN epiR VERSION 1.0-15

SIGNIFICANT USER-VISIBLE CHANGES

  o Function epi.insthaz now handles stratified data. Examples in function documentation updated to reflect these changes.

  o Argument rho in function epi.sscc changed to rho.cc to avoid confusion with rho used to represent the intracluster correlation coefficient. [Suggested by Xiaoqing Liu]

  o Ambiguity in documentation for epi.sscomps corrected. [Spotted by Marco Barbara]

  o Function epi.2by2 now includes Taylor series confidence intervals for the incidence risk and prevalence ratio.

CHANGES IN epiR VERSION 1.0-14

SIGNIFICANT USER-VISIBLE CHANGES

  o Long overdue vignette included.

CHANGES IN epiR VERSION 1.0-13

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.2by2 calculates Cornfield confidence intervals for the odds ratio when the total number of events is less than 500. This reduces computation time when cell frequencies are large. [Suggested by Jeff Canar]

  o epi.ssdetect now uses the hypergeometric distribution to calculate sample size, removing the need to apply a finite correction factor. [Suggested by Manuel Sanchez Vazquez]

  o epi.ssninfb Inconsistency in function documentation. [Spotted by Winston Mason] Fixed.

CHANGES IN epiR VERSION 1.0-12

SIGNIFICANT USER-VISIBLE CHANGES

  o New function epi.smr: Computes confidence intervals and tests of significance of the standardised mortality [morbidity] ratio.

CHANGES IN epiR VERSION 1.0-11

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.2by2 Inconsistency in variables returned from print(epi.2by2) and summary(epi.2by2) corrected. [Spotted by Jose G Conde Santiago]

CHANGES IN epiR VERSION 1.0-10

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.interaction Function now includes option to use either product or dummy parameterisation of interaction terms. Documentation updated. [Suggested by Francois M Carrier]

  o New function epi.sscohortc: Sample size, power or minimum detectable incidence risk ratio for a cohort study using individual count data.

  o New function epi.sscohortt: Sample size, power or minimum detectable incidence rate ratio for a cohort study using individual time data.

  o New function epi.ssxsectn: Sample size, power or minimum detectable prevalence ratio for a cross-sectional study.

CHANGES IN epiR VERSION 1.0-06

SIGNIFICANT USER-VISIBLE CHANGES

  o Functions to calculate sample size. Major revision of the naming of functions to calculate sample size.

  o New function epi.ssclus1estb: Sample size to estimate a binary outcome using one-stage cluster sampling.

CHANGES IN epiR VERSION 1.0-04

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.conf Error in incidence rate confidence interval calculation. [Spotted by Kazuki Yoshida] Fixed.

  o epi.detectsize Inconsistency in function documentation. [Spotted by Jamie Madden] Fixed.

  o epi.kappa Function returns an error if the number of rows and number of columns in the data table presented for analysis does not equal two. [Spotted by Maia Dolgopoloff] Fixed.

BUG FIXES

  o epi.2by2 Anomalies in calculation of test of homogeneity in strata odds ratios and risk ratios corrected. Argument "homogeneity" removed from function. Woolf test of homogeneity reported by default for print(x, ...). Test statistics, degrees of freedom and p-values for Breslow Day and Woolf tests of homogeneity returned using summary(x, ...). [Spotted by Antonio A Lopes] Fixed.

CHANGES IN epiR VERSION 1.0-01

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.2by2 Argument "homogeneity" removed from epi.2by2. Mantel-Haenszel (Woolf) test of homogeneity now reported by default for print(x, ...). Test statistics, degrees of freedom and p-values for Breslow Day and Woolf tests of homogeneity returned using summary(x, ...). [Spotted by Antonio A Lopes] Fixed.

CHANGES IN epiR VERSION 0.9-98

SIGNIFICANT USER-VISIBLE CHANGES

  o epi.prev Function does not truncate true prevalence estimates to values between 0 and 1. Warning issued when apparent prevalence is less than (1 - specificity). Additional references added to documentation.

CHANGES IN epiR VERSION 0.9-97

BUG FIXES

  o epi.prev Error in formatting of confidence intervals when method = "sterne" and method = "blaker". [Spotted by Salome Duerr] Fixed.

  o epi.noninfb Study power estimation when r argument for epi.noninfb was not equal to 1 returned incorrect results. [Spotted by Aline Guttmann] Fixed.

  o epi.interaction Error returned when model = coxph. [Spotted by Eirik Degerud] Fixed.

  o epi.directadj Inconsistency in documentation. [Spotted by Jose G Conde Santiago] Fixed.

epiR/R/

epiR/R/zMHRD.GR.R

zMHRD.GR <- function(dat, conf.level = 0.95, units = units) {
  if(length(dim(dat)) > 2){
    ndat <- addmargins(A = dat, margin = 2, FUN = sum, quiet = FALSE)
    c1 <- ndat[1,1,]; c2 <- ndat[1,3,]; c3 <- ndat[2,1,]; c4 <- ndat[2,3,]
    dataset <- cbind(c1, c2, c3, c4)

    num <- sum(apply(X = dataset, MARGIN = 1, FUN = function(ro) (ro[1] * ro[4] - ro[3] * ro[2]) / (ro[2] + ro[4])))
    W <- sum(apply(dataset, 1, function(ro) ro[2] * ro[4] / (ro[2] + ro[4])))  # Cochrane weights
    delta.MH <- num / W

    P <- sum(apply(dataset, 1, function(ro) (ro[2]^2 * ro[3] - ro[4]^2 * ro[1] + 0.5 * ro[2] * ro[4] * (ro[4] - ro[2])) / (ro[2] + ro[4])^2))
    Q <- sum(apply(dataset, 1, function(ro) (ro[1] * (ro[4] - ro[3]) + ro[3] * (ro[2] - ro[1])) / (2 * (ro[2] + ro[4]))))

    p1 <- dataset[,1] / dataset[,2]
    p2 <- dataset[,3] / dataset[,4]
    denom <- apply(dataset, 1, function(ro) ro[2] * ro[4] / (ro[2] + ro[4]))  # Cochrane weights
    var.delta.MH <- sum(denom^2 * (p1 * (1 - p1) / dataset[,2] + p2 * (1 - p2) / dataset[,4])) / W^2

    GRARisk.p <- delta.MH
    GRARisk.l <- GRARisk.p - qnorm(1 - (1 - conf.level) / 2) * sqrt(var.delta.MH)
    GRARisk.u <- GRARisk.p + qnorm(1 - (1 - conf.level) / 2) * sqrt(var.delta.MH)
    c(GRARisk.p * units, GRARisk.l * units, GRARisk.u * units)
  }
}

epiR/R/rsu.sspfree.rs.R

rsu.sspfree.rs <- function(N = NA, prior, p.intro, pstar, pfree, se.u) {
  # Discounted prior:
  adj.prior <- zdisc.prior(prior = prior, p.intro = p.intro)

  # Population sensitivity required to achieve a given value for probability of disease freedom:
  se.p <- zsep.pfree(prior = adj.prior, pfree = pfree)

  n <- rsu.sssep.rs(N = N, pstar = pstar, se.p = se.p, se.u = se.u)

  rval <- list(n = n, se.p = se.p, adj.prior = adj.prior)
  rval
}
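# Illustrative example (not part of the package source). A minimal sketch of how
# rsu.sspfree.rs() might be called; the argument names follow the definition above
# but the numeric values are hypothetical and are shown only to make the calling
# convention concrete:
# rsu.sspfree.rs(N = NA, prior = 0.50, p.intro = 0.01, pstar = 0.05, pfree = 0.95, se.u = 0.90)
# The returned list holds the required sample size (n), the population sensitivity
# needed to reach the target probability of freedom (se.p) and the discounted
# prior (adj.prior), as assembled in the rval object above.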
epiR/R/epi.occc.R

## x is a matrix-like object, rows are cases, columns are raters
## na.rm: logical, if NAs should be excluded
## pairs: logical, if pairwise statistic values should be returned as
## part of the return value
epi.occc <- function(dat, na.rm = FALSE, pairs = FALSE){
  ## Create a list to hold all variables:
  elements <- list()

  ## Do all data manipulation within the list:
  elements <- within(elements, {
    if (!na.rm) {
      m <- apply(dat, 2, mean)
      s <- apply(dat, 2, sd)
      COV <- cov(dat)
    } else {
      m <- apply(dat, 2, mean, na.rm = TRUE)
      s <- apply(dat, 2, sd, na.rm = TRUE)
      COV <- cov(dat, use = "pairwise.complete.obs")
    }

    J <- ncol(dat)
    j <- col(matrix(0,J,J))[lower.tri(matrix(0,J,J))]
    k <- row(matrix(0,J,J))[lower.tri(matrix(0,J,J))]
    n <- (J * J - J) / 2

    v <- numeric(n)
    u <- numeric(n)
    ksi <- numeric(n)
    ccc <- numeric(n)

    for (i in seq_len(n)) {
      v[i] <- s[j[i]] / s[k[i]]
      u[i] <- (m[j[i]] - m[k[i]]) / sqrt(s[j[i]] * s[k[i]])
      ksi[i] <- s[j[i]]^2 + s[k[i]]^2 + (m[j[i]] - m[k[i]])^2
      ccc[i] <- (2 * COV[j[i], k[i]]) / ksi[i]
    }

    accu <- ((v + 1/v + u^2) / 2)^-1
    prec <- ccc / accu
    occc <- sum(ksi * ccc) / sum(ksi)
    oaccu <- sum(ksi * accu) / sum(ksi)
    oprec <- occc / oaccu

    prs <- if (pairs) {
      list(ccc = ccc, prec = prec, accu = accu, ksi = ksi, scale = v, location = u)
    } else NULL
  })

  rval <- list(occc = elements$occc, oprec = elements$oprec, oaccu = elements$oaccu,
    pairs = elements$prs, data.name = deparse(substitute(dat)))
  class(rval) <- "epi.occc"
  return(rval)
}

# https://cran.r-project.org/web/packages/knitr/vignettes/knit_print.html
print.epi.occc <- function(x, ...) {
  # cat("Overall concordance correlation coefficients\n")
  cat(sprintf("\nOverall CCC %.4f", x$occc))
  cat(sprintf("\nOverall precision %.4f", x$oprec))
  cat(sprintf("\nOverall accuracy %.4f", x$oaccu))
  cat("\n")
  # print(data.frame(Value = c("Overall CCC" = x$occc, "Overall precision" = x$oprec, "Overall accuracy" = x$oaccu)), ...)
}

## Summary method for epi.occc:
summary.epi.occc <- function(object, ...) {
  out <- data.frame(occc = object$occc, oprec = object$oprec, oaccu = object$oaccu)
  return(out)
}

epiR/R/zfleiss.R

zfleiss <- function(dat, N, design, conf.level){
  N. <- 1 - ((1 - conf.level) / 2)

  # Sampling for Epidemiologists, Kevin M Sullivan
  a <- dat[,1]
  n <- dat[,2]
  p <- a / n
  q <- (1 - p)

  # 'n' = the total number of subjects sampled. 'N' equals the size of the total population.
  var.fl <- ((p * q) / (n - 1)) * ((N - n) / N)

  # Design effect equals [var.obs] / [var.srs].
  # var.fl has been computed assuming simple random sampling so if an argument for design effect is provided we need to adjust se.wil accordingly:
  se.fl <- sqrt(design * var.fl)

  df <- n - 1
  t <- abs(qt(p = N., df = df))
  low <- p - (t * se.fl)
  upp <- p + (t * se.fl)
  rval <- data.frame(est = p, se = se.fl, lower = low, upper = upp)
  rval
}

epiR/R/rsu.sep.R

rsu.sep <- function(N, n, pstar, se.u = 0.95){
  rval <- 1 - exp(pstar * (N * log(1 - se.u * n / N)))
  return(rval)
}
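# Illustrative worked example (not part of the package source). Under the formula
# above, with hypothetical inputs N = 1000, n = 300, pstar = 0.01 and se.u = 0.90:
#   1 - exp(0.01 * (1000 * log(1 - 0.90 * 300 / 1000)))
#     = 1 - exp(10 * log(0.73))
#     = 1 - 0.73^10
# which is approximately 0.96, i.e. the surveillance system sensitivity implied by
# those inputs.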
epiR/R/epi.kappa.R

"epi.kappa" <- function(dat, method = "fleiss", alternative = c("two.sided", "less", "greater"), conf.level = 0.95){
  if (nrow(dat) != ncol(dat))
    stop("Error: epi.kappa dat requires a table with equal numbers of rows and columns")

  N. <- 1 - ((1 - conf.level) / 2)
  z <- qnorm(N., mean = 0, sd = 1)
  lower <- "lower"
  upper <- "upper"
  n <- sum(dat)

  # ================================================================
  # Kappa:

  if(method == "fleiss"){
    # Turn cell frequencies into proportions:
    ndat <- dat / n

    # Overall proportion of observed agreement, pO
    tmp <- zexact(dat = as.matrix(cbind(sum(diag(dat)), sum(dat))), conf.level = conf.level)
    pO.p <- as.numeric(tmp[,1])
    pO.l <- as.numeric(tmp[,2])
    pO.u <- as.numeric(tmp[,3])

    # Overall proportion of chance-expected agreement, pE
    r.totals <- apply(ndat, MARGIN = 1, FUN = sum)
    c.totals <- apply(ndat, MARGIN = 2, FUN = sum)
    pE.p <- sum(r.totals * c.totals)

    # Overall kappa (Equation 18.12 in Fleiss):
    kappa.p <- (pO.p - pE.p) / (1 - pE.p)

    # Standard error of kappa (Equation 18.13 in Fleiss):
    tmp.1 <- 1 / ((1 - pE.p) * sqrt(n))
    tmp.2 <- sqrt(pE.p + pE.p^2 - sum((r.totals * c.totals) * (r.totals + c.totals)))
    kappa.se <- tmp.1 * tmp.2
    kappa.l <- kappa.p - (z * kappa.se)
    kappa.u <- kappa.p + (z * kappa.se)
  }

  if(method == "watson"){
    # Overall proportion of observed agreement, pO
    tmp <- zexact(dat = as.matrix(cbind(sum(diag(dat)), sum(dat))), conf.level = conf.level)
    pO.p <- as.numeric(tmp[,1])
    pO.l <- as.numeric(tmp[,2])
    pO.u <- as.numeric(tmp[,3])

    # Expected proportion of agreement, pE:
    r.totals <- apply(dat, MARGIN = 1, FUN = sum)
    c.totals <- apply(dat, MARGIN = 2, FUN = sum)
    pE.p <- sum(r.totals * c.totals) / n^2

    # Overall kappa (Equation 18.12 in Fleiss):
    kappa.p <- (pO.p - pE.p) / (1 - pE.p)

    # Standard error of kappa (page 1170 of Watson and Petrie 2010):
    kappa.se <- sqrt((pO.p * (1 - pO.p)) / (n * (1 - pE.p)^2))
    kappa.l <- kappa.p - (z * kappa.se)
    kappa.u <- kappa.p + (z * kappa.se)
  }

  if(method == "altman"){
    # Overall proportion of observed agreement, pO
    tmp <- zexact(dat = as.matrix(cbind(sum(diag(dat)), sum(dat))), conf.level = conf.level)
    pO.p <- as.numeric(tmp[,1])
    pO.l <- as.numeric(tmp[,2])
    pO.u <- as.numeric(tmp[,3])

    # Overall proportion of chance-expected agreement, pE
    r.totals <- apply(dat, MARGIN = 1, FUN = sum)
    c.totals <- apply(dat, MARGIN = 2, FUN = sum)
    pE.p <- sum(r.totals * c.totals) / n^2

    kappa.p <- (pO.p - pE.p) / (1 - pE.p)
    kappa.se <- sqrt((pO.p * (1 - pO.p)) / (n * (1 - pE.p)^2))
    kappa.l <- kappa.p - (z * kappa.se)
    kappa.u <- kappa.p + (z * kappa.se)
  }

  if(method == "cohen"){
    tmp <- zexact(dat = as.matrix(cbind(sum(diag(dat)), sum(dat))), conf.level = conf.level)

    # Overall proportion of observed agreement:
    pO.p <- as.numeric(tmp[,1])
    pO.l <- as.numeric(tmp[,2])
    pO.u <- as.numeric(tmp[,3])

    tdat <- dat / sum(dat)

    # Overall proportion of chance-expected agreement, pE
    r.totals <- apply(dat, MARGIN = 1, FUN = sum)
    c.totals <- apply(dat, MARGIN = 2, FUN = sum)
    pE.p <- sum(r.totals * c.totals) / n^2

    kappa.p <- (pO.p - pE.p) / (1 - pE.p)
    kappa.se <- sqrt((pO.p * (1 - pO.p)) / (sum(dat) * (1 - pE.p) * (1 - pE.p)))
    kappa.l <- kappa.p - (z * kappa.se)
    kappa.u <- kappa.p + (z * kappa.se)
  }

  # ================================================================
  # Bias index:

  if(nrow(dat == 2)){
    # Bias index is the difference in proportions of 'yes' for two raters.
    # See Byrt et al. 1993, added 010814.
    # The Bias index is equal to zero if and only if the marginal proportions are equal.
    # BI = (a + b)/N - (a + c)/N
    # Confidence interval calculation same as that used for attributable risk
    # Rothman p 135 equation 7-2.
    a <- dat[1,1] + dat[1,2]
    c <- dat[1,1] + dat[2,1]

    bi.p <- ((a / n) - (c / n))
    bi.se <- (sqrt(((a * (n - a))/n^3) + ((c * (n - c))/n^3)))
    bi.l <- (bi.p - (z * bi.se))
    bi.u <- (bi.p + (z * bi.se))
  }

  # ================================================================
  # Prevalence index:

  if(nrow(dat == 2)){
    # Prevalence index is the difference between the probability of 'Yes' and the probability of 'No' (after Byrt et al. 1993, added 010814).
    # PI = (a / N) - (d / N)
    # Confidence interval calculation same as that used for attributable risk (Rothman p 135 equation 7-2).
    a <- dat[1,1]
    d <- dat[2,2]

    pi.p <- ((a / n) - (d / n))
    pi.se <- (sqrt(((a * (n - a))/n^3) + ((d * (n - d))/n^3)))
    pi.l <- (pi.p - (z * pi.se))
    pi.u <- (pi.p + (z * pi.se))
  }

  # ================================================================
  # Population adjusted, bias corrected kappa (after Byrt et al. 1993, added 010814):

  pabak.p <- 2 * pO.p - 1
  pabak.l <- 2 * pO.l - 1
  pabak.u <- 2 * pO.u - 1

  # ================================================================
  # Test of effect (Equation 18.14 in Fleiss). Code for p-value taken from z.test function in TeachingDemos package:

  effect.z <- kappa.p / kappa.se
  alternative <- match.arg(alternative)
  p.effect <- switch(alternative,
    two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE),
    less = pnorm(effect.z),
    greater = pnorm(effect.z, lower.tail = FALSE))

  # ================================================================
  # McNemar's test (Dohoo, Martin, Stryhn):

  if(nrow(dat == 2)){
    mcnemar <- (dat[1,2] - dat[2,1])^2 / (dat[1,2] + dat[2,1])
    p.chi2 <- 1 - pchisq(mcnemar, df = 1)
  }

  # ================================================================
  # Results:

  if(nrow(dat == 2)){
    prop.agree <- data.frame(obs = pO.p, exp = pE.p)
    pindex <- data.frame(est = pi.p, se = pi.se, lower = pi.l, upper = pi.u)
    bindex <- data.frame(est = bi.p, se = bi.se, lower = bi.l, upper = bi.u)
    pabak <- data.frame(est = pabak.p, lower = pabak.l, upper = pabak.u)
    kappa <- data.frame(est = kappa.p, se = kappa.se, lower = kappa.l, upper = kappa.u)
    z <- data.frame(test.statistic = effect.z, p.value = p.effect)
    mcnemar <- data.frame(test.statistic = mcnemar, df = 1, p.value = p.chi2)

    rval <- list(prop.agree = prop.agree, pindex = pindex, bindex = bindex, pabak = pabak, kappa = kappa, z = z, mcnemar = mcnemar)
  }

  if(nrow(dat >= 2)){
    prop.agree <- data.frame(obs = pO.p, exp = pE.p)
    pabak <- data.frame(est = pabak.p, lower = pabak.l, upper = pabak.u)
    kappa <- data.frame(est = kappa.p, se = kappa.se, lower = kappa.l, upper = kappa.u)
    z <- data.frame(test.statistic = effect.z, p.value = p.effect)

    rval <- list(prop.agree = prop.agree, pabak = pabak, kappa = kappa, z = z)
  }

  return(rval)
}
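# Illustrative example (not part of the package source). A minimal sketch of a call
# to epi.kappa() on a hypothetical 2 x 2 agreement table; the counts are invented
# and are shown only to make the expected input format concrete:
# dat <- matrix(c(60, 10, 5, 25), nrow = 2, byrow = TRUE)
# epi.kappa(dat, method = "cohen", alternative = "two.sided", conf.level = 0.95)
# For a 2 x 2 table the returned list includes prop.agree, pindex, bindex, pabak,
# kappa, z and mcnemar, as assembled in the rval objects above.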
epiR/R/epi.RtoBUGS.R

# Source: Terry Elrod (Terry.Elrod@UAlberta.ca).
"epi.RtoBUGS" <- function(datalist, towhere) {
  if(!is.list(datalist))
    stop("First argument to writeDatafile must be a list.")
  cat(.formatData(datalist), file = towhere)
}

".formatData" <- function(datalist) {
  if(!is.list(datalist))
    stop("Argument to formatData must be a list.")
  n <- length(datalist)
  datalist.string <- as.list(rep(NA, n))

  for(i in 1.:n) {
    if(is.numeric(datalist[[i]]) & length(datalist[[i]]) == 1.)
      datalist.string[[i]] <- paste(names(datalist)[i], "=", as.character(datalist[[i]]), sep = "")
    if(is.vector(datalist[[i]]) & length(datalist[[i]]) > 1.)
      datalist.string[[i]] <- paste(names(datalist)[i], "=c(", paste(as.character(datalist[[i]]), collapse = ","), ")", sep = "")
    if(is.array(datalist[[i]]))
      datalist.string[[i]] <- paste(names(datalist)[i], "=structure(.Data=c(", paste(as.character(as.vector(aperm(datalist[[i]]))), collapse = ","), "),.Dim=c(", paste(as.character(dim(datalist[[i]])), collapse = ","), "))", sep = "")
  }

  datalist.tofile <- paste("list(", paste(unlist(datalist.string), collapse = ","), ")", sep = "")
  return(datalist.tofile)
}
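# Illustrative example (not part of the package source). A minimal sketch of a call
# to epi.RtoBUGS(); the data list and the output file name are hypothetical:
# dat <- list(N = 3, y = c(1, 0, 1))
# epi.RtoBUGS(datalist = dat, towhere = "dat.txt")
# This writes the list in the text format produced by .formatData() above.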
epiR/R/rsu.pfree.equ.R

rsu.pfree.equ <- function(se.p, p.intro) {
  prior.equ <- 1 - (p.intro / se.p)
  pfree.equ <- (1 - (p.intro / se.p)) / (1 - p.intro)

  # Function returns the equilibrium prior probability of disease freedom and
  # the equilibrium discounted prior probability of freedom:
  return(list(epfree = prior.equ, depfree = pfree.equ))
}

epiR/R/zORwald.R

zORwald <- function(dat, conf.level){
  N. <- 1 - ((1 - conf.level) / 2)
  z <- qnorm(N., mean = 0, sd = 1)

  a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4]
  N1 <- a + b; N0 <- c + d

  wOR.p <- (a / b) / (c / d)
  lnwOR <- log(wOR.p)
  lnwOR.var <- 1/a + 1/b + 1/c + 1/d
  lnwOR.se <- sqrt(lnwOR.var)
  ll <- exp(lnwOR - (z * lnwOR.se))
  ul <- exp(lnwOR + (z * lnwOR.se))
  c(wOR.p, ll, ul)
}

epiR/R/epi.bohning.R

"epi.bohning" <- function(obs, exp, alpha = 0.05){
  J <- length(obs)
  smr <- obs / exp
  smr.bar <- sum(smr, na.rm = TRUE) / J

  # Bohning's test:
  top <- (1 / (J - 1)) * sum(((obs - (smr.bar * exp))^2 / exp), na.rm = TRUE) - smr.bar
  bottom <- sqrt((2 * smr.bar) / (J - 1))
  bohning <- top / bottom
  p <- 1 - pnorm(q = bohning, mean = 0, sd = 1)

  rval <- as.data.frame(cbind(test.statistic = bohning, p.value = p))
  return(rval)
}

epiR/R/zexact.R

zexact <- function(dat, conf.level){
  # Exact binomial confidence limits from function binom::binom.confint. Changed 190716.
  alpha <- 1 - conf.level
  alpha2 <- 0.5 * alpha
  a <- dat[,1]
  n <- dat[,2]
  p <- a / n

  a1 <- a == 0
  a2 <- a == n
  lb <- ub <- a
  lb[a1] <- 1
  ub[a2] <- n[a2] - 1
  low <- 1 - qbeta(1 - alpha2, n + 1 - a, lb)
  upp <- 1 - qbeta(alpha2, n - ub, a + 1)

  if (any(a1)) low[a1] <- rep(0, sum(a1))
  if (any(a2)) upp[a2] <- rep(1, sum(a2))

  rval <- data.frame(est = p, lower = low, upper = upp)
  rval
}

epiR/R/zRRtaylor.R

zRRtaylor <- function(dat, conf.level){
  N. <- 1 - ((1 - conf.level) / 2)
  z <- qnorm(N., mean = 0, sd = 1)

  a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4]
  N1 <- a + b; N0 <- c + d

  RR.p <- (a / N1) / (c / N0)
  lnRR <- log(RR.p)
  lnRR.se <- sqrt(((1 - (a / N1)) / a) + ((1 - (c / N0)) / c))
  ll <- exp(lnRR - (z * lnRR.se))
  ul <- exp(lnRR + (z * lnRR.se))
  c(RR.p, ll, ul)
}
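# Illustrative worked example (not part of the package source). For a hypothetical
# 2 x 2 table with a = 10, N1 = 100, c = 5 and N0 = 100 the code above gives
# RR = (10/100) / (5/100) = 2.0, ln(RR) = 0.693 and
# se(ln RR) = sqrt((1 - 0.10)/10 + (1 - 0.05)/5) = sqrt(0.28) = 0.529,
# so the approximate 95% Taylor series limits are exp(0.693 +/- 1.96 * 0.529),
# i.e. roughly 0.71 to 5.64.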
epiR/R/zn.hypergeo.R

zn.hypergeo <- function(sep, N, d, se = 1) {
  n <- (N / se) * (1 - (1 - sep)^(1 / d))
  n[n > N] <- NA
  return(ceiling(n))
}

epiR/R/rsu.sep.pass.R

rsu.sep.pass <- function(N, n, step.p, pstar.c, p.inf.u, se.u){
  if(is.matrix(step.p)) {
    tmp <- apply(step.p, FUN = prod, MARGIN = 1)
  } else {
    tmp <- prod(step.p)
  }

  se.c <- tmp * (1 - (1 - p.inf.u * se.u)^n)
  se.p <- 1 - (1 - se.c)^(pstar.c * N)
  return(list(se.p = se.p, se.c = se.c))
}

epiR/R/rsu.sep.rb2st.r

rsu.sep.rb2st <- function(H = NA, N = NA, n, rr.c, ppr.c, pstar.c, rr.u, ppr.u, pstar.u, rg, se.u){
  if(length(se.u) == 1) se.u <- rep(se.u, times = nrow(n))
  sep <- numeric(nrow(n))

  # Calculate sep for all clusters:
  if (length(N) == 1) {
    # Cluster sizes not provided so use binomial for all clusters:
    for (i in 1:nrow(n)) {
      sep[i] <- rsu.sep.rb1rf(pstar = pstar.u, rr = rr.u, ppr = ppr.u[i,], N = NA, n = n[i,], se.u = se.u[i], method = "binomial")[[1]]
    }
  } else {
    # Cluster sizes provided so use hypergeometric unless NA for specific clusters:
    for (i in 1:nrow(n)) {
      if (is.na(N[i,1])) {
        sep[i] <- rsu.sep.rb1rf(pstar = pstar.u, rr = rr.u, ppr = ppr.u[i,], N = NA, n = n[i,], se.u = se.u[i], method = "binomial")[[1]]
      } else {
        sep[i] <- rsu.sep.rb1rf(pstar = pstar.u, rr = rr.u, N = N[i,], n = n[i,], se.u = se.u[i], method = "hypergeometric")[[1]]
      }
    }
  }

  # Calculate system sensitivity:
  if (is.na(H)) {
    # Population size unknown, use binomial:
    sse <- rsu.sep.rb(pstar = pstar.c, rr = rr.c, N = NA, ppr = ppr.c, df = cbind(rg, sep, 1), method = "binomial")
  } else {
    # Population size known, use hypergeometric:
    sse <- rsu.sep.rb(pstar = pstar.c, rr = rr.c, N = H * ppr.c, ppr = NA, df = cbind(rg, sep, 1), method = "hypergeometric")
  }

  return(list("se.p" = sse[[1]], "se.c" = sep))
}

epiR/R/zsep.pfree.R

zsep.pfree <- function(prior, pfree) {
  sep <- (1 - prior / pfree) / (1 - prior)
  return(sep)
}
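# Illustrative worked example (not part of the package source). With a hypothetical
# prior of 0.20 and a target probability of freedom (pfree) of 0.95, the formula
# above gives sep = (1 - 0.20/0.95) / (1 - 0.20) = 0.7895 / 0.80, or roughly 0.99,
# i.e. the surveillance sensitivity that would be needed to reach that target.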
epiR/R/epi.insthaz.R

"epi.insthaz" <- function(survfit.obj, conf.level = 0.95){
  N <- 1 - ((1 - conf.level) / 2)
  z <- qnorm(N, mean = 0, sd = 1)

  if(length(survfit.obj$strata) == 0) {
    dat.df <- data.frame(time = survfit.obj$time, time0 = c(0, survfit.obj$time[-length(survfit.obj$time)]))

    # https://www.real-statistics.com/survival-analysis/kaplan-meier-procedure/confidence-interval-for-the-survival-function/
    # Kaplan-Meier survival and confidence intervals:
    dat.df$sest <- survfit.obj$surv
    dat.df$sse <- survfit.obj$std.err
    dat.df$supp <- dat.df$sest^exp(z / log(dat.df$sest) * dat.df$sse / dat.df$sest)
    dat.df$slow <- dat.df$sest^exp(-z / log(dat.df$sest) * dat.df$sse / dat.df$sest)

    # Instantaneous hazard and confidence intervals:
    dat.df$int <- dat.df$time - dat.df$time0
    dat.df$a <- survfit.obj$n.event
    dat.df$n <- survfit.obj$n.risk
    dat.df$p <- dat.df$a / dat.df$n
    dat.df$a. <- dat.df$n / (dat.df$n + z^2)
    dat.df$b. <- dat.df$a / dat.df$n
    dat.df$c. <- z^2 / (2 * dat.df$n)
    dat.df$d. <- (dat.df$a * (dat.df$n - dat.df$a)) / dat.df$n^3
    dat.df$e. <- z^2 / (4 * dat.df$n^2)

    dat.df$hest <- dat.df$p / dat.df$int
    dat.df$hlow <- (dat.df$a. * (dat.df$b. + dat.df$c. - (z * sqrt(dat.df$d. + dat.df$e.)))) / dat.df$int
    dat.df$hupp <- (dat.df$a. * (dat.df$b. + dat.df$c. + (z * sqrt(dat.df$d. + dat.df$e.)))) / dat.df$int

    dat.df$hest[is.infinite(dat.df$hest)] <- 0
    dat.df$hlow[is.infinite(dat.df$hlow)] <- 0
    dat.df$hupp[is.infinite(dat.df$hupp)] <- 0

    rval <- data.frame(time = dat.df$time, sest = dat.df$sest, slow = dat.df$slow, supp = dat.df$supp,
      hest = dat.df$hest, hlow = dat.df$hlow, hupp = dat.df$hupp)
  } else if(length(survfit.obj$strata) > 0) {
    # Strata names:
    strata <- names(survfit.obj$strata)
    strata <- sub(pattern = ".*=", replacement = "", strata)
    strata <- rep(strata, times = survfit.obj$strata)
    ustrata <- unique(strata)

    dat.df <- data.frame(strata, time = survfit.obj$time)

    time0 <- c()
    for(i in 1:length(ustrata)){
      id <- dat.df$strata == ustrata[i]
      tdat.df <- dat.df[id,]

      if(nrow(tdat.df) == 1) {
        ttime0 <- 0
      } else if(nrow(tdat.df) > 1) {
        ttime0 <- c(0, tdat.df$time[-length(tdat.df$time)])
      }
      time0 <- c(time0, ttime0)
    }

    dat.df$time0 <- time0
    dat.df <- dat.df[,c(1,3,2)]
    dat.df$int <- (dat.df$time - dat.df$time0)

    # Kaplan-Meier survival and confidence intervals:
    dat.df$sest <- survfit.obj$surv
    dat.df$sse <- survfit.obj$std.err
    dat.df$supp <- dat.df$sest^exp(z / log(dat.df$sest) * dat.df$sse / dat.df$sest)
    dat.df$slow <- dat.df$sest^exp(-z / log(dat.df$sest) * dat.df$sse / dat.df$sest)

    # Instantaneous hazard and confidence intervals:
    dat.df$a <- survfit.obj$n.event
    dat.df$n <- survfit.obj$n.risk
    dat.df$p <- dat.df$a / dat.df$n
    dat.df$a. <- dat.df$n / (dat.df$n + z^2)
    dat.df$b. <- dat.df$a / dat.df$n
    dat.df$c. <- z^2 / (2 * dat.df$n)
    dat.df$d. <- (dat.df$a * (dat.df$n - dat.df$a)) / dat.df$n^3
    dat.df$e. <- z^2 / (4 * dat.df$n^2)

    dat.df$hest <- dat.df$p / dat.df$int
    dat.df$hlow <- (dat.df$a. * (dat.df$b. + dat.df$c. - (z * sqrt(dat.df$d.
+ dat.df$e.)))) / dat.df$int dat.df$hest[is.infinite(dat.df$hest)] <- 0 dat.df$hlow[is.infinite(dat.df$hlow)] <- 0 dat.df$hupp[is.infinite(dat.df$hupp)] <- 0 rval <- data.frame(strata = dat.df$strata, time = dat.df$time, sest = dat.df$sest, slow = dat.df$slow, supp = dat.df$supp, hest = dat.df$hest, hlow = dat.df$hlow, hupp = dat.df$hupp) } return(rval) }epiR/R/epi.ltd.R0000644000176200001440000007404613117711460013050 0ustar liggesusersepi.ltd <- function(dat, std = "305"){ if (std == "305"){ std <- 305 a0 <- rep(1, times = 305) a1 <- c(-1,-0.99342,-0.98684,-0.98026,-0.97368,-0.96711,-0.96053,-0.95395,-0.94737, -0.94079,-0.93421,-0.92763,-0.92105,-0.91447,-0.90789,-0.90132,-0.89474, -0.88816,-0.88158,-0.875,-0.86842,-0.86184,-0.85526,-0.84868,-0.84211, -0.83553,-0.82895,-0.82237,-0.81579,-0.80921,-0.80263,-0.79605,-0.78947, -0.78289,-0.77632,-0.76974,-0.76316,-0.75658,-0.75,-0.74342,-0.73684, -0.73026,-0.72368,-0.71711,-0.71053,-0.70395,-0.69737,-0.69079,-0.68421, -0.67763,-0.67105,-0.66447,-0.65789,-0.65132,-0.64474,-0.63816,-0.63158, -0.625,-0.61842,-0.61184,-0.60526,-0.59868,-0.59211,-0.58553,-0.57895, -0.57237,-0.56579,-0.55921,-0.55263,-0.54605,-0.53947,-0.53289,-0.52632, -0.51974,-0.51316,-0.50658,-0.5,-0.49342,-0.48684,-0.48026,-0.47368, -0.46711,-0.46053,-0.45395,-0.44737,-0.44079,-0.43421,-0.42763,-0.42105, -0.41447,-0.40789,-0.40132,-0.39474,-0.38816,-0.38158,-0.375,-0.36842, -0.36184,-0.35526,-0.34868,-0.34211,-0.33553,-0.32895,-0.32237,-0.31579, -0.30921,-0.30263,-0.29605,-0.28947,-0.28289,-0.27632,-0.26974,-0.26316, -0.25658,-0.25,-0.24342,-0.23684,-0.23026,-0.22368,-0.21711,-0.21053, -0.20395,-0.19737,-0.19079,-0.18421,-0.17763,-0.17105,-0.16447,-0.15789, -0.15132,-0.14474,-0.13816,-0.13158,-0.125,-0.11842,-0.11184,-0.10526, -0.09868,-0.09211,-0.08553,-0.07895,-0.07237,-0.06579,-0.05921,-0.05263, -0.04605,-0.03947,-0.03289,-0.02632,-0.01974,-0.01316,-0.00658,0,0.00658, 0.01316,0.01974,0.02632,0.03289,0.03947,0.04605,0.05263,0.05921,0.06579, 0.07237,0.07895,0.08553,0.09211,0.09868,0.10526,0.11184,0.11842,0.125, 0.13158,0.13816,0.14474,0.15132,0.15789,0.16447,0.17105,0.17763,0.18421, 0.19079,0.19737,0.20395,0.21053,0.21711,0.22368,0.23026,0.23684,0.24342, 0.25,0.25658,0.26316,0.26974,0.27632,0.28289,0.28947,0.29605,0.30263, 0.30921,0.31579,0.32237,0.32895,0.33553,0.34211,0.34868,0.35526,0.36184, 0.36842,0.375,0.38158,0.38816,0.39474,0.40132,0.40789,0.41447,0.42105, 0.42763,0.43421,0.44079,0.44737,0.45395,0.46053,0.46711,0.47368,0.48026, 0.48684,0.49342,0.5,0.50658,0.51316,0.51974,0.52632,0.53289,0.53947, 0.54605,0.55263,0.55921,0.56579,0.57237,0.57895,0.58553,0.59211,0.59868, 0.60526,0.61184,0.61842,0.625,0.63158,0.63816,0.64474,0.65132,0.65789, 0.66447,0.67105,0.67763,0.68421,0.69079,0.69737,0.70395,0.71053,0.71711, 0.72368,0.73026,0.73684,0.74342,0.75,0.75658,0.76316,0.76974,0.77632, 0.78289,0.78947,0.79605,0.80263,0.80921,0.81579,0.82237,0.82895,0.83553, 0.84211,0.84868,0.85526,0.86184,0.86842,0.875,0.88158,0.88816,0.89474, 0.90132,0.90789,0.91447,0.92105,0.92763,0.93421,0.94079,0.94737,0.95395, 0.96053,0.96711,0.97368,0.98026,0.98684,0.99342,1) a2 <- c(1,0.98033,0.96079,0.94137,0.92209,0.90294,0.88392,0.86502,0.84626,0.82763, 0.80912,0.79075,0.77251,0.75439,0.73641,0.71856,0.70083,0.68324,0.66577, 0.64844,0.63123,0.61416,0.59721,0.5804,0.56371,0.54716,0.53073,0.51443, 0.49827,0.48223,0.46633,0.45055,0.4349,0.41939,0.404,0.38874,0.37361, 0.35862,0.34375,0.32901,0.3144,0.29993,0.28558,0.27136,0.25727,0.24331, 
0.22948,0.21579,0.20222,0.18878,0.17547,0.16229,0.14924,0.13632,0.12353, 0.11087,0.09834,0.08594,0.07367,0.06153,0.04952,0.03763,0.02588,0.01426, 0.00277,-0.00859,-0.01982,-0.03093,-0.0419,-0.05274,-0.06345,-0.07403, -0.08449,-0.09481,-0.105,-0.11507,-0.125,-0.1348,-0.14448,-0.15402,-0.16343, -0.17272,-0.18187,-0.1909,-0.19979,-0.20856,-0.21719,-0.2257,-0.23407, -0.24232,-0.25043,-0.25842,-0.26627,-0.274,-0.2816,-0.28906,-0.2964, -0.30361,-0.31068,-0.31763,-0.32445,-0.33113,-0.33769,-0.34412,-0.35042, -0.35658,-0.36262,-0.36853,-0.37431,-0.37996,-0.38547,-0.39086,-0.39612, -0.40125,-0.40625,-0.41112,-0.41586,-0.42047,-0.42495,-0.4293,-0.43352, -0.43761,-0.44157,-0.4454,-0.4491,-0.45267,-0.45611,-0.45942,-0.4626, -0.46566,-0.46858,-0.47137,-0.47403,-0.47656,-0.47896,-0.48124,-0.48338, -0.48539,-0.48727,-0.48903,-0.49065,-0.49214,-0.49351,-0.49474,-0.49584, -0.49682,-0.49766,-0.49838,-0.49896,-0.49942,-0.49974,-0.49994,-0.5, -0.49994,-0.49974,-0.49942,-0.49896,-0.49838,-0.49766,-0.49682,-0.49584, -0.49474,-0.49351,-0.49214,-0.49065,-0.48903,-0.48727,-0.48539,-0.48338, -0.48124,-0.47896,-0.47656,-0.47403,-0.47137,-0.46858,-0.46566,-0.4626, -0.45942,-0.45611,-0.45267,-0.4491,-0.4454,-0.44157,-0.43761,-0.43352, -0.4293,-0.42495,-0.42047,-0.41586,-0.41112,-0.40625,-0.40125,-0.39612, -0.39086,-0.38547,-0.37996,-0.37431,-0.36853,-0.36262,-0.35658,-0.35042, -0.34412,-0.33769,-0.33113,-0.32445,-0.31763,-0.31068,-0.30361,-0.2964, -0.28906,-0.2816,-0.274,-0.26627,-0.25842,-0.25043,-0.24232,-0.23407, -0.2257,-0.21719,-0.20856,-0.19979,-0.1909,-0.18187,-0.17272,-0.16343, -0.15402,-0.14448,-0.1348,-0.125,-0.11507,-0.105,-0.09481,-0.08449,-0.07403, -0.06345,-0.05274,-0.0419,-0.03093,-0.01982,-0.00859,0.00277,0.01426, 0.02588,0.03763,0.04952,0.06153,0.07367,0.08594,0.09834,0.11087,0.12353, 0.13632,0.14924,0.16229,0.17547,0.18878,0.20222,0.21579,0.22948,0.24331, 0.25727,0.27136,0.28558,0.29993,0.3144,0.32901,0.34375,0.35862,0.37361, 0.38874,0.404,0.41939,0.4349,0.45055,0.46633,0.48223,0.49827,0.51443, 0.53073,0.54716,0.56371,0.5804,0.59721,0.61416,0.63123,0.64844,0.66577, 0.68324,0.70083,0.71856,0.73641,0.75439,0.77251,0.79075,0.80912,0.82763, 0.84626,0.86502,0.88392,0.90294,0.92209,0.94137,0.96079,0.98033,1) a3 <- c(-1,-0.96085,-0.92235,-0.88448,-0.84725,-0.81066,-0.77469,-0.73935,-0.70462, -0.67051,-0.63701,-0.60412,-0.57183,-0.54014,-0.50904,-0.47853,-0.44861, -0.41926,-0.3905,-0.3623,-0.33468,-0.30762,-0.28111,-0.25517,-0.22977, -0.20492,-0.18061,-0.15685,-0.13361,-0.11091,-0.08873,-0.06707,-0.04593, -0.0253,-0.00517,0.01444,0.03356,0.05218,0.07031,0.08796,0.10512,0.1218, 0.13801,0.15375,0.16902,0.18383,0.19819,0.21209,0.22554,0.23855,0.25112, 0.26326,0.27496,0.28623,0.29709,0.30752,0.31754,0.32715,0.33635,0.34515, 0.35356,0.36157,0.36919,0.37643,0.38329,0.38977,0.39589,0.40163,0.40701, 0.41203,0.4167,0.42102,0.42499,0.42862,0.43191,0.43487,0.4375,0.43981, 0.44179,0.44346,0.44482,0.44587,0.44661,0.44706,0.44721,0.44708,0.44665, 0.44595,0.44496,0.44371,0.44218,0.44039,0.43834,0.43603,0.43347,0.43066, 0.42761,0.42432,0.4208,0.41704,0.41306,0.40886,0.40444,0.3998,0.39496, 0.38991,0.38466,0.37921,0.37357,0.36774,0.36173,0.35554,0.34918,0.34264, 0.33594,0.32907,0.32205,0.31487,0.30755,0.30007,0.29246,0.28471,0.27683, 0.26882,0.26069,0.25244,0.24407,0.23559,0.227,0.21831,0.20953,0.20064, 0.19167,0.18262,0.17348,0.16427,0.15498,0.14562,0.1362,0.12673,0.11719, 0.10761,0.09797,0.0883,0.07858,0.06883,0.05906,0.04925,0.03943,0.02959, 0.01973,0.00987,0,-0.00987,-0.01973,-0.02959,-0.03943,-0.04925,-0.05906, 
-0.06883,-0.07858,-0.0883,-0.09797,-0.10761,-0.11719,-0.12673,-0.1362, -0.14562,-0.15498,-0.16427,-0.17348,-0.18262,-0.19167,-0.20064,-0.20953, -0.21831,-0.227,-0.23559,-0.24407,-0.25244,-0.26069,-0.26882,-0.27683, -0.28471,-0.29246,-0.30007,-0.30755,-0.31487,-0.32205,-0.32907,-0.33594, -0.34264,-0.34918,-0.35554,-0.36173,-0.36774,-0.37357,-0.37921,-0.38466, -0.38991,-0.39496,-0.3998,-0.40444,-0.40886,-0.41306,-0.41704,-0.4208, -0.42432,-0.42761,-0.43066,-0.43347,-0.43603,-0.43834,-0.44039,-0.44218, -0.44371,-0.44496,-0.44595,-0.44665,-0.44708,-0.44721,-0.44706,-0.44661, -0.44587,-0.44482,-0.44346,-0.44179,-0.43981,-0.4375,-0.43487,-0.43191, -0.42862,-0.42499,-0.42102,-0.4167,-0.41203,-0.40701,-0.40163,-0.39589, -0.38977,-0.38329,-0.37643,-0.36919,-0.36157,-0.35356,-0.34515,-0.33635, -0.32715,-0.31754,-0.30752,-0.29709,-0.28623,-0.27496,-0.26326,-0.25112, -0.23855,-0.22554,-0.21209,-0.19819,-0.18383,-0.16902,-0.15375,-0.13801, -0.1218,-0.10512,-0.08796,-0.07031,-0.05218,-0.03356,-0.01444,0.00517, 0.0253,0.04593,0.06707,0.08873,0.11091,0.13361,0.15685,0.18061,0.20492, 0.22977,0.25517,0.28111,0.30762,0.33468,0.3623,0.3905,0.41926,0.44861, 0.47853,0.50904,0.54014,0.57183,0.60412,0.63701,0.67051,0.70462,0.73935, 0.77469,0.81066,0.84725,0.88448,0.92235,0.96085,1) a4 <- c(1,0.93518,0.87228,0.81126,0.75211,0.69478,0.63926,0.5855,0.53349,0.4832, 0.43459,0.38764,0.34232,0.2986,0.25646,0.21587,0.1768,0.13923,0.10312, 0.06845,0.0352,0.00334,-0.02716,-0.05632,-0.08417,-0.11074,-0.13604, -0.1601,-0.18295,-0.20462,-0.22512,-0.24448,-0.26273,-0.27988,-0.29597, -0.31101,-0.32503,-0.33805,-0.3501,-0.36119,-0.37135,-0.3806,-0.38896, -0.39646,-0.40312,-0.40895,-0.41398,-0.41823,-0.42172,-0.42447,-0.4265, -0.42784,-0.42849,-0.42849,-0.42784,-0.42658,-0.42472,-0.42227,-0.41926, -0.41571,-0.41163,-0.40704,-0.40197,-0.39642,-0.39041,-0.38397,-0.37711, -0.36985,-0.3622,-0.35418,-0.34581,-0.3371,-0.32807,-0.31874,-0.30911, -0.29922,-0.28906,-0.27866,-0.26804,-0.25719,-0.24615,-0.23493,-0.22353, -0.21198,-0.20028,-0.18845,-0.1765,-0.16445,-0.15231,-0.14009,-0.12781, -0.11547,-0.10309,-0.09069,-0.07826,-0.06583,-0.0534,-0.04099,-0.0286, -0.01626,-0.00396,0.00828,0.02045,0.03254,0.04455,0.05645,0.06825,0.07993, 0.09149,0.10291,0.11419,0.12532,0.13629,0.14709,0.15771,0.16816,0.17841, 0.18847,0.19832,0.20796,0.21739,0.22659,0.23556,0.24429,0.25279,0.26103, 0.26902,0.27676,0.28423,0.29143,0.29836,0.30502,0.31139,0.31747,0.32327, 0.32878,0.33399,0.3389,0.3435,0.3478,0.3518,0.35548,0.35885,0.36191, 0.36465,0.36707,0.36917,0.37095,0.37241,0.37354,0.37435,0.37484,0.375, 0.37484,0.37435,0.37354,0.37241,0.37095,0.36917,0.36707,0.36465,0.36191, 0.35885,0.35548,0.3518,0.3478,0.3435,0.3389,0.33399,0.32878,0.32327, 0.31747,0.31139,0.30502,0.29836,0.29143,0.28423,0.27676,0.26902,0.26103, 0.25279,0.24429,0.23556,0.22659,0.21739,0.20796,0.19832,0.18847,0.17841, 0.16816,0.15771,0.14709,0.13629,0.12532,0.11419,0.10291,0.09149,0.07993, 0.06825,0.05645,0.04455,0.03254,0.02045,0.00828,-0.00396,-0.01626,-0.0286, -0.04099,-0.0534,-0.06583,-0.07826,-0.09069,-0.10309,-0.11547,-0.12781, -0.14009,-0.15231,-0.16445,-0.1765,-0.18845,-0.20028,-0.21198,-0.22353, -0.23493,-0.24615,-0.25719,-0.26804,-0.27866,-0.28906,-0.29922,-0.30911, -0.31874,-0.32807,-0.3371,-0.34581,-0.35418,-0.3622,-0.36985,-0.37711, -0.38397,-0.39041,-0.39642,-0.40197,-0.40704,-0.41163,-0.41571,-0.41926, -0.42227,-0.42472,-0.42658,-0.42784,-0.42849,-0.42849,-0.42784,-0.4265, -0.42447,-0.42172,-0.41823,-0.41398,-0.40895,-0.40312,-0.39646,-0.38896, 
-0.3806,-0.37135,-0.36119,-0.3501,-0.33805,-0.32503,-0.31101,-0.29597, -0.27988,-0.26273,-0.24448,-0.22512,-0.20462,-0.18295,-0.1601,-0.13604, -0.11074,-0.08417,-0.05632,-0.02716,0.00334,0.0352,0.06845,0.10312, 0.13923,0.1768,0.21587,0.25646,0.2986,0.34232,0.38764,0.43459,0.4832, 0.53349,0.5855,0.63926,0.69478,0.75211,0.81126,0.87228,0.93518,1) a5 <- c(-1,-0.90357,-0.81156,-0.72387,-0.64036,-0.56094,-0.48549,-0.41389,-0.34605, -0.28185,-0.22119,-0.16396,-0.11007,-0.05941,-0.01188,0.0326,0.07414, 0.11283,0.14877,0.18203,0.21272,0.24092,0.26671,0.29018,0.31141,0.33048, 0.34747,0.36247,0.37554,0.38677,0.39622,0.40397,0.41009,0.41465,0.41772, 0.41936,0.41964,0.41863,0.41638,0.41296,0.40843,0.40285,0.39627,0.38875, 0.38035,0.37112,0.3611,0.35036,0.33895,0.3269,0.31427,0.30111,0.28746, 0.27336,0.25886,0.24399,0.22881,0.21334,0.19762,0.1817,0.16561,0.14939, 0.13306,0.11666,0.10022,0.08377,0.06735,0.05098,0.03468,0.0185,0.00244, -0.01346,-0.02919,-0.04471,-0.06,-0.07506,-0.08984,-0.10435,-0.11855, -0.13243,-0.14598,-0.15917,-0.172,-0.18444,-0.1965,-0.20814,-0.21937, -0.23017,-0.24053,-0.25045,-0.2599,-0.2689,-0.27742,-0.28546,-0.29303, -0.3001,-0.30668,-0.31276,-0.31835,-0.32343,-0.32801,-0.33209,-0.33566, -0.33872,-0.34129,-0.34334,-0.3449,-0.34596,-0.34653,-0.3466,-0.34618, -0.34528,-0.3439,-0.34204,-0.33972,-0.33694,-0.3337,-0.33001,-0.32589, -0.32133,-0.31635,-0.31095,-0.30515,-0.29895,-0.29237,-0.28541,-0.27808, -0.2704,-0.26238,-0.25403,-0.24535,-0.23637,-0.22709,-0.21753,-0.20769, -0.1976,-0.18726,-0.1767,-0.16591,-0.15492,-0.14374,-0.13239,-0.12087, -0.10921,-0.09741,-0.0855,-0.07348,-0.06137,-0.04918,-0.03694,-0.02465, -0.01233,0,0.01233,0.02465,0.03694,0.04918,0.06137,0.07348,0.0855,0.09741, 0.10921,0.12087,0.13239,0.14374,0.15492,0.16591,0.1767,0.18726,0.1976, 0.20769,0.21753,0.22709,0.23637,0.24535,0.25403,0.26238,0.2704,0.27808, 0.28541,0.29237,0.29895,0.30515,0.31095,0.31635,0.32133,0.32589,0.33001, 0.3337,0.33694,0.33972,0.34204,0.3439,0.34528,0.34618,0.3466,0.34653, 0.34596,0.3449,0.34334,0.34129,0.33872,0.33566,0.33209,0.32801,0.32343, 0.31835,0.31276,0.30668,0.3001,0.29303,0.28546,0.27742,0.2689,0.2599, 0.25045,0.24053,0.23017,0.21937,0.20814,0.1965,0.18444,0.172,0.15917, 0.14598,0.13243,0.11855,0.10435,0.08984,0.07506,0.06,0.04471,0.02919, 0.01346,-0.00244,-0.0185,-0.03468,-0.05098,-0.06735,-0.08377,-0.10022, -0.11666,-0.13306,-0.14939,-0.16561,-0.1817,-0.19762,-0.21334,-0.22881, -0.24399,-0.25886,-0.27336,-0.28746,-0.30111,-0.31427,-0.3269,-0.33895, -0.35036,-0.3611,-0.37112,-0.38035,-0.38875,-0.39627,-0.40285,-0.40843, -0.41296,-0.41638,-0.41863,-0.41964,-0.41936,-0.41772,-0.41465,-0.41009, -0.40397,-0.39622,-0.38677,-0.37554,-0.36247,-0.34747,-0.33048,-0.31141, -0.29018,-0.26671,-0.24092,-0.21272,-0.18203,-0.14877,-0.11283,-0.07414, -0.0326,0.01188,0.05941,0.11007,0.16396,0.22119,0.28185,0.34605,0.41389, 0.48549,0.56094,0.64036,0.72387,0.81156,0.90357,1) } if (std == "270"){ std <- 270 a0 <- rep(1, times = 270) a1 <- c(-1,-0.99257,-0.98513,-0.9777,-0.97026,-0.96283,-0.95539,-0.94796,-0.94052, -0.93309,-0.92565,-0.91822,-0.91078,-0.90335,-0.89591,-0.88848,-0.88104, -0.87361,-0.86617,-0.85874,-0.8513,-0.84387,-0.83643,-0.829,-0.82156, -0.81413,-0.80669,-0.79926,-0.79182,-0.78439,-0.77695,-0.76952,-0.76208, -0.75465,-0.74721,-0.73978,-0.73234,-0.72491,-0.71747,-0.71004,-0.7026, -0.69517,-0.68773,-0.6803,-0.67286,-0.66543,-0.65799,-0.65056,-0.64312, -0.63569,-0.62825,-0.62082,-0.61338,-0.60595,-0.59851,-0.59108,-0.58364, 
-0.57621,-0.56877,-0.56134,-0.5539,-0.54647,-0.53903,-0.5316,-0.52416, -0.51673,-0.50929,-0.50186,-0.49442,-0.48699,-0.47955,-0.47212,-0.46468, -0.45725,-0.44981,-0.44238,-0.43494,-0.42751,-0.42007,-0.41264,-0.4052, -0.39777,-0.39033,-0.3829,-0.37546,-0.36803,-0.36059,-0.35316,-0.34572, -0.33829,-0.33086,-0.32342,-0.31599,-0.30855,-0.30112,-0.29368,-0.28625, -0.27881,-0.27138,-0.26394,-0.25651,-0.24907,-0.24164,-0.2342,-0.22677, -0.21933,-0.2119,-0.20446,-0.19703,-0.18959,-0.18216,-0.17472,-0.16729, -0.15985,-0.15242,-0.14498,-0.13755,-0.13011,-0.12268,-0.11524,-0.10781, -0.10037,-0.09294,-0.0855,-0.07807,-0.07063,-0.0632,-0.05576,-0.04833, -0.04089,-0.03346,-0.02602,-0.01859,-0.01115,-0.00372,0.00372,0.01115, 0.01859,0.02602,0.03346,0.04089,0.04833,0.05576,0.0632,0.07063,0.07807, 0.0855,0.09294,0.10037,0.10781,0.11524,0.12268,0.13011,0.13755,0.14498, 0.15242,0.15985,0.16729,0.17472,0.18216,0.18959,0.19703,0.20446,0.2119, 0.21933,0.22677,0.2342,0.24164,0.24907,0.25651,0.26394,0.27138,0.27881, 0.28625,0.29368,0.30112,0.30855,0.31599,0.32342,0.33086,0.33829,0.34572, 0.35316,0.36059,0.36803,0.37546,0.3829,0.39033,0.39777,0.4052,0.41264, 0.42007,0.42751,0.43494,0.44238,0.44981,0.45725,0.46468,0.47212,0.47955, 0.48699,0.49442,0.50186,0.50929,0.51673,0.52416,0.5316,0.53903,0.54647, 0.5539,0.56134,0.56877,0.57621,0.58364,0.59108,0.59851,0.60595,0.61338, 0.62082,0.62825,0.63569,0.64312,0.65056,0.65799,0.66543,0.67286,0.6803, 0.68773,0.69517,0.7026,0.71004,0.71747,0.72491,0.73234,0.73978,0.74721, 0.75465,0.76208,0.76952,0.77695,0.78439,0.79182,0.79926,0.80669,0.81413, 0.82156,0.829,0.83643,0.84387,0.8513,0.85874,0.86617,0.87361,0.88104, 0.88848,0.89591,0.90335,0.91078,0.91822,0.92565,0.93309,0.94052,0.94796, 0.95539,0.96283,0.97026,0.9777,0.98513,0.99257,1) a2 <- c(1,0.97778,0.95572,0.93383,0.91211,0.89055,0.86916,0.84793,0.82687,0.80597, 0.78524,0.76468,0.74428,0.72405,0.70398,0.68408,0.66435,0.64478,0.62538, 0.60614,0.58707,0.56817,0.54943,0.53085,0.51244,0.4942,0.47613,0.45822, 0.44047,0.42289,0.40548,0.38823,0.37115,0.35424,0.33749,0.3209,0.30449, 0.28824,0.27215,0.25623,0.24047,0.22489,0.20946,0.19421,0.17912,0.16419, 0.14943,0.13484,0.12041,0.10615,0.09205,0.07812,0.06436,0.05076,0.03733, 0.02406,0.01096,-0.00198,-0.01475,-0.02735,-0.03979,-0.05206,-0.06416, -0.0761,-0.08788,-0.09949,-0.11093,-0.12221,-0.13332,-0.14426,-0.15504, -0.16566,-0.1761,-0.18638,-0.1965,-0.20645,-0.21624,-0.22585,-0.23531, -0.24459,-0.25371,-0.26267,-0.27146,-0.28008,-0.28854,-0.29683,-0.30496, -0.31292,-0.32071,-0.32834,-0.3358,-0.3431,-0.35023,-0.3572,-0.36399, -0.37063,-0.3771,-0.3834,-0.38953,-0.3955,-0.40131,-0.40695,-0.41242, -0.41773,-0.42287,-0.42784,-0.43265,-0.43729,-0.44177,-0.44608,-0.45023, -0.45421,-0.45802,-0.46167,-0.46515,-0.46847,-0.47162,-0.47461,-0.47743, -0.48008,-0.48257,-0.48489,-0.48704,-0.48903,-0.49086,-0.49252,-0.49401, -0.49534,-0.4965,-0.49749,-0.49832,-0.49898,-0.49948,-0.49981,-0.49998, -0.49998,-0.49981,-0.49948,-0.49898,-0.49832,-0.49749,-0.4965,-0.49534, -0.49401,-0.49252,-0.49086,-0.48903,-0.48704,-0.48489,-0.48257,-0.48008, -0.47743,-0.47461,-0.47162,-0.46847,-0.46515,-0.46167,-0.45802,-0.45421, -0.45023,-0.44608,-0.44177,-0.43729,-0.43265,-0.42784,-0.42287,-0.41773, -0.41242,-0.40695,-0.40131,-0.3955,-0.38953,-0.3834,-0.3771,-0.37063, -0.36399,-0.3572,-0.35023,-0.3431,-0.3358,-0.32834,-0.32071,-0.31292, -0.30496,-0.29683,-0.28854,-0.28008,-0.27146,-0.26267,-0.25371,-0.24459, -0.23531,-0.22585,-0.21624,-0.20645,-0.1965,-0.18638,-0.1761,-0.16566, 
-0.15504,-0.14426,-0.13332,-0.12221,-0.11093,-0.09949,-0.08788,-0.0761, -0.06416,-0.05206,-0.03979,-0.02735,-0.01475,-0.00198,0.01096,0.02406, 0.03733,0.05076,0.06436,0.07812,0.09205,0.10615,0.12041,0.13484,0.14943, 0.16419,0.17912,0.19421,0.20946,0.22489,0.24047,0.25623,0.27215,0.28824, 0.30449,0.3209,0.33749,0.35424,0.37115,0.38823,0.40548,0.42289,0.44047, 0.45822,0.47613,0.4942,0.51244,0.53085,0.54943,0.56817,0.58707,0.60614, 0.62538,0.64478,0.66435,0.68408,0.70398,0.72405,0.74428,0.76468,0.78524, 0.80597,0.82687,0.84793,0.86916,0.89055,0.91211,0.93383,0.95572,0.97778, 1) a3 <- c(-1,-0.9558,-0.91243,-0.86987,-0.82813,-0.78719,-0.74705,-0.70769,-0.66913, -0.63135,-0.59433,-0.55809,-0.52261,-0.48788,-0.4539,-0.42067,-0.38817, -0.3564,-0.32536,-0.29504,-0.26542,-0.23651,-0.20831,-0.18079,-0.15397, -0.12782,-0.10235,-0.07755,-0.05341,-0.02993,-0.0071,0.01509,0.03664, 0.05756,0.07785,0.09752,0.11658,0.13503,0.15288,0.17014,0.1868,0.20289, 0.2184,0.23333,0.24771,0.26152,0.27479,0.28751,0.29968,0.31133,0.32245, 0.33305,0.34313,0.3527,0.36177,0.37035,0.37844,0.38604,0.39316,0.39981, 0.406,0.41173,0.417,0.42183,0.42621,0.43017,0.43369,0.43679,0.43947, 0.44175,0.44362,0.44509,0.44618,0.44687,0.44719,0.44714,0.44671,0.44593, 0.44479,0.44331,0.44148,0.43932,0.43682,0.43401,0.43087,0.42742,0.42367, 0.41962,0.41528,0.41065,0.40574,0.40056,0.3951,0.38939,0.38342,0.3772, 0.37073,0.36403,0.3571,0.34994,0.34257,0.33498,0.32718,0.31919,0.311, 0.30262,0.29406,0.28532,0.27642,0.26735,0.25812,0.24875,0.23923,0.22957, 0.21977,0.20985,0.19981,0.18966,0.1794,0.16904,0.15858,0.14803,0.1374, 0.12669,0.11591,0.10507,0.09416,0.08321,0.07221,0.06117,0.05009,0.03899, 0.02786,0.01673,0.00558,-0.00558,-0.01673,-0.02786,-0.03899,-0.05009, -0.06117,-0.07221,-0.08321,-0.09416,-0.10507,-0.11591,-0.12669,-0.1374, -0.14803,-0.15858,-0.16904,-0.1794,-0.18966,-0.19981,-0.20985,-0.21977, -0.22957,-0.23923,-0.24875,-0.25812,-0.26735,-0.27642,-0.28532,-0.29406, -0.30262,-0.311,-0.31919,-0.32718,-0.33498,-0.34257,-0.34994,-0.3571, -0.36403,-0.37073,-0.3772,-0.38342,-0.38939,-0.3951,-0.40056,-0.40574, -0.41065,-0.41528,-0.41962,-0.42367,-0.42742,-0.43087,-0.43401,-0.43682, -0.43932,-0.44148,-0.44331,-0.44479,-0.44593,-0.44671,-0.44714,-0.44719, -0.44687,-0.44618,-0.44509,-0.44362,-0.44175,-0.43947,-0.43679,-0.43369, -0.43017,-0.42621,-0.42183,-0.417,-0.41173,-0.406,-0.39981,-0.39316, -0.38604,-0.37844,-0.37035,-0.36177,-0.3527,-0.34313,-0.33305,-0.32245, -0.31133,-0.29968,-0.28751,-0.27479,-0.26152,-0.24771,-0.23333,-0.2184, -0.20289,-0.1868,-0.17014,-0.15288,-0.13503,-0.11658,-0.09752,-0.07785, -0.05756,-0.03664,-0.01509,0.0071,0.02993,0.05341,0.07755,0.10235,0.12782, 0.15397,0.18079,0.20831,0.23651,0.26542,0.29504,0.32536,0.3564,0.38817, 0.42067,0.4539,0.48788,0.52261,0.55809,0.59433,0.63135,0.66913,0.70769, 0.74705,0.78719,0.82813,0.86987,0.91243,0.9558,1) a4 <- c(1,0.92689,0.85622,0.78795,0.72205,0.65846,0.59714,0.53806,0.48118,0.42644, 0.37382,0.32327,0.27476,0.22823,0.18366,0.14101,0.10023,0.06129,0.02415, -0.01123,-0.04488,-0.07685,-0.10716,-0.13585,-0.16297,-0.18854,-0.2126, -0.23519,-0.25634,-0.27609,-0.29446,-0.3115,-0.32723,-0.34169,-0.35491, -0.36693,-0.37777,-0.38748,-0.39607,-0.40358,-0.41004,-0.41549,-0.41995, -0.42344,-0.42602,-0.42769,-0.42849,-0.42845,-0.42759,-0.42595,-0.42355, -0.42042,-0.41659,-0.41208,-0.40692,-0.40113,-0.39474,-0.38778,-0.38027, -0.37224,-0.36371,-0.3547,-0.34524,-0.33535,-0.32505,-0.31437,-0.30333, -0.29196,-0.28026,-0.26828,-0.25601,-0.2435,-0.23075,-0.21779,-0.20464, 
-0.19132,-0.17784,-0.16423,-0.1505,-0.13668,-0.12277,-0.1088,-0.09479, -0.08075,-0.0667,-0.05266,-0.03864,-0.02465,-0.01072,0.00315,0.01693, 0.03062,0.04419,0.05764,0.07095,0.08411,0.09711,0.10993,0.12256,0.13499, 0.14721,0.1592,0.17096,0.18247,0.19373,0.20473,0.21545,0.22588,0.23602, 0.24586,0.25539,0.2646,0.27348,0.28203,0.29025,0.29811,0.30562,0.31277, 0.31956,0.32597,0.33201,0.33766,0.34294,0.34782,0.35231,0.3564,0.36009, 0.36338,0.36627,0.36874,0.37081,0.37246,0.3737,0.37453,0.37495,0.37495, 0.37453,0.3737,0.37246,0.37081,0.36874,0.36627,0.36338,0.36009,0.3564, 0.35231,0.34782,0.34294,0.33766,0.33201,0.32597,0.31956,0.31277,0.30562, 0.29811,0.29025,0.28203,0.27348,0.2646,0.25539,0.24586,0.23602,0.22588, 0.21545,0.20473,0.19373,0.18247,0.17096,0.1592,0.14721,0.13499,0.12256, 0.10993,0.09711,0.08411,0.07095,0.05764,0.04419,0.03062,0.01693,0.00315, -0.01072,-0.02465,-0.03864,-0.05266,-0.0667,-0.08075,-0.09479,-0.1088, -0.12277,-0.13668,-0.1505,-0.16423,-0.17784,-0.19132,-0.20464,-0.21779, -0.23075,-0.2435,-0.25601,-0.26828,-0.28026,-0.29196,-0.30333,-0.31437, -0.32505,-0.33535,-0.34524,-0.3547,-0.36371,-0.37224,-0.38027,-0.38778, -0.39474,-0.40113,-0.40692,-0.41208,-0.41659,-0.42042,-0.42355,-0.42595, -0.42759,-0.42845,-0.42849,-0.42769,-0.42602,-0.42344,-0.41995,-0.41549, -0.41004,-0.40358,-0.39607,-0.38748,-0.37777,-0.36693,-0.35491,-0.34169, -0.32723,-0.3115,-0.29446,-0.27609,-0.25634,-0.23519,-0.2126,-0.18854, -0.16297,-0.13585,-0.10716,-0.07685,-0.04488,-0.01123,0.02415,0.06129, 0.10023,0.14101,0.18366,0.22823,0.27476,0.32327,0.37382,0.42644,0.48118, 0.53806,0.59714,0.65846,0.72205,0.78795,0.85622,0.92689,1) a5 <- c(-1,-0.89135,-0.78833,-0.69078,-0.59853,-0.51141,-0.42927,-0.35195,-0.2793, -0.21116,-0.14739,-0.08783,-0.03235,0.01919,0.06694,0.11103,0.15159, 0.18875,0.22264,0.25339,0.28111,0.30594,0.32798,0.34736,0.36418,0.37855, 0.39059,0.4004,0.40809,0.41375,0.41748,0.41939,0.41956,0.41809,0.41507, 0.41059,0.40472,0.39757,0.3892,0.37969,0.36913,0.35759,0.34514,0.33186, 0.3178,0.30305,0.28766,0.27171,0.25524,0.23833,0.22102,0.20337,0.18545, 0.16729,0.14896,0.1305,0.11195,0.09337,0.07479,0.05627,0.03783,0.01952, 0.00137,-0.01658,-0.03429,-0.05173,-0.06888,-0.08569,-0.10216,-0.11823, -0.13391,-0.14915,-0.16393,-0.17824,-0.19206,-0.20537,-0.21814,-0.23037, -0.24204,-0.25313,-0.26364,-0.27355,-0.28286,-0.29155,-0.29961,-0.30705, -0.31386,-0.32003,-0.32555,-0.33044,-0.33467,-0.33827,-0.34122,-0.34352, -0.34519,-0.34622,-0.34662,-0.3464,-0.34555,-0.34409,-0.34202,-0.33936, -0.3361,-0.33227,-0.32787,-0.32292,-0.31742,-0.31139,-0.30484,-0.29778, -0.29024,-0.28221,-0.27373,-0.2648,-0.25545,-0.24568,-0.23552,-0.22498, -0.21408,-0.20285,-0.19129,-0.17943,-0.16729,-0.15488,-0.14224,-0.12937, -0.11629,-0.10304,-0.08963,-0.07608,-0.0624,-0.04864,-0.0348,-0.0209, -0.00697,0.00697,0.0209,0.0348,0.04864,0.0624,0.07608,0.08963,0.10304, 0.11629,0.12937,0.14224,0.15488,0.16729,0.17943,0.19129,0.20285,0.21408, 0.22498,0.23552,0.24568,0.25545,0.2648,0.27373,0.28221,0.29024,0.29778, 0.30484,0.31139,0.31742,0.32292,0.32787,0.33227,0.3361,0.33936,0.34202, 0.34409,0.34555,0.3464,0.34662,0.34622,0.34519,0.34352,0.34122,0.33827, 0.33467,0.33044,0.32555,0.32003,0.31386,0.30705,0.29961,0.29155,0.28286, 0.27355,0.26364,0.25313,0.24204,0.23037,0.21814,0.20537,0.19206,0.17824, 0.16393,0.14915,0.13391,0.11823,0.10216,0.08569,0.06888,0.05173,0.03429, 0.01658,-0.00137,-0.01952,-0.03783,-0.05627,-0.07479,-0.09337,-0.11195, -0.1305,-0.14896,-0.16729,-0.18545,-0.20337,-0.22102,-0.23833,-0.25524, 
-0.27171,-0.28766,-0.30305,-0.3178,-0.33186,-0.34514,-0.35759,-0.36913, -0.37969,-0.3892,-0.39757,-0.40472,-0.41059,-0.41507,-0.41809,-0.41956, -0.41939,-0.41748,-0.41375,-0.40809,-0.4004,-0.39059,-0.37855,-0.36418, -0.34736,-0.32798,-0.30594,-0.28111,-0.25339,-0.22264,-0.18875,-0.15159, -0.11103,-0.06694,-0.01919,0.03235,0.08783,0.14739,0.21116,0.2793,0.35195, 0.42927,0.51141,0.59853,0.69078,0.78833,0.89135,1) } # Create list of cows: cows <- unique(dat[,1]) rval <- as.data.frame(matrix(rep(0, times = 9), nrow = 1)) names(rval) <- c("ckey", "lact", "llen", "vltd", "fltd", "pltd", "vstd", "fstd", "pstd") # Convert fat and protein yields to kilograms (1 litre milk = 0.970264 kg): dat$fat <- dat$fat * (dat$vol * 0.970264) dat$pro <- dat$pro * (dat$vol * 0.970264) # Loop through each cows's records: for(i in 1:length(cows)){ # Select herd test records for this cow: id <- dat[,1] == cows[i] dat.tmp <- dat[id, 1:8] # Take each lactation in turn: lacts <- unique(dat.tmp[,3]) for(j in 1:length(lacts)){ id <- dat.tmp[,3] == lacts[j] dat.ump <- dat.tmp[id,1:8] # How many herd tests were there? ntest <- length(dat.ump[,4]) # If there less than 4 herd test events, don't calculate anything: if (ntest < 4){ vstd <- 0; fstd <- 0; pstd <- 0 vltd <- 0; fltd <- 0; pltd <- 0 } if (ntest >= 4){ # Extract appropriate values of x on the basis of herd test days in milk: x0 <- a0[dat.ump[,4]] x1 <- a1[dat.ump[,4]] x2 <- a2[dat.ump[,4]] x3 <- a3[dat.ump[,4]] xmat <- cbind(x0, x1, x2, x3) txmat <- t(xmat) Xx <- txmat %*% xmat Xy.vol <- txmat %*% dat.ump[,6] Xy.fat <- txmat %*% dat.ump[,7] Xy.pro <- txmat %*% dat.ump[,8] # Regression coefficients: a.vol <- solve(Xx, Xy.vol) a.fat <- solve(Xx, Xy.fat) a.pro <- solve(Xx, Xy.pro) # Variable "flag" equals TRUE if there is no dry off date and FALSE otherwise: flag <- is.na(unique(dat.ump[,5])) # Lactation length and days in milk at last herd test: llen <- unique(dat.ump[,5]) last.ht <- max(dat.ump[,4]) # If dry (i.e. flag == FALSE) and llen <= std, calculate yield to dry off date: if (flag == FALSE & llen <= std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a4[1:std] * a.vol[3])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a4[1:std] * a.fat[3])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a4[1:std] * a.pro[3])) vltd <- sum((a0[1:llen] * a.vol[1]) + (a1[1:llen] * a.vol[2]) + (a2[1:llen] * a.vol[3]) + (a4[1:llen] * a.vol[3])) fltd <- sum((a0[1:llen] * a.fat[1]) + (a1[1:llen] * a.fat[2]) + (a2[1:llen] * a.fat[3]) + (a4[1:llen] * a.fat[3])) pltd <- sum((a0[1:llen] * a.pro[1]) + (a1[1:llen] * a.pro[2]) + (a2[1:llen] * a.pro[3]) + (a4[1:llen] * a.pro[3])) } # If dry (i.e. 
flag == FALSE) and llen > std, calculate yield to dry off date: if (flag == FALSE & llen > std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a4[1:std] * a.vol[3])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a4[1:std] * a.fat[3])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a4[1:std] * a.pro[3])) d.xs <- llen - std v.xs <- (sum((a0[std]*a.vol[1])+(a1[std]*a.vol[2]) + (a2[std]*a.vol[3])+(a4[std]*a.vol[3]))) + (dat.ump[,6][dat.ump[,4]==last.ht]/2) f.xs <- (sum((a0[std]*a.fat[1])+(a1[std]*a.fat[2]) + (a2[std]*a.fat[3])+(a4[std]*a.fat[3]))) + (dat.ump[,7][dat.ump[,4]==last.ht]/2) p.xs <- (sum((a0[std]*a.pro[1])+(a1[std]*a.pro[2]) + (a2[std]*a.pro[3])+(a4[std]*a.pro[3]))) + (dat.ump[,8][dat.ump[,4]==last.ht]/2) vltd <- vstd + (v.xs * d.xs) fltd <- fstd + (f.xs * d.xs) pltd <- pstd + (p.xs * d.xs) } # If lactating (i.e. flag == TRUE) and dim at last herd test <= std, calculate yield to last herd test: if (flag == TRUE & last.ht <= std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a4[1:std] * a.vol[3])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a4[1:std] * a.fat[3])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a4[1:std] * a.pro[3])) vltd <- sum((a0[1:last.ht] * a.vol[1]) + (a1[1:last.ht] * a.vol[2]) + (a2[1:last.ht] * a.vol[3]) + (a4[1:last.ht] * a.vol[3])) fltd <- sum((a0[1:last.ht] * a.fat[1]) + (a1[1:last.ht] * a.fat[2]) + (a2[1:last.ht] * a.fat[3]) + (a4[1:last.ht] * a.fat[3])) pltd <- sum((a0[1:last.ht] * a.pro[1]) + (a1[1:last.ht] * a.pro[2]) + (a2[1:last.ht] * a.pro[3]) + (a4[1:last.ht] * a.pro[3])) } # If lactating (i.e. 
flag == TRUE) and dim at last herd test > std, calculate yield to last herd test: if (flag == TRUE & last.ht > std){ vstd <- sum((a0[1:std] * a.vol[1]) + (a1[1:std] * a.vol[2]) + (a2[1:std] * a.vol[3]) + (a4[1:std] * a.vol[3])) fstd <- sum((a0[1:std] * a.fat[1]) + (a1[1:std] * a.fat[2]) + (a2[1:std] * a.fat[3]) + (a4[1:std] * a.fat[3])) pstd <- sum((a0[1:std] * a.pro[1]) + (a1[1:std] * a.pro[2]) + (a2[1:std] * a.pro[3]) + (a4[1:std] * a.pro[3])) d.xs <- last.ht - std v.xs <- (sum((a0[std]*a.vol[1])+(a1[std]*a.vol[2]) + (a2[std]*a.vol[3])+(a4[std]*a.vol[3]))) + (dat.ump[,6][dat.ump[,4]==last.ht]/2) f.xs <- (sum((a0[std]*a.fat[1])+(a1[std]*a.fat[2]) + (a2[std]*a.fat[3])+(a4[std]*a.fat[3]))) + (dat.ump[,7][dat.ump[,4]==last.ht]/2) p.xs <- (sum((a0[std]*a.pro[1])+(a1[std]*a.pro[2]) + (a2[std]*a.pro[3])+(a4[std]*a.pro[3]))) + (dat.ump[,8][dat.ump[,4]==last.ht]/2) vltd <- vstd + (v.xs * d.xs) fltd <- fstd + (f.xs * d.xs) pltd <- pstd + (p.xs * d.xs) } } ckey <- unique(dat.ump[,1]) lact <- unique(dat.ump[,3]) rval.tmp <- round(as.data.frame(cbind(ckey, lact, llen, vltd, fltd, pltd, vstd, fstd, pstd)), digits = 0) rval <- rbind(rval, rval.tmp) } } rval <- as.data.frame(rval[-1,], row.names = NULL) print(rval) }epiR/R/rsu.sep.rb1rf.R0000644000176200001440000000117013741150220014101 0ustar liggesusersrsu.sep.rb1rf <- function(N, n, rr, ppr, pstar, se.u, method = "binomial") { if(method == "binomial") {epi <- rsu.epinf(pstar = pstar, rr = rr, ppr = ppr) p.all.neg <- (1 - se.u * epi[[1]])^n se.p <- 1 - prod(p.all.neg) rval <- list(se.p = se.p, epi = epi[[1]], adj.risk = epi[[2]]) } else if(method == "hypergeometric") {ppr <- N / sum(N) epi <- rsu.epinf(pstar = pstar, rr = rr, ppr = ppr) p.all.neg <- (1 - se.u * n / N)^(epi[[1]] * N) se.p <- 1 - prod(p.all.neg) rval <- list(se.p = se.p, epi = epi[[1]], adj.risk = epi[[2]]) } return(rval) }epiR/R/epi.nomogram.R0000644000176200001440000000306313726062146014101 0ustar liggesusersepi.nomogram <- function(se, sp, lr, pre.pos, verbose = FALSE){ # If likelihood ratios are known: if(is.na(se) & is.na(sp) & !is.na(lr[1])& !is.na(lr[2])){ lr.pos <- lr[1] lr.neg <- lr[2] } # If likelihood ratios are not known: if(!is.na(se) & !is.na(sp) & is.na(lr[1]) & is.na(lr[2])){ # se <- ifelse(se == 1.0, 1 - 1E-04, se) # sp <- ifelse(sp == 1.0, 1 - 1E-04, sp) lr.pos <- se / (1 - sp) lr.neg <- (1 - se) / sp } pre.odds <- pre.pos / (1 - pre.pos) post.odds.pos <- pre.odds * lr.pos post.odds.neg <- pre.odds * lr.neg post.opos.tpos <- post.odds.pos / (1 + post.odds.pos) post.opos.tneg <- post.odds.neg / (1 + post.odds.neg) lr <- data.frame(pos = lr.pos, neg = lr.neg) prior <- data.frame(opos = pre.pos) post <- data.frame(opos.tpos = post.opos.tpos, opos.tneg = post.opos.tneg) rval <- list(lr = lr, prior = prior, post = post) if(verbose == TRUE){ return(rval) } if(verbose == FALSE){ post.opos.tpos <- ifelse(post.opos.tpos < 0.01, round(post.opos.tpos, digits = 4), round(post.opos.tpos, digits = 2)) post.opos.tneg <- ifelse(post.opos.tneg < 0.01, round(post.opos.tneg, digits = 4), round(post.opos.tneg, digits = 2)) cat("Given a positive test result, the post-test probability of being outcome positive is", post.opos.tpos, "\n") cat("Given a negative test result, the post-test probability of being outcome positive is", post.opos.tneg, "\n") } }epiR/R/rsu.sep.rsmult.R0000644000176200001440000000647713762005606014444 0ustar liggesusersrsu.sep.rsmult <- function(C = NA, pstar.c, rr, ppr, se.c) { ppr <- ifelse(length(C) > 1, C / sum(C), ppr) components <- length(se.c) epi <- 
rsu.epinf(pstar.c, rr, ppr)[[1]] # Create master list of clusters sampled: cluster.list <- se.c[[1]] i <- 2 while(i <= components){ cluster.list<- merge(cluster.list, se.c[[i]], by.x = 1, by.y = 1, all.x = TRUE, all.y = TRUE) i <- i + 1 } # Ensure risk group recorded in data: risk.group <- cluster.list[,2] tmp <- which(is.na(risk.group)) if(length(tmp) > 0) { for(i in tmp) { j <- 2 while(j <= components && is.na(risk.group[i])) { risk.group[i] <- cluster.list[i,(j - 1) * 2 + 2] j <- j + 1 } } } # Replace NA values with 0: for (i in 2:ncol(cluster.list)) { cluster.list[is.na(cluster.list[,i]), i]<- 0 } # Set up arrays for epi and p.neg (adjusted and unadjusted) for each cluster and each component: epi.c <- array(0, dim = c(nrow(cluster.list), components)) epi.c[,1] <- epi[risk.group] # dim 3: 1 = adjusted, 2 = unadjusted (independence) p.neg <- array(0, dim = c(nrow(cluster.list), components, 2)) p.neg[,1,1] <- 1 - cluster.list[,3] * epi.c[,1] p.neg[,1,2] <- p.neg[,1,1] for(i in 2:components){ for(j in 1:nrow(cluster.list)) { epi.c[j,i] <- 1 - rsu.pfree.rs(se.p = cluster.list[j,(i - 1) * 2 + 1], p.intro = 0, prior = 1 - epi.c[j,i - 1])$PFree } p.neg[,i,1]<- 1 - cluster.list[,(i - 1) * 2 + 3] * epi.c[,i] p.neg[,i,2]<- 1 - cluster.list[,(i - 1) * 2 + 3] * epi.c[,1] } # Calculate n, mean se.c and mean epi for each risk group and component n <- array(0, dim = c(components, length(rr))) sep.mean <- array(0, dim = c(components, length(rr))) epi.mean <- array(0, dim = c(components, length(rr), 2)) for(i in 1:components) { n[i,] <- table(se.c[[i]][2]) sep.mean[i,] <- sapply(split(se.c[[i]][3], se.c[[i]][2]), FUN = colMeans) epi.mean[i,,1] <- sapply(split(epi.c[cluster.list[,(i - 1) * 2 + 2] > 0, i], cluster.list[cluster.list[,(i - 1) * 2 + 2] > 0,(i - 1) * 2 + 2]), FUN = mean) epi.mean[i,,2] <- epi.mean[1,,1] } # Calculate Cse and SSe: cse <- array(0, dim = c(2, components, 2)) rownames(cse) <- c("Adjusted", "Unadjusted") colnames(cse) <- paste("Component", 1:components) dimnames(cse)[[3]] <- c("Binomial", "Hypergeometric") sse <- array(0, dim = c(2, 2)) rownames(sse) <- rownames(cse) colnames(sse) <- dimnames(cse)[[3]] rownames(epi.mean) <- colnames(cse) rownames(sep.mean) <- colnames(cse) rownames(n) <- colnames(cse) colnames(epi.mean) <- paste("RR =", rr) colnames(sep.mean) <- paste("RR =", rr) colnames(n)<- paste("RR =", rr) dimnames(epi.mean)[[3]]<- rownames(cse) # rows = adjusted and unadjusted, dim3 = binomial and hypergeometric for (i in 1:2){ for (j in 1:components) { cse[i,j,1] <- 1 - prod(p.neg[,j,i]) if (length(C) > 1) { cse[i,j,2] <- 1 - prod((1 - sep.mean[j,] * n[j,] / C)^(epi.mean[j,,i] * C)) } } sse[i,1] <- 1 - prod(1 - cse[i,,1]) sse[i,2] <- 1 - prod(1 - cse[i,,2]) } if (length(C) <= 1){ sse <- sse[,1] cse <- cse[,,1] } return(list(se.p = sse, se.component = cse)) }epiR/R/epi.dms.R0000644000176200001440000000251413117711452013040 0ustar liggesusers"epi.dms" <- function(dat){ # If matrix is comprised of one column, assume conversion FROM decimal degrees TO degree, minutes, seconds: if(dim(dat)[2] == 1){ dat. <- abs(dat) deg <- floor(dat.) ms <- (dat. 
- deg) * 60 min <- floor(ms) sec <- (ms - min) * 60 rval <- as.matrix(cbind(deg, min, sec), dimnames = NULL) id <- dat[,1] < 0 id <- ifelse(id == TRUE, -1, 1) rval[,1] <- rval[,1] * id # names(rval) <- c("deg", "min", "sec") } # If matrix is comprised of two columns, assume conversion is FROM degrees and decimal minutes TO decimal degrees: else if(dim(dat)[2] == 2){ deg <- abs(dat[,1]) min <- dat[,2] / 60 rval <- as.matrix(deg + min, dimnames = NULL) id <- dat[,1] < 0 id <- ifelse(id == TRUE, -1, 1) rval <- rval * id # names(rval) <- "ddeg" } # If matrix is comprised of three columns, assume conversion FROM degrees, minutes, seconds TO decimal degrees: else if(dim(dat)[2] == 3){ deg <- abs(dat[,1]) min <- dat[,2] / 60 sec <- dat[,3] / (60 * 60) rval <- as.matrix(deg + min + sec, dimnames = NULL) id <- dat[,1] < 0 id <- ifelse(id == TRUE, -1, 1) rval <- rval * id # names(rval) <- "ddeg" } return(rval) }epiR/R/zzz.R0000644000176200001440000000066213632261236012342 0ustar liggesusers.onAttach <- function(libname, pkgname) { ver <- as.character(read.dcf(file.path(libname, pkgname, "DESCRIPTION"), "Version")) packageStartupMessage("Package epiR ", ver, " is loaded", appendLF = TRUE) packageStartupMessage("Type help(epi.about) for summary information") packageStartupMessage("Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses") packageStartupMessage("\n") }epiR/R/zMHRD.Sato.R0000644000176200001440000000214013666557132013341 0ustar liggesuserszMHRD.Sato <- function(dat, conf.level = 0.95, units = units) { if(length(dim(dat)) > 2){ ndat <- addmargins(A = dat, margin = 2, FUN = sum, quiet = FALSE) c1 <- ndat[1,1,]; c2 <- ndat[1,3,]; c3 <- ndat[2,1,]; c4 <- ndat[2,3,] dataset <- cbind(c1, c2, c3, c4) num <- sum(apply(X = dataset, MARGIN = 1, FUN = function(ro) (ro[1] * ro[4] - ro[3] * ro[2]) / (ro[2] + ro[4]))) W <- sum(apply(dataset, 1, function(ro) ro[2] * ro[4] / (ro[2] + ro[4]))) # Cochrane weights delta.MH <- num / W P <- sum(apply(dataset, 1, function(ro) (ro[2]^2 * ro[3] - ro[4]^2 * ro[1] + 0.5 * ro[2] * ro[4] * (ro[4] - ro[2])) / (ro[2] + ro[4])^2)) Q <- sum(apply(dataset,1,function(ro) (ro[1] * (ro[4] - ro[3]) + ro[3] * (ro[2] - ro[1])) / (2 * (ro[2] + ro[4])))) var.delta.MH = (delta.MH * P + Q) / W^2 SatoARisk.p <- delta.MH SatoARisk.l <- SatoARisk.p - qnorm(1 - (1 - conf.level) / 2) * sqrt(var.delta.MH) SatoARisk.u <- SatoARisk.p + qnorm(1 - (1 - conf.level) / 2) * sqrt(var.delta.MH) c(SatoARisk.p * units, SatoARisk.l * units, SatoARisk.u * units) } }epiR/R/epi.ssstrataestc.R0000644000176200001440000000316414075465132015007 0ustar liggesusersepi.ssstrataestc <- function (strata.n, strata.xbar, strata.sigma, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95) { N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.r <- ifelse(error == "relative", epsilon, epsilon / strata.xbar) N <- sum(strata.n) mean <- sum(strata.n * strata.xbar) / N sigma.bx <- sum(strata.n * (strata.xbar - mean)^2) / N sigma.wx <- sum(strata.n * strata.sigma^2) / N sigma.x <- sigma.bx + sigma.wx V <- sigma.x / mean^2 gamma <- sigma.bx / sigma.wx if(nfractional == TRUE){ # Equation 6.25 Levy and Lemeshow. Example on p 177 gives 9 for z^2. # Suspect this is an error. I use 1.96^2 =~ 4 total.sample <- (((z^2 * N)/(1 + gamma)) * V) / (((z^2 * V) / (1 + gamma)) + N * (epsilon.r^2)) strata.sample <- strata.n * (total.sample / N) total.sample <- sum(strata.sample) } if(nfractional == FALSE){ # Equation 6.25 Levy and Lemeshow. 
Example on p 177 gives 9 for z^2. # Suspect this is an error. I use 1.96^2 =~ 4 total.sample <- ceiling((((z^2 * N)/(1 + gamma)) * V) / (((z^2 * V) / (1 + gamma)) + N * (epsilon.r^2))) strata.sample <- ceiling(strata.n * (total.sample / N)) total.sample <- sum(strata.sample) } result.01 <- c(strata.sample) result.02 <- c(total.sample) result.03 <- cbind(mean = mean, sigma.bx = sigma.bx, sigma.wx = sigma.wx, sigma.x = sigma.x, rel.var = V, gamma = gamma) rval <- list(strata.sample = result.01, total.sample = result.02, stats = result.03) return(rval) } epiR/R/rsu.pstar.R0000644000176200001440000000103213665112614013437 0ustar liggesusersrsu.pstar <- function(N = NA, n, se.p, se.u){ if (length(N) == 1){ if (is.na(N)){ pstar <- (1 - exp((log(1 - se.p)) / n)) / se.u } else { pstar <- log(1 - se.p) / log(1 - se.u * n / N) / N } } else { if (length(n) == 1) n <- rep(n, length(N)) pstar <- numeric(length(N)) pstar[is.na(N)] <- (1 - exp((log(1 - se.p)) / n[is.na(N)])) / se.u pstar[!is.na(N)] <- log(1 - se.p) / log(1 - se.u * n[!is.na(N)] / N[!is.na(N)]) / N[!is.na(N)] } return(pstar) }epiR/R/epi.descriptives.R0000644000176200001440000000565614076571146015004 0ustar liggesusersepi.descriptives <- function(dat, conf.level = 0.95){ conf.low <- (1 - conf.level) / 2 conf.upp <- conf.level + (1 - conf.level) / 2 if(class(dat) != "numeric" & class(dat) != "factor") stop("Error: dat must be numeric") if(class(dat) == "numeric"){ an <- length(dat) amean <- mean(dat, na.rm = TRUE) asd <- sd(dat, na.rm = TRUE) ase <- asd / sqrt(an) ana <- is.na(dat); ana <- sum(as.numeric(ana)) aq25 <- as.vector(quantile(dat, probs = 0.25, na.rm = TRUE)) aq50 <- as.vector(quantile(dat, probs = 0.50, na.rm = TRUE)) aq75 <- as.vector(quantile(dat, probs = 0.75, na.rm = TRUE)) alcl <- as.vector(quantile(dat, probs = conf.low, na.rm = TRUE)) aucl <- as.vector(quantile(dat, probs = conf.upp, na.rm = TRUE)) amin <- min(dat, na.rm = TRUE) amax <- max(dat, na.rm = TRUE) # Geometric mean. 
Make sure all values positive: tdat <- dat[dat > 0] gn <- length(tdat) gmean <- exp(mean(log(tdat), na.rm = TRUE)) gsd <- sd(log(tdat), na.rm = TRUE) gse <- gsd / sqrt(gn) gna <- is.na(tdat); gna <- sum(as.numeric(gna)) gq25 <- as.vector(exp(quantile(log(tdat), probs = 0.25, na.rm = TRUE))) gq50 <- as.vector(exp(quantile(log(tdat), probs = 0.50, na.rm = TRUE))) gq75 <- as.vector(exp(quantile(log(tdat), probs = 0.75, na.rm = TRUE))) glcl <- as.vector(quantile(log(tdat), probs = conf.low, na.rm = TRUE)) gucl <- as.vector(quantile(log(tdat), probs = conf.upp, na.rm = TRUE)) gmin <- as.vector(exp(min(log(tdat), na.rm = TRUE))) gmax <- as.vector(exp(max(log(tdat), na.rm = TRUE))) # Skewness: x <- dat - mean(dat, na.rm = TRUE) skew <- sqrt(an) * sum(x^3, na.rm = TRUE) / (sum(x^2, na.rm = TRUE)^(3/2)) # Kurtosis: x <- dat - mean(dat, na.rm = TRUE) r <- an * sum(x^4, na.rm = TRUE) / (sum(x^2, na.rm = TRUE)^2) kurt <- ((an + 1) * (r - 3) + 6) * (an - 1)/((an - 2) * (an - 3)) rval <- list( arithmetic = data.frame(n = an, mean = amean, sd = asd, q25 = aq25, q50 = aq50, q75 = aq75, lower = alcl, upper = aucl, min = amin, max = amax, na = ana), geometric = data.frame(n = gn, mean = gmean, sd = gsd, q25 = gq25, q50 = gq50, q75 = gq75, lower = glcl, upper = gucl, min = gmin, max = gmax, na = gna), symmetry = data.frame(skewness = skew, kurtosis = kurt) ) } if(class(dat) == "factor"){ tmp <- table(dat, useNA = "always") total <- data.frame(level = "sum", n = margin.table(tmp)) tmp <- data.frame(level = row.names(tmp), n = as.numeric(tmp)) rval <- rbind(tmp, total) } return(rval) } epiR/R/epi.ssclus2estb.R0000644000176200001440000000337314075465474014553 0ustar liggesusers"epi.ssclus2estb" <- function(b, Py, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.a <- ifelse(error == "absolute", epsilon, Py * epsilon) # Estimate of the required standard error: se <- epsilon.a / z # Design effect when clusters are of different size: if(length(b) == 2){ # Machin et al. (2018) pp. 197, Equation 12.7: bbar <- b[1] bsigma <- b[2] bcv <- bsigma / bbar D <- 1 + ((bcv^2 + 1) * bbar - 1) * rho n.ssu <- (z^2 * Py * (1 - Py)) * D / epsilon.a^2 n.psu <- n.ssu / bbar # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } # Design effect when clusters are of equal size: else if(length(b) == 1){ D <- 1 + ((b - 1) * rho) n.ssu <- (z^2 * Py * (1 - Py)) * D / epsilon.a^2 n.psu <- n.ssu / b # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } if(n.psu <= 25) warning(paste('The calculated number of primary sampling units (n.psu) is ', n.psu, '. At least 25 primary sampling units are recommended for two-stage cluster sampling designs.', sep = ""), call. = FALSE) rval <- list(n.psu = n.psu, n.ssu = n.ssu, DEF = D, rho = rho) return(rval) } epiR/R/zRRwald.R0000644000176200001440000000101613666555762013105 0ustar liggesuserszRRwald <- function(dat, conf.level){ N. 
<- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d wRR.p <- (a / N1) / (c / N0) lnwRR <- log(wRR.p) lnwRR.var <- (1 / a) - (1 / N1) + (1 / c) - (1 / N0) lnwRR.se <- sqrt((1 / a) - (1 / N1) + (1 / c) - (1 / N0)) wRR.se <- exp(lnwRR.se) ll <- exp(lnwRR - (z * lnwRR.se)) ul <- exp(lnwRR + (z * lnwRR.se)) c(wRR.p, ll, ul) }epiR/R/epi.sscomps.R0000644000176200001440000000426113720170034013740 0ustar liggesusers"epi.sscomps" <- function(treat, control, n, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) { alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if(!is.na(treat) & !is.na(control) & is.na(n) & !is.na(power)){ # From: Therneau TM and Grambsch PM 2000. Modelling Survival Data - Extending the Cox Model. Springer, London, p 61 - 65. z.beta <- qnorm(power, mean = 0, sd = 1) p <- r / (r + 1); q <- 1 - p # p <- 0.5; q <- 1 - p exp.beta <- log(treat) / log(control) n <- ((z.alpha + z.beta)^2) / (p * q * log(exp.beta)^2) # Account for the design effect: n <- n * design if(nfractional == TRUE){ n.crude <- n n.treat <- (n / (r + 1)) * r n.control <- (n / (r + 1)) * 1 n.total <- n.treat + n.control } if(nfractional == FALSE){ n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control } rval <- list(n.crude = n.crude, n.total = n.total, n.treat = n.treat, n.control = n.control) } else if(!is.na(treat) & !is.na(control) & !is.na(n) & is.na(power)){ # From: Therneau TM and Grambsch PM 2000. Modelling Survival Data - Extending the Cox Model. Springer, London, p 61 - 65. beta <- log(treat / control) p <- r / (r + 1); q <- 1 - p # Account for the design effect: n <- n / design z.beta <- sqrt(n * p * q * beta^2) - z.alpha power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(power = power) } else if(is.na(treat) & is.na(control) & !is.na(n) & !is.na(power)){ # From: Therneau TM and Grambsch PM 2000. Modelling Survival Data - Extending the Cox Model. Springer, London, p 61 - 65. p <- r / (r + 1); q <- 1 - p z.beta <- qnorm(power, mean = 0, sd = 1) # Account for the design effect: n <- n / design beta <- sqrt(((z.alpha + z.beta)^2) / (n * p * q)) delta <- exp(beta) rval <- list(hazard = sort(c(delta, 1/delta))) } rval } epiR/R/epi.ssdxsesp.R0000644000176200001440000000111214120333564014121 0ustar liggesusersepi.ssdxsesp <- function(test, type = "se", Py, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95){ N. <- 1 - ((1 - conf.level)/2) z <- qnorm(N., mean = 0, sd = 1) epsilon.a <- ifelse(error == "absolute", epsilon, Py * epsilon) if(type == "se"){ n <- (z^2 * test * (1 - test)) / ((epsilon.a)^2 * Py) } if(type == "sp"){ n <- (z^2 * test * (1 - test)) / ((epsilon.a)^2 * (1 - Py)) } if (nfractional == TRUE) { n <- n } if (nfractional == FALSE) { n <- ceiling(n) } rval <- n return(rval) }epiR/R/epi.ssclus2estc.R0000644000176200001440000000365014075465500014540 0ustar liggesusers"epi.ssclus2estc" <- function(b, N, xbar, xsigma, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.r <- ifelse(error == "relative", epsilon, epsilon / xbar) # Vsq is the relative variance of the continuous variable to be estimated (i.e. var / mean^2): Vsq <- xsigma^2 / xbar^2 # Design effect when clusters are of different size: if(length(b) == 2){ # Machin et al. (2018) pp. 
197, Equation 12.7: bbar <- b[1] bsigma <- b[2] bcv <- bsigma / bbar D <- 1 + ((bcv^2 + 1) * bbar - 1) * rho # Number of secondary sampling units required (from page 74 Levy and Lemeshow, Equation 3.15) with result multiplied by D: n.ssu <- (z^2 * N * Vsq * D) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) n.psu <- n.ssu / bbar # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } # Design effect when clusters are of equal size: else if(length(b) == 1){ D <- 1 + ((b - 1) * rho) # Number of secondary sampling units required (from page 74 Levy and Lemeshow, Equation 3.15) with result multiplied by D: n.ssu <- (z^2 * N * Vsq * D) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) n.psu <- n.ssu / b # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } rval <- list(n.psu = n.psu, n.ssu = n.ssu, DEF = D, rho = rho) return(rval) } epiR/R/epi.ssdxtest.R0000644000176200001440000001706513732374472014157 0ustar liggesusersepi.ssdxtest <- function(pi, se, sp, epsilon.api, epsilon.ase, epsilon.asp, epsilon.asesp, r = 1, nfractional = FALSE, verbose = FALSE, conf.level = 0.95){ se1 <- se[1]; se2 <- se[2] sp1 <- sp[1]; sp2 <- sp[2] pi1 <- pi[1]; pi2 <- pi[2] alpha <- (1 - conf.level) / 2 z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) # pbars: pbar111 <- ((pi1 * se1 * se2) + ((1 - pi1) * (1 - sp1) * (1 - sp2))) pbar211 <- ((pi2 * se1 * se2) + ((1 - pi2) * (1 - sp1) * (1 - sp2))) pbar112 <- ((pi1 * se1 * (1 - se2)) + ((1 - pi1) * (1 - sp1) * sp2)) pbar121 <- ((pi1 * se2 * (1 - se1)) + ((1 - pi1) * (1 - sp2) * sp1)) pbar212 <- ((pi2 * se1 * (1 - se2)) + ((1 - pi2) * (1 - sp1) * sp2)) pbar221 <- ((pi2 * se2 * (1 - se1)) + ((1 - pi2) * (1 - sp2) * sp1)) pbar122 <- ((pi1 * (1 - se1) * (1 - se2)) + ((1 - pi1) * sp1 * sp2)) pbar222 <- ((pi2 * (1 - se1) * (1 - se2)) + ((1 - pi2) * sp1 * sp2)) # Matrix A: matrixA <- matrix(rep(0, times = 36), nrow = 6) dimnames(matrixA) <- list(c("pi1","pi2","b1","b2","a1","a2"), c("pi1","pi2","b1","b2","a1","a2")) matrixA[1,1] <- ((((((1 - sp1) * (1 - sp2)) - (se1 * se2))^2) / pbar111) + (((((1 - sp1) * sp2) - (se1 * (1 - se2)))^2) / pbar112) + ((((sp1 * (1 - sp2)) - ((1 - se1) * se2))^2) / pbar121) + ((((sp1 * sp2) - ((1 - se1) * (1 - se2)))^2) / pbar122)) matrixA[2,1] <- 0 matrixA[3,1] <- ((((1 - sp1) * (1 - sp2) * se2) / pbar111) + ((sp2 * (1 - sp1) * (1 - se2)) / pbar112) + (-((1 - sp2) * (sp1) * se2) / pbar121) + (-(sp1 * (sp2) * (1 - se2)) / pbar122)) matrixA[4,1] <- ((((1 - sp1) * (1 - sp2) * se1) / pbar111) + (((1 - sp2) * (sp1) * (1 - se1)) / pbar121) + (-((sp2) * (1 - sp1) * se1) / pbar112) + (-(sp1 * (sp2) * (1 - se1)) / pbar122)) matrixA[5,1] <- ((((1 - sp2) * se1 * se2) / pbar111) + ((sp2 * se1 * (1 - se2)) / pbar112) + (-( (1 - sp2) * (1 - se1) * se2) / pbar121) + (-(sp1 * (1 - se1) * (1 - se2)) / pbar122)) matrixA[6,1] <- ((((1 - sp1) * se1 * se2) / pbar111) + ((sp1 * se2 * (1 - se1)) / pbar121) + (-( (1 - sp1) * (1 - se2) * se1) / pbar112) + (-(sp2 * (1 - se1) * (1 - se2)) / pbar122)) matrixA[2,2] <- r * ((((((1 - sp1) * (1 - sp2)) - (se1 * se2))^2) / pbar211) + (((((1 - sp1) * sp2) - (se1 * (1 - se2)))^2) / pbar212) + ((((sp1 * (1 - sp2)) - ((1 - se1) * se2))^2) / pbar221) + ((((sp1 * sp2) - ((1 - se1) * (1 - se2)))^2) / pbar222)) matrixA[3,2] <- r * 
((((1 - sp1) * (1 - sp2) * se2) / pbar211) + ((sp2 * (1 - sp1) * (1 - se2)) / pbar212) + (-((1 - sp2) * (sp1) * se2) / pbar221) + (-(sp1 * (sp2) * (1 - se2)) / pbar222)) matrixA[4,2] <- r * ((((1 - sp1) * (1 - sp2) * se1) / pbar211) + (((1 - sp2) * (sp1) * (1 - se1)) / pbar221) + (-((sp2) * (1 - sp1) * se1) / pbar212) + (-(sp1 * (sp2) * (1 - se1)) / pbar222)) matrixA[5,2] <- r * ((((1 - sp2) * se1 * se2) / pbar211) + ((sp2 * se1 * (1 - se2)) / pbar212) + (-((1 - sp2) * (1 - se1) * se2) / pbar221) + (-(sp1 * (1 - se1) * (1 - se2)) / pbar222)) matrixA[6,2] <- r * ( ( ( (1-sp1)*se1*se2)/pbar211) + ( (sp1*se2*(1-se1))/pbar221) + ( - ( (1-sp1)*(1-se2)*se1)/pbar212) + ( - ( sp2*(1-se1)*(1-se2))/pbar222)) matrixA[3,3] <- ( ( (pi1^2)*( ((se2^2)/pbar111) + ( ( (1-se2)^2)/pbar112) + ( (se2^2)/pbar121) + (((1-se2)^2)/pbar122))) + ( r * ((pi2)^2) * ( ( (se2^2)/pbar211) + ( ( (1-se2)^2)/pbar212) + ( (se2^2)/pbar221) + ( ( (1-se2)^2)/pbar222)))) matrixA[4,3] <- ( ( (pi1^2)* (((se1*se2)/pbar111) + ( - (se1*(1-se2))/pbar112) + (-((1-se1)*se2)/pbar121) + (((1-se1)*(1-se2))/pbar122))) + ( r * (pi2^2)* (((se1*se2)/pbar211) + ( - (se1*(1-se2))/pbar212) + (-((1-se1)*se2)/pbar221) + (((1-se1)*(1-se2))/pbar222)))) matrixA[5,3] <- - ( ( (pi1*(1-pi1)) * ( (((1-sp2)*se2)/pbar111) + ((sp2*(1-se2))/pbar112) + (((1-sp2)*se2)/pbar121) + ((sp2*(1-se2))/pbar122))) + ( r * (pi2*(1-pi2)) * ( (((1-sp2)*se2)/pbar211) + ((sp2*(1-se2))/pbar212) + (((1-sp2)*se2)/pbar221) + ((sp2*(1-se2))/pbar222))) ) matrixA[6,3] <- ( ( (pi1*(1-pi1)) * ( ( - ( (1-sp1)*se2)/pbar111) + (sp1*se2/pbar121) + ( (1-sp1)*(1-se2)/pbar112) + ( - (sp1*(1-se2))/pbar122))) + ( r * (pi2*(1-pi2)) * ( ( - ( (1-sp1)*se2)/pbar211) + (sp1*se2/pbar221) + ( (1-sp1)*(1-se2)/pbar212) + ( - (sp1*(1-se2))/pbar222))) ) matrixA[4,4] <- ( ( (pi1^2)*( ((se1^2)/pbar111) + ( ( (1-se1)^2)/pbar121) + ( (se1^2)/pbar112) + (((1-se1)^2)/pbar122))) + ( r * ((pi2)^2) * ( ( (se1^2)/pbar211) + ( ( (1-se1)^2)/pbar221) + ( (se1^2)/pbar212) + ( ( (1-se1)^2)/pbar222)))) matrixA[5,4] <- ( ( (pi1*(1-pi1)) * ( ( - ( (1-sp2)*se1)/pbar111) + (sp2*se1/pbar112) + ( (1-sp2)*(1-se1)/pbar121) + ( - (sp2*(1-se1))/pbar122))) + (r * (pi2*(1-pi2)) * ( ( - ( (1-sp2)*se1)/pbar211) + (sp2*se1/pbar212) + ( (1-sp2)*(1-se1)/pbar221) + ( - (sp2*(1-se1))/pbar222))) ) matrixA[6,4] <- - ( ( (pi1*(1-pi1)) * ( (((1-sp1)*se1)/pbar111) + ((sp1*(1-se1))/pbar121) + (((1-sp1)*se1)/pbar112) + ((sp1*(1-se1))/pbar122))) + ( r * (pi2*(1-pi2)) * ( (((1-sp1)*se1)/pbar211) + ((sp1*(1-se1))/pbar221) + (((1-sp1)*se1)/pbar212) + ((sp1*(1-se1))/pbar222))) ) matrixA[5,5] <- ((((1-pi1)^2) * ((((1-sp2)^2)/pbar111) + ((sp2^2)/pbar112) + (((1-sp2)^2)/pbar121) + ((sp2^2)/pbar122))) + (r* ((1-pi2)^2) * ((((1-sp2)^2)/pbar211) + ((sp2^2)/pbar212) + (((1-sp2)^2)/pbar221) + ((sp2^2)/pbar222)))) matrixA[6,5] <- ( ( ((1-pi1)^2) * ( ( (1-sp1)*(1-sp2)/pbar111) + ( - ((1-sp1)*sp2/pbar112)) + ( - (sp1*(1-sp2)/pbar121)) + (sp1*sp2/pbar122))) + ( r * ((1-pi2)^2) * ( ( (1-sp1)*(1-sp2)/pbar211) + ( - ((1-sp1)*sp2/pbar212)) + ( - (sp1*(1-sp2)/pbar221)) + (sp1*sp2/pbar222))) ) matrixA[6,6] <- ((((1-pi1)^2) * ((((1-sp1)^2)/pbar111) + ((sp1^2)/pbar121) + (((1-sp1)^2)/pbar112) + ((sp1^2)/pbar122))) + (r * ((1-pi2)^2) * ((((1-sp1)^2)/pbar211) + ((sp1^2)/pbar221) + (((1-sp1)^2)/pbar212) + ((sp1^2)/pbar222)))) matrixA[1,2] <- matrixA[2,1] matrixA[1,3] <- matrixA[3,1] matrixA[1,4] <- matrixA[4,1] matrixA[1,5] <- matrixA[5,1] matrixA[1,6] <- matrixA[6,1] matrixA[2,3] <- matrixA[3,2] matrixA[2,4] <- matrixA[4,2] matrixA[2,5] <- matrixA[5,2] matrixA[2,6] <- 
matrixA[6,2] matrixA[3,4] <- matrixA[4,3] matrixA[3,5] <- matrixA[5,3] matrixA[3,6] <- matrixA[6,3] matrixA[4,5] <- matrixA[5,4] matrixA[4,6] <- matrixA[6,4] matrixA[5,6] <- matrixA[6,5] matrixC <- solve(matrixA) var.se1se2 <- matrixC[3,3] + matrixC[4,4] - 2 * matrixC[4,3] var.sp1sp2 <- matrixC[5,5] + matrixC[6,6] - 2 * matrixC[6,5] # Confidence interval width equals twice the absolute error: wpi <- epsilon.api * 2 wse <- epsilon.ase * 2 wsp <- epsilon.asp * 2 wsesp <- epsilon.asesp * 2 w <- c(wpi, wse, wsp, wsesp) matrixC.d <- c(as.numeric(diag(matrixC)), var.se1se2, var.sp1sp2) n1 <- (2 * z.alpha * sqrt(matrixC.d) / w)^2 n2 <- n1 * r sample.size <- data.frame(p1 = c(n1[1], n2[1]), p2 = c(n1[2], n2[2]), se1 = c(n1[3], n2[3]), se2 = c(n1[4], n2[4]), sp1 = c(n1[5], n2[5]), sp2 = c(n1[6], n2[6]), se1.se2 = c(n1[7], n2[7]), sp1.sp2 = c(n1[8], n2[8])) row.names(sample.size) <- c("pop1","pop2") if(nfractional == TRUE & verbose == TRUE){ rval <- sample.size } if(nfractional == FALSE & verbose == TRUE){ rval <- ceiling(sample.size) } if(nfractional == TRUE & verbose == FALSE){ rval <- data.frame(n = c(max(sample.size[1,]), max(sample.size[2,]))) row.names(rval) <- c("pop1","pop2") } if(nfractional == FALSE & verbose == FALSE){ rval <- data.frame(n = c(ceiling(max(sample.size[1,])), ceiling(max(sample.size[2,])))) row.names(rval) <- c("pop1","pop2") } return(rval) } epiR/R/zORml.R0000644000176200001440000000044613760116540012547 0ustar liggesuserszORml <- function(dat, conf.level){ mOR.tmp <- suppressWarnings(fisher.test(dat, conf.int = TRUE, conf.level = conf.level)) mOR.p <- as.numeric(mOR.tmp$estimate) mOR.l <- as.numeric(mOR.tmp$conf.int)[1] mOR.u <- as.numeric(mOR.tmp$conf.int)[2] c(mOR.p, mOR.l, mOR.u) } epiR/R/zlimit.R0000644000176200001440000000115213666556326013026 0ustar liggesuserszlimit <- function(x1, n1, x2, n2, conf.level, lim, t){ z = qchisq(conf.level, 1) px = x1 / n1 score <- 1:1000 score = 0 # Edited from Agresti version to increase speed 290617: repeat{ a. = n2 *(lim - 1) b. = n1 * lim + n2 - (x1 + x2) * (lim - 1) c. = -(x1 + x2) p2d = (-b. + sqrt(b.^2 - 4 * a. * c.)) / (2 * a.) 
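# p2d appears to be the positive root of the quadratic a. * x^2 + b. * x + c. = 0, 
# that is, the estimate of the group 2 proportion constrained to the hypothesised 
# odds ratio 'lim'; p1d (next) converts this to the corresponding group 1 
# proportion via p1 = OR * p2 / (1 + p2 * (OR - 1)).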
p1d = p2d * lim / (1 + p2d * (lim - 1)) score = ((n1 * (px - p1d))^2) * (1 / (n1 * p1d * (1 - p1d)) + 1 / (n2 * p2d * (1 - p2d))) ci = lim if(t == 0) {lim = ci / 1.001} else{lim = ci * 1.001} if(score > z){ break } } return(ci) }epiR/R/epi.psi.r0000644000176200001440000000405213735776762013134 0ustar liggesusersepi.psi <- function(dat = dat, itno = 99, conf.level = 0.95){ dat.mat <- data.matrix(dat[,2:ncol(dat)]) dat.mat <- matrix(dat.mat, nrow = ncol(dat.mat), byrow = TRUE) similarityIndex <- function(x,y){ 1 - 0.5 * sum(abs(x / sum(x) - y / sum(y))) } getSample <- function(x){ sa <- sample(c(1:length(x)), sum(x), replace = TRUE, prob = x / sum(x)) out <- matrix(0, 1, length(x)) for(i in 1:length(x)){ out[i] <- length(sa[sa == i]) } out } qlow <- (1 - conf.level) / 2 qupp <- 1 - qlow sources <- nrow(dat.mat) types <- ncol(dat.mat) combinations <- sources * (sources - 1) / 2 # Set the combinations: comb <- matrix(0, combinations, 2) k <- 1 for(i in 1:(sources - 1)){ for(j in (i + 1):sources){ comb[k,] <- c(i,j) k <- k + 1 } } simin <- matrix(0, combinations, itno) v1 <- c(); v2 <- c(); est <- c(); lower <- c(); upper <- c() for(i in 1:combinations){ for(j in 1:itno){ simin[i,j] <- similarityIndex(getSample(dat.mat[comb[i,1],]), getSample(dat.mat[comb[i,2],])) } tv1 <- as.numeric(comb[i,1]) v1 <- c(tv1, v1) tv2 <- as.numeric(comb[i,2]) v2 <- c(tv2, v2) test <- as.numeric(similarityIndex(dat.mat[comb[i,1],], dat.mat[comb[i,2],])) est <- c(test, est) tlower <- as.numeric(quantile(simin[i,], qlow)) lower <- c(tlower, lower) tupper <- as.numeric(quantile(simin[i,], qupp)) upper <- c(tupper, upper) # cat(comb[i,1], " <-> ", comb[i,2], " PS = ", similarityIndex(dat[comb[i,1],], dat[comb[i,2],]), " (", quantile(simin[i,], lower), ",", quantile(simin[i,], upper), ")\n", sep = "") } rval <- data.frame(v1, v2, est, lower, upper) # Fix up variable names: lookup <- data.frame(id = 1:length(2:ncol(dat)), vname = names(dat)[2:ncol(dat)]) rval$v1 <- as.character(lookup$vname[match(rval$v1, lookup$id)]) rval$v2 <- as.character(lookup$vname[match(rval$v2, lookup$id)]) return(rval) } epiR/R/epi.asc.R0000644000176200001440000000166613117711444013033 0ustar liggesusersepi.asc <- function(dat, file, xllcorner, yllcorner, cellsize, na = -9999) { id <- is.na(dat) dat[id] <- na ncols <- dim(dat)[2] nrows <- dim(dat)[1] h.ncol <- paste("ncols", nrows) h.nrow <- paste("nrows", ncols) # h.ncol <- paste("ncols", ncols) # h.nrow <- paste("nrows", nrows) h.xllcorner <- paste("xllcorner", xllcorner) h.yllcorner <- paste("yllcorner", yllcorner) h.cellsize <- paste("cellsize", cellsize) h.nodata <- paste("nodata_value", na) header <- rbind(h.ncol, h.nrow, h.xllcorner, h.yllcorner, h.cellsize, h.nodata) write.table(header, file = file, append = FALSE, quote = FALSE, sep = " ", row.names = FALSE, col.names = FALSE) rval <- as.matrix(dat) rval <- matrix(rval, nrow = ncols, byrow = TRUE) rval <- rval[ncols:1,] write.table(rval, file = file, append = TRUE, quote = FALSE, sep = " ", row.names = FALSE, col.names = FALSE) } epiR/R/rsu.sssep.rsfreecalc.R0000644000176200001440000000543513752202472015565 0ustar liggesusersrsu.sssep.rsfreecalc <- function(N, pstar, mse.p = 0.95, msp.p = 0.95, se.u, sp.u, method = "hypergeometric", max.ss = 32000){ type1 <- 1 - mse.p type2 <- 1 - msp.p pstar <- ifelse(pstar < 1, pstar, pstar / N) N1 <- min(N, max.ss) brks <- c(50, 100, 1000, 5000, 10000, Inf) # brks <- c(50, 100, 1000, 5000, 10000, 50000, Inf) steps <- c(5, 10, 50, 100, 200) step <- steps[which(N1 < brks)[1]] ss <- seq(from = 0, to = 
N1, by = step) ss[1] <- 1 if (length(ss) == 1) ss[2] <- N1 cp <- c() SeH <- c() SpH <- c() P1 <- c() success <- FALSE for(s in 1:length(ss)){ tmp <- zget.cp(ss[s], se.u, sp.u, type2) cp[s] <- tmp[[1]] SpH[s] <- tmp[[2]] if(method == "hypergeometric"){ P1[s] <- 1 - rsu.sep.rsfreecalc(N = N, n = ss[s], c = cp[s], pstar = pstar, se.u = se.u, sp.u = sp.u) } else{ P1[s] <- 1 - zsep.binom.imperfect(n = ss[s], c = cp[s], se = se.u, sp = sp.u, pstar = pstar) } SeH[s] <- 1 - P1[s] cp[s] <- cp[s] - 1 if(P1[s] <= type1) { success <- TRUE n1 <- ss[s] break } } if(success == TRUE){ # Sample sizes to evaluate in addition to those already done: ss[(s + 1):(s + step)] <- (ss[s - 1] + 1):(ss[s - 1] + step) # Step through each of the additional sample size estimates: for(x in 1:step){ # s equals the number of candidate sample sizes used to see if a solution could be found tmp <- zget.cp(n.cp = ss[s + x], se = se.u, sp = sp.u, type2 = type2) cp[s + x] <- tmp[[1]] SpH[s + x] <- tmp[[2]] if(method == "hypergeometric"){ P1[s + x] <- 1 - rsu.sep.rsfreecalc(N = N, n = ss[s + x], c = cp[s + x], pstar = pstar, se.u = se.u, sp.u = sp.u) } else{ P1[s + x] <- 1 - zsep.binom.imperfect(n = ss[s + x], c = cp[s + x], se = se.u, sp = sp.u, pstar = pstar) } SeH[s + x] <- 1 - P1[s + x] # Subtract one from the calculated number of cutpoints. cp[s + x] <- cp[s + x] - 1 if(P1[s + x] <= type1){ success <- TRUE n1 <- ss[s + x] break } } # Summary: rval1 <- data.frame(n = n1, N = N, c = cp[s + x], pstar = pstar, p1 = P1[s + x], se.p = SeH[s + x], sp.p = SpH[s + x]) # Number of elements in detail data frame: # ne <- length(seq(from = 1, to = N1, by = step)) + x # Changed MS 011020: ne <- s + x rval2 <- data.frame(n = ss[1:ne], c = cp, p = P1, se.p = SeH, sp.p = SpH) # Sort in order of n: rval2 <- rval2[sort.list(rval2$n),] rval <- list(summary = rval1, details = rval2) } else{ rval <- "Unable to achieve required accuracy by sampling every unit" } return(rval) }epiR/R/zARwald.R0000644000176200001440000000107313746132150013044 0ustar liggesuserszARwald <- function(dat, conf.level, units){ N. 
<- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- as.numeric(dat[1]); b <- as.numeric(dat[3]); c <- as.numeric(dat[2]); d <- as.numeric(dat[4]) N1 <- a + b; N0 <- c + d wARisk.p <- ((a / N1) - (c / N0)) ## wARisk.var <- (((a * b) / (N1^2 * (N1 - 1))) + ((c * d) / (N0^2 * (N0 - 1)))) wARisk.se <- (sqrt(((a * (N1 - a))/N1^3) + ((c * (N0 - c))/N0^3))) ll <- (wARisk.p - (z * wARisk.se)) ul <- (wARisk.p + (z * wARisk.se)) c(wARisk.p * units, ll * units, ul * units) }epiR/R/zMHRD.Sato0.R0000644000176200001440000000224213666557036013427 0ustar liggesuserszMHRD.Sato0 <- function(dat, conf.level = 0.95, units = units) { if(length(dim(dat)) > 2){ ndat <- addmargins(A = dat, margin = 2, FUN = sum, quiet = FALSE) c1 <- ndat[1,1,]; c2 <- ndat[1,3,]; c3 <- ndat[2,1,]; c4 <- ndat[2,3,] dataset <- cbind(c1, c2, c3, c4) num <- sum(apply(X = dataset, MARGIN = 1, FUN = function(ro) (ro[1] * ro[4] - ro[3] * ro[2]) / (ro[2] + ro[4]))) W <- sum(apply(dataset, 1, function(ro) ro[2] * ro[4] / (ro[2] + ro[4]))) # Cochrane weights delta.MH <- num / W P <- sum(apply(dataset, 1, function(ro) (ro[2]^2 * ro[3] - ro[4]^2 * ro[1] + 0.5 * ro[2] * ro[4] * (ro[4] - ro[2])) / (ro[2] + ro[4])^2)) Q <- sum(apply(dataset,1,function(ro) (ro[1] * (ro[4] - ro[3]) + ro[3] * (ro[2] - ro[1])) / (2 * (ro[2] + ro[4])))) delta.Mid <- delta.MH + 0.5 * qchisq(conf.level, df = 1) * (P / W^2) ME <- sqrt(delta.Mid^2 - delta.MH^2 + qchisq(conf.level, df = 1) * Q / W^2) CI <- delta.Mid + cbind(-1,1) * ME Sato0ARisk.p <- delta.Mid Sato0ARisk.l <- Sato0ARisk.p - ME Sato0ARisk.u <- Sato0ARisk.p + ME c(Sato0ARisk.p * units, Sato0ARisk.l * units, Sato0ARisk.u * units) } }epiR/R/epi.ssxsectn.R0000644000176200001440000001322413720215506014126 0ustar liggesusersepi.ssxsectn <- function(pdexp1 = 0.25, pdexp0 = 0.10, pexp = NA, n = NA, power = 0.80, r = 1, N, design = 1, sided.test = 2, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95){ alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if (!is.na(pdexp1) & !is.na(n) & !is.na(power)){ stop("Error: at least one of exposed, n and power must be NA.") } # Sample size: if(!is.na(pdexp1) & !is.na(pdexp0) & is.na(n) & !is.na(power)){ # Sample size estimate. From Woodward p 405: z.beta <- qnorm(power, mean = 0, sd = 1) # Prevalence ratio: lambda <- pdexp1 / pdexp0 # Odds ratio: psi <- (pdexp1 / (1 - pdexp1)) / (pdexp0 / (1 - pdexp0)) pi <- pdexp0 pc <- (pi * ((r * lambda) + 1)) / (r + 1) p1 <- (r + 1) / (r * (lambda - 1)^2 * pi^2) p2 <- z.alpha * sqrt((r + 1) * pc * (1 - pc)) p3 <- z.beta * sqrt((lambda * pi * (1 - (lambda * pi))) + (r * pi * (1 - pi))) n0 <- p1 * (p2 + p3)^2 # Account for the design effect: n0 <- n0 * design # Finite correction: n <- ifelse(finite.correction == TRUE, (n0 * N) / (n0 + (N - 1)), n0) if(nfractional == TRUE){ n.exp1 <- n / (r + 1) * r n.exp0 <- n / (r + 1) * 1 n.total <- n.exp1 + n.exp0 } if(nfractional == FALSE){ n.exp1 <- ceiling(n / (r + 1) * r) n.exp0 <- ceiling(n / (r + 1) * 1) n.total <- n.exp1 + n.exp0 } rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, pr = lambda, or = psi) } # Power: else if(!is.na(pdexp1) & !is.na(pdexp0) & !is.na(n) & is.na(power)){ # Study power. 
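# (This branch essentially inverts the sample size calculation above: given n, 
# solve for z.beta and convert to a power estimate with pnorm.)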
From Woodward p 409: # Prevalence ratio: lambda <- pdexp1 / pdexp0 # Odds ratio: psi <- (pdexp1 / (1 - pdexp1)) / (pdexp0 / (1 - pdexp0)) pi <- pdexp0 pc <- (pi * ((r * lambda) + 1)) / (r + 1) if(nfractional == TRUE){ n.exp1 <- n / (r + 1) * r n.exp0 <- n / (r + 1) * 1 n.total <- n.exp1 + n.exp0 } if(nfractional == FALSE){ n.exp1 <- ceiling(n / (r + 1) * r) n.exp0 <- ceiling(n / (r + 1) * 1) n.total <- n.exp1 + n.exp0 } # Convert n (finite corrected sample size) to n0: n0 <- ifelse(finite.correction == TRUE, (n * N - n) / (N - n), n) t1 <- ifelse(lambda >= 1, (pi * (lambda - 1) * sqrt(n0 * r)), (pi * (1 - lambda) * sqrt(n0 * r))) t2 <- z.alpha * (r + 1) * sqrt(pc * (1 - pc)) t3 <- (r + 1) * (lambda * pi * (1 - lambda * pi) + r * pi * (1 - pi)) z.beta <- (t1 - t2) / sqrt(t3) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, pr = lambda, or = psi) } # Lambda: else if(is.na(pdexp1) & !is.na(pdexp0) & !is.na(n) & !is.na(power)){ # Risk ratio to be detected - requires an estimate of prevalence of exposure in the unexposed. # From Woodward p 409: z.beta <- qnorm(power, mean = 0, sd = 1) pi <- pdexp0 if(nfractional == TRUE){ n.exp1 <- n / (r + 1) * r n.exp0 <- n / (r + 1) * 1 n.total <- n.exp1 + n.exp0 } if(nfractional == FALSE){ n.exp1 <- ceiling(n / (r + 1) * r) n.exp0 <- ceiling(n / (r + 1) * 1) n.total <- n.exp1 + n.exp0 } # Convert n (finite corrected sample size) to n0: n0 <- ifelse(finite.correction == TRUE, (n * N - n) / (N - n), n) Y <- r * n0 * pi^2 Z <- (r + 1) * pi * (z.alpha + z.beta)^2 a <- Y + (pi * Z) b <- (2 * Y) + Z c <- Y - (r * (1 - pi) * Z) # Risk ratio: lambda.pos <- (1 / (2 * a)) * (b + sqrt(b^2 - 4 * a * c)) lambda.neg <- (1 / (2 * a)) * (b - sqrt(b^2 - 4 * a * c)) rlambda.pos <- lambda.pos rlambda.neg <- ifelse(lambda.neg < 0, 0, lambda.neg) # From http://www.epigear.com/index_files/or2rr.html: # s = prevalence of disease in the population # p = prevalence of exposure in the population # Prevalence of disease in the exposed, unexposed and population: pdexp1.pos <- lambda.pos * pdexp0 pdexp0.pos <- pdexp0 s.pos <- (pdexp1.pos + pdexp0.pos) / 2 p.pos <- pexp pdexp1.neg <- lambda.neg * pdexp0 pdexp0.neg <- pdexp0 s.neg <- (pdexp1.neg + pdexp0.neg) / 2 p.neg <- pexp # Odds ratio: psi.pos <- (lambda.pos * (1 - (s.pos / (p.pos * lambda.pos + 1 - p.pos)))) / (1 - ((lambda.pos * s.pos) / (p.pos * lambda.pos + 1 - p.pos))) psi.neg <- (lambda.neg * (1 - (s.neg / (p.neg * lambda.neg + 1 - p.neg)))) / (1 - ((lambda.neg * s.neg) / (p.neg * lambda.neg + 1 - p.neg))) rpsi.pos <- psi.pos rpsi.neg <- ifelse(psi.neg < 0, 0, psi.neg) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, pr = sort(c(rlambda.neg, rlambda.pos)), or = sort(c(rpsi.neg, rpsi.pos))) } rval } # epi.ssxsection(pdexp1 = 0.25, pdexp0 = 0.10, pexp = 0.05, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # epi.ssxsection(pdexp1 = 0.25, pdexp0 = 0.10, pexp = 0.05, n = 200, power = NA, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # epi.ssxsection(pdexp1 = NA, pdexp0 = 0.10, pexp = 0.05, n = 200, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) epiR/R/zclopperpearson.R0000644000176200001440000000053614136475402014735 0ustar liggesuserszclopperpearson <- function(dat, conf.level){ # From RSurveillance function binom.cp: a <- dat[,1] n <- dat[,2] p <- a / n tails <- 2 low <- stats::qbeta((1 - conf.level) / tails, a, n - a + 1) upp <- stats::qbeta(1 - (1 - conf.level) / 
tails, a + 1, n - a) rval <- data.frame(est = p, lower = low, upper = upp) rval }epiR/R/epi.cpresids.R0000644000176200001440000000477213117711450014077 0ustar liggesusersepi.cpresids <- function(obs, fit, covpattern){ # Covariate pattern identifiers: cpid <- covpattern$cov.pattern$id # Number of observations that comprise each covariate pattern: n <- covpattern$cov.pattern$n # Number of outcome-positive observations observed for each covariate pattern: nY <- obs # Number of outcome-positive observations predicted for each covariate pattern: np <- fit * n # Predicted probability of outcome for each covariate pattern: pi. <- fit den <- rep(1, times = nrow(covpattern$cov.pattern)) # Turn factors into dummy variables: X <- den for(i in 3:dim(covpattern$cov.pattern)[2]){ ifelse(is.factor(covpattern$cov.pattern[,i]), # The function model.matrix returns the dummy variables for each factor. Remove the first column to return treatment contrasts. # That is, if you have a factor comprised of three levels, we return two columns to represent the treatment contrasts (i.e. 00, 01, and 10). X <- cbind(X, model.matrix(~covpattern$cov.pattern[,i] - 1)[,-1]), X <- cbind(X, covpattern$cov.pattern[,i])) } colnames(X) <- 1:dim(X)[2] # X <- as.matrix(cbind(den, covpattern$cov.pattern[3:dim(covpattern$cov.pattern)[2]])) V <- diag(np * (1 - pi.)) xvx <- solve(t(X) %*% V %*% X) sV <- sqrt(V) H <- sV %*% X %*% xvx %*% t(X) * sV leverage <- diag(H) # Raw residuals: raw <- (nY - np) # Standardised raw residuals: sraw <- raw /sd(np) # Pearson residuals: pearson <- (nY - np)/sqrt(np * (1 - pi.)) # Standardised Pearson residuals: spearson <- pearson / sqrt(1 - leverage) # Deviance residuals: sign <- ifelse(nY - np > 0, 1, -1) dev <- sign * sqrt(2 * ((nY * log(nY/np)) + ((n - nY) * log((n - nY)/(n * (1 - pi.)))))) dev[nY == 0] <- -sqrt(2 * n[nY == 0] * abs(log(1 - pi.[nY == 0]))) dev[nY == n] <- sqrt(2 * n[nY == n] * abs(log(pi.[nY == n]))) # Delta beta: deltabeta <- (pearson^2 * leverage) / (1 - leverage) # Standardised delta beta: sdeltabeta <- (spearson^2 * leverage) / (1 - leverage) # Delta chi-square (used to detect ill-fitting covariate patterns): deltachi <- pearson^2 / (1 - leverage) rval <- data.frame(cpid = cpid, n = n, obs = nY, pred = np, raw = raw, sraw = sraw, pearson = pearson, spearson = spearson, deviance = dev, leverage = leverage, deltabeta = deltabeta, sdeltabeta = sdeltabeta, deltachi = deltachi) return(rval) } epiR/R/globals.R0000644000176200001440000000004614036442266013127 0ustar liggesusersutils::globalVariables(c("interpret"))epiR/R/rsu.sssep.rbsrg.R0000644000176200001440000000073713752164756014607 0ustar liggesusersrsu.sssep.rbsrg <- function(pstar, rr, ppr, spr, se.p, se.u) { epi <- rsu.epinf(pstar = pstar, rr = rr, ppr = ppr) p.pos <- sum(epi[[1]] * spr * se.u) n.total <- ceiling(log(1 - se.p) / log(1 - p.pos)) n <- numeric(length(rr)) for(i in 1:length(rr)){ if(i < length(rr)){ n[i] <- ceiling(n.total * spr[i]) } else { n[i] <- n.total - sum(n) } } return(list(total = n.total, n = n, epinf = epi[[1]], adj.risk = epi[[2]])) }epiR/R/epi.ssninfb.R0000644000176200001440000000610114112023732013704 0ustar liggesusersepi.ssninfb <- function(treat, control, delta, n, r = 1, power, nfractional = FALSE, alpha){ # Stop if a negative value for delta entered: if (delta < 0){ stop("For a non-inferiority trial delta must be greater than or equal to zero.") } z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(power) & is.na(n)) { beta <- 1 - power 
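# For non-inferiority the hypotheses are H0: treat - control <= -delta versus 
# H1: treat - control > -delta, so the critical difference used below is 
# (treat - control) - (-delta).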
z.beta <- qnorm(1 - beta, mean = 0, sd = 1) # delta equals the max absolute tolerable difference between treat and control. # Make delta negative: ndelta <- -delta # Aniko Szabo 230821: Add check for non-existent solution: if (sign(z.alpha + z.beta) != sign(treat - control - ndelta)){ stop("Target power is not reachable. Check the exact specification of the hypotheses.") } # http://powerandsamplesize.com/Calculators/Compare-2-Proportions/2-Sample-Non-Inferiority-or-Superiority: if(nfractional == FALSE){ n.control <- ceiling((treat * (1 - treat) / r + control * (1 - control)) * ((z.alpha + z.beta) / (treat - control - ndelta))^2) n.treat <- n.control * r n.total <- n.treat + n.control } if(nfractional == TRUE){ n.control <- (treat * (1 - treat) / r + control * (1 - control)) * ((z.alpha + z.beta) / (treat - control - ndelta))^2 n.treat <- n.control * r n.total <- n.treat + n.control } rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(n) & is.na(power) & !is.na(r) & !is.na(alpha)) { # delta equals the max absolute tolerable difference between treat and control. # Make delta negative: ndelta <- -delta # Work out the number of subjects in the control group. r equals the number in the treatment group divided by the number in the control group. if(nfractional == FALSE){ n.control <- ceiling(1 / (r + 1) * (n)) n.treat <- n - n.control n.total <- n.treat + n.control } if(nfractional == TRUE){ n.control <- 1 / (r + 1) * (n) n.treat <- n - n.control n.total <- n.treat + n.control } # Replaced 010518 in response to email from Aline Guttmann on 080318: z <- (treat - control - ndelta) / sqrt(treat * (1 - treat) / n.treat + control * (1 - control) / n.control) # Original code: # z <- (treat - control - ndelta) / sqrt(treat * (1 - treat) / n.treat / r + control * (1 - control) / n.control) # Aniko Szabo 230821 - use only one tail: power <- pnorm(z - z.alpha, mean = 0, sd = 1) # Original code: # power <- pnorm(z - z.alpha, mean = 0, sd = 1) + pnorm(-z - z.alpha, mean = 0, sd = 1) rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } rval } epiR/R/rsu.spp.rs.R0000644000176200001440000000074213777237176013561 0ustar liggesusersrsu.spp.rs<- function (N = NA, n, c = 1, sp.u){ if (is.na(N)) { sph <- stats::pbinom(q = c - 1, size = n, prob = 1 - sp.u) } else if (!is.na(N) & !is.na(c) & !is.na(sp.u)) { # Expected number of test positive animals in the herd: EY <- N * (1 - sp.u) Y <- floor(EY) m <- EY - Y sph <- m * stats::phyper(q = c - 1, m = Y + 1, n = N - Y - 1, k = n) + (1 - m) * stats::phyper(q = c - 1, m = Y, n = N - Y, k = n) } return(sph) } epiR/R/epi.ssdetect.R0000644000176200001440000000653413733124636014107 0ustar liggesusersepi.ssdetect <- function(N, prev, se, sp, interpretation = "series", covar = c(0,0), finite.correction = TRUE, nfractional = FALSE, conf.level = 0.95){ alpha <- (1 - conf.level) # Hypergeometric sample size: n.hypergeo <- function(N, d, se = 1, conf.level){ n <- (N / se) * (1 - (1 - conf.level)^(1/d)) n[n > N] <- NA return(ceiling(n)) } # Binomial sample size: n.binom <- function(pstar, se = 1, conf.level){ n <- log(1 - conf.level) / log(1 - pstar * se) return(ceiling(n)) } # Covar is a vector of length two. First element is covariance for D+ group, second element is covariance for D- group. See Dohoo, Martin and Stryhn page 103. 
# Work out sensitivity and specificity: if (length(se) > 1 & interpretation == "series") { Ses <- se[1] * se[2] + covar[1] Sps <- 1 - (1 - sp[1]) * (1 - sp[2]) - covar[2] use <- Ses usp <- Sps } if (length(se) > 1 & interpretation == "parallel") { Sep <- 1 - (1 - se[1]) * (1 - se[2]) - covar[1] Spp <- sp[1] * sp[2] + covar[2] use <- Sep usp <- Spp } if (length(se) == 1) { use <- se usp <- sp } if (length(N) == 1) { d <- ceiling(N * prev) units.h <- n.hypergeo(N, d, se = use, conf.level) units.b <- n.binom(pstar = prev, se = use, conf.level = conf.level) units.b <- ifelse(finite.correction == TRUE, units.b / (1 + (units.b / N)), units.b) units = c(hypergeo = units.h, binom = units.b) if(nfractional == TRUE){ units <- units } if(nfractional == FALSE){ units <- ceiling(units) } performance <- data.frame(se = use, sp = usp) sample.size <- units rval <- list(performance = performance, sample.size = sample.size) } if (length(N) == 2) { # Number of diseased units within each cluster: d2 <- ceiling(N[2] * prev[2]) # Number of units to sample within each cluster: units.h <- n.hypergeo(N = N[2], d = d2, se = use, conf.level = conf.level) units.b <- n.binom(pstar = prev[2], se = use, conf.level = conf.level) units.b <- ifelse(finite.correction == TRUE, units.b / (1 + (units.b / N[2])), units.b) units = c(hypergeo = units.h, binom = units.b) if(nfractional == TRUE){ units <- units } if(nfractional == FALSE){ units <- ceiling(units) } # Increase the number of clusters using alpha: pd <- prev[1] * (1 - alpha) # Expected number of disease-positive clusters: d1 <- ceiling(N[1] * pd) clusters.h <- n.hypergeo(N = N[1], d = d1, se = use, conf.level = conf.level) clusters.b <- n.binom(pstar = prev[1], se = use, conf.level = conf.level) clusters.b <- ifelse(finite.correction == TRUE, clusters.b / (1 + (clusters.b / N[1])), units.b) clusters = c(hypergeo = clusters.h, binom = clusters.b) if(nfractional == TRUE){ clusters <- clusters } if(nfractional == FALSE){ clusters <- ceiling(clusters) } total <- c(hypergeo = (clusters[1] * units[1]), binom = c(clusters[2] * units[2])) performance <- data.frame(se = use, sp = usp) sample.size <- data.frame(clusters = clusters, units = units, total = total) rval <- list(performance = performance, sample.size = sample.size) } return(rval) } epiR/R/rsu.dxtest.R0000644000176200001440000000557714000443130013623 0ustar liggesusersrsu.dxtest <- function(se, sp, interpretation = "series", covar = c(0,0)){ # Objects se and sp must be of length 2: if(length(se) != 2) stop('se must be a vector of length 2.') if(length(sp) != 2) stop('sp must be a vector of length 2.') if(length(covar) != 2) stop('covar must be a vector of length 2.') # Values of se and sp must range between 0 and 1: if(se[1] < 0 | se[1] > 1) stop('se must be a number between 0 and 1.') if(sp[1] < 0 | sp[1] > 1) stop('sp must be a number between 0 and 1.') if(se[2] < 0 | se[2] > 1) stop('se must be a number between 0 and 1.') if(sp[2] < 0 | sp[2] > 1) stop('sp must be a number between 0 and 1.') # First element of covar is covariance for D+ group, second element is covariance for D- group. # See Dohoo, Martin and Stryhn (2009) page 111. # Minimums and maximums for the conditional covariance for sensitivity. # See page 111 Gardner et al. (2000): min.covse <- max(-1 * (1 - se[1]) * (1 - se[2]), -se[1] * se[2]) max.covse <- min(se[1] * (1 - se[2]), se[2] * (1 - se[1])) # Minimums and maximums for the conditional covariance for specificity. 
min.covsp <- max(-1 * (1 - sp[1]) * (1 - sp[2]), -sp[1] * sp[2]) max.covsp <- min(sp[1] * (1 - sp[2]), sp[2] * (1 - sp[1])) # Check the values of covar entered by the user and return error if outside range: if(covar[1] < min.covse | covar[1] > max.covse) stop('The covariance estimate for test sensitivity is outside of the plausible range given the sensitivities of the two tests.') if(covar[2] < min.covsp | covar[2] > max.covsp) stop('The covariance estimate for test specificity is outside of the plausible range given the specificities of the two tests.') # Series interpretation: if (interpretation == "series") { # Sensitivity and specificity assuming tests are independent. # Equations 5.18 and 5.19 Dohoo et al. (2009) page 111: sei <- se[1] * se[2] spi <- sp[1] + sp[2] - (sp[1] * sp[2]) # Sensitivity and specificity assuming tests are dependent. # Equations 5.24 and 5.25 Dohoo et al. (2009) page 113: sed <- se[1] * se[2] + covar[1] spd <- 1 - (1 - sp[1]) * (1 - sp[2]) - covar[2] } # Parallel interpretation: if (interpretation == "parallel") { # Sensitivity and specificity assuming tests are independent. # Equations 5.16 and 5.17 Dohoo et al. (2009) page 111: sei <- se[1] + se[2] - (se[1] * se[2]) spi <- sp[1] * sp[2] # Sensitivity and specificity assuming tests are dependent. # Equations 5.22 and 5.23 Dohoo et al. (2009) page 113: sed <- 1 - (1 - se[1]) * (1 - se[2]) - covar[1] spd <- sp[1] * sp[2] + covar[2] } independent <- data.frame(se = sei, sp = spi) dependent <- data.frame(se = sed, sp = spd) rval <- list(independent = independent, dependent = dependent) return(rval) } epiR/R/zORscore.R0000644000176200001440000000154213666556414013265 0ustar liggesuserszORscore <- function(dat, conf.level){ x1 <- dat[1]; n1 <- dat[1] + dat[3] x2 <- dat[2]; n2 <- dat[2] + dat[4] px = x1 / n1 py = x2 / n2 scOR.p <- (dat[1] / dat[3]) / (dat[2] / dat[4]) if(((x1 == 0) && (x2 == 0)) || ((x1 == n1) && (x2 == n2))){ ul = 1/0 ll = 0 } else if((x1 == 0) || (x2 == n2)){ ll = 0 theta = 0.01 / n2 ul = zlimit(x1, n1, x2, n2, conf.level, theta, 1) } else if((x1 == n1) || (x2 == 0)){ ul = 1 / 0 theta = 100 * n1 ll = zlimit(x1, n1, x2, n2, conf.level, theta, 0) } else{ theta = px / (1 - px) / (py / (1 - py)) / 1.1 ll = zlimit(x1, n1, x2, n2, conf.level, theta, 0) theta = px / (1 - px) / (py / (1 - py)) * 1.1 ul = zlimit(x1, n1, x2, n2, conf.level, theta, 1) } c(scOR.p, ll,ul) }epiR/R/epi.sscompc.R0000644000176200001440000000576113720213720013726 0ustar liggesusers"epi.sscompc" <- function(treat, control, n, sigma, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) { alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(n) & !is.na(power)){ stop("Error: at least one of treat, n and power must be NA.") } # Sample size: if(!is.na(treat) & !is.na(control) & is.na(n) & !is.na(sigma) & !is.na(power)){ # Sample size. 
From Woodward p 398: z.beta <- qnorm(power, mean = 0, sd = 1) delta <- abs(treat - control) n <- ((r + 1)^2 * (z.alpha + z.beta)^2 * sigma^2) / (delta^2 * r) # Account for the design effect: n <- n * design if(nfractional == TRUE){ n.crude <- n n.treat <- (n / (r + 1)) * r n.control <- (n / (r + 1)) * 1 n.total <- n.treat + n.control } if(nfractional == FALSE){ n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control } rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, power = power, delta = delta) } # Power: else if(!is.na(treat) & !is.na(control) & !is.na(n) & !is.na(sigma) & is.na(power)){ # Study power. From Woodward p 401: delta <- abs(treat - control) # Account for the design effect: n <- n / design if(nfractional == TRUE){ n.crude <- n n.treat <- (n / (r + 1)) * r n.control <- (n / (r + 1)) * 1 n.total <- n.treat + n.control } if(nfractional == FALSE){ n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control } z.beta <- ((delta * sqrt(n * r)) / ((r + 1) * sigma)) - z.alpha power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, power = power, delta = delta) } # Delta: else if(is.na(treat) & is.na(control) & !is.na(n) & !is.na(sigma) & !is.na(power)){ # Maximum detectable difference. From Woodward p 401: z.beta <- qnorm(power, mean = 0, sd = 1) # Account for the design effect: n <- n / design if(nfractional == TRUE){ n.crude <- n n.treat <- (n / (r + 1)) * r n.control <- (n / (r + 1)) * 1 n.total <- n.treat + n.control } if(nfractional == FALSE){ n.crude <- ceiling(n) n.treat <- ceiling(n / (r + 1)) * r n.control <- ceiling(n / (r + 1)) * 1 n.total <- n.treat + n.control } delta <- ((r + 1) * (z.alpha + z.beta) * sigma) / (sqrt(n * r)) rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, power = power, delta = delta) } rval }epiR/R/rsu.pfree.rs.R0000644000176200001440000000455413761701652014051 0ustar liggesusersrsu.pfree.rs <- function(se.p, p.intro = 0, prior = 0.5, by.time = TRUE){ if (is.matrix(se.p)) { if (is.vector(p.intro)) { # p.intro is scalar or vector across multiple iterations replicated across time periods: if (length(p.intro) == 1 | (dim(se.p)[1] != dim(se.p)[2] & length(p.intro) == nrow(se.p)) | (dim(se.p)[1] == dim(se.p)[2] & !by.time)) { tmp <- matrix(p.intro, nrow = nrow(se.p), ncol = ncol(se.p), byrow = FALSE) } # p.intro varies across time periods replicated across iterations: else if (length(p.intro) == ncol(se.p)) { tmp<- matrix(p.intro, nrow = nrow(se.p), ncol = ncol(se.p), byrow = TRUE) } else { warning("Unable to coerce p.intro into same dimensions as se.p", immediate. = TRUE) } p.intro <- tmp } else if (!all.equal(dim(se.p), dim(p.intro))) { warning("Unable to coerce p.intro into same dimensions as se.p", immediate. 
= TRUE) } } else { # se.p is a vector # Convert se.p and p.intro to matrix if (by.time) { # If p.intro is a scalar, convert to a vector same length as se.p: if (length(p.intro) == 1) p.intro <- rep(p.intro, length(se.p)) se.p <- matrix(se.p, nrow = 1) p.intro <- matrix(p.intro, nrow = 1) } else { se.p <- matrix(se.p, ncol = 1) p.intro <- matrix(p.intro, ncol = 1) } } # Set up arrays for pfree, discounted prior and equilibrium prior and pfree: pfree <- array(0, dim = c(nrow(se.p), ncol(se.p))) colnames(pfree) <- colnames(se.p) prior.disc <- pfree pfree.eq <- pfree prior.eq <- pfree # Calculate discounted prior for t = 1 prior.disc[,1]<- zdisc.prior(prior, p.intro[,1]) # Loop to calculate values for successive time periods: for (p in 1:ncol(se.p)){ pfree[,p] <- prior.disc[,p] / (1 - se.p[,p] * (1 - prior.disc[,p])) tmp <- rsu.pfree.equ(se.p[,p], p.intro[,p]) pfree.eq[,p] <- tmp[[1]] prior.eq[,p] <- tmp[[2]] # Calculate discounted prior for next time period except for the last period: if (p < ncol(se.p)) prior.disc[,p + 1] <- zdisc.prior(pfree[,p], p.intro[,p]) } return(list(PFree = pfree, SeP = se.p, PIntro = p.intro, "Discounted prior" = prior.disc, "Equilibrium PFree" = pfree.eq, "Equilibrium prior" = prior.eq)) } epiR/R/epi.prcc.R0000644000176200001440000000747513746702120013217 0ustar liggesusersepi.prcc <- function(dat, sided.test = 2, conf.level = 0.95){ # Calculate mu and number of parameters: N <- dim(dat)[1] K <- dim(dat)[2] - 1 # Return an error message if the number of parameters is greater than the number of model replications: if(K > N) stop("Error: the number of replications of the model must be greater than the number of model parameters") mu <- (1 + N) / 2 # Compute ranks: for(i in 1:(K + 1)){ dat[,i] <- rank(dat[,i]) } # Create a K + 1 by K + 1 matrix: C <- matrix(rep(0, times = (K + 1)^2), nrow = (K + 1)) # Correlations for each parameter pair: for(i in 1:(K + 1)){ for(j in 1:(K + 1)){ r.it <- dat[,i] r.jt <- dat[,j] r.js <- r.jt c.ij <- sum((r.it - mu) * (r.jt - mu)) / sqrt(sum((r.it - mu)^2) * sum((r.js - mu)^2)) C[i,j] <- c.ij } } # Fudge to deal with response variables that are all the same: if(is.na(C[K + 1,K + 1])) { gamma.iy <- rep(0, times = K) # Test statistic: t.iy <- gamma.iy * sqrt((N - 2) / (1 - gamma.iy^2)) p <- rep(1, times = K) df <- rep((N - 2), times = K) # Critical value from the t distribution: N. <- 1 - ((1 - conf.level) / 2) t <- qt(p = N., df = df) # Standard error: se.t <- gamma.iy / t.iy # Confidence interval: gamma.low <- gamma.iy - (t * se.t) gamma.upp <- gamma.iy + (t * se.t) # Results: rval <- data.frame(est = gamma.iy, lower = gamma.low, upper = gamma.upp, test.statistic = t.iy, df = df, p.value = p) return(rval) } else { # Matrix B is defined as the inverse of c.ij: B <- solve(C) # PRCC (gamma.iy) between the ith input parameter and the yth outcome is defined by Kendall and Stewart (1979) as follows: gamma.iy <- c() for(i in 1:K){ num <- -B[i,(K + 1)] den <- sqrt(B[i,i] * B[(K + 1),(K + 1)]) gamma.iy <- c(gamma.iy, num/den) } # Email Andrew Hill (mailto:fyu7@cdc.gov) 14 August 2009. # I think there may be an error in the epi.prcc function. Looking at the example in the package documentation, I note that the p-values all close to 0 yet if we switch the sign of y to force negative correlation with the x's we get p-values near 1. Backtracking, I think the problem is in the definition of the test statistic. There is a typo in the Blower-Dowlatabadi paper. I believe they misstate the test statistic at the end of the Appendix. 
It should be (dropping their subscripts): t = gamma * sqrt((N-2) / (1 - gamma^2)). # Equivalently, the square of the PRCC gamma^2 is asymptotically Beta(1/2,(N-2)/2). (ref. Muirhead's book 'Aspects of Multivariate Statistical Theory'). # Blower and Dowlatabadi appendix: # t.iy <- gamma.iy * sqrt((N - 2) / (1 - gamma.iy)) # Andrew Hill's correction: t.iy <- gamma.iy * sqrt((N - 2) / (1 - gamma.iy^2)) df <- rep((N - 2), times = K) # Blower and Dowlatabadi appendix: # p <- 1 - pt(q = t.iy, df = (N - 2)) if(sided.test == 2){ # Andrew Hill's correction: p <- 2 * pt(abs(t.iy), df = N - 2, lower.tail = FALSE) # p <- pbeta(t.iy^2, shape1 = 1/2, shape2 = (N - 2)/2, lower.tail = FALSE) } if(sided.test == 1){ # Andrew Hill's correction: p <- pt(abs(t.iy), df = N - 2,lower.tail = FALSE) } # Critical value from the t distribution: N. <- 1 - ((1 - conf.level) / 2) t <- qt(p = N., df = df) # Standard error: se.t <- gamma.iy / t.iy # Confidence interval: gamma.low <- gamma.iy - (t * se.t) gamma.upp <- gamma.iy + (t * se.t) # Results: rval <- data.frame(est = gamma.iy, lower = gamma.low, upper = gamma.upp, test.statistic = t.iy, df = df, p.value = p) return(rval) } }epiR/R/epi.sscohortc.R0000644000176200001440000001301613720201540014253 0ustar liggesusersepi.sscohortc <- function(irexp1 = 0.25, irexp0 = 0.10, pexp = NA, n = NA, power = 0.80, r = 1, N, design = 1, sided.test = 2, finite.correction = FALSE, nfractional = FALSE, conf.level = 0.95){ alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if (!is.na(irexp1) & !is.na(n) & !is.na(power)){ stop("Error: at least one of exposed, n and power must be NA.") } # Sample size: if(!is.na(irexp1) & !is.na(irexp0) & is.na(n) & !is.na(power)){ # Sample size estimate. From Woodward p 405: z.beta <- qnorm(power, mean = 0, sd = 1) # Risk ratio: lambda <- irexp1 / irexp0 # Odds ratio: psi <- (irexp1 / (1 - irexp1)) / (irexp0 / (1 - irexp0)) pi <- irexp0 pc <- (pi * ((r * lambda) + 1)) / (r + 1) p1 <- (r + 1) / (r * (lambda - 1)^2 * pi^2) p2 <- z.alpha * sqrt((r + 1) * pc * (1 - pc)) p3 <- z.beta * sqrt((lambda * pi * (1 - (lambda * pi))) + (r * pi * (1 - pi))) n0 <- p1 * (p2 + p3)^2 # Account for the design effect: n0 <- n0 * design # Finite correction: n <- ifelse(finite.correction == TRUE, (n0 * N) / (n0 + (N - 1)), n0) if(nfractional == TRUE){ n.crude <- n n.exp1 <- n / (r + 1) * r n.exp0 <- n / (r + 1) * 1 n.total <- n.exp1 + n.exp0 } if(nfractional == FALSE){ n.crude <- ceiling(n) n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 } rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = lambda, or = psi) } # Power: else if(!is.na(irexp1) & !is.na(irexp0) & !is.na(n) & is.na(power)){ # Study power. 
From Woodward p 409: # Risk ratio: lambda <- irexp1 / irexp0 # Odds ratio: psi <- (irexp1 / (1 - irexp1)) / (irexp0 / (1 - irexp0)) pi <- irexp0 pc <- (pi * ((r * lambda) + 1)) / (r + 1) # Account for the design effect: n <- n / design if(nfractional == TRUE){ n.exp1 <- (n / (r + 1)) * r n.exp0 <- (n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 } if(nfractional == FALSE){ n.exp1 <- ceiling((n / (r + 1)) * r) n.exp0 <- ceiling((n / (r + 1)) * 1) n.total <- n.exp1 + n.exp0 } # Convert n (finite corrected sample size) to n0: n0 <- ifelse(finite.correction == TRUE, (n * N - n) / (N - n), n) t1 <- ifelse(lambda >= 1, (pi * (lambda - 1) * sqrt(n0 * r)), (pi * (1 - lambda) * sqrt(n0 * r))) t2 <- z.alpha * (r + 1) * sqrt(pc * (1 - pc)) t3 <- (r + 1) * (lambda * pi * (1 - lambda * pi) + r * pi * (1 - pi)) z.beta <- (t1 - t2) / sqrt(t3) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = lambda, or = psi) } # Lambda: else if(is.na(irexp1) & !is.na(irexp0) & !is.na(n) & !is.na(power)){ # Risk ratio to be detected - requires a value for unexposed. From Woodward p 409: z.beta <- qnorm(power, mean = 0, sd = 1) pi <- irexp0 # Account for the design effect: n <- n / design n.exp1 <- (n / (r + 1)) * r n.exp0 <- (n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 # Convert n (finite corrected sample size) to n0: n0 <- ifelse(finite.correction == TRUE, (n * N - n) / (N - n), n) Y <- r * n0 * pi^2 Z <- (r + 1) * pi * (z.alpha + z.beta)^2 a <- Y + (pi * Z) b <- (2 * Y) + Z c <- Y - (r * (1 - pi) * Z) # Risk ratio: lambda.pos <- (1 / (2 * a)) * (b + sqrt(b^2 - 4 * a * c)) lambda.neg <- (1 / (2 * a)) * (b - sqrt(b^2 - 4 * a * c)) rlambda.pos <- lambda.pos rlambda.neg <- ifelse(lambda.neg < 0, 0, lambda.neg) # From http://www.epigear.com/index_files/or2rr.html: # s = prevalence of disease in the population # p = prevalence of exposure in the population # Prevalence of disease in the exposed, unexposed and population: irexp1.pos <- lambda.pos * irexp0 irexp0.pos <- irexp0 s.pos <- (irexp1.pos + irexp0.pos) / 2 p.pos <- pexp irexp1.neg <- lambda.neg * irexp0 irexp0.neg <- irexp0 s.neg <- (irexp1.neg + irexp0.neg) / 2 p.neg <- pexp # Odds ratio: psi.pos <- (lambda.pos * (1 - (s.pos / (p.pos * lambda.pos + 1 - p.pos)))) / (1 - ((lambda.pos * s.pos) / (p.pos * lambda.pos + 1 - p.pos))) psi.neg <- (lambda.neg * (1 - (s.neg / (p.neg * lambda.neg + 1 - p.neg)))) / (1 - ((lambda.neg * s.neg) / (p.neg * lambda.neg + 1 - p.neg))) rpsi.pos <- psi.pos rpsi.neg <- ifelse(psi.neg < 0, 0, psi.neg) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = sort(c(rlambda.neg, rlambda.pos)), or = sort(c(rpsi.neg, rpsi.pos))) } rval } # epi.sscohortc(irexp1 = 0.25, irexp0 = 0.10, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # epi.sscohortc(irexp1 = 0.25, irexp0 = 0.10, n = 200, power = NA, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # epi.sscohortc(irexp1 = NA, irexp0 = 0.10, n = 200, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) epiR/R/rsu.sssep.rs.R0000644000176200001440000000143413745456070014102 0ustar liggesusersrsu.sssep.rs <- function(N = NA, pstar, se.p = 0.95, se.u = 1) { if (length(N) == 1) { if (is.na(N)) { n <- zn.binom(se.p = se.p, pstar = pstar, se.u = se.u) } else { d <- pstar if (pstar < 1 & pstar > 0) { d <- ceiling(N * pstar) } n <- zn.hypergeo(sep = se.p, N = N, d = d, se = se.u) } } else { n <- numeric(length(N)) n[is.na(N)] <- zn.binom(se.p = se.p, pstar = 
pstar, se.u = se.u) pstar.int <- !(pstar < 1 & pstar > 0) d <- pstar if(length(d) == 1) d <- rep(d, length(N)) if(pstar < 1 & pstar > 0){ d[!is.na(N)] <- ceiling(N[!is.na(N)] * pstar) } n[!is.na(N)] <- zn.hypergeo(sep = se.p, N = N[!is.na(N)], d = d[!is.na(N)], se = se.u) } return(n) }epiR/R/epi.about.R0000644000176200001440000000072413117711444013371 0ustar liggesusers"epi.about" <- function() { cat("\n") cat("-----------------------------------------------------------\n") ver <- packageDescription("epiR", lib.loc = NULL, fields = "Version") cat(paste("epiR version", ver)) cat("\n") cat("Tools for the Analysis of Epidemiological Data") cat("\n") cat("See http://fvas.unimelb.edu.au/veam for details.") cat("\n") cat("-----------------------------------------------------------\n") invisible() } epiR/R/epi.sssupb.R0000644000176200001440000000470214112024000013553 0ustar liggesusersepi.sssupb <- function(treat, control, delta, n, r = 1, power, nfractional = FALSE, alpha){ # Stop if a negative value for delta entered: if (delta < 0){ stop("For a superiority trial delta must be greater than or equal to zero.") } z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(power) & is.na(n)) { beta <- 1 - power z.beta <- qnorm(1 - beta, mean = 0, sd = 1) # http://powerandsamplesize.com/Calculators/Compare-2-Proportions/2-Sample-Non-Inferiority-or-Superiority: if(nfractional == TRUE){ n.control <- (treat * (1 - treat) / r + control * (1 - control)) * ((z.alpha + z.beta) / (treat - control - delta))^2 n.treat <- n.control * r n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling((treat * (1 - treat) / r + control * (1 - control)) * ((z.alpha + z.beta) / (treat - control - delta))^2) n.treat <- ceiling(n.control * r) n.total <- n.treat + n.control } rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(n) & is.na(power) & !is.na(r) & !is.na(alpha)) { # Work out the number of subjects in the control group. r equals the number in the treatment group divided by the number in the control group. if(nfractional == TRUE){ n.control <- 1 / (r + 1) * n n.treat <- n - n.control n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling(1 / (r + 1) * n) n.treat <- n - n.control n.total <- n.treat + n.control } z <- (treat - control - delta) / sqrt(treat * (1 - treat) / n.treat / r + control * (1 - control) / n.control) power <- pnorm(z - z.alpha, mean = 0, sd = 1) + pnorm(-z - z.alpha, mean = 0, sd = 1) rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } rval } # epi.supb(treat = 0.85, control = 0.65, delta = -0.10, n = NA, r = 1, power = 0.80, alpha = 0.05) # 1032 patients are required to have a 90% chance of detecting, as significant at the 5% level, an increase in the primary outcome measure from 50% in the control group to 60% in the experimental group. # Reference: Pocock SJ. Clinical Trials: A Practical Approach. 
Wiley; 1983.epiR/R/zORcfield.R0000644000176200001440000000323013666614322013365 0ustar liggesuserszORcfield <- function (dat, conf.level, interval = c(1e-08, 1e+08)){ # dFNCHypergeo <- function(x, m1, m2, n, odds, precision = 1e-07){ # stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2), # is.numeric(n), is.numeric(odds), is.numeric(precision)) # .Call("dFNCHypergeo", as.integer(x), as.integer(m1), as.integer(m2), # as.integer(n), as.double(odds), as.double(precision), # PACKAGE = "BiasedUrn") # } a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d cfOR.p <- (a / b) / (c / d) if (((a == 0) && (c == 0)) || ((a == N1) && (c == N0))) { ll <- 0 ul <- Inf } else if (c == N0 || a == 0) { ll <- 0 ul <- uniroot(function(or) { sum(sapply(max(0, a + c - N0):a, BiasedUrn::dFNCHypergeo, N1, N0, a + c, or)) - BiasedUrn::dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1- conf.level)/2 }, interval = interval)$root } else if (a == N1 || c == 0) { ll <- uniroot(function(or) { sum(sapply(a:min(N1, a + c), BiasedUrn::dFNCHypergeo, N1, N0, a + c, or)) - BiasedUrn::dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2 }, interval = interval)$root ul <- Inf } else { ll <- uniroot(function(or) { sum(sapply(a:min(N1, a + c), BiasedUrn::dFNCHypergeo, N1, N0, a + c, or)) - BiasedUrn::dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2 }, interval = interval)$root ul <- uniroot(function(or) { sum(sapply(max(0, a + c - N0):a, BiasedUrn::dFNCHypergeo, N1, N0, a + c, or)) - BiasedUrn::dFNCHypergeo(a, N1, N0, a + c, or)/2 - (1 - conf.level)/2 }, interval = interval)$root } c(cfOR.p, ll, ul) }epiR/R/epi.sssimpleestc.R0000644000176200001440000000126614075465502015004 0ustar liggesusersepi.sssimpleestc <- function(N = 1E+06, xbar, sigma, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.r <- ifelse(error == "relative", epsilon, epsilon / xbar) # Vsq is the relative variance of the continuous variable to be estimated (i.e. var / mean^2): Vsq <- sigma^2 / xbar^2 # Page 74 Levy and Lemeshow (equation 3.15): n <- (z^2 * N * Vsq) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) if(nfractional == TRUE){ n <- n } if(nfractional == FALSE){ n <- ceiling(n) } rval <- n return(rval) }epiR/R/zwilson.R0000644000176200001440000000163414136477326013223 0ustar liggesuserszwilson <- function(dat, conf.level){ # Wilson, E.B. (1927) Probable inference, the law of succession, and statistical inference J. Amer. Stat. Assoc 22, 209-212. Changed 28 Oct 2021 N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[,1] n <- dat[,2] p <- a / n limits <- (z * ((p * (1 - p) + (z^2) / (4 * n)) / n)^(1/2)) / (1 + (z^2) / n) pest <- (p + (z^2) / (2 * n)) / (1 + (z^2) / n) upp <- pest + limits low <- pest - limits d. <- (a * (n - a)) / n^3 e. <- z^2 / (4 * n^2) var.wil <- sqrt(d. + e.) # Design effect equals [var.obs] / [var.srs]. # var.wil has been computed assuming simple random sampling so if an argument for design effect is provided adjust se.wil accordingly. 
Assume design = 1: design <- 1 se.wil <- sqrt(design * var.wil) rval <- data.frame(est = p, se = se.wil, lower = low, upper = upp) rval }epiR/R/rsu.sep.rsvarse.R0000644000176200001440000000057113735027300014563 0ustar liggesusersrsu.sep.rsvarse <- function(N = NA, pstar, se.u) { if (!(is.na(N)) & N < length(se.u)) return("Error: N cannot be less than the number of unit sensitivity values") if (is.na(N)) { sep <- 1 - prod(1 - se.u * pstar) } else { if (pstar < 1 & pstar > 0) pstar <- ceiling(N * pstar) sep <- 1 - (1 - mean(se.u) * length(se.u) / N)^pstar } return(sep) }epiR/R/epi.ssstrataestb.R0000644000176200001440000000317214075465504015010 0ustar liggesusersepi.ssstrataestb <- function (strata.n, strata.Py, epsilon, error = "relative", nfractional = FALSE, conf.level = 0.95) { N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.r <- ifelse(error == "relative", epsilon, epsilon / strata.Py) # Where method == "proportion" the estimated proportions for each strata are entered into the vector strata.Py: N <- sum(strata.n) mean <- sum(strata.n * strata.Py) / N # The vector strata.var is ignored (variance of proportion calculated as follows): strata.var = (strata.Py * (1 - strata.Py)) phi <- (strata.n * sqrt(strata.var)) / sum(strata.n * sqrt(strata.var)) sigma.bx <- sum((strata.n^2 * strata.var) / ((phi) * (mean^2))) sigma.bxd <- sum((strata.n * strata.var) / mean^2) if(nfractional == TRUE){ # Equation 6.23 Levy and Lemeshow. Note the similarity between 6.23 and 6.22: total.sample <- ((z^2/N^2) * sigma.bx) / ((epsilon.r^2) + ((z^2/N^2) * sigma.bxd)) strata.sample <- strata.n * (total.sample / N) } if(nfractional == FALSE){ # Equation 6.23 Levy and Lemeshow. Note the similarity between 6.23 and 6.22: total.sample <- ceiling(((z^2/N^2) * sigma.bx) / ((epsilon.r^2) + ((z^2/N^2) * sigma.bxd))) strata.sample <- ceiling(strata.n * (total.sample / N)) } result.01 <- c(strata.sample) result.02 <- c(total.sample) result.03 <- cbind(mean = mean, sigma.bx = sigma.bx, sigma.bxd = sigma.bxd, phi = phi) rval <- list(strata.sample = result.01, total.sample = result.02, stats = result.03) return(rval) } epiR/R/zrsu.rspfree.1.R0000644000176200001440000000107513757564466014336 0ustar liggesuserszrsu.rspfree.1 <- function(sep, p.intro, prior = 0.5) { if (length(p.intro) < length(sep)) p.intro <- rep(p.intro, length(sep)) prior.disc <- numeric(length(sep)) pfree <- numeric(length(sep)) prior.disc <- zdisc.prior(prior, p.intro) pfree <- prior.disc / (1 - sep * (1 - prior.disc)) tmp <- rsu.pfree.equ(sep, p.intro) prior.eq <- tmp[[1]] pfree.eq <- tmp[[2]] return(data.frame(SeP = sep, PIntro = p.intro, "Discounted prior" = prior.disc, PFree = pfree, "Equilibrium PFree" = pfree.eq, "Equilibrium prior" = prior.eq)) }epiR/R/zdisc.prior.R0000644000176200001440000000020713634771210013746 0ustar liggesuserszdisc.prior <- function(prior, p.intro) { prior.disc <- 1 - (1 - prior + p.intro - ((1 - prior) * p.intro)) return(prior.disc) }epiR/R/zz2stat.R0000644000176200001440000000120013666556732013132 0ustar liggesuserszz2stat <- function (p1x, nx, p1y, ny, dif){ diff = p1x-p1y-dif if (abs(diff) == 0) { fmdiff = 0} else{ t = ny / nx a = 1 + t b = -(1 + t + p1x + t * p1y + dif * (t + 2)) c = dif * dif + dif * (2 * p1x + t + 1) + p1x + t * p1y d = -p1x * dif * (1 + dif) v = (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2 s = sqrt((b / a / 3)^2 - c / a / 3) if(v > 0){u = s} else{u = -s} w = (3.141592654 + acos(v / u^3)) / 3 p1d = 2 * u * cos(w) - b / a / 3 p2d = p1d - dif var = p1d * (1 - p1d) / nx + p2d * 
(1 - p2d) / ny fmdiff = diff^2 / var } return(fmdiff) }epiR/R/epi.convgrid.R0000644000176200001440000000463714074767030014106 0ustar liggesusers # 070605: MS modified to insert NA when valid OS reference could not be found. .selectstring <- function(text, first, last = 1e+06){ storage.mode(text) <- "character" n <- max(lt <- length(text), length(first), length(last)) if (lt < n) text <- rep(text, length = n) substr(text, first, last) } epi.convgrid <- function(osref){ name <- c("SW","SX","SY","SZ","TV","SV","SQ","SR","SS","ST","SU","TQ","TR","SM","SN","SO","SP","TL","TM","NQ","NL","NF","NA","HV","HQ","HL","SG","SB","NW", "NR","NM","NG","NB","HW","HR","HM","SH","SC","NX","NS","NN","NH","NC","HX","HS","HN","SJ","SD","NY","NT","NO","NJ","ND","HY","HT","HO","SK","SE", "NZ","NU","NP","NK","NE","HZ","HU","HP","TF","TA","OV","OQ","OL","OF","OA","JV","JQ","JL","TG","TB","OW") easting <- c(100,200,300,400,500,0,0,100,200,300,400,500,600,100,200,300,400,500,600,0,0,0, 0,0,0,0,100,100,100,100,100,100,100,100,100,100,200,200,200,200,200,200,200,200, 200,200,300,300,300,300,300,300,300,300,300,300,400,400,400,400,400,400,400,400,400,400, 500,500,500,500,500,500,500,500,500,500,600,600,600) northing <- c(0,0,0,0,0,0,100,100,100,100,100,100,100,200,200,200,200, 200,200,600,700,800,900,1000,1100,1200,300,400,500,600,700,800,900,1000, 1100,1200,300,400,500,600,700,800,900,1000,1100,1200,300,400,500,600,700, 800,900,1000,1100,1200,300,400,500,600,700,800,900,1000,1100,1200,300,400, 500,600,700,800,900,1000,1100,1200,300,400,500) cells <- data.frame(name, easting, northing) xcoord <- c(); ycoord <- c() for(i in 1:length(osref)){ grid <- osref[i] grid <- .selectstring(text = grid, first = 1, last = 2) coords <- osref[i] easting <- as.numeric(.selectstring(text = coords, first = 3, last = 5)) * 100 northing <- as.numeric(.selectstring(text = coords, first = 6, last = 8)) * 100 id <- cells$name == grid tmp <- cells[id, 1:3] tmp <- cbind(((tmp$easting * 1000) + easting), ((tmp$northing * 1000) + northing)) if(dim(tmp)[1] == 0) tmp <- matrix(c(NA, NA), nrow = 1) xcoord <- c(xcoord, tmp[1]) ycoord <- c(ycoord, tmp[2]) } rval <- data.frame(osref = osref, xcoord = xcoord, ycoord = ycoord) return(rval) } epiR/R/zjeffreys.R0000644000176200001440000000056414136475416013524 0ustar liggesuserszjeffreys <- function(dat, conf.level){ # From RSurveillance function binom.jeffreys: a <- dat[,1] n <- dat[,2] p <- a / n tails <- 2 low <- stats::qbeta((1 - conf.level) / tails, a + 0.5, n - a + 0.5) upp <- stats::qbeta(1 - (1 - conf.level) / tails, a + 0.5, n - a + 0.5) rval <- data.frame(est = p, lower = low, upper = upp) rval }epiR/R/epi.edr.R0000644000176200001440000000666213117711456013043 0ustar liggesusersepi.edr <- function(dat, n = 4, conf.level = 0.95, nsim = 99, na.zero = TRUE){ N. 
<- 1 - ((1 - conf.level) / 2) alpha <- 1 - conf.level z <- qnorm(N., mean = 0, sd = 1) num.sum <- 0; num.sd <- 0; num.n <- 0 den.sum <- 0; den.sd <- 0; den.n <- 0 start <- 2 * n for (i in start:length(dat)){ top.start <- (i - (n - 1)) top.finish <- i bot.start <- (i - (2 * n)) + 1 bot.finish <- i - n # Vector of outbreak counts for numerator and denominator: num.tmp <- dat[top.start:top.finish] den.tmp <- dat[bot.start:bot.finish] num.sum <- c(num.sum, sum(num.tmp)) num.sd <- c(num.sd, sd(num.tmp)) num.n <- c(num.n, length(num.tmp)) den.sum <- c(den.sum, sum(den.tmp)) den.sd <- c(den.sd, sd(den.tmp)) den.n <- c(den.n, length(den.tmp)) } # Remove the initiating zero and add a vector of zeroes to the start: num.sum <- c(rep(0, times = (start - 1)), num.sum[-1]) num.sd <- c(rep(0, times = (start - 1)), num.sd[-1]) num.n <- c(rep(0, times = (start - 1)), num.n[-1]) den.sum <- c(rep(0, times = (start - 1)), den.sum[-1]) den.sd <- c(rep(0, times = (start - 1)), den.sd[-1]) den.n <- c(rep(0, times = (start - 1)), den.n[-1]) # Work out the standard error of numerator and denominator: # SE_total = (n * SE_mean): # num.se <- num.n * (num.sd / sqrt(num.n)) # den.se <- den.n * (den.sd / sqrt(den.n)) num.mat <- matrix(rep(0, times = length(num.sum) * nsim), nrow = length(num.sum)) den.mat <- matrix(rep(0, times = length(num.sum) * nsim), nrow = length(num.sum)) for(i in 1:nsim){ num.mat[,i] <- rpois(n = length(num.sum), lambda = num.sum) den.mat[,i] <- rpois(n = length(den.sum), lambda = den.sum) } edr.p <- num.sum / den.sum edr.mat <- num.mat / den.mat edr.mat[is.na(edr.mat)] <- 0 quant <- function(x, probs) quantile(x, probs, na.rm = TRUE) edr.l <- apply(edr.mat, MARGIN = 1, FUN = quant, probs = alpha/2) edr.u <- apply(edr.mat, MARGIN = 1, FUN = quant, probs = 1 - alpha/2) # Work out EDR and confidence intervals of EDR: # Source: http://www.agron.missouri.edu/mnl/55/34kowles.html # edr.sed <- sqrt(num.se^2 + den.se^2) # edr.var <- (num.se^2 / den.sum^2) + (num.sum^2 / den.sum^4) * den.se^2 # Method 1 - use of extremes: # edr.l <- (num.sum - (z * num.se)) / (den.sum + (z * den.se)) # edr.u <- (num.sum + (z * num.se)) / (den.sum - (z * den.se)) # Method 2 - standard error of the difference between means: # edr.l <- 1 + ((num.sum - den.sum) - edr.sed) / (den.sum - (z * den.se)) # edr.u <- 1 + ((num.sum - den.sum) - edr.sed) / (den.sum + (z * den.se)) # Method 3 - approximate variance of the error of the ratios: # edr.l <- edr.p - (z * sqrt(edr.var)) # edr.l[edr.l < 0] <- 0 # edr.u <- edr.p + (z * sqrt(edr.var)) if(na.zero == FALSE) { rval <- as.data.frame(cbind(edr.p, edr.l, edr.u)) names(rval) <- c("est", "lower", "upper") } else if(na.zero == TRUE) { id <- is.na(edr.p) edr.p[id] <- 0 edr.l[id] <- 0 edr.u[id] <- 0 id <- is.infinite(edr.p) edr.p[id] <- 0 edr.l[id] <- 0 edr.u[id] <- 0 rval <- as.data.frame(cbind(edr.p, edr.l, edr.u)) names(rval) <- c("est", "lower", "upper") } rval } epiR/R/rsu.sssep.rb2st1rf.R0000644000176200001440000000135313754661614015125 0ustar liggesusersrsu.sssep.rb2st1rf <- function(rr, ppr, spr, pstar.c, se.c, pstar.u, se.u, se.p) { n.u <- zn.binom(se.p = se.c, pstar = pstar.u, se.u = se.u) n <- rsu.sssep.rbsrg(pstar = pstar.c, rr = rr, ppr = ppr, spr = spr, se.p = se.c, se.u = se.p) t.clusters <- n$total n.clusters.per.strata <- n$n t.units <- n$total * n.u n.units.per.strata <- n$n * n.u n.units.per.cluster <- n.u epinf <- n$epinf adj.risk <- n$adj.risk rval <- list( n.clusters = t.clusters, n.clusters.per.strata = n.clusters.per.strata, n.units = t.units, n.units.per.strata = 
n.units.per.strata, n.units.per.cluster = n.units.per.cluster, epinf = epinf, adj.risk = adj.risk) return(rval) }epiR/R/rsu.epinf.R0000644000176200001440000000025213665124140013407 0ustar liggesusersrsu.epinf <- function(pstar, rr, ppr) { adj.risk <- rsu.adjrisk(rr = rr, ppr = ppr) epinf <- pstar * adj.risk return(list(epinf = epinf, adj.risk = adj.risk)) }epiR/R/epi.popsize.R0000644000176200001440000000330113117711464013744 0ustar liggesusers"epi.popsize" <- function (T1, T2, T12, conf.level = 0.95, verbose = FALSE) { N. <- c(((1 - conf.level) / 2), 1 - ((1 - conf.level) / 2)) z <- qnorm(N., mean = 0, sd = 1)[2] lower <- "lower" upper <- "upper" N <- T1 * (T2 / T12) p <- T1 / N fcf <- sqrt(1 - (T2 / N)) width <- z * sqrt(((p * (1 - p)) / T2) * (1 - T2 / N)) + (1 / (2 * N)) low.p <- p - width up.p <- p + width low.N <- round((T1 / up.p), digits = 0) up.N <- round(ceiling(T1 / low.p), digits = 0) # New tests first round = T1 # New tests second round = T2 - T12 total.test <- T1 + (T2 - T12) untest <- N - total.test low.untest <- ifelse(low.N - total.test < 0, 0, low.N - total.test) up.untest <- ifelse(up.N - total.test < 0, 0, up.N - total.test) population <- as.data.frame(cbind(round(N, digits = 0), low.N, up.N)) names(population) <- c("est", lower, upper) untested <- as.data.frame(cbind(round(untest, digits = 0), low.untest, up.untest)) names(untested) <- c("est", lower, upper) rval <- list(population = population, untested = untested) if(verbose == TRUE){ return(rval) } else if(verbose == FALSE){ line1 <- paste("Estimated population size: ", round(N, digits = 0), " (", (conf.level * 100), "% CI ", low.N, " - ", up.N, ")", sep = "") line2 <- paste("Estimated number of untested subjects: ", round(untest, digits = 0), " (", (conf.level * 100), "% CI ", low.untest, " - ", up.untest, ")", sep = "") cat("\n", line1) cat("\n", line2, "\n") } } epiR/R/rsu.sep.rs.R0000644000176200001440000000223513701536100013515 0ustar liggesusersrsu.sep.rs <- function(N = NA, n, pstar, se.u = 1){ # Check for errors in inputs # pstar.int = flag to indicate proportion (F) or integer (T) design prevalence: pstar.int <- !(pstar < 1 & pstar > 0) if (sum(is.na(N)) > 0 & pstar.int) { err.msg <- "Population size (N) must be provided if design prevalence is an integer." return(err.msg) } else if (pstar.int & (pstar < 1 | pstar != round(pstar, 0))) { err.msg <- "Design prevalence must be a proportion or a positive integer." 
return(err.msg) } # sep calculations: se.p <- numeric(length(n)) if (length(N) == 1) N <- rep(N, times = length(n)) if (length(se.u) == 1) se.u <- rep(se.u, times = length(n)) d <- pstar if (length(d) == 1) d <- rep(d, times = length(n)) if (length(se.p[is.na(N)]) > 0) se.p[is.na(N)] <- zsep.binom(n = n[is.na(N)], pstar = pstar, se = se.u[is.na(N)], sp = 1) if (sum(!is.na(N)) != 0) { if (!pstar.int) { d[!is.na(N)] <- ceiling(N[!is.na(N)] * pstar) } se.p[!is.na(N)] <- zsep.hypergeo(N = N[!is.na(N)], n = n[!is.na(N)], d = d[!is.na(N)], se = se.u[!is.na(N)]) } return(se.p) } epiR/R/rsu.sssep.rb2st2rf.R0000644000176200001440000000061013754677052015124 0ustar liggesusersrsu.sssep.rb2st2rf <- function(rr.c, ppr.c, spr.c, pstar.c, se.c, rr.u, ppr.u, spr.u, pstar.u, se.u, se.p){ n.u <- rsu.sssep.rbsrg(pstar = pstar.u, rr = rr.u, ppr = ppr.u, spr = spr.u, se.p = se.u, se.u = se.c) n.c <- rsu.sssep.rbsrg(pstar = pstar.c, rr = rr.c, ppr = ppr.c, spr = spr.c, se.p = se.c, se.u = se.p) n <- list(clusters = n.c, units = n.u) return(n) }epiR/R/epi.indirectadj.R0000644000176200001440000000664513117711456014552 0ustar liggesusers"epi.indirectadj" <- function(obs, pop, std, units, conf.level = 0.95){ # How many strata (rows) are there? n.strata <- dim(pop)[1] # How many covariates are there? n.cov <- dim(pop)[2] N <- 1 - ((1 - conf.level) / 2) alpha <- 1 - conf.level z <- qnorm(N, mean = 0, sd = 1) tmp <- data.frame(strata = rep(rownames(pop), times = n.cov), cov = rep(colnames(pop), each = n.strata), pop = as.vector(pop), std = rep(as.vector(std[1:n.cov]), each = n.strata)) # Expected events (equals std incidence multiplied by population size): tmp$exp <- (tmp$pop * tmp$std) # tmp <- tmp[order(tmp$strata, tmp$cov),] # Crude risk by strata: # Turn 'obs' into a table object so calculations can easily be done by strata: t.obs <- by(data = obs, INDICES = rownames(obs), FUN = sum) t.exp <- by(data = tmp$exp, INDICES = tmp$strata, FUN = sum) t.pop <- by(data = tmp$pop, INDICES = tmp$strata, FUN = sum) # Confidence interval for crude incidence risk estimates corrected following email from Gillian Raab: crude.p <- t.obs / t.pop # crude.se <- crude.p / sqrt(t.pop) ## Incorrect. 
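  # The standard error of the crude risk is based on the observed (Poisson) event count
  # rather than the population size, i.e. SE = crude.p / sqrt(obs):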
crude.se <- crude.p / sqrt(t.obs) ## replaced pop by obs crude.l <- qchisq(alpha / 2, 2 * t.obs) / 2 / t.pop ## next 2 lines changed crude.u <- qchisq(1 - alpha / 2, 2 *(t.obs + 1)) / 2 / t.pop crude.strata <- data.frame(est = as.vector(crude.p) * units, lower = as.vector(crude.l) * units, upper = as.vector(crude.u) * units) rownames(crude.strata) <- names(t.exp) # Indirectly adjusted risk for each strata (see page 378 of Stata manual): t.obs <- by(data = obs, INDICES = rownames(obs), FUN = sum) t.exp <- by(data = tmp$exp, INDICES = tmp$strata, FUN = sum) t.pop <- by(data = tmp$pop, INDICES = tmp$strata, FUN = sum) if(n.cov > 1){ adj.p <- (std[n.cov + 1] * (t.obs / t.exp)) adj.l <- (std[n.cov + 1] * (qpois((1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp)) adj.u <- (std[n.cov + 1] * (qpois(1 - (1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp)) adj.strata <- data.frame(est = as.vector(adj.p) * units, lower = as.vector(adj.l) * units, upper = as.vector(adj.u) * units) rownames(adj.strata) <- names(t.exp) } if(n.cov == 1){ adj.p <- (std * (t.obs / t.exp)) adj.l <- (std * (qpois((1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp)) adj.u <- (std * (qpois(1 - (1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp)) adj.strata <- data.frame(est = as.vector(adj.p) * units, lower = as.vector(adj.l) * units, upper = as.vector(adj.u) * units) rownames(adj.strata) <- names(t.exp) } # Crude standardised mortality ratio (confidence intervals based on Breslow and Day 1987 p 69-71): smr.p <- t.obs / t.exp smr.l <- qpois((1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp smr.u <- qpois(1 - (1 - conf.level) / 2, lambda = t.obs, log.p = FALSE) / t.exp smr.strata <- data.frame(obs = as.vector(t.obs), exp = as.vector(t.exp), est = as.vector(smr.p), lower = as.vector(smr.l), upper = as.vector(smr.u)) rownames(smr.strata) <- names(t.exp) rval <- list(crude.strata = crude.strata, adj.strata = adj.strata, smr.strata = smr.strata) return(rval) }epiR/R/rsu.sep.rspool.R0000644000176200001440000000027313701536576014430 0ustar liggesusersrsu.sep.rspool <- function(r, k, pstar, pse, psp = 1){ sep <- 1 - ((1 - (1 - pstar)^k) * (1 - pse) + (1 - pstar)^k * psp)^r spp <- psp^r return(list(se.p = sep, sp.p = spp)) } epiR/R/epi.dgamma.R0000644000176200001440000000046313117711452013504 0ustar liggesusers"epi.dgamma" <- function(rr, quantiles = c(0.05, 0.95)){ fold.variation <- rr[2]/rr[1] low.p <- abs(qnorm(quantiles[1], mean = 0, sd = 1)) up.p <- abs(qnorm(quantiles[2], mean = 0, sd = 1)) p <- low.p + up.p tau <- (p^2) / (log(fold.variation) * log(fold.variation)) return(tau) } epiR/R/epi.ssclus1estc.R0000644000176200001440000000362314075465476014553 0ustar liggesusers"epi.ssclus1estc" <- function(b, N, xbar, xsigma, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.r <- ifelse(error == "relative", epsilon, epsilon / xbar) # Vsq is the relative variance of the continuous variable to be estimated (i.e. var / mean^2): Vsq <- xsigma^2 / xbar^2 # Design effect when clusters are of different size: if(length(b) == 2){ # Machin et al. (2018) pp. 
197, Equation 12.7: bbar <- b[1] bsigma <- b[2] bcv <- bsigma / bbar D <- 1 + ((bcv^2 + 1) * bbar - 1) * rho # Number of secondary sampling units required (from page 74 Levy and Lemeshow, Equation 3.15) with result multiplied by D: n.ssu <- (z^2 * N * Vsq * D) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) n.psu <- n.ssu / bbar # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } # Design effect when clusters are of equal size: else if(length(b) == 1){ D <- 1 + ((b - 1) * rho) # Number of secondary sampling units required (from page 74 Levy and Lemeshow, Equation 3.15) with result multiplied by D: n.ssu <- (z^2 * N * Vsq * D) / (z^2 * Vsq + ((N - 1) * epsilon.r^2)) n.psu <- n.ssu / b # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } rval <- list(n.psu = n.psu, n.ssu = n.ssu, DEF = D, rho = rho) return(rval) }epiR/R/zsph.binom.R0000644000176200001440000000013613634771136013577 0ustar liggesuserszsph.binom <- function(n, c, sp) { sph <- stats::pbinom(c - 1, n, 1 - sp) return(sph) }epiR/R/epi.ssequb.R0000644000176200001440000000632414112023130013543 0ustar liggesusersepi.ssequb <- function(treat, control, delta, n, r = 1, power, nfractional = FALSE, alpha){ # Stop if a negative value for delta entered: if (delta <= 0){ stop("For an equivalence trial delta must be greater than zero.") } z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) # Sample size: if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(power) & is.na(n)) { beta <- (1 - power) z.beta <- qnorm(1 - beta / 2, mean = 0, sd = 1) pA <- treat; pB <- control qA <- 1 - pA; qB <- 1 - pB epsilon <- pA - pB # Chow et al page 89, Equation 4.2.4: # nB <- (z.alpha + z.beta)^2 / (delta - abs(epsilon))^2 * (((pA * qA) / r) + (pB * qB)) # http://powerandsamplesize.com/Calculators/Compare-2-Proportions/2-Sample-Equivalence: nB <- (pA * qA / r + pB * qB) * ((z.alpha + z.beta) / (abs(pA - pB) - delta))^2 if(nfractional == TRUE){ nA <- nB * r nB <- nB n.total <- nA + nB } if(nfractional == FALSE){ nA <- ceiling(nB * r) nB <- ceiling(nB) n.total <- nA + nB } rval <- list(n.total = n.total, n.treat = nA, n.control = nB, delta = delta, power = power) } # Power: if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(n) & is.na(power) & !is.na(r) & !is.na(alpha)) { pA <- treat; pB <- control qA <- 1 - pA; qB <- 1 - pB if(nfractional == TRUE){ nA <- n - 1 / (r + 1) * (n) nB <- 1 / (r + 1) * (n) n.total <- nA + nB } if(nfractional == FALSE){ nA <- n - ceiling(1 / (r + 1) * (n)) nB <- ceiling(1 / (r + 1) * (n)) n.total <- nA + nB } epsilon <- pA - pB # Chow et al. page 89, second equation from top of page: z <- (delta - abs(epsilon)) / sqrt((pA * qA / nA) + (pB * qB / nB)) power <- 2 * pnorm(z - z.alpha, mean = 0, sd = 1) - 1 # http://powerandsamplesize.com/Calculators/Test-1-Proportion/1-Sample-Equivalence: # z = (abs(pA - pB) - delta) / sqrt((pA * qA / nA) + (pB * qB / nB)) # power = 2 * (pnorm(z - z.alpha) + pnorm(-z - z.alpha)) - 1 # From user (Wu et al. 
2008, page 433): # z1 <- (delta - abs(pA - pB)) / sqrt((pA * qA / nA) + (pB * qB / nB)) # z2 <- (delta + abs(pA - pB)) / sqrt((pA * qA / nA) + (pB * qB / nB)) # power <- 1 - pnorm(-z1 + z.alpha) - pnorm(-z2 + z.alpha) rval <- list(n.total = n.total, n.treat = nA, n.control = nB, delta = delta, power = power) } rval } # Chow S, Shao J, Wang H. 2008. Sample Size Calculations in Clinical Research. 2nd Ed. Chapman & Hall/CRC Biostatistics Series. page 89 # epi.equivb(treat = 0.65, control = 0.85, delta = 0.05, n = NA, power = 0.80, r = 1, alpha = 0.05) # n.treat = 136, n.control = 136, n.total = 272 # Agrees with http://powerandsamplesize.com/Calculators/Compare-2-Proportions/2-Sample-Equivalence # epi.equivb(treat = 0.65, control = 0.85, delta = 0.05, n = NA, power = 0.80, r = 1, alpha = 0.05) # n.treat = 136, n.control = 136, n.total = 272 # Agrees with https://www.sealedenvelope.com/power/binary-equivalence/ # epi.equivb(treat = 0.65, control = 0.85, delta = 0.05, n = 200, power = NA, r = 1, alpha = 0.05)epiR/R/epi.betabuster.R0000644000176200001440000000330614133231304014405 0ustar liggesusersepi.betabuster <- function(mode, conf, greaterthan, x, conf.level = 0.95, max.shape1 = 100, step = 0.001){ shape1 <- seq(from = 1, to = max.shape1, by = step) shape2 <- 2 - shape1 + (shape1 - 1) / mode p.vec <- pbeta(q = x, shape1 = shape1, shape2 = shape2) # What value of a has the lowest (abs(p.vec-(1 - q)))? if(greaterthan){ index <- which((abs(p.vec - (1 - conf))) == min(abs(p.vec - (1 - conf)))) } else{ index <- which((abs(p.vec - conf)) == min(abs(p.vec - conf))) } shape1 <- shape1[index] shape2 <- shape2[index] # In general, if an experiment resulted in 's' successes (e.g. no. test-positive animals) # recorded in 'n' trials (e.g. number of truly infected animals), # use of a beta (a, b) distribution with a = s+1 and b = n-s+1 is an appropriate choice to model the uncertainty in that parameter. s <- shape1 - 1 n <- shape1 + shape2 - 2 .mode <- (shape1 - 1) / (shape1 + shape2 - 2) .mean <- shape1 / (shape1 + shape2) .var <- shape1 * shape2 / (((shape1 + shape2)^2) * (shape1 + shape2 + 1)) .median <- qbeta(p = 0.5, shape1 = shape1, shape2 = shape2) lower <- qbeta(p = (1 - conf.level) / 2, shape1 = shape1, shape2 = shape2) upper <- qbeta(p = 1 - ((1 - conf.level) / 2), shape1 = shape1, shape2 = shape2) # Issue a warning if the value of shape1 == max.shape1: if(shape1 == max.shape1) warning('The estimated value of shape1 equals max.shape1. Consider increasing the value of max.shape1.', call. = FALSE) rval <- list(shape1 = shape1, shape2 = shape2, mode = .mode, mean = .mean, median = .median, lower = lower, upper = upper, variance = .var) rval } epiR/R/epi.sssimpleestb.R0000644000176200001440000000156514075465030015001 0ustar liggesusersepi.sssimpleestb <- function(N = 1E+06, Py, epsilon, error = "relative", se, sp, nfractional = FALSE, conf.level = 0.95) { N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.a <- ifelse(error == "absolute", epsilon, Py * epsilon) # Equation 2 from Humphry et al. 
(2004): p01 <- (z / epsilon.a)^2 p02.u <- ((se * Py) + (1 - sp) * (1 - Py)) * (1 - (se * Py) - (1 - sp) * (1 - Py)) p02.l <- (se + sp - 1)^2 n <- p01 * (p02.u / p02.l) # Page 74 Levy and Lemeshow (equation 3.16): # n <- (z^2 * N * (1 - Py) * Py) / (((N - 1) * (epsilon.r^2) * Py^2) + (z^2 * Py * (1 - Py))) f <- n / N if(f > 0.10){n <- n / (1 + n/N)} if(nfractional == TRUE){ n <- n } if(nfractional == FALSE){ n <- ceiling(n) } rval <- n return(rval) } epiR/R/epi.mh.R0000644000176200001440000002107413117711462012664 0ustar liggesusers"epi.mh" <- function(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) { # Declarations: k <- length(names) a.i <- ev.trt b.i <- n.trt - ev.trt c.i <- ev.ctrl d.i <- n.ctrl - ev.ctrl N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) # Test each strata for zero values. Add 0.5 to all cells if any cell has a zero value: for(i in 1:k){ if(a.i[i] < 1 | b.i[i] < 1 | c.i[i] < 1 | d.i[i] < 1){ a.i[i] <- a.i[i] + 0.5; b.i[i] <- b.i[i] + 0.5; c.i[i] <- c.i[i] + 0.5; d.i[i] <- d.i[i] + 0.5 } } n.1i <- a.i + b.i n.2i <- c.i + d.i N.i <- a.i + b.i + c.i + d.i # For summary odds ratio: R <- sum((a.i * d.i) / N.i) S <- sum((b.i * c.i) / N.i) E <- sum(((a.i + d.i) * a.i * d.i) / N.i^2) F. <- sum(((a.i + d.i) * b.i * c.i) / N.i^2) G <- sum(((b.i + c.i) * a.i * d.i) / N.i^2) H <- sum(((b.i + c.i) * b.i * c.i) / N.i^2) # For summary risk ratio: P <- sum(((n.1i * n.2i * (a.i + c.i)) - (a.i * c.i * N.i)) / N.i^2) R. <- sum((a.i * n.2i) / N.i) S. <- sum((c.i * n.1i) / N.i) if(method == "odds.ratio"){ # Individual study odds ratios: OR.i <- (a.i * d.i) / (b.i * c.i) lnOR.i <- log(OR.i) SE.lnOR.i <- sqrt(1/a.i + 1/b.i + 1/c.i + 1/d.i) SE.OR.i <- exp(SE.lnOR.i) lower.lnOR.i <- lnOR.i - (z * SE.lnOR.i) upper.lnOR.i <- lnOR.i + (z * SE.lnOR.i) lower.OR.i <- exp(lower.lnOR.i) upper.OR.i <- exp(upper.lnOR.i) # Weights: w.i <- (b.i * c.i) / N.i # w.i <- 1 / (1/a.i + 1/b.i + 1/c.i + 1/d.i) w.iv.i <- 1 / (SE.lnOR.i)^2 # MH pooled odds ratios (relative effect measures combined in their natural scale): OR.mh <- sum(w.i * OR.i) / sum(w.i) lnOR.mh <- sum(w.i * log(OR.i)) / sum(w.i) # Same method for calculating confidence intervals around pooled OR as epi.2by2 so results differ from page 304 of Egger, Smith and Altman: G <- a.i * d.i / N.i H <- b.i * c.i / N.i P <- (a.i + d.i) / N.i Q <- (b.i + c.i) / N.i GQ.HP <- G * Q + H * P sumG <- sum(G) sumH <- sum(H) sumGP <- sum(G * P) sumGH <- sum(G * H) sumHQ <- sum(H * Q) sumGQ <- sum(G * Q) sumGQ.HP <- sum(GQ.HP) var.lnOR.mh <- sumGP / (2 * sumG^2) + sumGQ.HP/(2 * sumGH) + sumHQ/(2 * sumH^2) SE.lnOR.mh <- sqrt(var.lnOR.mh) SE.OR.mh <- exp(SE.lnOR.mh) lower.OR.mh <- exp(lnOR.mh - z * SE.lnOR.mh) upper.OR.mh <- exp(lnOR.mh + z * SE.lnOR.mh) # Test of heterogeneity (based on inverse variance weights): Q <- sum(w.iv.i * (lnOR.i - lnOR.mh)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. 
Code for p-value taken from z.test function in TeachingDemos package: effect.z <- lnOR.mh / SE.lnOR.mh alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: OR <- data.frame(OR.i, lower.OR.i, upper.OR.i) names(OR) <- c("est", "lower", "upper") OR.summary <- data.frame(OR.mh, lower.OR.mh, upper.OR.mh) names(OR.summary) <- c("est", "lower", "upper") weights <- data.frame(w.i, w.iv.i) names(weights) <- c("raw", "inv.var") Hsq <- data.frame(Hsq, Hsq.l, Hsq.u) names(Hsq) <- c("est", "lower", "upper") Isq <- data.frame(Isq, Isq.l, Isq.u) names(Isq) <- c("est", "lower", "upper") rval <- list(OR = OR, OR.summary = OR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect)) } else if(method == "risk.ratio"){ # Individual study risk ratios: RR.i <- (a.i / n.1i) / (c.i / n.2i) lnRR.i <- log(RR.i) SE.lnRR.i <- sqrt(1/a.i + 1/c.i - 1/n.1i - 1/n.2i) SE.RR.i <- exp(SE.lnRR.i) lower.lnRR.i <- lnRR.i - (z * SE.lnRR.i) upper.lnRR.i <- lnRR.i + (z * SE.lnRR.i) lower.RR.i <- exp(lower.lnRR.i) upper.RR.i <- exp(upper.lnRR.i) # Weights: w.i <- (c.i * n.1i) / N.i w.iv.i <- 1 / (SE.lnRR.i)^2 # MH pooled odds ratios (relative effect measures combined in their natural scale): RR.mh <- sum(w.i * RR.i) / sum(w.i) lnRR.mh <- log(RR.mh) SE.lnRR.mh <- sqrt(P / (R. * S.)) SE.RR.mh <- exp(SE.lnRR.mh) lower.lnRR.mh <- log(RR.mh) - (z * SE.lnRR.mh) upper.lnRR.mh <- log(RR.mh) + (z * SE.lnRR.mh) lower.RR.mh <- exp(lower.lnRR.mh) upper.RR.mh <- exp(upper.lnRR.mh) # Test of heterogeneity (based on inverse variance weights): Q <- sum(w.iv.i * (lnRR.i - lnRR.mh)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. 
Code for p-value taken from z.test function in TeachingDemos package: effect.z <- log(RR.mh) / SE.lnRR.mh alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: RR <- data.frame(RR.i, lower.RR.i, upper.RR.i) names(RR) <- c("est", "lower", "upper") RR.summary <- data.frame(RR.mh, lower.RR.mh, upper.RR.mh) names(RR.summary) <- c("est", "lower", "upper") weights <- data.frame(w.i, w.iv.i) names(weights) <- c("raw", "inv.var") Hsq <- data.frame(Hsq, Hsq.l, Hsq.u) names(Hsq) <- c("est", "lower", "upper") Isq <- data.frame(Isq, Isq.l, Isq.u) names(Isq) <- c("est", "lower", "upper") rval <- list(RR = RR, RR.summary = RR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect)) } return(rval) } epiR/R/epi.smr.R0000644000176200001440000001572214041476702013066 0ustar liggesusers# Statistical significance and confidence intervals for an SMR epi.smr <- function(obs = 4, exp = 3.3, method = "byar", conf.level = 0.95){ if(length(obs) > 1 | length(exp) > 1){ stop(message = "Arguments obs and exp must be of length 1\n") } N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- obs; lambda <- exp smr <- a / lambda if(a / as.integer(a) != 1){ stop(message = "Argument obs must be a whole number\n") } # ------------------------------------------------------------------------------------- # Chi square test (Checkoway and Pearce Research Methods in Occupational Epidemiology page 127) if(method == "chi2"){ chi2.ts <- (a - lambda)^2 / lambda chi2.p <- 1 - pchisq(chi2.ts, df = 1) rval <- data.frame(test.statistic = chi2.ts, df = 1, p.value = chi2.p) } # ------------------------------------------------------------------------------------- # Mid-P exact test (Rothman and Boice 1979): else if(method == "mid.p"){ if(a > 5){ stop(message = "Observed number of events is greater than 5. Use approximate methods (e.g. method = 'byar').\n") } if(a < lambda){ k <- seq(from = 0, to = a - 1, by = 1) } else if(a >= lambda){ k <- seq(from = a + 1, to = 5 * a, by = 1) } mid.p <- (0.5 * ((exp(-lambda) * lambda^(a)) / (factorial(a)))) + sum((exp(-lambda) * lambda^(k)) / (factorial(k))) mid.p <- 2 * mid.p # Confidence interval. The lower bound of the CI is the value of a in the following expression # that equals 1 - alpha / 2 (Miettinen [1974d] modification cited in Rothman and Boice 1979, page 29): fmidl <- function(alow){ k <- seq(from = 0, to = a - 1, by = 1) (0.5 * ((exp(-alow) * alow^(a)) / factorial(a))) + sum(exp(-alow) * alow^(k) / factorial(k)) - N. } alow <- uniroot(fmidl, interval = c(0, 1e+08)) # Upper bound of confidence interval: fmidu <- function(aupp){ k <- seq(from = 0, to = a - 1, by = 1) (0.5 * ((exp(-aupp) * aupp^(a)) / factorial(a))) + sum(exp(-aupp) * aupp^(k) / factorial(k)) - (1 - N.) } aupp <- uniroot(fmidu, interval = c(0, 1e+08)) mid.low <- alow$root / lambda mid.upp <- aupp$root / lambda rval <- data.frame(obs = a, exp = lambda, est = smr, lower = mid.low, upper = mid.upp, p.value = mid.p) } # ------------------------------------------------------------------------------------- # Fisher exact Test based on Poisson distribution (see Rosner): else if(method == "fisher"){ if(a > 5){ stop(message = "Observed number of events is greater than 5. Use approximate methods (e.g. 
method = 'byar').\n") } if(a < lambda){ k <- seq(from = 0, to = a, by = 1) exact.p <- min(2 * sum((exp(-lambda) * lambda^(k)) / factorial(k)), 1) } else if(a >= lambda){ k <- seq(from = 0, to = a - 1, by = 1) exact.p <- min(2 * (1 - sum((exp(-lambda) * lambda^(k)) / factorial(k))), 1) } # Lower bound of confidence interval: ffisl <- function(alow){ k <- seq(from = 0, to = a, by = 1) sum((exp(-alow) * alow^(k)) / factorial(k)) - N. } alow <- uniroot(ffisl, interval = c(0, 1e+08)) # Upper bound of confidence interval: ffisu <- function(aupp){ k <- seq(from = 0, to = a, by = 1) sum((exp(-aupp) * aupp^(k)) / factorial(k)) - (1 - N.) } aupp <- uniroot(ffisu, interval = c(0, 1e+08)) fis.low <- alow$root / lambda fis.upp <- aupp$root / lambda rval <- data.frame(obs = a, exp = lambda, est = smr, lower = fis.low, upper = fis.upp, p.value = exact.p) } # ------------------------------------------------------------------------------------- # Byar's approximation: else if(method == "byar"){ if(a < lambda){ .a <- a + 1 } else if(a >= lambda){ .a <- a } byar.z <- ((9 * .a)^(0.5)) * (1 - (1 / (9 * .a)) - ((lambda / .a)^(1/3))) byar.p <- ifelse(byar.z < 0, 2 * pnorm(q = byar.z, mean = 0, sd = 1), 2 * (1 - pnorm(q = byar.z, mean = 0, sd = 1))) # Confidence interval - Regidor et al. (1993): alow <- a * (1 - (1 / (9 * a)) - (z / 3) * sqrt(1 / a))^3 aupp <- (a + 1) * (1 - (1 / (9 * (a + 1))) + (z / 3) * sqrt(1 / (a + 1)))^3 byar.low <- alow / lambda byar.upp <- aupp / lambda rval <- data.frame(obs = a, exp = lambda, est = smr, lower = byar.low, upper = byar.upp, test.statistic = byar.z, p.value = byar.p) } # ------------------------------------------------------------------------------------- # Rothman Greenland: else if(method == "rothman.greenland"){ roth.low <- exp(log(smr) - (z * (1 / sqrt(a)))) roth.upp <- exp(log(smr) + (z * (1 / sqrt(a)))) rval <- data.frame(obs = a, exp = lambda, est = smr, lower = roth.low, upper = roth.upp) } # ------------------------------------------------------------------------------------- # Ury & Wiggins. Code in SMRDoc.pdf incorrect. Need to go to original Ury and Wiggins paper. 
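# A note on the Ury and Wiggins approximation handled below: the limits for the
# observed count are a -/+ z * sqrt(a) plus small correction constants that
# depend on the confidence level, and these limits are then divided by the
# expected count to give limits for the SMR. Illustrative arithmetic only
# (hypothetical figures, not output generated by this file): with obs = 4,
# exp = 3.3 and conf.level = 0.95 (z approximately 1.96, constants 1 and 2):
# alow <- 4 - 1.96 * sqrt(4) + 1   # approximately 1.08
# aupp <- 4 + 1.96 * sqrt(4) + 2   # approximately 9.92
# c(alow, aupp) / 3.3              # approximate 95% CI for the SMR: 0.33 to 3.01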
else if(method == "ury.wiggins"){ # Use only when conf.level = 0.90, 0.95 or 0.99: ury.ok <- conf.level == 0.90 | conf.level == 0.95 | conf.level == 0.99 if(ury.ok == FALSE){ warning("Ury and Wiggins confidence limits only valid when conf.level = 0.90, 0.95 or 0.99") rval <- data.frame(obs = a, exp = lambda, est = smr, lower = NA, upper = NA) } else if(ury.ok == TRUE){ if(conf.level == 0.90){ cons <- c(0.65,1.65) } else if(conf.level == 0.95){ cons <- c(1,2) } else if(conf.level == 0.99){ cons <- c(2,3) } a.low <- a - (z * sqrt(a)) + cons[1] a.upp <- a + (z * sqrt(a)) + cons[2] ury.low <- a.low / lambda ury.upp <- a.upp / lambda rval <- data.frame(obs = a, exp = lambda, est = smr, lower = ury.low, upper = ury.upp) } } # ------------------------------------------------------------------------------------- # Vandenbroucke (1982): else if(method == "vandenbroucke"){ # Use only when conf.level = 0.95: van.ok <- conf.level == 0.95 if(van.ok == FALSE){ warning("Vandenbroucke confidence limits only valid when conf.level = 0.95") rval <- data.frame(obs = a, exp = lambda, est = smr, lower = NA, upper = NA) } else if(van.ok == TRUE){ van.low <- (sqrt(a) - (z * 0.5))^(2) / lambda van.upp <- (sqrt(a) + (z * 0.5))^(2) / lambda rval <- data.frame(obs = a, exp = lambda, est = smr, lower = van.low, upper = van.upp) } } # ------------------------------------------------------------------------------------- return(rval) } epiR/R/zn.binom.R0000644000176200001440000000016613665133040013233 0ustar liggesuserszn.binom <- function(se.p, pstar, se.u = 1) { n <- log(1 - se.p) / log(1 - pstar * se.u) return(ceiling(n)) } epiR/R/zsep.binom.R0000644000176200001440000000020613665131434013565 0ustar liggesuserszsep.binom <- function(n, pstar, se = 1, sp = 1) { sep <- 1 - (1 - (se * pstar + (1 - sp) * (1 - pstar)))^n return(sep) } epiR/R/rsu.sep.rb.R0000644000176200001440000000137413741150126013503 0ustar liggesusersrsu.sep.rb <- function(N, rr, ppr, df, pstar, method = "binomial"){ if(method == "binomial"){ epi <- rsu.epinf(pstar, rr, ppr) p.all.neg <- (1 - df[,2] * epi[[1]][df[,1]])^df[3] sep <- 1 - prod(p.all.neg) return(list(sep = sep, epi = epi[[1]], adj.risk = epi[[2]])) } else if(method == "hypergeometric"){ ppr <- N / sum(N) epi <- rsu.epinf(pstar, rr, ppr) n <- numeric(length(rr)) se <- n for (r in 1:length(rr)) { n[r] <- sum(df[df[,1] == r,3]) se[r] <- mean(df[df[,1] == r,2]) } p.all.neg <- (1 - se * n/N)^(epi[[1]] * N) sep <- 1 - prod(p.all.neg) return(list(sep = sep, epi = epi[[1]], adj.risk = epi[[2]], n = n, se.u = se)) } } epiR/R/epi.tests.R0000644000176200001440000005102514142426276013426 0ustar liggesusers"epi.tests" <- function(dat, method = "exact", digits = 2, conf.level = 0.95) { # Stop if invalid number of digits: if(digits != 2 & digits != 3 & digits != 4) stop("Argument 'digits' for this function must take the value of 2, 3 or 4.") # Number of columns: dim <- ifelse(is.null(dim(dat)[2]), 0, dim(dat)[2]) # If dat is a dplyr object re-jig as conventional R table: id <- class(dat) == "grouped_df" | class(dat) == "tbl_df" | class(dat) == "tbl" | class(dat) == "data.frame" if(dim == 3 & sum(id) == 4){ # Assign names: names(dat) <- c("tes","out","n") # Counts are in column 3. Must be integer: if(!is.integer(dat$n)) stop('Column 3 (cell frequencies) must be integer.') # Test variable column 1. Must be a factor: if(!is.factor(dat$tes)) stop('Column 1 (test) must be a factor.') # Outcome variable column 2.
Must be a factor: if(!is.factor(dat$out)) stop('Column 2 (outcome) must be a factor.') dat <- xtabs(n ~ tes + out, data = dat) } # If dat vector of length 4 (i.e. cell frequencies) re-jig into a conventional R table: if(length(dat) == 4 & is.vector(dat) == TRUE){ dat <- as.table(matrix(dat, nrow = 2, byrow = TRUE)) } N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) ## ================= ## DECLARE VARIABLES ## ================= ## --------| D+ --| D- --| Total ## Test + | a | b | N1 ## Test - | c | d | N0 ## --------|------|------|------ ## Total | M1 | M0 | total a <- dat[1] b <- dat[3] c <- dat[2] d <- dat[4] ## Total disease pos: M1 <- a + c ## Total disease neg: M0 <- b + d ## Total test pos: N1 <- a + b ## Total test neg: N0 <- c + d ## Total subjects: total <- a + b + c + d ## True prevalence: tdat <- as.matrix(cbind(M1, total)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } tp <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- M1; n <- total ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## tp <- r/n ## tp.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## tp.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## tp <- p ## tp.low <- (A - B) / C ## tp.up <- (A + B) / C ## Apparent prevalence: tdat <- as.matrix(cbind(N1, total)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } ap <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- N1; n <- total ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## ap <- r/n ## ap.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## ap.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## ap <- p ## ap.low <- (A - B) / C ## ap.up <- (A + B) / C ## Sensitivity: tdat <- as.matrix(cbind(a, M1)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } se <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- a; n <- M1 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## se <- r/n ## se.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## se.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## se <- p ## se.low <- (A - B) / C ## se.up <- (A + B) / C ## Specificity: tdat <- as.matrix(cbind(d, M0)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- 
zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } sp <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- d; n <- M0 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## sp <- r/n ## sp.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## sp.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## sp <- p ## sp.low <- (A - B) / C ## sp.up <- (A + B) / C ## Positive predictive value: tdat <- as.matrix(cbind(a, N1)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } pv.pos <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- a; n <- N1 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## ppv <- r/n ## ppv.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## ppv.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## ppv <- p ## ppv.low <- (A - B) / C ## ppv.up <- (A + B) / C ## Negative predictive value: tdat <- as.matrix(cbind(d, N0)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } pv.neg <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- d; n <- N0 ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## npv <- r/n ## npv.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## npv.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## npv <- p ## npv.low <- (A - B) / C ## npv.up <- (A + B) / C ## Likelihood ratio of a positive test. Confidence intervals from Simel et al. (1991) ## lrpos <- se / (1 - sp) lrpos.est <- (a / M1) / (1 - (d / M0)) lrpos.low <- exp(log(lrpos.est) - z * sqrt((1 - se$est) / (M1 * se$est) + (sp$est) / (M0 * (1 - sp$est)))) lrpos.up <- exp(log(lrpos.est) + z * sqrt((1 - se$est) / (M1 * se$est) + (sp$est) / (M0 * (1 - sp$est)))) lr.pos <- data.frame(est = lrpos.est, lower = lrpos.low, upper = lrpos.up) ## Likelihood ratio of a negative test. Confidence intervals from Simel et al. (1991) ## lrpos <- se / (1 - sp) lrneg.est <- (1 - (a / M1)) / (d / M0) lrneg.low <- exp(log(lrneg.est) - z * sqrt((se$est)/(M1 * (1 - se$est)) + (1 - sp$est) / (M0 * (sp$est)))) lrneg.up <- exp(log(lrneg.est) + z * sqrt((se$est) / (M1 * (1 - se$est)) + (1 - sp$est) / (M0 * (sp$est)))) lr.neg <- data.frame(est = lrneg.est, lower = lrneg.low, upper = lrneg.up) ## Diagnostic accuracy (from Scott et al. 
(2008)): tdat <- as.matrix(cbind((a + d), total)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } diag.ac <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Greg Snow: ## r <- (a + d); n <- total ## p <- r/n ## alpha1 <- r + 1 ## alpha2 <- n - r + 1 ## da <- r/n ## da.low <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[1] ## da.up <- hpd.(qbeta, shape1 = alpha1, shape2 = alpha2)[2] ## Altman: ## q <- 1 - p ## A <- (2 * r) + (z * z) ## B <- z * sqrt((z * z) + (4 * r * q)) ## C <- 2 * (n + (z * z)) ## da <- p ## da.low <- (A - B) / C ## da.up <- (A + B) / C ## Diagnostic odds ratio (from Scott et al. (2008)): dOR.p <- (a * d) / (b * c) lndOR <- log(dOR.p) lndOR.var <- 1 / a + 1 / b + 1 / c + 1 / d lndOR.se <- sqrt(1 / a + 1 / b + 1 / c + 1 / d) lndOR.l <- lndOR - (z * lndOR.se) lndOR.u <- lndOR + (z * lndOR.se) dOR.se <- exp(lndOR.se) dOR.low <- exp(lndOR.l) dOR.up <- exp(lndOR.u) diag.or <- data.frame(est = dOR.p, lower = dOR.low, upper = dOR.up) ## Number needed to diagnose (from Scott et al. (2008)): nndx.est <- 1 / (se$est - (1 - sp$est)) nndx.1 <- 1 / (se$lower - (1 - sp$lower)) nndx.2 <- 1 / (se$upper - (1 - sp$upper)) nndx.low <- min(nndx.1, nndx.2) nndx.up <- max(nndx.1, nndx.2) nndx <- data.frame(est = nndx.est, lower = nndx.low, upper = nndx.up) ## Youden's index (from Bangdiwala et al. (2008)): c.p <- se$est - (1 - sp$est) c.1 <- se$lower - (1 - sp$lower) c.2 <- se$upper - (1 - sp$upper) c.low <- min(c.1, c.2) c.up <- max(c.1, c.2) youden <- data.frame(est = c.p, lower = c.low, upper = c.up) ## Proportion ruled out: tdat <- as.matrix(cbind((c + d), total)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } p.rout <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## Proportion ruled in: tdat <- as.matrix(cbind((a + b), total)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } p.rin <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## False T+ proportion for true D-, Pr(T+|D-): tdat <- as.matrix(cbind(b, M0)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } p.fpos <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) p.tpdn <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## False T- proportion for true D+, Pr(T-|D+): tdat <- as.matrix(cbind(c, M1)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == 
"agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } # p.fneg <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) p.tndp <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## False T+ proportion for T+, Pr(D-|T+): tdat <- as.matrix(cbind(b, N1)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } p.dntp <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) ## False T- proportion for T-, Pr(D+|T-): tdat <- as.matrix(cbind(c, N0)) if(method == "exact"){ trval <- zexact(tdat, conf.level) } if(method == "wilson"){ trval <- zwilson(tdat, conf.level) } if(method == "agresti"){ trval <- zagresti(tdat, conf.level) } if(method == "clopper-pearson"){ trval <- zclopperpearson(tdat, conf.level) } if(method == "jeffreys"){ trval <- zjeffreys(tdat, conf.level) } p.dptn <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) rval <- list(ap = ap, tp = tp, se = se, sp = sp, diag.ac = diag.ac, diag.or = diag.or, nndx = nndx, youden = youden, pv.pos = pv.pos, pv.neg = pv.neg, lr.pos = lr.pos, lr.neg = lr.neg, p.rout = p.rout, p.rin = p.rin, p.tpdn = p.tpdn, p.tndp = p.tndp, p.dntp = p.dntp, p.dptn = p.dptn) ## Define tab: r1 <- c(a, b, N1) r2 <- c(c, d, N0) r3 <- c(M1, M0, M0 + M1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total") rownames(tab) <- c("Test +", "Test -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") out <- list(detail = rval, tab = tab, method = method, digits = digits, conf.level = conf.level) class(out) <- "epi.tests" return(out) } ## Print method for epi.tests: print.epi.tests <- function(x, ...) { print(x$tab, ...) 
cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n--------------------------------------------------------------") if(x$digits == 2){ tap <- "\nApparent prevalence * %.2f (%.2f, %.2f)" ttp <- "\nTrue prevalence * %.2f (%.2f, %.2f)" tse <- "\nSensitivity * %.2f (%.2f, %.2f)" tsp <- "\nSpecificity * %.2f (%.2f, %.2f)" tppv <- "\nPositive predictive value * %.2f (%.2f, %.2f)" tnpv <- "\nNegative predictive value * %.2f (%.2f, %.2f)" tplr <- "\nPositive likelihood ratio %.2f (%.2f, %.2f)" tnlr <- "\nNegative likelihood ratio %.2f (%.2f, %.2f)" ttpdn <- "\nFalse T+ proportion for true D- * %.2f (%.2f, %.2f)" ttndp <- "\nFalse T- proportion for true D+ * %.2f (%.2f, %.2f)" tdntp <- "\nFalse T+ proportion for T+ * %.2f (%.2f, %.2f)" tdptn <- "\nFalse T- proportion for T- * %.2f (%.2f, %.2f)" tdac <- "\nCorrectly classified proportion * %.2f (%.2f, %.2f)" } else if(x$digits == 3){ tap <- "\nApparent prevalence * %.3f (%.3f, %.3f)" ttp <- "\nTrue prevalence * %.3f (%.3f, %.3f)" tse <- "\nSensitivity * %.3f (%.3f, %.3f)" tsp <- "\nSpecificity * %.3f (%.3f, %.3f)" tppv <- "\nPositive predictive value * %.3f (%.3f, %.3f)" tnpv <- "\nNegative predictive value * %.3f (%.3f, %.3f)" tplr <- "\nPositive likelihood ratio %.3f (%.3f, %.3f)" tnlr <- "\nNegative likelihood ratio %.3f (%.3f, %.3f)" ttpdn <- "\nFalse T+ proportion for true D- * %.3f (%.3f, %.3f)" ttndp <- "\nFalse T- proportion for true D+ * %.3f (%.3f, %.3f)" tdntp <- "\nFalse T+ proportion for T+ * %.3f (%.3f, %.3f)" tdptn <- "\nFalse T- proportion for T- * %.3f (%.3f, %.3f)" tdac <- "\nCorrectly classified proportion * %.3f (%.3f, %.3f)" } else if(x$digits == 4){ tap <- "\nApparent prevalence * %.4f (%.4f, %.4f)" ttp <- "\nTrue prevalence * %.4f (%.4f, %.4f)" tse <- "\nSensitivity * %.4f (%.4f, %.4f)" tsp <- "\nSpecificity * %.4f (%.4f, %.4f)" tppv <- "\nPositive predictive value * %.4f (%.4f, %.4f)" tnpv <- "\nNegative predictive value * %.4f (%.4f, %.4f)" tplr <- "\nPositive likelihood ratio %.4f (%.4f, %.4f)" tnlr <- "\nNegative likelihood ratio %.4f (%.4f, %.4f)" ttpdn <- "\nFalse T+ proportion for true D- * %.4f (%.4f, %.4f)" ttndp <- "\nFalse T- proportion for true D+ * %.4f (%.4f, %.4f)" tdntp <- "\nFalse T+ proportion for T+ * %.4f (%.4f, %.4f)" tdptn <- "\nFalse T- proportion for T- * %.4f (%.4f, %.4f)" tdac <- "\nCorrectly classified proportion * %.4f (%.4f, %.4f)" } with(x$detail, { cat(sprintf(tap, ap$est, ap$lower, ap$upper )) cat(sprintf(ttp, tp$est, tp$lower, tp$upper )) cat(sprintf(tse, se$est, se$lower, se$upper )) cat(sprintf(tsp, sp$est, sp$lower, sp$upper )) cat(sprintf(tppv, pv.pos$est, pv.pos$lower, pv.pos$upper )) cat(sprintf(tnpv, pv.neg$est, pv.neg$lower, pv.neg$upper )) cat(sprintf(tplr, lr.pos$est, lr.pos$lower, lr.pos$upper )) cat(sprintf(tnlr, lr.neg$est, lr.neg$lower, lr.neg$upper )) cat(sprintf(ttpdn, p.tpdn$est, p.tpdn$lower, p.tpdn$upper )) cat(sprintf(ttndp, p.tndp$est, p.tndp$lower, p.tndp$upper )) cat(sprintf(tdntp, p.dntp$est, p.dntp$lower, p.dntp$upper )) cat(sprintf(tdptn, p.dptn$est, p.dptn$lower, p.dptn$upper )) cat(sprintf(tdac, diag.ac$est, diag.ac$lower, diag.ac$upper )) }) cat("\n--------------------------------------------------------------") if(x$method == "exact"){ cmethod <- "* Exact CIs\n" } if(x$method == "wilson"){ cmethod <- "* Wilson CIs\n" } if(x$method == "agresti"){ cmethod <- "* Agresti CIs\n" } if(x$method == "clopper-pearson"){ cmethod <- "* Clopper-Pearson CIs\n" } if(x$method == "jeffreys"){ cmethod <- "* Jeffreys CIs\n" } cat("\n", cmethod, sep = "") 
} ## Summary method for epi.tests: summary.epi.tests <- function(object, ...) { ## Create a data frame: out <- do.call(rbind, object$detail) ## Return it: return(out) }epiR/R/epi.sssupc.R0000644000176200001440000000542014112021410013554 0ustar liggesusersepi.sssupc <- function(treat, control, sd, delta, n, r = 1, power, nfractional = FALSE, alpha){ # Stop if a negative value for delta entered: if (delta < 0){ stop("For a superiority trial delta must be greater than or equal to zero.") } z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(power) & is.na(n)) { beta <- (1 - power) z.beta <- qnorm(1 - beta, mean = 0, sd = 1) # http://powerandsamplesize.com/Calculators/Compare-2-Means/2-Sample-Non-Inferiority-or-Superiority: if(nfractional == TRUE){ n.control <- (1 + 1 / r) * (sd * (z.alpha + z.beta) / (treat - control - delta))^2 n.treat <- n.control * r n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling((1 + 1 / r) * (sd * (z.alpha + z.beta) / (treat - control - delta))^2) n.treat <- ceiling(n.control * r) n.total <- n.treat + n.control } rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(n) & is.na(power) & !is.na(r) & !is.na(alpha)) { # Work out the number of subjects in the control group. r equals the number in the treatment group divided by the number in the control group. if(nfractional == TRUE){ n.control <- 1 / (r + 1) * n n.treat <- n - n.control n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling(1 / (r + 1) * (n)) n.treat <- n - n.control n.total <- n.treat + n.control } z <- (treat - control - delta) / (sd * sqrt((1 + 1 / r) / n.control)) power <- pnorm(z - z.alpha) + pnorm(-z - z.alpha) rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } rval } # A superiority trial is one where you want to demonstrate that one treatment or intervention is better than another (or better than no treatment/intervention). An equivalence trial is where you want to demonstrate that a new treatment is no better or worse than an existing treatment and non-inferiority is to show that a new treatment is not worse than an existing treatment. # epi.supc(treat = 5, control = 5, sd = 20, delta = 5, n = NA, r = 1, power = 0.80, alpha = 0.05) # 264 patients are required to have a 90% chance of detecting, as significant at the 5% level, an increase in the primary outcome measure from 40 in the control group to 50 in the experimental group. # Reference: Pocock SJ. Clinical Trials: A Practical Approach. Wiley; 1983. # Julious SA. Sample sizes for clinical trials with Normal data. Statist. Med. 2004; 23:1921-1986.epiR/R/epi.blcm.paras.r0000644000176200001440000000071714040011422014324 0ustar liggesusersepi.blcm.paras <- function(ntest.dep = 2, ntest.indep = 1, npop = 2){ ntest.total <- ntest.indep + ntest.dep df <- (2^ntest.total - 1) * npop npar <- (2 * ntest.total + npop) + (ntest.dep^2 - ntest.dep) if(npar < df){ ninf.prior <- 0 } if(npar >= df){ ninf.prior <- npar - df } return(list(ntest.dep = ntest.dep, ntest.indep = ntest.indep, npop = npop, df = df, npar = npar, ninf.prior = ninf.prior)) }epiR/R/epi.cp.R0000644000176200001440000000346713521502104012656 0ustar liggesusersepi.cp <- function(dat){ # Re-write of function following email from Johann Popp 11 October 2017. 
Function modified to handle one covariate (following email with Mathew Jay) ndat <- data.frame(id = 1:nrow(dat), dat) if (!is.null(dim(ndat[,ncol(ndat):2]))) { # Add an indicator variable for covariate patterns: ndat$indi <- apply(X = ndat[,ncol(ndat):2], MARGIN = 1, FUN = function(x) as.factor(paste(x, collapse = ""))) # Order the data according to the indicator variable. Removed 4 Aug 2018. # ndat <- ndat[order(ndat$indi),] # Create a variable that indicates all the cases of each covariate pattern: cp.id <- tapply(ndat$id, ndat$indi, function(x) paste(x, collapse = ",")) # Create a data frame of unique covariate patterns: cp <- unique(ndat[,2:ncol(ndat)]) n <- as.numeric(unlist(lapply(strsplit(cp.id, ","), length))) id <- tapply(ndat$id, ndat$indi, function(x) (x)[1]) lookup <- data.frame(id = 1:length(n), indi = row.names(id)) cov.pattern <- data.frame(id = 1:length(n), n, cp[,-ncol(cp)]) rownames(cov.pattern) <- rownames(cp) # Create a vector with the covariate pattern for each case: id <- lookup$id[match(ndat$indi, lookup$indi)] # id <- as.numeric(unlist(lapply(strsplit(cp.id, ","), function(x) rep(min(as.numeric(unlist(x))), length(x)))))[order(ndat$id)] list(cov.pattern = cov.pattern, id = id) } else { # ndat <- ndat[order(ndat[2]),] cp.id <- tapply(ndat$id, ndat[2], function(x) paste(x, collapse = ",")) cp <- unique(ndat[, 2:ncol(ndat)]) n <- as.numeric(unlist(lapply(strsplit(cp.id, ","), length))) cov.pattern <- data.frame(id = 1:length(n), n, cp) id <- cov.pattern$id[match(as.vector(as.matrix(ndat[2])), cov.pattern$cp)] } list(cov.pattern = cov.pattern, id = id) } epiR/R/epi.sscompb.R0000644000176200001440000001041414134532434013722 0ustar liggesusers"epi.sscompb" <- function(treat, control, n, power, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95) { alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(n) & !is.na(power)){ stop("Error: at least one of treat, n and power must be NA.") } # Sample size. if (!is.na(treat) & !is.na(control) & is.na(n) & !is.na(power)) { z.beta <- qnorm(power, mean = 0, sd = 1) delta <- abs(treat - control) lambda <- treat / control # Woodward (2014) page 312, n <- (1 / delta^2) * ((z.alpha * sqrt(treat * (1 - treat))) + (z.beta * sqrt(control * (1 - control))))^2 # From Woodward's spreadsheet. Changed 130814: # lambda <- treat / control # Pc <- control * (r * lambda + 1) / (r + 1) # T1 <- (r + 1) / (r * (lambda - 1)^2 * control^2) # T2 <- (r + 1) * Pc *(1 - Pc) # T3 <- lambda * control * (1 - lambda * control) + r * control * (1 - control) # n <- T1 * (z.alpha * sqrt(T2) + z.beta * sqrt(T3))^2 if(nfractional == TRUE){ n1 <- n / (r + 1) n1 <- n1 * design n0 <- r * n1 n.total <- n1 + n0 } if(nfractional == FALSE){ n1 <- n / (r + 1) n1 <- ceiling(n1 * design) n0 <- ceiling(r * n1) n.total <- n1 + n0 } rval <- list(n.total = n.total, n.treat = n1, n.control = n0, power = power, lambda = sort(c(lambda, 1 / lambda))) } # Power. else if (!is.na(treat) & !is.na(control) & !is.na(n) & is.na(power)) { if(nfractional == TRUE){ n1 <- n / (r + 1) n1 <- n1 * design n0 <- r * n1 n.total <- n1 + n0 } if(nfractional == FALSE){ n1 <- n / (r + 1) n1 <- ceiling(n1 * design) n0 <- ceiling(r * n1) n.total <- n1 + n0 } # From Woodward's spreadsheet. 
Changed 130814: # lambda <- control / treat # Pc <- treat * (r * lambda + 1) / (r + 1) # T1 <- ifelse(lambda >= 1, treat * (lambda - 1) * sqrt(n * r), treat * (1 - lambda) * sqrt(n * r)) # T2 <- z.alpha * (r + 1) * sqrt(Pc * (1 - Pc)) # T3 <- (r + 1) * (lambda * treat * (1 - lambda * treat) + r * treat * (1 - treat)) # z.beta <- (T1 - T2) / sqrt(T3) delta <- abs(treat - control) lambda <- treat / control z.beta <- ((delta * sqrt(n)) - (z.alpha * sqrt(treat * (1 - treat)))) / (sqrt(control * (1 - control))) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(n.total = n.total, n.treat = n1, n.control = n0, power = power, lambda = sort(c(lambda, 1 / lambda))) } # Lambda: else if (is.na(treat) & !is.na(control) & !is.na(n) & !is.na(power)) { z.beta <- qnorm(power, mean = 0, sd = 1) if(nfractional == TRUE){ n1 <- n / (r + 1) n1 <- n1 * design n0 <- r * n1 n.total <- n1 + n0 } if(nfractional == FALSE){ n1 <- n / (r + 1) n1 <- ceiling(n1 * design) n0 <- ceiling(r * n1) n.total <- n1 + n0 } # Here we use the formulae for study power (Woodward page 312) and then solve for treat which then allows us to calculate lambda. Pfun <- function(treat, control, n, r, z.alpha) { delta <- treat - control lambda <- treat / control z.beta <- ((delta * sqrt(n)) - (z.alpha * sqrt(treat * (1 - treat)))) / (sqrt(control * (1 - control))) # lambda <- control / treat # Pc <- treat * (r * lambda + 1) / (r + 1) # T1 <- treat * (lambda - 1) * sqrt(n * r) # T2 <- z.alpha * (r + 1) * sqrt(Pc * (1 - Pc)) # T3 <- (r + 1) * (lambda * treat * (1 - lambda * treat) + r * treat * (1 - treat)) # z.beta <- (T1 - T2) / sqrt(T3) pnorm(z.beta, mean = 0, sd = 1) - power } etreat <- uniroot(Pfun, control = control, n = n, r = r, z.alpha = z.alpha, interval = c(1e-06, 1))$root rval <- list(n.total = n.total, n.treat = n1, n.control = n0, power = power, lambda = sort(c(etreat / control, control / etreat))) } rval } epiR/R/zagresti.R0000644000176200001440000000074014136475426013342 0ustar liggesuserszagresti <- function(dat, conf.level){ # From RSurveillance function binom.agresti: tails <- 2 a <- dat[,1] n <- dat[,2] z.conf <- stats::qnorm(1 - (1 - conf.level) / tails, 0, 1) a.ac <- a + z.conf^2 / 2 n.ac <- n + z.conf^2 p.ac <- a.ac / n.ac q.ac <- 1 - p.ac low <- p.ac - z.conf * (p.ac * q.ac)^0.5 * n.ac^-0.5 upp <- p.ac + z.conf * (p.ac * q.ac)^0.5 * n.ac^-0.5 rval <- data.frame(est = p.ac, lower = low, upper = upp) rval }epiR/R/zsep.hypergeo.R0000644000176200001440000000023213635363122014300 0ustar liggesuserszsep.hypergeo <- function(N, n, d, se = 1) { d <- pmin(N, d) sep <- 1 - (1 - se * n / N)^d sep[n == 0] <- 0 sep[n > N] <- NA return(sep) }epiR/R/epi.empbayes.R0000644000176200001440000000150513117711456014065 0ustar liggesusers"epi.empbayes" <- function(obs, pop){ # gamma: mean of rate # phi: variance of rate gamma <- (sum(obs)) / (sum(pop)) rate <- obs / pop sum.pop <- sum(pop) phi.left <- sum(pop * (rate - gamma)^2) / sum.pop phi.right <- gamma / mean(pop) phi <- phi.left - phi.right # The convention is that phi = 0 whenever the above expression is negative. 
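# The line that follows enforces that convention before the shrinkage step.
# The empirical Bayes estimate computed below shrinks each unit's crude rate
# towards the overall mean gamma with weight phi / (phi + gamma / pop), so
# small-population units are pulled strongly towards gamma while
# large-population units largely keep their crude rate. Illustrative
# arithmetic only (hypothetical values, not output from this file): with
# gamma = 0.01, phi = 0.0004, rate = 0.03 and pop = 50 the weight is
# 0.0004 / (0.0004 + 0.01 / 50) = 2/3, giving a shrunken rate of about
# 0.01 + (2/3) * (0.03 - 0.01) = 0.023.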
phi <- ifelse(phi < 0, 0, phi) emp <- ((phi * (rate - gamma)) / (phi + (gamma / pop))) + gamma # gamma = nu / alpha # phi = nu / alpha^2 alpha <- gamma / phi nu <- gamma^2 / phi inv.nu <- 1 / nu rval <- data.frame(gamma, phi, alpha, nu, inv.nu) names(rval) <- c("gamma (mean)", "phi (variance)", "alpha (shape)", "nu (scale)", "inv.nu (rate)") unlist(rval) } epiR/R/rsu.sep.rb2rf.R0000644000176200001440000000300013741154070014103 0ustar liggesusersrsu.sep.rb2rf <- function(N, n, rr1, ppr1, rr2, ppr2, pstar, se.u, method = "binomial") { if(method == "binomial") {ar1 <- rsu.adjrisk(rr1, ppr1) ar2 <- array(0, dim = dim(rr2)) rownames(ar2) <- paste("RR1",1:length(rr1), se.p = "=") colnames(ar2) <- paste("RR2",1:ncol(rr2), se.p = "=") epi <- ar2 p.neg <- ar2 if(length(se.u) == 1) se.u <- array(se.u, dim = dim(rr2)) for (i in 1:length(rr1)){ ar2[i,]<- rsu.adjrisk(rr2[i,], ppr2[i,]) epi[i,]<- ar1[i] * ar2[i,] * pstar p.neg[i,] <- (1 - epi[i,] * se.u[i,])^n[i,] } se.p <- 1 - prod(p.neg) rval <- list(se.p = se.p, epi = epi, adj.risk1 = ar1, adj.risk2 = ar2) } else if(method == "hypergeometric") {ppr1 <- rowSums(N) / sum(N) ppr2 <- array(0, dim = dim(rr2)) rownames(ppr2)<- paste("RR1",1:length(rr1), se.p = "=") colnames(ppr2)<- paste("RR2",1:ncol(rr2), se.p = "=") ar1 <- rsu.adjrisk(rr1, ppr1) ar2 <- array(0, dim = dim(rr2)) rownames(ar2) <- rownames(ppr2) colnames(ar2) <- colnames(ppr2) epi <- ar2 p.neg <- ar2 if (length(se.u) == 1) se.u <- array(se.u, dim = dim(rr2)) for (i in 1:length(rr1)){ ppr2[i,] <- N[i,] / sum(N[i,]) ar2[i,] <- rsu.adjrisk(rr2[i,], ppr2[i,]) epi[i,] <- ar1[i] * ar2[i,] * pstar p.neg[i,] <- (1 - se.u[i,] * n[i,] / N[i,])^(epi[i,] * N[i,]) } se.p <- 1 - prod(p.neg) rval <- list(se.p = se.p, epi = epi, adj.risk1 = ar1, adj.risk2 = ar2) } rval }epiR/R/zRRscore.R0000644000176200001440000000665213746131220013256 0ustar liggesuserszRRscore <- function(dat, conf.level){ N. 
<- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- as.numeric(dat[1]); b <- as.numeric(dat[3]); c <- as.numeric(dat[2]); d <- as.numeric(dat[4]) N1 <- a + b; N0 <- c + d scRR.p <- (a / N1) / (c / N0) if ((c == 0) && (a == 0)){ ul = Inf ll = 0 } else{ a1 = N0 * (N0 * (N0 + N1) * a + N1 * (N0 + a) * (z^2)) a2 = -N0 * (N0 * N1 * (c + a) + 2 * (N0 + N1) * c * a + N1 * (N0 + c + 2 * a) * (z^2)) a3 = 2 * N0 * N1 * c * (c + a) + (N0 + N1) * (c^2) * a + N0 * N1 * (c + a) * (z^2) a4 = -N1 * (c ^ 2) * (c + a) b1 = a2 / a1 b2 = a3 / a1 b3 = a4 / a1 c1 = b2 - (b1^2) / 3 c2 = b3 - b1 * b2 / 3 + 2 * (b1^3) / 27 ceta = acos(sqrt(27) * c2 / (2 * c1 * sqrt(-c1))) t1 = -2 * sqrt(-c1 / 3) * cos(pi / 3 - ceta / 3) t2 = -2 * sqrt(-c1 / 3) * cos(pi / 3 + ceta / 3) t3 = 2 * sqrt(-c1 / 3) * cos(ceta / 3) p01 = t1 - b1 / 3 p02 = t2 - b1 / 3 p03 = t3 - b1 / 3 p0sum = p01 + p02 + p03 p0up = min(p01, p02, p03) p0low = p0sum - p0up - max(p01, p02, p03) if( (c == 0) && (a != 0) ){ ll = (1 - (N1 - a) * (1 - p0low) / (c + N1 - (N0 + N1) * p0low)) / p0low ul = Inf } else if((c != N0) && (a == 0)){ ul = (1 - (N1 - a) * (1 - p0up) / (c + N1 - (N0 + N1) * p0up)) / p0up ll = 0 } else if((c == N0) && (a == N1)){ ul = (N0 + z^2) / N0 ll = N1 / (N1 + z^2) } else if((a == N1) || (c == N0)){ if((c == N0) && (a == 0)) {ll = 0} if((c == N0) && (a != 0)) { phat1 = c / N0 phat2 = a / N1 phihat = phat2 / phat1 phil = 0.95 * phihat chi2 = 0 while (chi2 <= z){ a = (N0 + N1) * phil b = -((c + N1) * phil + a + N0) c = c + a p1hat = (-b - sqrt(b^2 -4 * a * c)) / (2 * a) p2hat = p1hat * phil q2hat = 1 - p2hat var = (N0 * N1 * p2hat) / (N1 * (phil - p2hat) + N0 * q2hat) chi2 = ((a - N1 * p2hat) / q2hat) / sqrt(var) ll = phil phil = ll / 1.0001}} i = c j = a ni = N0 nj = N1 if(a == N1){ i = a j = c ni = N1 nj = N0 } phat1 = i / ni phat2 = j / nj phihat = phat2 / phat1 phiu = 1.1 * phihat if((c == N0) && (a == 0)) { if(N0 < 100) {phiu = 0.01} else {phiu = 0.001} } chi1 = 0 while (chi1 >= -z){ a. = (ni + nj) * phiu b. = -((i + nj) * phiu + j + ni) c. = i + j p1hat = (-b. - sqrt(b.^2 - 4 * a. * c.)) / (2 * a.) 
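# p1hat above is the smaller root of the quadratic a. * p^2 + b. * p + c. = 0,
# i.e. the estimate of the risk in the denominator group that is consistent
# with the risk ratio being fixed at the current trial value phiu. The
# statements that follow derive p2hat = p1hat * phiu, its variance and the
# score-type statistic chi1, then inflate phiu by a factor of 1.0001 per pass;
# the loop stops once chi1 falls below -z, and the value stored in phiu1 is
# used as the corresponding confidence limit.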
p2hat = p1hat * phiu q2hat = 1 - p2hat var = (ni * nj * p2hat) / (nj * (phiu - p2hat) + ni * q2hat) chi1 = ((j - nj * p2hat) / q2hat) / sqrt(var) phiu1 = phiu phiu = 1.0001 * phiu1 } if(a == N1) { ul = (1 - (N1 - a) * (1 - p0up) / (c + N1 - (N0 + N1) * p0up)) / p0up ll = 1 / phiu1 } else{ul = phiu1} } else{ ul = (1 - (N1 - a) * (1 - p0up) / (c + N1 - (N0 + N1) * p0up)) /p0up ll = (1 - (N1 - a) * (1 - p0low) / (c + N1 - (N0 + N1) * p0low)) / p0low } } c(scRR.p, ll, ul) }epiR/R/epi.pooled.R0000644000176200001440000000051413117711464013540 0ustar liggesusersepi.pooled <- function(se, sp, P, m, r){ # Herd specificity: PlSp <- sp^m HSp <- (PlSp)^r # Herd sensitivity: HSe <- 1 - ((1 - (1 - P)^m) * (1 - se) + (1 - P)^m * PlSp)^r # Herd level apparent prevalence: HAPneg <- 1 - HSp rval <- list(HAPneg = HAPneg, HSe = HSe, HSp = HSp) rval } epiR/R/epi.prev.R0000644000176200001440000002360613370265524013243 0ustar liggesusers"epi.prev" <- function(pos, tested, se, sp, method = "wilson", units = 100, conf.level = 0.95){ # Confidence intervals: if(method == "c-p") ap.cl <- tp.cl <- .bin.ci(x = pos, n = tested, method = "exact", alpha = 1 - conf.level) else if (method == "sterne") ap.cl <- tp.cl <- .sterne.ci(x = pos, n = tested, alpha = 1 - conf.level) else if (method == "blaker") ap.cl <- tp.cl <- .blaker.ci(x = pos, n = tested, conf.level) else if (method == "wilson") ap.cl <- tp.cl <- .bin.ci(x = pos, n = tested, method = "wilson", alpha = 1 - conf.level) else stop('Valid methods are "c-p", "sterne", "blaker", or "wilson"') # Apparent prevalence: ap.p <- pos / tested # True prevalence: tp.p <- (ap.p + sp - 1) / (se + sp - 1) # The next two lines commented out 4 Nov 2018. Report TP estimates out of 0-1 range and issue warning. # tp.p[tp.p < 0] <- 0 # tp.p[tp.p > 1] <- 1 tp.cl <- (tp.cl + sp - 1) / (se + sp - 1) # The next two lines commented out 4 Nov 2018. Report TP estimates out of 0-1 range and issue warning. # tp.cl[tp.cl < 0] <- 0 # tp.cl[tp.cl > 1] <- 1 # tp.cl <- pmax(tp.cl, c(0, 0)) # tp.cl <- pmin(tp.cl, c(1, 1)) if(length(pos) == 1){ if(ap.p < (1 - sp)) warning('Apparent prevalence is less than (1 - Sp). Rogan Gladen estimate of true prevalence invalid.') if(ap.p > se) warning('Apparent prevalence greater than Se. Rogan Gladen estimate of true prevalence invalid.') result.01 <- data.frame(est = ap.p * units, lower = ap.cl[1] * units, upper = ap.cl[2] * units) result.02 <- data.frame(est = tp.p * units, lower = tp.cl[1] * units, upper = tp.cl[2] * units) } if(length(pos) > 1){ id <- ap.p < (1 - sp) if(sum(id) > 0) warning('At least one apparent prevalence is less than (1 - Sp). Rogan Gladen estimate of true prevalence invalid.') ie <- (ap.p > se) if(sum(ie) > 0) warning('At least one apparent prevalence greater than Se. 
Rogan Gladen estimate of true prevalence invalid.') result.01 <- data.frame(est = ap.p * units, lower = ap.cl[,1] * units, upper = ap.cl[,2] * units) result.02 <- data.frame(est = tp.p * units, lower = tp.cl[,1] * units, upper = tp.cl[,2] * units) } rval <- list(ap = result.01, tp = result.02) return(rval) } # ----------------------------------- # Exact confidence intervals # ----------------------------------- # Binomial confidence intervals: .bin.ci <- function (x, n, alpha, method = c("wilson", "exact", "asymptotic", "all"), return.df = FALSE){ method <- match.arg(method) bc <- function(x, n, alpha, method) { nu1 <- 2 * (n - x + 1) nu2 <- 2 * x ll <- if (x > 0) x / (x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1)) else 0 nu1p <- nu2 + 2 nu2p <- nu1 - 2 pp <- if (x < n) qf(1 - alpha/2, nu1p, nu2p) else 1 ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp) zcrit <- -qnorm(alpha/2) z2 <- zcrit * zcrit p <- x/n cl <- (p + z2/2/n + c(-1, 1) * zcrit * sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n) if (x == 1) cl[1] <- -log(1 - alpha)/n if (x == (n - 1)) cl[2] <- 1 + log(1 - alpha)/n asymp.lcl <- x/n - qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n)) / n) asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n)) / n) res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl)) res <- cbind(rep(x/n, 3), res) switch(method, wilson = res[2, ], exact = res[1, ], asymptotic = res[3,], all = res, res) } if ((length(x) > 1 | length(n) > 1) & method == "all") { method <- "wilson" warning("method = 'all' will not work with vectors ... setting method to wilson") } if (length(x) == 1 & length(n) == 1 & method == "all") { mat <- bc(x, n, alpha, method) dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"), c("PointEst", "Lower", "Upper")) # if (include.n) # mat <- cbind(N = n, mat) # if (include.x) # mat <- cbind(X = x, mat) # if (return.df) mat <- as.data.frame(mat) return(mat) } mat <- matrix(ncol = 3, nrow = length(x)) for (i in 1:length(x)) mat[i,] <- bc(x[i], n[i], alpha = alpha, method = method) mat <- mat[,2:3] mat } # Sterne confidence intervals: .sterne.ci <- function(x, n, alpha, del = 10^-5){ lower <- c(); upper <- c() for(i in 1:length(x)){ # Lower bound a_alpha^st(X) if (x[i] == 0){tlower <- 0} else { J <- c(0:(x[i] - 1), (x[i] + 1):n[i]) k1 <- min(J) pi1 <- .piXeta(x. = x[i], n. = n[i], eta = .theta(k = k1, x. = x[i], n. = n[i])) # Calculation of k_alpha(X) if (pi1 >= alpha){kal <- k1} else { k <- x[i] - 1 while (k1 < k - 1){ k2 <- floor((k + k1) / 2) pi2 <- .piXeta(x. = x[i], n. = n[i], eta = .theta(k = k2, x. = x[i], n. = n[i])) if (pi2 >= alpha){k <- k2} else {k1 <- k2} } kal <- k } # Calculation of a_alpha^st(X): b1 <- .theta(k = kal, x. = x[i], n. = n[i]) pi1 <- 1 - .Feta(y. = x[i] - 1, n. = n[i], eta = b1) + .Feta(y. = kal - 1, n. = n[i], eta = b1) if (pi1 <= alpha){b <- b1} else { b <- max(.theta(k = kal - 1, x. = x[i], n. = n[i]), .logit(del)) pi <- 1 - .Feta(y. = x[i] - 1, n. = n[i], eta = b) + .Feta(y. = kal - 1, n. = n[i], eta = b) while (b1 - b > del || pi1 - pi > del){ b2 <- (b + b1) / 2 pi2 <- 1 - .Feta(y. = x[i] - 1, n. = n[i], eta = b2) + .Feta(y. = kal - 1, n. = n[i], eta = b2) if (pi2 > alpha){ b1 <- b2 pi1 <- pi2} else { b <- b2 pi <- pi2}}} tlower <- .invlogit(b) } # Upper bound b_alpha^st(X): if (x[i] == n[i]){tupper <- 1} else { J <- c(0:(x[i] - 1),(x[i] + 1):n[i]) k1 <- max(J) pi1 <- .piXeta(x. = x[i], n. = n[i], eta = .theta(k = k1, x. = x[i], n. 
= n[i])) # Calculation of k_alpha(X): if (pi1 >= alpha){kau <- k1} else { k <- x[i] + 1 pi <- 1 while (k1 > k + 1){ k2 <- floor((k + k1) / 2) pi2 <- .piXeta(x. = x[i], n. = n[i], eta = .theta(k = k2, x. = x[i], n. = n[i])) if (pi2 >= alpha){k <- k2} else {k1 <- k2} } kau <- k } # Calculation of b_alpha^st(X): b1 <- .theta(k = kau, x. = x[i], n. = n[i]) pi1 <- 1 - .Feta(y. = kau, n. = n[i], eta = b1) + .Feta(y. = x[i], n. = n[i], eta = b1) if (pi1 <= alpha){ b <- b1 po <- pi1} else { b <- min(.theta(k = kau + 1, x. = x[i], n. = n[i]), b1 + n[i]) pi <- 1 - .Feta(y. = kau, n. = n[i], eta = b) + .Feta(y. = x[i], n. = n[i], eta = b) while (b - b1 > del || pi1 - pi > del){ b2 <- (b + b1) / 2 pi2 <- 1 - .Feta(y. = kau, n. = n[i], eta = b2) + .Feta(y. = x[i], n. = n[i], eta = b2) if (pi2 > alpha){ b1 <- b2 pi1 <- pi2} else { b <- b2 pi <- pi2}}} tupper <- .invlogit(b) } # c("a_alpha^St" = pu, "b_alpha^St" = po) lower <- c(lower, tlower) upper <- c(upper, tupper) } rval <- data.frame(lower = lower, upper = upper) return(rval) } # Blaker confidence intervals: .blaker.ci <- function(x, n, conf.level, tolerance = 1e-04){ lower <- c(); upper <- c() for(i in 1:length(x)){ tlower = 0; tupper = 1 if (x[i] != 0){ tlower = qbeta((1 - conf.level) / 2, x[i], n[i] - x[i] + 1) while (.acceptbin(x. = x[i], n. = n[i], p = tlower + tolerance) < (1 - conf.level)) tlower = tlower + tolerance } if (x[i] != n[i]){ tupper = qbeta(1 - (1 - conf.level) / 2, x[i] + 1, n[i] - x[i]) while (.acceptbin(x. = x[i], n. = n[i], p = tupper - tolerance) < (1 - conf.level)) tupper = tupper - tolerance } lower <- c(lower, tlower) upper <- c(upper, tupper) } rval <- data.frame(lower = lower, upper = upper) return(rval) } # Support functions: .acceptbin = function(x., n., p){ # Computes the Blaker acceptability of p when x is observed and X is bin(n, p) p1 = 1 - pbinom(q = (x. - 1), size = n., prob = p) p2 = pbinom(q = x., size = n., prob = p) a1 = p1 + pbinom(q = (qbinom(p = p1, size = n., prob = p) - 1), size = n., prob = p) a2 = p2 + 1 - pbinom(q = qbinom(p = (1 - p2), size = n., prob = p), size = n., prob = p) return(min(a1, a2)) } .logit <- function(p){log(p / (1 - p))} .invlogit <- function(y){exp(y) / (1 + exp(y))} .theta <- function(k, x., n.){(lchoose(n., x.) - lchoose(n., k)) / (k - x.)} .Feta <- function(y., n., eta){pbinom(y., n., .invlogit(eta))} # The function piXeta(x, eta) automatically accounts for the fact that if k_alpha(X) = min(J) then a_alpha^st(X) = a_alpha(X) .piXeta <- function(x., n., eta){ if (.invlogit(eta) >= 1){f <- 0} else { J <- c(0:(x. - 1),(x. + 1):n.) # on (-infinity, theta_0] t1 <- .theta(0, x., n.) if (is.na(t1) != 1 && eta <= t1){f <- 1 - .Feta(y. = x. - 1, n. = n., eta = eta)} # on [theta_0,mode] k1 <- J[J < (x. - 1)] if (length(k1) > 0){ the1 <- .theta(k1, x., n.) the2 <- .theta(k1 + 1, x., n.) pos <- (the1 <= eta) * (eta < the2) if (sum(pos) > 0){f <- 1 - .Feta(y. = x. - 1, n., eta) + .Feta(y. = max(k1 * pos), n., eta)} } # mode the1 <- .theta(x. - 1, x., n.) the2 <- .theta(x. + 1, x., n.) if (eta >= the1 && eta <= the2){f <- 1} } # on [mode,theta_n] k2 <- J[J > (x. + 1)] if (length(k2) > 0){ the1 <- .theta(k2 - 1, x., n.) the2 <- .theta(k2, x., n.) kre <- sum(k2 * (the1 < eta) * (eta <= the2)) if (kre > 0){ f <- 1 - .Feta(y. = kre - 1, n., eta) + .Feta(y. = x., n., eta)} } # on [theta_n,infty) t2 <- .theta(n., x., n.) if (is.na(t2) != 1 && eta >= t2){f <- .Feta(y. 
= x., n., eta)} f} epiR/R/zsep.binom.imperfect.R0000644000176200001440000000024613634771164015554 0ustar liggesuserszsep.binom.imperfect <- function(n, c, se, sp, pstar) { P.Pos <- pstar * se + (1 - pstar) * (1 - sp) sep <- 1 - stats::pbinom(c - 1, n, P.Pos) return(sep) }epiR/R/zget.cp.R0000644000176200001440000000041113634771556013064 0ustar liggesuserszget.cp <- function(n.cp, se, sp, type2){ cp <- 0 SpH <- 0 while (SpH < 1 - type2) { cp <- cp + 1 # Probability of observed result from disease-free population: SpH <- zsph.binom(n.cp, cp, sp) } return (list("cp" = cp, "SpH" = SpH)) }epiR/R/epi.ssninfc.R0000644000176200001440000000514214112023762013714 0ustar liggesusersepi.ssninfc <- function(treat, control, sd, delta, n, r = 1, power, nfractional = FALSE, alpha){ # Stop if a negative value for delta entered: if (delta < 0){ stop("For a non-inferiority trial delta must be greater than or equal to zero.") } z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(power) & is.na(n)) { # delta equals the max absolute tolerable difference between treat and control. # Make delta negative: ndelta <- -delta beta <- (1 - power) z.beta <- qnorm(1 - beta, mean = 0, sd = 1) # http://powerandsamplesize.com/Calculators/Compare-2-Means/2-Sample-Non-Inferiority-or-Superiority: # Aniko Szabo 230821: Add check for non-existent solution: if (sign(z.alpha + z.beta) != sign(treat - control - ndelta)){ stop("Target power is not reachable. Check the exact specification of the hypotheses.") } n.control <- (1 + 1 / r) * (sd * (z.alpha + z.beta) / (treat - control - ndelta))^2 n.treat <- n.control * r if(nfractional == TRUE){ n.control <- n.control n.treat <- n.treat n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling(n.control) n.treat <- ceiling(n.treat) n.total <- n.treat + n.control } rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } if (!is.na(treat) & !is.na(control) & !is.na(delta) & !is.na(n) & is.na(power) & !is.na(r) & !is.na(alpha)) { # delta equals the max absolute tolerable difference between treat and control. # Make delta negative: ndelta <- -delta # Work out the number of subjects in the control group. r equals the number in the treatment group divided by the number in the control group. 
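# With n subjects in total and allocation ratio r, the control group receives
# n / (r + 1) subjects and the treatment group the remainder. Illustrative
# arithmetic only (hypothetical values, not output from this file): n = 300
# with r = 2 gives n.control = 300 / 3 = 100 and n.treat = 200.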
if(nfractional == TRUE){ n.control <- 1 / (r + 1) * n n.treat <- n - n.control n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling(1 / (r + 1) * n) n.treat <- n - n.control n.total <- n.treat + n.control } z <- (treat - control - ndelta) / (sd * sqrt((1 + 1 / r) / n.control)) # Aniko Szabo 230821 - use only one tail: power <- pnorm(z - z.alpha, mean = 0, sd = 1) # Original code: # power <- pnorm(z - z.alpha, mean = 0, sd = 1) + pnorm(-z - z.alpha, mean = 0, sd = 1) rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } rval } epiR/R/rsu.sep.rsfreecalc.R0000644000176200001440000000211113752202402015174 0ustar liggesusersrsu.sep.rsfreecalc <- function(N, n, c = 1, pstar, se.u, sp.u) { if(!is.na(N)){ d <- round(max(1, N * pstar), digits = 0) # y cannot be greater than d or n: maxy <- min(d, n) prod <- 0 # At cutpoint c, the herd is positive: for (x in 0:(c - 1)) { for (y in 0:maxy) { if ((d >= y) * ((N - d) >= (n - y))) { minxy <- min(x, y) fact <- 0 for (j in 0:minxy) { # Avoid illegal ranges: if ((y >= j) * ((n - y) >= (x - j)) * (d >= y) * (N >= n)) { fact <- fact + choose(y, j) * se.u^j *(1 - se.u)^(y - j) * choose(n - y, x - j) * (1 - sp.u)^(x - j) * sp.u^(n - x - y + j) } else {fact <- 0} } } else { fact <- 0} newprod <- stats::dhyper(x = y, m = d, n = N - d, k = n, log = FALSE) * fact prod <- prod + newprod } } sep <- 1 - prod return(sep) } else if(is.na(N)){ P.Pos <- pstar * se.u + (1 - pstar) * (1 - sp.u) sep <- 1 - stats::pbinom(c - 1, n, P.Pos) return(sep) } } epiR/R/rsu.sep.rbvarse.r0000644000176200001440000000063413741153426014610 0ustar liggesusersrsu.sep.rbvarse <- function(N, rr, df, pstar){ ppr <- N / sum(N) epi<- rsu.epinf(pstar, rr, ppr) n <- numeric(length(rr)) se <- n for(r in 1:length(rr)){ n[r] <- sum(df[df[,1] == r, 3]) se[r] <- mean(df[df[,1] == r, 2]) } p.all.neg <- (1 - se * n/N)^(epi[[1]] * N) sep <- 1 - prod(p.all.neg) return(list(sep = sep, epi = epi[[1]], adj.risk = epi[[2]], n = n, se = se)) }epiR/R/epi.2by2.R0000644000176200001440000046553614142470724013057 0ustar liggesusers"epi.2by2" <- function(dat, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns"){ # Stop if invalid number of digits: if(digits != 2 & digits != 3 & digits != 4) stop("Argument 'digits' for this function must take the value of 2, 3 or 4.") ## Elwoood JM (1992). Causal Relationships in Medicine - A Practical System for Critical Appraisal. Oxford Medical Publications, London, p 266 - 293. ## Rothman KJ (2002). Epidemiology An Introduction. Oxford University Press, London, p 130 - 143. ## Hanley JA (2001). A heuristic approach to the formulas for population attributable fraction. J. Epidemiol. Community Health 55: 508 - 514. ## Jewell NP (2004). Statistics for Epidemiology. Chapman & Hall/CRC, New York, p 84 - 85. 
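## A minimal usage sketch (hypothetical cell counts, not results reported in
## this file or in the references above): a cohort count study with 13 cases
## among 2176 exposed and 5 cases among 3354 unexposed could be analysed as
## dat.tmp <- as.table(matrix(c(13, 2163, 5, 3349), nrow = 2, byrow = TRUE))
## epi.2by2(dat = dat.tmp, method = "cohort.count", conf.level = 0.95,
##    units = 100, interpret = FALSE, outcome = "as.columns")
## which reports the incidence risk ratio and odds ratio together with the
## attributable risk and attributable fractions listed below.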
## Incidence risk in exposed: IRiske ## Incidence risk in unexposed: IRisko ## Incidence risk in population: IRpop ## Incidence rate in exposed: IRatee ## Incidence rate in unexposed: IRateo ## Incidence rate in population: IRatepop ## Odds in exposed: Oe ## Odds in unexposed: Oo ## Odds in population: Opop ## Incidence risk ratio: RR.p ## Incidence rate ratio: IRR.p ## Odds ratio: OR.p ## Attributable risk: ARisk.p ## Attributable rate: ARate.p ## Attributable fraction risk data: AFRisk.p ## Attributable fraction rate data: AFRate.p ## Estimated attributable fraction: AFest.p ## Population attributable risk: PARisk.p ## Population attributable rate: PARate.p ## Population attributable fraction risk data: PAFRisk.p ## Population attributable fraction rate data: PAFRate.p ## Crude incidence risk ratio (strata): cRR.p ## Crude incidence rate ratio (strata): cIRR.p ## Crude incidence odds ratio (strata): cOR.p ## Crude attributable risk (strata): cARisk.p ## Crude attributable rate (strata): cARate.p ## Summary incidence risk ratio: sRR.p ## Summary incidence rate ratio: sIRR.p ## Summary incidence odds ratio: sOR.p ## Summary attributable risk: sARisk.p ## Summary attributable rate: sARate.p ## Reporting - method == cohort.count: ## Inc risk ratio; odds ratio ## Attributable risk; attributable risk in population ## Attributable fraction in exposed; attributable fraction in population ## Reporting - method == cohort.time: ## Inc rate ratio ## Attributable rate; attributable rate in population ## Attributable fraction in exposed; attributable fraction in population ## Reporting - method == case.control: ## Odds ratio ## Attributable prevalence; attributable prevalence in population ## Attributable fraction (est) in exposed; attributable fraction (est) in population ## Reporting - method == cross.sectional: ## Prevalence ratio; odds ratio ## Attributable prevalence; attributable prevalence in population ## Attributable fraction in exposed; attributable fraction in population # If dat is a dplyr object and there's three columns re-jig into a conventional R table: id <- class(dat) == "grouped_df" | class(dat) == "tbl_df" | class(dat) == "tbl" | class(dat) == "data.frame" # Number of columns: dim <- ifelse(is.null(dim(dat)[2]), 0, dim(dat)[2]) if(dim == 3 & sum(id) == 4){ # Assign names: names(dat) <- c("exp","out","n") # Counts are in column 3. Must be numeric: if(!is.numeric(dat$n)) stop('Column 3 (cell frequencies) must be integer.') # Exposure variable column 1. Must be a factor: if(!is.factor(dat$exp)) stop('Column 1 (exposure) must be a factor.') # Outcome variable column 2. Must be a factor: if(!is.factor(dat$out)) stop('Column 2 (outcome) must be a factor.') dat <- xtabs(n ~ exp + out, data = dat) } # If dat is a dplyr object and there's four columns re-jig into a conventional R table: id <- class(dat) == "grouped_df" | class(dat) == "tbl_df" | class(dat) == "tbl" | class(dat) == "data.frame" if(dim == 4 & sum(id) == 4){ # Assign names: names(dat) <- c("conf","exp","out","n") # Counts are in column 4. Must be numeric: if(!is.numeric(dat$n)) stop('Column 4 (cell frequencies) must be integer.') # Confounder variable column 1. Must be a factor: if(!is.factor(dat$conf)) stop('Column 1 (confounder) must be a factor.') # Exposure variable column 2. Must be a factor: if(!is.factor(dat$exp)) stop('Column 2 (exposure) must be a factor.') # Outcome variable column 3. 
Must be a factor: if(!is.factor(dat$out)) stop('Column 3 (outcome) must be a factor.') # Re-jig data as a conventional R table: dat <- xtabs(n ~ exp + out + conf, data = dat) } # If dat vector of length 4 (i.e. cell frequencies) re-jig into a conventional R table: if(length(dat) == 4 & is.vector(dat) == TRUE){ dat <- as.table(matrix(dat, nrow = 2, byrow = TRUE)) } ## If outcome is assigned by column, leave the data as is: if(outcome == "as.columns"){ dat <- dat} ## If outcome is assigned by row, transpose it: if(outcome == "as.rows"){ dat <- t(dat)} ## Make a copy of the original data. These values used when sums of cells across all strata are greater than zero but some strata contain zero cell frequencies: if(length(dim(dat)) == 2){ a <- dat[1]; A <- a b <- dat[3]; B <- b c <- dat[2]; C <- c d <- dat[4]; D <- d } if(length(dim(dat)) > 2){ a <- dat[1,1,]; A <- a b <- dat[1,2,]; B <- b c <- dat[2,1,]; C <- c d <- dat[2,2,]; D <- d } # Commented this section out 100617. The CI methods that are used are robust to zero cell frequencies. # Test each strata for zero values. Add 0.5 to all cells if any cell has a zero value: # for(i in 1:length(a)){ # if(a[i] < 1 | b[i] < 1 | c[i] < 1 | d[i] < 1){ # a[i] <- a[i] + 0.5; b[i] <- b[i] + 0.5; c[i] <- c[i] + 0.5; d[i] <- d[i] + 0.5 # } # } # dFNCHypergeo <- function(x, m1, m2, n, odds, precision = 1e-07){ # stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2), # is.numeric(n), is.numeric(odds), is.numeric(precision)) # .Call("dFNCHypergeo", as.integer(x), as.integer(m1), as.integer(m2), # as.integer(n), as.double(odds), as.double(precision), # PACKAGE = "BiasedUrn") # } # See http://www.stat.ufl.edu/~aa/cda/R/two-sample/R2/index.html # See https://stackoverflow.com/questions/4357827/do-while-loop-in-r ## ================= ## DECLARE VARIABLES ## ================= ## | D+ | D- | Total ## ---------------------------- ## Exp + | a | b | N1 ## Exp - | c | d | N0 ## -------|------|------|------ ## Total | M1 | M0 | Total N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) # For large numbers you need to use floating point rather than integer representation. 
This will avoid "integer overflow" messages: a <- as.numeric(a); A <- as.numeric(A) b <- as.numeric(b); B <- as.numeric(B) c <- as.numeric(c); C <- as.numeric(C) d <- as.numeric(d); D <- as.numeric(D) # Total within strata cases: M1 <- a + c # Total within strata non-cases: M0 <- b + d # Total within strata exposed: N1 <- a + b # Total within strata unexposed: N0 <- c + d # Total within strata subjects: total <- a + b + c + d # Number of strata: n.strata <- length(a) # Added 190809: # If the sums across strata for all cells are greater than 0, use the sums of the crude data (cf the sums of the adjusted values): if(sum(A) > 0 & sum(B) > 0 & sum(C) > 0 & sum(D) > 0){ sa <- sum(A); sb <- sum(B); sc <- sum(C); sd <- sum(D) } # If the sums across strata for all cells contain a 0, use the sums of the adjusted data: if(sum(A) == 0 | sum(B) == 0 | sum(C) == 0 | sum(D) == 0){ sa <- sum(a); sb <- sum(b); sc <- sum(c); sd <- sum(d) } # sa <- sum(a); sb <- sum(b); sc <- sum(c); sd <- sum(d) # Grand total cases: sM1 <- sa + sc # Grand total non-cases: sM0 <- sb + sd # Grand total exposed: sN1 <- sa + sb # Grand total unexposed: sN0 <- sc + sd # Grand total: stotal <- sa + sb + sc + sd # Within-strata incidence risk in exposed: .tmp <- zexact(as.matrix(cbind(a, N1)), conf.level = conf.level) IRiske.p <- as.numeric(.tmp[,1]) * units IRiske.l <- as.numeric(.tmp[,2]) * units IRiske.u <- as.numeric(.tmp[,3]) * units # Within-strata incidence risk in unexposed: .tmp <- zexact(as.matrix(cbind(c, N0)), conf.level = conf.level) IRisko.p <- as.numeric(.tmp[,1]) * units IRisko.l <- as.numeric(.tmp[,2]) * units IRisko.u <- as.numeric(.tmp[,3]) * units # Within-strata incidence risk in population: .tmp <- zexact(as.matrix(cbind(M1, total)), conf.level = conf.level) IRiskpop.p <- as.numeric(.tmp[,1]) * units IRiskpop.l <- as.numeric(.tmp[,2]) * units IRiskpop.u <- as.numeric(.tmp[,3]) * units # Within-strata incidence rate in exposed: .tmp <- zincrate(as.matrix(cbind(a, b)), conf.level = conf.level) IRatee.p <- as.numeric(.tmp[,1]) * units IRatee.l <- as.numeric(.tmp[,2]) * units IRatee.u <- as.numeric(.tmp[,3]) * units # Within-strata incidence rate in unexposed: .tmp <- zincrate(as.matrix(cbind(c, d)), conf.level = conf.level) IRateo.p <- as.numeric(.tmp[,1]) * units IRateo.l <- as.numeric(.tmp[,2]) * units IRateo.u <- as.numeric(.tmp[,3]) * units # Within-strata incidence rate in population: .tmp <- zincrate(as.matrix(cbind(M1, M0)), conf.level = conf.level) IRatepop.p <- as.numeric(.tmp[,1]) * units IRatepop.l <- as.numeric(.tmp[,2]) * units IRatepop.u <- as.numeric(.tmp[,3]) * units # Within-strata odds in exposed (based on Ederer F and Mantel N (1974) Confidence limits on the ratio of two Poisson variables. # American Journal of Epidemiology 100: 165 - 167. # Cited in Altman, Machin, Bryant, and Gardner (2000) Statistics with Confidence, British Medical Journal, page 69). # Added 160609. 
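# The qbinom() quantiles below are counts; dividing by the group total gives
# binomial confidence limits on the proportion scale (Al, Au), which are then
# carried to the odds scale with the monotone map p / (1 - p). The point
# estimate itself is taken directly as the ratio of cases to non-cases.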
Al <- (qbinom(1 - N., size = a + b, prob = (a / (a + b)))) / (a + b) Au <- (qbinom(N., size = a + b, prob = (a / (a + b)))) / (a + b) Oe.p <- (a / b) Oe.l <- (Al / (1 - Al)) Oe.u <- (Au / (1 - Au)) # Within-strata odds in unexposed: Al <- (qbinom(1 - N., size = c + d, prob = (c / (c + d)))) / (c + d) Au <- (qbinom(N., size = c + d, prob = (c / (c + d)))) / (c + d) Oo.p <- (c / d) Oo.l <- (Al / (1 - Al)) Oo.u <- (Au / (1 - Au)) # Within-strata odds in population: Al <- (qbinom(1 - N., size = M1 + M0, prob = (M1 / (M1 + M0)))) / (M1 + M0) Au <- (qbinom(N., size = M1 + M0, prob = (M1 / (M1 + M0)))) / (M1 + M0) Opop.p <- (M1 / M0) Opop.l <- (Al / (1 - Al)) Opop.u <- (Au / (1 - Au)) # Crude incidence risk in exposed: .tmp <- zexact(as.matrix(cbind(sa, sN1)), conf.level = conf.level) cIRiske.p <- as.numeric(.tmp[,1]) * units cIRiske.l <- as.numeric(.tmp[,2]) * units cIRiske.u <- as.numeric(.tmp[,3]) * units # Crude incidence risk in unexposed: .tmp <- zexact(as.matrix(cbind(sc, sN0)), conf.level = conf.level) cIRisko.p <- as.numeric(.tmp[,1]) * units cIRisko.l <- as.numeric(.tmp[,2]) * units cIRisko.u <- as.numeric(.tmp[,3]) * units # Crude incidence risk in population: .tmp <- zexact(as.matrix(cbind(sM1, stotal)), conf.level = conf.level) cIRiskpop.p <- as.numeric(.tmp[,1]) * units cIRiskpop.l <- as.numeric(.tmp[,2]) * units cIRiskpop.u <- as.numeric(.tmp[,3]) * units # Crude incidence rate in exposed: .tmp <- zincrate(as.matrix(cbind(sa, sb)), conf.level = conf.level) cIRatee.p <- as.numeric(.tmp[,1]) * units cIRatee.l <- as.numeric(.tmp[,2]) * units cIRatee.u <- as.numeric(.tmp[,3]) * units # Crude incidence rate in unexposed: .tmp <- zincrate(as.matrix(cbind(sc, sd)), conf.level = conf.level) cIRateo.p <- as.numeric(.tmp[,1]) * units cIRateo.l <- as.numeric(.tmp[,2]) * units cIRateo.u <- as.numeric(.tmp[,3]) * units # Crude incidence risk in population: .tmp <- zincrate(as.matrix(cbind(sM1, sM0)), conf.level = conf.level) cIRatepop.p <- as.numeric(.tmp[,1]) * units cIRatepop.l <- as.numeric(.tmp[,2]) * units cIRatepop.u <- as.numeric(.tmp[,3]) * units # Crude odds in exposed (based on Ederer F and Mantel N (1974) Confidence limits on the ratio of two Poisson variables. # American Journal of Epidemiology 100: 165 - 167. # Cited in Altman, Machin, Bryant, and Gardner (2000) Statistics with Confidence, British Medical Journal, page 69). 
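# The crude versions below repeat the same calculation on the counts pooled
# across strata (sa, sb, sc, sd), i.e. ignoring the stratification variable.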
# Added 160609
  Al <- (qbinom(1 - N., size = sa + sb, prob = (sa / (sa + sb)))) / (sa + sb)
  Au <- (qbinom(N., size = sa + sb, prob = (sa / (sa + sb)))) / (sa + sb)
  cOe.p <- sa / sb
  cOe.l <- Al / (1 - Al)
  cOe.u <- Au / (1 - Au)

  # Crude odds in unexposed:
  Al <- (qbinom(1 - N., size = sc + sd, prob = (sc / (sc + sd)))) / (sc + sd)
  Au <- (qbinom(N., size = sc + sd, prob = (sc / (sc + sd)))) / (sc + sd)
  cOo.p <- sc / sd
  cOo.l <- Al / (1 - Al)
  cOo.u <- Au / (1 - Au)

  # Crude odds in population:
  Al <- (qbinom(1 - N., size = sM1 + sM0, prob = (sM1 / (sM1 + sM0)))) / (sM1 + sM0)
  Au <- (qbinom(N., size = sM1 + sM0, prob = (sM1 / (sM1 + sM0)))) / (sM1 + sM0)
  cOpop.p <- sM1 / sM0
  cOpop.l <- Al / (1 - Al)
  cOpop.u <- Au / (1 - Au)

  ## =========================================
  ## INDIVIDUAL STRATA MEASURES OF ASSOCIATION
  ## =========================================

  # Individual strata incidence risk ratio - Wald confidence limits (Rothman p 135 equation 7-3):
  wRR.ctype <- "Wald"
  wRR.p <- c(); wRR.l <- c(); wRR.u <- c()

  if(length(dim(dat)) == 3){
    for(i in 1:dim(dat)[3]){
      .tmp <- zRRwald(dat[,,i], conf.level)
      wRR.p <- c(wRR.p, .tmp[1])
      wRR.l <- c(wRR.l, .tmp[2])
      wRR.u <- c(wRR.u, .tmp[3])
    }
  }

  if(length(dim(dat)) == 2){
    .tmp <- zRRwald(dat, conf.level)
    wRR.p <- .tmp[1]
    wRR.l <- .tmp[2]
    wRR.u <- .tmp[3]
  }

  wRR.p <- ifelse(wRR.p == 0 | is.nan(wRR.p) | is.infinite(wRR.p), NaN, wRR.p)
  wRR.l <- ifelse(wRR.p == 0 | is.nan(wRR.p) | is.infinite(wRR.p), NaN, wRR.l)
  wRR.u <- ifelse(wRR.p == 0 | is.nan(wRR.p) | is.infinite(wRR.p), NaN, wRR.u)

  # Individual strata incidence risk ratio - Taylor confidence limits (Hightower et al 1988):
  tRR.ctype <- "Taylor"
  tRR.p <- c(); tRR.l <- c(); tRR.u <- c()

  if(length(dim(dat)) == 3){
    for(i in 1:dim(dat)[3]){
      .tmp <- zRRtaylor(dat[,,i], conf.level)
      tRR.p <- c(tRR.p, .tmp[1])
      tRR.l <- c(tRR.l, .tmp[2])
      tRR.u <- c(tRR.u, .tmp[3])
    }
  }

  if(length(dim(dat)) == 2){
    .tmp <- zRRtaylor(dat, conf.level)
    tRR.p <- .tmp[1]
    tRR.l <- .tmp[2]
    tRR.u <- .tmp[3]
  }

  tRR.p <- ifelse(tRR.p == 0 | is.nan(tRR.p) | is.infinite(tRR.p), NaN, tRR.p)
  tRR.l <- ifelse(tRR.p == 0 | is.nan(tRR.p) | is.infinite(tRR.p), NaN, tRR.l)
  tRR.u <- ifelse(tRR.p == 0 | is.nan(tRR.p) | is.infinite(tRR.p), NaN, tRR.u)

  # Individual strata incidence risk ratio - score confidence limits:
  scRR.ctype <- "Score"
  scRR.p <- c(); scRR.l <- c(); scRR.u <- c()

  if(length(dim(dat)) == 3){
    for(i in 1:dim(dat)[3]){
      .tmp <- zRRscore(dat[,,i], conf.level)
      scRR.p <- c(scRR.p, .tmp[1])
      scRR.l <- c(scRR.l, .tmp[2])
      scRR.u <- c(scRR.u, .tmp[3])
    }
  }

  if(length(dim(dat)) == 2){
    .tmp <- zRRscore(dat, conf.level)
    scRR.p <- .tmp[1]
    scRR.l <- .tmp[2]
    scRR.u <- .tmp[3]
  }

  scRR.p <- ifelse(scRR.p == 0 | is.nan(scRR.p) | is.infinite(scRR.p), NaN, scRR.p)
  scRR.l <- ifelse(scRR.p == 0 | is.nan(scRR.p) | is.infinite(scRR.p), NaN, scRR.l)
  scRR.u <- ifelse(scRR.p == 0 | is.nan(scRR.p) | is.infinite(scRR.p), NaN, scRR.u)

  # Individual strata incidence rate ratio (exact confidence intervals from epibasic.xlsx http://ph.au.dk/uddannelse/software/):
  IRR.ctype <- ""
  IRR.p <- (a / b) / (c / d)
  lnIRR <- log(IRR.p)
  lnIRR.var <- (1 / a) + (1 / c)
  lnIRR.se <- sqrt((1 / a) + (1 / c))
  IRR.se <- exp(lnIRR.se)

  # See https://stats.stackexchange.com/questions/495622/calculation-of-the-confidence-interval-for-incidence-rate-ratio-using-exact-appr
  IRR.l <- suppressWarnings(d / b * (a / (c + 1)) * 1 / qf(p = (1 - conf.level) / 2, df1 = 2 * (c + 1), df2 = 2 * a, lower.tail = FALSE))
  IRR.u <- suppressWarnings(d / b * ((a + 1) / c) * qf(p = (1 - conf.level) / 2, df1 = 2 * (a + 1), df2 = 2 *
c, lower.tail = FALSE)) # pl <- a / (a + (c + 1) * (1 / qf(1 - N., 2 * a, 2 * c + 2))) # ph <- (a + 1) / (a + 1 + c / (1 / qf(1 - N., 2 * c, 2 * a + 2))) # IRR.l <- pl * d / ((1 - pl) * b) # IRR.u <- ph * d / ((1 - ph) * b) # lnIRR.l <- lnIRR - (z * lnIRR.se) # IRR.l <- exp(lnIRR.l) # lnIRR.u <- lnIRR + (z * lnIRR.se) # IRR.u <- exp(lnIRR.u) IRR.p <- ifelse(IRR.p == 0 | is.nan(IRR.p) | is.infinite(IRR.p), NaN, IRR.p) IRR.l <- ifelse(IRR.p == 0 | is.nan(IRR.p) | is.infinite(IRR.p), NaN, IRR.l) IRR.u <- ifelse(IRR.p == 0 | is.nan(IRR.p) | is.infinite(IRR.p), NaN, IRR.u) ## Incidence rate ratio weights (equal to precision, the inverse of the variance of the IRR. See Woodward page 168): IRR.w <- 1 / (exp(lnIRR.var)) ## Individual strata Wald odds ratios (Rothman p 139 equation 7-6): wOR.ctype <- "Wald" wOR.p <- c(); wOR.l <- c(); wOR.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ .tmp <- zORwald(dat[,,i], conf.level) wOR.p <- c(wOR.p, .tmp[1]) wOR.l <- c(wOR.l, .tmp[2]) wOR.u <- c(wOR.u, .tmp[3]) } } if(length(dim(dat)) == 2){ .tmp <- zORwald(dat, conf.level) wOR.p <- .tmp[1] wOR.l <- .tmp[2] wOR.u <- .tmp[3] } wOR.p <- ifelse(wOR.p == 0 | is.nan(wOR.p) | is.infinite(wOR.p), NaN, wOR.p) wOR.l <- ifelse(wOR.p == 0 | is.nan(wOR.p) | is.infinite(wOR.p), NaN, wOR.l) wOR.u <- ifelse(wOR.p == 0 | is.nan(wOR.p) | is.infinite(wOR.p), NaN, wOR.u) # Individual strata odds ratio - Cornfield confidence limits. # Only calculate Cornfield confidence limits if N < 500; function very slow with large numbers otherwise: if(sum(total) < 500){ cfOR.ctype <- "Cornfield" cfOR.p <- c(); cfOR.l <- c(); cfOR.u <- c() # Use zORcfield if cell frequencies are integer: if(length(dim(dat)) == 3 & is.integer(dat) == TRUE){ for(i in 1:dim(dat)[3]){ .tmp <- zORcfield(dat[,,i], conf.level) cfOR.p <- c(cfOR.p, .tmp[1]) cfOR.l <- c(cfOR.l, .tmp[2]) cfOR.u <- c(cfOR.u, .tmp[3]) } } # Use zORcfield if cell frequencies are integer: if(length(dim(dat)) == 2 & is.integer(dat) == TRUE){ .tmp <- zORcfield(dat, conf.level) cfOR.p <- .tmp[1] cfOR.l <- .tmp[2] cfOR.u <- .tmp[3] } # Return NAs for cfOR.p if Haldane Anscombe correction used (i.e. non-integer cell frequencies): if(length(dim(dat)) == 3 & is.integer(dat) == FALSE){ for(i in 1:dim(dat)[3]){ .tmp <- c(NA,NA,NA) cfOR.p <- c(cfOR.p, .tmp[1]) cfOR.l <- c(cfOR.l, .tmp[2]) cfOR.u <- c(cfOR.u, .tmp[3]) } } # Return NAs for cfOR.p if Haldane Anscombe correction used (i.e. 
non-integer cell frequencies):
    if(length(dim(dat)) == 2 & is.integer(dat) == FALSE){
      .tmp <- c(NA,NA,NA)
      cfOR.p <- .tmp[1]
      cfOR.l <- .tmp[2]
      cfOR.u <- .tmp[3]
    }

    cfOR.p <- ifelse(cfOR.p == 0 | is.nan(cfOR.p) | is.infinite(cfOR.p), NaN, cfOR.p)
    cfOR.l <- ifelse(cfOR.p == 0 | is.nan(cfOR.p) | is.infinite(cfOR.p), NaN, cfOR.l)
    cfOR.u <- ifelse(cfOR.p == 0 | is.nan(cfOR.p) | is.infinite(cfOR.p), NaN, cfOR.u)
  }

  if(sum(total) >= 500){
    cfOR.ctype <- "Cornfield"
    cfOR.p <- c(); cfOR.l <- c(); cfOR.u <- c()

    if(length(dim(dat)) == 3){
      for(i in 1:dim(dat)[3]){
        .tmp <- c(NA,NA,NA)
        cfOR.p <- c(cfOR.p, .tmp[1])
        cfOR.l <- c(cfOR.l, .tmp[2])
        cfOR.u <- c(cfOR.u, .tmp[3])
      }
    }

    if(length(dim(dat)) == 2){
      # .tmp <- zORcfield(dat, conf.level)
      cfOR.p <- Oe.p / Oo.p
      cfOR.l <- NA
      cfOR.u <- NA
    }

    cfOR.p <- ifelse(cfOR.p == 0 | is.nan(cfOR.p) | is.infinite(cfOR.p), NaN, cfOR.p)
    cfOR.l <- ifelse(cfOR.p == 0 | is.nan(cfOR.p) | is.infinite(cfOR.p), NaN, cfOR.l)
    cfOR.u <- ifelse(cfOR.p == 0 | is.nan(cfOR.p) | is.infinite(cfOR.p), NaN, cfOR.u)
  }

  # Individual strata odds ratio - score confidence limits:
  scOR.ctype <- "Score"
  scOR.p <- c(); scOR.l <- c(); scOR.u <- c()

  if(length(dim(dat)) == 3){
    for(i in 1:dim(dat)[3]){
      .tmp <- zORscore(dat[,,i], conf.level)
      scOR.p <- c(scOR.p, .tmp[1])
      scOR.l <- c(scOR.l, .tmp[2])
      scOR.u <- c(scOR.u, .tmp[3])
    }
  }

  if(length(dim(dat)) == 2){
    .tmp <- zORscore(dat, conf.level)
    scOR.p <- .tmp[1]
    scOR.l <- .tmp[2]
    scOR.u <- .tmp[3]
  }

  scOR.p <- ifelse(scOR.p == 0 | is.nan(scOR.p) | is.infinite(scOR.p), NaN, scOR.p)
  scOR.l <- ifelse(scOR.p == 0 | is.nan(scOR.p) | is.infinite(scOR.p), NaN, scOR.l)
  scOR.u <- ifelse(scOR.p == 0 | is.nan(scOR.p) | is.infinite(scOR.p), NaN, scOR.u)

  # Individual strata odds ratios - maximum likelihood estimate (using fisher.test function):
  # Replaced 130612.
  mOR.ctype <- "MLE"
  mOR.p <- c(); mOR.l <- c(); mOR.u <- c()

  # If numbers too large error returned 'x' has entries too large to be integer.
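# A minimal illustration of the conditional MLE odds ratio that zORml() is
  # assumed to obtain from stats::fisher.test() (sketch only; counts invented):
  # tab <- matrix(c(13, 2163, 5, 3349), nrow = 2, byrow = TRUE)
  # fisher.test(tab, conf.level = 0.95)$estimate   # conditional MLE of the OR
  # fisher.test(tab, conf.level = 0.95)$conf.int   # exact confidence limits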
if(sum(total) < 2E09){ if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ .tmp <- zORml(dat[,,i], conf.level) mOR.p <- c(mOR.p, .tmp[1]) mOR.l <- c(mOR.l, .tmp[2]) mOR.u <- c(mOR.u, .tmp[3]) } } if(length(dim(dat)) == 2){ .tmp <- zORml(dat, conf.level) mOR.p <- .tmp[1] mOR.l <- .tmp[2] mOR.u <- .tmp[3] } mOR.p <- ifelse(mOR.p == 0 | is.nan(mOR.p) | is.infinite(mOR.p), NaN, mOR.p) mOR.l <- ifelse(mOR.p == 0 | is.nan(mOR.p) | is.infinite(mOR.p), NaN, mOR.l) mOR.u <- ifelse(mOR.p == 0 | is.nan(mOR.p) | is.infinite(mOR.p), NaN, mOR.u) } if(sum(total) >= 2E09){ if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ mOR.p <- Oe.p / Oo.p mOR.l <- NA mOR.u <- NA } } if(length(dim(dat)) == 2){ mOR.p <- Oe.p / Oo.p mOR.l <- NA mOR.u <- NA } mOR.p <- ifelse(mOR.p == 0 | is.nan(mOR.p) | is.infinite(mOR.p), NaN, mOR.p) mOR.l <- ifelse(mOR.p == 0 | is.nan(mOR.p) | is.infinite(mOR.p), NaN, mOR.l) mOR.u <- ifelse(mOR.p == 0 | is.nan(mOR.p) | is.infinite(mOR.p), NaN, mOR.u) } # Individual strata attributable risk (Rothman p 135 equation 7-2): wARisk.ctype <- "Wald" wARisk.p <- c(); wARisk.l <- c(); wARisk.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ .tmp <- zARwald(dat[,,i], conf.level, units) wARisk.p <- c(wARisk.p, .tmp[1]) wARisk.l <- c(wARisk.l, .tmp[2]) wARisk.u <- c(wARisk.u, .tmp[3]) } } if(length(dim(dat)) == 2){ .tmp <- zARwald(dat, conf.level, units) wARisk.p <- .tmp[1] wARisk.l <- .tmp[2] wARisk.u <- .tmp[3] } # Individual strata NNTB-NNTH - Wald confidence limits: wNNT.p <- 1 / (wARisk.p / units) .wNNT.l <- 1 / (wARisk.l / units) .wNNT.u <- 1 / (wARisk.u / units) wNNT.l <- min(c(.wNNT.l, .wNNT.u)) wNNT.u <- max(c(.wNNT.l, .wNNT.u)) # Individual strata attributable risk - score confidence limits: scARisk.ctype <- "Score" scARisk.p <- c(); scARisk.l <- c(); scARisk.u <- c() if(length(dim(dat)) == 3){ for(i in 1:dim(dat)[3]){ .tmp <- zARscore(dat[,,i], conf.level, units) scARisk.p <- c(scARisk.p, .tmp[1]) scARisk.l <- c(scARisk.l, .tmp[2]) scARisk.u <- c(scARisk.u, .tmp[3]) } } if(length(dim(dat)) == 2){ .tmp <- zARscore(dat, conf.level, units) scARisk.p <- .tmp[1] scARisk.l <- .tmp[2] scARisk.u <- .tmp[3] } # Individual strata NNTB-NNTH - score confidence limits: scNNT.p <- 1 / (scARisk.p / units) .scNNT.l <- 1 / (scARisk.l / units) .scNNT.u <- 1 / (scARisk.u / units) scNNT.l <- min(c(.scNNT.l, .scNNT.u)) scNNT.u <- max(c(.scNNT.l, .scNNT.u)) # Individual strata attributable rate (Rothman p 137 equation 7-4): ARate.ctype <- "" ARate.p <- ((a / b) - (c / d)) * units ARate.var <- (a / b^2) + (c / d^2) ARate.se <- (sqrt((a / b^2) + (c / d^2))) * units ARate.l <- ARate.p - (z * ARate.se) ARate.u <- ARate.p + (z * ARate.se) # Attribtable rate weights (equal to precision, the inverse of the variance of the RR. See Woodward page 168): ARate.w <- 1 / (ARate.var) # Individual strata attributable fraction for risk data (from Hanley 2001): AFRisk.ctype <- "" AFRisk.p <- ((wRR.p - 1) / wRR.p) AFRisk.l <- (wRR.l - 1) / wRR.l AFRisk.u <- (wRR.u - 1) / wRR.u ## AFRisk.l <- min((wRR.l - 1) / wRR.l, (wRR.u - 1) / wRR.u) ## AFRisk.u <- max((wRR.l - 1) / wRR.l, (wRR.u - 1) / wRR.u) # Individual strata attributable fraction for rate data (from Hanley 2001): AFRate.ctype <- "" AFRate.p <- (IRR.p - 1) / IRR.p # Bug found 031013. The following two lines of code replace those on lines 449 and 450. 
AFRate.l <- (IRR.l - 1) / IRR.l AFRate.u <- (IRR.u - 1) / IRR.u # AFRate.l <- min((IRR.l - 1) / IRR.l, (IRR.u - 1) / IRR.u) # AFRate.u <- max((IRR.l - 1) / IRR.l, (IRR.u - 1) / IRR.u) # Individual strata estimated attributable fraction (from Hanley 2001): AFest.ctype <- "" AFest.p <- (mOR.p - 1) / mOR.p AFest.l <- (mOR.l - 1) / mOR.l AFest.u <- (mOR.u - 1) / mOR.u # Bug found 031013. The following two lines of code replace those on lines 457 and 458. # AFest.l <- min((OR.l - 1) / OR.l, (OR.u - 1) / OR.u) # AFest.u <- max((OR.l - 1) / OR.l, (OR.u - 1) / OR.u) # Individual strata population attributable risk (same as Rothman p 135 equation 7-2): wPARisk.ctype <- "" wPARisk.p <- ((M1 / total) - (c / N0)) * units wPARisk.se <- (sqrt(((M1 * (total - M1))/total^3) + ((c * (N0 - c))/N0^3))) * units wPARisk.l <- wPARisk.p - (z * wPARisk.se) wPARisk.u <- wPARisk.p + (z * wPARisk.se) # 270115 Confidence intervals for PAR from Sarah Pirikahu MSc thesis. pPARisk.ctype <- "Pirikahu" pPARisk.p <- ((M1 / total) - (c / N0)) * units pPARisk.d1 <- (1 / total) - ((a + c) / total^2) pPARisk.d2 <- -((a + c) / total^2) pPARisk.d3 <- (c / (c + d)^2) - ((a + c) / total^2) + (1 / total) - (1 / (c + d)) pPARisk.d4 <- (c / (c + d)^2) - ((a + c) / total^2) pPARisk.var <- ((pPARisk.d1^2) * a) + ((pPARisk.d2^2) * b) + ((pPARisk.d3^2) * c) + ((pPARisk.d4^2) * d) pPARisk.se <- sqrt(pPARisk.var) * units pPARisk.l <- pPARisk.p - (z * pPARisk.se) pPARisk.u <- pPARisk.p + (z * pPARisk.se) # Individual strata population attributable rate (same as Rothman p 137 equation 7-4): PARate.ctype <- "" PARate.p <- ((M1 / M0) - (c / d)) * units PARate.se <- (sqrt((M1 / M0^2) + (c / d^2))) * units PARate.l <- PARate.p - (z * PARate.se) PARate.u <- PARate.p + (z * PARate.se) # Individual strata population attributable fractions for risk data (from Hanley, 2001): # PAFRisk.p <- ((wRR.p - 1) / wRR.p) * (a / M1) # PAFRisk.l <- ((wRR.l - 1) / wRR.l) * (a / M1) # PAFRisk.u <- ((wRR.u - 1) / wRR.u) * (a / M1) # Individual strata population attributable fractions for risk data (from OpenEpi TwobyTwo): # PAFRisk.p <- (IRiskpop.p - IRisko.p) / IRiskpop.p # PAFRisk.l <- min((IRiskpop.l - IRisko.l) / IRiskpop.l, (IRiskpop.u - IRisko.u) / IRiskpop.u) # PAFRisk.u <- max((IRiskpop.l - IRisko.l) / IRiskpop.l, (IRiskpop.u - IRisko.u) / IRiskpop.u) # Individual strata population attributable fractions for risk data (from Jewell, page 84): PAFRisk.ctype <- "Jewell" PAFRisk.p <- ((a * d) - (b * c)) / ((a + c) * (c + d)) PAFRisk.var <- (b + (PAFRisk.p * (a + d))) / (total * c) PAFRisk.l <- 1 - exp(log(1 - PAFRisk.p) + (z * sqrt(PAFRisk.var))) PAFRisk.u <- 1 - exp(log(1 - PAFRisk.p) - (z * sqrt(PAFRisk.var))) # Individual strata population attributable fractions for rate data (from Hanley, 2001): # PAFRate.p <- ((IRR.p - 1) / IRR.p) * (a / M1) # PAFRate.l <- ((IRR.l - 1) / IRR.l) * (a / M1) # PAFRate.u <- ((IRR.u - 1) / IRR.u) * (a / M1) # Individual strata population attributable fractions for rate data (from OpenEpi TwobyTwo - Jewell doesn't provide a method for rate data): PAFRate.ctype <- "Sullivan" PAFRate.p <- (IRatepop.p - IRateo.p) / IRatepop.p tmp <- cbind((IRatepop.l - IRateo.l) / IRatepop.l, (IRatepop.u - IRateo.u) / IRatepop.u) PAFRate.l <- apply(X = tmp, MARGIN = 1, FUN = min) PAFRate.u <- apply(X = tmp, MARGIN = 1, FUN = max) # Individual strata estimated population attributable fraction (from Hanley, 2001): # PAFest.p <- ((OR.p - 1) / OR.p) * (a / M1) # PAFest.l <- ((OR.l - 1) / OR.l) * (a / M1) # PAFest.u <- ((OR.u - 1) / OR.u) * (a / M1) # 
Individual strata estimated population attributable fraction (from OpenEpi TwobyTwo): # PAFest.p <- (Opop.p - Oo.p) / Opop.p # PAFest.l <- min((Opop.l - Oo.l) / Opop.l, (Opop.u - Oo.u) / Opop.u) # PAFest.u <- max((Opop.l - Oo.l) / Opop.l, (Opop.u - Oo.u) / Opop.u) # Individual strata population attributable fractions for risk data (from Jewell, page 84): PAFest.ctype <- "Jewell" PAFest.p <- ((a * d) - (b * c)) / (d * (a + c)) PAFest.var <- (a / (c * (a + c))) + (b / (d * (b + d))) PAFest.l <- 1 - exp(log(1 - PAFest.p) + (z * sqrt(PAFest.var))) PAFest.u <- 1 - exp(log(1 - PAFest.p) - (z * sqrt(PAFest.var))) ## ============================= ## CRUDE MEASURES OF ASSOCIATION ## ============================= # Crude incidence risk ratio - Wald confidence limits (Rothman p 135 equation 7-3): cwRR.ctype <- "Wald" .tmp <- zRRwald(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) cwRR.p <- .tmp[1] cwRR.l <- .tmp[2] cwRR.u <- .tmp[3] # Crude incidence risk ratio - Taylor confidence limits (Hightower et al 1988): ctRR.ctype <- "Taylor" .tmp <- zRRtaylor(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) ctRR.p <- .tmp[1] ctRR.l <- .tmp[2] ctRR.u <- .tmp[3] # Crude incidence risk ratio - score confidence limits: csRR.ctype <- "Score" .tmp <- zRRscore(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) csRR.p <- .tmp[1] csRR.l <- .tmp[2] csRR.u <- .tmp[3] # Crude incidence rate ratio (exact confidence intervals from epibasic.xlsx http://ph.au.dk/uddannelse/software/): ceIRR.ctype <- "Exact" ceIRR.p <- (sa / sb) / (sc / sd) celnIRR <- log(ceIRR.p) celnIRR.se <- sqrt((1 / sa) + (1 / sc)) ceIRR.se <- exp(celnIRR.se) pl <- sa / (sa + (sc + 1) * (1 / qf(1 - N., 2 * sa, 2 * sc + 2))) ph <- (sa + 1) / (sa + 1 + sc / (1 / qf(1 - N., 2 * sc, 2 * sa + 2))) ceIRR.l <- pl * sd / ((1 - pl) * sb) ceIRR.u <- ph * sd / ((1 - ph) * sb) # Crude odds ratio - Wald confidence limits: cwOR.ctype <- "Wald" .tmp <- zORwald(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) cwOR.p <- .tmp[1] cwOR.l <- .tmp[2] cwOR.u <- .tmp[3] # Crude odds ratio - Cornfield confidence limits: # Only calculate Cornfield confidence limits if N < 500; function very slow with large numbers otherwise: # Use zORcfield if cell frequencies are integer: if(sum(total) < 500 & is.integer(dat) == TRUE){ ccfOR.ctype <- "Cornfield" .tmp <- zORcfield(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) ccfOR.p <- .tmp[1] ccfOR.l <- .tmp[2] ccfOR.u <- .tmp[3] } # Return NAs for ccfOR.p if Haldane Anscombe correction used (i.e. non-integer cell frequencies): if(sum(total) < 500 & is.integer(dat) == FALSE){ ccfOR.ctype <- "Cornfield" .tmp <- c(NA,NA,NA) ccfOR.p <- .tmp[1] ccfOR.l <- .tmp[2] ccfOR.u <- .tmp[3] } # Return NAs for ccfOR.p if total >= 500: if(sum(total) >= 500){ ccfOR.ctype <- "Cornfield" ccfOR.p <- Oe.p / Oo.p ccfOR.l <- NA ccfOR.u <- NA } # Crude odds ratio - score confidence limits: csOR.ctype <- "Score" .tmp <- zORscore(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) csOR.p <- .tmp[1] csOR.l <- .tmp[2] csOR.u <- .tmp[3] # Crude odds ratio - maximum likelihood estimate (using fisher.test function): # Replaced 130612. 
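# The 2E09 guard below avoids the fisher.test() error "'x' has entries too
  # large to be integer": cell counts are handled as integers internally and
  # R's integer maximum is .Machine$integer.max (2147483647), so the MLE odds
  # ratio is reported as NA when the table total approaches that limit.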
cmOR.ctype <- "MLE" if(sum(total) < 2E09){ cmOR.tmp <- suppressWarnings(fisher.test(apply(dat, MARGIN = c(1,2), FUN = sum), conf.int = TRUE, conf.level = conf.level)) cmOR.p <- as.numeric(cmOR.tmp$estimate) cmOR.l <- as.numeric(cmOR.tmp$conf.int)[1] cmOR.u <- as.numeric(cmOR.tmp$conf.int)[2] } if(sum(total) >= 2E09){ cmOR.p <- NA cmOR.l <- NA cmOR.u <- NA } # Crude attributable risk - Wald confidence limits (Rothman p 135 equation 7-2): cwARisk.ctype <- "Wald" .tmp <- zARwald(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level, units) cwARisk.p <- .tmp[1] cwARisk.l <- .tmp[2] cwARisk.u <- .tmp[3] # Crude NNTB-NNTH - Wald confidence limits: cwNNT.p <- 1 / (cwARisk.p / units) .cwNNT.l <- 1 / (cwARisk.l / units) .cwNNT.u <- 1 / (cwARisk.u / units) cwNNT.l <- min(c(.cwNNT.l, .cwNNT.u)) cwNNT.u <- max(c(.cwNNT.l, .cwNNT.u)) # Crude attributable risk - score confidence limits: cscARisk.ctype <- "Score" .tmp <- zARscore(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level, units) cscARisk.p <- .tmp[1] cscARisk.l <- .tmp[2] cscARisk.u <- .tmp[3] # Crude NNTB-NNTH - score confidence limits: cscNNT.p <- 1 / (cscARisk.p / units) .cscNNT.l <- 1 / (cscARisk.l / units) .cscNNT.u <- 1 / (cscARisk.u / units) cscNNT.l <- min(c(.cscNNT.l, .cscNNT.u)) cscNNT.u <- max(c(.cscNNT.l, .cscNNT.u)) # Crude attributable rate (Rothman p 137 equation 7-4): cARate.ctype <- "Wald" cARate.p <- ((sa / sb) - (sc / sd)) * units cARate.se <- (sqrt((sa / sb^2) + (sc / sd^2))) * units cARate.l <- cARate.p - (z * cARate.se) cARate.u <- cARate.p + (z * cARate.se) # Crude attributable fraction for risk data (from Hanley 2001): cAFrisk.ctype <- "Score" cAFRisk.p <- (csRR.p - 1) / csRR.p cAFRisk.l <- min((csRR.l - 1) / csRR.l, (csRR.u - 1) / csRR.u) cAFRisk.u <- max((csRR.l - 1) / csRR.l, (csRR.u - 1) / csRR.u) # Crude attributable fraction for rate data (from Hanley 2001): cAFRate.ctype <- "Exact" cAFRate.p <- (ceIRR.p - 1) / ceIRR.p cAFRate.l <- min((ceIRR.l - 1) / ceIRR.l, (ceIRR.u - 1) / ceIRR.u) cAFRate.u <- max((ceIRR.l - 1) / ceIRR.l, (ceIRR.u - 1) / ceIRR.u) # Crude estimated attributable fraction (from Hanley 2001): cAFest.ctype <- "Score" .tmp <- zORscore(apply(dat, MARGIN = c(1,2), FUN = sum), conf.level) scOR.p <- .tmp[1] scOR.l <- .tmp[2] scOR.u <- .tmp[3] cAFest.p <- (scOR.p - 1) / scOR.p cAFest.l <- min((scOR.l - 1) / scOR.l, (scOR.u - 1) / scOR.u) cAFest.u <- max((scOR.l - 1) / scOR.l, (scOR.u - 1) / scOR.u) # Crude population attributable risk (same as Rothman p 135 equation 7-2): cwPARisk.ctype <- "Wald" cwPARisk.p <- ((sM1 / stotal) - (sc / sN0)) * units cwPARisk.se <- (sqrt(((sM1 * (stotal - sM1))/stotal^3) + ((sc * (sN0 - sc))/sN0^3))) * units cwPARisk.l <- cwPARisk.p - (z * cwPARisk.se) cwPARisk.u <- cwPARisk.p + (z * cwPARisk.se) # 270115 Confidence intervals for PAR from Sarah Pirikahu MSc thesis. 
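# The Pirikahu interval is a delta-method construction: the population
  # attributable risk M1/total - c/N0 is differentiated with respect to each
  # cell count (the d1 to d4 terms below), the variance is approximated by
  # sum(di^2 * cell count), and a normal-based interval is placed around the
  # point estimate.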
cpPARisk.ctype <- "Pirikahu" cpPARisk.p <- ((sM1 / stotal) - (sc / sN0)) * units cpPARisk.d1 <- (1 / stotal) - ((sa + sc) / stotal^2) cpPARisk.d2 <- -((sa + sc) / stotal^2) cpPARisk.d3 <- (sc / (sc + sd)^2) - ((sa + sc) / stotal^2) + (1 / stotal) - (1 / (sc + sd)) cpPARisk.d4 <- (sc / (sc + sd)^2) - ((sa + sc) / stotal^2) cpPARisk.var <- ((cpPARisk.d1^2) * sa) + ((cpPARisk.d2^2) * sb) + ((cpPARisk.d3^2) * sc) + ((cpPARisk.d4^2) * sd) cpPARisk.se <- sqrt(cpPARisk.var) * units cpPARisk.l <- cpPARisk.p - (z * cpPARisk.se) cpPARisk.u <- cpPARisk.p + (z * cpPARisk.se) # Crude population attributable rate (same as Rothman p 137 equation 7-4): cPARate.ctype <- "Wald" cPARate.p <- ((sM1 / sM0) - (sc / sd)) * units cPARate.se <- (sqrt((sM1 / sM0^2) + (sc / sd^2))) * units cPARate.l <- cPARate.p - (z * cPARate.se) cPARate.u <- cPARate.p + (z * cPARate.se) # Crude population attributable fractions for risk data (from Hanley 2001): # cPAFRisk.p <- ((csRR.p - 1) / csRR.p) * (sa / sM1) # cPAFRisk.l <- ((csRR.l - 1) / csRR.l) * (sa / sM1) # cPAFRisk.u <- ((csRR.u - 1) / csRR.u) * (sa / sM1) # Crude population attributable fractions for risk data (from OpenEpi TwobyTwo): # Changed 160609 cPAFRisk.ctype <- "" cPAFRisk.p <- (cIRiskpop.p - cIRisko.p) / cIRiskpop.p cPAFRisk.l <- min((cIRiskpop.l - cIRisko.l) / cIRiskpop.l, (cIRiskpop.u - cIRisko.u) / cIRiskpop.u) cPAFRisk.u <- max((cIRiskpop.l - cIRisko.l) / cIRiskpop.l, (cIRiskpop.u - cIRisko.u) / cIRiskpop.u) # Crude population attributable fractions for rate data (from Hanley 2001): # cPAFRate.ctype <- "Exact" # cPAFRate.p <- ((ceIRR.p - 1) / ceIRR.p) * (sa / sM1) # cPAFRate.l <- ((ceIRR.p - 1) / ceIRR.p) * (sa / sM1) # cPAFRate.u <- ((ceIRR.p - 1) / ceIRR.p) * (sa / sM1) # Crude population attributable fractions for rate data (from OpenEpi TwobyTwo): # Changed 160609 cPAFRate.ctype <- "" cPAFRate.p <- (cIRatepop.p - cIRateo.p) / cIRatepop.p cPAFRate.l <- min((cIRatepop.l - cIRateo.l) / cIRatepop.l, (cIRatepop.u - cIRateo.u) / cIRatepop.u) cPAFRate.u <- max((cIRatepop.l - cIRateo.l) / cIRatepop.l, (cIRatepop.u - cIRateo.u) / cIRatepop.u) # Crude estimated population attributable fraction (from Hanley, 2001): # cPAFest.p <- ((scOR.p - 1) / scOR.p) * (sa / sM1) # cPAFest.l <- ((scOR.p - 1) / scOR.p) * (sa / sM1) # cPAFest.u <- ((scOR.p - 1) / scOR.p) * (sa / sM1) # Crude estimated population attributable fraction (from OpenEpi TwobyTwo): # Changed 160609 cPAFest.ctype <- "" cPAFest.p <- (cOpop.p - cOo.p) / cOpop.p cPAFest.l <- min((cOpop.l - cOo.l) / cOpop.l, (cOpop.u - cOo.u) / cOpop.u) cPAFest.u <- max((cOpop.l - cOo.l) / cOpop.l, (cOpop.u - cOo.u) / cOpop.u) ## =============================== ## MANTEL-HAENZEL SUMMARY MEASURES ## =============================== # Summary incidence risk ratio (Rothman 2002 p 148 and 152, equation 8-2): sRR.p <- sum((a * N0 / total)) / sum((c * N1 / total)) varLNRR.s <- sum(((M1 * N1 * N0) / total^2) - ((a * c)/ total)) / (sum((a * N0)/total) * sum((c * N1)/total)) lnRR.s <- log(sRR.p) sRR.se <- (sqrt(varLNRR.s)) sRR.l <- exp(lnRR.s - (z * sqrt(varLNRR.s))) sRR.u <- exp(lnRR.s + (z * sqrt(varLNRR.s))) # Summary incidence rate ratio (Rothman 2002 p 153, equation 8-5): sIRR.p <- sum((a * d) / M0) / sum((c * b) / M0) lnIRR.s <- log(sIRR.p) varLNIRR.s <- (sum((M1 * b * d) / M0^2)) / (sum((a * d) / M0) * sum((c * b) / M0)) sIRR.se <- sqrt(varLNIRR.s) sIRR.l <- exp(lnIRR.s - (z * sqrt(varLNIRR.s))) sIRR.u <- exp(lnIRR.s + (z * sqrt(varLNIRR.s))) # Summary odds ratio (Cord Heuer 211004): sOR.p <- sum((a * d / total)) / sum((b * c 
/ total)) G <- a * d / total H <- b * c / total P <- (a + d) / total Q <- (b + c) / total GQ.HP <- G * Q + H * P sumG <- sum(G) sumH <- sum(H) sumGP <- sum(G * P) sumGH <- sum(G * H) sumHQ <- sum(H * Q) sumGQ <- sum(G * Q) sumGQ.HP <- sum(GQ.HP) # Correction from Richard Bourgon 290910: varLNOR.s <- sumGP / (2 * sumG^2) + sumGQ.HP / (2 * sumG * sumH) + sumHQ / (2 * sumH^2) # varLNOR.s <- sumGP / (2 * sumG^2) + sumGQ.HP / (2 * sumGH) + sumHQ / (2 * sumG * sumH) lnOR.s <- log(sOR.p) sOR.se <- sqrt(varLNOR.s) sOR.l <- exp(lnOR.s - z * sqrt(varLNOR.s)) sOR.u <- exp(lnOR.s + z * sqrt(varLNOR.s)) # Summary attributable risk (Rothman 2002 p 147 and p 152, equation 8-1): sARisk.p <- (sum(((a * N0) - (c * N1)) / total) / sum((N1 * N0) / total)) * units w <- (N1 * N0) / total var.p1 <- (((a * d) / (N1^2 * (N1 - 1))) + ((c * b) / (N0^2 * (N0 - 1)))) var.p1[N0 == 1] <- 0 var.p1[N1 == 1] <- 0 varARisk.s <- sum(w^2 * var.p1) / sum(w)^2 sARisk.se <- (sqrt(varARisk.s)) * units sARisk.l <- sARisk.p - (z * sARisk.se) sARisk.u <- sARisk.p + (z * sARisk.se) # Summary NNTB-NNTH: sNNT.p <- 1 / (sARisk.p / units) .sNNT.l <- 1 / (sARisk.l / units) .sNNT.u <- 1 / (sARisk.u / units) sNNT.l <- min(c(.sNNT.l, .sNNT.u)) sNNT.u <- max(c(.sNNT.l, .sNNT.u)) # Summary attributable risk (Klingenberg 2014, Statistics in Medicine 33: 2968 - 2983). SatoARisk.ctype <- "Sato" .tmp <- zMHRD.Sato(dat, conf.level, units) SatoARisk.p <- ifelse(is.null(.tmp), NA, .tmp[1]) SatoARisk.l <- ifelse(is.null(.tmp), NA, .tmp[2]) SatoARisk.u <- ifelse(is.null(.tmp), NA, .tmp[3]) # Summary NNTB-NNTH - Sato confidence limits: SatoNNT.p <- 1 / (SatoARisk.p / units) .SatoNNT.l <- 1 / (SatoARisk.l / units) .SatoNNT.u <- 1 / (SatoARisk.u / units) SatoNNT.l <- min(c(.SatoNNT.l, .SatoNNT.u)) SatoNNT.u <- max(c(.SatoNNT.l, .SatoNNT.u)) # Summary attributable risk (Klingenberg (2014, Statistics in Medicine 33: 2968 - 2983). 
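# zMHRD.Sato() and zMHRD.GR() are assumed to be internal helpers returning the
  # Mantel-Haenszel risk difference with Sato and Greenland-Robins variance
  # estimators respectively. The NNT limits are reciprocals of the risk
  # difference limits, re-ordered with min()/max() because x -> 1/x reverses
  # ordering for negative values; an attributable risk interval that spans zero
  # gives an NNT interval running through infinity (reported downstream as
  # NNTH ... to infinity to NNTB ...).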
GRARisk.ctype <- "Greenland-Robins"
  .tmp <- zMHRD.GR(dat, conf.level, units)
  GRARisk.p <- ifelse(is.null(.tmp), NA, .tmp[1])
  GRARisk.l <- ifelse(is.null(.tmp), NA, .tmp[2])
  GRARisk.u <- ifelse(is.null(.tmp), NA, .tmp[3])

  # Summary NNTB-NNTH - Greenland-Robins confidence limits:
  GRNNT.p <- 1 / (GRARisk.p / units)
  .GRNNT.l <- 1 / (GRARisk.l / units)
  .GRNNT.u <- 1 / (GRARisk.u / units)
  GRNNT.l <- min(c(.GRNNT.l, .GRNNT.u))
  GRNNT.u <- max(c(.GRNNT.l, .GRNNT.u))

  # Summary attributable rate (Rothman 2002 p 153, equation 8-4):
  sARate.p <- sum(((a * d) - (c * b)) / M0) / sum((b * d) / M0) * units
  varARate.s <- sum(((b * d) / M0)^2 * ((a / b^2) + (c / d^2 ))) / sum((b * d) / M0)^2
  sARate.se <- sqrt(varARate.s) * units
  sARate.l <- sARate.p - (z * sARate.se)
  sARate.u <- sARate.p + (z * sARate.se)

  ## ===============================
  ## EFFECT OF CONFOUNDING
  ## ===============================

  # Effect of confounding for risk ratio (Woodward p 172):
  RR.conf.p <- (csRR.p / sRR.p)
  RR.conf.l <- (csRR.l / sRR.l)
  RR.conf.u <- (csRR.u / sRR.u)

  # Effect of confounding for incidence rate ratio (Woodward p 172):
  IRR.conf.p <- (ceIRR.p / sIRR.p)
  IRR.conf.l <- (ceIRR.l / sIRR.l)
  IRR.conf.u <- (ceIRR.u / sIRR.u)

  # Effect of confounding for odds ratio (Woodward p 172):
  OR.conf.p <- (scOR.p / sOR.p)
  OR.conf.l <- (scOR.l / sOR.l)
  OR.conf.u <- (scOR.u / sOR.u)

  # Effect of confounding for attributable risk (Woodward p 172):
  ARisk.conf.p <- (cscARisk.p / scARisk.p)
  ARisk.conf.l <- (cscARisk.l / scARisk.l)
  ARisk.conf.u <- (cscARisk.u / scARisk.u)

  # Effect of confounding for attributable rate (Woodward p 172):
  ARate.conf.p <- (cARate.p / sARate.p)
  ARate.conf.l <- (cARate.l / sARate.l)
  ARate.conf.u <- (cARate.u / sARate.u)

  ## ===========================================
  ## CHI-SQUARED TESTS OF HOMOGENEITY AND EFFECT
  ## ===========================================

  if(length(a) == 1){
    # Uncorrected chi-squared test statistic for individual strata:
    .tmp <- suppressWarnings(chisq.test(dat, correct = FALSE))

    # phi coefficient used for rho.cc in epi.sscc (Fleiss 2003, p. 98). Added 241021.
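    # phi = sqrt(X-squared / n); for a 2-by-2 table this equals, in absolute
    # value, (a*d - b*c) / sqrt(N1 * N0 * M1 * M0).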
phi.coef <- sqrt(as.numeric(.tmp$statistic) / total) chi2.strata.uncor <- data.frame(test.statistic = as.numeric(.tmp$statistic), df = 1, p.value.1s = .tmp$p.value / 2, p.value.2s = .tmp$p.value, phi.coef = phi.coef) # Set chi.correction to one if correction to chi2 needed and 0 otherwise: lcfreq <- sum(ifelse(as.vector(.tmp$expected) < 5, 1, 0)) chi2.correction <- ifelse(lcfreq > 0, TRUE, FALSE) # Yates corrected chi-square test for individual strata: .tmp <- suppressWarnings(chisq.test(dat, correct = TRUE)) chi2.strata.yates <- data.frame(test.statistic = as.numeric(.tmp$statistic), df = 1, p.value.1s = .tmp$p.value / 2, p.value.2s = .tmp$p.value) # Fisher's exact test for individual strata: if(sum(total) < 2E09){ .tmp <- suppressWarnings(fisher.test(x = dat, alternative = "two.sided", conf.int = TRUE, conf.level = conf.level, simulate.p.value = FALSE)) chi2.strata.fisher <- data.frame(test.statistic = NA, df = NA, p.value.1s = .tmp$p.value / 2, p.value.2s = .tmp$p.value) } if(sum(total) >= 2E09){ chi2.strata.fisher <- data.frame(test.statistic = NA, df = NA, p.value.1s = NA, p.value.2s = NA) } } # Uncorrected chi-squared test statistic for individual strata: if(length(a) > 1){ # Uncorrected chi-squared test statistic for individual strata: test.statistic <- c(); df <- c(); p.value.1s <- c(); p.value.2s <- c(); lcfreq <- c() phi.coef <- c() for(i in 1:dim(dat)[3]){ .tmp <- suppressWarnings(chisq.test(dat[,,i], correct = FALSE)) test.statistic <- c(test.statistic, as.numeric(.tmp$statistic)) df <- c(df, 1) p.value.1s <- c(p.value.1s, .tmp$p.value / 2) p.value.2s <- c(p.value.2s, .tmp$p.value) lcfreq <- c(lcfreq, sum(ifelse(as.vector(.tmp$expected) < 5, 1, 0))) phi.coef <- c(phi.coef, sqrt(as.numeric(.tmp$statistic) / total[i])) } chi2.strata.uncor <- data.frame(test.statistic, df, p.value.1s, p.value.2s, phi.coef) # Set chi.correction to one if correction to chi2 needed and 0 otherwise: chi2.correction <- ifelse(sum(lcfreq) > 0, TRUE, FALSE) # Yates corrected chi-square test for individual strata: test.statistic <- c(); df <- c(); p.value.1s <- c(); p.value.2s <- c() for(i in 1:dim(dat)[3]){ .tmp <- suppressWarnings(chisq.test(dat[,,i], correct = TRUE)) test.statistic <- c(test.statistic, as.numeric(.tmp$statistic)) df <- c(df, 1) p.value.1s <- c(p.value.1s, .tmp$p.value / 2) p.value.2s <- c(p.value.2s, .tmp$p.value) chi2.strata.yates <- data.frame(test.statistic, df, p.value.1s, p.value.2s) } # Fisher corrected chi-square test for individual strata: test.statistic <- c(); df <- c(); p.value.1s <- c(); p.value.2s <- c() if(sum(total) < 2E09){ for(i in 1:dim(dat)[3]){ .tmp <- suppressWarnings(fisher.test(x = dat[,,i], alternative = "two.sided", conf.int = TRUE, conf.level = conf.level, simulate.p.value = FALSE)) test.statistic <- c(test.statistic, NA) df <- c(df, NA) p.value.1s <- c(p.value.1s, .tmp$p.value / 2) p.value.2s <- c(p.value.2s, .tmp$p.value) } } if(sum(total) >= 2E09){ for(i in 1:dim(dat)[3]){ test.statistic <- c(test.statistic, NA) df <- c(df, NA) p.value.1s <- c(p.value.1s, NA) p.value.2s <- c(p.value.2s, NA) } } chi2.strata.fisher <- data.frame(test.statistic, df, p.value.1s, p.value.2s) # Uncorrected chi-squared test statistic across all strata: chi2.crude.uncor <- suppressWarnings(chisq.test(x = matrix(c(sa, sc, sb, sd), ncol = 2), correct = FALSE)) phi.coef <- sqrt(as.numeric(chi2.crude.uncor$statistic) / stotal) chi2.crude.uncor <- data.frame(test.statistic = as.numeric(chi2.crude.uncor$statistic), df = 1, p.value.1s = chi2.crude.uncor$p.value / 2, p.value.2s = 
chi2.crude.uncor$p.value, phi.coef = phi.coef) # Yates corrected chi-square test across all strata: chi2.crude.yates <- suppressWarnings(chisq.test(x = matrix(c(sa, sc, sb, sd), ncol = 2), correct = FALSE)) chi2.crude.yates <- data.frame(test.statistic = as.numeric(chi2.crude.yates$statistic), df = 1, p.value.1s = chi2.crude.yates$p.value / 2, p.value.2s = chi2.crude.yates$p.value) # Fisher's exact test across all strata: chi2.crude.fisher <- suppressWarnings(fisher.test(x = matrix(c(sa, sc, sb, sd), ncol = 2), alternative = "two.sided", conf.int = TRUE, conf.level = conf.level, simulate.p.value = FALSE)) chi2.crude.fisher <- data.frame(test.statistic = NA, df = NA, p.value.1s = chi2.crude.fisher$p.value / 2, p.value.2s = chi2.crude.fisher$p.value) # Mantel-Haenszel chi-squared test that combined OR = 1: chi2.mh <- suppressWarnings(mantelhaen.test(x = dat, alternative = "two.sided", correct = FALSE, conf.level = conf.level)) chi2.mh <- data.frame(test.statistic = as.numeric(chi2.mh$statistic), df = 1, p.value.1s = chi2.mh$p.value / 2, p.value.2s = chi2.mh$p.value) # Woolf test of homogeneity of risk ratios (Jewell 2004, page 154). # First work out the Woolf estimate of the adjusted risk ratio (labelled lnRR.s. here) based on Jewell (2004, page 134): # 241118: Removed argument lnRR. <- log((a / (a + b)) / (c / (c + d))) lnRR.var. <- (b / (a * (a + b))) + (d / (c * (c + d))) wRR. <- 1 / lnRR.var. lnRR.s. <- sum(wRR. * lnRR.) / sum(wRR.) # Equation 10.3 from Jewell (2004): wRR.homog <- sum(wRR. * (lnRR. - lnRR.s.)^2) wRR.homog.p <- 1 - pchisq(wRR.homog, df = n.strata - 1) wPR.homog <- sum(wRR. * (lnRR. - lnRR.s.)^2) wPR.homog.p <- 1 - pchisq(wPR.homog, df = n.strata - 1) # Woolf test of homogeneity of odds ratios (Jewell 2004, page 154). First work out the Woolf estimate of the adjusted odds ratio (labelled lnOR.s. here) based on Jewell (2004, page 129): lnOR. <- log(((a + 0.5) * (d + 0.5)) / ((b + 0.5) * (c + 0.5))) lnOR.var. <- (1 / (a + 0.5)) + (1 / (b + 0.5)) + (1 / (c + 0.5)) + (1 / (d + 0.5)) wOR. <- 1 / lnOR.var. lnOR.s. <- sum((wOR. * lnOR.)) / sum(wOR.) # Equation 10.3 from Jewell (2004): wOR.homog <- sum(wOR. * (lnOR. - lnOR.s.)^2) wOR.homog.p <- 1 - pchisq(wOR.homog, df = n.strata - 1) # Breslow-Day test of homogeneity of odds ratio. Setup calculations. From Jim Robison-Cox, based on Jewell (2004, page 154). n11k <- dat[1,1,] n21k <- dat[2,1,] n12k <- dat[1,2,] n22k <- dat[2,2,] row1sums <- n11k + n12k row2sums <- n21k + n22k col1sums <- n11k + n21k Amax <- apply(cbind(row1sums, col1sums), 1, min) # Breslow-Day test of homogeneity of risk ratios. Astar must be no more than col1sums and no more than row1sums: # bb <- row2sums + row1sums * sRR.p - col1sums * (1 - sRR.p) # determ <- sqrt(bb^2 + 4 * (1 - sRR.p) * sRR.p * row1sums * col1sums) # Astar <- (-bb + cbind(-determ, determ)) / (2 - 2 * sRR.p) # Astar <- ifelse(Astar[,1] <= Amax & Astar[,1] >= 0, Astar[,1], Astar[,2]) # print(Astar) # Bstar <- row1sums - Astar # Cstar <- col1sums - Astar # Dstar <- row2sums - col1sums + Astar # Var <- apply(1 / cbind(Astar, Bstar, Cstar, Dstar), 1, sum)^(-1) # print(Var) # # bRR.homog <- sum((dat[1,1,] - Astar)^2 / Var) # bRR.homog.p <- 1 - pchisq(bRR.homog, df = n.strata - 1) ## Breslow-Day test of homogeneity of odds ratios. 
Astar must be no more than col1sums and no more than row1sums: bb <- row2sums + row1sums * sOR.p - col1sums * (1 - sOR.p) determ <- sqrt(bb^2 + 4 * (1 - sOR.p) * sOR.p * row1sums * col1sums) Astar <- (-bb + cbind(-determ, determ)) / (2 - 2 * sOR.p) Astar <-ifelse(Astar[,1] <= Amax & Astar[,1] >= 0, Astar[,1], Astar[,2]) # print(Astar) Bstar <- row1sums - Astar Cstar <- col1sums - Astar Dstar <- row2sums - col1sums + Astar Var <- apply(1 / cbind(Astar, Bstar, Cstar, Dstar), 1, sum)^(-1) # print(Var) bOR.homog <- sum((dat[1,1,] - Astar)^2 / Var, na.rm = TRUE) bOR.homog.p <- 1 - pchisq(bOR.homog, df = n.strata - 1) } # Test of homogeneity of attributable risks (see Woodward p 207): # AR.homogeneity <- sum(AR.p - AR.s)^2 / SE.AR^2 # Test of effect: # AR.homogeneity.p <- 1 - pchisq(AR.homogeneity, df = n.strata - 1) # AR.homog <- data.frame(test.statistic = AR.homogeneity, df = n.strata - 1, p.value = AR.homogeneity.p) ## =============================== ## RESULTS ## =============================== ## Results are entered into a list: res <- list( ## Strata incidence risk ratio: RR.strata.wald = data.frame(est = wRR.p, lower = wRR.l, upper = wRR.u), RR.strata.taylor = data.frame(est = tRR.p, lower = tRR.l, upper = tRR.u), RR.strata.score = data.frame(est = scRR.p, lower = scRR.l, upper = scRR.u), ## Crude incidence risk ratio: RR.crude.wald = data.frame(est = cwRR.p, lower = cwRR.l, upper = cwRR.u), RR.crude.taylor = data.frame(est = ctRR.p, lower = ctRR.l, upper = ctRR.u), RR.crude.score = data.frame(est = csRR.p, lower = csRR.l, upper = csRR.u), ## Mantel-Haenszel incidence risk ratio: RR.mh.wald = data.frame(est = sRR.p, lower = sRR.l, upper = sRR.u), ## Strata incidence rate ratio: IRR.strata.wald = data.frame(est = IRR.p, lower = IRR.l, upper = IRR.u), ## Crude incidence rate ratio: IRR.crude.wald = data.frame(est = ceIRR.p, lower = ceIRR.l, upper = ceIRR.u), ## Mantel-Haenszel incidence rate ratio: IRR.mh.wald = data.frame(est = sIRR.p, lower = sIRR.l, upper = sIRR.u), ## Strata odds ratio: OR.strata.wald = data.frame(est = wOR.p, lower = wOR.l, upper = wOR.u), OR.strata.cfield = data.frame(est = cfOR.p, lower = cfOR.l, upper = cfOR.u), OR.strata.score = data.frame(est = scOR.p, lower = scOR.l, upper = scOR.u), OR.strata.mle = data.frame(est = mOR.p, lower = mOR.l, upper = mOR.u), ## Crude odds ratio: OR.crude.wald = data.frame(est = cwOR.p, lower = cwOR.l, upper = cwOR.u), OR.crude.cfield = data.frame(est = ccfOR.p, lower = ccfOR.l, upper = ccfOR.u), OR.crude.score = data.frame(est = csOR.p, lower = csOR.l, upper = csOR.u), OR.crude.mle = data.frame(est = cmOR.p, lower = cmOR.l, upper = cmOR.u), ## Mantel-Haenszel odds ratio: OR.mh.wald = data.frame(est = sOR.p, lower = sOR.l, upper = sOR.u), ## Strata attributable risk: ARisk.strata.wald = data.frame(est = wARisk.p, lower = wARisk.l, upper = wARisk.u), ARisk.strata.score = data.frame(est = scARisk.p, lower = scARisk.l, upper = scARisk.u), ## Crude attributable risk: ARisk.crude.wald = data.frame(est = cwARisk.p, lower = cwARisk.l, upper = cwARisk.u), ARisk.crude.score = data.frame(est = cscARisk.p, lower = cscARisk.l, upper = cscARisk.u), ## Mantel-Haenszel attributable risk: ARisk.mh.wald = data.frame(est = sARisk.p, lower = sARisk.l, upper = sARisk.u), ARisk.mh.sato = data.frame(est = SatoARisk.p, lower = SatoARisk.l, upper = SatoARisk.u), ARisk.mh.green = data.frame(est = GRARisk.p, lower = GRARisk.l, upper = GRARisk.u), ## Strata NNTB NNTH: NNT.strata.wald = data.frame(est = wNNT.p, lower = wNNT.l, upper = wNNT.u), NNT.strata.score = 
data.frame(est = scNNT.p, lower = scNNT.l, upper = scNNT.u), ## Crude NNTB NNTH: NNT.crude.wald = data.frame(est = cwNNT.p, lower = cwNNT.l, upper = cwNNT.u), NNT.crude.score = data.frame(est = cscNNT.p, lower = cscNNT.l, upper = cscNNT.u), ## Mantel-Haenszel NNTB NNTH: NNT.mh.wald = data.frame(est = sNNT.p, lower = sNNT.l, upper = sNNT.u), NNT.mh.sato = data.frame(est = SatoNNT.p, lower = SatoNNT.l, upper = SatoNNT.u), NNT.mh.green = data.frame(est = GRNNT.p, lower = GRNNT.l, upper = GRNNT.u), ## Strata attributable rate: ARate.strata.wald = data.frame(est = ARate.p, lower = ARate.l, upper = ARate.u), ## Crude attributable rate: ARate.crude.wald = data.frame(est = cARate.p, lower = cARate.l, upper = cARate.u), ## Mantel-Haenszel adjusted attributable rate: ARate.mh.wald = data.frame(est = sARate.p, lower = sARate.l, upper = sARate.u), ## Strata attributable fraction for risk data: AFRisk.strata.wald = data.frame(est = AFRisk.p, lower = AFRisk.l, upper = AFRisk.u), ## Crude attributable fraction for risk data: AFRisk.crude.wald = data.frame(est = cAFRisk.p, lower = cAFRisk.l, upper = cAFRisk.u), ## Strata attributable fraction for rate data: AFRate.strata.wald = data.frame(est = AFRate.p, lower = AFRate.l, upper = AFRate.u), ## Crude attributable fraction for rate data: AFRate.crude.wald = data.frame(est = cAFRate.p, lower = cAFRate.l, upper = cAFRate.u), ## Strata estimated attributable fraction: AFest.strata.wald = data.frame(est = AFest.p, lower = AFest.l, upper = AFest.u), ## Crude estimated attributable fraction: AFest.crude.wald = data.frame(est = cAFest.p, lower = cAFest.l, upper = cAFest.u), ## Strata population attributable risk: PARisk.strata.wald = data.frame(est = wPARisk.p, lower = wPARisk.l, upper = wPARisk.u), PARisk.strata.piri = data.frame(est = pPARisk.p, lower = pPARisk.l, upper = pPARisk.u), ## Crude population attributable risk: PARisk.crude.wald = data.frame(est = cwPARisk.p, lower = cwPARisk.l, upper = cwPARisk.u), PARisk.crude.piri = data.frame(est = cpPARisk.p, lower = cpPARisk.l, upper = cpPARisk.u), ## Strata population attributable rate: PARate.strata.wald = data.frame(est = PARate.p, lower = PARate.l, upper = PARate.u), ## Crude population attributable rate: PARate.crude.wald = data.frame(est = cPARate.p, lower = cPARate.l, upper = cPARate.u), ## Strata population attributable fraction for risk data: PAFRisk.strata.wald = data.frame(est = PAFRisk.p, lower = PAFRisk.l, upper = PAFRisk.u), ## Crude population attributable fraction for risk data: PAFRisk.crude.wald = data.frame(est = cPAFRisk.p, lower = cPAFRisk.l, upper = cPAFRisk.u), ## Strata population attributable fraction for rate data: PAFRate.strata.wald = data.frame(est = PAFRate.p, lower = PAFRate.l, upper = PAFRate.u), ## Crude population attributable fraction for rate data: PAFRate.crude.wald = data.frame(est = cPAFRate.p, lower = cPAFRate.l, upper = cPAFRate.u), ## Strata estimated population attributable fraction: PAFest.strata.wald = data.frame(est = PAFest.p, lower = PAFest.l, upper = PAFest.u), ## Crude estimated population attributable fraction: PAFest.crude.wald = data.frame(est = cPAFest.p, lower = cPAFest.l, upper = cPAFest.u), ## Effect of confounding for risk ratio (Woodward p 172): RR.conf = data.frame(est = RR.conf.p, lower = RR.conf.l, upper = RR.conf.u), ## Effect of confounding for rate ratio (Woodward p 172): IRR.conf = data.frame(est = IRR.conf.p, lower = IRR.conf.l, upper = IRR.conf.u), ## Effect of confounding for odds ratio (Woodward p 172): OR.conf = data.frame(est = OR.conf.p, 
lower = OR.conf.l, upper = OR.conf.u), ## Effect of confounding for attributable risk (Woodward p 172): ARisk.conf = data.frame(est = ARisk.conf.p, lower = ARisk.conf.l, upper = ARisk.conf.u), ## Effect of confounding for attributable rate (Woodward p 172): ARate.conf = data.frame(est = ARate.conf.p, lower = ARate.conf.l, upper = ARate.conf.u), ## Labelling for units: units.count = c(ifelse(units == 1, "Outcomes per population unit", paste("Outcomes per ", units, " population units", sep = "")), ifelse(units == 1, "per population unit", paste("per ", units, " population units", sep = ""))), units.time = c(ifelse(units == 1, "Outcomes per unit of population time at risk", paste("Outcomes per ", units, " units of population time at risk", sep = "")), ifelse(units == 1, "per population time at risk", paste("per ", units, " units of population time at risk", sep = ""))), ## Chi-square tests: chi2.strata.uncor = chi2.strata.uncor, chi2.strata.yates = chi2.strata.yates, chi2.strata.fisher = chi2.strata.fisher, chi2.correction = chi2.correction ) if(n.strata > 1){ res$chi2.crude.uncor = chi2.crude.uncor res$chi2.crude.yates = chi2.crude.yates res$chi2.crude.fisher = chi2.crude.fisher res$chi2.mh = chi2.mh res$wOR.homog = data.frame(test.statistic = wOR.homog, df = n.strata - 1, p.value = wOR.homog.p) res$bOR.homog = data.frame(test.statistic = bOR.homog, df = n.strata - 1, p.value = bOR.homog.p) res$wPR.homog = data.frame(test.statistic = wPR.homog, df = n.strata - 1, p.value = wPR.homog.p) res$wRR.homog = data.frame(test.statistic = wRR.homog, df = n.strata - 1, p.value = wRR.homog.p) } ## Interpretation statements: directn.srr <- ifelse(res$RR.strata.wald[1] < 1, "less", "greater") directn.crr <- ifelse(res$RR.crude.wald[1] < 1, "less", "greater") directn.mrr <- ifelse(res$RR.mh.wald[1] < 1, "less", "greater") directn.sor <- ifelse(res$OR.strata.wald[1] < 1, "less", "greater") directn.cor <- ifelse(res$OR.crude.wald[1] < 1, "less", "greater") directn.mor <- ifelse(res$OR.mh.wald[1] < 1, "less", "greater") directn.sirr <- ifelse(res$IRR.strata.wald[1] < 1, "less", "greater") directn.cirr <- ifelse(res$IRR.crude.wald[1] < 1, "less", "greater") directn.mirr <- ifelse(res$IRR.mh.wald[1] < 1, "less", "greater") ## Cohort count single strata: # RR interpretation: cohort.count.ss.rr = paste("The outcome risk among the exposed was ", round(res$RR.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$RR.strata.wald[2], digits = 2)," to ", round(res$RR.strata.wald[3], digits = 2), ") times ", directn.srr, " than the outcome risk among the unexposed.", sep = "") # OR interpretation: cohort.count.ss.or = paste("The outcome odds among the exposed was ", round(res$OR.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.strata.wald[2], digits = 2)," to ", round(res$OR.strata.wald[3], digits = 2),") times ", directn.sor, " than the outcome odds among the unexposed.", sep = "") # AR interpretation: cohort.count.ss.ar = paste("Exposure changed outcome risk in the exposed by ", round(res$ARisk.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARisk.strata.wald[2], digits = 2)," to ", round(res$ARisk.strata.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "") # NNT and NNH --- from Altman (1998): nnss <- NA nnss <- as.numeric(ifelse(res$NNT.strata.wald[2] > 0 & res$NNT.strata.wald[3] > 0, 1, nnss)) nnss <- as.numeric(ifelse(res$NNT.strata.wald[2] < 0 & res$NNT.strata.wald[3] < 0, 2, nnss)) nnss <- as.numeric(ifelse(res$NNT.strata.wald[1] > 0 & 
res$NNT.strata.wald[2] < 0 & res$NNT.strata.wald[3] > 0, 3, nnss))
nnss <- as.numeric(ifelse(res$NNT.strata.wald[1] < 0 & res$NNT.strata.wald[2] < 0 & res$NNT.strata.wald[3] > 0, 4, nnss))

cohort.count.ss.nnt <- NA
cohort.count.ss.nnt[nnss == 1] <- paste("The number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")
cohort.count.ss.nnt[nnss == 2] <- paste("The number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")
cohort.count.ss.nnt[nnss == 3] <- paste("The number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")
cohort.count.ss.nnt[nnss == 4] <- paste("The number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")

# AF interpretation:
cohort.count.ss.af = paste(round(res$AFRisk.strata.wald[1] * 100, digits = 1), "% of outcomes in the exposed were attributable to exposure (", conf.level * 100, "% CI ", round(res$AFRisk.strata.wald[2] * 100, digits = 1), "% to ", round(res$AFRisk.strata.wald[3] * 100, digits = 1), "%).", sep = "")

# PAR interpretation:
cohort.count.ss.par = paste("Exposure changed outcome risk in the population by ", round(res$PARisk.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$PARisk.strata.wald[2], digits = 2)," to ", round(res$PARisk.strata.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# PAF interpretation:
cohort.count.ss.paf = paste(round(res$PAFRisk.strata.wald[1] * 100, digits = 1), "% of outcomes in the population were attributable to exposure (", conf.level * 100, "% CI ", round(res$PAFRisk.strata.wald[2] * 100, digits = 1),"% to ", round(res$PAFRisk.strata.wald[3] * 100, digits = 1), "%).", sep = "")

# -----------------------------------------------------------------------
## Cohort count multiple strata:

# Crude RR interpretation:
cohort.count.ms.crr = paste("If we don't account for confounding the outcome risk among the exposed was ", round(res$RR.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$RR.crude.wald[2], digits = 2)," to ", round(res$RR.crude.wald[3], digits = 2), ") times ", directn.crr, " than the outcome risk among the unexposed.", sep = "")

# M-H RR interpretation:
cohort.count.ms.mrr = paste("After accounting for confounding the outcome risk among the exposed was ", round(res$RR.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$RR.mh.wald[2], digits = 2)," to ", round(res$RR.mh.wald[3], digits = 2), ") times ", directn.mrr, " than the outcome risk among the unexposed.", sep = "")

# Crude OR interpretation:
cohort.count.ms.cor = paste("If we don't account for confounding the outcome odds among the exposed was ", round(res$OR.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.crude.wald[2], digits = 2)," to ", round(res$OR.crude.wald[3], digits = 2), ") times ", directn.cor, " than the outcome odds among the unexposed.", sep = "")

# M-H OR interpretation:
cohort.count.ms.mor = paste("After accounting for confounding the outcome odds among the exposed was ", round(res$OR.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.mh.wald[2], digits = 2)," to ", round(res$OR.mh.wald[3], digits = 2), ") times ", directn.mor, " than the outcome odds among the unexposed.", sep = "")

# Crude AR interpretation:
cohort.count.ms.car = paste("If we don't account for confounding exposure changed outcome risk in the exposed by ", round(res$ARisk.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARisk.crude.wald[2], digits = 2)," to ", round(res$ARisk.crude.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# M-H AR interpretation:
cohort.count.ms.mar = paste("After accounting for confounding exposure changed outcome risk in the exposed by ", round(res$ARisk.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARisk.mh.wald[2], digits = 2)," to ", round(res$ARisk.mh.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# NNTB - NNTH interpretation - multiple strata, crude:
nnmsc <- NA
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[2] > 0 & res$NNT.crude.wald[3] > 0, 1, nnmsc))
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[2] < 0 & res$NNT.crude.wald[3] < 0, 2, nnmsc))
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[1] > 0 & res$NNT.crude.wald[2] < 0 & res$NNT.crude.wald[3] > 0, 3, nnmsc))
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[1] < 0 & res$NNT.crude.wald[2] < 0 & res$NNT.crude.wald[3] > 0, 4, nnmsc))

cohort.count.ms.cnnt <- NA
cohort.count.ms.cnnt[nnmsc == 1] <- paste("If we don't account for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")
cohort.count.ms.cnnt[nnmsc == 2] <- paste("If we don't account for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")
cohort.count.ms.cnnt[nnmsc == 3] <- paste("If we don't account for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")
cohort.count.ms.cnnt[nnmsc == 4] <- paste("If we don't account for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")

# NNTB - NNTH interpretation - multiple strata, M-H adjusted:
nnmsm <- NA
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[2] > 0 & res$NNT.mh.wald[3] > 0, 1, nnmsm))
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[2] < 0 & res$NNT.mh.wald[3] < 0, 2, nnmsm))
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[1] > 0 & res$NNT.mh.wald[2] < 0 & res$NNT.mh.wald[3] > 0, 3, nnmsm))
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[1] < 0 & res$NNT.mh.wald[2] < 0 & res$NNT.mh.wald[3] > 0, 4, nnmsm))

cohort.count.ms.mnnt <- NA
cohort.count.ms.mnnt[nnmsm == 1] <- paste("After accounting for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")
cohort.count.ms.mnnt[nnmsm == 2] <- paste("After accounting for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")
cohort.count.ms.mnnt[nnmsm == 3] <- paste("After accounting for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")
cohort.count.ms.mnnt[nnmsm == 4] <- paste("After accounting for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")

# -----------------------------------------------------------------------
## Cohort time single strata:

# RR interpretation:
cohort.time.ss.rr = paste("The outcome rate among the exposed was ", round(res$IRR.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$IRR.strata.wald[2], digits = 2)," to ", round(res$IRR.strata.wald[3], digits = 2), ") times ", directn.sirr, " than the outcome rate among the unexposed.", sep = "")

# AR interpretation:
cohort.time.ss.ar = paste("Exposure changed the outcome rate in the exposed by ", round(res$ARate.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARate.crude.wald[2], digits = 2)," to ", round(res$ARate.crude.wald[3], digits = 2),") ", res$units.time[2], ".", sep = "")

# AF interpretation:
cohort.time.ss.af = paste(round(res$AFRate.crude.wald[1] * 100, digits = 1), "% of outcomes in the exposed were attributable to exposure (", conf.level * 100, "% CI ", round(res$AFRate.crude.wald[2] * 100, digits = 1), "% to ", round(res$AFRate.crude.wald[3] * 100, digits = 1), "%).", sep = "")

# PAR interpretation:
cohort.time.ss.par = paste("Exposure changed the outcome rate in the population by ", round(res$PARate.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$PARate.crude.wald[2], digits = 2)," to ", round(res$PARate.crude.wald[3], digits = 2),") ", res$units.time[2], ".", sep = "")

# PAF interpretation:
cohort.time.ss.paf = paste(round(res$PAFRate.crude.wald[1] * 100, digits = 1), "% of outcomes in the population were attributable to exposure (", conf.level * 100, "% CI ", round(res$PAFRate.crude.wald[2] * 100, digits = 1),"% to ", round(res$PAFRate.crude.wald[3] * 100, digits = 1), "%).", sep = "")

# -----------------------------------------------------------------------
## Cohort time multiple strata:

# Crude RR interpretation:
cohort.time.ms.crr = paste("If we don't account for confounding the outcome rate among the exposed was ", round(res$IRR.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$IRR.crude.wald[2], digits = 2)," to ", round(res$IRR.crude.wald[3], digits = 2), ") times ", directn.cirr, " than the outcome rate among the unexposed.", sep = "")

# M-H RR interpretation:
cohort.time.ms.mrr = paste("After accounting for confounding the outcome rate among the exposed was ", round(res$IRR.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$IRR.mh.wald[2], digits = 2)," to ", round(res$IRR.mh.wald[3], digits = 2), ") times ", directn.mirr, " than the outcome rate among the unexposed.", sep = "")

# Crude AR interpretation:
cohort.time.ms.car = paste("If we don't account for confounding exposure changed the outcome rate in the exposed by ", round(res$ARate.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARate.crude.wald[2], digits = 2)," to ", round(res$ARate.crude.wald[3], digits = 2),") ", res$units.time[2], ".", sep = "")

# M-H AR interpretation:
cohort.time.ms.mar = paste("After accounting for confounding exposure changed the outcome rate in the exposed by ", round(res$ARate.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARate.mh.wald[2], digits = 2)," to ", round(res$ARate.mh.wald[3], digits = 2),") ", res$units.time[2], ".", sep = "")

# -----------------------------------------------------------------------
## Case control single strata:

# OR interpretation:
case.control.ss.or = paste("The exposure odds among cases was ", round(res$OR.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.strata.wald[2], digits = 2)," to ", round(res$OR.strata.wald[3], digits = 2),") times ", directn.sor, " than exposure odds among controls.", sep = "")

# AF interpretation:
case.control.ss.af = paste(round(res$AFest.strata.wald[1] * 100, digits = 1), "% of outcomes in the exposed were attributable to exposure (", conf.level * 100, "% CI ", round(res$AFest.strata.wald[2] * 100, digits = 1), "% to ", round(res$AFest.strata.wald[3] * 100, digits = 1), "%).", sep = "")

# PAF interpretation:
case.control.ss.paf = paste(round(res$PAFest.strata.wald[1] * 100, digits = 1), "% of outcomes in the population were attributable to exposure (", conf.level * 100, "% CI ", round(res$PAFest.strata.wald[2] * 100, digits = 1),"% to ", round(res$PAFest.strata.wald[3] * 100, digits = 1), "%).", sep = "")

# -----------------------------------------------------------------------
## Case control multiple strata:

# Crude OR interpretation:
case.control.ms.cor = paste("If we don't account for confounding exposure odds among cases was ", round(res$OR.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.crude.wald[2], digits = 2)," to ", round(res$OR.crude.wald[3], digits = 2), ") times ", directn.cor, " than the exposure odds among controls.", sep = "")

# M-H OR interpretation:
case.control.ms.mor = paste("After accounting for confounding exposure odds among cases was ", round(res$OR.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.mh.wald[2], digits = 2)," to ", round(res$OR.mh.wald[3], digits = 2), ") times ", directn.mor, " than the exposure odds among controls.", sep = "")

# AF interpretation:
case.control.ms.caf = paste(round(res$AFest.crude.wald[1] * 100, digits = 1), "% of outcomes in the exposed were attributable to exposure (", conf.level * 100, "% CI ", round(res$AFest.crude.wald[2] * 100, digits = 1), "% to ", round(res$AFest.crude.wald[3] * 100, digits = 1), "%).", sep = "")

# PAF interpretation:
case.control.ms.cpaf = paste(round(res$PAFest.crude.wald[1] * 100, digits = 1), "% of outcomes in the population were attributable to exposure (", conf.level * 100, "% CI ", round(res$PAFest.crude.wald[2] * 100, digits = 1), "% to ",
round(res$PAFest.crude.wald[3] * 100, digits = 1), "%).", sep = "")

# -----------------------------------------------------------------------
## Cross sectional single strata:

# RR interpretation:
cross.sectional.ss.rr = paste("The outcome prevalence among the exposed was ", round(res$RR.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$RR.strata.wald[2], digits = 2)," to ", round(res$RR.strata.wald[3], digits = 2), ") times ", directn.srr, " than the outcome prevalence among the unexposed.", sep = "")

# OR interpretation:
cross.sectional.ss.or = paste("The outcome odds among the exposed was ", round(res$OR.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.strata.wald[2], digits = 2)," to ", round(res$OR.strata.wald[3], digits = 2), ") times ", directn.sor, " than the outcome odds among the unexposed.", sep = "")

# AR interpretation:
cross.sectional.ss.ar = paste("Exposure changed the outcome prevalence in the exposed by ", round(res$ARisk.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARisk.strata.wald[2], digits = 2)," to ", round(res$ARisk.strata.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# NNT and NNH --- from Altman (1998):
nnss <- NA
nnss <- as.numeric(ifelse(res$NNT.strata.wald[2] > 0 & res$NNT.strata.wald[3] > 0, 1, nnss))
nnss <- as.numeric(ifelse(res$NNT.strata.wald[2] < 0 & res$NNT.strata.wald[3] < 0, 2, nnss))
nnss <- as.numeric(ifelse(res$NNT.strata.wald[1] > 0 & res$NNT.strata.wald[2] < 0 & res$NNT.strata.wald[3] > 0, 3, nnss))
nnss <- as.numeric(ifelse(res$NNT.strata.wald[1] < 0 & res$NNT.strata.wald[2] < 0 & res$NNT.strata.wald[3] > 0, 4, nnss))

cross.sectional.ss.nnt <- NA
cross.sectional.ss.nnt[nnss == 1] <- paste("The number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")
cross.sectional.ss.nnt[nnss == 2] <- paste("The number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")
cross.sectional.ss.nnt[nnss == 3] <- paste("The number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")
cross.sectional.ss.nnt[nnss == 4] <- paste("The number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.strata.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.strata.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.strata.wald[3]), digits = 0),").", sep = "")

# AF interpretation:
cross.sectional.ss.af = paste(round(res$AFRisk.strata.wald[1] * 100, digits = 1), "% of outcomes in the exposed were attributable to exposure (", conf.level * 100, "% CI ", round(res$AFRisk.strata.wald[2] * 100, digits = 1), "% to ", round(res$AFRisk.strata.wald[3] * 100, digits = 1), "%).", sep = "")

# PAR interpretation:
cross.sectional.ss.par = paste("Exposure changed the outcome prevalence in the population by ", round(res$PARisk.strata.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$PARisk.strata.wald[2], digits = 2)," to ", round(res$PARisk.strata.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# PAF interpretation:
cross.sectional.ss.paf = paste(round(res$PAFRisk.strata.wald[1] * 100, digits = 1), "% of outcomes in the population were attributable to exposure (", conf.level * 100, "% CI ", round(res$PAFRisk.strata.wald[2] * 100, digits = 1),"% to ", round(res$PAFRisk.strata.wald[3] * 100, digits = 1), "%).", sep = "")

# -----------------------------------------------------------------------
## Cross sectional multiple strata:

# Crude RR interpretation:
cross.sectional.ms.crr = paste("If we don't account for confounding the outcome prevalence among the exposed was ", round(res$RR.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$RR.crude.wald[2], digits = 2)," to ", round(res$RR.crude.wald[3], digits = 2), ") times ", directn.crr, " than the outcome prevalence among the unexposed.", sep = "")

# M-H RR interpretation:
cross.sectional.ms.mrr = paste("After accounting for confounding the outcome prevalence among the exposed was ", round(res$RR.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$RR.mh.wald[2], digits = 2)," to ", round(res$RR.mh.wald[3], digits = 2), ") times ", directn.mrr, " than the outcome prevalence among the unexposed.", sep = "")

# Crude OR interpretation:
cross.sectional.ms.cor = paste("If we don't account for confounding the outcome odds among the exposed was ", round(res$OR.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.crude.wald[2], digits = 2)," to ", round(res$OR.crude.wald[3], digits = 2), ") times ", directn.cor, " than the outcome odds among the unexposed.", sep = "")

# M-H OR interpretation:
cross.sectional.ms.mor = paste("After accounting for confounding the outcome odds among the exposed was ", round(res$OR.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$OR.mh.wald[2], digits = 2)," to ", round(res$OR.mh.wald[3], digits = 2), ") times ", directn.mor, " than the outcome odds among the unexposed.", sep = "")

# Crude AR interpretation:
cross.sectional.ms.car = paste("If we don't account for confounding exposure changed the outcome prevalence in the exposed by ", round(res$ARisk.crude.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARisk.crude.wald[2], digits = 2)," to ", round(res$ARisk.crude.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# M-H AR interpretation:
cross.sectional.ms.mar = paste("After accounting for confounding exposure changed the outcome prevalence in the exposed by ", round(res$ARisk.mh.wald[1], digits = 2)," (", conf.level * 100,"% CI ", round(res$ARisk.mh.wald[2], digits = 2)," to ", round(res$ARisk.mh.wald[3], digits = 2),") ", res$units.count[2], ".", sep = "")

# NNTB - NNTH - multiple strata, crude:
nnmsc <- NA
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[2] > 0 & res$NNT.crude.wald[3] > 0, 1, nnmsc))
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[2] < 0 & res$NNT.crude.wald[3] < 0, 2, nnmsc))
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[1] > 0 & res$NNT.crude.wald[2] < 0 & res$NNT.crude.wald[3] > 0, 3, nnmsc))
nnmsc <- as.numeric(ifelse(res$NNT.crude.wald[1] < 0 & res$NNT.crude.wald[2] < 0 & res$NNT.crude.wald[3] > 0, 4, nnmsc))

cross.sectional.ms.cnnt <- NA
cross.sectional.ms.cnnt[nnmsc == 1] <- paste("If we don't account for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")
cross.sectional.ms.cnnt[nnmsc == 2] <- paste("If we don't account for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")
cross.sectional.ms.cnnt[nnmsc == 3] <- paste("If we don't account for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")
cross.sectional.ms.cnnt[nnmsc == 4] <- paste("If we don't account for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.crude.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.crude.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.crude.wald[3]), digits = 0),").", sep = "")

# NNTB - NNTH - multiple strata, Mantel-Haenszel:
nnmsm <- NA
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[2] > 0 & res$NNT.mh.wald[3] > 0, 1, nnmsm))
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[2] < 0 & res$NNT.mh.wald[3] < 0, 2, nnmsm))
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[1] > 0 & res$NNT.mh.wald[2] < 0 & res$NNT.mh.wald[3] > 0, 3, nnmsm))
nnmsm <- as.numeric(ifelse(res$NNT.mh.wald[1] < 0 & res$NNT.mh.wald[2] < 0 & res$NNT.mh.wald[3] > 0, 4, nnmsm))

cross.sectional.ms.mnnt <- NA
cross.sectional.ms.mnnt[nnmsm == 1] <- paste("After accounting for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")
cross.sectional.ms.mnnt[nnmsm == 2] <- paste("After accounting for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (", conf.level * 100,"% CI ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")
cross.sectional.ms.mnnt[nnmsm == 3] <- paste("After accounting for confounding the number needed to treat for one subject to benefit (NNTB) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")
cross.sectional.ms.mnnt[nnmsm == 4] <- paste("After accounting for confounding the number needed to treat for one subject to be harmed (NNTH) is ", round(abs(res$NNT.mh.wald[1]), digits = 0), " (NNTH ", round(abs(res$NNT.mh.wald[2]), digits = 0)," to infinity to NNTB ", round(abs(res$NNT.mh.wald[3]), digits = 0),").", sep = "")

# Compile all interpretative statements into a list:
interp.txt <- list(
  cohort.count.ss.rr = cohort.count.ss.rr, cohort.count.ss.or = cohort.count.ss.or, cohort.count.ss.ar = cohort.count.ss.ar, cohort.count.ss.nnt = cohort.count.ss.nnt, cohort.count.ss.af = cohort.count.ss.af, cohort.count.ss.par = cohort.count.ss.par, cohort.count.ss.paf = cohort.count.ss.paf,
  cohort.count.ms.crr = cohort.count.ms.crr, cohort.count.ms.mrr = cohort.count.ms.mrr, cohort.count.ms.cor = cohort.count.ms.cor, cohort.count.ms.mor = cohort.count.ms.mor, cohort.count.ms.car = cohort.count.ms.car, cohort.count.ms.mar = cohort.count.ms.mar, cohort.count.ms.cnnt = cohort.count.ms.cnnt, cohort.count.ms.mnnt = 
cohort.count.ms.mnnt, cohort.time.ss.rr = cohort.time.ss.rr, cohort.time.ss.ar = cohort.time.ss.ar, cohort.time.ss.af = cohort.time.ss.af, cohort.time.ss.par = cohort.time.ss.par, cohort.time.ss.paf = cohort.time.ss.paf, cohort.time.ms.crr = cohort.time.ms.crr, cohort.time.ms.mrr = cohort.time.ms.mrr, cohort.time.ms.car = cohort.time.ms.car, cohort.time.ms.mar = cohort.time.ms.mar, case.control.ss.or = case.control.ss.or, case.control.ss.af = case.control.ss.af, case.control.ss.paf = case.control.ss.paf, case.control.ms.cor = case.control.ms.cor, case.control.ms.mor = case.control.ms.mor, case.control.ms.caf = case.control.ms.caf, case.control.ms.cpaf = case.control.ms.cpaf, cross.sectional.ss.rr = cross.sectional.ss.rr, cross.sectional.ss.or = cross.sectional.ss.or, cross.sectional.ss.ar = cross.sectional.ss.ar, cross.sectional.ss.nnt = cross.sectional.ss.nnt, cross.sectional.ss.af = cross.sectional.ss.af, cross.sectional.ss.par = cross.sectional.ss.par, cross.sectional.ss.paf = cross.sectional.ss.paf, cross.sectional.ms.crr = cross.sectional.ms.crr, cross.sectional.ms.mrr = cross.sectional.ms.mrr, cross.sectional.ms.cor = cross.sectional.ms.cor, cross.sectional.ms.mor = cross.sectional.ms.mor, cross.sectional.ms.car = cross.sectional.ms.car, cross.sectional.ms.mar = cross.sectional.ms.mar) ## =============================== ## REPORTING ## =============================== ## method = "cohort.count", single strata: if(method == "cohort.count" & n.strata == 1){ ## Verbose part: massoc.detail <- list( RR.strata.wald = res$RR.strata.wald, RR.strata.taylor = res$RR.strata.taylor, RR.strata.score = res$RR.strata.score, OR.strata.wald = res$OR.strata.wald, OR.strata.cfield = res$OR.strata.cfield, OR.strata.score = res$OR.strata.score, OR.strata.mle = res$OR.strata.mle, ARisk.strata.wald = res$ARisk.strata.wald, ARisk.strata.score = res$ARisk.strata.score, NNT.strata.wald = res$NNT.strata.wald, NNT.strata.score = res$NNT.strata.score, AFRisk.strata.wald = res$AFRisk.strata.wald, PARisk.strata.wald = res$PARisk.strata.wald, PARisk.strata.piri = res$PARisk.strata.piri, PAFRisk.strata.wald= res$PAFRisk.strata.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction) massoc.summary <- data.frame( var = c("Inc risk ratio", "Odds ratio", "Attrib risk *", "Attrib fraction in exposed (%)", "Attrib risk in population *", "Attrib fraction in population (%)"), est = as.numeric(c(res$RR.strata.wald[1], res$OR.strata.wald[1], res$ARisk.strata.wald[1], res$AFRisk.strata.wald[1] * 100, res$PARisk.strata.wald[1], res$PAFRisk.strata.wald[1] * 100)), lower = as.numeric(c(res$RR.strata.wald[2], res$OR.strata.wald[2], res$ARisk.strata.wald[2], res$AFRisk.strata.wald[2] * 100, res$PARisk.strata.wald[2], res$PAFRisk.strata.wald[2] * 100)), upper = as.numeric(c(res$RR.strata.wald[3], res$OR.strata.wald[3], res$ARisk.strata.wald[3], res$AFRisk.strata.wald[3] * 100, res$PARisk.strata.wald[3], res$PAFRisk.strata.wald[3] * 100))) massoc.interp <- data.frame( var = c("Inc risk ratio", "Odds ratio", "Attrib risk *", "NNTB NNTH", "Attrib fraction in exposed (%)", "Attrib risk in population *", "Attrib fraction in population (%)"), text = c(interp.txt$cohort.count.ss.rr, interp.txt$cohort.count.ss.or, interp.txt$cohort.count.ss.ar, interp.txt$cohort.count.ss.nnt, interp.txt$cohort.count.ss.af, interp.txt$cohort.count.ss.par, interp.txt$cohort.count.ss.paf)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, 
b, N1, cIRiske.p, cOe.p) r2 <- c(c, d, N0, cIRisko.p, cOo.p) r3 <- c(M1, M0, M0 + M1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Inc risk *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.count", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.count, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method == "cohort.count", multiple strata: if(method == "cohort.count" & n.strata > 1){ ## Verbose part: massoc.detail <- list( RR.strata.wald = res$RR.strata.wald, RR.strata.taylor = res$RR.strata.taylor, RR.strata.score = res$RR.strata.score, RR.crude.wald = res$RR.crude.wald, RR.crude.taylor = res$RR.crude.taylor, RR.crude.score = res$RR.crude.score, RR.mh.wald = res$RR.mh.wald, OR.strata.wald = res$OR.strata.wald, OR.strata.cfield = res$OR.strata.cfield, OR.strata.score = res$OR.strata.score, OR.strata.mle = res$OR.strata.mle, OR.crude.wald = res$OR.crude.wald, OR.crude.score = res$OR.crude.score, OR.crude.cfield = res$OR.crude.cfield, OR.crude.mle = res$OR.crude.mle, OR.mh.wald = res$OR.mh.wald, ARisk.strata.wald = res$ARisk.strata.wald, ARisk.strata.score = res$ARisk.strata.score, ARisk.crude.wald = res$ARisk.crude.wald, ARisk.crude.score = res$ARisk.crude.score, ARisk.mh.wald = res$ARisk.mh.wald, ARisk.mh.sato = res$ARisk.mh.sato, ARisk.mh.green = res$ARisk.mh.green, NNT.strata.wald = res$NNT.strata.wald, NNT.strata.score = res$NNT.strata.score, NNT.crude.wald = res$NNT.crude.wald, NNT.crude.score = res$NNT.crude.score, NNT.mh.wald = res$NNT.mh.wald, NNT.mh.sato = res$NNT.mh.sato, NNT.mh.green = res$NNT.mh.green, PARisk.strata.wald = res$PARisk.strata.wald, PARisk.strata.piri = res$PARisk.strata.piri, PARisk.crude.wald = res$PARisk.crude.wald, PARisk.crude.piri = res$PARisk.crude.piri, AFRisk.strata.wald = res$AFRisk.strata.wald, AFRisk.crude.wald = res$AFRisk.crude.wald, PAFRisk.strata.wald= res$PAFRisk.strata.wald, PAFRisk.crude.wald = res$PAFRisk.crude.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction, chi2.crude.uncor = res$chi2.crude.uncor, chi2.crude.yates = res$chi2.crude.yates, chi2.crude.fisher = res$chi2.crude.fisher, chi2.mh = res$chi2.mh, wRR.homog = res$wRR.homog, wOR.homog = res$wOR.homog, bOR.homog = res$bOR.homog) massoc.summary <- data.frame( var = c("Inc risk ratio (crude)", "Inc risk ratio (M-H)", "Inc risk ratio (crude:M-H)", "Odds ratio (crude)", "Odds ratio (M-H)", "Odds ratio (crude:M-H)", "Attrib risk (crude) *", "Attrib risk (M-H) *", "Attrib risk (crude:M-H)"), est = as.numeric(c(res$RR.crude.wald[1], res$RR.mh.wald[1], res$RR.crude.wald[1] / res$RR.mh.wald[1], res$OR.crude.wald[1], res$OR.mh.wald[1], res$OR.crude.wald[1] / res$OR.mh.wald[1], res$ARisk.crude.wald[1], res$ARisk.mh.wald[1], res$ARisk.crude.wald[1] / res$ARisk.mh.wald[1])), lower = as.numeric(c(res$RR.crude.wald[2], 
res$RR.mh.wald[2], NA, res$OR.crude.wald[2], res$OR.mh.wald[2], NA, res$ARisk.crude.wald[2], res$ARisk.mh.wald[2], NA)), upper = as.numeric(c(res$RR.crude.wald[3], res$RR.mh.wald[3], NA, res$OR.crude.wald[3], res$OR.mh.wald[3], NA, res$ARisk.crude.wald[3], res$ARisk.mh.wald[3], NA))) massoc.interp <- data.frame( var = c("Inc risk ratio (crude)", "Inc risk ratio (M-H)", "Inc risk ratio (crude:M-H)", "Odds ratio (crude)", "Odds ratio (M-H)", "Odds ratio (crude:M-H)", "Attrib risk (crude) *", "Attrib risk (M-H) *", "Attrib risk (crude:M-H)", "NNTB NNTH (crude)", "NNTB NNTH (M-H)"), text = c(interp.txt$cohort.count.ms.crr, interp.txt$cohort.count.ms.mrr, NA, interp.txt$cohort.count.ms.cor, interp.txt$cohort.count.ms.mor, NA, interp.txt$cohort.count.ms.car, interp.txt$cohort.count.ms.mar, NA, interp.txt$cohort.count.ms.cnnt, interp.txt$cohort.count.ms.mnnt)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, sN1, cIRiske.p, cOe.p) r2 <- c(sc, sd, sN0, cIRisko.p, cOo.p) r3 <- c(sM1, sM0, sM0 + sM1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Inc risk *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc, sM1) r2 <- c(sb, sd, sM0) r3 <- c(sN1, sN0, sN0 + sN1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.count", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.count, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method = "cohort.time", single strata: if(method == "cohort.time" & n.strata == 1){ ## Verbose part: massoc.detail <- list( IRR.strata.wald = res$IRR.strata.wald, ARate.strata.wald = res$ARate.strata.wald, PARate.strata.wald = res$PARate.strata.wald, AFRate.strata.wald = res$AFRate.strata.wald, PAFRate.strata.wald = res$PAFRate.strata.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction) massoc.summary <- data.frame( var = c("Inc rate ratio", "Attrib rate *", "Attrib rate in population *", "Attrib fraction in exposed (%)", "Attrib fraction in population (%)"), est = as.numeric(c(res$IRR.strata.wald[1], res$ARate.strata.wald[1], res$PARate.strata.wald[1], res$AFRate.strata.wald[1] * 100, res$PAFRate.strata.wald[1] * 100)), lower = as.numeric(c(res$IRR.strata.wald[2], res$ARate.strata.wald[2], res$PARate.strata.wald[2], res$AFRate.strata.wald[2] * 100, res$PAFRate.strata.wald[2] * 100)), upper = as.numeric(c(res$IRR.strata.wald[3], res$ARate.strata.wald[3], res$PARate.strata.wald[3], res$AFRate.strata.wald[3] * 100, res$PAFRate.strata.wald[3] * 100))) massoc.interp <- data.frame( var = c("Inc rate ratio", "Attrib rate *", "Attrib rate in population *", "Attrib fraction in exposed (%)", "Attrib fraction in population (%)"), text = c(interp.txt$cohort.time.ss.rr, interp.txt$cohort.time.ss.ar, interp.txt$cohort.time.ss.af, interp.txt$cohort.time.ss.par, interp.txt$cohort.time.ss.paf)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, cIRatee.p) r2 <- c(c, d, cIRateo.p) r3 <- c(M1, M0, cIRatepop.p) tab 
<- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Time at risk", " Inc rate *") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Time at risk", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.time", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.time, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method = "cohort.time", multiple strata: if(method == "cohort.time" & n.strata > 1){ ## Verbose part: massoc.detail <- list( IRR.strata.wald = res$IRR.strata.wald, IRR.crude.wald = res$IRR.crude.wald, IRR.mh.wald = res$IRR.mh.wald, ARate.strata.wald = res$ARate.strata.wald, ARate.crude.wald = res$ARate.crude.wald, ARate.mh.wald = res$ARate.mh.wald, PARate.strata.wald = res$PARate.strata.wald, PARate.crude.wald = res$PARate.crude.wald, AFRate.strata.wald = res$AFRate.strata.wald, AFRate.crude.wald = res$AFRate.crude.wald, PAFRate.strata.wald = res$PAFRate.strata.wald, PAFRate.crude.wald = res$PAFRate.crude.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction, chi2.crude.uncor = res$chi2.crude.uncor, chi2.crude.yates = res$chi2.crude.yates, chi2.crude.fisher = res$chi2.crude.fisher, chi2.mh = res$chi2.mh) massoc.summary <- data.frame( var = c("Inc rate ratio (crude)", "Inc rate ratio (M-H)", "Inc rate ratio (crude:M-H)", "Attrib rate (crude) *", "Attrib rate (M-H) *", "Attrib rate (crude:M-H)"), est = as.numeric(c(res$IRR.crude.wald[1], res$IRR.mh.wald[1], res$IRR.crude.wald[1] / res$IRR.mh.wald[1], res$ARate.crude.wald[1], res$ARate.mh.wald[1], res$ARate.crude.wald[1] / res$ARate.mh.wald[1])), lower = as.numeric(c(res$IRR.crude.wald[2], res$IRR.mh.wald[2], NA, res$ARate.crude.wald[2], res$ARate.mh.wald[2], NA)), upper = as.numeric(c(res$IRR.crude.wald[3], res$IRR.mh.wald[3], NA, res$ARate.crude.wald[3], res$ARate.mh.wald[3], NA))) massoc.interp <- data.frame( var = c("Inc rate ratio (crude)", "Inc rate ratio (M-H)", "Inc rate ratio (crude:M-H)", "Attrib rate (crude) *", "Attrib rate (M-H) *", "Attrib rate (crude:M-H)"), text = c(interp.txt$cohort.time.ms.crr, interp.txt$cohort.time.ms.mrr, NA, interp.txt$cohort.time.ms.car, interp.txt$cohort.time.ms.mar, NA)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, cIRatee.p) r2 <- c(sc, sd, cIRateo.p) r3 <- c(sM1, sM0, cIRatepop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Time at risk", " Inc rate *") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc) r2 <- c(sb, sd) r3 <- c(sN1, sN0) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cohort.time", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, 
units = res$units.time, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method == "case.control", single strata: if(method == "case.control" & n.strata == 1){ ## Verbose part: massoc.detail <- list( OR.strata.wald = res$OR.strata.wald, OR.strata.cfield = res$OR.strata.cfield, OR.strata.score = res$OR.strata.score, OR.strata.mle = res$OR.strata.mle, AFest.strata.wald = res$AFest.strata.wald, PAFest.strata.wald = res$PAFest.strata.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction) massoc.summary <- data.frame( var = c("Odds ratio (W)", "Attrib fraction (est) in exposed (%)", "Attrib fraction (est) in population (%)"), est = as.numeric(c(res$OR.strata.wald[1], res$AFest.strata.wald[1] * 100, res$PAFest.strata.wald[1] * 100)), lower = as.numeric(c(res$OR.strata.wald[2], res$AFest.strata.wald[2] * 100, res$PAFest.strata.wald[2] * 100)), upper = as.numeric(c(res$OR.strata.wald[3], res$AFest.strata.wald[3] * 100, res$PAFest.strata.wald[3] * 100))) massoc.interp <- data.frame( var = c("Odds ratio (W)", "Attrib fraction (est) in exposed (%)", "Attrib fraction (est) in population (%)"), text = c(interp.txt$case.control.ss.or, interp.txt$case.control.ss.af, interp.txt$case.control.ss.paf)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, N1, cIRiske.p, cOe.p) r2 <- c(c, d, N0, cIRisko.p, cOo.p) r3 <- c(M1, M0, M0 + M1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "case.control", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.count, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method == "case.control", multiple strata: if(method == "case.control" & n.strata > 1){ ## Verbose part: massoc.detail <- list( OR.strata.wald = res$OR.strata.wald, OR.strata.cfield = res$OR.strata.cfield, OR.strata.score = res$OR.strata.score, OR.strata.mle = res$OR.strata.mle, OR.crude.wald = res$OR.crude.wald, OR.crude.cfield = res$OR.crude.cfield, OR.crude.score = res$OR.crude.score, OR.crude.mle = res$OR.crude.mle, OR.mh.wald = res$OR.mh.wald, AFest.strata.wald = res$AFest.strata.wald, AFest.crude.wald = res$AFest.crude.wald, PAFest.strata.wald = res$PAFest.strata.wald, PAFest.crude.wald = res$PAFest.crude.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction, chi2.crude.uncor = res$chi2.crude.uncor, chi2.crude.yates = res$chi2.crude.yates, chi2.crude.fisher = res$chi2.crude.fisher, chi2.mh = res$chi2.mh, OR.homog.woolf = res$wOR.homog, OR.homog.brday = res$bOR.homog) massoc.summary <- data.frame( var = c("Odds ratio (crude)", "Odds ratio (M-H)", "Odds ratio (crude:M-H)", "Attrib fraction (est) in exposed 
(crude %)", "Attrib fraction (est) in population (crude %) *"), est = as.numeric(c(res$OR.crude.wald[1], res$OR.mh.wald[1], res$OR.crude.wald[1] / res$OR.mh.wald[1], res$AFest.crude.wald[1], res$PAFest.crude.wald[1])), lower = as.numeric(c(res$OR.crude.wald[2], res$OR.mh.wald[2], NA, res$AFest.crude.wald[2], res$PAFest.crude.wald[2])), upper = as.numeric(c(res$OR.crude.wald[3], res$OR.mh.wald[3], NA, res$AFest.crude.wald[3], res$PAFest.crude.wald[3]))) massoc.interp <- data.frame( var = c("Odds ratio (crude)", "Odds ratio (M-H)", "Odds ratio (crude:M-H)", "Attrib fraction (est) in exposed (crude %)", "Attrib fraction (est) in population (crude %) *"), text = c(interp.txt$case.control.ms.cor, interp.txt$case.control.ms.mor, NA, interp.txt$case.control.ms.caf, interp.txt$case.control.ms.cpaf)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, sN1, cIRiske.p, cOe.p) r2 <- c(sc, sd, sN0, cIRisko.p, cOo.p) r3 <- c(sM1, sM0, sM0 + sM1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc, sM1) r2 <- c(sb, sd, sM0) r3 <- c(sN1, sN0, sN0 + sN1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "case.control", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.count, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method == "cross.sectional", single strata: if(method == "cross.sectional" & n.strata == 1){ ## Verbose part: massoc.detail <- list( PR.strata.wald = res$RR.strata.wald, PR.strata.taylor = res$RR.strata.taylor, PR.strata.score = res$RR.strata.score, OR.strata.wald = res$OR.strata.wald, OR.strata.cfield = res$OR.strata.cfield, OR.strata.score = res$OR.strata.score, OR.strata.mle = res$OR.strata.mle, ARisk.strata.wald = res$ARisk.strata.wald, ARisk.strata.score = res$ARisk.strata.score, NNT.strata.wald = res$NNT.strata.wald, NNT.strata.score = res$NNT.strata.score, PARisk.strata.wald = res$PARisk.strata.wald, PARisk.strata.piri = res$PARisk.strata.piri, AFRisk.strata.wald = res$AFRisk.strata.wald, PAFRisk.strata.wald = res$PAFRisk.strata.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction) massoc.summary <- data.frame( var = c("Prevalence ratio", "Odds ratio", "Attrib prevalence *", "Attrib fraction in exposed (%)", "Attrib prevalence in population *", "Attrib fraction in population (%)"), est = as.numeric(c(res$RR.strata.wald[1], res$OR.strata.wald[1], res$ARisk.strata.wald[1], res$AFRisk.strata.wald[1] * 100, res$PARisk.strata.wald[1], res$PAFRisk.strata.wald[1] * 100)), lower = as.numeric(c(res$RR.strata.wald[2], res$OR.strata.wald[2], res$ARisk.strata.wald[2], res$AFRisk.strata.wald[2] * 100, res$PARisk.strata.wald[2], res$PAFRisk.strata.wald[2] * 100)), upper = as.numeric(c(res$RR.strata.wald[3], res$OR.strata.wald[3], res$ARisk.strata.wald[3], res$AFRisk.strata.wald[3] * 100, res$PARisk.strata.wald[3], res$PAFRisk.strata.wald[3] * 100))) massoc.interp <- 
data.frame( var = c("Prevalence ratio", "Odds ratio", "Attrib prevalence *", "NNTB NNTH", "Attrib fraction in exposed (%)", "Attrib prevalence in population *", "Attrib fraction in population (%)"), text = c(interp.txt$cross.sectional.ss.rr, interp.txt$cross.sectional.ss.or, interp.txt$cross.sectional.ss.ar, interp.txt$cross.sectional.ss.nnt, interp.txt$cross.sectional.ss.af, interp.txt$cross.sectional.ss.par, interp.txt$cross.sectional.ss.paf)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(a, b, N1, cIRiske.p, cOe.p) r2 <- c(c, d, N0, cIRisko.p, cOo.p) r3 <- c(M1, M0, M0 + M1, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(a, c, M1) r2 <- c(b, d, M0) r3 <- c(N1, N0, N0 + N1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cross.sectional", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.count, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## method == "cross.sectional", multiple strata: if(method == "cross.sectional" & n.strata > 1){ ## Verbose part: massoc.detail <- list( PR.strata.wald = res$RR.strata.wald, PR.strata.taylor = res$RR.strata.taylor, PR.strata.score = res$RR.strata.score, PR.crude.wald = res$RR.crude.wald, PR.crude.taylor = res$RR.crude.taylor, PR.crude.score = res$RR.crude.score, PR.mh.wald = res$RR.mh.wald, OR.strata.wald = res$OR.strata.wald, OR.strata.cfield = res$OR.strata.cfield, OR.strata.score = res$OR.strata.score, OR.strata.mle = res$OR.strata.mle, OR.crude.wald = res$OR.crude.wald, OR.crude.cfield = res$OR.crude.cfield, OR.crude.score = res$OR.crude.score, OR.crude.mle = res$OR.crude.mle, OR.mh.wald = res$OR.mh.wald, ARisk.strata.wald = res$ARisk.strata.wald, ARisk.strata.score = res$ARisk.strata.score, ARisk.crude.wald = res$ARisk.crude.wald, ARisk.crude.score = res$ARisk.crude.score, ARisk.mh.wald = res$ARisk.mh.wald, ARisk.mh.sato = res$ARisk.mh.sato, ARisk.mh.green = res$ARisk.mh.green, NNT.strata.wald = res$NNT.strata.wald, NNT.strata.score = res$NNT.strata.score, NNT.crude.wald = res$NNT.crude.wald, NNT.crude.score = res$NNT.crude.score, NNT.mh.wald = res$NNT.mh.wald, NNT.mh.sato = res$NNT.mh.sato, NNT.mh.green = res$NNT.mh.green, PARisk.strata.wald = res$PARisk.strata.wald, PARisk.strata.piri = res$PARisk.strata.piri, PARisk.crude.wald = res$PARisk.crude.wald, PARisk.crude.piri = res$PARisk.crude.piri, AFRisk.strata.wald = res$AFRisk.strata.wald, AFRisk.crude.wald = res$AFRisk.crude.wald, PAFRisk.strata.wald = res$PAFRisk.strata.wald, PAFRisk.crude.wald = res$PAFRisk.crude.wald, chi2.strata.uncor = res$chi2.strata.uncor, chi2.strata.yates = res$chi2.strata.yates, chi2.strata.fisher = res$chi2.strata.fisher, chi2.correction = res$chi2.correction, chi2.crude.uncor = res$chi2.crude.uncor, chi2.crude.yates = res$chi2.crude.yates, chi2.crude.fisher = res$chi2.crude.fisher, chi2.mh = res$chi2.mh, PR.homog.woolf = res$wPR.homog, RR.homog.woolf = res$wRR.homog, OR.homog.woolf = res$wOR.homog, OR.homog.brday = res$bOR.homog) massoc.summary <- data.frame( var 
= c("Prevalence ratio (crude)", "Prevalence ratio (M-H)", "Prevalence ratio (crude:M-H)", "Odds ratio (crude)", "Odds ratio (M-H)", "Odds ratio (crude:M-H)", "Attributable prevalence (crude) *", "Attributable prevalence (M-H) *", "Attributable prevalence (crude:M-H)"), est = as.numeric(c(res$RR.crude.wald[1], res$RR.mh.wald[1], res$RR.crude.wald[1] / res$RR.mh.wald[1], res$OR.crude.wald[1], res$OR.mh.wald[1], res$OR.crude.wald[1] / res$OR.mh.wald[1], res$ARisk.crude.wald[1], res$ARisk.mh.wald[1], res$ARisk.crude.wald[1] / res$ARisk.mh.wald[1])), lower = as.numeric(c(res$RR.crude.wald[2], res$RR.mh.wald[2], NA, res$OR.crude.wald[2], res$OR.mh.wald[2], NA, res$ARisk.crude.wald[2], res$ARisk.mh.wald[2], NA)), upper = as.numeric(c(res$RR.crude.wald[3], res$RR.mh.wald[3], NA, res$OR.crude.wald[3], res$OR.mh.wald[3], NA, res$ARisk.crude.wald[3], res$ARisk.mh.wald[3], NA))) massoc.interp <- data.frame( var = c("Inc risk ratio (crude)", "Inc risk ratio (M-H)", "Inc risk ratio (crude:M-H)", "Odds ratio (crude)", "Odds ratio (M-H)", "Odds ratio (crude:M-H)", "Attrib risk (crude) *", "Attrib risk (M-H) *", "Attrib risk (crude:M-H)", "NNTB NNTH (crude)", "NNTB NNTH (M-H)"), text = c(interp.txt$cohort.count.ms.crr, interp.txt$cohort.count.ms.mrr, NA, interp.txt$cohort.count.ms.cor, interp.txt$cohort.count.ms.mor, NA, interp.txt$cohort.count.ms.car, interp.txt$cohort.count.ms.mar, NA, interp.txt$cohort.count.ms.cnnt, interp.txt$cohort.count.ms.mnnt)) ## Define tab: if(outcome == "as.columns"){ r1 <- c(sa, sb, sN1, cIRiske.p, cOe.p) r2 <- c(sc, sd, sN0, cIRisko.p, cOo.p) r3 <- c(sM1, sM0, sM1 + sM0, cIRiskpop.p, cOpop.p) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Outcome +", " Outcome -", " Total", " Prevalence *", " Odds") rownames(tab) <- c("Exposed +", "Exposed -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } if(outcome == "as.rows"){ ## Non verbose part - define tab: r1 <- c(sa, sc, sM1) r2 <- c(sb, sd, sM0) r3 <- c(sN1, sN0, sN0 + sN1) tab <- as.data.frame(rbind(r1, r2, r3)) colnames(tab) <- c(" Exposed +", " Exposed -", " Total") rownames(tab) <- c("Outcome +", "Outcome -", "Total") tab <- format.data.frame(tab, digits = 3, justify = "right") } ## Output creation part: out <- list(method = "cross.sectional", n.strata = n.strata, digits = digits, conf.level = conf.level, interp = interpret, units = res$units.count, tab = tab, massoc.summary = massoc.summary, massoc.interp = massoc.interp, massoc.detail = massoc.detail) } ## Set the class of the output object: class(out) <- "epi.2by2" ## And return object of class epi.2by2 as the output: return(out) } ## =========================================== ## PRINT OUTPUT ## =========================================== ## Print method for object of class epi.2by2: print.epi.2by2 <- function(x, ...) 
{ ## cohort.count --- single strata ## x <- out if(x$method == "cohort.count" & x$n.strata == 1){ print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") if(x$digits == 2){ tirr <- "\nInc risk ratio %.2f (%.2f, %.2f)" tor <- "\nOdds ratio %.2f (%.2f, %.2f)" tar <- "\nAttrib risk in the exposed * %.2f (%.2f, %.2f)" taf <- "\nAttrib fraction in the exposed (%%) %.2f (%.2f, %.2f)" tpar <- "\nAttrib risk in the population * %.2f (%.2f, %.2f)" tpaf <- "\nAttrib fraction in the population (%%) %.2f (%.2f, %.2f)" } else if(x$digits == 3){ tirr <- "\nInc risk ratio %.3f (%.3f, %.3f)" tor <- "\nOdds ratio %.3f (%.3f, %.3f)" tar <- "\nAttrib risk in the exposed * %.3f (%.3f, %.3f)" taf <- "\nAttrib fraction in the exposed (%%) %.3f (%.3f, %.3f)" tpar <- "\nAttrib risk in the population * %.3f (%.3f, %.3f)" tpaf <- "\nAttrib fraction in the population (%%) %.3f (%.3f, %.3f)" } else if(x$digits == 4){ tirr <- "\nInc risk ratio %.4f (%.4f, %.4f)" tor <- "\nOdds ratio %.4f (%.4f, %.4f)" tar <- "\nAttrib risk in the exposed * %.4f (%.4f, %.4f)" taf <- "\nAttrib fraction in the exposed (%%) %.4f (%.4f, %.4f)" tpar <- "\nAttrib risk in the population * %.4f (%.4f, %.4f)" tpaf <- "\nAttrib fraction in the population (%%) %.4f (%.4f, %.4f)" } with(x$massoc.summary, { cat(sprintf(tirr, est[1], lower[1], upper[1] )) cat(sprintf(tor, est[2], lower[2], upper[2] )) cat(sprintf(tar, est[3], lower[3], upper[3] )) cat(sprintf(taf, est[4], lower[4], upper[4] )) cat(sprintf(tpar, est[5], lower[5], upper[5] )) cat(sprintf(tpaf, est[6], lower[6], upper[6] )) }) cat("\n-------------------------------------------------------------------") # Which chi2 test to report? chi2.name <- ifelse(x$massoc.detail$chi2.correction == TRUE, "Yates corrected", "Uncorrected") chi2.statistic <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[1], as.numeric(x$massoc.detail$chi2.strata.uncor)[1]) chi2.df <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[2], as.numeric(x$massoc.detail$chi2.strata.uncor)[2]) # Two sided p-value: chi2.pvalue <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[4], as.numeric(x$massoc.detail$chi2.strata.uncor)[4]) chi2.pvalue <- ifelse(chi2.pvalue < 0.001, "<0.001", sprintf("%.3f", chi2.pvalue)) # Fisher's exact p-value: chi2.fpvalue <- ifelse(x$massoc.detail$chi2.strata.fisher$p.value.2s < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$chi2.strata.fisher$p.value.2s)) cat("\n", chi2.name, " chi2 test that OR = 1: chi2(", chi2.df, ") = ", sprintf("%.3f", chi2.statistic), " Pr>chi2 = ", chi2.pvalue, sep = "") cat("\n", "Fisher exact", " test that OR = 1:", " Pr>chi2 = ", chi2.fpvalue, sep = "") cat("\n", "Wald confidence limits") cat("\n", "CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], "\n", "\n", x$massoc.interp$text[2], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[3], x$massoc.interp$text[5], "\n") cat("\n Number needed to treat for benefit (NNTB) and harm (NNTH):") cat("\n", x$massoc.interp$text[4], "\n") cat("\n Measures of effect in the population:") cat("\n", x$massoc.interp$text[6], x$massoc.interp$text[7], "\n") } } ## cohort.count --- multiple strata if(x$method == "cohort.count" & x$n.strata > 1){ print(x$tab) 
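## The crude:M-H ratios reported below provide an informal check for confounding
## (an interpretive rule of thumb, not a formal test): a ratio close to 1 suggests
## the stratifying variable has little confounding effect, whereas a ratio that
## departs appreciably from 1 (commonly taken as more than about 10%) suggests the
## Mantel-Haenszel adjusted estimate should be reported in preference to the crude
## estimate.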
cat("\n") cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, { if(x$digits == 2){ tirrc <- "\nInc risk ratio (crude) %.2f (%.2f, %.2f)" tirrm <- "\nInc risk ratio (M-H) %.2f (%.2f, %.2f)" tirrcm <- "\nInc risk ratio (crude:M-H) %.2f" torc <- "\nOdds ratio (crude) %.2f (%.2f, %.2f)" torm <- "\nOdds ratio (M-H) %.2f (%.2f, %.2f)" torcm <- "\nOdds ratio (crude:M-H) %.2f" tarc <- "\nAttrib risk in the exposed (crude) * %.2f (%.2f, %.2f)" tarm <- "\nAttrib risk in the exposed (M-H) * %.2f (%.2f, %.2f)" tarcm <- "\nAttrib risk (crude:M-H) %.2f" } else if(x$digits == 3){ tirrc <- "\nInc risk ratio (crude) %.3f (%.3f, %.3f)" tirrm <- "\nInc risk ratio (M-H) %.3f (%.3f, %.3f)" tirrcm <- "\nInc risk ratio (crude:M-H) %.3f" torc <- "\nOdds ratio (crude) %.3f (%.3f, %.3f)" torm <- "\nOdds ratio (M-H) %.3f (%.3f, %.3f)" torcm <- "\nOdds ratio (crude:M-H) %.3f" tarc <- "\nAttrib risk in the exposed (crude) * %.3f (%.3f, %.3f)" tarm <- "\nAttrib risk in the exposed (M-H) * %.3f (%.3f, %.3f)" tarcm <- "\nAttrib risk (crude:M-H) %.3f" } else if(x$digits == 4){ tirrc <- "\nInc risk ratio (crude) %.4f (%.4f, %.4f)" tirrm <- "\nInc risk ratio (M-H) %.4f (%.4f, %.4f)" tirrcm <- "\nInc risk ratio (crude:M-H) %.4f" torc <- "\nOdds ratio (crude) %.4f (%.4f, %.4f)" torm <- "\nOdds ratio (M-H) %.4f (%.4f, %.4f)" torcm <- "\nOdds ratio (crude:M-H) %.4f" tarc <- "\nAttrib risk in the exposed (crude) * %.4f (%.4f, %.4f)" tarm <- "\nAttrib risk in the exposed (M-H) * %.4f (%.4f, %.4f)" tarcm <- "\nAttrib risk (crude:M-H) %.4f" } cat(sprintf(tirrc, est[1], lower[1], upper[1] )) cat(sprintf(tirrm, est[2], lower[2], upper[2] )) cat(sprintf(tirrcm, est[3] )) cat(sprintf(torc, est[4], lower[4], upper[4] )) cat(sprintf(torm, est[5], lower[5], upper[5] )) cat(sprintf(torcm, est[6] )) cat(sprintf(tarc, est[7], lower[7], upper[7] )) cat(sprintf(tarm, est[8], lower[8], upper[8] )) cat(sprintf(tarcm, est[9] )) }) cat("\n-------------------------------------------------------------------") # M-H test of homogeneity of RRs: wrr.st <- as.numeric(x$massoc.detail$wRR.homog[1]) wrr.df <- as.numeric(x$massoc.detail$wRR.homog[2]) wrr.p <- ifelse(as.numeric(x$massoc.detail$wRR.homog)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$wRR.homog[3])) # M-H test of homogeneity of ORs: wor.st <- as.numeric(x$massoc.detail$wOR.homog[1]) wor.df <- as.numeric(x$massoc.detail$wOR.homog[2]) wor.p <- ifelse(as.numeric(x$massoc.detail$wOR.homog)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$wOR.homog[3])) mh.st <- as.numeric(x$massoc.detail$chi2.mh[1]) mh.df <- as.numeric(x$massoc.detail$chi2.mh[2]) mh.p <- ifelse(as.numeric(x$massoc.detail$chi2.mh)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$chi2.mh[3])) cat("\n", " M-H test of homogeneity of PRs: chi2(", wrr.df, ") = ", sprintf("%.3f", wrr.st), " Pr>chi2 = ", wrr.p, sep = "") cat("\n", " M-H test of homogeneity of ORs: chi2(", wor.df, ") = ", sprintf("%.3f", wor.st), " Pr>chi2 = ", wor.p, sep = "") cat("\n", " Test that M-H adjusted OR = 1: chi2(", mh.df, ") = ", sprintf("%.3f", mh.st), " Pr>chi2 = ", mh.p, sep = "") cat("\n", "Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel; CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], x$massoc.interp$text[2], "\n") cat("\n", x$massoc.interp$text[4], x$massoc.interp$text[5], "\n") cat("\n Measures of 
effect in the exposed:") cat("\n", x$massoc.interp$text[7], x$massoc.interp$text[8], "\n") cat("\n Number needed to treat for benefit (NNTB) and harm (NNTH):") cat("\n", x$massoc.interp$text[10], x$massoc.interp$text[11], "\n") } } ## cohort.time --- single strata if(x$method == "cohort.time" & x$n.strata == 1){ print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, { if(x$digits == 2){ tirr <- "\nInc rate ratio %.2f (%.2f, %.2f)" tar <- "\nAttrib rate in the exposed * %.2f (%.2f, %.2f)" taf <- "\nAttrib fraction in the exposed (%%) %.2f (%.2f, %.2f)" tpar <- "\nAttrib rate in the population * %.2f (%.2f, %.2f)" tpaf <- "\nAttrib fraction in the population (%%) %.2f (%.2f, %.2f)" } else if(x$digits == 3){ tirr <- "\nInc rate ratio %.3f (%.3f, %.3f)" tar <- "\nAttrib rate in the exposed * %.3f (%.3f, %.3f)" taf <- "\nAttrib fraction in the exposed (%%) %.3f (%.3f, %.3f)" tpar <- "\nAttrib rate in the population * %.3f (%.3f, %.3f)" tpaf <- "\nAttrib fraction in the population (%%) %.3f (%.3f, %.3f)" } else if(x$digits == 4){ tirr <- "\nInc rate ratio %.4f (%.4f, %.4f)" tar <- "\nAttrib rate in the exposed * %.4f (%.4f, %.4f)" taf <- "\nAttrib fraction in the exposed (%%) %.4f (%.4f, %.4f)" tpar <- "\nAttrib rate in the population * %.4f (%.4f, %.4f)" tpaf <- "\nAttrib fraction in the population (%%) %.4f (%.4f, %.4f)" } cat(sprintf(tirr, est[1], lower[1], upper[1] )) cat(sprintf(tar, est[2], lower[2], upper[2] )) cat(sprintf(taf, est[4], lower[4], upper[4] )) cat(sprintf(tpar, est[3], lower[3], upper[3] )) cat(sprintf(tpaf, est[5], lower[5], upper[5] )) }) cat("\n-------------------------------------------------------------------") cat("\n", "Wald confidence limits") cat("\n", "CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[2], x$massoc.interp$text[3], "\n") cat("\n Measures of effect in the population:") cat("\n", x$massoc.interp$text[4], x$massoc.interp$text[5], "\n") } } ## cohort.time --- multiple strata if(x$method == "cohort.time" & x$n.strata > 1){ if(x$digits == 2){ tirrc <- "\nInc rate ratio (crude) %.2f (%.2f, %.2f)" tirrm <- "\nInc rate ratio (M-H) %.2f (%.2f, %.2f)" tirrcm <- "\nInc rate ratio (crude:M-H) %.2f" tarc <- "\nAttrib rate in the exposed (crude) * %.2f (%.3f, %.3f)" tarm <- "\nAttrib rate in the exposed (M-H) * %.2f (%.3f, %.3f)" tarcm <- "\nAttrib rate (crude:M-H) %.2f" } else if(x$digits == 3){ tirrc <- "\nInc rate ratio (crude) %.3f (%.3f, %.3f)" tirrm <- "\nInc rate ratio (M-H) %.3f (%.3f, %.3f)" tirrcm <- "\nInc rate ratio (crude:M-H) %.3f" tarc <- "\nAttrib rate in the exposed (crude) * %.3f (%.3f, %.3f)" tarm <- "\nAttrib rate in the exposed (M-H) * %.3f (%.3f, %.3f)" tarcm <- "\nAttrib rate (crude:M-H) %.3f" } else if(x$digits == 4){ tirrc <- "\nInc rate ratio (crude) %.4f (%.4f, %.4f)" tirrm <- "\nInc rate ratio (M-H) %.4f (%.4f, %.4f)" tirrcm <- "\nInc rate ratio (crude:M-H) %.4f" tarc <- "\nAttrib rate in the exposed (crude) * %.4f (%.4f, %.4f)" tarm <- "\nAttrib rate in the exposed (M-H) * %.4f (%.4f, %.4f)" tarcm <- "\nAttrib rate (crude:M-H) %.4f" } print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, 
{ cat(sprintf(tirrc, est[1], lower[1], upper[1] )) cat(sprintf(tirrm, est[2], lower[2], upper[2] )) cat(sprintf(tirrcm, est[3] )) cat(sprintf(tarc, est[4], lower[4], upper[4] )) cat(sprintf(tarm, est[5], lower[5], upper[5] )) cat(sprintf(tarcm, est[6] )) }) cat("\n-------------------------------------------------------------------") cat("\n", "Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel; CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], x$massoc.interp$text[2], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[4], x$massoc.interp$text[5], "\n") } } ## case.control --- single strata if(x$method == "case.control" & x$n.strata == 1){ if(x$digits == 2){ tor <- "\nOdds ratio %.2f (%.2f, %.2f)" taf <- "\nAttrib fraction (est) in the exposed (%%) %.2f (%.2f, %.2f)" tpaf <- "\nAttrib fraction (est) in the population (%%) %.2f (%.2f, %.2f)" } else if(x$digits == 3){ tor <- "\nOdds ratio %.3f (%.3f, %.3f)" taf <- "\nAttrib fraction (est) in the exposed (%%) %.3f (%.3f, %.3f)" tpaf <- "\nAttrib fraction (est) in the population (%%) %.3f (%.3f, %.3f)" } else if(x$digits == 4){ tor <- "\nOdds ratio %.4f (%.4f, %.4f)" taf <- "\nAttrib fraction (est) in the exposed (%%) %.4f (%.4f, %.4f)" tpaf <- "\nAttrib fraction (est) in the population (%%) %.4f (%.4f, %.4f)" } print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, { cat(sprintf(tor, est[1], lower[1], upper[1] )) cat(sprintf(taf, est[2], lower[2], upper[2] )) cat(sprintf(tpaf, est[3], lower[3], upper[3] )) }) cat("\n-------------------------------------------------------------------") # Which chi2 test to report? 
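## Assumed convention (the chi2.correction flag is set upstream, typically when
## expected cell frequencies are small): if chi2.correction is TRUE the Yates
## continuity-corrected statistic and its two-sided p-value are printed, otherwise
## the uncorrected statistic is printed. Fisher's exact two-sided p-value is
## reported alongside in either case.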
chi2.name <- ifelse(x$massoc.detail$chi2.correction == TRUE, "Yates corrected", "Uncorrected") chi2.statistic <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[1], as.numeric(x$massoc.detail$chi2.strata.uncor)[1]) chi2.df <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[2], as.numeric(x$massoc.detail$chi2.strata.uncor)[2]) # Two sided p-value: chi2.pvalue <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[4], as.numeric(x$massoc.detail$chi2.strata.uncor)[4]) chi2.pvalue <- ifelse(chi2.pvalue < 0.001, "<0.001", sprintf("%.3f", chi2.pvalue)) # Fisher's exact p-value: chi2.fpvalue <- ifelse(x$massoc.detail$chi2.strata.fisher$p.value.2s < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$chi2.strata.fisher$p.value.2s)) cat("\n", chi2.name, " chi2 test that OR = 1: chi2(", chi2.df, ") = ", sprintf("%.3f", chi2.statistic), " Pr>chi2 = ", chi2.pvalue, sep = "") cat("\n", "Fisher exact", " test that OR = 1:", " Pr>chi2 = ", chi2.fpvalue, sep = "") cat("\n", "Wald confidence limits") cat("\n", "CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[2], "\n") cat("\n Measures of effect in the population:") cat("\n", x$massoc.interp$text[3], "\n") } } ## case.control --- multiple strata if(x$method == "case.control" & x$n.strata > 1){ if(x$digits == 2){ torc <- "\nOdds ratio (crude) %.2f (%.2f, %.2f)" torm <- "\nOdds ratio (M-H) %.2f (%.2f, %.2f)" torcm <- "\nOdds ratio (crude:M-H) %.2f" tafc <- "\nAttrib fraction (est) in the exposed (%%) %.2f (%.2f, %.2f)" tpafc <- "\nAttrib fraction (est) in the population (%%) * %.2f (%.2f, %.2f)" } else if(x$digits == 3){ torc <- "\nOdds ratio (crude) %.3f (%.3f, %.3f)" torm <- "\nOdds ratio (M-H) %.3f (%.3f, %.3f)" torcm <- "\nOdds ratio (crude:M-H) %.3f" tafc <- "\nAttrib fraction (est) in the exposed (%%) %.3f (%.3f, %.3f)" tpafc <- "\nAttrib fraction (est) in the population (%%) * %.3f (%.3f, %.3f)" } else if(x$digits == 4){ torc <- "\nOdds ratio (crude) %.4f (%.4f, %.4f)" torm <- "\nOdds ratio (M-H) %.4f (%.4f, %.4f)" torcm <- "\nOdds ratio (crude:M-H) %.4f" tafc <- "\nAttrib fraction (est) in the exposed (%%) %.4f (%.4f, %.4f)" tpafc <- "\nAttrib fraction (est) in the population (%%) * %.4f (%.4f, %.4f)" } print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, { cat(sprintf(torc, est[1], lower[1], upper[1] )) cat(sprintf(torm, est[2], lower[2], upper[2] )) cat(sprintf(torcm, est[3] )) cat(sprintf(tafc, est[4], lower[4], upper[4] )) cat(sprintf(tpafc, est[5], lower[5], upper[5] )) }) cat("\n-------------------------------------------------------------------") # M-H test of homogeneity of ORs: wor.st <- as.numeric(x$massoc.detail$OR.homog.woolf[1]) wor.df <- as.numeric(x$massoc.detail$OR.homog.woolf[2]) wor.p <- ifelse(as.numeric(x$massoc.detail$OR.homog.woolf)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$OR.homog.woolf[3])) mh.st <- as.numeric(x$massoc.detail$chi2.mh[1]) mh.df <- as.numeric(x$massoc.detail$chi2.mh[2]) mh.p <- ifelse(as.numeric(x$massoc.detail$chi2.mh)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$chi2.mh[3])) cat("\n", " Woolf test of homogeneity of ORs: chi2(", wor.df, ") 
= ", sprintf("%.3f", wor.st), " Pr>chi2 = ", wor.p, sep = "") cat("\n", " Test that M-H adjusted OR = 1: chi2(", mh.df, ") = ", sprintf("%.3f", mh.st), " Pr>chi2 = ", mh.p, sep = "") cat("\n", "Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel; CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], x$massoc.interp$text[2], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[4], "\n") cat("\n Measures of effect in the population:") cat("\n", x$massoc.interp$text[5], "\n") } } ## cross.sectional -- single strata if(x$method == "cross.sectional" & x$n.strata == 1){ if(x$digits == 2){ tpr <- "\nPrevalence ratio %.2f (%.2f, %.2f)" tor <- "\nOdds ratio %.2f (%.2f, %.2f)" tap <- "\nAttrib prevalence in the exposed * %.2f (%.2f, %.2f)" taf <- "\nAttrib fraction in the exposed (%%) %.2f (%.2f, %.2f)" tpap <- "\nAttrib prevalence in the population * %.2f (%.2f, %.2f)" tpaf <- "\nAttrib fraction in the population (%%) %.2f (%.2f, %.2f)" } else if(x$digits == 3){ tpr <- "\nPrevalence ratio %.3f (%.3f, %.3f)" tor <- "\nOdds ratio %.3f (%.3f, %.3f)" tap <- "\nAttrib prevalence in the exposed * %.3f (%.3f, %.3f)" taf <- "\nAttrib fraction in the exposed (%%) %.3f (%.3f, %.3f)" tpap <- "\nAttrib prevalence in the population * %.3f (%.3f, %.3f)" tpaf <- "\nAttrib fraction in the population (%%) %.3f (%.3f, %.3f)" } else if(x$digits == 4){ tpr <- "\nPrevalence ratio %.4f (%.4f, %.4f)" tor <- "\nOdds ratio %.4f (%.4f, %.4f)" tap <- "\nAttrib prevalence in the exposed * %.4f (%.4f, %.4f)" taf <- "\nAttrib fraction in the exposed (%%) %.4f (%.4f, %.4f)" tpap <- "\nAttrib prevalence in the population * %.4f (%.4f, %.4f)" tpaf <- "\nAttrib fraction in the population (%%) %.4f (%.4f, %.4f)" } print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, { cat(sprintf(tpr, est[1], lower[1], upper[1] )) cat(sprintf(tor, est[2], lower[2], upper[2] )) cat(sprintf(tap, est[3], lower[3], upper[3] )) cat(sprintf(taf, est[4], lower[4], upper[4] )) cat(sprintf(tpap, est[5], lower[5], upper[5] )) cat(sprintf(tpaf, est[6], lower[6], upper[6] )) }) cat("\n-------------------------------------------------------------------") # Which chi2 test to report? 
chi2.name <- ifelse(x$massoc.detail$chi2.correction == TRUE, "Yates corrected", "Uncorrected") chi2.statistic <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[1], as.numeric(x$massoc.detail$chi2.strata.uncor)[1]) chi2.df <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[2], as.numeric(x$massoc.detail$chi2.strata.uncor)[2]) # Two sided p-value: chi2.pvalue <- ifelse(x$massoc.detail$chi2.correction == TRUE, as.numeric(x$massoc.detail$chi2.strata.yates)[4], as.numeric(x$massoc.detail$chi2.strata.uncor)[4]) chi2.pvalue <- ifelse(chi2.pvalue < 0.001, "<0.001", sprintf("%.3f", chi2.pvalue)) # Fisher's exact p-value: chi2.fpvalue <- ifelse(x$massoc.detail$chi2.strata.fisher$p.value.2s < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$chi2.strata.fisher$p.value.2s)) cat("\n", chi2.name, " chi2 test that OR = 1: chi2(", chi2.df, ") = ", sprintf("%.3f", chi2.statistic), " Pr>chi2 = ", chi2.pvalue, sep = "") cat("\n", "Fisher exact", " test that OR = 1:", " Pr>chi2 = ", chi2.fpvalue, sep = "") cat("\n", "Wald confidence limits") cat("\n", "CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], "\n", "\n", x$massoc.interp$text[2], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[3], x$massoc.interp$text[5], "\n") cat("\n Number needed to treat for benefit (NNTB) and harm (NNTH):") cat("\n", x$massoc.interp$text[4], "\n") cat("\n Measures of effect in the population:") cat("\n", x$massoc.interp$text[6], x$massoc.interp$text[7], "\n") } } ## cross.sectional --- multiple strata if(x$method == "cross.sectional" & x$n.strata > 1){ if(x$digits == 2){ tprc <- "\nPrevalence ratio (crude) %.2f (%.2f, %.2f)" tprm <- "\nPrevalence ratio (M-H) %.2f (%.2f, %.2f)" tprcm <- "\nInc risk ratio (crude:M-H) %.2f" torc <- "\nOdds ratio (crude) %.2f (%.2f, %.2f)" torm <- "\nOdds ratio (M-H) %.2f (%.2f, %.2f)" torcm <- "\nOdds ratio (crude:M-H) %.2f" tapc <- "\nAtributable prevalence in the exposed (crude) *%.2f (%.2f, %.2f)" tapm <- "\nAtributable prevalence in the exposed (M-H) * %.2f (%.2f, %.2f)" tapcm <- "\nAtributable prevalence (crude:M-H) %.2f" } else if(x$digits == 3){ tprc <- "\nPrevalence ratio (crude) %.3f (%.3f, %.3f)" tprm <- "\nPrevalence ratio (M-H) %.3f (%.3f, %.3f)" tprcm <- "\nInc risk ratio (crude:M-H) %.3f" torc <- "\nOdds ratio (crude) %.3f (%.3f, %.3f)" torm <- "\nOdds ratio (M-H) %.3f (%.3f, %.3f)" torcm <- "\nOdds ratio (crude:M-H) %.3f" tapc <- "\nAtributable prevalence in the exposed (crude) *%.3f (%.3f, %.3f)" tapm <- "\nAtributable prevalence in the exposed (M-H) * %.3f (%.3f, %.3f)" tapcm <- "\nAtributable prevalence (crude:M-H) %.3f" } else if(x$digits == 4){ tprc <- "\nPrevalence ratio (crude) %.4f (%.4f, %.4f)" tprm <- "\nPrevalence ratio (M-H) %.4f (%.4f, %.4f)" tprcm <- "\nInc risk ratio (crude:M-H) %.4f" torc <- "\nOdds ratio (crude) %.4f (%.4f, %.4f)" torm <- "\nOdds ratio (M-H) %.4f (%.4f, %.4f)" torcm <- "\nOdds ratio (crude:M-H) %.4f" tapc <- "\nAtributable prevalence in the exposed (crude) *%.4f (%.4f, %.4f)" tapm <- "\nAtributable prevalence in the exposed (M-H) * %.4f (%.4f, %.4f)" tapcm <- "\nAtributable prevalence (crude:M-H) %.4f" } print(x$tab) cat("\nPoint estimates and ", x$conf.level * 100, "%", " CIs:", sep = "") cat("\n-------------------------------------------------------------------") with(x$massoc.summary, { cat(sprintf(tprc, est[1], 
lower[1], upper[1] )) cat(sprintf(tprm, est[2], lower[2], upper[2] )) cat(sprintf(tprcm, est[3] )) cat(sprintf(torc, est[4], lower[4], upper[4] )) cat(sprintf(torm, est[5], lower[5], upper[5] )) cat(sprintf(torcm, est[6] )) cat(sprintf(tapc, est[7], lower[7], upper[7] )) cat(sprintf(tapm, est[8], lower[8], upper[8] )) cat(sprintf(tapcm, est[9] )) }) cat("\n-------------------------------------------------------------------") # Woolf test of homogeneity of PRs: wrr.st <- as.numeric(x$massoc.detail$PR.homog.woolf[1]) wrr.df <- as.numeric(x$massoc.detail$PR.homog.woolf[2]) wrr.p <- ifelse(as.numeric(x$massoc.detail$PR.homog.woolf)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$PR.homog.woolf[3])) # Woolf test of homogeneity of ORs: wor.st <- as.numeric(x$massoc.detail$OR.homog.woolf[1]) wor.df <- as.numeric(x$massoc.detail$OR.homog.woolf[2]) wor.p <- ifelse(as.numeric(x$massoc.detail$OR.homog.woolf)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$OR.homog.woolf[3])) mh.st <- as.numeric(x$massoc.detail$chi2.mh[1]) mh.df <- as.numeric(x$massoc.detail$chi2.mh[2]) mh.p <- ifelse(as.numeric(x$massoc.detail$chi2.mh)[3] < 0.001, "<0.001", sprintf("%.3f", x$massoc.detail$chi2.mh[3])) cat("\n", " Woolf test of homogeneity of PRs: chi2(", wrr.df, ") = ", sprintf("%.3f", wrr.st), " Pr>chi2 = ", wrr.p, sep = "") cat("\n", " Woolf test of homogeneity of ORs: chi2(", wor.df, ") = ", sprintf("%.3f", wor.st), " Pr>chi2 = ", wor.p, sep = "") cat("\n", " Test that M-H adjusted OR = 1: chi2(", mh.df, ") = ", sprintf("%.3f", mh.st), " Pr>chi2 = ", mh.p, sep = "") cat("\n", "Wald confidence limits") cat("\n", "M-H: Mantel-Haenszel; CI: confidence interval") cat("\n", "*", x$units[1], "\n") if(x$interp == TRUE){ cat("\n Measures of association strength:") cat("\n", x$massoc.interp$text[1], x$massoc.interp$text[2], "\n") cat("\n", x$massoc.interp$text[4], x$massoc.interp$text[5], "\n") cat("\n Measures of effect in the exposed:") cat("\n", x$massoc.interp$text[7], x$massoc.interp$text[8], "\n") cat("\n Number needed to treat for benefit (NNTB) and harm (NNTH):") cat("\n", x$massoc.interp$text[10], x$massoc.interp$text[11], "\n") } } } ## Summary method for object of class epi.2by2: summary.epi.2by2 <- function(object, ...) { rval <- list(massoc.detail = object$massoc.detail, massoc.summary = object$massoc.summary) return(rval) } epiR/R/epi.conf.R0000644000176200001440000002273414136502730013207 0ustar liggesusers"epi.conf" <- function(dat, ctype = "mean.single", method, N, design = 1, conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) # Define function to calculate confidence interval for a single proportion. 
# This is used on several occasions in this function: .propsingle <- function(tmp.dat, conf.level = conf.level){ if (is.matrix(tmp.dat) == FALSE) stop("Error: dat must be a two-column matrix") # The method implemented here follows Altman et al (2000) p 46: r <- tmp.dat[,1] n <- tmp.dat[,1] + tmp.dat[,2] p <- r/n q <- 1 - r/n se <- sqrt((p * q) / n) A <- (2 * r) + (z * z) B <- z * sqrt((z * z) + (4 * r * q)) C <- 2 * (n + (z * z)) low <- (A - B) / C upp <- (A + B) / C tmp.rval <- data.frame(est = p, se = se, lower = low, upper = upp) } if(ctype == "mean.single"){ if (is.vector(dat) == FALSE) stop("Error: dat must be a vector") mean <- mean(dat) n <- length(dat) var <- var(dat) sd <- sqrt(var) se <- sd/sqrt(n) P <- (1 - conf.level)/2 t <- abs(qt(P, n - 1)) low <- mean - (t * se) upp <- mean + (t * se) rval <- data.frame(est = mean, se = se, lower = low, upper = upp) } if(ctype == "mean.unpaired"){ if (is.data.frame(dat) == FALSE) stop("Error: dat must be a two-column data frame") n <- as.vector(by(dat[,2], dat[,1], length)) if (length(n) > 2) stop("Error: there must be only two groups") if (is.factor(dat[,1] == FALSE)) stop("Error: the first column of the data frame must be factor") sum <- as.vector(by(dat[,2], dat[,1], sum)) mean <- as.vector(by(dat[,2], dat[,1], mean)) mean.diff <- mean[1] - mean[2] var <- as.vector(by(dat[,2], dat[,1], var)) s <- sqrt((((n[1] - 1) * var[1]) + ((n[2] - 1) * var[2])) / (n[1] + n[2] - 2)) se.diff <- s * sqrt(1/n[1] + 1/n[2]) P <- (1 - conf.level)/2 t <- abs(qt(P, (n[1] + n[2] - 2))) low <- mean[1] - mean[2] - (t * se.diff) upp <- mean[1] - mean[2] + (t * se.diff) rval <- data.frame(est = mean[1] - mean[2], se = se.diff, lower = low, upper = upp) } if(ctype == "mean.paired"){ if (is.data.frame(dat) == FALSE) stop("Error: dat must be a two-column data frame") diff <- as.vector(dat[,2] - dat[,1]) n <- length(dat[,1]) mean.diff <- mean(diff) sd.diff <- sd(diff) se.diff <- sd.diff / sqrt(n) P <- (1 - conf.level)/2 t <- abs(qt(P, (n - 1))) low <- mean.diff - (t * se.diff) upp <- mean.diff + (t * se.diff) rval <- data.frame(est = mean.diff, se = se.diff, lower = low, upper = upp) } if(ctype == "prop.single"){ rval <- .propsingle(tmp.dat = dat, conf.level = conf.level) } if(ctype == "prop.unpaired"){ if (is.matrix(dat) == FALSE) stop("Error: dat must be a four-column matrix") # Work out the confidence interval for each proportion: prop.1 <- .propsingle(tmp.dat = matrix(dat[,1:2], ncol = 2), conf.level = conf.level) n1 <- dat[,1] + dat[,2] p1 <- prop.1[,1] l1 <- prop.1[,3] u1 <- prop.1[,4] prop.2 <- .propsingle(tmp.dat = matrix(dat[,3:4], ncol = 2), conf.level = conf.level) n2 <- dat[,3] + dat[,4] p2 <- prop.2[,1] l2 <- prop.2[,3] u2 <- prop.2[,4] # Altman's recommended method (p 48 - 49): D <- p1 - p2 se.D <- sqrt(((p1 * (1 - p1)) / n1) + ((p2 * (1 - p2)) / n2)) low <- D - sqrt((p1 - l1)^2 + (u2 - p2)^2) upp <- D + sqrt((p2 - l2)^2 + (u1 - p1)^2) rval <- data.frame(est = D, se = se.D, lower = low, upper = upp) } if(ctype == "prop.paired"){ if (is.matrix(dat) == FALSE) stop("Error: dat must be a four-column matrix") n <- dat[,1] + dat[,2] + dat[,3] + dat[,4] r <- dat[,1] s <- dat[,2] t <- dat[,3] u <- dat[,4] p1 <- (r + s) / n p2 <- (r + t) / n D <- (s - t) / n A <- (r + s) * (t + u) * (r + t) * (s + u) B <- (r * u) - (s * t) se.D <- 1/n * sqrt(s + t - ((s - t)^2 / n)) # Select an appropriate value for C: if(B > n/2) C <- B - n/2 if(B >= 0 & B <= n/2) C <- 0 if(B < 0) C <- B # Calculate phi: phi <- C / sqrt(A) # Set phi to zero if one of the following conditions are 
true: if(r + s == 0) phi <- 0 if(t + u == 0) phi <- 0 if(r + t == 0) phi <- 0 if(s + u == 0) phi <- 0 # Calculate confidence intervals for the raw proportions: tmp.dat <- matrix(c((r + s), (n - (r + s))), ncol = 2) prop.1 <- .propsingle(tmp.dat, conf.level = conf.level) l1 <- prop.1[,3] u1 <- prop.1[,4] tmp.dat <- matrix(c((r + t), (n - (r + t))), ncol = 2) prop.2 <- .propsingle(tmp.dat, conf.level = conf.level) l2 <- prop.2[,3] u2 <- prop.2[,4] # Altman's recommended method (p 52): low <- D - sqrt((p1 - l1)^2 - 2 * phi * (p1 - l1) * (u2 - p2) + (u2 - p2)^2) upp <- D + sqrt((p2 - l2)^2 - 2 * phi * (p2 - l2) * (u1 - p1) + (u1 - p1)^2) rval <- data.frame(est = D, se = se.D, lower = low, upper = upp) } if(ctype == "inc.risk" | ctype == "prevalence"){ if (is.matrix(dat) == FALSE) stop("Error: dat must be a two-column matrix") if(method == "exact"){ trval <- zexact(dat, conf.level) rval <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) rval } if(method == "wilson"){ trval <- zwilson(dat, conf.level) rval <- data.frame(est = trval$est, se = trval$se, lower = trval$lower, upper = trval$upper) rval } if(method == "fleiss"){ trval <- zfleiss(dat, N = N, design = design, conf.level) rval <- data.frame(est = trval$est, se = trval$se, lower = trval$lower, upper = trval$upper) rval } if(method == "agresti"){ trval <- zagresti(dat, conf.level) rval <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) rval } if(method == "clopper-pearson"){ trval <- zclopperpearson(dat, conf.level) rval <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) rval } if(method == "jeffreys"){ trval <- zjeffreys(dat, conf.level) rval <- data.frame(est = trval$est, lower = trval$lower, upper = trval$upper) rval } } if(ctype == "inc.rate"){ if (is.matrix(dat) == FALSE) stop("Error: dat must be a two-column matrix") if(method == "exact"){ # Exact method (see http://www.folkesundhed.au.dk/uddannelse/software): a <- dat[,1] n <- dat[,2] p <- a / n # Changed 210519. Now use the method of Ulm (1990), which is used in poisson.test(). See email from Kazuki Yoshida 14 May 2019: low <- (qchisq(p = 1 - N., df = 2 * a) / 2) / n upp <- (qchisq(p = N., df = 2 * (a + 1)) / 2) / n # If numerator equals zero set lower bound of confidence limit to zero: # low <- ifelse(a == 0, 0, (0.5 * qchisq(p = N., df = 2 * a + 2, lower.tail = FALSE) / n)) # Changed 020617. # up <- 0.5 * qchisq(p = 1 - N., df = 2 * a, lower.tail = FALSE) / n # up <- 0.5 * qchisq(p = 1 - N., df = 2 * a + 2, lower.tail = FALSE) / n rval <- data.frame(est = p, lower = low, upper = upp) } if(method == "byar"){ # Byar's method (see Rothman, Epidemiology An Introduction, page 134): a.prime <- dat[,1] + 0.5 p <- dat[,1]/dat[,2] PT <- dat[,2] low <- (a.prime * (1 - (1/(9 * a.prime)) - (z/3 * sqrt(1/a.prime)))^3)/PT upp <- (a.prime * (1 - (1/(9 * a.prime)) + (z/3 * sqrt(1/a.prime)))^3)/PT rval <- data.frame(est = p, lower = low, upper = upp) } } else if(ctype == "smr"){ if (is.matrix(dat) == FALSE) stop("Error: dat must be a two-column matrix") # After Dobson et al. 1991. 
Adapted from Excel code written by Iain Buchan # Public Health Informatics at the University of Manchester (www.phi.man.ac.uk) # buchan@man.ac.uk # dat[,1] = obs; dat[,2] = pop obs <- dat[,1] exp <- (sum(dat[,1]) / sum(dat[,2])) * dat[,2] smr <- obs / exp se.smr <- sqrt(dat[,2]) / exp low <- ifelse(dat[,1] > 0, ((qchisq(N., df = 2 * dat[,1], lower.tail = FALSE) / 2) / exp), 0) upp <- ifelse(dat[,1] > 0, ((qchisq(1 - N., df = 2 * (dat[,1] + 1), lower.tail = FALSE) / 2) / exp), ((qchisq(1 - N., df = 2, lower.tail = FALSE) / 2) / exp)) rval <- data.frame(est = smr, se = se.smr, lower = low, upper = upp) } else if(ctype == "odds" | ctype == "ratio"){ ## Ederer F and Mantel N (1974) Confidence limits on the ratio of two Poisson variables. American Journal of Epidemiology 100: 165 - 167 ## Cited in Altman, Machin, Bryant, and Gardner (2000) Statistics with Confidence, British Medical Journal, page 69. ## Added 161214 if (is.matrix(dat) == FALSE) stop("Error: dat must be a two-column matrix") a <- dat[,1]; b <- dat[,2] Al <- (qbinom(1 - N., size = a + b, prob = (a / (a + b)))) / (a + b) Au <- (qbinom(N., size = a + b, prob = (a / (a + b)))) / (a + b) odds.p <- (a / b) odds.l <- (Al / (1 - Al)) odds.u <- (Au / (1 - Au)) rval <- data.frame(est = odds.p, lower = odds.l, upper = odds.u) } return(rval) } epiR/R/rsu.sep.cens.R0000644000176200001440000000012613741031234014020 0ustar liggesusersrsu.sep.cens <- function(d = 1, se.u) { se.p <- 1 - (1 - se.u)^d return(se.p) }epiR/R/epi.sscc.R0000644000176200001440000002702214135126350013207 0ustar liggesusers"epi.sscc" <- function(OR, p1 = NA, p0, n, power, r = 1, phi.coef = 0, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95, method = "unmatched", fleiss = FALSE) { # p0: proportion of controls exposed. # phi.coef: correlation between case and control exposures in matched pairs (defaults to 0). # https://www2.ccrb.cuhk.edu.hk/stat/epistudies/cc2.htm alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if (!is.na(OR) & !is.na(n) & !is.na(power)){ stop("Error: at least one of OR, n and power must be NA.") } # ------------------------------------------------------------------------------------------------------ # Unmatched case-control - sample size: if(method == "unmatched" & !is.na(OR) & is.na(n) & !is.na(power)){ beta <- 1 - power z.beta <- qnorm(p = beta, lower.tail = FALSE) # Sample size without Fleiss correction - from Woodward's spreadsheet: Pc <- (p0 / (r + 1)) * ((r * OR / (1 + (OR - 1) * p0)) + 1) t1 <- (r + 1) * (1 + (OR - 1) * p0)^2 t2 <- r * p0^2 * (p0 - 1)^2 * (OR - 1)^2 t3 <- z.alpha * sqrt((r + 1) * Pc * (1 - Pc)) t4 <- OR * p0 * (1 - p0) t5 <- 1 + (OR - 1) * p0 t6 <- t4 / (t5^2) t7 <- r * p0 * (1 - p0) t8 <- z.beta * sqrt(t6 + t7) n. <- (t1 / t2) * (t3 + t8)^2 # q0 <- 1 - p0 # p1 <- (p0 * OR) / (1 + p0 * (OR - 1)) # q1 <- 1 - p1 # pbar <- (p1 + (r * p0)) / (r + 1) # qbar <- 1 - pbar # Np1 <- z.alpha * sqrt((r + 1) * pbar * qbar) # Np2 <- z.beta * sqrt((r * p1 * q1) + (p0 * q0)) # Dp1 <- r * (p1 - p0)^2 # n. <- (Np1 + Np2)^2 / Dp1 if(fleiss == TRUE){ d <- 1 + ((2 * (r + 1)) / (n. * r * abs(p0 - p1))) n <- (n. / 4) * (1 + sqrt(d))^2 } if(fleiss == FALSE){ n <- n. 
} if(nfractional == TRUE){ n1 <- n / (1 + r) n1 <- n1 * design n0 <- r * n1 n.total <- n0 + n1 } if(nfractional == FALSE){ n1 <- n / (1 + r) n1 <- ceiling(n1 * design) n0 <- ceiling(r * n1) n.total <- n0 + n1 } rval <- list(n.total = n.total, n.case = n1, n.control = n0, power = power, OR = OR) } # Unmatched case-control - power: else if(method == "unmatched" & !is.na(OR) & !is.na(n) & is.na(power)){ # From Woodward's spreadsheet: Pc <- (p0 / (r + 1)) * ((r * OR / (1 + (OR - 1) * p0)) + 1) M <- abs((OR - 1) * (p0 - 1) / (1 + (OR - 1) * p0)) termn1 <- M * p0 * sqrt(n * r) / sqrt(r + 1) termn2 <- z.alpha * sqrt((r + 1) * Pc *(1 - Pc)) termd1 <- OR * p0 * (1 - p0) / (1 + (OR - 1) * p0)^2 termd2 <- r * p0 * (1 - p0) z.beta <- (termn1 - termn2) / sqrt(termd1 + termd2) # q0 <- 1 - p0 # p1 <- (p0 * OR) / (1 + p0 * (OR - 1)) # q1 <- 1 - p1 # pbar <- (p1 + (r * p0)) / (r + 1) # qbar <- 1 - pbar n1 <- n / (1 + r) # z.beta <- sqrt((n1 * r * (p1 - p0)^2) / (pbar * qbar * (r + 1))) - z.alpha power <- pnorm(z.beta, mean = 0, sd = 1) if(nfractional == TRUE){ n1 <- n1 * design n0 <- n - n1 n.total <- n0 + n1 } if(nfractional == FALSE){ n1 <- ceiling(n1 * design) n0 <- n - n1 n.total <- n0 + n1 } rval <- list(n.total = n.total, n.case = n1, n.control = n0, power = power, OR = OR) } # Unmatched case-control - effect: else if(method == "unmatched" & is.na(OR) & !is.na(n) & !is.na(power)){ if(nfractional == TRUE){ n1 <- n / (r + 1) n1 <- n1 * design n0 <- n - n1 n.total <- n0 + n1 } if(nfractional == FALSE){ n1 <- n / (r + 1) n1 <- ceiling(n1 * design) n0 <- n - n1 n.total <- n0 + n1 } Pfun <- function(OR, power, p0, r, n, design, z.alpha){ q0 <- 1 - p0 p1 <- (p0 * OR) / (1 + p0 * (OR - 1)) q1 <- 1 - p1 pbar <- (p1 + (r * p0)) / (r + 1) qbar <- 1 - pbar # Account for the design effect: n1 <- n / (1 + r) n1 <- ceiling(n1 * design) n0 <- n - n1 # n0 <- ceiling(r * n1) z.beta <- sqrt((n1 * r * (p1 - p0)^2) / (pbar * qbar * (r + 1))) - z.alpha # Take the calculated value of the power and subtract the power entered by the user: pnorm(z.beta, mean = 0, sd = 1) - power } # Find the value of OR that matches the power entered by the user: OR.up <- uniroot(Pfun, power = power, p0 = p0, r = r, n = n, design = design, z.alpha = z.alpha, interval = c(1,1E06))$root OR.lo <- uniroot(Pfun, power = power, p0 = p0, r = r, n = n, design = design, z.alpha = z.alpha, interval = c(0.0001,1))$root # x = seq(from = 0.01, to = 100, by = 0.01) # y = Pfun(x, power = 0.8, p0 = 0.15, r = 1, n = 150, design = 1, z.alpha = 1.96) # windows(); plot(x, y, xlim = c(0,5)); abline(h = 0, lty = 2) # Two possible values for OR meet the conditions of Pfun. So hence we set the lower bound of the search interval to 1. 
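      # Illustrative call for this branch (arbitrary values, not from the package documentation):
      #   epi.sscc(OR = NA, p1 = NA, p0 = 0.15, n = 150, power = 0.80, r = 1, design = 1,
      #      sided.test = 2, conf.level = 0.95, method = "unmatched")
      # returns the odds ratios detectable with the requested power; both roots are reported,
      # one protective (OR < 1) and one harmful (OR > 1).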
rval <- list(n.total = n1 + n0, n.case = n1, n.control = n0, power = power, OR = c(OR.lo, OR.up)) } # ------------------------------------------------------------------------------------------------------ # Matched case-control - sample size: if(method == "matched" & !is.na(OR) & is.na(n) & !is.na(power)){ beta <- 1 - power z.beta <- qnorm(p = beta, lower.tail = FALSE) odds0 = p0 / (1 - p0) odds1 = odds0 * OR p1 = odds1 / (1 + odds1) delta.p = p1 - p0 psi <- OR pq <- p1 * (1 - p1) * p0 * (1 - p0) p0.p <- (p1 * p0 + phi.coef * sqrt(pq)) / p1 p0.n <- (p0 * (1 - p1) - phi.coef * sqrt(pq)) / (1 - p1) tm <- ee.psi <- ee.one <- nu.psi <- nu.one <- rep(NA, r) for(m in 1:r){ tm[m] <- p1 * choose(r, m - 1) * ((p0.p)^(m - 1)) * ((1 - p0.p)^(r - m + 1)) + (1 - p1) * choose(r,m) * ((p0.n)^m) * ((1 - p0.n))^(r - m) ee.psi[m] <- m * tm[m] * psi / (m * psi + r - m + 1) ee.one[m] <- m * tm[m] / (r + 1) nu.psi[m] <- m * tm[m] * psi * (r - m + 1) / ((m * psi + r - m + 1)^2) nu.one[m] <- m * tm[m] * (r - m + 1) / ((r + 1)^2) } ee.psi <- sum(ee.psi) ee.one <- sum(ee.one) nu.psi <- sum(nu.psi) nu.one <- sum(nu.one) n. <- ((z.beta * sqrt(nu.psi) + z.alpha * sqrt(nu.one))^2) / ((ee.one - ee.psi)^2) if(fleiss == TRUE){ d <- 1 + ((2 * (r + 1)) / (n. * r * abs(p0 - p1))) n <- (n. / 4) * (1 + sqrt(d))^2 } if(fleiss == FALSE){ n <- n. } if(nfractional == TRUE){ n1 <- n n0 <- r * n1 n.total <- n0 + n1 } if(nfractional == FALSE){ n1 <- ceiling(n) n0 <- r * n1 n.total <- n0 + n1 } rval <- list(n.total = n.total, n.case = n1, n.control = n0, power = power, OR = OR) } # Matched case-control - power: else if(method == "matched" & !is.na(OR) & !is.na(n) & is.na(power)){ odds0 = p0 / (1 - p0) odds1 = odds0 * OR p1 = odds1 / (1 + odds1) delta.p = p1 - p0 psi <- OR beta <- 1 - power z.beta <- qnorm(p = beta,lower.tail = FALSE) pq <- p1 * (1 - p1) * p0 * (1 - p0) p0.p <- (p1 * p0 + phi.coef * sqrt(pq)) / p1 p0.n <- (p0 * (1 - p1) - phi.coef * sqrt(pq)) / (1 - p1) tm <- ee.psi <- ee.one <- nu.psi <- nu.one <- rep(NA, r) for(m in 1:r){ tm[m] <- p1 * choose(r, m - 1) * ((p0.p)^(m - 1)) * ((1 - p0.p)^(r - m + 1)) + (1 - p1) * choose(r,m) * ((p0.n)^m) * ((1 - p0.n))^(r - m) ee.psi[m] <- m * tm[m] * psi / (m * psi + r - m + 1) ee.one[m] <- m * tm[m] / (r + 1) nu.psi[m] <- m * tm[m] * psi * (r - m + 1) / ((m * psi + r - m + 1)^2) nu.one[m] <- m * tm[m] * (r - m + 1) / ((r + 1)^2) } ee.psi <- sum(ee.psi) ee.one <- sum(ee.one) nu.psi <- sum(nu.psi) nu.one <- sum(nu.one) if(nfractional == TRUE){ n1 <- n / (1 + r) n1 <- n1 * design n0 <- n - n1 n.total <- n0 + n1 } if(nfractional == FALSE){ n1 <- n / (1 + r) n1 <- ceiling(n1 * design) n0 <- n - n1 n.total <- n0 + n1 } z.beta <- (sqrt(n1 * ((ee.one - ee.psi)^2)) - z.alpha * sqrt(nu.one)) / sqrt(nu.psi) power <- 1 - pnorm(q = z.beta, lower.tail = FALSE) rval <- list(n.total = n1 + n0, n.case = n1, n.control = n0, power = power, OR = OR) } # Matched case-control - effect: else if(method == "matched" & is.na(OR) & !is.na(n) & !is.na(power)){ if(nfractional == TRUE){ n1 <- n / (1 + r) n1 <- n1 * design n0 <- n - n1 n.total <- n1 + n0 } if(nfractional == FALSE){ n1 <- n / (1 + r) n1 <- ceiling(n1 * design) n0 <- n - n1 n.total <- n1 + n0 } Pfun <- function(OR, power, p0, r, n, design, z.alpha){ odds0 = p0 / (1 - p0) odds1 = odds0 * OR p1 = odds1 / (1 + odds1) delta.p = p1 - p0 psi <- OR beta <- 1 - power z.beta <- qnorm(p = beta, lower.tail = FALSE) pq <- p1 * (1 - p1) * p0 * (1 - p0) p0.p <- (p1 * p0 + phi.coef * sqrt(pq)) / p1 p0.n <- (p0 * (1 - p1) - phi.coef * sqrt(pq)) / (1 - p1) tm <- 
ee.psi <- ee.one <- nu.psi <- nu.one <- rep(NA, r) for(m in 1:r){ tm[m] <- p1 * choose(r, m - 1) * ((p0.p)^(m - 1)) * ((1 - p0.p)^(r - m + 1)) + (1 - p1) * choose(r,m) * ((p0.n)^m) * ((1 - p0.n))^(r - m) ee.psi[m] <- m * tm[m] * psi / (m * psi + r - m + 1) ee.one[m] <- m * tm[m] / (r + 1) nu.psi[m] <- m * tm[m] * psi * (r - m + 1) / ((m * psi + r - m + 1)^2) nu.one[m] <- m * tm[m] * (r - m + 1) / ((r + 1)^2) } ee.psi <- sum(ee.psi) ee.one <- sum(ee.one) nu.psi <- sum(nu.psi) nu.one <- sum(nu.one) z.beta <- (sqrt(n1 * ((ee.one - ee.psi)^2)) - z.alpha * sqrt(nu.one)) / sqrt(nu.psi) # Take the calculated value of the power and subtract the power entered by the user: pnorm(z.beta, mean = 0, sd = 1) - power } # Find the value of OR that matches the power entered by the user: OR.up <- uniroot(Pfun, power = power, p0 = p0, r = r, n = n, design = design, z.alpha = z.alpha, interval = c(1,1E06))$root OR.lo <- uniroot(Pfun, power = power, p0 = p0, r = r, n = n, design = design, z.alpha = z.alpha, interval = c(0.0001,1))$root # x = seq(from = 0.01, to = 100, by = 0.01) # y = Pfun(x, power = 0.8, p0 = 0.15, r = 1, n = 150, design = 1, z.alpha = 1.96) # windows(); plot(x, y, xlim = c(0,5)); abline(h = 0, lty = 2) # Two possible values for OR meet the conditions of Pfun. So hence we set the lower bound of the search interval to 1. rval <- list(n.total = n1 + n0, n.case = n1, n.control = n0, power = power, OR = c(OR.lo, OR.up)) } # ------------------------------------------------------------------------------------------------------ rval } epiR/R/epi.smd.R0000644000176200001440000000520613117711466013046 0ustar liggesusers"epi.smd" <- function(mean.trt, sd.trt, n.trt, mean.ctrl, sd.ctrl, n.ctrl, names, method = "cohens", conf.level = 0.95) { # Declarations: N <- 1 - ((1 - conf.level) / 2) z <- qnorm(N, mean = 0, sd = 1) k <- length(names) N.i <- n.trt + n.ctrl # Pooled standard deviation of the two groups: s.i <- sqrt((((n.trt - 1) * sd.trt^2) + ((n.ctrl - 1) * sd.ctrl^2)) / (N.i - 2)) if(method == "cohens") { # Standardised mean difference method using Cohen's d: MD.i <- (mean.trt - mean.ctrl) / s.i SE.MD.i <- sqrt((N.i / (n.trt * n.ctrl)) + (MD.i^2 / (2 * (N.i - 2)))) lower.MD.i <- MD.i - (z * SE.MD.i) upper.MD.i <- MD.i + (z * SE.MD.i) } if(method == "hedges") { # Standardised mean difference method using Hedge's adjusted g: MD.i <- ((mean.trt - mean.ctrl) / s.i) * (1 - (3/ ((4 * N.i) - 9))) SE.MD.i <- sqrt((N.i / ((n.trt * n.ctrl)) + (MD.i^2 / (2 * (N.i - 3.94))))) lower.MD.i <- MD.i - (z * SE.MD.i) upper.MD.i <- MD.i + (z * SE.MD.i) } else if(method == "glass") { # Standardised mean difference method using Glass's delta: MD.i <- (mean.trt - mean.ctrl) / sd.ctrl SE.MD.i <- sqrt((N.i / ((n.trt * n.ctrl)) + (MD.i^2 / (2 * (n.ctrl - 1))))) lower.MD.i <- MD.i - (z * SE.MD.i) upper.MD.i <- MD.i + (z * SE.MD.i) } # IV pooled standardised mean difference: w.i <- 1 / (SE.MD.i)^2 MD.iv <- sum(w.i * MD.i) / sum(w.i) SE.MD.iv <- 1/sqrt((sum(w.i))) lower.MD.iv <- MD.iv - (z * SE.MD.iv) upper.MD.iv <- MD.iv + (z * SE.MD.iv) # Heterogeneity statistic: Q <- sum(w.i * (MD.i - MD.iv)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) tau.sq <- (Q - (k - 1)) / (sum(w.i) - (sum((w.i)^2) / sum(w.i))) # If Q is less than (k - 1) tau.sq equals zero: tau.sq <- ifelse(Q < (k - 1), 0, tau.sq) w.dsl.i <- 1 / (SE.MD.i^2 + tau.sq) MD.dsl <- sum(w.dsl.i * MD.i) / sum(w.dsl.i) SE.MD.dsl <- 1 / sqrt(sum(w.dsl.i)) lower.MD.dsl <- MD.dsl - (z * SE.MD.dsl) upper.MD.dsl <- MD.dsl + (z * SE.MD.dsl) # Results: md <- data.frame(MD.i, 
lower.MD.i, upper.MD.i) names(md) <- c("est", "lower", "upper") md.invar <- data.frame(MD.iv, lower.MD.iv, upper.MD.iv) names(md.invar) <- c("est", "lower", "upper") md.dsl <- data.frame(MD.dsl, lower.MD.dsl, upper.MD.dsl) names(md.dsl) <- c("est", "lower", "upper") heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity) rval <- list(md = md, md.invar = md.invar, md.dsl = md.dsl, heterogeneity = heterogeneity) return(rval) } epiR/R/epi.directadj.R0000644000176200001440000001202514063765332014213 0ustar liggesusers"epi.directadj" <- function(obs, tar, std, units = 1, conf.level = 0.95){ # How many strata (rows) are there? n.strata <- dim(obs)[1] # How many adjustment variables (columns) are there? n.cov <- dim(obs)[2] N. <- 1 - ((1 - conf.level) / 2) alpha <- 1 - conf.level z <- qnorm(N., mean = 0, sd = 1) # Variable sindex to make sure strata estimates are sorted in the right order: tmp <- data.frame(sindex = rep(1:nrow(tar), times = n.cov), strata = rep(rownames(tar), times = n.cov), cov = rep(colnames(tar), each = n.strata), obs = as.vector(obs), tar = as.vector(tar), std = rep(as.vector(std), each = n.strata)) # =================================================================================== # Crude incidence rate by strata-covariate combinations: # tmp <- data.frame(tmp, (epi.conf(as.matrix(cbind(tmp$obs, tmp$tar)), ctype = "inc.risk", method = method, # design = 1, conf.level = conf.level) * units)) # Confidence interval for crude incidence risk estimates corrected following email from Gillian Raab: # crude.p <- t.obs / t.tar # crude.se <- crude.p / sqrt(t.tar) ## Incorrect. # crude.se <- crude.p / sqrt(t.obs) ## replaced tar by obs # crude.l <- qchisq(alpha / 2, 2 * t.obs) / 2 / t.tar ## next 2 lines changed # crude.u <- qchisq(1 - alpha / 2, 2 * (t.obs + 1)) / 2 / t.tar # Confidence intervals for crude incidence *rate* estimates: crude.df <- epi.conf(dat = as.matrix(cbind(tmp$obs, tmp$tar)), ctype = "inc.rate", method = "exact", N = 1000, design = 1, conf.level = 0.95) crude.df$est <- ifelse(tmp$obs == 0 & tmp$tar == 0, 0, crude.df$est) crude.df$lower <- ifelse(tmp$obs == 0 & tmp$tar == 0, 0, crude.df$lower) crude.df$upper <- ifelse(tmp$obs == 0 & tmp$tar == 0, 0, crude.df$upper) crude <- data.frame(strata = tmp$strata, cov = tmp$cov, obs = tmp$obs, tar = tmp$tar, est = as.numeric(crude.df$est * units), lower = as.numeric(crude.df$lower * units), upper = as.numeric(crude.df$upper * units)) # =================================================================================== # Crude incidence rate by strata: t.obs <- as.numeric(by(data = tmp$obs, INDICES = tmp$sindex, FUN = sum)) t.tar <- as.numeric(by(data = tmp$tar, INDICES = tmp$sindex, FUN = sum)) t.strata <- rownames(tar) # Confidence interval for crude incidence rate estimates corrected following email from Gillian Raab: scrude.p <- as.numeric(t.obs / t.tar) # scrude.se <- scrude.p / sqrt(t.tar) # Incorrect. 
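   # The corrected standard error and the exact (Poisson) confidence limits follow; for t.obs
   # events over t.tar units at risk the limits are qchisq(alpha/2, 2 * t.obs) / (2 * t.tar)
   # and qchisq(1 - alpha/2, 2 * (t.obs + 1)) / (2 * t.tar).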
scrude.se <- as.numeric(scrude.p / sqrt(t.obs)) # replaced t.tar with obs scrude.l <- as.numeric(qchisq(alpha / 2, 2 * t.obs) / 2 / t.tar) # next 2 lines changed scrude.u <- as.numeric(qchisq(1 - alpha / 2, 2 * (t.obs + 1)) / 2 / t.tar) crude.strata <- data.frame(strata = t.strata, obs = t.obs, tar = t.tar, est = as.numeric(scrude.p * units), lower = as.numeric(scrude.l * units), upper = as.numeric(scrude.u * units)) crude.strata$est <- ifelse(t.obs == 0 & t.tar == 0, 0, crude.strata$est) crude.strata$lower <- ifelse(t.obs == 0 & t.tar == 0, 0, crude.strata$lower) crude.strata$upper <- ifelse(t.obs == 0 & t.tar == 0, 0, crude.strata$upper) # =================================================================================== # Adjusted incidence *rate* by strata. Confidence intervals based on Fay and Feuer (1997): t.obs <- as.numeric(by(data = tmp$obs, INDICES = tmp$sindex, FUN = sum)) t.tar <- as.numeric(by(data = tmp$tar, INDICES = tmp$sindex, FUN = sum)) t.strata <- rownames(tar) tstd <- matrix(rep(std, times = n.strata), byrow = TRUE, nrow = n.strata) stdwt <- tstd / apply(X = tstd, MARGIN = 1, FUN = sum) adj.p <- stdwt * (obs / tar) # NaNs returned when zero numerator and zero denominator: adj.p[is.nan(adj.p)] <- 0 adj.p <- apply(X = adj.p, MARGIN = 1, FUN = sum) adj.v <- (stdwt^2) * (obs / tar^2) # NaNs returned when zero numerator and zero denominator: adj.v[is.nan(adj.v)] <- 0 adj.v <- apply(X = adj.v, MARGIN = 1, FUN = sum) wm <- stdwt / tar # Inf returned when tar is zero: wm[is.infinite(wm)] <- 0 wm <- apply(wm, MARGIN = 1, FUN = max) adj.l <- qgamma(alpha / 2, shape = (adj.p^2) / adj.v, scale = adj.v / adj.p) adj.l[is.nan(adj.l)] <- 0 adj.u <- qgamma(1 - alpha/2, shape = ((adj.p + wm)^2) / (adj.v + wm^2), scale = (adj.v + wm^2) / (adj.p + wm)) adj.u[is.nan(adj.u)] <- 0 adj.strata <- data.frame(strata = t.strata, obs = t.obs, tar = t.tar, est = as.numeric(adj.p * units), lower = as.numeric(adj.l * units), upper = as.numeric(adj.u * units)) rval <- list(crude = crude, crude.strata = crude.strata, adj.strata = adj.strata) return(rval) }epiR/R/epi.ssclus1estb.R0000644000176200001440000000345314075465472014547 0ustar liggesusers"epi.ssclus1estb" <- function(b, Py, epsilon, error = "relative", rho, nfractional = FALSE, conf.level = 0.95){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) epsilon.a <- ifelse(error == "absolute", epsilon, Py * epsilon) # Estimate of the required standard error: se <- epsilon.a / z # Design effect when clusters are of different size: if(length(b) == 2){ # Machin et al. (2018) pp. 197, Equation 12.7: bbar <- b[1] bsigma <- b[2] bcv <- bsigma / bbar D <- 1 + ((bcv^2 + 1) * bbar - 1) * rho n.ssu <- (z^2 * Py * (1 - Py)) * D / epsilon.a^2 n.psu <- n.ssu / bbar # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } # Design effect when clusters are of equal size: else if(length(b) == 1){ D <- 1 + ((b - 1) * rho) n.ssu <- (z^2 * Py * (1 - Py)) * D / epsilon.a^2 n.psu <- n.ssu / b # Round after you've calculated n.ssu and n.psu, after Machin et al. (2018) pp. 205: if(nfractional == TRUE){ n.ssu <- n.ssu n.psu <- n.psu } if(nfractional == FALSE){ n.ssu <- ceiling(n.ssu) n.psu <- ceiling(n.psu) } } # n.psu <- ceiling((Py * (1 - Py) * D) / (se^2 * b)) if(n.psu <= 25) warning(paste('The calculated number of primary sampling units (n.psu) is ', n.psu, '. 
At least 25 primary sampling units are recommended for two-stage cluster sampling designs.', sep = ""), call. = FALSE) rval <- list(n.psu = n.psu, n.ssu = n.ssu, DEF = D, rho = rho) return(rval) } epiR/R/rsu.sssep.rbmrg.R0000644000176200001440000000201013752164776014565 0ustar liggesusersrsu.sssep.rbmrg <- function(pstar, rr, ppr, spr, spr.rg, se.p, se.u){ mean.se <- numeric(length(rr)) for(r in 1:length(rr)){ mean.se[r] <- sum(spr.rg[r,] * se.u) } epi <- rsu.epinf(pstar = pstar, rr = rr, ppr = ppr)[[1]] p.pos <- sum(epi * mean.se * spr) n.total <- ceiling(log(1 - se.p) / log(1 - p.pos)) n.rg <- numeric(length(rr)) n <- array(0, dim = c(nrow(spr.rg), ncol(spr.rg))) for(i in 1:length(rr)){ if(i < length(rr)){ n.rg[i] <- ceiling(n.total * spr[i]) } else { n.rg[i] <- n.total - sum(n.rg) } for (j in 1:length(se.u)) { if (j < length(se.u)) { n[i,j] <- ceiling(n.rg[i] * spr.rg[i,j]) } else { n[i,j] <- n.rg[i] - sum(n[i,]) } } } n <- cbind(n, n.rg) tmp <- apply(n, FUN = sum, MARGIN = 2) n <- rbind(n, tmp) colnames(n) <- c(paste("se.u", se.u), "total") rownames(n) <- c(paste("rr", rr), "total") return(list(n = n, epi = epi, mean.se = mean.se)) }epiR/R/epi.dsl.R0000644000176200001440000002144613117711454013046 0ustar liggesusers"epi.dsl" <- function(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) { # Declarations: k <- length(names) a.i <- ev.trt b.i <- n.trt - ev.trt c.i <- ev.ctrl d.i <- n.ctrl - ev.ctrl N <- 1 - ((1 - conf.level) / 2) z <- qnorm(N, mean = 0, sd = 1) # Test each strata for zero values. Add 0.5 to all cells if any cell has a zero value: for(i in 1:k){ if(a.i[i] < 1 | b.i[i] < 1 | c.i[i] < 1 | d.i[i] < 1){ a.i[i] <- a.i[i] + 0.5; b.i[i] <- b.i[i] + 0.5; c.i[i] <- c.i[i] + 0.5; d.i[i] <- d.i[i] + 0.5 } } n.1i <- a.i + b.i n.2i <- c.i + d.i N.i <- a.i + b.i + c.i + d.i # For summary odds ratio: R <- sum((a.i * d.i) / N.i) S <- sum((b.i * c.i) / N.i) E <- sum(((a.i + d.i) * a.i * d.i) / N.i^2) F. <- sum(((a.i + d.i) * b.i * c.i) / N.i^2) G <- sum(((b.i + c.i) * a.i * d.i) / N.i^2) H <- sum(((b.i + c.i) * b.i * c.i) / N.i^2) P <- sum(((n.1i * n.2i * (a.i + c.i)) - (a.i * c.i * N.i)) / N.i^2) # For summary risk ratio: R. <- sum((a.i * n.2i) / N.i) S. <- sum((c.i * n.1i) / N.i) # Individual study odds ratios: if(method == "odds.ratio") {OR.i <- (a.i * d.i) / (b.i * c.i) lnOR.i <- log(OR.i) SE.lnOR.i <- sqrt(1/a.i + 1/b.i + 1/c.i + 1/d.i) SE.OR.i <- exp(SE.lnOR.i) lower.lnOR.i <- lnOR.i - (z * SE.lnOR.i) upper.lnOR.i <- lnOR.i + (z * SE.lnOR.i) lower.OR.i <- exp(lower.lnOR.i) upper.OR.i <- exp(upper.lnOR.i) # Weights: w.i <- (b.i * c.i) / N.i w.iv.i <- 1 / (SE.lnOR.i)^2 # MH pooled odds ratios (relative effect measures combined in their natural scale): OR.mh <- sum(w.iv.i * OR.i)/sum(w.iv.i) lnOR.mh <- log(OR.mh) SE.lnOR.mh <- sqrt(1/2 * ((E/R^2) + ((F. 
+ G)/(R * S)) + (H/S^2))) # DSL pooled odds ratios: Q <- sum(w.iv.i * (lnOR.i - lnOR.mh)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) tau.sq.upper <- Q - df tau.sq.lower <- sum(w.iv.i) - (sum((w.iv.i)^2) / sum(w.iv.i)) # If Q is less than (k - 1) tau.sq equals zero: tau.sq <- ifelse(Q < (k - 1), 0, (tau.sq.upper / tau.sq.lower)) w.dsl.i <- 1 / (((SE.lnOR.i)^2) + tau.sq) lnOR.dsl <- sum(w.dsl.i * lnOR.i) / sum(w.dsl.i) OR.dsl <- exp(lnOR.dsl) SE.lnOR.dsl <- 1 / sqrt(sum(w.dsl.i)) SE.OR.dsl <- exp(SE.lnOR.dsl) lower.lnOR.dsl <- log(OR.dsl) - (z * SE.lnOR.dsl) upper.lnOR.dsl <- log(OR.dsl) + (z * SE.lnOR.dsl) lower.OR.dsl <- exp(lower.lnOR.dsl) upper.OR.dsl <- exp(upper.lnOR.dsl) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. Code for p-value taken from z.test function in TeachingDemos package: effect.z <- log(OR.dsl) / SE.lnOR.dsl alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: OR <- data.frame(OR.i, lower.OR.i, upper.OR.i) names(OR) <- c("est", "lower", "upper") OR.summary <- data.frame(OR.dsl, lower.OR.dsl, upper.OR.dsl) names(OR.summary) <- c("est", "lower", "upper") weights <- data.frame(w.iv.i, w.dsl.i) names(weights) <- c("inv.var", "dsl") Hsq <- data.frame(Hsq, Hsq.l, Hsq.u) names(Hsq) <- c("est", "lower", "upper") Isq <- data.frame(Isq, Isq.l, Isq.u) names(Isq) <- c("est", "lower", "upper") rval <- list(OR = OR, OR.summary = OR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, tau.sq = tau.sq, effect = c(z = effect.z, p.value = p.effect)) return(rval) } else if(method == "risk.ratio") {RR.i <- (a.i / n.1i) / (c.i / n.2i) lnRR.i <- log(RR.i) SE.lnRR.i <- sqrt(1/a.i + 1/c.i - 1/n.1i - 1/n.2i) SE.RR.i <- exp(SE.lnRR.i) lower.lnRR.i <- lnRR.i - (z * SE.lnRR.i) upper.lnRR.i <- lnRR.i + (z * SE.lnRR.i) lower.RR.i <- exp(lower.lnRR.i) upper.RR.i <- exp(upper.lnRR.i) # Weights: w.i <- (b.i * c.i) / N.i w.iv.i <- 1 / (SE.lnRR.i)^2 # MH pooled risk ratios (relative effect measures combined in their natural scale): RR.mh <- sum(w.i * RR.i) / sum(w.i) lnRR.mh <- log(RR.mh) SE.lnRR.mh <- sqrt(P / (R. 
* S.)) SE.RR.mh <- exp(SE.lnRR.mh) lower.lnRR.mh <- log(RR.mh) - (z * SE.lnRR.mh) upper.lnRR.mh <- log(RR.mh) + (z * SE.lnRR.mh) lower.RR.mh <- exp(lower.lnRR.mh) upper.RR.mh <- exp(upper.lnRR.mh) # DSL pooled risk ratios: Q <- sum(w.iv.i * (lnRR.i - lnRR.mh)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) tau.sq.upper <- Q - df tau.sq.lower <- sum(w.iv.i) - (sum((w.iv.i)^2) / sum(w.iv.i)) # If Q is less than (k - 1) tau.sq equals zero: tau.sq <- ifelse(Q < (k - 1), 0, (tau.sq.upper / tau.sq.lower)) w.dsl.i <- 1 / (((SE.lnRR.i)^2) + tau.sq) lnRR.dsl <- sum(w.dsl.i * lnRR.i) / sum(w.dsl.i) RR.dsl <- exp(lnRR.dsl) SE.lnRR.dsl <- 1 / sqrt(sum(w.dsl.i)) SE.RR.dsl <- exp(SE.lnRR.dsl) lower.lnRR.dsl <- log(RR.dsl) - (z * SE.lnRR.dsl) upper.lnRR.dsl <- log(RR.dsl) + (z * SE.lnRR.dsl) lower.RR.dsl <- exp(lower.lnRR.dsl) upper.RR.dsl <- exp(upper.lnRR.dsl) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. Code for p-value taken from z.test function in TeachingDemos package: effect.z <- log(RR.dsl) / SE.lnRR.dsl alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: RR <- data.frame(RR.i, lower.RR.i, upper.RR.i) names(RR) <- c("est", "lower", "upper") RR.summary <- data.frame(RR.dsl, lower.RR.dsl, upper.RR.dsl) names(RR.summary) <- c("est", "lower", "upper") weights <- data.frame(w.iv.i, w.dsl.i) names(weights) <- c("inv.var", "dsl") Hsq <- data.frame(Hsq, Hsq.l, Hsq.u) names(Hsq) <- c("est", "lower", "upper") Isq <- data.frame(Isq, Isq.l, Isq.u) names(Isq) <- c("est", "lower", "upper") rval <- list(RR = RR, RR.summary = RR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, tau.sq = tau.sq, effect = c(z = effect.z, p.value = p.effect)) return(rval) } } epiR/R/epi.iv.R0000644000176200001440000001475513117711456012711 0ustar liggesusers"epi.iv" <- function(ev.trt, n.trt, ev.ctrl, n.ctrl, names, method = "odds.ratio", alternative = c("two.sided", "less", "greater"), conf.level = 0.95) { # Declarations: k <- length(names) a.i <- ev.trt b.i <- n.trt - ev.trt c.i <- ev.ctrl d.i <- n.ctrl - ev.ctrl N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) # Test each strata for zero values. 
Add 0.5 to all cells if any cell has a zero value: for(i in 1:k){ if(a.i[i] < 1 | b.i[i] < 1 | c.i[i] < 1 | d.i[i] < 1){ a.i[i] <- a.i[i] + 0.5; b.i[i] <- b.i[i] + 0.5; c.i[i] <- c.i[i] + 0.5; d.i[i] <- d.i[i] + 0.5 } } n.1i <- a.i + b.i n.2i <- c.i + d.i N.i <- a.i + b.i + c.i + d.i if(method == "odds.ratio") { # Individual study odds ratios: OR.i <- (a.i * d.i)/(b.i * c.i) lnOR.i <- log(OR.i) SE.lnOR.i <- sqrt(1/a.i + 1/b.i + 1/c.i + 1/d.i) SE.OR.i <- exp(SE.lnOR.i) lower.lnOR.i <- lnOR.i - (z * SE.lnOR.i) upper.lnOR.i <- lnOR.i + (z * SE.lnOR.i) lower.OR.i <- exp(lower.lnOR.i) upper.OR.i <- exp(upper.lnOR.i) # Weights: w.i <- 1 / (1/a.i + 1/b.i + 1/c.i + 1/d.i) w.iv.i <- 1/(SE.lnOR.i)^2 # IV pooled odds ratios: lnOR.iv <- sum(w.i * lnOR.i)/sum(w.iv.i) OR.iv <- exp(lnOR.iv) SE.lnOR.iv <- 1/sqrt((sum(w.iv.i))) SE.OR.iv <- exp(SE.lnOR.iv) lower.lnOR.iv <- lnOR.iv - (z * SE.lnOR.iv) upper.lnOR.iv <- lnOR.iv + (z * SE.lnOR.iv) lower.OR.iv <- exp(lower.lnOR.iv) upper.OR.iv <- exp(upper.lnOR.iv) # Test of heterogeneity: Q <- sum(w.iv.i * (lnOR.i - lnOR.iv)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. Code for p-value taken from z.test function in TeachingDemos package: effect.z <- lnOR.iv/SE.lnOR.iv alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: OR <- data.frame(OR.i, lower.OR.i, upper.OR.i) names(OR) <- c("est", "lower", "upper") OR.summary <- data.frame(OR.iv, lower.OR.iv, upper.OR.iv) names(OR.summary) <- c("est", "lower", "upper") weights <- data.frame(w.i, w.iv.i) names(weights) <- c("raw", "inv.var") Hsq <- data.frame(Hsq, Hsq.l, Hsq.u) names(Hsq) <- c("est", "lower", "upper") Isq <- data.frame(Isq, Isq.l, Isq.u) names(Isq) <- c("est", "lower", "upper") rval <- list(OR = OR, OR.summary = OR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect)) } else if(method == "risk.ratio") { # Individual study risk ratios: RR.i <- (a.i/n.1i)/(c.i/n.2i) lnRR.i <- log(RR.i) SE.lnRR.i <- sqrt(1/a.i + 1/c.i - 1/n.1i - 1/n.2i) SE.RR.i <- exp(SE.lnRR.i) lower.lnRR.i <- lnRR.i - (z * SE.lnRR.i) upper.lnRR.i <- lnRR.i + (z * SE.lnRR.i) lower.RR.i <- exp(lower.lnRR.i) upper.RR.i <- exp(upper.lnRR.i) # Weights: w.i <- (c.i * n.1i) / N.i w.iv.i <- 1/(SE.lnRR.i)^2 # IV pooled risk ratios: lnRR.iv <- sum(w.iv.i * lnRR.i)/sum(w.iv.i) RR.iv <- exp(lnRR.iv) SE.lnRR.iv <- 1/sqrt((sum(w.iv.i))) SE.RR.iv <- exp(SE.lnRR.iv) lower.lnRR.iv <- lnRR.iv - (z * SE.lnRR.iv) upper.lnRR.iv <- lnRR.iv + (z * SE.lnRR.iv) lower.RR.iv <- exp(lower.lnRR.iv) upper.RR.iv <- exp(upper.lnRR.iv) # Test of heterogeneity: Q <- sum(w.iv.i * (lnRR.i - lnRR.iv)^2) df <- k - 1 p.heterogeneity <- 1 - pchisq(Q, df) # Higgins and Thompson (2002) H^2 and I^2 statistic: Hsq <- Q / (k - 1) lnHsq <- log(Hsq) if(Q > k) { lnHsq.se <- (1 * log(Q) - log(k - 1)) / (2 * sqrt(2 * Q) - sqrt((2 * (k - 
3)))) } if(Q <= k) { lnHsq.se <- sqrt((1/(2 * (k - 2))) * (1 - (1 / (3 * (k - 2)^2)))) } lnHsq.l <- lnHsq - (z * lnHsq.se) lnHsq.u <- lnHsq + (z * lnHsq.se) Hsq.l <- exp(lnHsq.l) Hsq.u <- exp(lnHsq.u) Isq <- ((Hsq - 1) / Hsq) * 100 Isq.l <- ((Hsq.l - 1) / Hsq.l) * 100 Isq.u <- ((Hsq.u - 1) / Hsq.u) * 100 # Test of effect. Code for p-value taken from z.test function in TeachingDemos package: effect.z <- lnRR.iv/SE.lnRR.iv alternative <- match.arg(alternative) p.effect <- switch(alternative, two.sided = 2 * pnorm(abs(effect.z), lower.tail = FALSE), less = pnorm(effect.z), greater = pnorm(effect.z, lower.tail = FALSE)) # Results: RR <- data.frame(RR.i, lower.RR.i, upper.RR.i) names(RR) <- c("est", "lower", "upper") RR.summary <- data.frame(RR.iv, lower.RR.iv, upper.RR.iv) names(RR.summary) <- c("est", "lower", "upper") weights <- data.frame(w.i, w.iv.i) names(weights) <- c("raw", "inv.var") Hsq <- data.frame(Hsq, Hsq.l, Hsq.u) names(Hsq) <- c("est", "lower", "upper") Isq <- data.frame(Isq, Isq.l, Isq.u) names(Isq) <- c("est", "lower", "upper") rval <- list(RR = RR, RR.summary = RR.summary, weights = weights, heterogeneity = c(Q = Q, df = df, p.value = p.heterogeneity), Hsq = Hsq, Isq = Isq, effect = c(z = effect.z, p.value = p.effect)) } return(rval) } epiR/R/zARscore.R0000644000176200001440000000174713666562562013257 0ustar liggesuserszARscore <- function(dat, conf.level, units){ N. <- 1 - ((1 - conf.level) / 2) z <- qnorm(N., mean = 0, sd = 1) a <- dat[1]; b <- dat[3]; c <- dat[2]; d <- dat[4] N1 <- a + b; N0 <- c + d sARisk.p <- ((a / N1) - (c / N0)) px = a / N1 py = c / N0 z = qchisq(conf.level, 1) proot = px - py dp = 1 - proot niter = 1 while(niter <= 50){ dp = 0.5 * dp up2 = proot + dp score = zz2stat(px, N1, py, N0, up2) if(score < z){proot = up2} niter = niter + 1 if((dp < 0.0000001) || (abs(z - score) < 0.000001)){ niter = 51 ul = up2 } } proot = px - py dp = 1 + proot niter = 1 while(niter <= 50){ dp = 0.5 * dp low2 = proot - dp score = zz2stat(px, N1, py, N0, low2) if(score < z){proot = low2} niter = niter + 1 if((dp < 0.0000001) || (abs(z - score) < 0.000001)){ ll = low2 niter = 51 } } c(sARisk.p * units, ll * units, ul * units) }epiR/R/epi.interaction.R0000644000176200001440000002152214165774742014613 0ustar liggesusersepi.interaction <- function(model, coef, param = c("product", "dummy"), conf.level = 0.95){ N. <- 1 - ((1 - conf.level)/2) z <- qnorm(N., mean = 0, sd = 1) if (class(model)[1] != "glm" & class(model)[2] != "lm" & class(model)[1] != "clogit" & class(model)[1] != "coxph") stop("Error: model must be either a glm or coxph object") theta1 <- as.numeric(model$coefficients[coef[1]]) theta2 <- as.numeric(model$coefficients[coef[2]]) theta3 <- as.numeric(model$coefficients[coef[3]]) if(theta1 < 0 | theta2 < 0) warning("At least one of the two regression coefficients is less than zero (i.e., OR < 1). Estimates of RERI and AP will be invalid. 
Estimate of SI valid.") if(class(model)[1] == "glm" & class(model)[2] == "lm"){ theta1.se <- summary(model)$coefficients[coef[1],2] theta2.se <- summary(model)$coefficients[coef[2],2] theta3.se <- summary(model)$coefficients[coef[3],2] } if(class(model)[1] == "clogit" | class(model)[1] == "coxph"){ theta1.se <- summary(model)$coefficients[coef[1],3] theta2.se <- summary(model)$coefficients[coef[2],3] theta3.se <- summary(model)$coefficients[coef[3],3] } if(param == "product"){ # RERI: cov.mat <- vcov(model) h1 <- exp(theta1 + theta2 + theta3) - exp(theta1) h2 <- exp(theta1 + theta2 + theta3) - exp(theta2) h3 <- exp(theta1 + theta2 + theta3) reri.var <- (h1^2 * theta1.se^2) + (h2^2 * theta2.se^2) + (h3^2 * theta3.se^2) + (2 * h1 * h2 * cov.mat[coef[1],coef[2]]) + (2 * h1 * h3 * cov.mat[coef[1],coef[3]]) + (2 * h2 * h3 * cov.mat[coef[2],coef[3]]) reri.se <- sqrt(reri.var) reri.p <- exp(theta1 + theta2 + theta3) - exp(theta1) - exp(theta2) + 1 reri.l <- reri.p - (z * reri.se) reri.u <- reri.p + (z * reri.se) reri <- data.frame(est = reri.p, lower = reri.l, upper = reri.u) # Multiplicative interaction: mult.p <- as.numeric(exp(theta3)) mult.l <- exp(confint.default(model)[coef[3],1]) mult.u <- exp(confint.default(model)[coef[3],2]) multiplicative <- data.frame(est = mult.p, lower = mult.l, upper = mult.u) # APAB: cov.mat <- vcov(model) h1 <- ((exp(theta1 + theta2 + theta3) - exp(theta1)) / (exp(theta1 + theta2 + theta3))) - ((exp(theta1 + theta2 + theta3) - exp(theta1) - exp(theta2) + 1) / (exp(theta1 + theta2 + theta3))) h2 <- ((exp(theta1 + theta2 + theta3) - exp(theta2)) / (exp(theta1 + theta2 + theta3))) - ((exp(theta1 + theta2 + theta3) - exp(theta1) - exp(theta2) + 1) / (exp(theta1 + theta2 + theta3))) h3 <- 1 -((exp(theta1 + theta2 + theta3) - exp(theta1) - exp(theta2) + 1) / exp(theta1 + theta2 + theta3)) apab.var <- (h1^2 * theta1.se^2) + (h2^2 * theta2.se^2) + (h3^2 * theta3.se^2) + (2 * h1 * h2 * cov.mat[coef[1],coef[2]]) + (2 * h1 * h3 * cov.mat[coef[1],coef[3]]) + (2 * h2 * h3 * cov.mat[coef[2],coef[3]]) apab.se <- sqrt(apab.var) apab.p <- (exp(theta1 + theta2 + theta3) - exp(theta1) - exp(theta2) + 1) / exp(theta1 + theta2 + theta3) apab.l <- apab.p - (z * apab.se) apab.u <- apab.p + (z * apab.se) apab <- data.frame(est = apab.p, lower = apab.l, upper = apab.u) # S: s.p <- (exp(theta1 + theta2 + theta3) - 1) / (exp(theta1) + exp(theta2) - 2) cov.mat <- vcov(model) # If model type is glm or cph and point estimate of S is negative terminate analysis. # Advise user to use a linear odds model: if(class(model)[1] == "glm" & class(model)[2] == "lm" & s.p < 0){ warning(paste("Point estimate of synergy index (S) is less than zero (", round(s.p, digits = 2), ").\n Confidence intervals cannot be calculated using the delta method. Consider re-parameterising as linear odds model.", sep = "")) } if(class(model)[1] == "clogit" & class(model)[2] == "coxph" & s.p < 0){ warning(paste("Point estimate of synergy index (S) is less than zero (", round(s.p, digits = 2), ").\n Confidence intervals cannot be calculated using the delta method. 
Consider re-parameterising as linear odds model.", sep = "")) } h1 <- ((exp(theta1 + theta2 + theta3)) / (exp(theta1 + theta2 + theta3) - 1)) - (exp(theta1) / (exp(theta1) + exp(theta2) - 2)) h2 <- ((exp(theta1 + theta2 + theta3)) / (exp(theta1 + theta2 + theta3) - 1)) - (exp(theta2) / (exp(theta1) + exp(theta2) - 2)) h3 <- exp(theta1 + theta2 + theta3) / (exp(theta1 + theta2 + theta3) - 1) lns.var <- h1^2 * theta1.se^2 + h2^2 * theta2.se^2 + h3^2 * theta3.se^2 + (2 * h1 * h2 * cov.mat[coef[2],coef[1]]) + (2 * h1 * h3 * cov.mat[coef[3],coef[1]]) + (2 * h2 * h3 * cov.mat[coef[3],coef[2]]) lns.se <- sqrt(lns.var) lns.p <- log(s.p) lns.l <- lns.p - (z * lns.se) lns.u <- lns.p + (z * lns.se) s.l <- exp(lns.l) s.u <- exp(lns.u) s <- data.frame(est = s.p, lower = s.l, upper = s.u) rval <- list(reri = reri, apab = apab, s = s, multiplicative = multiplicative) } if(param == "dummy"){ # RERI: cov.mat <- vcov(model) h1 <- -exp(theta1) h2 <- -exp(theta2) h3 <- exp(theta3) reri.var <- (h1^2 * (cov.mat[coef[1],coef[1]])) + (h2^2 * (cov.mat[coef[2],coef[2]])) + (h3^2 * (cov.mat[coef[3],coef[3]])) + (2 * h1 * h2 * cov.mat[coef[1],coef[2]]) + (2 * h1 * h3 * cov.mat[coef[1],coef[3]]) + (2 * h2 * h3 * cov.mat[coef[2],coef[3]]) reri.se <- sqrt(reri.var) reri.p <- exp(theta3) - exp(theta1) - exp(theta2) + 1 reri.l <- reri.p - (z * reri.se) reri.u <- reri.p + (z * reri.se) reri <- data.frame(est = reri.p, lower = reri.l, upper = reri.u) # Multiplicative interaction: mult.p <- as.numeric(exp(theta3)) mult.l <- exp(confint.default(model)[coef[3],1]) mult.u <- exp(confint.default(model)[coef[3],2]) multiplicative <- data.frame(est = mult.p, lower = mult.l, upper = mult.u) # APAB: cov.mat <- vcov(model) h1 <- -exp(theta1 - theta3) h2 <- -exp(theta2 - theta3) h3 <- (exp(theta1) + exp(theta2) - 1) / exp(theta3) apab.var <- (h1^2 * (cov.mat[coef[1],coef[1]])) + (h2^2 * (cov.mat[coef[2],coef[2]])) + (h3^2 * (cov.mat[coef[3],coef[3]])) + (2 * h1 * h2 * cov.mat[coef[1],coef[2]]) + (2 * h1 * h3 * cov.mat[coef[1],coef[3]]) + (2 * h2 * h3 * cov.mat[coef[2],coef[3]]) apab.se <- sqrt(apab.var) # apab.p <- exp(-theta3) - exp(theta1 - theta3) - exp(theta2 - theta3) + 1 # Equation 4 (Skrondal 2003): apab.p <- (exp(theta3) - exp(theta1) - exp(theta2) + 1) / exp(theta3) apab.l <- apab.p - (z * apab.se) apab.u <- apab.p + (z * apab.se) apab <- data.frame(est = apab.p, lower = apab.l, upper = apab.u) # S: s.p <- (exp(theta3) - 1) / (exp(theta1) + exp(theta2) - 2) cov.mat <- vcov(model) # If model type is glm or cph and point estimate of S is negative terminate analysis. # Advise user to use a linear odds model: if(class(model)[1] == "glm" & class(model)[2] == "lm" & s.p < 0){ warning(paste("Point estimate of synergy index (S) is less than zero (", round(s.p, digits = 2), ").\n Confidence intervals cannot be calculated using the delta method. Consider re-parameterising as linear odds model.", sep = "")) } if(class(model)[1] == "clogit" & class(model)[2] == "coxph" & s.p < 0){ warning(paste("Point estimate of synergy index (S) is less than zero (", round(s.p, digits = 2), ").\n Confidence intervals cannot be calculated using the delta method. 
Consider re-parameterising as linear odds model.", sep = "")) } # Use delta method (Hosmer and Lemeshow 1992) if model type is glm, clogit or cph: h1 <- -exp(theta1) / (exp(theta1) + exp(theta2) - 2) h2 <- -exp(theta2) / (exp(theta1) + exp(theta2) - 2) h3 <- exp(theta3) / (exp(theta3) - 1) lns.var <- h1^2 * theta1.se^2 + h2^2 * theta2.se^2 + h3^2 * theta3.se^2 + (2 * h1 * h2 * cov.mat[coef[2],coef[1]]) + (2 * h1 * h3 * cov.mat[coef[3],coef[1]]) + (2 * h2 * h3 * cov.mat[coef[3],coef[2]]) lns.se <- sqrt(lns.var) lns.p <- log(s.p) lns.l <- lns.p - (z * lns.se) lns.u <- lns.p + (z * lns.se) s.l <- exp(lns.l) s.u <- exp(lns.u) s <- data.frame(est = s.p, lower = s.l, upper = s.u) rval <- list(reri = reri, apab = apab, s = s, multiplicative = multiplicative) } return(rval) } epiR/R/rsu.sssep.rspool.R0000644000176200001440000000024613745457752015004 0ustar liggesusersrsu.sssep.rspool <- function(k, pstar, pse, psp, se.p) { n <- log(1 - se.p) / log(((1 - (1 - pstar)^k) * (1 - pse) + (1 - pstar)^k * psp)) return(ceiling(n)) }epiR/R/zincrate.R0000644000176200001440000000065713666555620013342 0ustar liggesuserszincrate <- function(dat, conf.level){ N. <- 1 - ((1 - conf.level) / 2) a <- dat[,1] n <- dat[,2] p <- a / n # Changed 210519. Now use the method of Ulm (1990), which is used in poisson.test(). See email from Kazuki Yoshida 14 May 2019: low <- (qchisq(p = 1 - N., df = 2 * a) / 2) / n upp <- (qchisq(p = N., df = 2 * (a + 1)) / 2) / n rval <- data.frame(est = p, lower = low, upper = upp) rval }epiR/R/epi.ssequc.R0000644000176200001440000000504414112023672013555 0ustar liggesusersepi.ssequc <- function(treat, control, sd, delta, n, r = 1, power, nfractional = FALSE, alpha){ # Stop if a negative value for delta entered: if (delta <= 0){ stop("For an equivalence trial delta must be greater than zero.") } z.alpha <- qnorm(1 - alpha, mean = 0, sd = 1) if (!is.na(treat) & !is.na(control) & !is.na(power) & is.na(n)) { beta <- (1 - power) z.beta <- qnorm(1 - beta / 2, mean = 0, sd = 1) # http://powerandsamplesize.com/Calculators/Compare-2-Means/2-Sample-Equality: n <- ((sd * (z.alpha + z.beta) / (abs(treat - control) - delta))^2) if(nfractional == TRUE){ n.control <- (1 + 1 / r) * n n.treat <- n.control * r n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- (1 + 1 / r) * (ceiling(n)) n.treat <- n.control * r n.total <- n.treat + n.control } rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } if (!is.na(treat) & !is.na(control) & !is.na(n) & is.na(power) & !is.na(r) & !is.na(alpha)) { # Work out the number of subjects in the control group. r equals the number in the treatment group divided by the number in the control group. if(nfractional == TRUE){ n.control <- 1 / (r + 1) * n n.treat <- n - n.control n.total <- n.treat + n.control } if(nfractional == FALSE){ n.control <- ceiling(1 / (r + 1) * n) n.treat <- n - n.control n.total <- n.treat + n.control } z <- (abs(treat - control) - delta) / (sd * sqrt((1 / n.treat) + (1 / n.control))) power <- 2 * (pnorm(z - z.alpha, mean = 0, sd = 1) + pnorm(-z - z.alpha, mean = 0, sd = 1)) - 1 rval <- list(n.total = n.total, n.treat = n.treat, n.control = n.control, delta = delta, power = power) } rval } # Chow S, Shao J, Wang H. 2008. Sample Size Calculations in Clinical Research. 2nd Ed. Chapman & Hall/CRC Biostatistics Series. 
page 62 # epi.ssequc(treat = 5, control = 4, sd = 10, delta = 5, n = NA, power = 0.80, r = 1, nfractional = FALSE, alpha = 0.05) # n.treat = 108, n.control = 108, n.total = 216 # Agrees with http://powerandsamplesize.com/Calculators/Compare-2-Means/2-Sample-Equivalence # epi.ssequc(treat = 5, control = 4, sd = 10, delta = 5, n = NA, power = 0.80, r = 2, nfractional = TRUE, alpha = 0.05) # n.treat = 162, n.control = 81, n.total = 243 # Agrees with http://powerandsamplesize.com/Calculators/Compare-2-Means/2-Sample-Equivalence epiR/R/rsu.sep.rs2st.R0000644000176200001440000000061613745477360014173 0ustar liggesusersrsu.sep.rs2st <- function(H = NA, N = NA, n, pstar.c, pstar.u, se.u = 1) { # Calculate cluster level sensitivities: sep.cluster <- rsu.sep.rs(N = N, n = n, pstar = pstar.u, se.u = se.u) # Calculate overall system sensitivity: sep <- rsu.sep.rsvarse(N = H, pstar = pstar.c, se.u = sep.cluster) rval <- list(se.p = sep, se.c = sep.cluster, se.u = se.u, N = N, n = n) rval }epiR/R/epi.ccc.R0000644000176200001440000000701313242155044013002 0ustar liggesusersepi.ccc = function(x, y, ci = "z-transform", conf.level = 0.95, rep.measure = FALSE, subjectid){ N. <- 1 - ((1 - conf.level) / 2) zv <- qnorm(N., mean = 0, sd = 1) dat <- data.frame(x, y) id <- complete.cases(dat) nmissing <- sum(!complete.cases(dat)) dat <- dat[id,] k <- length(dat$y) yb <- mean(dat$y) sy2 <- var(dat$y) * (k - 1) / k sd1 <- sd(dat$y) xb <- mean(dat$x) sx2 <- var(dat$x) * (k - 1) / k sd2 <- sd(dat$x) r <- cor(dat$x, dat$y) sl <- r * sd1 / sd2 sxy <- r * sqrt(sx2 * sy2) p <- 2 * sxy / (sx2 + sy2 + (yb - xb)^2) delta <- (dat$x - dat$y) rmean <- apply(dat, MARGIN = 1, FUN = mean) blalt <- data.frame(mean = rmean, delta) # Scale shift: v <- sd1 / sd2 # Location shift relative to the scale: u <- (yb - xb) / ((sx2 * sy2)^0.25) # Variable C.b is a bias correction factor that measures how far the best-fit line deviates from a line at 45 degrees (a measure of accuracy). # No deviation from the 45 degree line occurs when C.b = 1. See Lin (1989 page 258). 
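# Note: C.b is computed below as p / r, so Lin's concordance correlation
# decomposes as rho.c = r * C.b, i.e. a precision component (the Pearson
# correlation r) multiplied by an accuracy component (the bias correction
# factor C.b).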
# C.b <- (((v + 1) / (v + u^2)) / 2)^-1 # The following taken from the Stata code for function "concord" (changed 290408): C.b <- p / r # Variance, test, and CI for asymptotic normal approximation (per Lin [March 2000] Biometrics 56:325-5): sep <- sqrt(((1 - ((r)^2)) * (p)^2 * (1 - ((p)^2)) / (r)^2 + (2 * (p)^3 * (1 - p) * (u)^2 / r) - 0.5 * (p)^4 * (u)^4 / (r)^2 ) / (k - 2)) ll <- p - (zv * sep) ul <- p + (zv * sep) # Statistic, variance, test, and CI for inverse hyperbolic tangent transform to improve asymptotic normality: t <- log((1 + p) / (1 - p)) / 2 set = sep / (1 - ((p)^2)) llt = t - (zv * set) ult = t + (zv * set) llt = (exp(2 * llt) - 1) / (exp(2 * llt) + 1) ult = (exp(2 * ult) - 1) / (exp(2 * ult) + 1) # Calculate delta.sd if repeated measures: if(rep.measure == TRUE){ # Make sure subject is a factor: dat$sub <- subjectid if(!is.factor(dat$sub)) dat$sub <- as.factor(dat$sub) # Number of subjects: nsub <- length(levels(dat$sub)) # One way analysis of variance: model <- aov(delta ~ dat$sub) # Degrees of freedom: MSB <- anova(model)[[3]][1] # Sums of squares: MSW <- anova(model)[[3]][2] # Calculate number of complete pairs for each subject: pairs <- NULL for(i in 1:nsub){ pairs[i] <- sum(is.na(delta[dat$sub == levels(dat$sub)[i]]) == FALSE) } sig.dl <- (MSB - MSW) / ((sum(pairs)^2 - sum(pairs^2)) / ((nsub - 1) * sum(pairs))) delta.sd <- sqrt(sig.dl + MSW) } # Calculate delta.sd if no repeated measures: if(rep.measure == FALSE){ delta.sd <- sqrt(var(delta, na.rm = TRUE)) } # Upper and lower bounds for Bland Altmann plot: ba.p <- mean(delta) ba.l <- ba.p - (zv * delta.sd) ba.u <- ba.p + (zv * delta.sd) sblalt <- data.frame("est" = ba.p, "delta.sd" = delta.sd, "lower" = ba.l, "upper" = ba.u) if(ci == "asymptotic"){ rho.c <- data.frame(p, ll, ul) names(rho.c) <- c("est", "lower", "upper") rval <- list(rho.c = rho.c, s.shift = v, l.shift = u, C.b = C.b, blalt = blalt, sblalt = sblalt, nmissing = nmissing) } else if(ci == "z-transform"){ rho.c <- data.frame(p, llt, ult) names(rho.c) <- c("est", "lower", "upper") rval <- list(rho.c = rho.c, s.shift = v, l.shift = u, C.b = C.b, blalt = blalt, sblalt = sblalt, nmissing = nmissing) } return(rval) } epiR/R/rsu.sssep.rs2st.R0000644000176200001440000000073313754661146014536 0ustar liggesusersrsu.sssep.rs2st <- function(H = NA, N = NA, pstar.c, se.c, pstar.u, se.u, se.p) { se.cluster <- se.c se.unit <- se.u nclusters <- rsu.sssep.rs(N = H, pstar = pstar.c, se.p = se.p, se.u = se.cluster) nunits <- rsu.sssep.rs(N = N, pstar = pstar.u, se.p = se.cluster, se.u = se.unit) nclusters <- data.frame(H = H, nsample = nclusters) nunits <- data.frame(N = N, nsample = nunits) rval <- list(clusters = nclusters, units = nunits) rval }epiR/R/epi.sscohortt.R0000644000176200001440000002233713720164356014317 0ustar liggesusers"epi.sscohortt" <- function(irexp1 = 0.25, irexp0 = 0.10, FT = NA, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95){ alpha.new <- (1 - conf.level) / sided.test z.alpha <- qnorm(1 - alpha.new, mean = 0, sd = 1) if (!is.na(irexp1) & !is.na(n) & !is.na(power)){ stop("Error: at least one of irexp1, n and power must be NA.") } # ================================================================================================= # Sample size, follow-up time unspecified. 
Lwanga and Lemeshow (1991) Table 14, page 77: if(!is.na(irexp1) & !is.na(irexp0) & is.na(n) & !is.na(power) & is.na(FT)){ z.beta <- qnorm(power, mean = 0, sd = 1) lambda0 <- irexp0 lambda1 <- irexp1 lambda <- mean(c(lambda1, lambda0)) n.exp1 <- (z.alpha * sqrt((1 + r) * lambda^2) + z.beta * sqrt((r * lambda1^2 + lambda0^2)))^2 / (r * (lambda1 - lambda0)^2) # Account for the design effect: n.exp1 <- ceiling(n.exp1 * design) # r is the ratio of the number in the control group to the number in the treatment group: if(nfractional == TRUE){ n.exp0 <- r * n.exp1 n.total <- n.exp1 + n.exp0 } if(nfractional == FALSE){ n.exp0 <- ceiling(r * n.exp1) n.total <- n.exp1 + n.exp0 } rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = irexp1 / irexp0) } # ------------------------------------------------------------------------------------------------- # Sample size, follow-up time specified. Lwanga and Lemeshow (1991) Table 14, page 77: if(!is.na(irexp1) & !is.na(irexp0) & is.na(n) & !is.na(power) & !is.na(FT)){ # Sample size estimate. z.beta <- qnorm(power, mean = 0, sd = 1) lambda0 <- irexp0 lambda1 <- irexp1 lambda <- mean(c(lambda1, lambda0)) flambda0 <- (lambda0^3 * FT) / ((lambda0 * FT) - 1 + exp(-lambda0 * FT)) flambda1 <- (lambda1^3 * FT) / ((lambda1 * FT) - 1 + exp(-lambda1 * FT)) flambda <- (lambda^3 * FT) / ((lambda * FT) - 1 + exp(-lambda * FT)) n.exp1 <- (z.alpha * sqrt((1 + r) * flambda) + z.beta * sqrt((r * flambda1 + flambda0)))^2 / (r * (lambda1 - lambda0)^2) # Account for the design effect: n.exp1 <- ceiling(n.exp1 * design) # r is the ratio of the number in the control group to the number in the treatment group: n.exp0 <- r * n.exp1 n.total <- n.exp1 + n.exp0 rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = irexp1 / irexp0) } # ================================================================================================= # Power, follow-up time unspecified. Lwanga and Lemeshow (1991) Table 13: else if(!is.na(irexp1) & !is.na(irexp0) & !is.na(n) & is.na(power) & is.na(FT)){ lambda0 <- irexp0 lambda1 <- irexp1 lambda <- mean(c(lambda1, lambda0)) # Account for the design effect: n <- n / design n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 z.beta <- ((sqrt(n.exp1 * r) * (lambda1 - lambda0)) - (z.alpha * sqrt((1 + r) * lambda^2))) / sqrt((r * lambda1^2) + lambda0^2) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = irexp1 / irexp0) } # ------------------------------------------------------------------------------------------------- # Power, follow-up time specified. 
Lwanga and Lemeshow (1991) page 19: else if(!is.na(irexp1) & !is.na(irexp0) & !is.na(n) & is.na(power) & !is.na(FT)){ lambda0 <- irexp0 lambda1 <- irexp1 lambda <- mean(c(lambda1, lambda0)) flambda0 <- (lambda0^3 * FT) / ((lambda0 * FT) - 1 + exp(-lambda0 * FT)) flambda1 <- (lambda1^3 * FT) / ((lambda1 * FT) - 1 + exp(-lambda1 * FT)) flambda <- (lambda^3 * FT) / ((lambda * FT) - 1 + exp(-lambda * FT)) # Account for the design effect: n <- n / design n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 z.beta <- ((sqrt(n.exp1 * r) * (lambda1 - lambda0)) - (z.alpha * sqrt((1 + r) * flambda))) / sqrt((r * flambda1) + flambda0) power <- pnorm(z.beta, mean = 0, sd = 1) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = irexp1 / irexp0) } # ================================================================================================= # Incidence rate ratio, follow-up time unspecified. Lwanga and Lemeshow (1991) Table 13: else if(is.na(irexp1) & !is.na(irexp0) & !is.na(n) & !is.na(power) & is.na(FT)){ # Here we use the formulae for study power (from Lwanga and Lemeshow Table 13) and then solve for irexp1 # (which then allows us to calculate the incidence rate ratio). n <- n / design n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 PfunFT0 <- function(irexp1, irexp0, n, power, r, design, z.alpha = z.alpha){ lambda0 <- irexp0 lambda1 <- irexp1 lambda <- mean(c(lambda1, lambda0)) # Account for the design effect: n <- n / design n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 z.beta <- ((sqrt(n.exp1 * r) * (lambda1 - lambda0)) - (z.alpha * sqrt((1 + r) * lambda^2))) / sqrt((r * lambda1^2) + lambda0^2) # Take the calculated value of the power and subtract the power entered by the user: pnorm(z.beta, mean = 0, sd = 1) - power } # Estimated incidence rate ratio for the exposed group: irexp1e <- uniroot(PfunFT0, irexp0 = irexp0, n = n, power = power, r = r, design = design, z.alpha = z.alpha, interval = c(1E-6,1))$root irr <- sort(c(irexp1e / irexp0, irexp0 / irexp1e)) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = irr) } # ------------------------------------------------------------------------------------------------- # Incidence rate ratio, follow-up time specified. Lwanga and Lemeshow (1991) Table 13: else if(is.na(irexp1) & !is.na(irexp0) & !is.na(n) & !is.na(power) & !is.na(FT)){ # Here we use the formulae for study power (from Lwanga and Lemeshow Table 13) and then solve for irexp1 # (which then allows us to calculate the incidence rate ratio). 
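# Note: uniroot() below searches for irexp1 over the interval (1E-6, 1), i.e.
# it finds the exposed-group incidence rate at which the calculated study power
# equals the requested power; the detectable incidence rate ratio is then
# reported as the pair irexp1e / irexp0 and its reciprocal.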
n <- n / design n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 # Where irr > 1: PfunFT1 <- function(irexp1, irexp0, FT, n, power, r, design, z.alpha = z.alpha){ lambda0 <- irexp0 lambda1 <- irexp1 lambda <- mean(c(lambda1, lambda0)) flambda0 <- (lambda0^3 * FT) / ((lambda0 * FT) - 1 + exp(-lambda0 * FT)) flambda1 <- (lambda1^3 * FT) / ((lambda1 * FT) - 1 + exp(-lambda1 * FT)) flambda <- (lambda^3 * FT) / ((lambda * FT) - 1 + exp(-lambda * FT)) # Account for the design effect: n <- n / design n.exp1 <- ceiling(n / (r + 1)) * r n.exp0 <- ceiling(n / (r + 1)) * 1 n.total <- n.exp1 + n.exp0 z.beta <- ((sqrt(n.exp1 * r) * (lambda1 - lambda0)) - (z.alpha * sqrt((1 + r) * flambda))) / sqrt((r * flambda1) + flambda0) # Take the calculated value of the power and subtract the power entered by the user: pnorm(z.beta, mean = 0, sd = 1) - power } # Estimated incidence rate ratio for the exposed group: irexp1e <- uniroot(PfunFT1, irexp0 = irexp0, FT = FT, n = n, power = power, r = r, design = design, z.alpha = z.alpha, interval = c(1E-6,1))$root irr <- sort(c(irexp1e / irexp0, irexp0 / irexp1e)) rval <- list(n.total = n.total, n.exp1 = n.exp1, n.exp0 = n.exp0, power = power, irr = irr) } # ================================================================================================= rval } # epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = NA, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # n = 46 # # epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = NA, n = 46, power = NA, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # power = 0.80 # # epi.sscohortt(irexp1 = NA, irexp0 = 0.10, FT = NA, n = 46, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # irr = 0.404737 2.470740 # # epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = 5, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # n = 130 # # epi.sscohortt(irexp1 = 0.25, irexp0 = 0.10, FT = 5, n = 130, power = NA, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # power = 0.80 # # epi.sscohortt(irexp1 = NA, irexp0 = 0.10, FT = 5, n = 130, power = 0.80, r = 1, design = 1, sided.test = 2, conf.level = 0.95) # irr = 0.404737 2.470740epiR/R/rsu.adjrisk.R0000644000176200001440000000070313757603256013752 0ustar liggesusersrsu.adjrisk <- function(rr, ppr) { if(class(rr)[1] != "matrix"){ sum.prod <- sum(rr * ppr) ar <- rr / sum.prod } else if(class(rr)[1] == "matrix"){ tmp <- rr ar <- rr for(r in 1:ncol(rr)){ tmp[,r] <- rr[,r] * ppr[r] } sum.prod <- apply(tmp, FUN = sum, MARGIN = 1) for (r in 1:ncol(rr)){ ar[,r]<- rr[,r] / sum.prod } return(ar) } return(ar) }epiR/R/epi.herdtest.R0000644000176200001440000000121613117711454014077 0ustar liggesusersepi.herdtest <- function(se, sp, P, N, n, k){ # Probability of testing positive: APpos <- P * se + (1 - P) * (1 - sp) APneg <- (1 - sp) # if(n/N < 0.2){ # Binomial distribution: # HSe <- 1 - pbinom(k - 1, n, P) # HSp <- phyper(k - 1, N * APneg, N - N * APneg, n) # rval <- list(APpos = APpos, APneg = APneg, HSe = HSe, HSp = HSp) # } # else if(n/N >= 0.2){ # Hypergeometric distribution: HSe <- 1 - phyper(k - 1, N * APpos, N - N * APpos, n) HSp <- phyper(k - 1, N * APneg, N - N * APneg, n) rval <- list(APpos = APpos, APneg = APneg, HSe = HSe, HSp = HSp) # } rval } epiR/R/epi.offset.R0000644000176200001440000000061213117711464013543 0ustar liggesusers"epi.offset" <- function(id.names) { total <- length(id.names) counts <- as.vector(table(id.names)) offset <- c(1) for (i in 2:length(counts)-1) 
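# Note: ':' binds more tightly than '-', so 2:length(counts)-1 evaluates as
# (2:length(counts)) - 1, i.e. the loop runs over i = 1, ..., length(counts) - 1.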
{var <- counts[i] + offset[i] offset <- c(offset, var) } offset <- c(offset, total) return(offset) } epiR/MD50000644000176200001440000003257414166031357011502 0ustar liggesusersb0f4d0014b8032e7e142158e8e00fb20 *DESCRIPTION 480896be24a9995183b4a206b942d902 *NAMESPACE 39c8f66b7b3a12377829c97d374530fd *NEWS d366e2643290939d27aff97862e66d9d *R/epi.2by2.R 90797cfa53b823b298af4c32eb3435bf *R/epi.RtoBUGS.R a9a4deaaec9cd9435529455159385225 *R/epi.about.R 35dd233e3feeceefc29040363acdbadd *R/epi.asc.R 9ede17141615376b592a27456e9486b8 *R/epi.betabuster.R 4eccc5b8a0ba009d36d3688689b161f0 *R/epi.blcm.paras.r 5414b8d1e192dbf1867ceed92d10376a *R/epi.bohning.R dcdda419005d13810bdb30773a344804 *R/epi.ccc.R 1add8754553d34fe89675f35c56e138b *R/epi.conf.R b68aa1eb42b71bfc1e98bf2fa30b79fd *R/epi.convgrid.R f5ada1be056fbaa31934d66ce0c48c58 *R/epi.cp.R 0bce821540346b1e58e640d7334c043b *R/epi.cpresids.R ed0b89b2a78be0d3a0a1c92164a863dd *R/epi.descriptives.R da3832a85a8085b34f75455e6defbf24 *R/epi.dgamma.R f6d66329aeeec4100db8f2c01e58a2ce *R/epi.directadj.R 85be50061d94e87a31c52f3287e8e492 *R/epi.dms.R c4af10a5115fd3bb7713694393c9a3d3 *R/epi.dsl.R 276cc7e048b9b12cad3c16fe60190600 *R/epi.edr.R 658381258fe39d7cdbe6253c66a49034 *R/epi.empbayes.R e935ed017215d39b2225f61b23de3bfd *R/epi.herdtest.R 566a1b7408b9ecb76c20bb182fb11d18 *R/epi.indirectadj.R a9eb519c803f08421b7c12897e441a14 *R/epi.insthaz.R d6ac62ffcd0bbaa3d450011e842dcaf6 *R/epi.interaction.R c6d1bbc1733a57b2ed6973bbc9a2689a *R/epi.iv.R 9cb615f92da0644d226c23aa67e5fdd5 *R/epi.kappa.R dd5a6b971d1652170e84fa8a709215a9 *R/epi.ltd.R 27df85ca4570d0077ec68ff552739d13 *R/epi.mh.R ade4c3e5b52a8828b39e7efdf8238a81 *R/epi.nomogram.R dd89f9509954970ef09f36b06192b96c *R/epi.occc.R 2c6852d882ea0b6808350b3fd47bae1b *R/epi.offset.R b1c0bdf50b56a4b9df09ac89f2de0b98 *R/epi.pooled.R 3a35fac64c1bcda32c3dcabe7b7d14b2 *R/epi.popsize.R 6f62a332c31a596317a7bff79b98a749 *R/epi.prcc.R 14e28441a7d56e5235f4b10877278b41 *R/epi.prev.R e355dbf1148b6d47f0d927a6c683b62c *R/epi.psi.r 4e719a2b9c4ba572f0ee3c3d70d54190 *R/epi.smd.R f5f94b6c6e56a8824a2606c60b4c29ec *R/epi.smr.R 4f936b84d1ddf116626b168e2c4f6ebe *R/epi.sscc.R 1b30db00f7eb7e439d2ec51232d959d7 *R/epi.ssclus1estb.R 0ff83ff13c2b2f8c7d02caad0166cbba *R/epi.ssclus1estc.R 7c54dc0ddae6ab30ca8b3cd720e428ba *R/epi.ssclus2estb.R d8bae7e957def1e7199168aa9d435a3d *R/epi.ssclus2estc.R ca8f50180b338cfad05435887db72bd0 *R/epi.sscohortc.R 88822231973ebccb0bb7ead3b3544fdd *R/epi.sscohortt.R e63184e8503d36ad650708320fbd733a *R/epi.sscompb.R cd91e109d55214108f581550747b4b39 *R/epi.sscompc.R be48ac8e19dddcaffb123dfca38fed47 *R/epi.sscomps.R 3bd7e43ccc72108b3203f5b5b962633e *R/epi.ssdetect.R 8b1b5f94f3354ed84443fe3e651663f8 *R/epi.ssdxsesp.R b2729e90922da7d7a20870e70a835fda *R/epi.ssdxtest.R 1d153759eee179db8bd2a35fae48c1a6 *R/epi.ssequb.R e89ab171d0f0202f4e8c0d0bd2562731 *R/epi.ssequc.R ae3c60fde4ad6b9ce904b379f92bd8d6 *R/epi.ssninfb.R d0eac12096fec85010b0f386ce907b83 *R/epi.ssninfc.R 2275cc1da98e04b2e631c02f86870ab8 *R/epi.sssimpleestb.R 2eb16a99067d8dc2f10fe6bf920ed6eb *R/epi.sssimpleestc.R b1a640f9fdd80941d260cdfdc43ba050 *R/epi.ssstrataestb.R ac051dcfd237e4435ead4fc5857ca047 *R/epi.ssstrataestc.R 4890ccea744d2b9133581791ff320e65 *R/epi.sssupb.R 8b515b8bca159a11610471a53465ca53 *R/epi.sssupc.R 6377da94e85c4f9f4c530ef54c5296d3 *R/epi.ssxsectn.R e10003d7bb095bb3524e7ec772560737 *R/epi.tests.R 7ebbb038e96191769d39777beaaea333 *R/globals.R 6c6cd1c127179567c90016339259f3f9 *R/rsu.adjrisk.R 7e22441e1e8feb37c9921fc3a921e3f0 *R/rsu.dxtest.R 
5960cef8d27928c941a7efc980d96764 *R/rsu.epinf.R 43f95c9da2dcbda1384999c5220a6922 *R/rsu.pfree.equ.R e64def06c293a7ff1020f2dd33864b4d *R/rsu.pfree.rs.R 0833ef71f9a5fe3134aa369be3f791f7 *R/rsu.pstar.R 69ee31fffbbf53aca254ce894a204754 *R/rsu.sep.R 90e2f78318ab7758011b33e936ee8b48 *R/rsu.sep.cens.R e77698f9ba6dd861c4a7631ad0aad6d9 *R/rsu.sep.pass.R 66739cc177c34b6969569fd7ba19da8d *R/rsu.sep.rb.R 8fa981881f4ccc0232821d98b88e01c5 *R/rsu.sep.rb1rf.R 34e2d9af6e1a2408e8fc26cacc1e7621 *R/rsu.sep.rb2rf.R 57be5d382529b9ae7aead2fe61730e0a *R/rsu.sep.rb2st.r 356ccc82c2f46b8b862c4f4585efc644 *R/rsu.sep.rbvarse.r ad236e58bf59f1b7e9c1c42a0f163c0f *R/rsu.sep.rs.R 8502b533094f37eef80a44969a1489c7 *R/rsu.sep.rs2st.R c0c301898c1ca1ff9582fd521ab4eea5 *R/rsu.sep.rsfreecalc.R ef50e8b2398a685d7c866e91e30458f7 *R/rsu.sep.rsmult.R e3f60e76494b59895770accffd71f9a7 *R/rsu.sep.rspool.R a9f22f060ec1fed1176d124164665cfb *R/rsu.sep.rsvarse.R 25f280846705fc2874eb6c55c7abf3b7 *R/rsu.spp.rs.R 2f4fb7f4f247c87547abd003cf3eee86 *R/rsu.sspfree.rs.R 1dc40d4af1e2f24620c9c3e7f416c1b8 *R/rsu.sssep.rb2st1rf.R 2cf4e5bc59de56063bc493aa7faa45a3 *R/rsu.sssep.rb2st2rf.R 9e7eec197ecc0dbab7d6712f77dae1a2 *R/rsu.sssep.rbmrg.R a269028ef59cf1ad72bae3cf7d84ebe4 *R/rsu.sssep.rbsrg.R 64b8a62e9e9d33bf6ad7c621878dda73 *R/rsu.sssep.rs.R 7640213e3c16fa83e8ae2fb9f48d54f1 *R/rsu.sssep.rs2st.R 8022f13536f897b2865027f5dc1ada0d *R/rsu.sssep.rsfreecalc.R 87c1f914432d00b9cd581958aca21e79 *R/rsu.sssep.rspool.R d15c0403a0b5ecdc2c183c9ddb4c1ab6 *R/zARscore.R b1a6c72d0dbd4bc33eb0cb96ce42c470 *R/zARwald.R 7d2f659ba184ade06466bf9373f8eab9 *R/zMHRD.GR.R c67bfb708f5711de19b842be8542e143 *R/zMHRD.Sato.R 8694667de27a8e1afc4c0567cadd72fd *R/zMHRD.Sato0.R 9e29660c17100ac89212cd113f0f46ad *R/zORcfield.R a42709a7d6c620cc2df6788188d21886 *R/zORml.R 2c78daf5f14af0afbdf0145ffeebbd4f *R/zORscore.R 2e3bbed3e7346335b438480e7bfd5332 *R/zORwald.R b07bf7602901561501fa73c269db9280 *R/zRRscore.R 9413a076af61628cf0602f60780c23e2 *R/zRRtaylor.R bff22705c492fa9d4217dec409560603 *R/zRRwald.R 367db2abbac44ed6f7cf08e551aa390b *R/zagresti.R ccecf921d12a94fc9a0436144d3691af *R/zclopperpearson.R 2193730fba66814c5026b8aef56a1667 *R/zdisc.prior.R b1c955527e566ed3c693445932fddefc *R/zexact.R 04cd622bea721b72728f8e55e5e8d06a *R/zfleiss.R a6c81cf0fecb6be26bb545cfdc5c6617 *R/zget.cp.R 75ab2c2711e9adc75d472d06493a24a2 *R/zincrate.R f6867e5570430c95b2d39a511ea34094 *R/zjeffreys.R 4cab435dbb27fe4a4cee1f30d63ddd16 *R/zlimit.R fc06f3a3db7f382f1bc435fe73aa7836 *R/zn.binom.R d1f7bb02fe43d8e3d0ca20914fcb82ab *R/zn.hypergeo.R 447cdd2cc38f2342226e2710577c8c26 *R/zrsu.rspfree.1.R 653920cd56fcef998f62f2bafb926ff8 *R/zsep.binom.R d6a967b91c1a4014ff01042ca18b895d *R/zsep.binom.imperfect.R ff498de74510e53729c86d69dceba84a *R/zsep.hypergeo.R 6dba31143de4b00cafa39db26b8f122b *R/zsep.pfree.R b4cf33b8c56797df2feb82618d45da94 *R/zsph.binom.R f4146fe1ad6564e751528e4f3be28b9a *R/zwilson.R 81719befc03215c9d112004a4d4d0d04 *R/zz2stat.R 491cf20244f710d9379ce18c8a3ec67c *R/zzz.R da1c73f2462f5be92d287398f3df923d *build/vignette.rds e21ab6f2a8b2c8ed12468ce39757977e *data/epi.SClip.RData fd9920dce74d1e6cce1932570a9e85f4 *data/epi.epidural.RData 64af3548130a5495e0aed3c2b58f864f *data/epi.incin.RData 8ee362682ac5703ce01414810d7c71f3 *inst/doc/epiR_RSurveillance.R 23ee75821868486e58c1690655a0cfed *inst/doc/epiR_RSurveillance.Rmd 56be4c2281f4b8db8e545da0e8ec619f *inst/doc/epiR_RSurveillance.html 8c27c35de9894971f2262fdd468555e2 *inst/doc/epiR_descriptive.R 50cef379b011b9de459d10b34b36e0e2 *inst/doc/epiR_descriptive.Rmd 
5823f76c70822f69acb01bf481d11a41 *inst/doc/epiR_descriptive.html 890834da7339a0fc89a1ff957f4d4e69 *inst/doc/epiR_measures_of_association.R 84c1428b5f7eb8c60acfb23aad65fa08 *inst/doc/epiR_measures_of_association.Rmd 2f31eb2d8db4587ab778a49f26ec6fd9 *inst/doc/epiR_measures_of_association.html eb4800ada8c27acf5603327fd4ad53cd *inst/doc/epiR_sample_size.R 01e9aca1de4d3258371ffb7674229bc9 *inst/doc/epiR_sample_size.Rmd 5a13931e8dd2477f82f77ff5b66cf00d *inst/doc/epiR_sample_size.html 3634fc67fd086cf0a5dcf5f5a37dc3e8 *inst/doc/epiR_surveillance.R fba5fb620b339f9d4d09077c57e58bdd *inst/doc/epiR_surveillance.Rmd cee52975f19dcbe1e7f3dd7c96e083d3 *inst/doc/epiR_surveillance.html 1d89952c5c16cc0704ae8da480a23c3c *man/epi.2by2.Rd f42bb15fa9bf5d14f728c1c1aa147e36 *man/epi.RtoBUGS.Rd 189a2a8bdb0082ceeaf7d721ef637686 *man/epi.SClip.Rd ab7ccfa3734b52f0eb9a885622e1e333 *man/epi.about.Rd 5779280680d12e1355f64bf0984401fe *man/epi.asc.Rd fe02a56ebab6b2ff55bccf16b50f8780 *man/epi.betabuster.Rd 491f29de6888c2a68760a1daa2bb6af5 *man/epi.blcm.paras.Rd 89f4bb13fc4c52766b1a59957dbd0bb7 *man/epi.bohning.Rd 9340ee6aff00263cdb8b1750ba28ad9c *man/epi.ccc.Rd 9d046a376859a14498c9dd82bfef6aee *man/epi.conf.Rd 914d1f2737ce1b8ed796777cd8d7fc05 *man/epi.convgrid.Rd a72f876827a5c350d86544153597fed2 *man/epi.cp.Rd 34b57f86b14e1472bffa5da0602abc4c *man/epi.cpresids.Rd c830a44cf0b0a01291071b77e4a8c941 *man/epi.descriptives.Rd 30abe14c38b6f0ad4996bc3d0484c378 *man/epi.dgamma.Rd 79e7e2a56ddb5211411a364a63748ca4 *man/epi.directadj.Rd e83db9c4e594c5fa4ce5477800e695b2 *man/epi.dms.Rd b217793b01ae44a18c9029fd85796b06 *man/epi.dsl.Rd f93caded5dc85afcbae3ba369ba42e9a *man/epi.edr.Rd e857e176b3ea8194b1a3e26eb1a44cde *man/epi.empbayes.Rd 623bf229ccd13149e887c7760b3baf44 *man/epi.epidural.Rd 11594d5361973ae5e9c5be9a81660da7 *man/epi.herdtest.Rd e9184315369aa93b32e1126e1487cf95 *man/epi.incin.Rd a1743efb0506827aecbc6bfadfcb5ee5 *man/epi.indirectadj.Rd 8212727694cf9a422e68ef7abec9b623 *man/epi.insthaz.Rd 3afc01bb2040db2fbb73835d44aef306 *man/epi.interaction.Rd 0e12d55a7790dbbb98401685ab542ec1 *man/epi.iv.Rd 00a522620cf39ccd08f489c25238b3db *man/epi.kappa.Rd d7cb8eaf692e9aafc19cc051e4a8ff80 *man/epi.ltd.Rd b1e183f8c9ea246f6cafef84173b50bc *man/epi.mh.Rd faacb8e6442f58301f50e16468230498 *man/epi.nomogram.Rd 94c9d98bbf48fc2b075b949830c55b49 *man/epi.occc.Rd e5333e25afb89f64c2d81c0996083733 *man/epi.offset.Rd 9c8d71ba25b9daed141b9a34573ba120 *man/epi.pooled.Rd 4feba08068fed0d4b92aa7aafef9ca20 *man/epi.popsize.Rd 5a697af6787a547c5c7d8def3f1498a1 *man/epi.prcc.Rd 64383087923f41144e7e21056b766aea *man/epi.prev.Rd 734daea4b79dcdb8803cb9e64ae09b39 *man/epi.psi.Rd ba1d761f07a27f5ac401b50f4a5e0bef *man/epi.smd.Rd 56c32551473ae1fb881373a652cff1e5 *man/epi.smr.Rd a283e114b34ee14cb06b62de0f96e67d *man/epi.sscc.Rd 69328a4ebc02cd0426dc8c7ce2e3c4f6 *man/epi.ssclus1estb.Rd 485d434834665a6df279458438367e2d *man/epi.ssclus1estc.Rd c44d6d7eb48bed3977d5e4943e268181 *man/epi.ssclus2estb.Rd 121d23dfe125df240c20d6f187364f48 *man/epi.ssclus2estc.Rd 5e24c9a27c4f130bd9acc79128e9c851 *man/epi.sscohortc.Rd 543320712dbc598fd9f644d93aae2987 *man/epi.sscohortt.Rd b9a1cece43e4061f436e9ebc248a9f65 *man/epi.sscompb.Rd ebac1e8fd761ff9d27af704fcd925e08 *man/epi.sscompc.Rd 563fd80944781e70fd0c25bb54982a7b *man/epi.sscomps.Rd ef258da56acaa55ed929c5442540ffd0 *man/epi.ssdetect.Rd c9e58dfadfdc72f954d5c8206844e85a *man/epi.ssdxsesp.Rd b7355d37c832bd5bbfb9b84fe9805388 *man/epi.ssdxtest.Rd 4e8a78f0144b7936a121e74400828d39 *man/epi.ssequb.Rd 8f67abbe011e0f5baa34d8a073005e30 
*man/epi.ssequc.Rd ec0b1f0aecc2b8314aa86e26cfe42018 *man/epi.ssninfb.Rd 39b603141e947e38192788382b95b6de *man/epi.ssninfc.Rd 5497745655745e9d9d3ba8425ae1f2fb *man/epi.sssimpleestb.Rd 8d2ef4589b9c3fc34355b7a5ea8cb6a4 *man/epi.sssimpleestc.Rd aad16f896bee41c04808ee5e52373e5a *man/epi.ssstrataestb.Rd b0ad674c9d64b9c693425b75dbcc7c9a *man/epi.ssstrataestc.Rd babb4d2a83f3f409675bd6ca0bf4885d *man/epi.sssupb.Rd 2bcaf117d118752f89e7e4ed0a1751e2 *man/epi.sssupc.Rd 0ab1f464a0b154cf3f3c676413540fdf *man/epi.ssxsectn.Rd 3c91e093c698ef8f8a9b4fb7c2699cf1 *man/epi.tests.Rd 96c8dc99b01d0d3bfece11489f1f2353 *man/rsu.adjrisk.Rd 809988a1e009504765b98de66ace2907 *man/rsu.dxtest.Rd 2566a5f5d3e4af820d37aea9c06011ed *man/rsu.epinf.Rd d8a517f74a21e19de4dd152d6652d603 *man/rsu.pfree.equ.Rd 2e24ff2369d8c9ad17871671082e4cef *man/rsu.pfree.rs.Rd 521e1087421756b63ec6a6b2bccc67ec *man/rsu.pstar.Rd 585d011fcc68d1782ab623244665bd79 *man/rsu.sep.Rd 731a753b2a1df66b662db50a688492e0 *man/rsu.sep.cens.Rd 6bf7a380a4750fdc73d0c045757c9c73 *man/rsu.sep.pass.Rd 19e1cd4c5ef62617b5eaa0aefa706c27 *man/rsu.sep.rb.Rd dd677657b932e11dfca166d6586786c3 *man/rsu.sep.rb1rf.Rd 52951a660fb5c6212ec5093b0673192b *man/rsu.sep.rb2rf.Rd b4993b8281639eb83a2dc93044493184 *man/rsu.sep.rb2st.Rd 3d8a1762ee6d7149a9930249377a5944 *man/rsu.sep.rbvarse.Rd 2cf523164571d53acad857dc2589edfc *man/rsu.sep.rs.Rd 5ae8355a5ed7d56c90e9e904873a8593 *man/rsu.sep.rs2st.Rd 725d7db80c50f1fd17e586dae4348715 *man/rsu.sep.rsfreecalc.Rd 2cda5c9d0be4d5696aa72609b3bb12f4 *man/rsu.sep.rsmult.Rd 408d17df92713206c951831b29d5ecfe *man/rsu.sep.rspool.Rd 0c21b19d9ce80d56b9f12461c0d091e8 *man/rsu.sep.rsvarse.Rd 1e75531090f9c6ffafd23606819c776e *man/rsu.spp.rs.Rd 7d4cf8fc7f150397c6b210596aadb735 *man/rsu.sspfree.rs.Rd 1668972bebb0655353a866bdc4740ca4 *man/rsu.sssep.rb2st1rf.Rd 2fc7c73f73200f8b91aa608606d15d66 *man/rsu.sssep.rb2st2rf.Rd aa2a51c1938d970409a36d8171396360 *man/rsu.sssep.rbmrg.Rd ba4c371718888951505ae06a96687154 *man/rsu.sssep.rbsrg.Rd 9ef5d67c7132c7d830ca3267b3fd1257 *man/rsu.sssep.rs.Rd 9bdb0e19976b80ee17f96621102cb12d *man/rsu.sssep.rs2st.Rd fedbda3f8710db5c92dbb1f7b3756dbf *man/rsu.sssep.rsfreecalc.Rd 200155cbf3d4a37f44bd2091b7283141 *man/rsu.sssep.rspool.Rd dc68a6c64f1e828a863477b3103c6c08 *vignettes/attributable_fraction.png 59d15cbb03278ebab02d560412003429 *vignettes/attributable_risk.png 23ee75821868486e58c1690655a0cfed *vignettes/epiR_RSurveillance.Rmd 50cef379b011b9de459d10b34b36e0e2 *vignettes/epiR_descriptive.Rmd 67acaef1bbc5446d7e166263371e87af *vignettes/epiR_descriptive.bib 84c1428b5f7eb8c60acfb23aad65fa08 *vignettes/epiR_measures_of_association.Rmd 19dc51199053ac528a80f18242621ae5 *vignettes/epiR_measures_of_association.bib 01e9aca1de4d3258371ffb7674229bc9 *vignettes/epiR_sample_size.Rmd 46713eb4b7c2fe6566bd367ccc5a2998 *vignettes/epiR_sample_size.bib fba5fb620b339f9d4d09077c57e58bdd *vignettes/epiR_surveillance.Rmd d68a09664abacf8de9310c3a82e9a056 *vignettes/epiR_surveillance.bib 7aeda10cf14573c2c0f12e6773e5030b *vignettes/index.html eaad89cd0a2046a310e127855a51c765 *vignettes/population_attributable_fraction.png e76298cce3b2b25cf1224a723f121f17 *vignettes/population_attributable_risk.png bac71b9308554c530a0191dd4e5d6db8 *vignettes/risk_ratio.png epiR/inst/0000755000176200001440000000000014166006711012130 5ustar liggesusersepiR/inst/doc/0000755000176200001440000000000014166006711012675 5ustar liggesusersepiR/inst/doc/epiR_measures_of_association.R0000644000176200001440000001771514166006703020717 0ustar liggesusers## ---- echo = FALSE, message = 
FALSE------------------------------------------- library(knitr); library(kableExtra) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----echo = FALSE, results = 'asis'------------------------------------------- twobytwo <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","c","b+c"), "Total" = c("a+b","c+d","a+b+c+d")) colnames(twobytwo) <- c("Dis pos","Dis pos","Total") rownames(twobytwo) <- c("Exp pos","Exp neg","Total") kbl(twobytwo, caption = "A 2 by 2 table.") %>% column_spec(1, bold = FALSE, width = "5em") %>% column_spec(2, bold = FALSE, width = "5em") %>% column_spec(3, bold = FALSE, width = "5em") # row_spec(row = 1, bold = TRUE) ## ----echo = FALSE, results = 'asis'------------------------------------------- irr <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","c","b+c"), "Total" = c("a+b","c+d","a+b+c+d"),"Risk" = c("RE+ = a/(a+b)","RE- = c/(c+d)", "RT = (a+c)/(a+b+c+d)")) colnames(irr) <- c("Dis pos","Dis pos","Total","Risk") rownames(irr) <- c("Exp pos","Exp neg","Total") kbl(irr, caption = "A 2 by 2 table with incidence risks calculated for the exposed, the unexposed and the total study population.") %>% column_spec(1, bold = FALSE, width = "5em") %>% column_spec(2, bold = FALSE, width = "5em") %>% column_spec(3, bold = FALSE, width = "5em") %>% column_spec(4, bold = FALSE, width = "10em") # row_spec(row = 1, bold = TRUE) ## ----echo = FALSE, results = 'asis'------------------------------------------- or.cohort <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), "Total" = c("a+b","c+d","a+b+c+d"),"Odds" = c("OE+ = a/b","OE- = c/d", "OT = (a+c)/(b+d)")) colnames(or.cohort) <- c("Dis pos","Dis pos","Total","Odds") rownames(or.cohort) <- c("Exp pos","Exp neg","Total") kbl(or.cohort, caption = "A 2 by 2 table with the odds of disease calculated for the exposed, the unexposed and the total study population.") %>% column_spec(1, bold = FALSE, width = "5em") %>% column_spec(2, bold = FALSE, width = "5em") %>% column_spec(3, bold = FALSE, width = "5em") %>% column_spec(4, bold = FALSE, width = "10em") # row_spec(row = 1, bold = TRUE) ## ----echo = FALSE, results = 'asis'------------------------------------------- or.cc <- data.frame("Case" = c("a","c","a+c","OD+ = a/c"), "Control" = c("b","d","b+d","OD- = b/d"), "Total" = c("a+b","c+d","a+b+c+d","OT = (a+b)/(c+d)")) colnames(or.cc) <- c("Case","Control","Total") rownames(or.cc) <- c("Exp pos","Exp neg","Total","Odds") kbl(or.cc, caption = "A 2 by 2 table with the odds of exposure calculated for cases, controls and the total study population.") %>% column_spec(1, bold = FALSE, width = "5em") %>% column_spec(2, bold = FALSE, width = "5em") %>% column_spec(3, bold = FALSE, width = "5em") %>% column_spec(4, bold = FALSE, width = "10em") # row_spec(row = 1, bold = TRUE) ## ----------------------------------------------------------------------------- dat.v01 <- c(13,2163,5,3349); dat.v01 # View the data in the usual 2 by 2 table format: matrix(dat.v01, nrow = 2, byrow = TRUE) ## ----------------------------------------------------------------------------- library(epiR) epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") ## ----------------------------------------------------------------------------- epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, units = 100, interpret = TRUE, outcome = "as.columns") ## 
----------------------------------------------------------------------------- library(MASS) # Load and view the data: dat.df02 <- birthwt; head(dat.df02) ## ----------------------------------------------------------------------------- dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 ## ----------------------------------------------------------------------------- dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 dat.tab02 <- dat.tab02[2:1,2:1]; dat.tab02 ## ----------------------------------------------------------------------------- dat.df02$low <- factor(dat.df02$low, levels = c(1,0)) dat.df02$smoke <- factor(dat.df02$smoke, levels = c(1,0)) dat.df02$race <- factor(dat.df02$race, levels = c(1,2,3)) dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 ## ----------------------------------------------------------------------------- dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi02 ## ----------------------------------------------------------------------------- library(tidyverse) dat.df03 <- birthwt; head(dat.df03) # Here we set the factor levels and tabulate the data in a single call using pipe operators: dat.tab03 <- dat.df03 %>% mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% group_by(smoke, low) %>% summarise(n = n()) # View the data: dat.tab03 ## View the data in conventional 2 by 2 table format: pivot_wider(dat.tab03, id_cols = c(smoke), names_from = low, values_from = n) ## ----------------------------------------------------------------------------- dat.epi03 <- epi.2by2(dat = dat.tab03, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi03 ## ----------------------------------------------------------------------------- dat.df04 <- birthwt; head(dat.df04) dat.tab04 <- table(dat.df04$smoke, dat.df04$low, dat.df04$race, dnn = c("Smoke", "Low BW", "Race")); dat.tab04 ## ----------------------------------------------------------------------------- dat.epi04 <- epi.2by2(dat = dat.tab04, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi04 ## ----------------------------------------------------------------------------- dat.df05 <- birthwt; head(dat.df05) dat.tab05 <- dat.df05 %>% mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% mutate(race = factor(race)) %>% group_by(race, smoke, low) %>% summarise(n = n()) dat.tab05 ## View the data in conventional 2 by 2 table format: pivot_wider(dat.tab05, id_cols = c(race, smoke), names_from = low, values_from = n) ## ----------------------------------------------------------------------------- dat.epi05 <- epi.2by2(dat = dat.tab05, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi05 ## ----------------------------------------------------------------------------- library(ggplot2); library(scales) nstrata <- 1:length(unique(dat.tab05$race)) strata.lab <- paste("Strata ", nstrata, sep = "") y.at <- c(nstrata, max(nstrata) + 1) y.lab <- c("M-H", strata.lab) x.at <- c(0.25,0.5,1,2,4,8,16,32) or.p <- c(dat.epi05$massoc.detail$OR.mh$est, dat.epi05$massoc.detail$OR.strata.cfield$est) or.l <- 
c(dat.epi05$massoc.detail$OR.mh$lower, dat.epi05$massoc.detail$OR.strata.cfield$lower) or.u <- c(dat.epi05$massoc.detail$OR.mh$upper, dat.epi05$massoc.detail$OR.strata.cfield$upper) gdat.df05 <- data.frame(y.at, y.lab, or.p, or.l, or.u) ggplot(data = gdat.df05, aes(x = or.p, y = y.at)) + geom_point() + geom_errorbarh(aes(xmax = or.l, xmin = or.u, height = 0.2)) + labs(x = "Odds ratio", y = "Strata") + scale_x_continuous(trans = log2_trans(), breaks = x.at, limits = c(0.25,32)) + scale_y_continuous(breaks = y.at, labels = y.lab) + geom_vline(xintercept = 1, lwd = 1) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0)) epiR/inst/doc/epiR_descriptive.Rmd0000644000176200001440000005251014153272044016644 0ustar liggesusers--- title: "Descriptive Epidemiology using epiR" author: "Mark Stevenson" date: "`r Sys.Date()`" bibliography: epiR_descriptive.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Descriptive epidemiology} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` Epidemiology is the study of the frequency, distribution and determinants of health-related states in populations and the application of such knowledge to control health problems [@cdc:2006]. This vignette provides instruction on the way R and `epiR` can be used for descriptive epidemiological analyses, that is, to describe how the frequency of disease varies by individual, place and time. ## Indivdual Descriptions of disease frequency involves reporting either the **prevalence** or **incidence** of disease. Some definitions. Strictly speaking, 'prevalence' equals the number of cases of a given disease or attribute that exists in a population at a specified point in time. Prevalence risk is the proportion of a population that has a specific disease or attribute at a specified point in time. Many authors use the term 'prevalence' when they really mean prevalence risk, and these notes will follow this convention. Two types of prevalence are reported in the literature: (1) **point prevalence** equals the proportion of a population in a diseased state at a single point in time, (2) **period prevalence** equals the proportion of a population with a given disease or condition over a specific period of time (i.e. the number of existing cases at the start of a follow-up period plus the number of incident cases that occur during the follow-up period). Incidence provides a measure of how frequently susceptible individuals become disease cases as they are observed over time. An incident case occurs when an individual changes from being susceptible to being diseased. The count of incident cases is the number of such events that occur in a population during a defined follow-up period. There are two ways to express incidence: **Incidence risk** (also known as cumulative incidence) is the proportion of initially susceptible individuals in a population who become new cases during a defined follow-up period. 
**Incidence rate** (also known as incidence density) is the number of new cases of disease that occur per unit of individual time at risk during a defined follow-up period. In addition to reporting the point estimate of disease frequency, it is important to provide an indication of the uncertainty around that point estimate. The `epi.conf` function in the `epiR` package allows you to calculate confidence intervals for prevalence, incidence risk and incidence rates. Let's say we're interested in the prevalence of disease X in a population comprised of 1000 individuals. Two hundred are tested and four returned a positive result. Assuming 100% test sensitivity and specificity, what is the estimated prevalence of disease X in this population? ```{r message = FALSE} library(epiR); library(ggplot2); library(scales) ncas <- 4; npop <- 200 tmp <- as.matrix(cbind(ncas, npop)) epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 ``` The estimated prevalence of disease X in this population is 2.0 (95% confidence interval [CI] 0.55 -- 5.0) cases per 100 individuals at risk. Another example. A study was conducted by @feychting_et_al:1998 to report the frequency of cancer among the blind. A total of 136 diagnoses of cancer were made from 22,050 person-years at risk. What was the incidence rate of cancer in this population? ```{r} ncas <- 136; ntar <- 22050 tmp <- as.matrix(cbind(ncas, ntar)) epi.conf(tmp, ctype = "inc.rate", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 1000 ``` The incidence rate of cancer in this population was 6.2 (95% CI 5.2 to 7.3) cases per 1000 person-years at risk. Now lets say we want to compare the frequency of disease across several populations. An effective way to do this is to use a ranked error bar plot. With a ranked error bar plot the points represent the point estimate of the measure of disease frequency and the error bars indicate the 95% confidence interval around each estimate. The disease frequency estimates are then sorted from lowest to highest. Generate some data. First we'll generate a distribution of disease prevalence estimates. Let's say it has a mode of 0.60 and we're 80% certain that the prevalence is greater than 0.35. Use the `epi.betabuster` function to generate parameters that can be used for a beta distribution to satisfy these constraints: ```{r} tmp <- epi.betabuster(mode = 0.60, conf = 0.80, greaterthan = TRUE, x = 0.35, conf.level = 0.95, max.shape1 = 100, step = 0.001) tmp$shape1; tmp$shape2 ``` Now take 100 draws from a beta distribution using the `shape1` and `shape2` values calculated above and plot them as a frequency histogram: ```{r dfreq01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq01}Frequency histogram of disease prevalence estimates for our simulated population."} dprob <- rbeta(n = 25, shape1 = tmp$shape1, shape2 = tmp$shape2) dat.df <- data.frame(dprob = dprob) ggplot(data = dat.df, aes(x = dprob)) + theme_bw() + geom_histogram(binwidth = 0.01, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_continuous(limits = c(0,1), name = "Prevalence") + scale_y_continuous(limits = c(0,10), name = "Number of draws") ``` Generate a vector of population sizes using the uniform distribution. Calculate the number of diseased individuals in each population using `dprob` (calculated above). Finally, calculate the prevalence of disease in each population and its 95% confidence interval using `epi.conf`. 
The function `epi.conf` provides several options for confidence interval calculation methods for prevalence. Here we'll use the exact method: ```{r} dat.df$rname <- paste("Region ", 1:25, sep = "") dat.df$npop <- round(runif(n = 25, min = 20, max = 1500), digits = 0) dat.df$ncas <- round(dat.df$dprob * dat.df$npop, digits = 0) tmp <- as.matrix(cbind(dat.df$ncas, dat.df$npop)) tmp <- epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 dat.df <- cbind(dat.df, tmp) head(dat.df) ``` Sort the data in order of variable `est` and assign a 1 to `n` identifier as variable `rank`: ```{r} dat.df <- dat.df[sort.list(dat.df$est),] dat.df$rank <- 1:nrow(dat.df) ``` Now create a ranked error bar plot. Because its useful to provide the region-area names on the horizontal axis we'll rotate the horizontal axis labels by 90 degrees. ```{r dfreq02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq02}Ranked error bar plot showing the prevalence of disease (and its 95% confidence interval) for 100 population units."} ggplot(data = dat.df, aes(x = rank, y = est)) + theme_bw() + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) + geom_point() + scale_x_continuous(limits = c(0,25), breaks = dat.df$rank, labels = dat.df$rname, name = "Region") + scale_y_continuous(limits = c(0,100), name = "Cases per 100 individuals at risk") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` ## Time Epidemic curve data are often presented in one of two formats: 1. One row for each individual identified as a case with an event date assigned to each. 2. One row for every event date with an integer representing the number of cases identified on that date. Generate some data, with one row for every individual identified as a case: ```{r} n.males <- 100; n.females <- 50 odate <- seq(from = as.Date("2004-07-26"), to = as.Date("2004-12-13"), by = 1) prob <- c(1:100, 41:1); prob <- prob / sum(prob) modate <- sample(x = odate, size = n.males, replace = TRUE, p = prob) fodate <- sample(x = odate, size = n.females, replace = TRUE) dat.df <- data.frame(sex = c(rep("Male", n.males), rep("Female", n.females)), odate = c(modate, fodate)) # Sort the data in order of odate: dat.df <- dat.df[sort.list(dat.df$odate),] ``` Plot the epidemic curve using the `ggplot2` and `scales` packages: ```{r epicurve01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve01}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2004."} ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("7 days"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` Produce a separate epidemic curve for males and females using the `facet_grid` option in `ggplot2`: ```{r epicurve03-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve03}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex."} ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = 
seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + facet_grid( ~ sex) ``` Let's say an event occurred on 31 October 2004. Mark this date on your epidemic curve using `geom_vline`: ```{r epicurve04-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve04}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex. An event that occurred on 31 October 2004 is indicated by the vertical dashed line."} ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + facet_grid( ~ sex) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") ``` Plot the total number of disease events by day, coloured according to sex: ```{r epicurve05-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve05}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex."} ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") + scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + theme(legend.position = c(0.90, 0.80)) ``` It can be difficult to appreciate differences in male and female disease counts as a function of date with the above plot format so dodge the data instead: ```{r epicurve06-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve06}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex."} ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", size = 0.1, position = "dodge") + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") + scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + theme(legend.position = c(0.90, 0.80)) ``` We now provide code to deal with the situation where the data are presented with one row for every case event date and an integer representing the number of cases identified on each date. Simulate some data in this format. In the code below the variable `ncas` represents the number of cases identified on a given date. The variable `dcontrol` is a factor with two levels: `neg` and `pos`. 
Level `neg` flags dates when no disease control measures were in place; level `pos` flags dates when disease controls measures were in place. ```{r} odate <- seq(from = as.Date("1/1/00", format = "%d/%m/%y"), to = as.Date("1/1/05", format = "%d/%m/%y"), by = "1 month") ncas <- round(runif(n = length(odate), min = 0, max = 100), digits = 0) dat.df <- data.frame(odate, ncas) dat.df$dcontrol <- "neg" dat.df$dcontrol[dat.df$odate >= as.Date("1/1/03", format = "%d/%m/%y") & dat.df$odate <= as.Date("1/6/03", format = "%d/%m/%y")] <- "pos" head(dat.df) ``` Generate an epidemic curve. Note `weight = ncas` in the aesthetics argument for `ggplot2`: ```{r epicurve07-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve07}Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures."} ggplot() + theme_bw() + geom_histogram(dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) + scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), name = "Date") + scale_y_continuous(limits = c(0,200), name = "Number of cases") + scale_fill_manual(values = c("#738ca6","#d46a6a")) + guides(fill = "none") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` Now we'll add a line to the plot to show the cumulative number of cases detected as a function of calendar date. The coding here requires some thought. First question: What was the cumulative number of cases at the end of the follow-up period? Here we use the `cumsum` (cumulative sum) function in base R: ```{r} cumsum(dat.df$ncas) ``` At the end of the follow-up period the cumulative number of cases was in the order of 3100 (exact numbers will vary because we've used a simulation approach to generate this data). What we need to do is to get our 0 to 3100 cumulative cases to 'fit' into the 0 to 200 vertical axis limits of the epidemic curve. A reasonable approach would be to: (1) divide cumulative case numbers by 10; (2) set 350 as the upper limit of the vertical axis; and (3) set `sec.axis = sec_axis(~ . * 10)` to multiply the values that appear on the primary vertical axis by 10 for the labels that appear on the secondary vertical axis: ```{r epicurve08-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve08}Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures. Superimposed on this plot is a line showing cumulative case numbers."} ggplot() + theme_bw() + geom_histogram(data = dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) + geom_line(data = dat.df, mapping = aes(x = odate, y = cumsum(ncas) / 10)) + scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), name = "Date") + scale_y_continuous(limits = c(0,350), name = "Number of cases", sec.axis = sec_axis(~ . * 10, name = "Cumulative number of cases")) + scale_fill_manual(values = c("#738ca6","#d46a6a")) + guides(fill = "none") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ``` ## Place Two types of maps are often used when describing patterns of disease by place: 1. Choropleth maps. Choropleth mapping involves producing a summary statistic of the outcome of interest (e.g. 
count of disease events, prevalence, incidence) for each component area within a study region. A map is created by 'filling' (i.e. colouring) each component area with colour, providing an indication of the magnitude of the variable of interest and how it varies geographically. 2. Point maps. **Choropleth maps** For illustration we make a choropleth map of sudden infant death syndrome (SIDS) babies in North Carolina counties for 1974 using the `nc.sids` data provided with the `spData` package. ```{r message = FALSE, warning = FALSE} library(sf); library(spData); library(rgdal); library(plyr); library(RColorBrewer); library(spatstat) ncsids.sf <- st_read(dsn = system.file("shapes/sids.shp", package = "spData")[1]) ncsids.sf <- ncsids.sf[,c("BIR74","SID74")] head(ncsids.sf) ``` The `ncsids.sf` simple features object lists for each county in the North Carolina USA the number SIDS deaths for 1974. Plot a choropleth map of the counties of the North Carolina showing SIDS counts for 1974: ```{r spatial01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial01}Map of North Carolina, USA showing the number of sudden infant death syndrome cases, by county for 1974."} ggplot() + theme_bw() + geom_sf(data = ncsids.sf, aes(fill = SID74), colour = "dark grey") + scale_fill_gradientn(limits = c(0,60), colours = brewer.pal(n = 5, "Reds"), guide = "colourbar") + scale_x_continuous(name = "Longitude") + scale_y_continuous(name = "Latitude") + labs(fill = "SIDS 1974") ``` **Point maps** For this example we will used the `epi.incin` data set included with `epiR`. Between 1972 and 1980 an industrial waste incinerator operated at a site about 2 kilometres southwest of the town of Coppull in Lancashire, England. Addressing community concerns that there were greater than expected numbers of laryngeal cancer cases in close proximity to the incinerator @diggle:1990 conducted a study investigating risks for laryngeal cancer, using recorded cases of lung cancer as controls. The study area is 20 km x 20 km in size and includes location of residence of patients diagnosed with each cancer type from 1974 to 1983. Load the `epi.incin` data set and create negative and positive labels for each point location. We don't have a boundary map for these data so we'll use `spatstat` to create a convex hull around the points and dilate the convex hull by 1000 metres as a proxy boundary. The point locations in this data are projected using the British National Grid coordinate reference system (EPSG code 27700). Create an observation window for the data as `coppull.ow` and a `ppp` object for plotting: ```{r message = FALSE} data(epi.incin); incin.df <- epi.incin incin.df$status <- factor(incin.df$status, levels = c(0,1), labels = c("Neg", "Pos")) names(incin.df)[3] <- "Status" incin.sf <- st_as_sf(incin.df, coords = c("xcoord","ycoord"), remove = FALSE) st_crs(incin.sf) <- 27700 coppull.ow <- convexhull.xy(x = incin.df[,1], y = incin.df[,2]) coppull.ow <- dilation(coppull.ow, r = 1000) ``` Create a simple features polygon object from `coppull.ow`. 
First we convert `coppull.ow` to a `SpatialPolygonsDataFrame` object: ```{r} coords <- matrix(c(coppull.ow$bdry[[1]]$x, coppull.ow$bdry[[1]]$y), ncol = 2, byrow = FALSE) pol <- Polygon(coords, hole = FALSE) pol <- Polygons(list(pol),1) pol <- SpatialPolygons(list(pol)) coppull.spdf <- SpatialPolygonsDataFrame(Sr = pol, data = data.frame(id = 1), match.ID = TRUE) ``` Convert the `SpatialPolygonsDataFrame` to an `sf` object and set the coordinate reference system: ```{r} coppull.sf <- as(coppull.spdf, "sf") st_crs(coppull.sf) <- 27700 ``` The `mformat` function is used to plot the axis labels in kilometres (instead of metres): ```{r} mformat <- function(){ function(x) format(x / 1000, digits = 2) } ``` ```{r spatial02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial02}Point map showing the place of residence of individuals diagnosed with laryngeal cancer (Pos) and lung cancer (Neg), Copull Lancashire, UK, 1972 to 1980."} ggplot() + theme_bw() + geom_sf(data = incin.sf, aes(colour = Status, shape = Status)) + geom_sf(data = coppull.sf, fill = "transparent", colour = "black") + coord_sf(datum = st_crs(coppull.sf)) + scale_colour_manual(values = c("grey","red")) + scale_shape_manual(values = c(1,16)) + scale_x_continuous(name = "Easting (km)", labels = mformat()) + scale_y_continuous(name = "Northing (km)", labels = mformat()) + theme(legend.position = c(0.10, 0.15)) ``` ## References epiR/inst/doc/epiR_surveillance.html0000644000176200001440000034400014166006711017237 0ustar liggesusers Design and Analysis of Disease Surveillance Programs Using epiR

Design and Analysis of Disease Surveillance Programs Using epiR

Mark Stevenson and Evan Sergeant

2022-01-07

Surveillance is defined as the on-going systematic collection, collation and interpretation of accurate information about a defined population with respect to disease and/or infection, closely integrated with timely dissemination of that information to those responsible for control and prevention measures (Thacker and Berkelman 1988).

The Terrestrial Animal Health Code of the World Organisation of Animal Health (OIE 2021) defines surveillance as the investigation of a given population or subpopulation to detect the presence of a pathogenic agent or disease; the frequency and type of surveillance will be determined by the epidemiology of the pathogenic agent or disease, and the desired outputs. Surveillance is a tool for monitoring changes in health related events in a defined population with specific goals relating to: (1) the detection of disease incursions, both new and emerging, (2) the assessment of progress in terms of control or eradication of selected diseases and pathogens, (3) demonstration of disease freedom for trading partners, and (4) identification of hazards or risk factors for disease outbreaks.

This vignette provides instruction on the way R and epiR (and specifically the surveillance functions within epiR) can be used for: (1) the design of disease surveillance programs; and (2) the design of programs to provide a quantitative basis for claims for disease freedom.

Definitions

Design prevalence. The design prevalence (minimum detectable prevalence, maximum acceptable or permissible prevalence, and minimum expected prevalence) is a fixed value for prevalence used for testing the hypothesis that disease is present in a population of interest. The null hypothesis is that disease is present in the population at a prevalence equal to or greater than the design prevalence. If a sufficient number of samples are collected and all return a negative result we may reject the null hypothesis and accept the alternative hypothesis to conclude that the prevalence is less than the design prevalence.

A design prevalence is not related to any actual prevalence of disease in the population under study. It is not subject to uncertainty or variability and therefore doesn’t need to be described using a distribution. Cluster-level design prevalence refers to a design prevalence assigned at the cluster (e.g. village, herd or household) level. Unit-level design prevalence refers to a design prevalence assigned at the individual unit (e.g. cow, sheep, bird) level. The unit-level prevalence of disease can be applied either within clusters (e.g. herds, flocks, villages) or across broader, unclustered populations (e.g. human populations or wildlife).
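
As a rough illustration of how the design prevalence enters these calculations (a minimal sketch, assuming a large population, a perfectly specific test and the binomial approximation), the probability that at least one of n sampled units returns a positive result when disease is present at the design prevalence pstar is 1 - (1 - se.u * pstar)^n, where se.u is the unit-level test sensitivity:

se.u <- 0.95; pstar <- 0.05; n <- 62
1 - (1 - se.u * pstar)^n   # approximately 0.95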

Surveillance system. A surveillance system is a set of procedures to collect, collate and interpret information about a defined population with respect to disease. Most surveillance systems are comprised of several activities (e.g. on-farm testing, abattoir surveillance, disease hotlines) called surveillance system components. Each surveillance system component is comprised of surveillance system units. Surveillance system units are the individual items that get examined within each surveillance system component. For the surveillance system components listed above (on-farm testing, abattoir surveillance, disease hotlines) the corresponding surveillance units would be individual animals, carcasses and individual phone reports, respectively.

Unit sensitivity. Unit sensitivity is defined as the average probability that a unit (selected from those processed) will return a positive surveillance outcome, given that disease is present in the population at a level equal to or greater than a specified design prevalence.

Component sensitivity. Component sensitivity (CSe) is defined as the average probability that a surveillance system component will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than the specified design prevalence.

Surveillance system sensitivity. Surveillance system sensitivity (SSe) is defined as the average probability that a surveillance system (as a whole) will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than a specified design prevalence.
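
Where a surveillance system is made up of several components and the components can be assumed to operate independently (an assumption that is rarely strictly true; the function rsu.sep.rsmult, listed later in this vignette, provides a more complete treatment), component sensitivities combine in the same way as independent tests interpreted in parallel. A minimal sketch with three hypothetical component sensitivities:

cse <- c(0.45,0.30,0.20)   # hypothetical component sensitivities
1 - prod(1 - cse)          # approximately 0.69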

An approach for thinking about surveillance design and assessment

The first thing to consider when you’re designing or assessing a surveillance program is to consider the sampling method that will be used. If a surveillance program has been designed to detect a specific pathogen sampling will usually be either representative or risk-based. Other options include the situation where you might observe every individual in a population (a census) or where you might take no active steps to collect surveillance data but instead rely on stakeholders to report their observations on a voluntary basis (passive surveillance).

Once we have the method of sampling defined we then move on to think about the different tasks that need to be done in terms of design of the actual surveillance system and finally, how we might assess the surveillance system once it has been designed and implemented.

In terms of design, once you have specified the sampling method you need to determine how many surveillance system units will be sampled (usually to achieve a defined surveillance system sensitivity).

Once samples have been collected and tested, or if you are making an assessment of an existing surveillance system, we might then want to answer the question: if the disease of interest is actually present in the population, what is the chance that the surveillance system will actually detect it? This question can be expressed in three other ways: (1) What is the surveillance system sensitivity? or (2) What is the probability that the prevalence of disease is less than the specified design prevalence? or (3) What is the surveillance system’s negative predictive value?

The remainder of this vignette follows this general structure. For each sampling method (representative, risk-based, census and passive) we provide notes and examples on the use of epiR for sample size estimation, estimation of surveillance system sensitivity and estimation of the probability of disease freedom. While ‘estimation of the probability of disease freedom’ is the name assigned to the last group of analyses a more correct label would be ‘estimation of the probability that the prevalence of disease is less than a specified design prevalence’ (i.e. the negative predictive value of the surveillance system). Be aware that we can only truly demonstrate disease freedom if every member of the population at risk is assessed using a test with perfect diagnostic sensitivity and perfect diagnostic specificity.
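
To make the link between surveillance system sensitivity and the probability that the prevalence is less than the design prevalence concrete, a minimal sketch (assuming a perfectly specific surveillance system and ignoring the risk of disease being introduced between surveillance periods) applies Bayes' theorem directly. With a prior probability of freedom of 0.50 and a surveillance system sensitivity of 0.95:

prior.free <- 0.50; sse <- 0.95
prior.free / (prior.free + (1 - sse) * (1 - prior.free))   # approximately 0.95

The function rsu.pfree.rs, described later in this vignette, performs calculations of this kind, with adjustment for the probability of disease introduction.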

Representative sampling

Sample size estimation

The sample size functions for surveillance representative sampling in epiR fall into two classes: sampling to achieve a defined probability of disease freedom and sampling to achieve a defined surveillance system sensitivity.

The surveillance system sensitivity sample size functions include those for simple random sampling and two stage sampling. Two stage sampling is the preferred (indeed, the only practical approach) when a population is organised in clusters (e.g. cows within herds, households within villages). With two stage sampling clusters (herds, villages) are sampled first and then from within each selected cluster individual surveillance units are sampled.
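
For simple representative sampling from a large population with a perfectly specific test, the required sample size can be sketched by inverting the binomial detection probability: n = log(1 - se.p) / log(1 - se.u * pstar), rounded up. This reproduces the result of rsu.sssep.rs in Example 1 below; the function itself should be preferred because it also handles finite populations:

se.p <- 0.95; se.u <- 0.95; pstar <- 0.05
ceiling(log(1 - se.p) / log(1 - se.u * pstar))   # 62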

Functions to estimate sample size using representative population sampling data.
Sampling Outcome Details Function
Representative Prob disease freedom Imperfect Se, perfect Sp rsu.sspfree.rs
Representative SSe Imperfect Se, perfect Sp rsu.sssep.rs
Two stage representative SSe Imperfect Se, perfect Sp rsu.sssep.rs2st
Representative SSe Imperfect Se, imperfect Sp, known N rsu.sssep.rsfreecalc
Pooled representative SSe Imperfect Se, imperfect Sp rsu.sssep.rspool

EXAMPLE 1

A cross-sectional study is to be carried out to confirm the absence of brucellosis in dairy herds using a bulk milk tank test assuming a design prevalence of 5%. Assume the total number of dairy herds in your study area is unknown and large and the bulk milk tank test to be used has a diagnostic sensitivity of 0.95 and a specificity of 1.00. How many herds need to be sampled to achieve a system sensitivity of 95%? That is, what is the probability that disease will be detected if it is present in the population at the designated design prevalence?

library(epiR)
rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.95)
#> [1] 62

A total of 62 herds need to be sampled and tested.

This question can be asked in another way. If our prior estimate of the probability that the population of herds is free of disease is 0.50 and we believe that there’s a 1% chance of disease being introduced into the population during the next time period, how many herds need to be sampled to be 95% confident that disease is absent (i.e. less than the design prevalence) if all tests are negative?

rsu.sspfree.rs(N = NA, prior = 0.50, p.intro = 0.01, pstar = 0.05, pfree = 0.95, se.u = 0.95)
#> $n
#> [1] 61
#> 
#> $se.p
#> [1] 0.9484106
#> 
#> $adj.prior
#> [1] 0.495

A total of 61 herds need to be sampled (similar to the value calculated above). Note that function rsu.sssep.rs returns the sample size to achieve a desired surveillance system sensitivity (‘what’s the probability that disease will be detected?’). Function rsu.sspfree.rs returns the sample size to achieve a desired (posterior) probability of disease freedom.
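
The link between the two functions can be sketched by hand. The steps below are a reverse-engineered sketch based on the output above (see the function documentation for the exact method): the prior is discounted for the probability of disease introduction, Bayes' theorem gives the surveillance system sensitivity needed to reach the target probability of freedom, and the binomial formula converts that sensitivity into a sample size:

prior <- 0.50; p.intro <- 0.01; pstar <- 0.05; pfree <- 0.95; se.u <- 0.95
adj.prior <- prior * (1 - p.intro)                                 # 0.495
se.p <- 1 - (adj.prior * (1 - pfree)) / (pfree * (1 - adj.prior))  # approximately 0.948
ceiling(log(1 - se.p) / log(1 - se.u * pstar))                     # 61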

Now assume that it is known that there are 500 dairy herds in your study area. Revise your sample size estimate to achieve the desired surveillance system sensitivity in light of this new information.

rsu.sssep.rs(N = 500, pstar = 0.05, se.p = 0.95, se.u = 0.95)
#> [1] 60

A total of 60 herds need to be sampled and tested.

The sample size calculations presented so far assume the use of a test with perfect specificity (that is, if a sample returns a positive result we can be 100% certain that the herd is positive and disease is actually present in the population).

Consider the situation where a test with imperfect specificity is used. Imperfect specificity presents problems for disease freedom surveys. If a positive test result is returned, how sure can we be that it is a true positive as opposed to a false positive? The rsu.sssep.rsfreecalc function returns the required sample size to confirm the absence of disease using a test with imperfect diagnostic sensitivity and specificity, based on the methodology implemented in the standalone software ‘Freecalc’ (Cameron and Baldock 1998).

EXAMPLE 2

We’ll continue with the brucellosis example introduced above. Imagine the test we’re using has a diagnostic sensitivity of 0.95 (as before) but this time it has a specificity of 0.98. How many herds need to be sampled to be 95% certain that the prevalence of brucellosis in dairy herds is less than the design prevalence if less than a specified number of tests return a positive result?

rsu.sssep.rsfreecalc(N = 5000, pstar = 0.05, mse.p = 0.95, 
   msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", 
   max.ss = 32000)$summary
#>     n    N c pstar         p1     se.p      sp.p
#> 1 194 5000 7  0.05 0.04898102 0.951019 0.9573939

A population sensitivity of 95% is achieved with a total sample size of 194 herds, assuming a cut-point of 7 or more positive herds are required to return a positive survey result.

Note the substantial increase in sample size when diagnostic specificity is imperfect (194 herds when specificity is 0.98 compared with 63 when specificity is 1.00). The relatively low design prevalence in combination with imperfect specificity means that false positives are more likely to be a problem in this population, so the number tested needs to be (substantially) increased. Increase the design prevalence to 0.10 to see its effect on estimated sample size.

rsu.sssep.rsfreecalc(N = 5000, pstar = 0.10, mse.p = 0.95, 
   msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", 
   max.ss = 32000)$summary
#>    n    N c pstar         p1      se.p      sp.p
#> 1 66 5000 3   0.1 0.04992274 0.9500773 0.9566218

The required sample size decreases to 66 and the cut-point to 3 positives due to: (1) the expected reduction in the number of false positives with the smaller sample size; and (2) the greater difference between the true and false positive rates when the design prevalence is 0.10 compared with 0.05.

Now consider the situation where individual surveillance units (e.g. animals) are aggregated within groups called ‘clusters’ (e.g. herds). With this type of system two-stage cluster sampling is a commonly used approach for disease surveillance studies.

With two stage cluster sampling herds (clusters) are sampled first and then individual surveillance units are then sampled from each sampled cluster. This means that we have two sample sizes to calculate: the number of clusters and the number of surveillance units from within each sampled cluster.

EXAMPLE 3

For this example we assume that there are 20,000 at risk herds in our population and we do not know the number of animals present in each herd. This disease is not very common among herds but if a herd is positive the prevalence is relatively high, so we set the herd-level design prevalence to 0.005 and the within-herd design prevalence to 0.05. The test we will use at the surveillance unit level has a diagnostic sensitivity of 0.90 and a diagnostic specificity of 1.00. The target sensitivity of disease detection at the herd level is 0.95 and the target sensitivity of disease detection at the population level is the same, 0.95.

How many herds need to be sampled if you want to be 95% certain of detecting at least one infected herd when the between-herd prevalence of disease is greater than or equal to 0.005?

rsu.sssep.rs(N = 20000, pstar = 0.005, se.p = 0.95, se.u = 0.95)
#> [1] 622

We need to sample a total of 622 herds.

How many animals need to be sampled from each herd if you want to be 95% certain of detecting at least one infected animal if the within-herd prevalence of disease is greater than or equal to 0.05?

rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.90)
#> [1] 66

Within each selected herd we need to sample at least 66 animals.

As an alternative we can calculate the required number of herds to sample and the required number of animals to sample from each herd in a single step using the function rsu.sssep.rs2st:

rsu.sssep.rs2st(H = 20000, N = NA, pstar.c = 0.005, pstar.u = 0.05, se.p = 0.95, se.c = 0.95, se.u = 0.90)
#> $clusters
#>       H nsample
#> 1 20000     622
#> 
#> $units
#>    N nsample
#> 1 NA      66

Estimation of surveillance system sensitivity and specificity

Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.
Sampling Outcome Details Function
Representative SSe Imperfect Se, perfect Sp rsu.sep.rs
Two stage representative SSe Imperfect Se, perfect Sp rsu.sep.rs2st
Representative SSe Imperfect Se, perfect Sp, multiple components rsu.sep.rsmult
Representative SSe Imperfect Se, imperfect Sp rsu.sep.rsfreecalc
Pooled representative SSe Imperfect Se, perfect Sp rsu.sep.rspool
Representative SSe Imperfect Se, perfect Sp rsu.sep.rsvarse
Representative SSp Imperfect Sp rsu.spp.rs

EXAMPLE 4

Three hundred samples are to be tested from a population of animals to confirm the absence of disease. The total size of the population is unknown. Assuming a design prevalence of 0.01 and that a test with a diagnostic sensitivity of 0.95 will be used, what is the surveillance system sensitivity? That is, what is the probability that disease will be detected if it is present in the population at or above the specified design prevalence?

rsu.sep.rs(N = NA, n = 300, pstar = 0.01, se.u = 0.95)
#> [1] 0.9429384

The probability that this surveillance strategy will detect disease if it is present in the population at or above the specified design prevalence (the surveillance system sensitivity) is 0.943.

EXAMPLE 5

Thirty animals from five herds ranging in size from 80 to 100 head are to be sampled to confirm the absence of a disease. Assuming a design prevalence of 0.01 and a test with diagnostic sensitivity of 0.95 will be used, what is the sensitivity of disease detection for each herd?

N <- seq(from = 80, to = 100, by = 5)
n <- rep(30, times = length(N))

herd.sep <- rsu.sep.rs(N = N, n = n, pstar = 0.01, se.u = 0.95)
sort(round(herd.sep, digits = 2))
#> [1] 0.28 0.30 0.32 0.34 0.36

The sensitivity of disease detection for each herd ranges from 0.28 to 0.36.
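
When herd sizes are small relative to the number sampled the binomial approximation is replaced by a finite population (hypergeometric) approximation. A minimal sketch of the approximation that appears to underlie these results, in which the expected number of diseased animals is rounded up to a whole number, is:

N <- seq(from = 80, to = 100, by = 5); n <- 30
se.u <- 0.95; pstar <- 0.01
d <- ceiling(N * pstar)
round(1 - (1 - se.u * n / N)^d, digits = 2)   # 0.36 0.34 0.32 0.30 0.28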

EXAMPLE 6

Assume 73 samples were tested at two different labs, using different tests. Laboratory 1 tested 50 samples with the standard test which has a diagnostic sensitivity of 0.80. Laboratory 2 tested the remaining 23 samples with a different test which has a diagnostic sensitivity of 0.70. What is the surveillance system sensitivity of disease detection if we set the design prevalence to 0.05?

# Diagnostic test sensitivities and the number of samples tested at each laboratory:
se.t1 <- 0.80; se.t2 <- 0.70
n.lab1 <- 50; n.lab2 <- 23

# Create a vector of test sensitivities for each sample:
se.all <- c(rep(se.t1, times = n.lab1), rep(se.t2, times = n.lab2))
rsu.sep.rsvarse(N = n.lab1 + n.lab2, pstar = 0.05, se.u = se.all)
#> [1] 0.9971275

If the design prevalence is 0.05 the estimated surveillance system sensitivity is 0.997.

Estimation of the probability of disease freedom

Functions to estimate the probability of disease freedom using representative population sampling data.
Sampling Outcome Details Function
Representative Prob of disease freedom Imperfect Se, perfect Sp rsu.pfree.rs
Representative Equilibrium prob of disease freedom Imperfect Se, perfect Sp rsu.pfree.equ

EXAMPLE 7

You are the epidemiologist for a land-locked country in central Asia. You have developed a surveillance program for a given disease which has an estimated system sensitivity of 0.65. The disease of interest is carried by live animals and you know that the frequency of illegal importation of animals into your country (and therefore the likelihood of disease incursion) is higher during the warmer months of the year (June to August).

Plot the probability of disease freedom assuming surveillance testing is carried out each month. Include on your plot the probability of disease incursion to show how it changes during the year. Previous surveillance work indicates that the probability that your country is free of disease is 0.50.

library(ggplot2); library(lubridate); library(scales)

# Define a vector disease incursion probabilities (January to December):
p.intro <- c(0.01,0.01,0.01,0.02,0.04,0.10,0.10,0.10,0.08,0.06,0.04,0.02)

rval.df <- rsu.pfree.rs(se.p = rep(0.65, times = 12), p.intro = p.intro, prior = 0.50, by.time = TRUE)

# Re-format rval.df ready for for ggplot2:
dat.df <- data.frame(mnum = rep(1:12, times = 2),
   mchar = rep(seq(as.Date("2020/1/1"), by = "month", length.out = 12), times = 2),                 
   class = c(rep("Disease introduction", times = length(p.intro)), 
             rep("Disease freedom", times = length(p.intro))),
   prob = c(rval.df$PIntro, rval.df$PFree))

# Plot the results:
ggplot(data = dat.df, aes(x = mchar, y = prob, group = class, col = class)) +
  theme_bw() +
  geom_point() + 
  geom_line() +
  scale_colour_manual(values = c("red", "dark blue")) + 
  scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b"),
     name = "Month") +
  scale_y_continuous(limits = c(0,1), name = "Probability") +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed", col = "blue") +
  guides(col = guide_legend(title = "")) +
  theme(legend.position = c(0.8, 0.5))

Risk-based sampling

With risk-based sampling we modify the intensity of sampling effort across the population of interest according to risk (as opposed to representative sampling where the probability that an individual unit is sampled is uniform across the population of interest). When our objective is to detect the presence of disease risk-based sampling makes intuitive sense: we concentrate our search effort on those sections of the population where we believe we are more likely to detect disease (i.e. where the risk of disease is high).
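
The machinery behind risk-based calculations can be sketched in a few lines (a simplified illustration; the functions rsu.adjrisk and rsu.epinf, described under Miscellaneous functions later in this vignette, implement these steps). Relative risks are first rescaled so that the population-weighted average risk equals one (the adjusted risks) and the design prevalence is then multiplied by the adjusted risk for each group to give that group's effective probability of infection. Using the risk groups from Example 8 below, this reproduces the $epi values returned there:

rr <- c(5,3,1); ppr <- c(0.10,0.10,0.80); pstar <- 0.01
adj.risk <- rr / sum(rr * ppr)
pstar * adj.risk   # 0.03125 0.01875 0.00625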

How many samples do I need?

The sample size functions all relate to sampling to achieve a defined surveillance system sensitivity.

Functions to estimate sample size using risk based sampling data.
Sampling Outcome Details Function
Risk-based SSe Single Se for risk groups, perfect Sp rsu.sssep.rbsrg
Risk-based SSe Multiple Se within risk groups, perfect Sp rsu.sssep.rbmrg
Risk-based SSe Two stage sampling, 1 risk factor rsu.sssep.rb2st1rf
Risk-based SSe Two stage sampling, 2 risk factors rsu.sssep.rb2st2rf

EXAMPLE 8

You are working with a disease of cattle where the prevalence of disease is believed to vary according to herd type. The risk of disease is 5 times greater in dairy herds and 3 times greater in mixed herds compared with the reference category, beef herds. The distribution of dairy, mixed and beef herds in the population of interest is 0.10, 0.10 and 0.80, respectively. Assume you intend to distribute your sampling effort 0.4, 0.4 and 0.2 across dairy, mixed and beef herds, respectively.

Within each of the three risk groups a single test with a diagnostic sensitivity of 0.95 will be used. How many herds need to be sampled if you want to achieve 95% system sensitivity for a prevalence of disease in the population of greater than or equal to 1%?

# Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests):
m <- rbind(1,1,1)

rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8),
   spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = 0.95)
#> $n
#>       se.u 0.95 total
#> rr 5         59    59
#> rr 3         59    59
#> rr 1         29    29
#> total       147   147
#> 
#> $epi
#> [1] 0.03125 0.01875 0.00625
#> 
#> $mean.se
#> [1] 0.95 0.95 0.95

A total of 147 herds need to be sampled: 59 dairy, 59 mixed and 29 beef herds.

Now assume that one of two tests will be used for each herd. The first test has a diagnostic sensitivity of 0.92. The second test has a diagnostic sensitivity of 0.80. The proportion of dairy, mixed and beef herds receiving the first test is 0.80, 0.50 and 0.70, respectively (which means that 0.20, 0.50 and 0.30 receive the second test, respectively). Recalculate the sample size.

# Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests):
m <- rbind(c(0.8,0.2), c(0.5,0.5), c(0.7,0.3))

rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8),
   spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = c(0.92,0.80))
#> $n
#>       se.u 0.92 se.u 0.8 total
#> rr 5         52       12    64
#> rr 3         32       32    64
#> rr 1         22        9    31
#> total       106       53   159
#> 
#> $epi
#> [1] 0.03125 0.01875 0.00625
#> 
#> $mean.se
#> [1] 0.896 0.860 0.884

A total of 159 herds need to be sampled: 64 dairy, 64 mixed and 31 beef herds.

EXAMPLE 9

A cross-sectional study is to be carried out to confirm the absence of disease using risk based sampling. Assume a population level design prevalence of 0.01 and there are ‘high’, ‘medium’ and ‘low’ risk areas where the risk of disease in the high risk area compared with the low risk area is 5 and the risk of disease in the medium risk area compared with the low risk area is 3. The proportions of the population at risk in the high, medium and low risk area are 0.10, 0.10 and 0.80, respectively.

Half of your samples will be taken from individuals in the high risk area, 0.30 from the medium risk area and 0.20 from the low risk area. You intend to use a test with diagnostic sensitivity of 0.90 and you’d like to take sufficient samples to return a population sensitivity of 0.95. How many units need to be sampled to meet the requirements of the study?

rsu.sssep.rbsrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.10,0.10,0.80), 
   spr = c(0.50,0.30,0.20), se.p = 0.95, se.u = 0.90)
#> $total
#> [1] 147
#> 
#> $n
#> [1] 74 45 28
#> 
#> $epinf
#> [1] 0.03125 0.01875 0.00625
#> 
#> $adj.risk
#> [1] 3.125 1.875 0.625

A total of 147 units needs to be sampled to meet the requirements of the study: 74 from the high risk area, 45 from the medium risk area and 28 from the low risk area.

EXAMPLE 10

A cross-sectional study is to be carried out to confirm the absence of disease using risk based sampling. Assume a design prevalence of 0.02 at the cluster (herd) level and a design prevalence of 0.10 at the surveillance unit (individual animal) level. Clusters are categorised as being either high, medium or low risk with the probability of disease for clusters in the high and medium risk area 5 and 3 times the probability of disease in the low risk area. The proportions of clusters in the high, medium and low risk area are 0.10, 0.20 and 0.70, respectively. The proportion of samples from the high, medium and low risk area will be 0.40, 0.40 and 0.20, respectively.

Surveillance units (individual animals) are categorised as being either high or low risk with the probability of disease for units in the high risk group 4 times the probability of disease in the low risk group. The proportions of units in the high and low risk groups are 0.10 and 0.90, respectively. All of your samples will be taken from units in the high risk group.

You intend to use a test with diagnostic sensitivity of 0.95 and you’d like to take sufficient samples to be 95% certain that you’ve detected disease at the population level, 95% certain that you’ve detected disease at the cluster level and 95% at the surveillance unit level. How many clusters and how many units need to be sampled to meet the requirements of the study?

rsu.sssep.rb2st2rf(
   rr.c = c(5,3,1), ppr.c = c(0.10,0.20,0.70), spr.c = c(0.40,0.40,0.20),
   pstar.c = 0.02,
   rr.u = c(4,1), ppr.u = c(0.1, 0.9), spr.u = c(1,0),
   pstar.u = 0.10, 
   se.p = 0.95, se.c = 0.95, se.u = 0.95)
#> $clusters
#> $clusters$total
#> [1] 82
#> 
#> $clusters$n
#> [1] 33 33 16
#> 
#> $clusters$epinf
#> [1] 0.05555556 0.03333333 0.01111111
#> 
#> $clusters$adj.risk
#> [1] 2.7777778 1.6666667 0.5555556
#> 
#> 
#> $units
#> $units$total
#> [1] 9
#> 
#> $units$n
#> [1] 9 0
#> 
#> $units$epinf
#> [1] 0.30769231 0.07692308
#> 
#> $units$adj.risk
#> [1] 3.0769231 0.7692308

A total of 82 clusters needs to be sampled: 33 from the high risk area, 33 from the medium risk area and 16 from the low risk area. A total of 9 units should be sampled from each cluster.

Surveillance system sensitivity

Functions to estimate surveillance system sensitivity using risk based sampling data.
Sampling Outcome Details Function
Risk-based SSe Varying Se, perfect Sp rsu.sep.rb
Risk-based SSe Varying Se, perfect Sp, one risk factor rsu.sep.rb1rf
Risk-based SSe Varying Se, perfect Sp, two risk factors rsu.sep.rb2rf

EXAMPLE 11

You have been asked to provide an assessment of a surveillance program for Actinobacillus hyopneumoniae in pigs. It is known that there are high risk and low risk areas for A. hyopneumoniae in your country, with the estimated probability of disease in the high risk area thought to be around 3.5 times that in the low risk area. It is known that 10% of the 1784 pig herds in the study area are in the high risk area and 90% are in the low risk area.

The risk of A. hyopneumoniae is dependent on age, with adult pigs around five times more likely to be A. hyopneumoniae positive compared with younger (grower) pigs.

Pigs from 20 herds have been sampled: 5 from the low-risk area and 15 from the high-risk area. All of the tested pigs were adults: no grower pigs were tested.

The ELISA for A. hyopneumoniae in pigs has a diagnostic sensitivity of 0.95.

What is the surveillance system sensitivity if we assume a design prevalence of 1 per 100 at the cluster (herd) level and 5 per 100 at the surveillance system unit (pig) level?

# There are 1784 herds in the study area:
H <- 1784

# Twenty of the 1784 herds are sampled. Generate 20 herds of varying size:
set.seed(1234)
hsize <- rlnorm(n = 20, meanlog = log(10), sdlog = log(8))
hsize <- round(hsize + 20, digits = 0)

# Generate a matrix listing the number of growers and finishers in each of the 20 sampled herds. 
# Assume that anywhere between 80% and 95% of the pigs in each herd are growers:
set.seed(1234)
pctg <- runif(n = 20, min = 0.80, max = 0.95)
ngrow <- round(pctg * hsize, digits = 0)
nfini <- hsize - ngrow
N <- cbind(ngrow, nfini)

# Generate a matrix listing the number of grower and finisher pigs sampled from each herd. Fifteen pigs from each herd are sampled. If there's less than 15 pigs we sample the entire herd:
nsgrow <- rep(0, times = 20)
nsfini <- ifelse(nfini <= 15, nfini, 15)
n <- cbind(nsgrow, nsfini)

# The herd-level design prevalence is 0.01 and the individual pig-level design prevalence is 0.05: 
pstar.c <- 0.01
pstar.u <- 0.05

# For herds in the high-risk area the probability of being A. hyopneumoniae positive is 3.5 times that of herds in the low-risk area. Ninety percent of herds are in the low risk area and 10% are in the high risk area:
rr.c <- c(3.5,1)
ppr.c <- c(0.1,0.9) 

# We've sampled 15 herds from the high risk area and 5 herds from the low risk area. Above, for vector rr.c, the relative risk for the high risk group is listed first so the vector rg follows this order:
rg <- c(rep(1, times = 15), rep(2, times = 5))

# The probability of being A. hyopneumoniae positive for finishers is 5 times that of growers. For the matrices N and n growers are listed first, then finishers. Vector rr.u follows the same order:
rr.u <- c(1,5)

# The diagnostic sensitivity of the A. hyopneumoniae ELISA is 0.95:
se.u <- 0.95

rsu.sep.rb2st(H = H, N = N, n = n, 
   pstar.c = pstar.c, pstar.u = pstar.u,
   rg = rg, rr.c = rr.c, rr.u = rr.u,
   ppr.c = ppr.c, ppr.u = NA,
   se.u = se.u)
#> $se.p
#> [1] 0.3171584
#> 
#> $se.c
#>  [1] 0.8173677 0.8785324 0.9982369 0.6569587 0.8288718 0.9299997 0.8650648
#>  [8] 0.8291064 0.6708758 0.7663220 0.6748275 0.6619839 0.7663220 0.6959647
#> [15] 0.9989481 0.6880795 0.8291064 0.8234889 0.8234889 0.8901875

The estimated surveillance system sensitivity of this program is 0.32.

Repeat these analyses assuming we don’t know the total number of pig herds in the population and we have only an estimate of the proportions of growers and finishers in each herd.

# Generate a matrix listing the proportion of growers and finishers in each of the 20 sampled herds:

ppr.u <- cbind(rep(0.9, times = 20), rep(0.1, times = 20))

# Set H (the number of clusters) and N (the number of surveillance units within each cluster) to NA:
rsu.sep.rb2st(H = NA, N = NA, n = n, 
   pstar.c = pstar.c, pstar.u = pstar.u,
   rg = rg, rr.c = rr.c, rr.u = rr.u,
   ppr.c = ppr.c, ppr.u = ppr.u,
   se.u = se.u)
#> $se.p
#> [1] 0.2078001
#> 
#> $se.c
#>  [1] 0.5245994 0.5245994 0.8925568 0.3105070 0.4274746 0.6052477 0.6052477
#>  [8] 0.5245994 0.3105070 0.4274746 0.3105070 0.3105070 0.4274746 0.3105070
#> [15] 0.9384860 0.3105070 0.5245994 0.5245994 0.5245994 0.9384860

The estimated surveillance system sensitivity is 0.21.

Analysis of passive surveillance data

Estimation of surveillance system sensitivity and specificity

EXAMPLE 12

There are four ‘steps’ in a (passive) disease detection process for disease X in your country: (1) an infected animal shows clinical signs of disease; (2) a herd manager observes clinical signs in a diseased animal and calls a veterinarian; (3) a veterinarian responds appropriately to the disease investigation request (taking, for example, appropriate samples for laboratory investigation); and (4) the laboratory conducts appropriate tests on the submitted samples and interprets the results of those tests correctly. The probabilities for each step in the disease detection pathway (in order) are 0.10, 0.20, 0.90 and 0.99, respectively.

Assuming the probability that a unit actually has disease if it is submitted for testing is 0.98, the sensitivity of the diagnostic test used at the unit level is 0.90, the population is comprised of 1000 clusters (herds), five animals from each cluster (herd) investigated for disease are tested and the cluster-level design prevalence is 0.01, what is the sensitivity of disease detection at the cluster (herd) and population level?

rsu.sep.pass(step.p = c(0.10,0.20,0.90,0.99), pstar.c = 0.01,
   p.inf.u = 0.98, N = 1000, n = 5, se.u = 0.90)
#> $se.p
#> [1] 0.164565
#> 
#> $se.c
#> [1] 0.01781959

The sensitivity of disease detection at the cluster (herd) level is 0.018. The sensitivity of disease detection at the population level is 0.16.

Miscellaneous functions

Adjusted relative risks

EXAMPLE 13

For a given disease of interest you believe that there is a ‘high risk’ and ‘low risk’ area in your country. The risk of disease in the high-risk area compared with the low-risk area is 5. A recent census shows that 10% of the population are resident in the high-risk area and 90% are resident in the low-risk area. Calculate the adjusted relative risks for each area.

rsu.adjrisk(rr = c(5,1), ppr = c(0.10,0.90))
#> [1] 3.5714286 0.7142857

The adjusted relative risks for the high and low risk areas are 3.6 and 0.7, respectively.

Design prevalence back calculation

EXAMPLE 14

The population size in a provincial area in your country is 193,000. In a given two-week period a total of 7764 individuals have been tested for COVID-19 using an approved PCR which is believed to have a diagnostic sensitivity of 0.85. All of the individuals tested have returned a negative result. What is the maximum prevalence required to provide system sensitivity of 0.95 if COVID-19 is actually present in this population (i.e. what is the back-calculated design prevalence)? Express your result as the number of COVID-19 cases per 100,000 head of population.

rsu.pstar(N = 193000, n = 7764, se.p = 0.95, se.u = 0.85) * 100000
#> [1] 44.61341

If the 7764 individuals have all returned a negative test result (using a test with 85% sensitivity) we can be 95% confident that COVID-19, if it is present, is present at a prevalence of 44 cases per 100,000 or less.

What is the probability that the prevalence of COVID-19 in this population is less than or equal to 10 cases per 100,000?

rsu.sep(N = 193000, n = 7764, pstar = 10 / 100000, se.u = 0.85)
#> [1] 0.4890517

If all of the 7764 individuals returned a negative test we can be only 49% confident that the prevalence of COVID-19 in the province is less than 10 per 100,000. How many need to be tested to be 95% confident that the prevalence of COVID-19 is less than or equal to 10 cases per 100,000? We return to the sample size functions covered earlier:

rsu.sssep.rs(N = 193000, pstar = 10 / 100000, se.p = 0.95, se.u = 0.85)
#> [1] 31586

To be 95% confident that the prevalence of COVID-19 is less than or equal to 10 cases per 100,000 a total of 31,586 individuals need to be tested.

References

Cameron, AR, and FC Baldock. 1998. “A New Probability Formula for Surveys to Substantiate Freedom from Disease.” Preventive Veterinary Medicine 34: 1–17.

OIE. 2021. “Terrestrial Animal Health Code.” In. Paris: World Organisation for Animal Health.

Thacker, SB, and RL Berkelman. 1988. “Public health surveillance in the United States.” Epidemiological Reviews 10: 164–90.

epiR/inst/doc/epiR_RSurveillance.html0000644000176200001440000004532614166006663017340 0ustar liggesusers epiR - RSurveillance function mapping

epiR - RSurveillance function mapping

Evan Sergeant and Mark Stevenson

2022-01-07

The following tables list each of the functions in RSurveillance and their equivalents in epiR.

Representative sampling

Sample size estimation

Functions to estimate sample size using representative population sampling data.
Sampling Outcome RSurveillance epiR
Representative Prob disease freedom n.pfree rsu.sspfree.rs
Representative SSe n.freedom rsu.sssep.rs
Two stage representative SSe n.2stage rsu.sssep.rs2st
Representative SSe n.freecalc rsu.sssep.rsfreecalc
Pooled representative SSe n.pooled rsu.sssep.rspool
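
As a minimal worked illustration of the mapping (the epiR call and its result are taken from the epiR surveillance vignette; the corresponding RSurveillance call is not reproduced here, so check its help pages for the argument names), the sample size for substantiating freedom from disease under simple representative sampling, previously obtained with n.freedom, is obtained with rsu.sssep.rs:

library(epiR)
rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.95)
#> [1] 62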

Estimation of surveillance system sensitivity

Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.
Sampling Outcome RSurveillance epiR
Representative SSe sep.binom rsu.sep.rs
Representative SSe sep.hypergeo rsu.sep.rs
Representative SSe sep rsu.sep.rs
Two stage representative SSe sep.sys rsu.sep.rs2st
Representative SSe sse.combined rsu.sep.rsmult
Representative SSe sep.freecalc rsu.sep.rsfreecalc
Representative SSe sep.binom.imperfect rsu.sep.rsfreecalc
Pooled representative SSe sep.pooled rsu.sep.rspool
Representative SSe sep.var.se rsu.sep.rsvarse
Representative SSp spp rsu.spp.rs
Representative SSp sph.hp rsu.spp.rs

Estimation of the probability of disease freedom

Functions to estimate the probability of disease freedom using representative population sampling data.
Sampling Outcome RSurveillance epiR
Representative Prob of disease freedom pfree.1 rsu.pfree.rs
Representative Prob of disease freedom pfree.calc rsu.pfree.rs
Representative Equilibrium prob of disease freedom pfree.equ rsu.pfree.equ

Risk based sampling

Sample size estimation

Functions to estimate sample size using risk based sampling data.
Sampling Outcome RSurveillance epiR
Risk-based SSe n.rb rsu.sssep.rbsrg
Risk-based SSe n.rb.varse rsu.sssep.rbmrg
Risk-based SSe n.rb.2stage.1 rsu.sssep.rb2st1rf
Risk-based SSe n.rb.2stage.2 rsu.sssep.rb2st2rf

Estimation of surveillance system sensitivity

Functions to estimate surveillance system sensitivity (SSe) using risk based sampling data.
Sampling Outcome RSurveillance epiR
Risk-based SSe sep.rb.bin.varse rsu.sep.rb
Risk-based SSe sep.rb.bin rsu.sep.rb1rf
Risk-based SSe sep.rb.hypergeo rsu.sep.rb1rf
Risk-based SSe sep.rb2.bin rsu.sep.rb2rf
Risk-based SSe sep.rb2.hypergeo rsu.sep.rb2rf
Risk-based SSe sep.rb.hypergeo.varse rsu.sep.rbvarse
Risk-based SSe sse.rb2stage rsu.sep.rb2st

Census data

Estimation of surveillance system sensitivity

Functions to estimate surveillance system sensitivity (SSe) using census data.
Sampling Outcome RSurveillance epiR
Risk-based SSe sep.exact rsu.sep.cens

Passive surveillance data

Functions to estimate surveillance system sensitivity (SSe) using passively collected surveillance data.
Sampling Outcome RSurveillance epiR
Risk-based SSe sep.passive rsu.sep.pass

Miscellaneous functions

Miscellaneous functions.
Details RSurveillance epiR
Adjusted risk adj.risk rsu.adjrisk
Adjusted risk adj.risk.sim rsu.adjrisk
Series test interpretation, Se se.series rsu.dxtest
Parallel test interpretation, Se se.parallel rsu.dxtest
Series test interpretation, Sp sp.series rsu.dxtest
Parallel test interpretation, Sp sp.parallel rsu.dxtest
Effective probability of infection epi.calc rsu.epinf
Design prevalence back calculation pstar.calc rsu.pstar
Prob disease is less than design prevalence rsu.sep
epiR/inst/doc/epiR_descriptive.R0000644000176200001440000003065714166006677016346 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----message = FALSE---------------------------------------------------------- library(epiR); library(ggplot2); library(scales) ncas <- 4; npop <- 200 tmp <- as.matrix(cbind(ncas, npop)) epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 ## ----------------------------------------------------------------------------- ncas <- 136; ntar <- 22050 tmp <- as.matrix(cbind(ncas, ntar)) epi.conf(tmp, ctype = "inc.rate", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 1000 ## ----------------------------------------------------------------------------- tmp <- epi.betabuster(mode = 0.60, conf = 0.80, greaterthan = TRUE, x = 0.35, conf.level = 0.95, max.shape1 = 100, step = 0.001) tmp$shape1; tmp$shape2 ## ----dfreq01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq01}Frequency histogram of disease prevalence estimates for our simulated population."---- dprob <- rbeta(n = 25, shape1 = tmp$shape1, shape2 = tmp$shape2) dat.df <- data.frame(dprob = dprob) ggplot(data = dat.df, aes(x = dprob)) + theme_bw() + geom_histogram(binwidth = 0.01, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_continuous(limits = c(0,1), name = "Prevalence") + scale_y_continuous(limits = c(0,10), name = "Number of draws") ## ----------------------------------------------------------------------------- dat.df$rname <- paste("Region ", 1:25, sep = "") dat.df$npop <- round(runif(n = 25, min = 20, max = 1500), digits = 0) dat.df$ncas <- round(dat.df$dprob * dat.df$npop, digits = 0) tmp <- as.matrix(cbind(dat.df$ncas, dat.df$npop)) tmp <- epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, conf.level = 0.95) * 100 dat.df <- cbind(dat.df, tmp) head(dat.df) ## ----------------------------------------------------------------------------- dat.df <- dat.df[sort.list(dat.df$est),] dat.df$rank <- 1:nrow(dat.df) ## ----dfreq02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq02}Ranked error bar plot showing the prevalence of disease (and its 95% confidence interval) for 100 population units."---- ggplot(data = dat.df, aes(x = rank, y = est)) + theme_bw() + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) + geom_point() + scale_x_continuous(limits = c(0,25), breaks = dat.df$rank, labels = dat.df$rname, name = "Region") + scale_y_continuous(limits = c(0,100), name = "Cases per 100 individuals at risk") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ----------------------------------------------------------------------------- n.males <- 100; n.females <- 50 odate <- seq(from = as.Date("2004-07-26"), to = as.Date("2004-12-13"), by = 1) prob <- c(1:100, 41:1); prob <- prob / sum(prob) modate <- sample(x = odate, size = n.males, replace = TRUE, p = prob) fodate <- sample(x = odate, size = n.females, replace = TRUE) dat.df <- data.frame(sex = c(rep("Male", n.males), rep("Female", n.females)), odate = c(modate, fodate)) # Sort the data in order of odate: dat.df <- dat.df[sort.list(dat.df$odate),] ## 
----epicurve01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve01}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2004."---- ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("7 days"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ----epicurve03-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve03}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex."---- ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + facet_grid( ~ sex) ## ----epicurve04-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve04}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex. An event that occurred on 31 October 2004 is indicated by the vertical dashed line."---- ggplot(data = dat.df, aes(x = as.Date(odate))) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + facet_grid( ~ sex) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") ## ----epicurve05-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve05}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex."---- ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", size = 0.1) + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") + scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + theme(legend.position = c(0.90, 0.80)) ## ----epicurve06-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve06}Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex."---- ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) + theme_bw() + geom_histogram(binwidth = 7, colour = "gray", size = 0.1, position = "dodge") + scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), name = "Date") + scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") + theme(axis.text.x = 
element_text(angle = 90, hjust = 1)) + geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), linetype = "dashed") + scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + theme(legend.position = c(0.90, 0.80)) ## ----------------------------------------------------------------------------- odate <- seq(from = as.Date("1/1/00", format = "%d/%m/%y"), to = as.Date("1/1/05", format = "%d/%m/%y"), by = "1 month") ncas <- round(runif(n = length(odate), min = 0, max = 100), digits = 0) dat.df <- data.frame(odate, ncas) dat.df$dcontrol <- "neg" dat.df$dcontrol[dat.df$odate >= as.Date("1/1/03", format = "%d/%m/%y") & dat.df$odate <= as.Date("1/6/03", format = "%d/%m/%y")] <- "pos" head(dat.df) ## ----epicurve07-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve07}Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures."---- ggplot() + theme_bw() + geom_histogram(dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) + scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), name = "Date") + scale_y_continuous(limits = c(0,200), name = "Number of cases") + scale_fill_manual(values = c("#738ca6","#d46a6a")) + guides(fill = "none") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ----------------------------------------------------------------------------- cumsum(dat.df$ncas) ## ----epicurve08-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve08}Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures. Superimposed on this plot is a line showing cumulative case numbers."---- ggplot() + theme_bw() + geom_histogram(data = dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) + geom_line(data = dat.df, mapping = aes(x = odate, y = cumsum(ncas) / 10)) + scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), name = "Date") + scale_y_continuous(limits = c(0,350), name = "Number of cases", sec.axis = sec_axis(~ . 
* 10, name = "Cumulative number of cases")) + scale_fill_manual(values = c("#738ca6","#d46a6a")) + guides(fill = "none") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ----message = FALSE, warning = FALSE----------------------------------------- library(sf); library(spData); library(rgdal); library(plyr); library(RColorBrewer); library(spatstat) ncsids.sf <- st_read(dsn = system.file("shapes/sids.shp", package = "spData")[1]) ncsids.sf <- ncsids.sf[,c("BIR74","SID74")] head(ncsids.sf) ## ----spatial01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial01}Map of North Carolina, USA showing the number of sudden infant death syndrome cases, by county for 1974."---- ggplot() + theme_bw() + geom_sf(data = ncsids.sf, aes(fill = SID74), colour = "dark grey") + scale_fill_gradientn(limits = c(0,60), colours = brewer.pal(n = 5, "Reds"), guide = "colourbar") + scale_x_continuous(name = "Longitude") + scale_y_continuous(name = "Latitude") + labs(fill = "SIDS 1974") ## ----message = FALSE---------------------------------------------------------- data(epi.incin); incin.df <- epi.incin incin.df$status <- factor(incin.df$status, levels = c(0,1), labels = c("Neg", "Pos")) names(incin.df)[3] <- "Status" incin.sf <- st_as_sf(incin.df, coords = c("xcoord","ycoord"), remove = FALSE) st_crs(incin.sf) <- 27700 coppull.ow <- convexhull.xy(x = incin.df[,1], y = incin.df[,2]) coppull.ow <- dilation(coppull.ow, r = 1000) ## ----------------------------------------------------------------------------- coords <- matrix(c(coppull.ow$bdry[[1]]$x, coppull.ow$bdry[[1]]$y), ncol = 2, byrow = FALSE) pol <- Polygon(coords, hole = FALSE) pol <- Polygons(list(pol),1) pol <- SpatialPolygons(list(pol)) coppull.spdf <- SpatialPolygonsDataFrame(Sr = pol, data = data.frame(id = 1), match.ID = TRUE) ## ----------------------------------------------------------------------------- coppull.sf <- as(coppull.spdf, "sf") st_crs(coppull.sf) <- 27700 ## ----------------------------------------------------------------------------- mformat <- function(){ function(x) format(x / 1000, digits = 2) } ## ----spatial02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial02}Point map showing the place of residence of individuals diagnosed with laryngeal cancer (Pos) and lung cancer (Neg), Copull Lancashire, UK, 1972 to 1980."---- ggplot() + theme_bw() + geom_sf(data = incin.sf, aes(colour = Status, shape = Status)) + geom_sf(data = coppull.sf, fill = "transparent", colour = "black") + coord_sf(datum = st_crs(coppull.sf)) + scale_colour_manual(values = c("grey","red")) + scale_shape_manual(values = c(1,16)) + scale_x_continuous(name = "Easting (km)", labels = mformat()) + scale_y_continuous(name = "Northing (km)", labels = mformat()) + theme(legend.position = c(0.10, 0.15)) epiR/inst/doc/epiR_surveillance.Rmd0000644000176200001440000011271014110616626017017 0ustar liggesusers--- title: "Design and Analysis of Disease Surveillance Programs Using epiR" author: "Mark Stevenson and Evan Sergeant" date: "`r Sys.Date()`" bibliography: epiR_surveillance.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Disease surveillance} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} 
knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` Surveillance is defined as the on-going systematic collection, collation and interpretation of accurate information about a defined population with respect to disease and/or infection, closely integrated with timely dissemination of that information to those responsible for control and prevention measures [@thacker_berkelman:1988]. The Terrestrial Animal Health Code of the World Organisation of Animal Health [@oie:2021] defines surveillance as the investigation of a given population or subpopulation to detect the presence of a pathogenic agent or disease; the frequency and type of surveillance will be determined by the epidemiology of the pathogenic agent or disease, and the desired outputs. Surveillance is a tool for monitoring changes in health related events in a defined population with specific goals relating to: (1) the detection of disease incursions, both new and emerging, (2) the assessment of progress in terms of control or eradication of selected diseases and pathogens, (3) demonstration of disease freedom for trading partners, and (4) identification of hazards or risk factors for disease outbreaks. This vignette provides instruction on the way R and `epiR` (and specifically the surveillance functions within `epiR`) can be used for: (1) the design of disease surveillance programs; and (2) the design of programs to provide a quantitative basis for claims for disease freedom. ## Definitions **Design prevalence**. The design prevalence (minimum detectable prevalence, maximum acceptable or permissible prevalence, and minimum expected prevalence) is a fixed value for prevalence used for testing the hypothesis that disease is present in a population of interest. The null hypothesis is that disease is present in the population at a prevalence equal to or greater than the design prevalence. If a sufficient number of samples are collected and all return a negative result we may reject the null hypothesis and accept the alternative hypothesis to conclude that the prevalence is less than the design prevalence. A design prevalence is not related to any actual prevalence of disease in the population under study. It is not subject to uncertainty or variability and therefore doesn't need to be described using a distribution. *Cluster-level design prevalence* refers to a design prevalence assigned at the cluster (e.g. village, herd or household) level. *Unit-level design prevalence* refers to a design prevalence assigned at the individual unit (e.g. cow, sheep, bird) level. The unit-level prevalence of disease can be applied either within clusters (e.g. herds, flocks, villages) or across broader, unclustered populations (e.g. human populations or wildlife). **Surveillance system**. A surveillance system is a set of procedures to collect, collate and interpret information about a defined population with respect to disease. Most surveillance systems are comprised of several activities (e.g. on-farm testing, abattoir surveillance, disease hotlines) called **surveillance system components**. Each surveillance system component is comprised of **surveillance system units**. Surveillance system units are the individual items that get examined within each surveillance system component. 
For the surveillance system components listed above (on-farm testing, abattoir surveillance, disease hotlines) the corresponding surveillance units would be individual animals, carcasses and individual phone reports, respectively. **Unit sensitivity**. Unit sensitivity is defined as the average probability that a unit (selected from those processed) will return a positive surveillance outcome, given that disease is present in the population at a level equal to or greater than a specified design prevalence. **Component sensitivity**. Component sensitivity (CSe) is defined as the average probability that a surveillance system component will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than the specified design prevalence. **Surveillance system sensitivity**. Surveillance system sensitivity (SSe) is defined as the average probability that a surveillance system (as a whole) will return a positive surveillance outcome, given disease is present in the population at a level equal to or greater than a specified design prevalence. ## An approach for thinking about surveillance design and assessment The first thing to consider when you're designing or assessing a surveillance program is to consider the sampling method that will be used. If a surveillance program has been designed to detect a specific pathogen sampling will usually be either **representative** or **risk-based**. Other options include the situation where you might observe every individual in a population (a **census**) or where you might take no active steps to collect surveillance data but instead rely on stakeholders to report their observations on a voluntary basis (**passive surveillance**). Once we have the method of sampling defined we then move on to think about the different tasks that need to be done in terms of design of the actual surveillance system and finally, how we might assess the surveillance system once it has been designed and implemented. In terms of design, once you have specified the sampling method you need to determine how many surveillance system units will be sampled (usually to achieve a defined surveillance system sensitivity). Once samples have been collected and tested or if you are making an assessment of an existing surveillance system, we then might want to answer the question: if the disease of interest is actually present in the population what is the chance that the surveillance system will actually detect it? This question can be expressed in another three other ways: (1) What is the surveillance system sensitivity? or (2) What is the probability that the prevalence of disease is less than the specified design prevalence? or (3) What is the surveillance system's negative predictive value? The remainder of this vignette follows this general structure. For each sampling method (representative, risk-based, census and passive) we provide notes and examples on the use of `epiR` for sample size estimation, estimation of surveillance system sensitivity and estimation of the probability of disease freedom. While 'estimation of the probability of disease freedom' is the name assigned to the last group of analyses a more correct label would be 'estimation of the probability that the prevalence of disease is less than a specified design prevalence' (i.e. the negative predictive value of the surveillance system). 
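The link between surveillance system sensitivity and the probability of disease freedom can be written down in a couple of lines. The sketch below is an illustration only, using made-up values for the prior probability of freedom and the surveillance system sensitivity, and assumes perfect test specificity:

```{r message = FALSE}
# Hypothetical values, for illustration only:
prior <- 0.50 # prior probability that the population is free of disease
se.p  <- 0.80 # surveillance system sensitivity achieved by the programme

# With perfect specificity, a negative surveillance outcome updates the prior
# using Bayes' theorem to give the probability of disease freedom (the
# negative predictive value of the surveillance system):
prior / (prior + (1 - prior) * (1 - se.p))
```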
Be aware that we can only truly demonstrate disease freedom if every member of the population at risk is assessed using a test with perfect diagnostic sensitivity and perfect diagnostic specificity. ## Representative sampling ### Sample size estimation The sample size functions for surveillance representative sampling in `epiR` fall into two classes: sampling to achieve a defined probability of disease freedom and sampling to achieve a defined surveillance system sensitivity. The surveillance system sensitivity sample size functions include those for simple random sampling and two stage sampling. Two stage sampling is the preferred (indeed, the only practical approach) when a population is organised in clusters (e.g. cows within herds, households within villages). With two stage sampling clusters (herds, villages) are sampled first and then from within each selected cluster individual surveillance units are sampled. ```{r ssrs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} library(pander) panderOptions('table.split.table', Inf) # panderOptions('table.alignment.default', function(df) ifelse(sapply(df, is.numeric), 'right', 'left')) set.caption("Functions to estimate sample size using representative population sampling data.") ssrs.tab <- " Sampling | Outcome | Details | Function Representative | Prob disease freedom | Imperfect Se, perfect Sp | `rsu.sspfree.rs` Representative | SSe | Imperfect Se, perfect Sp | `rsu.sssep.rs` Two stage representative | SSe | Imperfect Se, perfect Sp | `rsu.sssep.rs2st` Representative | SSe | Imperfect Se, imperfect Sp, known N | `rsu.sssep.rsfreecalc` Pooled representative | SSe | Imperfect Se, imperfect Sp | `rsu.sssep.rspool`" ssrs.df <- read.delim(textConnection(ssrs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrs.df) <- unname(as.list(ssrs.df[1,])) # put headers on ssrs.df <- ssrs.df[-1,] # remove first row row.names(ssrs.df) <- NULL pander(ssrs.df, style = 'rmarkdown') ``` *EXAMPLE 1* A cross-sectional study is to be carried out to confirm the absence of brucellosis in dairy herds using a bulk milk tank test assuming a design prevalence of 5%. Assume the total number of dairy herds in your study area is unknown and large and the bulk milk tank test to be used has a diagnostic sensitivity of 0.95 and a specificity of 1.00. How many herds need to be sampled to achieve a system sensitivity of 95%? That is, what is the probability that disease will be detected if it is present in the population at the designated design prevalence? ```{r message = FALSE} library(epiR) rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.95) ``` A total of 62 herds need to be sampled and tested. This question can be asked in another way. If our prior estimate of the probability that the population of herds is free of disease is 0.50 and we believe that there's a 1% chance of disease being introduced into the population during the next time period, how many herds need to be sampled to be 95% confident that disease is absent (i.e. less than the design prevalence) if all tests are negative? ```{r message = FALSE} rsu.sspfree.rs(N = NA, prior = 0.50, p.intro = 0.01, pstar = 0.05, pfree = 0.95, se.u = 0.95) ``` A total of 61 herds need to be sampled (similar to the value calculated above). Note that function `rsu.sssep.rs` returns the sample size to achieve a desired surveillance system sensitivity ('what's the probability that disease will be detected?'). 
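For the simple situation above (a very large population, perfect specificity) the required sample size can be approximated directly from the binomial formula. The sketch below is provided as a rough check on the figure of 62 herds; it is an approximation, not the internal code of `rsu.sssep.rs`:

```{r message = FALSE}
# Binomial approximation: solve 1 - (1 - pstar * se.u)^n >= se.p for n:
pstar <- 0.05; se.u <- 0.95; se.p <- 0.95
ceiling(log(1 - se.p) / log(1 - pstar * se.u))
```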
Function `rsu.sspfree.rs` returns the sample size to achieve a desired (posterior) probability of disease freedom. Now assume that it is known that there are 500 dairy herds in your study area. Revise your sample size estimate to achieve the desired surveillance system sensitivity in light of this new information. ```{r message = FALSE} rsu.sssep.rs(N = 500, pstar = 0.05, se.p = 0.95, se.u = 0.95) ``` A total of 60 herds need to be sampled and tested. The sample size calculations presented so far assume the use of a test with perfect specificity (that is, if a sample returns a positive result we can be 100% certain that the herd is positive and disease is actually present in the population). Consider the situation where a test with imperfect specificity is used. Imperfect specificity presents problems for disease freedom surveys. If a positive test result is returned, how sure can we be that it is a true positive as opposed to a false positive? The `rsu.ss.rsfreecalc` function returns the required sample size to confirm the absence of disease using a test with imperfect diagnostic sensitivity and specificity based on the methodology implemented in the standalone software 'Freecalc' [@cameron_baldock:1998a]. *EXAMPLE 2* We'll continue with the brucellosis example introduced above. Imagine the test we're using has a diagnostic sensitivity of 0.95 (as before) but this time it has a specificity of 0.98. How many herds need to be sampled to be 95% certain that the prevalence of brucellosis in dairy herds is less than the design prevalence if less than a specified number of tests return a positive result? ```{r message = FALSE} rsu.sssep.rsfreecalc(N = 5000, pstar = 0.05, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ``` A population sensitivity of 95% is achieved with a total sample size of 194 herds, assuming a cut-point of 7 or more positive herds are required to return a positive survey result. Note the substantial increase in sample size when diagnostic specificity is imperfect (194 herds when specificity is 0.98 compared with 63 when specificity is 1.00). The relatively low design prevalence in combination with imperfect imperfect specificity means that false positives are more likely to be a problem in this population so the number tested needs to be (substantially) increased. Increase the design prevalence to 0.10 to see its effect on estimated sample size. ```{r message = FALSE} rsu.sssep.rsfreecalc(N = 5000, pstar = 0.10, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ``` The required sample size decreases to 66 and the cut-point to 3 positives due to: (1) the expected reduction in the number of false positives; and (2) the greater difference between true and false positive rates in the first example compared with the second. Now consider the situation where individual surveillance units (e.g. animals) are aggregated within groups called 'clusters' (e.g. herds). With this type of system **two-stage cluster sampling** is a commonly used approach for disease surveillance studies. With two stage cluster sampling herds (clusters) are sampled first and then individual surveillance units are then sampled from each sampled cluster. This means that we have two sample sizes to calculate: the number of clusters and the number of surveillance units from within each sampled cluster. 
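Before working through an example it may help to see how the two sample sizes interact. The sketch below uses made-up design prevalences, test sensitivity and target sensitivities and relies on the binomial approximation with perfect specificity; it is an illustration only and is not a substitute for `rsu.sssep.rs2st`:

```{r message = FALSE}
# Hypothetical design prevalences, test sensitivity and target sensitivities:
pstar.c <- 0.01; pstar.u <- 0.10
se.u <- 0.90; se.c <- 0.95; se.p <- 0.95

# Number of units to test within each sampled cluster:
n.u <- ceiling(log(1 - se.c) / log(1 - pstar.u * se.u))

# Cluster-level sensitivity actually achieved with that sample size:
se.c.ach <- 1 - (1 - pstar.u * se.u)^n.u

# Number of clusters to sample, given the achieved cluster-level sensitivity:
n.c <- ceiling(log(1 - se.p) / log(1 - pstar.c * se.c.ach))

c(units.per.cluster = n.u, clusters = n.c)
```

The within-cluster sample size determines the cluster-level sensitivity, which in turn drives the number of clusters that must be sampled.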
*EXAMPLE 3*

For this example we assume that there are 20,000 at risk herds in our population and we do not know the number of animals present in each herd. This disease is not very common among herds but if a herd is positive the prevalence is relatively high, so we set the herd-level design prevalence to 0.005 and the within-herd design prevalence to 0.05. The test we will use at the surveillance unit level has a diagnostic sensitivity of 0.90 and a diagnostic specificity of 1.00. The target sensitivity of disease detection at the herd level is 0.95 and the target sensitivity of disease detection at the population level is the same, 0.95.

How many herds need to be sampled if you want to be 95% certain of detecting at least one infected herd if the between-herd prevalence of disease is greater than or equal to 0.005?

```{r message = FALSE}
rsu.sssep.rs(N = 20000, pstar = 0.005, se.p = 0.95, se.u = 0.95)
```

We need to sample a total of 622 herds. How many animals need to be sampled from each herd if you want to be 95% certain of detecting at least one infected animal if the within-herd prevalence of disease is greater than or equal to 0.05?

```{r message = FALSE}
rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.90)
```

Within each selected herd we need to sample at least 66 animals. As an alternative we can calculate the required number of herds to sample and the required number of animals to sample from each herd in a single step using the function `rsu.sssep.rs2st`:

```{r message = FALSE}
rsu.sssep.rs2st(H = 20000, N = NA, pstar.c = 0.005, pstar.u = 0.05, se.p = 0.95, se.c = 0.95, se.u = 0.90)
```

### Estimation of surveillance system sensitivity and specificity

```{r seprs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'}
set.caption("Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.")
seprs.tab <- "
Sampling | Outcome | Details | Function
Representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rs`
Two stage representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rs2st`
Representative | SSe | Imperfect Se, perfect Sp, multiple components | `rsu.sep.rsmult`
Representative | SSe | Imperfect Se, imperfect Sp | `rsu.sep.rsfreecalc`
Pooled representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rspool`
Representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rsvarse`
Representative | SSp | Imperfect Sp | `rsu.spp.rs`"

seprs.df <- read.delim(textConnection(seprs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE)
names(seprs.df) <- unname(as.list(seprs.df[1,])) # put headers on
seprs.df <- seprs.df[-1,] # remove first row
row.names(seprs.df) <- NULL
pander(seprs.df, style = 'rmarkdown')
```

*EXAMPLE 4*

Three hundred samples are to be tested from a population of animals to confirm the absence of disease. The total size of the population is unknown. Assuming a design prevalence of 0.01 and a test with diagnostic sensitivity of 0.95 will be used, what is the surveillance system sensitivity? That is, what is the probability that disease will be detected if it is present in the population at or above the specified design prevalence?

```{r message = FALSE}
rsu.sep.rs(N = NA, n = 300, pstar = 0.01, se.u = 0.95)
```

The probability that this surveillance strategy will detect disease if it is present in the population at or above the specified design prevalence (the surveillance system sensitivity) is 0.943.
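As a rough cross-check, when the population is effectively infinite this figure can be approximated with the binomial formula. The sketch below is an approximation only; `rsu.sep.rs` also handles the finite population case:

```{r message = FALSE}
# Binomial approximation to the surveillance system sensitivity:
1 - (1 - 0.01 * 0.95)^300
```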
*EXAMPLE 5* Thirty animals from five herds ranging in size from 80 to 100 head are to be sampled to confirm the absence of a disease. Assuming a design prevalence of 0.01 and a test with diagnostic sensitivity of 0.95 will be used, what is the sensitivity of disease detection for each herd? ```{r message = FALSE} N <- seq(from = 80, to = 100, by = 5) n <- rep(30, times = length(N)) herd.sep <- rsu.sep.rs(N = N, n = n, pstar = 0.01, se.u = 0.95) sort(round(herd.sep, digits = 2)) ``` The sensitivity of disease detection for each herd ranges from 0.28 to 0.36. *EXAMPLE 6* Assume 73 samples were tested at two different labs, using different tests. Laboratory 1 tested 50 samples with the standard test which has a diagnostic sensitivity of 0.80. Laboratory 2 tested the remaining 23 samples with a different test which has a diagnostic sensitivity of 0.70. What is the surveillance system sensitivity of disease detection if we set the design prevalence to 0.05? ```{r message = FALSE} # Diagnostic test sensitivities and the number of samples tested at each laboratory: se.t1 <- 0.80; se.t2 <- 0.70 n.lab1 <- 50; n.lab2 <- 23 # Create a vector of test sensitivities for each sample: se.all <- c(rep(se.t1, times = n.lab1), rep(se.t2, times = n.lab2)) rsu.sep.rsvarse(N = n.lab1 + n.lab2, pstar = 0.05, se.u = se.all) ``` If the design prevalence is 0.05 the estimated surveillance system sensitivity is 0.997. ### Estimation of the probability of disease freedom ```{r pfreers.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate the probability of disease freedom using representative population sampling data.") pfreers.tab <- " Sampling | Outcome | Details | Function Representative | Prob disease of freedom | Imperfect Se, perfect Sp | `rsu.pfree.rs` Representative | Equilibrium prob of disease freedom | Imperfect Se, perfect Sp | `rsu.pfree.equ`" pfreers.df <- read.delim(textConnection(pfreers.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(pfreers.df) <- unname(as.list(pfreers.df[1,])) # put headers on pfreers.df <- pfreers.df[-1,] # remove first row row.names(pfreers.df) <- NULL pander(pfreers.df, style = 'rmarkdown') ``` *EXAMPLE 7* You are the epidemiologist for a land-locked country in central Asia. You have developed a surveillance program for a given disease which has an estimated system sensitivity of 0.65. The disease of interest is carried by live animals and you know that the frequency of illegal importation of animals into your country (and therefore the likelihood of disease incursion) is higher during the warmer months of the year (June to August). Plot the probability of disease freedom assuming surveillance testing is carried out each month. Include on your plot the probability of disease incursion to show how it changes during the year. Previous surveillance work indicates that the probability that your country is free of disease is 0.50. 
```{r message = FALSE} library(ggplot2); library(lubridate); library(scales) # Define a vector disease incursion probabilities (January to December): p.intro <- c(0.01,0.01,0.01,0.02,0.04,0.10,0.10,0.10,0.08,0.06,0.04,0.02) rval.df <- rsu.pfree.rs(se.p = rep(0.65, times = 12), p.intro = p.intro, prior = 0.50, by.time = TRUE) # Re-format rval.df ready for for ggplot2: dat.df <- data.frame(mnum = rep(1:12, times = 2), mchar = rep(seq(as.Date("2020/1/1"), by = "month", length.out = 12), times = 2), class = c(rep("Disease introduction", times = length(p.intro)), rep("Disease freedom", times = length(p.intro))), prob = c(rval.df$PIntro, rval.df$PFree)) # Plot the results: ggplot(data = dat.df, aes(x = mchar, y = prob, group = class, col = class)) + theme_bw() + geom_point() + geom_line() + scale_colour_manual(values = c("red", "dark blue")) + scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b"), name = "Month") + scale_y_continuous(limits = c(0,1), name = "Probability") + geom_hline(aes(yintercept = 0.95), linetype = "dashed", col = "blue") + guides(col = guide_legend(title = "")) + theme(legend.position = c(0.8, 0.5)) ``` ## Risk-based sampling With risk-based sampling we modify the intensity of sampling effort across the population of interest according to risk (as opposed to representative sampling where the probability that an individual unit is sampled is uniform across the population of interest). When our objective is to detect the presence of disease risk-based sampling makes intuitive sense: we concentrate our search effort on those sections of the population where we believe we are more likely to detect disease (i.e. where the risk of disease is high). ### How many samples do I need? The sample size functions all relate to sampling to achieve a defined surveillance system sensitivity. ```{r ssrb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate sample size using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | Details | Function Risk-based | SSe | Single Se for risk groups, perfect Sp | `rsu.sssep.rbsrg` Risk-based | SSe | Multiple Se within risk groups, perfect Sp | `rsu.sssep.rbmrg` Risk-based | SSe | Two stage sampling, 1 risk factor | `rsu.sssep.rb2st1rf` Risk-based | SSe | Two stage sampling, 2 risk factors | `rsu.sssep.rb2st2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ``` *EXAMPLE 8* You are working with a disease of cattle where the prevalence of disease is believed to vary according to herd type. The risk of disease is 5 times greater in dairy herds and 3 times greater in mixed herds compared with the reference category, beef herds. The distribution of dairy, mixed and beef herds in the population of interest is 0.10, 0.10 and 0.80, respectively. Assume you intend to distribute your sampling effort 0.4, 0.4 and 0.2 across dairy, mixed and beef herds, respectively. Within each of the three risk groups a single test with a diagnostic sensitivity of 0.95 will be used. How many herds need to be sampled if you want to achieve 95% system sensitivity for a prevalence of disease in the population of greater than or equal to 1%? 
```{r message = FALSE} # Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests): m <- rbind(1,1,1) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = 0.95) ``` A total of 147 herds need to be sampled: 59 dairy, 59 mixed and 29 beef herds. Now assume that one of two tests will be used for each herd. The first test has a diagnostic sensitivity of 0.92. The second test has a diagnostic sensitivity of 0.80. The proportion of dairy, mixed and beef herds receiving the first test is 0.80, 0.50 and 0.70, respectively (which means that 0.20, 0.50 and 0.30 receive the second test, respectively). Recalculate the sample size. ```{r message = FALSE} # Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests): m <- rbind(c(0.8,0.2), c(0.5,0.5), c(0.7,0.3)) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = c(0.92,0.80)) ``` A total of 159 herds need to be sampled: 64 dairy, 64 mixed and 31 beef herds. *EXAMPLE 9* A cross-sectional study is to be carried out to confirm the absence of disease using risk based sampling. Assume a population level design prevalence of 0.01 and there are 'high', 'medium' and 'low' risk areas where the risk of disease in the high risk area compared with the low risk area is 5 and the risk of disease in the medium risk area compared with the low risk area is 3. The proportions of the population at risk in the high, medium and low risk area are 0.10, 0.10 and 0.80, respectively. Half of your samples will be taken from individuals in the high risk area, 0.30 from the medium risk area and 0.20 from the low risk area. You intend to use a test with diagnostic sensitivity of 0.90 and you'd like to take sufficient samples to return a population sensitivity of 0.95. How many units need to be sampled to meet the requirements of the study? ```{r message = FALSE} rsu.sssep.rbsrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.10,0.10,0.80), spr = c(0.50,0.30,0.20), se.p = 0.95, se.u = 0.90) ``` A total of 147 units needs to be sampled to meet the requirements of the study: 74 from the high risk area, 45 from the medium risk area and 28 from the low risk area. *EXAMPLE 10* A cross-sectional study is to be carried out to confirm the absence of disease using risk based sampling. Assume a design prevalence of 0.02 at the cluster (herd) level and a design prevalence of 0.10 at the surveillance unit (individual animal) level. Clusters are categorised as being either high, medium or low risk with the probability of disease for clusters in the high and medium risk area 5 and 3 times the probability of disease in the low risk area. The proportions of clusters in the high, medium and low risk area are 0.10, 0.20 and 0.70, respectively. The proportion of samples from the high, medium and low risk area will be 0.40, 0.40 and 0.20, respectively. Surveillance units (individual animals) are categorised as being either high or low risk with the probability of disease for units in the high risk group 4 times the probability of disease in the low risk group. The proportions of units in the high and low risk groups are 0.10 and 0.90, respectively. All of your samples will be taken from units in the high risk group. 
You intend to use a test with diagnostic sensitivity of 0.95 and you'd like to take sufficient samples to be 95% certain that you've detected disease at the population level, 95% certain that you've detected disease at the cluster level and 95% at the surveillance unit level. How many clusters and how many units need to be sampled to meet the requirements of the study? ```{r message = FALSE} rsu.sssep.rb2st2rf( rr.c = c(5,3,1), ppr.c = c(0.10,0.20,0.70), spr.c = c(0.40,0.40,0.20), pstar.c = 0.02, rr.u = c(4,1), ppr.u = c(0.1, 0.9), spr.u = c(1,0), pstar.u = 0.10, se.p = 0.95, se.c = 0.95, se.u = 0.95) ``` A total of 82 clusters needs to be sampled: 33 from the high risk area, 33 from the medium risk area and 16 from the low risk area. A total of 9 units should be sampled from each cluster. ### Surveillance system sensitivity ```{r seprb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | Details | Function Risk-based | SSe | Varying Se, perfect Sp | `rsu.sep.rb` Risk-based | SSe | Varying Se, perfect Sp, one risk factor | `rsu.sep.rb1rf` Risk-based | SSe | Varying Se, perfect Sp, two risk factors | `rsu.sep.rb2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ``` *EXAMPLE 11* You have been asked to provide an assessment of a surveillance program for Actinobacillus hyopneumoniae in pigs. It is known that there are high risk and low risk areas for A. hypopneumoniae in your country with the estimated probability of disease in the high risk area thought to be around 3.5 times that of the probability of disease in the low risk area. It is known that 10% of the 1784 pig herds in the study area are in the high risk area and 90% are in the low risk area. The risk of A. hypopneumoniae is dependent on age, with adult pigs around five times more likely to be A. hypopneumoniae positive compared with younger (grower) pigs. Pigs from 20 herds have been sampled: 5 from the low-risk area and 15 from the high-risk area. All of the tested pigs were adults: no grower pigs were tested. The ELISA for A. hypopneumoniae in pigs has a diagnostic sensitivity of 0.95. What is the surveillance system sensitivity if we assume a design prevalence of 1 per 100 at the cluster (herd) level and 5 per 100 at the surveillance system unit (pig) level? ```{r message = FALSE} # There are 1784 herds in the study area: H <- 1784 # Twenty of the 1784 herds are sampled. Generate 20 herds of varying size: set.seed(1234) hsize <- rlnorm(n = 20, meanlog = log(10), sdlog = log(8)) hsize <- round(hsize + 20, digits = 0) # Generate a matrix listing the number of growers and finishers in each of the 20 sampled herds. # Assume that anywhere between 80% and 95% of the pigs in each herd are growers: set.seed(1234) pctg <- runif(n = 20, min = 0.80, max = 0.95) ngrow <- round(pctg * hsize, digits = 0) nfini <- hsize - ngrow N <- cbind(ngrow, nfini) # Generate a matrix listing the number of grower and finisher pigs sampled from each herd. Fifteen pigs from each herd are sampled. 
If there's less than 15 pigs we sample the entire herd: nsgrow <- rep(0, times = 20) nsfini <- ifelse(nfini <= 15, nfini, 15) n <- cbind(nsgrow, nsfini) # The herd-level design prevalence is 0.01 and the individual pig-level design prevalence is 0.05: pstar.c <- 0.01 pstar.u <- 0.05 # For herds in the high-risk area the probability being A. hyopneumoniae positive is 3.5 times that of herds in the low-risk area. Ninety percent of herds are in the low risk area and 10% are in the high risk area: rr.c <- c(3.5,1) ppr.c <- c(0.1,0.9) # We've sampled 15 herds from the high risk area and 5 herds from the low risk area. Above, for vector rr.c, the relative risk for the high risk group is listed first so the vector rg follows this order: rg <- c(rep(1, times = 15), rep(2, times = 5)) # The probability being A. hyopneumoniae positive for finishers is 5 times that of growers. For the matrices N and n growers are listed first then finishers. Vector rr.u follows the same order: rr.u <- c(1,5) # The diagnostic sensitivity of the A. hyopneumoniae ELISA is 0.95: se.u <- 0.95 rsu.sep.rb2st(H = H, N = N, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = NA, se.u = se.u) ``` The estimated surveillance system sensitivity of this program is 0.32. Repeat these analyses assuming we don't know the total number of pig herds in the population and we have only an estimate of the proportions of growers and finishers in each herd. ```{r message = FALSE} # Generate a matrix listing the proportion of growers and finishers in each of the 20 sampled herds: ppr.u <- cbind(rep(0.9, times = 20), rep(0.1, times = 20)) # Set H (the number of clusters) and N (the number of surveillance units within each cluster) to NA: rsu.sep.rb2st(H = NA, N = NA, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = ppr.u, se.u = se.u) ``` The estimated surveillance system sensitivity is 0.21. ## Analysis of passive surveillance data ### Estimation of surveillance system sensitivity and specificity *EXAMPLE 12* There are four 'steps' in a (passive) disease detection process for disease X in your country: (1) an infected animal shows clinical signs of disease; (2) a herd manager observes clinical signs in a disease animal and calls a veterinarian; (3) a veterinarian responds appropriately to the disease investigation request (taking, for example, appropriate samples for laboratory investigation); and (4) the laboratory conducts appropriate tests on the submitted samples and interprets the results of those tests correctly. The probabilities for each step in the disease detection pathway (in order) are 0.10, 0.20, 0.90 and 0.99, respectively. Assuming the probability that a unit actually has disease if it is submitted for testing is 0.98, the sensitivity of the diagnostic test used at the unit level is 0.90, the population is comprised of 1000 clusters (herds), five animals from each cluster (herd) investigated for disease are tested and the cluster-level design prevalence is 0.01, what is the sensitivity of disease detection at the cluster (herd) and population level? ```{r message = FALSE} rsu.sep.pass(step.p = c(0.10,0.20,0.90,0.99), pstar.c = 0.01, p.inf.u = 0.98, N = 1000, n = 5, se.u = 0.90) ``` The sensitivity of disease detection at the cluster (herd) level is 0.018. The sensitivity of disease detection at the population level is 0.16. 
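These two figures can be rationalised with a back-of-the-envelope calculation. The sketch below assumes the four detection steps act independently and uses simple binomial reasoning; it is an approximation for illustration, not the internal code of `rsu.sep.pass`:

```{r message = FALSE}
# Probability that an infected herd makes it through all four detection steps:
p.steps <- prod(c(0.10,0.20,0.90,0.99))

# Probability that at least one of the five sampled animals tests positive:
p.pos <- 1 - (1 - 0.98 * 0.90)^5

# Approximate cluster (herd) level sensitivity:
se.c <- p.steps * p.pos
se.c

# Approximate population level sensitivity, with 1000 herds and a cluster level
# design prevalence of 0.01 (that is, around 10 infected herds):
1 - (1 - se.c)^(0.01 * 1000)
```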
## Miscellaneous functions

### Adjusted relative risks

*EXAMPLE 13*

For a given disease of interest you believe that there is a 'high risk' and 'low risk' area in your country. The risk of disease in the high-risk area compared with the low-risk area is 5. A recent census shows that 10% of the population are resident in the high-risk area and 90% are resident in the low-risk area. Calculate the adjusted relative risks for each area.

```{r message = FALSE}
rsu.adjrisk(rr = c(5,1), ppr = c(0.10,0.90))
```

The adjusted relative risks for the high and low risk areas are 3.6 and 0.7, respectively.

### Design prevalence back calculation

*EXAMPLE 14*

The population size in a provincial area in your country is 193,000. In a given two-week period a total of 7764 individuals have been tested for COVID-19 using an approved PCR which is believed to have a diagnostic sensitivity of 0.85. All of the individuals tested have returned a negative result. What is the maximum prevalence required to provide a system sensitivity of 0.95 if COVID-19 is actually present in this population (i.e. what is the back-calculated design prevalence)? Express your result as the number of COVID-19 cases per 100,000 head of population.

```{r message = FALSE}
rsu.pstar(N = 193000, n = 7764, se.p = 0.95, se.u = 0.85) * 100000
```

If the 7764 individuals have all returned a negative test result (using a test with 85% sensitivity) we can be 95% confident that COVID-19, if it is present, is present at a prevalence of 44 cases per 100,000 or less.

What is the probability that the prevalence of COVID-19 in this population is less than or equal to 10 cases per 100,000?

```{r message = FALSE}
rsu.sep(N = 193000, n = 7764, pstar = 10 / 100000, se.u = 0.85)
```

If all of the 7764 individuals returned a negative test we can be 48% confident that the prevalence of COVID-19 in the province is less than 10 per 100,000. How many need to be tested to be 95% confident that the prevalence of COVID-19 is less than or equal to 10 cases per 100,000? We return to the sample size functions covered earlier:

```{r message = FALSE}
rsu.sssep.rs(N = 193000, pstar = 10 / 100000, se.p = 0.95, se.u = 0.85)
```

To be 95% confident that the prevalence of COVID-19 is less than or equal to 10 cases per 100,000 a total of 31,586 individuals need to be tested.
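As with the earlier examples, the second of these results can be approximated with the binomial formula when the number tested is small relative to the population. The sketch below ignores the finite population correction applied by the `rsu` functions, so the answer is close to, but not exactly, the value returned above:

```{r message = FALSE}
# Binomial approximation, ignoring the finite population correction:
1 - (1 - (10 / 100000) * 0.85)^7764
```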
## References epiR/inst/doc/epiR_surveillance.R0000644000176200001440000003312214166006710016473 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----ssrs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'------ library(pander) panderOptions('table.split.table', Inf) # panderOptions('table.alignment.default', function(df) ifelse(sapply(df, is.numeric), 'right', 'left')) set.caption("Functions to estimate sample size using representative population sampling data.") ssrs.tab <- " Sampling | Outcome | Details | Function Representative | Prob disease freedom | Imperfect Se, perfect Sp | `rsu.sspfree.rs` Representative | SSe | Imperfect Se, perfect Sp | `rsu.sssep.rs` Two stage representative | SSe | Imperfect Se, perfect Sp | `rsu.sssep.rs2st` Representative | SSe | Imperfect Se, imperfect Sp, known N | `rsu.sssep.rsfreecalc` Pooled representative | SSe | Imperfect Se, imperfect Sp | `rsu.sssep.rspool`" ssrs.df <- read.delim(textConnection(ssrs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrs.df) <- unname(as.list(ssrs.df[1,])) # put headers on ssrs.df <- ssrs.df[-1,] # remove first row row.names(ssrs.df) <- NULL pander(ssrs.df, style = 'rmarkdown') ## ----message = FALSE---------------------------------------------------------- library(epiR) rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.95) ## ----message = FALSE---------------------------------------------------------- rsu.sspfree.rs(N = NA, prior = 0.50, p.intro = 0.01, pstar = 0.05, pfree = 0.95, se.u = 0.95) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rs(N = 500, pstar = 0.05, se.p = 0.95, se.u = 0.95) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rsfreecalc(N = 5000, pstar = 0.05, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rsfreecalc(N = 5000, pstar = 0.10, mse.p = 0.95, msp.p = 0.95, se.u = 0.95, sp.u = 0.98, method = "hypergeometric", max.ss = 32000)$summary ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rs(N = 20000, pstar = 0.005, se.p = 0.95, se.u = 0.95) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rs(N = NA, pstar = 0.05, se.p = 0.95, se.u = 0.90) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rs2st(H = 20000, N = NA, pstar.c = 0.005, pstar.u = 0.05, se.p = 0.95, se.c = 0.95, se.u = 0.90) ## ----seprs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'----- set.caption("Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.") seprs.tab <- " Sampling | Outcome | Details | Function Representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rs` Two stage representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rs2st` Representative | SSe | Imperfect Se, perfect Sp, multiple components | `rsu.sep.rsmult` Representative | SSe | Imperfect Se, imperfect Sp | 
`rsu.sep.rsfreecalc` Pooled representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rspool` Representative | SSe | Imperfect Se, perfect Sp | `rsu.sep.rsvarse` Representative | SSp | Imperfect Sp | `rsu.spp.rs`" seprs.df <- read.delim(textConnection(seprs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprs.df) <- unname(as.list(seprs.df[1,])) # put headers on seprs.df <- seprs.df[-1,] # remove first row row.names(seprs.df) <- NULL pander(seprs.df, style = 'rmarkdown') ## ----message = FALSE---------------------------------------------------------- rsu.sep.rs(N = NA, n = 300, pstar = 0.01, se.u = 0.95) ## ----message = FALSE---------------------------------------------------------- N <- seq(from = 80, to = 100, by = 5) n <- rep(30, times = length(N)) herd.sep <- rsu.sep.rs(N = N, n = n, pstar = 0.01, se.u = 0.95) sort(round(herd.sep, digits = 2)) ## ----message = FALSE---------------------------------------------------------- # Diagnostic test sensitivities and the number of samples tested at each laboratory: se.t1 <- 0.80; se.t2 <- 0.70 n.lab1 <- 50; n.lab2 <- 23 # Create a vector of test sensitivities for each sample: se.all <- c(rep(se.t1, times = n.lab1), rep(se.t2, times = n.lab2)) rsu.sep.rsvarse(N = n.lab1 + n.lab2, pstar = 0.05, se.u = se.all) ## ----pfreers.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'---- set.caption("Functions to estimate the probability of disease freedom using representative population sampling data.") pfreers.tab <- " Sampling | Outcome | Details | Function Representative | Prob disease of freedom | Imperfect Se, perfect Sp | `rsu.pfree.rs` Representative | Equilibrium prob of disease freedom | Imperfect Se, perfect Sp | `rsu.pfree.equ`" pfreers.df <- read.delim(textConnection(pfreers.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(pfreers.df) <- unname(as.list(pfreers.df[1,])) # put headers on pfreers.df <- pfreers.df[-1,] # remove first row row.names(pfreers.df) <- NULL pander(pfreers.df, style = 'rmarkdown') ## ----message = FALSE---------------------------------------------------------- library(ggplot2); library(lubridate); library(scales) # Define a vector disease incursion probabilities (January to December): p.intro <- c(0.01,0.01,0.01,0.02,0.04,0.10,0.10,0.10,0.08,0.06,0.04,0.02) rval.df <- rsu.pfree.rs(se.p = rep(0.65, times = 12), p.intro = p.intro, prior = 0.50, by.time = TRUE) # Re-format rval.df ready for for ggplot2: dat.df <- data.frame(mnum = rep(1:12, times = 2), mchar = rep(seq(as.Date("2020/1/1"), by = "month", length.out = 12), times = 2), class = c(rep("Disease introduction", times = length(p.intro)), rep("Disease freedom", times = length(p.intro))), prob = c(rval.df$PIntro, rval.df$PFree)) # Plot the results: ggplot(data = dat.df, aes(x = mchar, y = prob, group = class, col = class)) + theme_bw() + geom_point() + geom_line() + scale_colour_manual(values = c("red", "dark blue")) + scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b"), name = "Month") + scale_y_continuous(limits = c(0,1), name = "Probability") + geom_hline(aes(yintercept = 0.95), linetype = "dashed", col = "blue") + guides(col = guide_legend(title = "")) + theme(legend.position = c(0.8, 0.5)) ## ----ssrb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'------ set.caption("Functions to estimate sample size using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | Details | Function Risk-based | SSe | Single Se for risk groups, perfect Sp 
| `rsu.sssep.rbsrg` Risk-based | SSe | Multiple Se within risk groups, perfect Sp | `rsu.sssep.rbmrg` Risk-based | SSe | Two stage sampling, 1 risk factor | `rsu.sssep.rb2st1rf` Risk-based | SSe | Two stage sampling, 2 risk factors | `rsu.sssep.rb2st2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ## ----message = FALSE---------------------------------------------------------- # Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests): m <- rbind(1,1,1) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = 0.95) ## ----message = FALSE---------------------------------------------------------- # Matrix listing the proportions of samples for each test in each risk group (the number of rows equal the number of risk groups, the number of columns equal the number of tests): m <- rbind(c(0.8,0.2), c(0.5,0.5), c(0.7,0.3)) rsu.sssep.rbmrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.1,0.1,0.8), spr = c(0.4,0.4,0.2), spr.rg = m, se.p = 0.95, se.u = c(0.92,0.80)) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rbsrg(pstar = 0.01, rr = c(5,3,1), ppr = c(0.10,0.10,0.80), spr = c(0.50,0.30,0.20), se.p = 0.95, se.u = 0.90) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rb2st2rf( rr.c = c(5,3,1), ppr.c = c(0.10,0.20,0.70), spr.c = c(0.40,0.40,0.20), pstar.c = 0.02, rr.u = c(4,1), ppr.u = c(0.1, 0.9), spr.u = c(1,0), pstar.u = 0.10, se.p = 0.95, se.c = 0.95, se.u = 0.95) ## ----seprb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'----- set.caption("Functions to estimate surveillance system sensitivity using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | Details | Function Risk-based | SSe | Varying Se, perfect Sp | `rsu.sep.rb` Risk-based | SSe | Varying Se, perfect Sp, one risk factor | `rsu.sep.rb1rf` Risk-based | SSe | Varying Se, perfect Sp, two risk factors | `rsu.sep.rb2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ## ----message = FALSE---------------------------------------------------------- # There are 1784 herds in the study area: H <- 1784 # Twenty of the 1784 herds are sampled. Generate 20 herds of varying size: set.seed(1234) hsize <- rlnorm(n = 20, meanlog = log(10), sdlog = log(8)) hsize <- round(hsize + 20, digits = 0) # Generate a matrix listing the number of growers and finishers in each of the 20 sampled herds. # Assume that anywhere between 80% and 95% of the pigs in each herd are growers: set.seed(1234) pctg <- runif(n = 20, min = 0.80, max = 0.95) ngrow <- round(pctg * hsize, digits = 0) nfini <- hsize - ngrow N <- cbind(ngrow, nfini) # Generate a matrix listing the number of grower and finisher pigs sampled from each herd. Fifteen pigs from each herd are sampled. 
If there's less than 15 pigs we sample the entire herd: nsgrow <- rep(0, times = 20) nsfini <- ifelse(nfini <= 15, nfini, 15) n <- cbind(nsgrow, nsfini) # The herd-level design prevalence is 0.01 and the individual pig-level design prevalence is 0.05: pstar.c <- 0.01 pstar.u <- 0.05 # For herds in the high-risk area the probability being A. hyopneumoniae positive is 3.5 times that of herds in the low-risk area. Ninety percent of herds are in the low risk area and 10% are in the high risk area: rr.c <- c(3.5,1) ppr.c <- c(0.1,0.9) # We've sampled 15 herds from the high risk area and 5 herds from the low risk area. Above, for vector rr.c, the relative risk for the high risk group is listed first so the vector rg follows this order: rg <- c(rep(1, times = 15), rep(2, times = 5)) # The probability being A. hyopneumoniae positive for finishers is 5 times that of growers. For the matrices N and n growers are listed first then finishers. Vector rr.u follows the same order: rr.u <- c(1,5) # The diagnostic sensitivity of the A. hyopneumoniae ELISA is 0.95: se.u <- 0.95 rsu.sep.rb2st(H = H, N = N, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = NA, se.u = se.u) ## ----message = FALSE---------------------------------------------------------- # Generate a matrix listing the proportion of growers and finishers in each of the 20 sampled herds: ppr.u <- cbind(rep(0.9, times = 20), rep(0.1, times = 20)) # Set H (the number of clusters) and N (the number of surveillance units within each cluster) to NA: rsu.sep.rb2st(H = NA, N = NA, n = n, pstar.c = pstar.c, pstar.u = pstar.u, rg = rg, rr.c = rr.c, rr.u = rr.u, ppr.c = ppr.c, ppr.u = ppr.u, se.u = se.u) ## ----message = FALSE---------------------------------------------------------- rsu.sep.pass(step.p = c(0.10,0.20,0.90,0.99), pstar.c = 0.01, p.inf.u = 0.98, N = 1000, n = 5, se.u = 0.90) ## ----message = FALSE---------------------------------------------------------- rsu.adjrisk(rr = c(5,1), ppr = c(0.10,0.90)) ## ----message = FALSE---------------------------------------------------------- rsu.pstar(N = 193000, n = 7764, se.p = 0.95, se.u = 0.85) * 100000 ## ----message = FALSE---------------------------------------------------------- rsu.sep(N = 193000, n = 7764, pstar = 10 / 100000, se.u = 0.85) ## ----message = FALSE---------------------------------------------------------- rsu.sssep.rs(N = 193000, pstar = 10 / 100000, se.p = 0.95, se.u = 0.85) epiR/inst/doc/epiR_RSurveillance.R0000644000176200001440000002035714166006663016572 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----ssrs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'------ library(pander) panderOptions('table.split.table', Inf) set.caption("Functions to estimate sample size using representative population sampling data.") ssrs.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | Prob disease freedom | `n.pfree` | `rsu.sspfree.rs` Representative | SSe | `n.freedom` | `rsu.sssep.rs` Two stage representative | SSe | `n.2stage` | `rsu.sssep.rs2st` Representative | SSe | `n.freecalc` | `rsu.sssep.rsfreecalc` Pooled representative | SSe | 
`n.pooled` | `rsu.sssep.rspool`" ssrs.df <- read.delim(textConnection(ssrs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrs.df) <- unname(as.list(ssrs.df[1,])) # put headers on ssrs.df <- ssrs.df[-1,] # remove first row row.names(ssrs.df) <- NULL pander(ssrs.df, style = 'rmarkdown') ## ----seprs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'----- set.caption("Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.") seprs.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | SSe | `sep.binom` | `rsu.sep.rs` Representative | SSe | `sep.hypergeo` | `rsu.sep.rs` Representative | SSe | `sep` | `rsu.sep.rs` Two stage representative | SSe | `sep.sys` | `rsu.sep.rs2st` Representative | SSe | `sse.combined` | `rsu.sep.rsmult` Representative | SSe | `sep.freecalc` | `rsu.sep.rsfreecalc` Representative | SSe | `sep.binom.imperfect`| `rsu.sep.rsfreecalc` Pooled representative | SSe | `sep.pooled` | `rsu.sep.rspool` Representative | SSe | `sep.var.se` | `rsu.sep.rsvarse` Representative | SSp | `spp` | `rsu.spp.rs` Representative | SSp | `sph.hp` | `rsu.spp.rs`" seprs.df <- read.delim(textConnection(seprs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprs.df) <- unname(as.list(seprs.df[1,])) # put headers on seprs.df <- seprs.df[-1,] # remove first row row.names(seprs.df) <- NULL pander(seprs.df, style = 'rmarkdown') ## ----pfreers.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'---- set.caption("Functions to estimate the probability of disease freedom using representative population sampling data.") pfreers.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | Prob disease of freedom | `pfree.1` | `rsu.pfree.rs` Representative | Prob disease of freedom | `pfree.calc` | `rsu.pfree.rs` Representative | Equilibrium prob of disease freedom | `pfree.equ` | `rsu.pfree.equ`" pfreers.df <- read.delim(textConnection(pfreers.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(pfreers.df) <- unname(as.list(pfreers.df[1,])) # put headers on pfreers.df <- pfreers.df[-1,] # remove first row row.names(pfreers.df) <- NULL pander(pfreers.df, style = 'rmarkdown') ## ----ssrb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'------ set.caption("Functions to estimate sample size using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `n.rb` | `rsu.sssep.rbsrg` Risk-based | SSe | `n.rb.varse` | `rsu.sssep.rbmrg` Risk-based | SSe | `n.rb.2stage.1` | `rsu.sssep.rb2st1rf` Risk-based | SSe | `n.rb.2stage.2` | `rsu.sssep.rb2st2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ## ----seprb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'----- set.caption("Functions to estimate surveillance system sensitivity (SSe) using risk based sampling data.") seprb.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.rb.bin.varse` | `rsu.sep.rb` Risk-based | SSe | `sep.rb.bin` | `rsu.sep.rb1rf` Risk-based | SSe | `sep.rb.hypergeo` | `rsu.sep.rb1rf` Risk-based | SSe | `sep.rb2.bin` | `rsu.sep.rb2rf` Risk-based | SSe | `sep.rb2.hypergeo` | `rsu.sep.rb2rf` Risk-based | SSe | 
`sep.rb.hypergeo.varse` | `rsu.sep.rbvarse` Risk-based | SSe | `sse.rb2stage` | `rsu.sep.rb2stage`" seprb.df <- read.delim(textConnection(seprb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprb.df) <- unname(as.list(seprb.df[1,])) # put headers on seprb.df <- seprb.df[-1,] # remove first row row.names(seprb.df) <- NULL pander(seprb.df, style = 'rmarkdown') ## ----sepcen.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'---- set.caption("Functions to estimate surveillance system sensitivity (SSe) using census data.") sepcen.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.exact` | `rsu.sep.cens`" sepcen.df <- read.delim(textConnection(sepcen.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(sepcen.df) <- unname(as.list(sepcen.df[1,])) # put headers on sepcen.df <- sepcen.df[-1,] # remove first row row.names(sepcen.df) <- NULL pander(sepcen.df, style = 'rmarkdown') ## ----seppas.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'---- set.caption("Functions to estimate surveillance system sensitivity (SSe) using passively collected surveillance data.") sepcen.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.passive` | `rsu.sep.pass`" seppas.df <- read.delim(textConnection(sepcen.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seppas.df) <- unname(as.list(seppas.df[1,])) # put headers on seppas.df <- seppas.df[-1,] # remove first row row.names(seppas.df) <- NULL pander(seppas.df, style = 'rmarkdown') ## ----misc.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'------ set.caption("Miscellaneous functions.") misc.tab <- " Details | RSurveillance | epiR Adjusted risk | `adj.risk` | `rsu.adjrisk` Adjusted risk | `adj.risk.sim` | `rsu.adjrisk` Series test interpretation, Se | `se.series` | `rsu.dxtest` Parallel test interpretation, Se | `se.parallel` | `rsu.dxtest` Series test interpretation, Sp | `sp.series` | `rsu.dxtest` Parallel test interpretation, Sp | `sp.parallel` | `rsu.dxtest` Effective probability of infection | `epi.calc` | `rsu.epinf` Design prevalence back calculation | `pstar.calc` | `rsu.pstar` Prob disease is less than design prevalence | | `rsu.sep`" misc.df <- read.delim(textConnection(misc.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(misc.df) <- unname(as.list(misc.df[1,])) # put headers on misc.df <- misc.df[-1,] # remove first row row.names(misc.df) <- NULL pander(misc.df, style = 'rmarkdown') epiR/inst/doc/epiR_measures_of_association.Rmd0000644000176200001440000005027514135133050021226 0ustar liggesusers--- title: "Measures of Association" author: "Mark Stevenson" date: "`r Sys.Date()`" bibliography: epiR_measures_of_association.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Measures of association} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} library(knitr); library(kableExtra) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` A common task in epidemiology is to quantify the strength of association between exposures ('risk factors') and disease outcomes. In this context the term 'exposure' is taken to mean a variable whose association with the outcome is to be estimated. 
Exposures can be harmful, beneficial or both harmful and beneficial (e.g. if an immunisable disease is circulating, exposure to immunising agents helps most recipients but may harm those who experience adverse reactions). The term 'outcome' is used to describe all the possible results that may arise from exposure to a causal factor or from preventive or therapeutic interventions [@porta:2014]. In human and animal health an 'outcome-positive' individual is an individual who has experienced a given disease of interest.

In this vignette we describe how `epiR` can be used to compute the various measures of association used in epidemiology, notably the risk ratio, odds ratio, attributable risk in the exposed, attributable fraction in the exposed, attributable risk in the population and attributable fraction in the population. Examples are provided to demonstrate how the package can be used to deal with exposure-outcome data presented in various formats.

This vignette has been written assuming the reader routinely formats their 2 $\times$ 2 table data with the outcome status as columns and exposure status as rows. If this is not the case the argument `outcome = "as.columns"` (the default) can be changed to `outcome = "as.rows"`.

**Measures of association strength**

***The incidence risk ratio***

Consider a study where subjects are disease free at the start of the study and all are monitored for disease occurrence for a specified time period. At the start of the study period, study subjects are classified according to exposure to a hypothesised risk factor. If both exposure and outcome are binary variables (yes or no) we can present the counts of subjects in each of the four exposure-disease categories in a 2 $\times$ 2 table.

```{r echo = FALSE, results = 'asis'}
twobytwo <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), "Total" = c("a+b","c+d","a+b+c+d"))
colnames(twobytwo) <- c("Dis pos","Dis neg","Total")
rownames(twobytwo) <- c("Exp pos","Exp neg","Total")

kbl(twobytwo, caption = "A 2 by 2 table.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em")
  # row_spec(row = 1, bold = TRUE)
```

When our data are in this format we can calculate the incidence risk of the outcome in those that were exposed $R_{E+}$, the incidence risk in those that were not exposed $R_{E-}$ and finally the incidence risk in the total study population $R_{T}$:

```{r echo = FALSE, results = 'asis'}
irr <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), "Total" = c("a+b","c+d","a+b+c+d"), "Risk" = c("RE+ = a/(a+b)","RE- = c/(c+d)", "RT = (a+c)/(a+b+c+d)"))
colnames(irr) <- c("Dis pos","Dis neg","Total","Risk")
rownames(irr) <- c("Exp pos","Exp neg","Total")

kbl(irr, caption = "A 2 by 2 table with incidence risks calculated for the exposed, the unexposed and the total study population.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em") %>%
  column_spec(4, bold = FALSE, width = "10em")
  # row_spec(row = 1, bold = TRUE)
```

The incidence risk ratio is then the incidence risk of the outcome in the exposed divided by the incidence risk of the outcome in the unexposed (Figure 1).

![The incidence risk ratio.](risk_ratio.png)

The incidence risk ratio provides an estimate of how many times more likely exposed individuals are to experience the outcome of interest, compared with non-exposed individuals.
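As a simple numerical illustration, the incidence risk ratio can be calculated directly from the four cells of the table. The cell counts below are made up for demonstration purposes and are not taken from any particular study:

```{r message = FALSE}
# Made-up cell counts a, b, c and d, laid out as in the tables above:
a <- 20; b <- 80; c <- 10; d <- 90

r.epos <- a / (a + b) # incidence risk of the outcome in the exposed
r.eneg <- c / (c + d) # incidence risk of the outcome in the unexposed
r.epos / r.eneg       # incidence risk ratio
```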
If the incidence risk ratio equals 1, then the risks of the outcome in the exposed and non-exposed groups are equal. If the incidence risk ratio is greater than 1, then exposure increases the outcome risk, with greater departures from 1 indicative of a stronger effect. If the incidence risk ratio is less than 1, exposure reduces the outcome risk and exposure is said to be protective.

***The odds ratio --- cohort studies***

In a cohort study definition of exposure status (exposure-positive, exposure-negative) comes first. Subjects are then followed over time to determine their outcome status (outcome-positive, outcome-negative). The odds of the outcome in the exposed and unexposed populations are calculated as follows:

```{r echo = FALSE, results = 'asis'}
or.cohort <- data.frame("Dis pos" = c("a","c","a+c"), "Dis neg" = c("b","d","b+d"), 
   "Total" = c("a+b","c+d","a+b+c+d"), "Odds" = c("OE+ = a/b","OE- = c/d", 
   "OT = (a+c)/(b+d)"))
colnames(or.cohort) <- c("Dis pos","Dis neg","Total","Odds")
rownames(or.cohort) <- c("Exp pos","Exp neg","Total")

kbl(or.cohort, caption = "A 2 by 2 table with the odds of disease calculated for the exposed, the unexposed and the total study population.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em") %>%
  column_spec(4, bold = FALSE, width = "10em")
  # row_spec(row = 1, bold = TRUE)
```

The odds ratio for a cohort study is then the odds of the outcome in the exposed divided by the odds of the outcome in the unexposed.

***The odds ratio --- case-control studies***

In a case-control study outcome status (disease-positive, disease-negative) is defined first. The history provided by each study subject then provides information about exposure status. For case-control studies, instead of talking about the odds of *disease* in the exposed and unexposed groups (as we did when we were working with data from a cohort study) we talk about the odds of *exposure* in the case and control groups.

```{r echo = FALSE, results = 'asis'}
or.cc <- data.frame("Case" = c("a","c","a+c","OD+ = a/c"), 
   "Control" = c("b","d","b+d","OD- = b/d"), 
   "Total" = c("a+b","c+d","a+b+c+d","OT = (a+b)/(c+d)"))
colnames(or.cc) <- c("Case","Control","Total")
rownames(or.cc) <- c("Exp pos","Exp neg","Total","Odds")

kbl(or.cc, caption = "A 2 by 2 table with the odds of exposure calculated for cases, controls and the total study population.") %>%
  column_spec(1, bold = FALSE, width = "5em") %>%
  column_spec(2, bold = FALSE, width = "5em") %>%
  column_spec(3, bold = FALSE, width = "5em") %>%
  column_spec(4, bold = FALSE, width = "10em")
  # row_spec(row = 1, bold = TRUE)
```

The odds ratio is defined as the odds of exposure in the cases ($O_{D+}$) divided by the odds of exposure in the controls ($O_{D-}$). Note that the numeric estimate of the odds ratio is exactly the same as that calculated for a cohort study. The expression of the result is the only thing that differs. In a cohort study we talk about the odds of disease being $x$ times greater (or less) in the exposed, compared with the unexposed. In a case-control study we talk about the odds of exposure being $x$ times greater (or less) in cases, compared with controls.

**Measures of effect in the exposed**

***The attributable risk in the exposed***

The attributable risk is defined as the increase or decrease in the risk of the outcome in the exposed that is attributable to exposure (Figure 2).
Attributable risk (unlike the incidence risk ratio) provides a measure of the absolute frequency of the outcome associated with exposure. ![The attributable risk in the exposed.](attributable_risk.png) A useful way of expressing attributable risk in a clinical setting is in terms of the number needed to treat, NNT. NNT equals the inverse of the attributable risk. Depending on the outcome of interest we often elect to use different labels for NNT. When dealing with an outcome that is 'desirable' (e.g. treatment success) we call NNT the number needed to treat for benefit, NNTB. NNTB equals the number of subjects who would have to be exposed to result in a single (desirable) outcome. When dealing with an outcome that is 'undesirable' (e.g. death) we call NNT the number needed to treat for harm, NNTH. NNTH equals the number of subjects who would have to be exposed to result in a single (undesirable) outcome. ***The attributable fraction in the exposed*** The attributable fraction in the exposed is the proportion of outcome-positive subjects in the exposed group that is due to exposure (Figure 3). ![The attributable fraction in the exposed.](attributable_fraction.png) **Measures of effect in the population** ***The attributable risk in the population*** The population attributable risk is the increase or decrease in incidence risk of the outcome in the study population that is attributable to exposure (Figure 4). ![The attributable risk in the population](population_attributable_risk.png) ***The attributable fraction in the population*** The population attributable fraction (also known as the aetiologic fraction) is the proportion of outcome-positive subjects in the study population that is due to the exposure (Figure 5). ![The attributable fraction in the population](population_attributable_fraction.png) On the condition that the exposure of interest is a cause of the disease outcome, the population attributable fraction represents the proportional reduction in average disease risk over a specified period of time that would be achieved by eliminating the exposure of interest while the distribution of other risk factors in the population remained unchanged. For this reason, PAFs are particularly useful to guide policy makers when planning public health interventions. If you're going to use PAFs as a means for informing policy, make sure that: (1) the exposure of interest is causally related to the outcome, and (2) the exposure of interest is something amenable to intervention. **Theory to practice: Calculating measures of association using R** ***Direct entry of 2 by 2 table contingency table cell frequencies*** Firstly, a 2 $\times$ 2 table can be created by listing the contingency table cell frequencies in vector format. Take the following example. A cross sectional study investigating the relationship between dry cat food (DCF) and feline lower urinary tract disease (FLUTD) was conducted [@willeberg:1977]. Counts of individuals in each group were as follows. DCF-exposed cats (cases, non-cases) 13, 2163. Non DCF-exposed cats (cases, non-cases) 5, 3349. We can enter these data directly into R as a vector of length four. Check that your counts have been entered in the correct order by viewing the data as a matrix. 
```{r}
dat.v01 <- c(13,2163,5,3349); dat.v01

# View the data in the usual 2 by 2 table format:
matrix(dat.v01, nrow = 2, byrow = TRUE)
```

Calculate the prevalence ratio, odds ratio, attributable prevalence in the exposed, attributable fraction in the exposed, attributable prevalence in the population and the attributable fraction in the population using function `epi.2by2`. Note that we use the term prevalence ratio (instead of incidence risk ratio) here because we're dealing with data from a cross-sectional study --- the frequency of disease is expressed as a prevalence, not an incidence.

```{r}
library(epiR)

epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, 
   units = 100, interpret = FALSE, outcome = "as.columns")
```

The prevalence of FLUTD in DCF exposed cats was 4.01 (95% CI 1.43 to 11.23) times greater than the prevalence of FLUTD in non-DCF exposed cats. In DCF exposed cats, 75% of FLUTD was attributable to DCF (95% CI 30% to 91%). Fifty-four percent of FLUTD cases in this cat population were attributable to DCF (95% CI 4% to 78%).

Need a hand getting the correct wording to explain each of the listed measures of association and measures of effect? Set `interpret = TRUE` in `epi.2by2`:

```{r}
epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, 
   units = 100, interpret = TRUE, outcome = "as.columns")
```

***Data frame with one row per observation***

Here we provide examples where you have exposure status and outcome status listed for each member of your study population. There are two options for contingency table preparation in this situation: (1) using base R's table function; or (2) using the `tidyverse` package.

For this example we use the low infant birth weight data presented by @hosmer_lemeshow:2000 and available in the `MASS` package in R. The `birthwt` data frame has 189 rows and 10 columns. The data were collected at Baystate Medical Center, Springfield, Massachusetts USA during 1986.

**Two by two table preparation using the `table` function in base R**

```{r}
library(MASS)

# Load and view the data:
dat.df02 <- birthwt; head(dat.df02)
```

Each row of this data set represents data for one mother. We're interested in the association between `smoke` (the mother's smoking status during pregnancy) and `low` (delivery of a baby less than 2.5 kg bodyweight). It's important that the table you present to `epi.2by2` is in the correct format: outcome positives in the first column, outcome negatives in the second column, exposure positives in the first row and exposure negatives in the second row. If we run the `table` function on the `dat.df02` data frame the output table is in the wrong format:

```{r}
dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02
```

There are two ways to fix this problem.
The quick fix is to simply ask R to switch the order of the rows and columns in the output table: ```{r} dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 dat.tab02 <- dat.tab02[2:1,2:1]; dat.tab02 ``` The second approach is to set the exposure variable and the outcome variable as a factor and to define the levels of each factor using `levels = c(1,0)`: ```{r} dat.df02$low <- factor(dat.df02$low, levels = c(1,0)) dat.df02$smoke <- factor(dat.df02$smoke, levels = c(1,0)) dat.df02$race <- factor(dat.df02$race, levels = c(1,2,3)) dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 ``` Now compute the odds ratio for smoking and delivery of a low birth weight baby: ```{r} dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi02 ``` The odds of having a low birth weight child for smokers is 2.02 (95% CI 1.08 to 3.78) times greater than the odds of having a low birth weight child for non-smokers. **Two by two table preparation using `tidyverse`** The `tidyverse` package can also be used to prepare data in the required format: ```{r} library(tidyverse) dat.df03 <- birthwt; head(dat.df03) # Here we set the factor levels and tabulate the data in a single call using pipe operators: dat.tab03 <- dat.df03 %>% mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% group_by(smoke, low) %>% summarise(n = n()) # View the data: dat.tab03 ## View the data in conventional 2 by 2 table format: pivot_wider(dat.tab03, id_cols = c(smoke), names_from = low, values_from = n) ``` As before, compute the odds ratio for smoking and delivery of a low birth weight baby: ```{r} dat.epi03 <- epi.2by2(dat = dat.tab03, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi03 ``` The odds of having a low birth weight child for smokers is 2.02 (95% CI 1.08 to 3.78) times greater than the odds of having a low birth weight child for non-smokers. **Confounding** We're concerned that the mother's race may confound the association between low birth weight and delivery of a low birth weight baby so we'll stratify the data by race and compute the Mantel-Haenszel adjusted odds ratio. As before, our tables can be prepared using either base R or `tidyverse`. ***Stratified two by two table preparation using the table function in base R*** ```{r} dat.df04 <- birthwt; head(dat.df04) dat.tab04 <- table(dat.df04$smoke, dat.df04$low, dat.df04$race, dnn = c("Smoke", "Low BW", "Race")); dat.tab04 ``` Compute the Mantel-Haenszel adjusted odds ratio for smoking and delivery of a low birth weight baby, adjusting for the effect of race. Function `epi.2by2` automatically calculates the Mantel-Haenszel odds ratio and risk ratio when it is presented with stratified contingency tables. ```{r} dat.epi04 <- epi.2by2(dat = dat.tab04, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi04 ``` The Mantel-Haenszel test of homogeneity of the strata odds ratios is not significant (chi square test statistic 2.800; df 2; p-value = 0.25). We accept the null hypothesis and conclude that the odds ratios for each strata of race are the same. Because the stratum specific odds ratios are the same it is appropriate to compute a Mantel-Haenszel adjusted odds ratio. 
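
If you need the adjusted estimate programmatically (for example, to build your own tables or plots) it can be extracted from the object returned by `epi.2by2`. A minimal sketch, assuming the `massoc.detail` element that is also used in the error bar plot below:

```{r}
# Mantel-Haenszel adjusted odds ratio with its 95% confidence interval:
dat.epi04$massoc.detail$OR.mh
```
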
After accounting for the confounding effect of race, the odds of having a low birth weight child for smokers was 3.09 (95% CI 1.49 to 6.39) times that of non-smokers. ***Stratified two by two table preparation using tidyverse*** ```{r} dat.df05 <- birthwt; head(dat.df05) dat.tab05 <- dat.df05 %>% mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% mutate(race = factor(race)) %>% group_by(race, smoke, low) %>% summarise(n = n()) dat.tab05 ## View the data in conventional 2 by 2 table format: pivot_wider(dat.tab05, id_cols = c(race, smoke), names_from = low, values_from = n) ``` Compute the Mantel-Haenszel adjusted odds ratio for smoking and delivery of a low birth weight baby, adjusting for the effect of race: ```{r} dat.epi05 <- epi.2by2(dat = dat.tab05, method = "cohort.count", conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi05 ``` The Mantel-Haenszel test of homogeneity of the strata odds ratios is not significant (chi square test statistic 2.800; df 2; p-value = 0.25) so we accept the null hypothesis and conclude that the odds ratios for each strata of race are the same. After accounting for the confounding effect of race, the odds of having a low birth weight child for smokers is 3.09 (95% CI 1.49 to 6.39) times that of non-smokers. Plot the individual strata odds ratios and the Mantel-Haenszel summary odds ratio as an error bar plot to better understand how the Mantel-Haenszel adjusted odds ratio relates to the individual strata odds ratios: ```{r} library(ggplot2); library(scales) nstrata <- 1:length(unique(dat.tab05$race)) strata.lab <- paste("Strata ", nstrata, sep = "") y.at <- c(nstrata, max(nstrata) + 1) y.lab <- c("M-H", strata.lab) x.at <- c(0.25,0.5,1,2,4,8,16,32) or.p <- c(dat.epi05$massoc.detail$OR.mh$est, dat.epi05$massoc.detail$OR.strata.cfield$est) or.l <- c(dat.epi05$massoc.detail$OR.mh$lower, dat.epi05$massoc.detail$OR.strata.cfield$lower) or.u <- c(dat.epi05$massoc.detail$OR.mh$upper, dat.epi05$massoc.detail$OR.strata.cfield$upper) gdat.df05 <- data.frame(y.at, y.lab, or.p, or.l, or.u) ggplot(data = gdat.df05, aes(x = or.p, y = y.at)) + geom_point() + geom_errorbarh(aes(xmax = or.l, xmin = or.u, height = 0.2)) + labs(x = "Odds ratio", y = "Strata") + scale_x_continuous(trans = log2_trans(), breaks = x.at, limits = c(0.25,32)) + scale_y_continuous(breaks = y.at, labels = y.lab) + geom_vline(xintercept = 1, lwd = 1) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0)) ``` ## References --- nocite: '@*' ---epiR/inst/doc/epiR_sample_size.R0000644000176200001440000000635114166006704016321 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----message = FALSE---------------------------------------------------------- library(epiR) epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95) ## ----message = FALSE---------------------------------------------------------- epi.sscohortt(irexp1 = 50/1000, irexp0 = 70/1000, FT = 5, n = NA, power = 0.80, r = 1, design = 1, sided.test = 2, 
nfractional = FALSE, conf.level = 0.95)$n.total ## ----message = FALSE---------------------------------------------------------- epi.sscc(OR = 2.0, p1 = NA, p0 = 0.30, n = NA, power = 0.80, r = 1, phi.coef = 0, design = 1, sided.test = 2, conf.level = 0.95, method = "unmatched", nfractional = FALSE, fleiss = FALSE)$n.total ## ----message = FALSE---------------------------------------------------------- epi.ssninfb(treat = 0.85, control = 0.65, delta = 0.10, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05)$n.total ## ----message = FALSE---------------------------------------------------------- epi.ssclus1estb(b = 75, Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu ## ----message = FALSE---------------------------------------------------------- epi.ssclus1estb(b = c(75,35), Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu ## ----message = FALSE---------------------------------------------------------- # From first principles: n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95) n.crude # A total of 246 subjects need to be enrolled into the study. Calculate the design effect: rho <- 0.02; b <- 20 D <- rho * (b - 1) + 1; D # The design effect is 1.38. Our crude sample size estimate needs to be increased by a factor of 1.38. n.adj <- ceiling(n.crude * D) n.adj # After accounting for lack of independence in the data a total of 340 subjects need to be enrolled into the study. How many clusters are required? ceiling(n.adj / b) # Do all of the above using epi.ssclus2estb: epi.ssclus2estb(b = 20, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", rho = 0.02, nfractional = FALSE, conf.level = 0.95) ## ----message = FALSE---------------------------------------------------------- n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95) n.crude rho <- 0.09; b <- 10 D <- rho * (b - 1) + 1; D n.adj <- ceiling(n.crude * D) n.adj # Similar to the example above, we can do all of these calculations using epi.ssclus2estb: epi.ssclus2estb(b = 10, Py = 0.15, epsilon = 0.20, error = "relative", rho = 0.09, nfractional = FALSE, conf.level = 0.95) epiR/inst/doc/epiR_sample_size.Rmd0000644000176200001440000002265514135130042016634 0ustar liggesusers--- title: "Sample Size Calculations Using epiR" author: "Mark Stevenson" date: "`r Sys.Date()`" bibliography: epiR_sample_size.bib link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Sample size calculations} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` ### Prevalence estimation A review of sample size calculations in (veterinary) epidemiological research is provided by @stevenson:2021. The expected seroprevalence of brucellosis in a population of cattle is thought to be in the order of 15%. How many cattle need to be sampled and tested to be 95% certain that our seroprevalence estimate is within 20% of the true population value. 
That is, from 0.15 - (0.20 $\times$ 0.15) = 0.12 to 0.15 + (0.20 $\times$ 0.15) = 0.18, i.e. from 12% to 18%. Assume the test you will use has perfect sensitivity and specificity. This formula requires the population size to be specified so we set N to a large number, 1,000,000:

```{r message = FALSE}
library(epiR)

epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", 
   se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
```

A total of 545 cows are required to meet the requirements of the study.

### Prospective cohort study

A prospective cohort study of dry food diets and feline lower urinary tract disease (FLUTD) in mature male cats is planned. A sample of cats will be selected at random from the population of cats in a given area and owners who agree to participate in the study will be asked to complete a questionnaire at the time of enrollment. Cats enrolled into the study will be followed for at least 5 years to identify incident cases of FLUTD. The investigators would like to be 0.80 certain of being able to detect when the risk ratio of FLUTD is 1.4 for cats habitually fed a dry food diet, using a 0.05 significance test. Previous evidence suggests that the incidence risk of FLUTD in cats not on a dry food (i.e. 'other') diet is around 50 per 1000. Assuming equal numbers of cats on dry food and other diets are sampled, how many cats should be sampled to meet the requirements of the study?

```{r message = FALSE}
epi.sscohortt(irexp1 = 50/1000, irexp0 = 70/1000, FT = 5, n = NA, power = 0.80, 
   r = 1, design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95)$n.total
```

A total of 2080 subjects are required (1040 exposed and 1040 unexposed).

### Case-control study

A case-control study of the relationship between white pigmentation around the eyes and ocular squamous cell carcinoma in Hereford cattle is planned. A sample of cattle with newly diagnosed squamous cell carcinoma will be compared for white pigmentation around the eyes with a sample of controls. Assuming an equal number of cases and controls, how many study subjects are required to detect an odds ratio of 2.0 with 0.80 power using a two-sided 0.05 test? Previous surveys have shown that around 0.30 of Hereford cattle without squamous cell carcinoma have white pigmentation around the eyes.

```{r message = FALSE}
epi.sscc(OR = 2.0, p1 = NA, p0 = 0.30, n = NA, power = 0.80, r = 1, 
   phi.coef = 0, design = 1, sided.test = 2, conf.level = 0.95, method = "unmatched", 
   nfractional = FALSE, fleiss = FALSE)$n.total
```

If the true odds for squamous cell carcinoma in exposed subjects relative to unexposed subjects is 2.0, we will need to enroll 141 cases and 141 controls (282 cattle in total) to reject the null hypothesis that the odds ratio equals one with probability (power) 0.80. The Type I error probability associated with this test of the null hypothesis is 0.05.

### Non-inferiority trial

Suppose a pharmaceutical company would like to conduct a clinical trial to compare the efficacy of two antimicrobial agents when administered orally to patients with skin infections. Assume the true mean cure rate of the treatment is 0.85 and the true mean cure rate of the control is 0.65. We consider a difference of less than 0.10 in cure rate to be of no clinical importance (i.e. delta = 0.10). Assuming a one-sided test size of 5% and a power of 80%, how many subjects should be included in the trial?
```{r message = FALSE} epi.ssninfb(treat = 0.85, control = 0.65, delta = 0.10, n = NA, r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05)$n.total ``` A total of 50 subjects need to be enrolled in the trial, 25 in the treatment group and 25 in the control group. ### One-stage cluster sampling An aid project has distributed cook stoves in a single province in a resource-poor country. At the end of three years, the donors would like to know what proportion of households are still using their donated stove. A cross-sectional study is planned where villages in a province will be sampled and all households (approximately 75 per village) will be visited to determine if the donated stove is still in use. A pilot study of the prevalence of stove usage in five villages showed that 0.46 of householders were still using their stove and the intracluster correlation coefficient (ICC) for stove use within villages is in the order of 0.20. If the donor wanted to be 95% confident that the survey estimate of stove usage was within 10% of the true population value, how many villages (clusters) need to be sampled? ```{r message = FALSE} epi.ssclus1estb(b = 75, Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu ``` A total of 96 villages need to be sampled to meet the requirements of the study. ### One-stage cluster sampling (continued) Continuing the example above, we are now told that the number of households per village varies. The average number of households per village is 75 with a 0.025 quartile of 40 households and a 0.975 quartile of 180. Assuming the number of households per village follows a normal distribution the expected standard deviation of the number of households per village is in the order of (180 - 40) $\div$ 4 = 35. How many villages need to be sampled? ```{r message = FALSE} epi.ssclus1estb(b = c(75,35), Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu ``` A total of 115 villages need to be sampled to meet the requirements of the study. ### Two-stage cluster sampling This example is adapted from @bennett_et_al:1991. We intend to conduct a cross-sectional study to determine the prevalence of disease X in a given country. The expected prevalence of disease is thought to be around 20%. Previous studies report an intracluster correlation coefficient for this disease to be 0.02. Suppose that we want to be 95% certain that our estimate of the prevalence of disease is within 5% of the true population value and that we intend to sample 20 individuals per cluster. How many clusters should be sampled to meet the requirements of the study? ```{r message = FALSE} # From first principles: n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95) n.crude # A total of 246 subjects need to be enrolled into the study. Calculate the design effect: rho <- 0.02; b <- 20 D <- rho * (b - 1) + 1; D # The design effect is 1.38. Our crude sample size estimate needs to be increased by a factor of 1.38. n.adj <- ceiling(n.crude * D) n.adj # After accounting for lack of independence in the data a total of 340 subjects need to be enrolled into the study. How many clusters are required? 
ceiling(n.adj / b) # Do all of the above using epi.ssclus2estb: epi.ssclus2estb(b = 20, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", rho = 0.02, nfractional = FALSE, conf.level = 0.95) ``` A total of 17 clusters need to be sampled to meet the specifications of this study. Function `epi.ssclus2estb` returns a warning message that the number of clusters is less than 25. ### Two-stage cluster sampling (continued) Continuing the brucellosis prevalence example (above) being seropositive to brucellosis is likely to cluster within herds. @otte_gumm:1997 cite the intracluster correlation coefficient for Brucella abortus in cattle to be in the order of 0.09. Adjust your sample size of 545 cows to account for lack of independence in the data, i.e. clustering at the herd level. Assume that b = 10 animals will be sampled per herd: ```{r message = FALSE} n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95) n.crude rho <- 0.09; b <- 10 D <- rho * (b - 1) + 1; D n.adj <- ceiling(n.crude * D) n.adj # Similar to the example above, we can do all of these calculations using epi.ssclus2estb: epi.ssclus2estb(b = 10, Py = 0.15, epsilon = 0.20, error = "relative", rho = 0.09, nfractional = FALSE, conf.level = 0.95) ``` After accounting for clustering at the herd level we estimate that a total of (545 $\times$ 1.81) = 986 cattle need to be sampled to meet the requirements of the survey. If 10 cows are sampled per herd this means that a total of (987 $\div$ 10) = 99 herds are required. ### ReferencesepiR/inst/doc/epiR_descriptive.html0000644000176200001440000156065414166006677017117 0ustar liggesusers Descriptive Epidemiology using epiR

Descriptive Epidemiology using epiR

Mark Stevenson

2022-01-07

Epidemiology is the study of the frequency, distribution and determinants of health-related states in populations and the application of such knowledge to control health problems (Centers for Disease Control and Prevention 2006).

This vignette provides instruction on the way R and epiR can be used for descriptive epidemiological analyses, that is, to describe how the frequency of disease varies by individual, place and time.

Individual

Descriptions of disease frequency involves reporting either the prevalence or incidence of disease.

Some definitions. Strictly speaking, ‘prevalence’ equals the number of cases of a given disease or attribute that exists in a population at a specified point in time. Prevalence risk is the proportion of a population that has a specific disease or attribute at a specified point in time. Many authors use the term ‘prevalence’ when they really mean prevalence risk, and these notes will follow this convention.

Two types of prevalence are reported in the literature: (1) point prevalence equals the proportion of a population in a diseased state at a single point in time, (2) period prevalence equals the proportion of a population with a given disease or condition over a specific period of time (i.e. the number of existing cases at the start of a follow-up period plus the number of incident cases that occur during the follow-up period).

Incidence provides a measure of how frequently susceptible individuals become disease cases as they are observed over time. An incident case occurs when an individual changes from being susceptible to being diseased. The count of incident cases is the number of such events that occur in a population during a defined follow-up period. There are two ways to express incidence:

Incidence risk (also known as cumulative incidence) is the proportion of initially susceptible individuals in a population who become new cases during a defined follow-up period.

Incidence rate (also known as incidence density) is the number of new cases of disease that occur per unit of individual time at risk during a defined follow-up period.
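
To make the distinction concrete, here is a minimal sketch using made-up numbers: 10 incident cases among 250 initially susceptible individuals who together contribute 1150 years at risk.

ncas <- 10; npop <- 250; ntar <- 1150
ncas / npop          # incidence risk: 0.04, i.e. 4 cases per 100 individuals at risk
ncas / ntar * 1000   # incidence rate: about 8.7 cases per 1000 years at risk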

In addition to reporting the point estimate of disease frequency, it is important to provide an indication of the uncertainty around that point estimate. The epi.conf function in the epiR package allows you to calculate confidence intervals for prevalence, incidence risk and incidence rates.

Let’s say we’re interested in the prevalence of disease X in a population comprised of 1000 individuals. Two hundred are tested and four returned a positive result. Assuming 100% test sensitivity and specificity, what is the estimated prevalence of disease X in this population?

library(epiR); library(ggplot2); library(scales)

ncas <- 4; npop <- 200
tmp <- as.matrix(cbind(ncas, npop))
epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, 
   conf.level = 0.95) * 100
#>   est     lower    upper
#> 1   2 0.5475566 5.041361

The estimated prevalence of disease X in this population is 2.0 (95% confidence interval [CI] 0.55 – 5.0) cases per 100 individuals at risk.
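
The exact interval reported by epi.conf can be cross-checked against base R's binom.test, which computes a Clopper-Pearson (exact binomial) interval; the two should agree closely (results are on the proportion scale rather than per 100):

binom.test(x = 4, n = 200)$conf.int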

Another example. A study was conducted by Feychting, Osterlund, and Ahlbom (1998) to report the frequency of cancer among the blind. A total of 136 diagnoses of cancer were made from 22,050 person-years at risk. What was the incidence rate of cancer in this population?

ncas <- 136; ntar <- 22050
tmp <- as.matrix(cbind(ncas, ntar))
epi.conf(tmp, ctype = "inc.rate", method = "exact", N = 1000, design = 1, 
   conf.level = 0.95) * 1000
#>         est    lower    upper
#> ncas 6.1678 5.174806 7.295817

The incidence rate of cancer in this population was 6.2 (95% CI 5.2 to 7.3) cases per 1000 person-years at risk.

Now let's say we want to compare the frequency of disease across several populations. An effective way to do this is to use a ranked error bar plot. With a ranked error bar plot the points represent the point estimate of the measure of disease frequency and the error bars indicate the 95% confidence interval around each estimate. The disease frequency estimates are then sorted from lowest to highest.

Generate some data. First we’ll generate a distribution of disease prevalence estimates. Let’s say it has a mode of 0.60 and we’re 80% certain that the prevalence is greater than 0.35. Use the epi.betabuster function to generate parameters that can be used for a beta distribution to satisfy these constraints:

tmp <- epi.betabuster(mode = 0.60, conf = 0.80, greaterthan = TRUE, x = 0.35, 
   conf.level = 0.95, max.shape1 = 100, step = 0.001)
tmp$shape1; tmp$shape2
#> [1] 2.357
#> [1] 1.904667
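
As a quick check that these shape parameters satisfy the constraint we set (roughly 80% of the distribution should lie above a prevalence of 0.35):

pbeta(q = 0.35, shape1 = tmp$shape1, shape2 = tmp$shape2)   # should be close to 0.20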

Now take 25 draws from a beta distribution using the shape1 and shape2 values calculated above and plot them as a frequency histogram:

dprob <- rbeta(n = 25, shape1 = tmp$shape1, shape2 = tmp$shape2)
dat.df <- data.frame(dprob = dprob)

ggplot(data = dat.df, aes(x = dprob)) +
  theme_bw() +
  geom_histogram(binwidth = 0.01, colour = "gray", fill = "dark blue", size = 0.1) +
  scale_x_continuous(limits = c(0,1), name = "Prevalence") +
  scale_y_continuous(limits = c(0,10), name = "Number of draws")
#> Warning: Removed 2 rows containing missing values (geom_bar).
Frequency histogram of disease prevalence estimates for our simulated population.

Generate a vector of population sizes using the uniform distribution. Calculate the number of diseased individuals in each population using dprob (calculated above). Finally, calculate the prevalence of disease in each population and its 95% confidence interval using epi.conf. The function epi.conf provides several options for confidence interval calculation methods for prevalence. Here we’ll use the exact method:

dat.df$rname <- paste("Region ", 1:25, sep = "")
dat.df$npop <- round(runif(n = 25, min = 20, max = 1500), digits = 0)
dat.df$ncas <- round(dat.df$dprob * dat.df$npop, digits = 0)

tmp <- as.matrix(cbind(dat.df$ncas, dat.df$npop))
tmp <- epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, 
   conf.level = 0.95) * 100
dat.df <- cbind(dat.df, tmp)
head(dat.df)
#>       dprob    rname npop ncas      est    lower    upper
#> 1 0.6589162 Region 1  812  535 65.88670 62.51113 69.14642
#> 2 0.3325450 Region 2  164   55 33.53659 26.36472 41.31582
#> 3 0.3057466 Region 3  447  137 30.64877 26.40333 35.15254
#> 4 0.5273257 Region 4 1413  745 52.72470 50.08277 55.35527
#> 5 0.8444275 Region 5 1253 1058 84.43735 82.30985 86.40179
#> 6 0.5486895 Region 6   84   46 54.76190 43.52135 65.65691

Sort the data in order of variable est and assign a 1 to n identifier as variable rank:

dat.df <- dat.df[sort.list(dat.df$est),]
dat.df$rank <- 1:nrow(dat.df)

Now create a ranked error bar plot. Because it's useful to provide the region-area names on the horizontal axis we'll rotate the horizontal axis labels by 90 degrees.

ggplot(data = dat.df, aes(x = rank, y = est)) +
  theme_bw() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) +
  geom_point() +
  scale_x_continuous(limits = c(0,25), breaks = dat.df$rank, labels = dat.df$rname, name = "Region") +
  scale_y_continuous(limits = c(0,100), name = "Cases per 100 individuals at risk") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Ranked error bar plot showing the prevalence of disease (and its 95% confidence interval) for 25 population units.

Time

Epidemic curve data are often presented in one of two formats:

  1. One row for each individual identified as a case with an event date assigned to each.

  2. One row for every event date with an integer representing the number of cases identified on that date.

Generate some data, with one row for every individual identified as a case:

n.males <- 100; n.females <- 50
odate <- seq(from = as.Date("2004-07-26"), to = as.Date("2004-12-13"), by = 1)
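# Sampling weights for the 141 candidate onset dates: weights rise over the first 100 days then fall over the last 41, giving a peaked epidemic curve for males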
prob <- c(1:100, 41:1); prob <- prob / sum(prob)
modate <- sample(x = odate, size = n.males, replace = TRUE, p = prob)
fodate <- sample(x = odate, size = n.females, replace = TRUE)

dat.df <- data.frame(sex = c(rep("Male", n.males), rep("Female", n.females)), 
   odate = c(modate, fodate))

# Sort the data in order of odate:
dat.df <- dat.df[sort.list(dat.df$odate),] 
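
If you need these individual-level records in the second format described above (one row per date with a count of cases) they can be collapsed with base R. A minimal sketch (the object name dat.sum is arbitrary; note that only dates with at least one case will appear):

dat.sum <- as.data.frame(table(dat.df$odate))
names(dat.sum) <- c("odate","ncas")
dat.sum$odate <- as.Date(dat.sum$odate)
head(dat.sum)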

Plot the epidemic curve using the ggplot2 and scales packages:

ggplot(data = dat.df, aes(x = as.Date(odate))) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) +
  scale_x_date(breaks = date_breaks("7 days"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2004.

Produce a separate epidemic curve for males and females using the facet_grid option in ggplot2:

ggplot(data = dat.df, aes(x = as.Date(odate))) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  facet_grid( ~ sex)
Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex.

Let’s say an event occurred on 31 October 2004. Mark this date on your epidemic curve using geom_vline:

ggplot(data = dat.df, aes(x = as.Date(odate))) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", size = 0.1) +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  facet_grid( ~ sex) +
  geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), 
   linetype = "dashed")
Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, conditioned by sex. An event that occurred on 31 October 2004 is indicated by the vertical dashed line.

Plot the total number of disease events by day, coloured according to sex:

ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", size = 0.1) +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), 
   linetype = "dashed") + 
  scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") +
  theme(legend.position = c(0.90, 0.80))
Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex.

It can be difficult to appreciate differences in male and female disease counts as a function of date with the above plot format so dodge the data instead:

ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", size = 0.1, position = "dodge") +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2), limits = c(0,20), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2004", format = "%d/%m/%Y"))), 
   linetype = "dashed") + 
  scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + 
  theme(legend.position = c(0.90, 0.80))
Frequency histogram showing counts of incident cases of disease as a function of time, 26 July to 13 December 2004, grouped by sex.

We now provide code to deal with the situation where the data are presented with one row for every case event date and an integer representing the number of cases identified on each date.

Simulate some data in this format. In the code below the variable ncas represents the number of cases identified on a given date. The variable dcontrol has two levels: neg and pos. Level neg flags dates when no disease control measures were in place; level pos flags dates when disease control measures were in place.

odate <- seq(from = as.Date("1/1/00", format = "%d/%m/%y"), 
   to = as.Date("1/1/05", format = "%d/%m/%y"), by = "1 month")
ncas <- round(runif(n = length(odate), min = 0, max = 100), digits = 0)

dat.df <- data.frame(odate, ncas)
dat.df$dcontrol <- "neg"
dat.df$dcontrol[dat.df$odate >= as.Date("1/1/03", format = "%d/%m/%y") & 
   dat.df$odate <= as.Date("1/6/03", format = "%d/%m/%y")] <- "pos"
head(dat.df)
#>        odate ncas dcontrol
#> 1 2000-01-01    0      neg
#> 2 2000-02-01   66      neg
#> 3 2000-03-01   84      neg
#> 4 2000-04-01   27      neg
#> 5 2000-05-01   17      neg
#> 6 2000-06-01   69      neg

Generate an epidemic curve. Note weight = ncas in the aesthetics argument for ggplot2:

ggplot() +
  theme_bw() +
  geom_histogram(dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) +
  scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), 
     name = "Date") +
  scale_y_continuous(limits = c(0,200), name = "Number of cases") +
  scale_fill_manual(values = c("#738ca6","#d46a6a")) + 
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures.

Now we’ll add a line to the plot to show the cumulative number of cases detected as a function of calendar date. The coding here requires some thought. First question: What was the cumulative number of cases at the end of the follow-up period? Here we use the cumsum (cumulative sum) function in base R:

cumsum(dat.df$ncas)
#>  [1]    0   66  150  177  194  263  273  307  337  415  427  482  548  627  657
#> [16]  659  715  727  756  820  844  868  889  905  957 1032 1089 1144 1198 1254
#> [31] 1261 1264 1350 1357 1414 1472 1499 1586 1682 1697 1794 1850 1928 1969 1996
#> [46] 2048 2127 2171 2263 2279 2310 2328 2401 2420 2472 2477 2526 2590 2659 2684
#> [61] 2771

At the end of the follow-up period the cumulative number of cases was in the order of 3100 (exact numbers will vary because we’ve used a simulation approach to generate this data). What we need to do is to get our 0 to 3100 cumulative cases to ‘fit’ into the 0 to 200 vertical axis limits of the epidemic curve. A reasonable approach would be to: (1) divide cumulative case numbers by 10; (2) set 350 as the upper limit of the vertical axis; and (3) set sec.axis = sec_axis(~ . * 10) to multiply the values that appear on the primary vertical axis by 10 for the labels that appear on the secondary vertical axis:


ggplot() +
  theme_bw() +
  geom_histogram(data = dat.df, mapping = aes(x = odate, weight = ncas, fill = factor(dcontrol)), binwidth = 60, colour = "gray", size = 0.1) +
  geom_line(data = dat.df, mapping = aes(x = odate, y = cumsum(ncas) / 10)) + 
  scale_x_date(breaks = date_breaks("6 months"), labels = date_format("%b %Y"), 
     name = "Date") +
  scale_y_continuous(limits = c(0,350), name = "Number of cases", 
      sec.axis = sec_axis(~ . * 10, name = "Cumulative number of cases")) +
  scale_fill_manual(values = c("#738ca6","#d46a6a")) +  
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Frequency histogram showing counts of incident cases of disease as a function of time, 1 January 2000 to 1 January 2005. Colours indicate the presence or absence of disease control measures. Superimposed on this plot is a line showing cumulative case numbers.

Place

Two types of maps are often used when describing patterns of disease by place:

  1. Choropleth maps. Choropleth mapping involves producing a summary statistic of the outcome of interest (e.g. count of disease events, prevalence, incidence) for each component area within a study region. A map is created by ‘filling’ (i.e. colouring) each component area with colour, providing an indication of the magnitude of the variable of interest and how it varies geographically.

  2. Point maps.

Choropleth maps

For illustration we make a choropleth map of sudden infant death syndrome (SIDS) babies in North Carolina counties for 1974 using the nc.sids data provided with the spData package.

library(sf); library(spData); library(rgdal); library(plyr); library(RColorBrewer); library(spatstat)

ncsids.sf <- st_read(dsn = system.file("shapes/sids.shp", package = "spData")[1])
#> Reading layer `sids' from data source 
#>   `C:\Program Files\R\R-4.1.1\library\spData\shapes\sids.shp' 
#>   using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 22 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> CRS:           NA
ncsids.sf <- ncsids.sf[,c("BIR74","SID74")]
head(ncsids.sf)
#> Simple feature collection with 6 features and 2 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965
#> CRS:           NA
#>   BIR74 SID74                       geometry
#> 1  1091     1 MULTIPOLYGON (((-81.47276 3...
#> 2   487     0 MULTIPOLYGON (((-81.23989 3...
#> 3  3188     5 MULTIPOLYGON (((-80.45634 3...
#> 4   508     1 MULTIPOLYGON (((-76.00897 3...
#> 5  1421     9 MULTIPOLYGON (((-77.21767 3...
#> 6  1452     7 MULTIPOLYGON (((-76.74506 3...

The ncsids.sf simple features object lists, for each county in North Carolina, USA, the number of SIDS deaths for 1974. Plot a choropleth map of the counties of North Carolina showing SIDS counts for 1974:

ggplot() + 
   theme_bw() +
   geom_sf(data = ncsids.sf, aes(fill = SID74), colour = "dark grey") + 
   scale_fill_gradientn(limits = c(0,60), colours = brewer.pal(n = 5, "Reds"), guide = "colourbar") +
   scale_x_continuous(name = "Longitude") +
   scale_y_continuous(name = "Latitude") +
   labs(fill = "SIDS 1974")
Map of North Carolina, USA showing the number of sudden infant death syndrome cases, by county for 1974.
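
Mapping raw counts can be misleading when county birth cohorts differ markedly in size, so a rate-based fill is often preferred, for example SIDS deaths per 1000 live births. A minimal sketch using the BIR74 and SID74 columns loaded above (the variable name sidr74 is arbitrary); the new variable can then be substituted for SID74 in the fill aesthetic of the plot above, with the fill limits adjusted accordingly:

ncsids.sf$sidr74 <- 1000 * ncsids.sf$SID74 / ncsids.sf$BIR74
summary(ncsids.sf$sidr74)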

Point maps

For this example we will use the epi.incin data set included with epiR. Between 1972 and 1980 an industrial waste incinerator operated at a site about 2 kilometres southwest of the town of Coppull in Lancashire, England. Addressing community concerns that there were greater than expected numbers of laryngeal cancer cases in close proximity to the incinerator, Diggle (1990) conducted a study investigating risks for laryngeal cancer, using recorded cases of lung cancer as controls. The study area is 20 km x 20 km in size and includes the location of residence of patients diagnosed with each cancer type from 1974 to 1983.

Load the epi.incin data set and create negative and positive labels for each point location. We don’t have a boundary map for these data so we’ll use spatstat to create a convex hull around the points and dilate the convex hull by 1000 metres as a proxy boundary. The point locations in this data are projected using the British National Grid coordinate reference system (EPSG code 27700). Create an observation window for the data as coppull.ow and a ppp object for plotting:

data(epi.incin); incin.df <- epi.incin
incin.df$status <- factor(incin.df$status, levels = c(0,1), labels = c("Neg", "Pos"))
names(incin.df)[3] <- "Status"

incin.sf <- st_as_sf(incin.df, coords = c("xcoord","ycoord"), remove = FALSE)
st_crs(incin.sf) <- 27700

coppull.ow <- convexhull.xy(x = incin.df[,1], y = incin.df[,2])
coppull.ow <- dilation(coppull.ow, r = 1000)

Create a simple features polygon object from coppull.ow. First we convert coppull.ow to a SpatialPolygonsDataFrame object:

coords <- matrix(c(coppull.ow$bdry[[1]]$x, coppull.ow$bdry[[1]]$y), ncol = 2, byrow = FALSE)
pol <- Polygon(coords, hole = FALSE)
pol <- Polygons(list(pol),1)
pol <- SpatialPolygons(list(pol))
coppull.spdf <- SpatialPolygonsDataFrame(Sr = pol, data = data.frame(id = 1), match.ID = TRUE)

Convert the SpatialPolygonsDataFrame to an sf object and set the coordinate reference system:

coppull.sf <- as(coppull.spdf, "sf")
st_crs(coppull.sf) <- 27700

The mformat function is used to plot the axis labels in kilometres (instead of metres):

mformat <- function(){
   function(x) format(x / 1000, digits = 2)
}
ggplot() +
   theme_bw() +
   geom_sf(data = incin.sf, aes(colour = Status, shape = Status)) +
   geom_sf(data = coppull.sf, fill = "transparent", colour = "black") +
   coord_sf(datum = st_crs(coppull.sf)) +
   scale_colour_manual(values = c("grey","red")) +
   scale_shape_manual(values = c(1,16)) +
   scale_x_continuous(name = "Easting (km)", labels = mformat()) +
   scale_y_continuous(name = "Northing (km)", labels = mformat()) +
    theme(legend.position = c(0.10, 0.15))
Point map showing the place of residence of individuals diagnosed with laryngeal cancer (Pos) and lung cancer (Neg), Coppull, Lancashire, UK, 1972 to 1980.

References

Diggle, PJ. 1990. “A Point Process Modeling Approach to Raised Incidence of a Rare Phenomenon in the Vicinity of a Prespecified Point.” Journal of the Royal Statistical Society Series A 153: 349–62.

Centers for Disease Control and Prevention. 2006. Principles of Epidemiology in Public Health Practice: An Introduction to Applied Epidemiology and Biostatistics. Atlanta, Georgia: Centers for Disease Control and Prevention.

Feychting, M, B Osterlund, and A Ahlbom. 1998. “Reduced Cancer Incidence Among the Blind.” Epidemiology 9: 490–94.

epiR/inst/doc/epiR_measures_of_association.html0000644000176200001440000112336614166006704021464 0ustar liggesusers Measures of Association

Measures of Association

Mark Stevenson

2022-01-07

A common task in epidemiology is to quantify the strength of association between exposures (‘risk factors’) and disease outcomes. In this context the term ‘exposure’ is taken to mean a variable whose association with the outcome is to be estimated.

Exposures can be harmful, beneficial or both harmful and beneficial (e.g. if an immunisable disease is circulating, exposure to immunising agents helps most recipients but may harm those who experience adverse reactions). The term ‘outcome’ is used to describe all the possible results that may arise from exposure to a causal factor or from preventive or therapeutic interventions (Porta, Greenland, and Last 2014). In human and animal health an ‘outcome-positive’ individual is an individual who has experienced a given disease of interest.

In this vignette we describe how epiR can be used to compute the various measures of association used in epidemiology, notably the risk ratio, odds ratio, attributable risk in the exposed, attributable fraction in the exposed, attributable risk in the population and attributable fraction in the population. Examples are provided to demonstrate how the package can be used to deal with exposure-outcome data presented in various formats.

This vignette has been written assuming the reader routinely formats their 2 \(\times\) 2 table data with the outcome status as columns and exposure status as rows. If this is not the case the argument outcome = "as.columns" (the default) can be changed to outcome = "as.rows".

Measures of association strength

The incidence risk ratio

Consider a study where subjects are disease free at the start of the study and all are monitored for disease occurrence for a specified time period. At the start of the study period study subjects are classified according to exposure to a hypothesised risk factor. If both exposure and outcome are binary variables (yes or no) we can present the counts of subjects in each of the four exposure-disease categories in a 2 \(\times\) 2 table.

A 2 by 2 table.
Dis pos Dis neg Total
Exp pos a b a+b
Exp neg c d c+d
Total a+c b+d a+b+c+d

When our data are in this format we can calculate the incidence risk of the outcome in those that were exposed \(R_{E+}\), the incidence risk in those that were not exposed \(R_{E-}\) and finally the incidence risk in the total study population \(R_{T}\):

A 2 by 2 table with incidence risks calculated for the exposed, the unexposed and the total study population.
Dis pos Dis neg Total Risk
Exp pos a b a+b RE+ = a/(a+b)
Exp neg c d c+d RE- = c/(c+d)
Total a+c b+d a+b+c+d RT = (a+c)/(a+b+c+d)

The incidence risk ratio is then the incidence risk of the outcome in the exposed divided by the incidence risk of the outcome in the unexposed (Figure 1).

The incidence risk ratio.

The incidence risk ratio provides an estimate of how many times more likely exposed individuals are to experience the outcome of interest, compared with non-exposed individuals.

If the incidence risk ratio equals 1, then the risks of the outcome in the exposed and non-exposed groups are equal. If the incidence risk ratio is greater than 1, then exposure increases the outcome risk, with greater departures from 1 indicative of a stronger effect. If the incidence risk ratio is less than 1, exposure reduces the outcome risk and exposure is said to be protective.

The odds ratio — cohort studies

In a cohort study definition of exposure status (exposure-positive, exposure-negative) comes first. Subjects are then followed over time to determine their outcome status (outcome-positive, outcome-negative). The odds of the outcome in the exposed and unexposed populations are calculated as follows:

A 2 by 2 table with the odds of disease calculated for the exposed, the unexposed and the total study population.
Dis pos Dis neg Total Odds
Exp pos a b a+b OE+ = a/b
Exp neg c d c+d OE- = c/d
Total a+c b+d a+b+c+d OT = (a+c)/(b+d)

The odds ratio for a cohort study is then the odds of the outcome in the exposed divided by the odds of the outcome in the unexposed.

The odds ratio — case-control studies

In a case-control study outcome status (disease-positive, disease-negative) is defined first. The history provided by each study subject then provides information about exposure status. For case-control studies, instead of talking about the odds of disease in the exposed and unexposed groups (as we did when we were working with data from a cohort study) we talk about the odds of exposure in the case and control groups.

A 2 by 2 table with the odds of exposure calculated for cases, controls and the total study population.
Case Control Total
Exp pos a b a+b
Exp neg c d c+d
Total a+c b+d a+b+c+d
Odds OD+ = a/c OD- = b/d OT = (a+b)/(c+d)

The odds ratio is defined as the odds of exposure in the cases (\(O_{D+}\)) divided by the odds of exposure in the controls (\(O_{D-}\)). Note that the numeric estimate of the odds ratio is exactly the same as that calculated for a cohort study. The expression of the result is the only thing that differs. In a cohort study we talk about the odds of disease being \(x\) times greater (or less) in the exposed, compared with the unexposed. In a case-control study we talk about the odds of exposure being \(x\) times greater (or less) in cases, compared with controls.
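
Written out in the notation of the table above, the case-control odds ratio reduces to the same cross-product ratio as the cohort odds ratio:

\[OR = \frac{O_{D+}}{O_{D-}} = \frac{a/c}{b/d} = \frac{ad}{bc}\]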

Measures of effect in the exposed

The attributable risk in the exposed

The attributable risk is defined as the increase or decrease in the risk of the outcome in the exposed that is attributable to exposure (Figure 2). Attributable risk (unlike the incidence risk ratio) provides a measure of the absolute frequency of the outcome associated with exposure.

The attributable risk in the exposed.
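
In the notation of the 2 \(\times\) 2 table, the attributable risk in the exposed is the risk difference:

\[AR_{E+} = R_{E+} - R_{E-} = \frac{a}{a+b} - \frac{c}{c+d}\]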

A useful way of expressing attributable risk in a clinical setting is in terms of the number needed to treat, NNT. NNT equals the inverse of the attributable risk. Depending on the outcome of interest we often elect to use different labels for NNT. When dealing with an outcome that is ‘desirable’ (e.g. treatment success) we call NNT the number needed to treat for benefit, NNTB. NNTB equals the number of subjects who would have to be exposed to result in a single (desirable) outcome. When dealing with an outcome that is ‘undesirable’ (e.g. death) we call NNT the number needed to treat for harm, NNTH. NNTH equals the number of subjects who would have to be exposed to result in a single (undesirable) outcome.
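
Expressed as a formula (taking the absolute value of the attributable risk when exposure is protective):

\[NNT = \frac{1}{|AR_{E+}|} = \frac{1}{|R_{E+} - R_{E-}|}\]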

The attributable fraction in the exposed

The attributable fraction in the exposed is the proportion of outcome-positive subjects in the exposed group that is due to exposure (Figure 3).

The attributable fraction in the exposed.
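
In terms of the incidence risks defined earlier:

\[AF_{E+} = \frac{R_{E+} - R_{E-}}{R_{E+}}\]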

Measures of effect in the population

The attributable risk in the population

The population attributable risk is the increase or decrease in incidence risk of the outcome in the study population that is attributable to exposure (Figure 4).

The attributable risk in the population
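
In terms of the incidence risks defined earlier, the population attributable risk is:

\[PAR = R_{T} - R_{E-}\]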

The attributable fraction in the population

The population attributable fraction (also known as the aetiologic fraction) is the proportion of outcome-positive subjects in the study population that is due to the exposure (Figure 5).

The attributable fraction in the population
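
In terms of the incidence risks defined earlier:

\[PAF = \frac{R_{T} - R_{E-}}{R_{T}}\]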

On the condition that the exposure of interest is a cause of the disease outcome, the population attributable fraction represents the proportional reduction in average disease risk over a specified period of time that would be achieved by eliminating the exposure of interest while the distribution of other risk factors in the population remained unchanged.

For this reason, PAFs are particularly useful to guide policy makers when planning public health interventions. If you’re going to use PAFs as a means for informing policy, make sure that: (1) the exposure of interest is causally related to the outcome, and (2) the exposure of interest is something amenable to intervention.

Theory to practice: Calculating measures of association using R

Direct entry of 2 by 2 table contingency table cell frequencies

Firstly, a 2 \(\times\) 2 table can be created by listing the contingency table cell frequencies in vector format. Take the following example.

A cross sectional study investigating the relationship between dry cat food (DCF) and feline lower urinary tract disease (FLUTD) was conducted (Willeberg 1977). Counts of individuals in each group were as follows. DCF-exposed cats (cases, non-cases) 13, 2163. Non DCF-exposed cats (cases, non-cases) 5, 3349. We can enter these data directly into R as a vector of length four. Check that your counts have been entered in the correct order by viewing the data as a matrix.

dat.v01 <- c(13,2163,5,3349); dat.v01
#> [1]   13 2163    5 3349

# View the data in the usual 2 by 2 table format:
matrix(dat.v01, nrow = 2, byrow = TRUE)
#>      [,1] [,2]
#> [1,]   13 2163
#> [2,]    5 3349

Calculate the prevalence ratio, odds ratio, attributable prevalence in the exposed, attributable fraction in the exposed, attributable prevalence in the population and the attributable fraction in the population using function epi.2by2. Note that we use the term prevalence ratio (instead of incidence risk ratio) here because we’re dealing with data from a cross-sectional study — the frequency of disease is expressed as a prevalence, not an incidence.

library(epiR)

epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, units = 100, 
   interpret = FALSE, outcome = "as.columns")
#>              Outcome +    Outcome -      Total        Prevalence *        Odds
#> Exposed +           13         2163       2176               0.597     0.00601
#> Exposed -            5         3349       3354               0.149     0.00149
#> Total               18         5512       5530               0.325     0.00327
#> 
#> Point estimates and 95% CIs:
#> -------------------------------------------------------------------
#> Prevalence ratio                               4.01 (1.43, 11.23)
#> Odds ratio                                     4.03 (1.43, 11.31)
#> Attrib prevalence in the exposed *             0.45 (0.10, 0.80)
#> Attrib fraction in the exposed (%)            75.05 (30.11, 91.09)
#> Attrib prevalence in the population *          0.18 (-0.02, 0.38)
#> Attrib fraction in the population (%)         54.20 (3.61, 78.24)
#> -------------------------------------------------------------------
#> Uncorrected chi2 test that OR = 1: chi2(1) = 8.177 Pr>chi2 = 0.004
#> Fisher exact test that OR = 1: Pr>chi2 = 0.006
#>  Wald confidence limits
#>  CI: confidence interval
#>  * Outcomes per 100 population units

The prevalence of FLUTD in DCF exposed cats was 4.01 (95% CI 1.43 to 11.23) times greater than the prevalence of FLUTD in non-DCF exposed cats.

In DCF exposed cats, 75% of FLUTD was attributable to DCF (95% CI 30% to 91%). Fifty-four percent of FLUTD cases in this cat population were attributable to DCF (95% CI 4% to 78%).

Need a hand getting the correct wording to explain each of the listed measures of association and measures of effect? Set interpret = TRUE in epi.2by2:

epi.2by2(dat = dat.v01, method = "cross.sectional", conf.level = 0.95, units = 100, 
   interpret = TRUE, outcome = "as.columns")
#>              Outcome +    Outcome -      Total        Prevalence *        Odds
#> Exposed +           13         2163       2176               0.597     0.00601
#> Exposed -            5         3349       3354               0.149     0.00149
#> Total               18         5512       5530               0.325     0.00327
#> 
#> Point estimates and 95% CIs:
#> -------------------------------------------------------------------
#> Prevalence ratio                               4.01 (1.43, 11.23)
#> Odds ratio                                     4.03 (1.43, 11.31)
#> Attrib prevalence in the exposed *             0.45 (0.10, 0.80)
#> Attrib fraction in the exposed (%)            75.05 (30.11, 91.09)
#> Attrib prevalence in the population *          0.18 (-0.02, 0.38)
#> Attrib fraction in the population (%)         54.20 (3.61, 78.24)
#> -------------------------------------------------------------------
#> Uncorrected chi2 test that OR = 1: chi2(1) = 8.177 Pr>chi2 = 0.004
#> Fisher exact test that OR = 1: Pr>chi2 = 0.006
#>  Wald confidence limits
#>  CI: confidence interval
#>  * Outcomes per 100 population units 
#> 
#>  Measures of association strength:
#>  The outcome prevalence among the exposed was 4.01 (95% CI 1.43 to 11.23) times greater than the outcome prevalence among the unexposed. 
#>  
#>  The outcome odds among the exposed was 4.03 (95% CI 1.43 to 11.31) times greater than the outcome odds among the unexposed. 
#> 
#>  Measures of effect in the exposed:
#>  Exposure changed the outcome prevalence in the exposed by 0.45 (95% CI 0.1 to 0.8) per 100 population units. 75.2% of outcomes in the exposed were attributable to exposure (95% CI 25.7% to 93.1%). 
#> 
#>  Number needed to treat for benefit (NNTB) and harm (NNTH):
#>  The number needed to treat for one subject to benefit (NNTB) is 223 (95% CI 125 to 1008). 
#> 
#>  Measures of effect in the population:
#>  Exposure changed the outcome prevalence in the population by 0.18 (95% CI -0.02 to 0.38) per 100 population units. 54.3% of outcomes in the population were attributable to exposure (95% CI 32.5% to 75%).
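
Note that the NNTB reported above is simply the inverse of the attributable prevalence in the exposed, expressed per population unit rather than per 100 population units:

\[NNTB = \frac{1}{13/2176 - 5/3354} = \frac{1}{0.00448} \approx 223\]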

Data frame with one row per observation

Here we provide examples where you have exposure status and outcome status listed for each member of your study population. There are two options for contingency table preparation in this situation: (1) using base R’s table function; or (2) using the tidyverse package.

For this example we use the low infant birth weight data presented by Hosmer and Lemeshow (2000) and available in the MASS package in R. The birthwt data frame has 189 rows and 10 columns. The data were collected at Baystate Medical Center, Springfield, Massachusetts USA during 1986.

Two by two table preparation using the table function in base R

library(MASS)
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:spatstat.geom':
#> 
#>     area

# Load and view the data:
dat.df02 <- birthwt; head(dat.df02)
#>    low age lwt race smoke ptl ht ui ftv  bwt
#> 85   0  19 182    2     0   0  0  1   0 2523
#> 86   0  33 155    3     0   0  0  0   3 2551
#> 87   0  20 105    1     1   0  0  0   1 2557
#> 88   0  21 108    1     1   0  0  1   2 2594
#> 89   0  18 107    1     1   0  0  1   0 2600
#> 91   0  21 124    3     0   0  0  0   0 2622

Each row of this data set represents data for one mother. We’re interested in the association between smoke (the mother’s smoking status during pregnancy) and low (delivery of a baby less than 2.5 kg bodyweight).

It’s important that the table you present to epi.2by2 is in the correct format: outcome positives in the first column, outcome negatives in the second column, exposure positives in the first row and exposure negatives in the second row. If we run the table function on the birthwt data as provided, the output table is in the wrong format:

dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02
#>      Low BW
#> Smoke  0  1
#>     0 86 29
#>     1 44 30

There are two ways to fix this problem. The quick fix is to simply ask R to switch the order of the rows and columns in the output table:

dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02
#>      Low BW
#> Smoke  0  1
#>     0 86 29
#>     1 44 30
dat.tab02 <- dat.tab02[2:1,2:1]; dat.tab02
#>      Low BW
#> Smoke  1  0
#>     1 30 44
#>     0 29 86

The second approach is to set the exposure variable and the outcome variable as a factor and to define the levels of each factor using levels = c(1,0):

dat.df02$low <- factor(dat.df02$low, levels = c(1,0))
dat.df02$smoke <- factor(dat.df02$smoke, levels = c(1,0))
dat.df02$race <- factor(dat.df02$race, levels = c(1,2,3))

dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02
#>      Low BW
#> Smoke  1  0
#>     1 30 44
#>     0 29 86

Now compute the odds ratio for smoking and delivery of a low birth weight baby:

dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", conf.level = 0.95, 
   units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi02
#>              Outcome +    Outcome -      Total        Inc risk *        Odds
#> Exposed +           30           44         74              40.5       0.682
#> Exposed -           29           86        115              25.2       0.337
#> Total               59          130        189              31.2       0.454
#> 
#> Point estimates and 95% CIs:
#> -------------------------------------------------------------------
#> Inc risk ratio                                 1.61 (1.06, 2.44)
#> Odds ratio                                     2.02 (1.08, 3.78)
#> Attrib risk in the exposed *                   15.32 (1.61, 29.04)
#> Attrib fraction in the exposed (%)            37.80 (5.47, 59.07)
#> Attrib risk in the population *                6.00 (-4.33, 16.33)
#> Attrib fraction in the population (%)         19.22 (-0.21, 34.88)
#> -------------------------------------------------------------------
#> Uncorrected chi2 test that OR = 1: chi2(1) = 4.924 Pr>chi2 = 0.026
#> Fisher exact test that OR = 1: Pr>chi2 = 0.036
#>  Wald confidence limits
#>  CI: confidence interval
#>  * Outcomes per 100 population units

The odds of having a low birth weight child for smokers is 2.02 (95% CI 1.08 to 3.78) times greater than the odds of having a low birth weight child for non-smokers.
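
If you need the point estimates and confidence limits as numeric values (e.g., for tables or plots) they can be extracted from the object returned by epi.2by2. A minimal sketch, assuming the massoc.detail list structure used for plotting later in this vignette (element names may differ across epiR versions):

# List the components of the returned object:
names(dat.epi02$massoc.detail)

# For example, the odds ratio and its Cornfield confidence limits (for an
# unstratified analysis there is a single stratum, so this is the crude estimate):
dat.epi02$massoc.detail$OR.strata.cfield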

Two by two table preparation using tidyverse

The tidyverse package can also be used to prepare data in the required format:

library(tidyverse)
#> Registered S3 method overwritten by 'cli':
#>   method     from         
#>   print.boxx spatstat.geom
#> -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
#> v tibble  3.1.5     v dplyr   1.0.7
#> v tidyr   1.1.4     v stringr 1.4.0
#> v readr   2.0.2     v forcats 0.5.1
#> v purrr   0.3.4
#> -- Conflicts ------------------------------------------ tidyverse_conflicts() --
#> x dplyr::arrange()    masks plyr::arrange()
#> x readr::col_factor() masks scales::col_factor()
#> x dplyr::collapse()   masks nlme::collapse()
#> x purrr::compact()    masks plyr::compact()
#> x dplyr::count()      masks plyr::count()
#> x purrr::discard()    masks scales::discard()
#> x dplyr::failwith()   masks plyr::failwith()
#> x dplyr::filter()     masks stats::filter()
#> x dplyr::group_rows() masks kableExtra::group_rows()
#> x dplyr::id()         masks plyr::id()
#> x dplyr::lag()        masks stats::lag()
#> x dplyr::mutate()     masks plyr::mutate()
#> x dplyr::rename()     masks plyr::rename()
#> x dplyr::select()     masks MASS::select()
#> x dplyr::summarise()  masks plyr::summarise()
#> x dplyr::summarize()  masks plyr::summarize()

dat.df03 <- birthwt; head(dat.df03)
#>    low age lwt race smoke ptl ht ui ftv  bwt
#> 85   0  19 182    2     0   0  0  1   0 2523
#> 86   0  33 155    3     0   0  0  0   3 2551
#> 87   0  20 105    1     1   0  0  0   1 2557
#> 88   0  21 108    1     1   0  0  1   2 2594
#> 89   0  18 107    1     1   0  0  1   0 2600
#> 91   0  21 124    3     0   0  0  0   0 2622

# Here we set the factor levels and tabulate the data in a single call using pipe operators:
dat.tab03 <- dat.df03 %>%
  mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>%
  mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>%
  group_by(smoke, low) %>%
  summarise(n = n()) 
#> `summarise()` has grouped output by 'smoke'. You can override using the `.groups` argument.

# View the data:
dat.tab03
#> # A tibble: 4 x 3
#> # Groups:   smoke [2]
#>   smoke low       n
#>   <fct> <fct> <int>
#> 1 yes   yes      30
#> 2 yes   no       44
#> 3 no    yes      29
#> 4 no    no       86

## View the data in conventional 2 by 2 table format:
pivot_wider(dat.tab03, id_cols = c(smoke), 
   names_from = low, values_from = n)
#> # A tibble: 2 x 3
#> # Groups:   smoke [2]
#>   smoke   yes    no
#>   <fct> <int> <int>
#> 1 yes      30    44
#> 2 no       29    86

As before, compute the odds ratio for smoking and delivery of a low birth weight baby:

dat.epi03 <- epi.2by2(dat = dat.tab03, method = "cohort.count", 
   conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi03
#>              Outcome +    Outcome -      Total        Inc risk *        Odds
#> Exposed +           30           44         74              40.5       0.682
#> Exposed -           29           86        115              25.2       0.337
#> Total               59          130        189              31.2       0.454
#> 
#> Point estimates and 95% CIs:
#> -------------------------------------------------------------------
#> Inc risk ratio                                 1.61 (1.06, 2.44)
#> Odds ratio                                     2.02 (1.08, 3.78)
#> Attrib risk in the exposed *                   15.32 (1.61, 29.04)
#> Attrib fraction in the exposed (%)            37.80 (5.47, 59.07)
#> Attrib risk in the population *                6.00 (-4.33, 16.33)
#> Attrib fraction in the population (%)         19.22 (-0.21, 34.88)
#> -------------------------------------------------------------------
#> Uncorrected chi2 test that OR = 1: chi2(1) = 4.924 Pr>chi2 = 0.026
#> Fisher exact test that OR = 1: Pr>chi2 = 0.036
#>  Wald confidence limits
#>  CI: confidence interval
#>  * Outcomes per 100 population units

The odds of having a low birth weight child for smokers is 2.02 (95% CI 1.08 to 3.78) times greater than the odds of having a low birth weight child for non-smokers.

Confounding

We’re concerned that the mother’s race may confound the association between smoking and delivery of a low birth weight baby, so we’ll stratify the data by race and compute the Mantel-Haenszel adjusted odds ratio. As before, our tables can be prepared using either base R or tidyverse.

Stratified two by two table preparation using the table function in base R

dat.df04 <- birthwt; head(dat.df04)
#>    low age lwt race smoke ptl ht ui ftv  bwt
#> 85   0  19 182    2     0   0  0  1   0 2523
#> 86   0  33 155    3     0   0  0  0   3 2551
#> 87   0  20 105    1     1   0  0  0   1 2557
#> 88   0  21 108    1     1   0  0  1   2 2594
#> 89   0  18 107    1     1   0  0  1   0 2600
#> 91   0  21 124    3     0   0  0  0   0 2622

dat.tab04 <- table(dat.df04$smoke, dat.df04$low, dat.df04$race, 
   dnn = c("Smoke", "Low BW", "Race")); dat.tab04
#> , , Race = 1
#> 
#>      Low BW
#> Smoke  0  1
#>     0 40  4
#>     1 33 19
#> 
#> , , Race = 2
#> 
#>      Low BW
#> Smoke  0  1
#>     0 11  5
#>     1  4  6
#> 
#> , , Race = 3
#> 
#>      Low BW
#> Smoke  0  1
#>     0 35 20
#>     1  7  5

Compute the Mantel-Haenszel adjusted odds ratio for smoking and delivery of a low birth weight baby, adjusting for the effect of race. Function epi.2by2 automatically calculates the Mantel-Haenszel odds ratio and risk ratio when it is presented with stratified contingency tables.

dat.epi04 <- epi.2by2(dat = dat.tab04, method = "cohort.count", 
   conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi04
#>              Outcome +    Outcome -      Total        Inc risk *        Odds
#> Exposed +           86           29        115              74.8        2.97
#> Exposed -           44           30         74              59.5        1.47
#> Total              130           59        189              68.8        2.20
#> 
#> 
#> Point estimates and 95% CIs:
#> -------------------------------------------------------------------
#> Inc risk ratio (crude)                         1.26 (1.01, 1.56)
#> Inc risk ratio (M-H)                           1.38 (1.12, 1.70)
#> Inc risk ratio (crude:M-H)                     0.91
#> Odds ratio (crude)                             2.02 (1.08, 3.78)
#> Odds ratio (M-H)                               3.09 (1.49, 6.39)
#> Odds ratio (crude:M-H)                         0.66
#> Attrib risk in the exposed (crude) *           15.32 (1.61, 29.04)
#> Attrib risk in the exposed (M-H) *             22.17 (2.55, 41.79)
#> Attrib risk (crude:M-H)                        0.69
#> -------------------------------------------------------------------
#>  M-H test of homogeneity of PRs: chi2(2) = 1.160 Pr>chi2 = 0.560
#>  M-H test of homogeneity of ORs: chi2(2) = 2.800 Pr>chi2 = 0.247
#>  Test that M-H adjusted OR = 1:  chi2(1) = 9.413 Pr>chi2 = 0.001
#>  Wald confidence limits
#>  M-H: Mantel-Haenszel; CI: confidence interval
#>  * Outcomes per 100 population units

The Mantel-Haenszel test of homogeneity of the strata odds ratios is not significant (chi square test statistic 2.800; df 2; p-value = 0.25). We fail to reject the null hypothesis and conclude that the odds ratios for each stratum of race are the same. Because there is no evidence that the stratum-specific odds ratios differ, it is appropriate to compute a Mantel-Haenszel adjusted odds ratio.

After accounting for the confounding effect of race, the odds of having a low birth weight child for smokers was 3.09 (95% CI 1.49 to 6.39) times that of non-smokers.

Stratified two by two table preparation using tidyverse

dat.df05 <- birthwt; head(dat.df05)
#>    low age lwt race smoke ptl ht ui ftv  bwt
#> 85   0  19 182    2     0   0  0  1   0 2523
#> 86   0  33 155    3     0   0  0  0   3 2551
#> 87   0  20 105    1     1   0  0  0   1 2557
#> 88   0  21 108    1     1   0  0  1   2 2594
#> 89   0  18 107    1     1   0  0  1   0 2600
#> 91   0  21 124    3     0   0  0  0   0 2622

dat.tab05 <- dat.df05 %>%
  mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>%
  mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>%
  mutate(race = factor(race)) %>%
  group_by(race, smoke, low) %>%
  summarise(n = n()) 
#> `summarise()` has grouped output by 'race', 'smoke'. You can override using the `.groups` argument.
dat.tab05
#> # A tibble: 12 x 4
#> # Groups:   race, smoke [6]
#>   race  smoke low       n
#>   <fct> <fct> <fct> <int>
#> 1 1     yes   yes      19
#> 2 1     yes   no       33
#> 3 1     no    yes       4
#> 4 1     no    no       40
#> # ... with 8 more rows

## View the data in conventional 2 by 2 table format:
pivot_wider(dat.tab05, id_cols = c(race, smoke), 
   names_from = low, values_from = n)
#> # A tibble: 6 x 4
#> # Groups:   race, smoke [6]
#>   race  smoke   yes    no
#>   <fct> <fct> <int> <int>
#> 1 1     yes      19    33
#> 2 1     no        4    40
#> 3 2     yes       6     4
#> 4 2     no        5    11
#> # ... with 2 more rows

Compute the Mantel-Haenszel adjusted odds ratio for smoking and delivery of a low birth weight baby, adjusting for the effect of race:

dat.epi05 <- epi.2by2(dat = dat.tab05, method = "cohort.count", 
   conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi05
#>              Outcome +    Outcome -      Total        Inc risk *        Odds
#> Exposed +           30           44         74              40.5       0.682
#> Exposed -           29           86        115              25.2       0.337
#> Total               59          130        189              31.2       0.454
#> 
#> 
#> Point estimates and 95% CIs:
#> -------------------------------------------------------------------
#> Inc risk ratio (crude)                         1.61 (1.06, 2.44)
#> Inc risk ratio (M-H)                           2.15 (1.29, 3.58)
#> Inc risk ratio (crude:M-H)                     0.75
#> Odds ratio (crude)                             2.02 (1.08, 3.78)
#> Odds ratio (M-H)                               3.09 (1.49, 6.39)
#> Odds ratio (crude:M-H)                         0.66
#> Attrib risk in the exposed (crude) *           15.32 (1.61, 29.04)
#> Attrib risk in the exposed (M-H) *             22.17 (1.41, 42.94)
#> Attrib risk (crude:M-H)                        0.69
#> -------------------------------------------------------------------
#>  M-H test of homogeneity of PRs: chi2(2) = 3.862 Pr>chi2 = 0.145
#>  M-H test of homogeneity of ORs: chi2(2) = 2.800 Pr>chi2 = 0.247
#>  Test that M-H adjusted OR = 1:  chi2(1) = 9.413 Pr>chi2 = 0.001
#>  Wald confidence limits
#>  M-H: Mantel-Haenszel; CI: confidence interval
#>  * Outcomes per 100 population units

The Mantel-Haenszel test of homogeneity of the strata odds ratios is not significant (chi square test statistic 2.800; df 2; p-value = 0.25) so we fail to reject the null hypothesis and conclude that the odds ratios for each stratum of race are the same. After accounting for the confounding effect of race, the odds of having a low birth weight child for smokers is 3.09 (95% CI 1.49 to 6.39) times that of non-smokers.

Plot the individual strata odds ratios and the Mantel-Haenszel summary odds ratio as an error bar plot to better understand how the Mantel-Haenszel adjusted odds ratio relates to the individual strata odds ratios:

library(ggplot2); library(scales)

nstrata <- 1:length(unique(dat.tab05$race))
strata.lab <- paste("Strata ", nstrata, sep = "")
y.at <- c(nstrata, max(nstrata) + 1)
y.lab <- c("M-H", strata.lab)
x.at <- c(0.25,0.5,1,2,4,8,16,32)

or.p <- c(dat.epi05$massoc.detail$OR.mh$est, 
   dat.epi05$massoc.detail$OR.strata.cfield$est)
or.l <- c(dat.epi05$massoc.detail$OR.mh$lower, 
   dat.epi05$massoc.detail$OR.strata.cfield$lower)
or.u <- c(dat.epi05$massoc.detail$OR.mh$upper, 
   dat.epi05$massoc.detail$OR.strata.cfield$upper)
gdat.df05 <- data.frame(y.at, y.lab, or.p, or.l, or.u)

ggplot(data = gdat.df05, aes(x = or.p, y = y.at)) +
   geom_point() + 
   geom_errorbarh(aes(xmin = or.l, xmax = or.u, height = 0.2)) + 
   labs(x = "Odds ratio", y = "Strata") + 
   scale_x_continuous(trans = log2_trans(), breaks = x.at, 
      limits = c(0.25,32)) + 
   scale_y_continuous(breaks = y.at, labels = y.lab) + 
   geom_vline(xintercept = 1, lwd = 1) + 
   coord_fixed(ratio = 0.75 / 1) + 
   theme(axis.title.y = element_text(vjust = 0))

References

Altman, DG. 1998. “Confidence Intervals for the Number Needed to Treat.” British Medical Journal 317: 1309–12.

Grimes, DA, and KF Schulz. 2008. “Making Sense of Odds and Odds Ratios.” Obstetrics and Gynecology 111: 423–26.

Hosmer, DW, and S Lemeshow. 2000. Applied Logistic Regression. London: Jon Wiley; Sons Inc.

Kuritz, SJ, JR Landis, and GG Koch. 1988. “A general overview of Mantel-Haenszel methods: Applications and recent developments.” Annual Reviews in Public Health 9: 123–60.

Porta, M, S Greenland, and JM Last. 2014. A Dictionary of Epidemiology. London: Oxford University Press.

Prasad, K, R Jaeschke, P Wyer, S Keitz, and G Guyatt. 2007. “Tips for teachers of evidence-based medicine: Understanding odds ratios and their relationship to risk ratios.” Journal of General Internal Medicine 23 (5): 635–40.

Siegerink, B, and JL Rohmann. 2018. “Impact of your results: Beyond the relative risk.” Research and Practice in Thrombosis and Haemostasis 2: 653–57.

Willeberg, P. 1977. “Animal disease information processing: Epidemiologic analyses of the feline urologic syndrome.” Acta Veterinaria Scandinavica 64: 1–48.

epiR/inst/doc/epiR_RSurveillance.Rmd0000644000176200001440000002210514100646226017075 0ustar liggesusers--- title: "epiR - RSurveillance function mapping" author: "Evan Sergeant and Mark Stevenson" date: "`r Sys.Date()`" link-citations: yes output: knitr:::html_vignette: toc: true vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{epiR-RSurveillance function mapping} %\usepackage[utf8]{inputenc} --- \setmainfont{Calibri Light} ```{r, echo = FALSE, message = FALSE} # If you want to create a PDF document paste the following after line 9 above: # pdf_document: # toc: true # highlight: tango # number_sections: no # latex_engine: xelatex # header-includes: # - \usepackage{fontspec} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` The following tables lists each of the functions in `RSurveillance` and their equivalent in `epiR`. ## Representative sampling ### Sample size estimation ```{r ssrs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} library(pander) panderOptions('table.split.table', Inf) set.caption("Functions to estimate sample size using representative population sampling data.") ssrs.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | Prob disease freedom | `n.pfree` | `rsu.sspfree.rs` Representative | SSe | `n.freedom` | `rsu.sssep.rs` Two stage representative | SSe | `n.2stage` | `rsu.sssep.rs2st` Representative | SSe | `n.freecalc` | `rsu.sssep.rsfreecalc` Pooled representative | SSe | `n.pooled` | `rsu.sssep.rspool`" ssrs.df <- read.delim(textConnection(ssrs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrs.df) <- unname(as.list(ssrs.df[1,])) # put headers on ssrs.df <- ssrs.df[-1,] # remove first row row.names(ssrs.df) <- NULL pander(ssrs.df, style = 'rmarkdown') ``` ### Estimation of surveillance system sensitivity ```{r seprs.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using representative population sampling data.") seprs.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | SSe | `sep.binom` | `rsu.sep.rs` Representative | SSe | `sep.hypergeo` | `rsu.sep.rs` Representative | SSe | `sep` | `rsu.sep.rs` Two stage representative | SSe | `sep.sys` | `rsu.sep.rs2st` Representative | SSe | `sse.combined` | `rsu.sep.rsmult` Representative | SSe | `sep.freecalc` | `rsu.sep.rsfreecalc` Representative | SSe | `sep.binom.imperfect`| `rsu.sep.rsfreecalc` Pooled representative | SSe | `sep.pooled` | `rsu.sep.rspool` Representative | SSe | `sep.var.se` | `rsu.sep.rsvarse` Representative | SSp | `spp` | `rsu.spp.rs` Representative | SSp | `sph.hp` | `rsu.spp.rs`" seprs.df <- read.delim(textConnection(seprs.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprs.df) <- unname(as.list(seprs.df[1,])) # put headers on seprs.df <- seprs.df[-1,] # remove first row row.names(seprs.df) <- NULL pander(seprs.df, style = 'rmarkdown') ``` ### Estimation of the probability of disease freedom ```{r pfreers.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate the probability of disease freedom using representative population sampling data.") pfreers.tab <- " Sampling | Outcome | RSurveillance | epiR Representative | Prob disease of freedom | `pfree.1` | `rsu.pfree.rs` Representative | Prob disease of freedom | `pfree.calc` | `rsu.pfree.rs` Representative | Equilibrium 
prob of disease freedom | `pfree.equ` | `rsu.pfree.equ`" pfreers.df <- read.delim(textConnection(pfreers.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(pfreers.df) <- unname(as.list(pfreers.df[1,])) # put headers on pfreers.df <- pfreers.df[-1,] # remove first row row.names(pfreers.df) <- NULL pander(pfreers.df, style = 'rmarkdown') ``` ## Risk based sampling ### Sample size estimation ```{r ssrb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate sample size using risk based sampling data.") ssrb.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `n.rb` | `rsu.sssep.rbsrg` Risk-based | SSe | `n.rb.varse` | `rsu.sssep.rbmrg` Risk-based | SSe | `n.rb.2stage.1` | `rsu.sssep.rb2st1rf` Risk-based | SSe | `n.rb.2stage.2` | `rsu.sssep.rb2st2rf`" ssrb.df <- read.delim(textConnection(ssrb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(ssrb.df) <- unname(as.list(ssrb.df[1,])) # put headers on ssrb.df <- ssrb.df[-1,] # remove first row row.names(ssrb.df) <- NULL pander(ssrb.df, style = 'rmarkdown') ``` ### Estimation of surveillance system sensitivity ```{r seprb.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using risk based sampling data.") seprb.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.rb.bin.varse` | `rsu.sep.rb` Risk-based | SSe | `sep.rb.bin` | `rsu.sep.rb1rf` Risk-based | SSe | `sep.rb.hypergeo` | `rsu.sep.rb1rf` Risk-based | SSe | `sep.rb2.bin` | `rsu.sep.rb2rf` Risk-based | SSe | `sep.rb2.hypergeo` | `rsu.sep.rb2rf` Risk-based | SSe | `sep.rb.hypergeo.varse` | `rsu.sep.rbvarse` Risk-based | SSe | `sse.rb2stage` | `rsu.sep.rb2stage`" seprb.df <- read.delim(textConnection(seprb.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seprb.df) <- unname(as.list(seprb.df[1,])) # put headers on seprb.df <- seprb.df[-1,] # remove first row row.names(seprb.df) <- NULL pander(seprb.df, style = 'rmarkdown') ``` ## Census data ### Estimation of surveillance system sensitivity ```{r sepcen.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using census data.") sepcen.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.exact` | `rsu.sep.cens`" sepcen.df <- read.delim(textConnection(sepcen.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(sepcen.df) <- unname(as.list(sepcen.df[1,])) # put headers on sepcen.df <- sepcen.df[-1,] # remove first row row.names(sepcen.df) <- NULL pander(sepcen.df, style = 'rmarkdown') ``` ## Passive surveillance data ```{r seppas.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} set.caption("Functions to estimate surveillance system sensitivity (SSe) using passively collected surveillance data.") sepcen.tab <- " Sampling | Outcome | RSurveillance | epiR Risk-based | SSe | `sep.passive` | `rsu.sep.pass`" seppas.df <- read.delim(textConnection(sepcen.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(seppas.df) <- unname(as.list(seppas.df[1,])) # put headers on seppas.df <- seppas.df[-1,] # remove first row row.names(seppas.df) <- NULL pander(seppas.df, style = 'rmarkdown') ``` ## Miscellaneous functions ```{r misc.tab, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'} 
set.caption("Miscellaneous functions.") misc.tab <- " Details | RSurveillance | epiR Adjusted risk | `adj.risk` | `rsu.adjrisk` Adjusted risk | `adj.risk.sim` | `rsu.adjrisk` Series test interpretation, Se | `se.series` | `rsu.dxtest` Parallel test interpretation, Se | `se.parallel` | `rsu.dxtest` Series test interpretation, Sp | `sp.series` | `rsu.dxtest` Parallel test interpretation, Sp | `sp.parallel` | `rsu.dxtest` Effective probability of infection | `epi.calc` | `rsu.epinf` Design prevalence back calculation | `pstar.calc` | `rsu.pstar` Prob disease is less than design prevalence | | `rsu.sep`" misc.df <- read.delim(textConnection(misc.tab), header = FALSE, sep = "|", strip.white = TRUE, stringsAsFactors = FALSE) names(misc.df) <- unname(as.list(misc.df[1,])) # put headers on misc.df <- misc.df[-1,] # remove first row row.names(misc.df) <- NULL pander(misc.df, style = 'rmarkdown') ``` epiR/inst/doc/epiR_sample_size.html0000644000176200001440000010201014166006705017052 0ustar liggesusers Sample Size Calculations Using epiR

Sample Size Calculations Using epiR

Mark Stevenson

2022-01-07

Prevalence estimation

A review of sample size calculations in (veterinary) epidemiological research is provided by Stevenson (2021).

The expected seroprevalence of brucellosis in a population of cattle is thought to be in the order of 15%. How many cattle need to be sampled and tested to be 95% certain that our seroprevalence estimate is within 20% of the true population value? That is, from 0.15 - (0.20 \(\times\) 0.15) to 0.15 + (0.20 \(\times\) 0.15), i.e. from 12% to 18%. Assume the test you will use has perfect sensitivity and specificity. This formula requires the population size to be specified so we set N to a large number, 1,000,000:

library(epiR)
epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20, error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
#> [1] 545

A total of 545 cows are required to meet the requirements of the study.
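
As a rough hand check: with a perfect test and a population large enough for the finite population correction to be negligible, this is the familiar simple random sample formula with an absolute error of 0.20 \(\times\) 0.15 = 0.03:

\[n = \frac{z_{1-\alpha/2}^{2} \; P_y (1 - P_y)}{\epsilon^{2}} = \frac{1.96^2 \times 0.15 \times 0.85}{0.03^2} = 544.2\]

which rounds up to the 545 cows reported above.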

Prospective cohort study

A prospective cohort study of dry food diets and feline lower urinary tract disease (FLUTD) in mature male cats is planned. A sample of cats will be selected at random from the population of cats in a given area and owners who agree to participate in the study will be asked to complete a questionnaire at the time of enrollment. Cats enrolled into the study will be followed for at least 5 years to identify incident cases of FLUTD. The investigators would like to be 0.80 certain of being able to detect when the risk ratio of FLUTD is 1.4 for cats habitually fed a dry food diet, using a 0.05 significance test. Previous evidence suggests that the incidence risk of FLUTD in cats not on a dry food (i.e. ‘other’) diet is around 50 per 1000. Assuming equal numbers of cats on dry food and other diets are sampled, how many cats should be sampled to meet the requirements of the study?

epi.sscohortt(irexp1 = 50/1000, irexp0 = 70/1000, FT = 5, n = NA, power = 0.80, r = 1, 
   design = 1, sided.test = 2, nfractional = FALSE, conf.level = 0.95)$n.total
#> [1] 2080

A total of 2080 subjects are required (1040 exposed and 1040 unexposed).

Case-control study

A case-control study of the relationship between white pigmentation around the eyes and ocular squamous cell carcinoma in Hereford cattle is planned. A sample of cattle with newly diagnosed squamous cell carcinoma will be compared for white pigmentation around the eyes with a sample of controls. Assuming an equal number of cases and controls, how many study subjects are required to detect an odds ratio of 2.0 with 0.80 power using a two-sided 0.05 test? Previous surveys have shown that around 0.30 of Hereford cattle without squamous cell carcinoma have white pigmentation around the eyes.

epi.sscc(OR = 2.0, p1 = NA, p0 = 0.30, n = NA, power = 0.80, 
   r = 1, phi.coef = 0, design = 1, sided.test = 2, conf.level = 0.95, 
   method = "unmatched", nfractional = FALSE, fleiss = FALSE)$n.total
#> [1] 282

If the true odds ratio for squamous cell carcinoma in exposed subjects relative to unexposed subjects is 2.0, we will need to enroll 141 cases and 141 controls (282 cattle in total) to reject the null hypothesis that the odds ratio equals one with probability (power) 0.80. The Type I error probability associated with this test of the null hypothesis is 0.05.

Non-inferiority trial

Suppose a pharmaceutical company would like to conduct a clinical trial to compare the efficacy of two antimicrobial agents when administered orally to patients with skin infections. Assume the true mean cure rate of the treatment is 0.85 and the true mean cure rate of the control is 0.65. We consider a difference of less than 0.10 in cure rate to be of no clinical importance (i.e. delta = 0.10). Assuming a one-sided test size of 5% and a power of 80%, how many subjects should be included in the trial?

epi.ssninfb(treat = 0.85, control = 0.65, delta = 0.10, n = NA, 
   r = 1, power = 0.80, nfractional = FALSE, alpha = 0.05)$n.total
#> [1] 50

A total of 50 subjects need to be enrolled in the trial, 25 in the treatment group and 25 in the control group.
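
As a rough hand check, one common normal-approximation formula for a binary non-inferiority comparison reproduces this result (with \(z_{1-\alpha} = 1.645\), \(z_{1-\beta} = 0.842\) and the margin \(\delta\) added to the true difference in cure rates):

\[n_{\text{per group}} = \frac{(z_{1-\alpha} + z_{1-\beta})^{2} \, [p_T(1-p_T) + p_C(1-p_C)]}{(p_T - p_C + \delta)^{2}} = \frac{(1.645 + 0.842)^2 (0.1275 + 0.2275)}{0.30^2} \approx 24.4\]

which rounds up to 25 per group, or 50 in total.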

One-stage cluster sampling

An aid project has distributed cook stoves in a single province in a resource-poor country. At the end of three years, the donors would like to know what proportion of households are still using their donated stove. A cross-sectional study is planned where villages in a province will be sampled and all households (approximately 75 per village) will be visited to determine if the donated stove is still in use. A pilot study of the prevalence of stove usage in five villages showed that 0.46 of householders were still using their stove and the intracluster correlation coefficient (ICC) for stove use within villages is in the order of 0.20. If the donor wanted to be 95% confident that the survey estimate of stove usage was within 10% of the true population value, how many villages (clusters) need to be sampled?

epi.ssclus1estb(b = 75, Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu
#> [1] 96

A total of 96 villages need to be sampled to meet the requirements of the study.
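
As a rough cross-check, the same answer can be obtained from first principles in the way the two-stage cluster examples below do: estimate the simple random sample size, inflate it by the design effect, then divide by the number of households per village. A sketch, assuming a fixed cluster size of 75 households (so it does not cover the variable cluster sizes considered next):

n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.46, epsilon = 0.10, error = "relative", 
   se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)

rho <- 0.20; b <- 75
D <- rho * (b - 1) + 1             # design effect
n.adj <- ceiling(n.crude * D)      # individual-level sample size adjusted for clustering
ceiling(n.adj / b)                 # number of villages (clusters) required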

One-stage cluster sampling (continued)

Continuing the example above, we are now told that the number of households per village varies. The average number of households per village is 75, with a 0.025 quantile of 40 households and a 0.975 quantile of 180. Assuming the number of households per village follows a normal distribution, the expected standard deviation of the number of households per village is in the order of (180 - 40) \(\div\) 4 = 35. How many villages need to be sampled?
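
That is, treating the interval between the 0.025 and 0.975 quantiles as spanning roughly four standard deviations of a normal distribution:

\[\hat{\sigma} \approx \frac{q_{0.975} - q_{0.025}}{4} = \frac{180 - 40}{4} = 35\]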

epi.ssclus1estb(b = c(75,35), Py = 0.46, epsilon = 0.10, error = "relative", rho = 0.20, conf.level = 0.95)$n.psu
#> [1] 115

A total of 115 villages need to be sampled to meet the requirements of the study.

Two-stage cluster sampling

This example is adapted from Bennett et al. (1991). We intend to conduct a cross-sectional study to determine the prevalence of disease X in a given country. The expected prevalence of disease is thought to be around 20%. Previous studies report an intracluster correlation coefficient for this disease to be 0.02. Suppose that we want to be 95% certain that our estimate of the prevalence of disease is within 5% of the true population value and that we intend to sample 20 individuals per cluster. How many clusters should be sampled to meet the requirements of the study?

# From first principles:
n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.20, epsilon = 0.05 / 0.20, 
   error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
n.crude
#> [1] 246

# A total of 246 subjects need to be enrolled into the study. Calculate the design effect:
rho <- 0.02; b <- 20
D <- rho * (b - 1) + 1; D
#> [1] 1.38
# The design effect is 1.38. Our crude sample size estimate needs to be increased by a factor of 1.38.

n.adj <- ceiling(n.crude * D)
n.adj
#> [1] 340
# After accounting for lack of independence in the data a total of 340 subjects need to be enrolled into the study. How many clusters are required?

ceiling(n.adj / b)
#> [1] 17

# Do all of the above using epi.ssclus2estb:
epi.ssclus2estb(b = 20, Py = 0.20, epsilon = 0.05 / 0.20, error = "relative", 
   rho = 0.02, nfractional = FALSE, conf.level = 0.95)
#> Warning: The calculated number of primary sampling units (n.psu) is 17. At
#> least 25 primary sampling units are recommended for two-stage cluster sampling
#> designs.
#> $n.psu
#> [1] 17
#> 
#> $n.ssu
#> [1] 340
#> 
#> $DEF
#> [1] 1.38
#> 
#> $rho
#> [1] 0.02

A total of 17 clusters need to be sampled to meet the specifications of this study. Function epi.ssclus2estb returns a warning message that the number of clusters is less than 25.

Two-stage cluster sampling (continued)

Continuing the brucellosis prevalence example (above) being seropositive to brucellosis is likely to cluster within herds. Otte and Gumm (1997) cite the intracluster correlation coefficient for Brucella abortus in cattle to be in the order of 0.09. Adjust your sample size of 545 cows to account for lack of independence in the data, i.e. clustering at the herd level. Assume that b = 10 animals will be sampled per herd:

n.crude <- epi.sssimpleestb(N = 1E+06, Py = 0.15, epsilon = 0.20,
   error = "relative", se = 1, sp = 1, nfractional = FALSE, conf.level = 0.95)
n.crude
#> [1] 545

rho <- 0.09; b <- 10
D <- rho * (b - 1) + 1; D
#> [1] 1.81

n.adj <- ceiling(n.crude * D)
n.adj
#> [1] 987

# Similar to the example above, we can do all of these calculations using epi.ssclus2estb:
epi.ssclus2estb(b = 10, Py = 0.15, epsilon = 0.20, error = "relative", 
   rho = 0.09, nfractional = FALSE, conf.level = 0.95)
#> $n.psu
#> [1] 99
#> 
#> $n.ssu
#> [1] 986
#> 
#> $DEF
#> [1] 1.81
#> 
#> $rho
#> [1] 0.09

After accounting for clustering at the herd level we estimate that a total of (545 \(\times\) 1.81) = 986 cattle need to be sampled to meet the requirements of the survey. If 10 cows are sampled per herd this means that a total of (987 \(\div\) 10) = 99 herds are required.

References

Bennett, S, T Woods, W Liyanage, and D Smith. 1991. “A Simplified General Method for Cluster-Sample Surveys of Health in Developing Countries.” World Health Statistics Quarterly 44: 98–106.

Otte, JM, and ID Gumm. 1997. “Intra-Cluster Correlation Coefficients of 20 Infections Calculated from the Results of Cluster-Sample Surveys.” Preventive Veterinary Medicine 31: 147–50.

Stevenson, MA. 2021. “Sample Size Estimation in Veterinary Epidemiological Research.” Journal Article. Frontiers in Veterinary Science 7: 539573.