incidence/0000755000176200001440000000000014626320772012204 5ustar liggesusersincidence/NAMESPACE0000644000176200001440000000342214621104516013413 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("[",incidence) S3method("group_names<-",default) S3method("group_names<-",incidence) S3method(as.data.frame,incidence) S3method(as.incidence,data.frame) S3method(as.incidence,matrix) S3method(as.incidence,numeric) S3method(cumulate,default) S3method(cumulate,incidence) S3method(dim,incidence) S3method(get_counts,incidence) S3method(get_dates,default) S3method(get_dates,incidence) S3method(get_fit,incidence_fit) S3method(get_fit,incidence_fit_list) S3method(get_info,incidence_fit) S3method(get_info,incidence_fit_list) S3method(get_interval,default) S3method(get_interval,incidence) S3method(get_n,default) S3method(get_n,incidence) S3method(get_timespan,default) S3method(get_timespan,incidence) S3method(group_names,default) S3method(group_names,incidence) S3method(incidence,Date) S3method(incidence,POSIXt) S3method(incidence,character) S3method(incidence,default) S3method(incidence,integer) S3method(incidence,numeric) S3method(plot,incidence) S3method(plot,incidence_fit) S3method(plot,incidence_fit_list) S3method(print,incidence) S3method(print,incidence_fit) S3method(print,incidence_fit_list) S3method(subset,incidence) export("group_names<-") export(add_incidence_fit) export(as.incidence) export(bootstrap) export(cumulate) export(estimate_peak) export(find_peak) export(fit) export(fit_optim_split) export(get_counts) export(get_dates) export(get_fit) export(get_info) export(get_interval) export(get_n) export(get_timespan) export(group_names) export(incidence) export(incidence_pal1) export(incidence_pal1_dark) export(incidence_pal1_light) export(make_breaks) export(pool) export(scale_x_incidence) importFrom(grDevices,colorRampPalette) importFrom(graphics,plot) importFrom(stats,as.ts) importFrom(utils,head) importFrom(utils,tail) incidence/demo/0000755000176200001440000000000014621104516013117 5ustar liggesusersincidence/demo/00Index0000644000176200001440000000007014621104516014246 0ustar liggesusersincidence-demo Demonstration of the incidence package incidence/demo/incidence-demo.R0000644000176200001440000000602714621104516016112 0ustar liggesusers#' Example 1 ------------------------------------------------------------------ #' #' **computing and manipulating stratified weekly incidence** #' #' 1) import data #' library('outbreaks') dat1 <- ebola_sim_clean$linelist str(dat1, strict.width = "cut", width = 76) #' 2) build an incidence object #' #+ incidence-curve, fig.width=9, fig.height=5 library('incidence') library('ggplot2') # compute weekly stratified incidence i.7.group <- incidence(dat1$date_of_onset, interval = 7, groups = dat1$hospital) # print incidence object i.7.group # plot incidence object my_theme <- theme_bw(base_size = 12) + theme(panel.grid.minor = element_blank()) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black")) plot(i.7.group, border = "white") + my_theme + theme(legend.position = c(0.8, 0.75)) #' 3) Manipulate incidence object #' #+ incidence-early-curve, fig.width=6, fig.height=6 # plot the first 18 weeks, defined hospitals, and use different colors i.7.sub <- i.7.group[1:18, c(1:2, 4:5)] hosp_colors <- c("#899DA4", "#C93312", "#FAEFD1", "#DC863B") plot(i.7.sub, show_cases = TRUE, border = "black", color = hosp_colors) + my_theme + theme(legend.position = c(0.35, 0.8)) # exclude NA group by disabling treating NA as a 
separate group i.7.group0 <- incidence(dat1$date_of_onset, interval = 7, groups = dat1$hospital, na_as_group = FALSE) # exclude NA using [ operator i.7.group1 <- subset(i.7.group, groups = -ncol(i.7.group)) # exclude NA group using [ operator i.7.group2 <- i.7.group[, -ncol(i.7.group)] # check the resulting incidence objects are identical identical(i.7.group0$counts, i.7.group1$counts) identical(i.7.group1, i.7.group2) # check groups colnames(i.7.group1$counts) #' Example 2 ------------------------------------------------------------------ #' #' **importing pre-computed daily incidence and fitting log-linear model** #' #' 1) Import pre-computed daily incidence #' #+ incidence-curve2, fig.width=9, fig.height=6 # preview datasets head(zika_girardot_2015, 3) head(zika_sanandres_2015, 3) # combine two datasets into one dat2 <- merge(zika_girardot_2015, zika_sanandres_2015, by = "date", all = TRUE) # rename variables names(dat2)[2:3] <- c("Girardot", "San Andres") # replace NA with 0 dat2[is.na(dat2)] <- 0 # convert pre-computed incidence in data.frame into incidence object # grouped by locations i.group <- as.incidence(x = dat2[, 2:3], dates = dat2$date) # pool incidence across two locations i.pooled <- pool(i.group) cowplot::plot_grid( plot(i.group, border = "white") + my_theme + theme(legend.position = c(0.9, 0.7)), plot(i.pooled, border = "white") + my_theme, ncol = 1, labels = c("(A)", "(B)"), label_size = 16, label_x = 0.06, label_y = 0.94 ) #' 2) Fit log-linear regression model #' #+ incidence-fit, fig.width=9, fig.height=4 library('magrittr') fos <- fit_optim_split(i.pooled) fos$split fos$fit plot(i.pooled, border = "white") %>% add_incidence_fit(fos$fit) + my_theme incidence/LICENSE0000644000176200001440000000005414621104516013177 0ustar liggesusersYEAR: 2016 COPYRIGHT HOLDER: Thibaut Jombartincidence/man/0000755000176200001440000000000014621104516012746 5ustar liggesusersincidence/man/palettes.Rd0000644000176200001440000000160314621104516015056 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/palettes.R \name{incidence_pal1} \alias{incidence_pal1} \alias{palettes} \alias{incidence_pal1_light} \alias{incidence_pal1_dark} \title{Color palettes used in incidence} \usage{ incidence_pal1(n) incidence_pal1_light(n) incidence_pal1_dark(n) } \arguments{ \item{n}{a number of colors} } \description{ These functions are color palettes used in incidence. } \examples{ plot(1:4, cex=8, pch=20, col = incidence_pal1(4), main = "palette: incidence_pal1") plot(1:100, cex=8, pch=20, col = incidence_pal1(100), main ="palette: incidence_pal1") plot(1:100, cex=8, pch=20, col = incidence_pal1_light(100), main="palette: incidence_pal1_light") plot(1:100, cex=8, pch=20, col = incidence_pal1_dark(100), main="palette: incidence_pal1_dark") } \author{ Thibaut Jombart \email{thibautjombart@gmail.com} } incidence/man/find_peak.Rd0000644000176200001440000000222614621104516015157 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/find_peak.R \name{find_peak} \alias{find_peak} \title{Find the peak date of an incidence curve} \usage{ find_peak(x, pool = TRUE) } \arguments{ \item{x}{An \code{incidence} object.} \item{pool}{If \code{TRUE} (default), any groups will be pooled before finding a peak. If \code{FALSE}, separate peaks will be found for each group.} } \value{ The date of the (first) highest incidence in the data. 
} \description{ This function can be used to find the peak of an epidemic curve stored as an \code{incidence} object. } \examples{ if (require(outbreaks) && require(ggplot2)) { withAutoprint({ i <- incidence(fluH7N9_china_2013$date_of_onset) i plot(i) ## one simple bootstrap x <- bootstrap(i) x plot(x) ## find 95\% CI for peak time using bootstrap find_peak(i) ## show confidence interval plot(i) + geom_vline(xintercept = find_peak(i), col = "red", lty = 2) })} } \seealso{ \code{\link[=estimate_peak]{estimate_peak()}} for bootstrap estimates of the peak time } \author{ Thibaut Jombart \email{thibautjombart@gmail.com}, Zhian N. Kamvar \email{zkamvar@gmail.com} } incidence/man/cumulate.Rd0000644000176200001440000000220214621104516015050 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cumulate.R \name{cumulate} \alias{cumulate} \alias{cumulate.default} \alias{cumulate.incidence} \title{Compute cumulative 'incidence'} \usage{ cumulate(x) \method{cumulate}{default}(x) \method{cumulate}{incidence}(x) } \arguments{ \item{x}{An incidence object.} } \description{ \code{cumulate} is an S3 generic to compute cumulative numbers, with methods for different types of objects: } \details{ \itemize{ \item default method is a wrapper for \code{cumsum} \item \code{incidence} objects: computes cumulative incidence over time \item \code{projections} objects: same, for \code{projections} objects, implemented in the similarly named package; see \code{?cumulate.projections} for more information, after loading the package } } \examples{ dat <- as.integer(c(0,1,2,2,3,5,7)) group <- factor(c(1, 2, 3, 3, 3, 3, 1)) i <- incidence(dat, groups = group) i plot(i) i_cum <- cumulate(i) i_cum plot(i_cum) } \seealso{ The \code{\link[=incidence]{incidence()}} function to generate the 'incidence' objects. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com} } incidence/man/fit.Rd0000644000176200001440000001305314621104516014021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fit.R, R/fit_optim_split.R, R/print.R \name{fit} \alias{fit} \alias{fit_optim_split} \alias{print.incidence_fit} \alias{print.incidence_fit_list} \title{Fit exponential models to incidence data} \usage{ fit(x, split = NULL, level = 0.95, quiet = FALSE) fit_optim_split( x, window = x$timespan/4, plot = TRUE, quiet = TRUE, separate_split = TRUE ) \method{print}{incidence_fit}(x, ...) \method{print}{incidence_fit_list}(x, ...) } \arguments{ \item{x}{An incidence object, generated by the function \code{\link[=incidence]{incidence()}}. For the plotting function, an \code{incidence_fit} object.} \item{split}{An optional time point identifying the separation between the two models. If NULL, a single model is fitted. If provided, two models would be fitted on the time periods on either side of the split.} \item{level}{The confidence interval to be used for predictions; defaults to 95\%.} \item{quiet}{A logical indicating if warnings from \code{fit} should be hidden; FALSE by default. Warnings typically indicate some zero incidence, which are removed before performing the log-linear regression.} \item{window}{The size, in days, of the time window either side of the split.} \item{plot}{A logical indicating whether a plot should be added to the output (\code{TRUE}, default), showing the mean R2 for various splits.} \item{separate_split}{If groups are present, should separate split dates be determined for each group? 
Defaults to \code{TRUE}, in which separate split dates and thus, separate models will be constructed for each group. When \code{FALSE}, the split date will be determined from the pooled data and modelled with the groups as main effects and interactions with date.} \item{...}{currently unused.} } \value{ For \code{fit()}, a list with the class \code{incidence_fit} (for a single model), or a list containing two \code{incidence_fit} objects (when fitting two models). \code{incidence_fit} objects contain: \itemize{ \item \verb{$model}: the fitted linear model \item \verb{$info}: a list containing various information extracted from the model (detailed further) \item \verb{$origin}: the date corresponding to day '0' } The \verb{$info} item is a list containing: \itemize{ \item \code{r}: the growth rate \item \code{r.conf}: the confidence interval of 'r' \item \code{pred}: a \code{data.frame} containing predictions of the model, including the true dates (\code{dates}), their numeric version used in the model (\code{dates.x}), the predicted value (\code{fit}), and the lower (\code{lwr}) and upper (\code{upr}) bounds of the associated confidence interval. \item \code{doubling}: the predicted doubling time in days; exists only if 'r' is positive \item \code{doubling.conf}: the confidence interval of the doubling time \item \code{halving}: the predicted halving time in days; exists only if 'r' is negative \item \code{halving.conf}: the confidence interval of the halving time } For \code{fit_optim_split}, a list containing: \itemize{ \item \code{df}: a \code{data.frame} of dates that were used in the optimization procedure, and the corresponding average R2 of the resulting models. \item \code{split}: the optimal splitting date \item \code{fit}: an \code{incidence_fit_list} object containing the fit for each split. If the \code{separate_split = TRUE}, then the \code{incidence_fit_list} object will contain these splits nested within each group. All of the \code{incidence_fit} objects can be retrieved with \code{\link[=get_fit]{get_fit()}}. \item \code{plot}: a plot showing the content of \code{df} (ggplot2 object) } } \description{ The function \code{fit} fits two exponential models to incidence data, of the form: \eqn{log(y) = r * t + b} \cr where 'y' is the incidence, 't' is time (in days), 'r' is the growth rate, and 'b' is the origin. The function \code{fit} will fit one model by default, but will fit two models on either side of a splitting date (typically the peak of the epidemic) if the argument \code{split} is provided. When groups are present, these are included in the model as main effects and interactions with dates. The function \code{fit_optim_split()} can be used to find the optimal 'splitting' date, defined as the one for which the best average R2 of the two models is obtained. Plotting can be done using \code{plot}, or added to an existing incidence plot by the piping-friendly function \code{add_incidence_fit()}. 
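For orientation, a minimal sketch (assuming \code{i.7} is a weekly \code{incidence} object, as in the examples below) could be: \preformatted{f <- fit(i.7)                          # single log-linear model
f2 <- fit(i.7, split = find_peak(i.7)) # two models, one either side of the peak
best <- fit_optim_split(i.7)           # choose the split date automatically
}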
} \examples{ if (require(outbreaks)) { withAutoprint({ dat <- ebola_sim$linelist$date_of_onset ## EXAMPLE WITH A SINGLE MODEL ## compute weekly incidence i.7 <- incidence(dat, interval=7) plot(i.7) plot(i.7[1:20]) ## fit a model on the first 20 weeks f <- fit(i.7[1:20]) f names(f) head(get_info(f, "pred")) ## plot model alone (not recommended) plot(f) ## plot data and model (recommended) plot(i.7, fit = f) plot(i.7[1:25], fit = f) ## piping versions if (require(magrittr)) { withAutoprint({ plot(i.7) \%>\% add_incidence_fit(f) ## EXAMPLE WITH 2 PHASES ## specifying the peak manually f2 <- fit(i.7, split = as.Date("2014-10-15")) f2 plot(i.7) \%>\% add_incidence_fit(f2) ## finding the best 'peak' date f3 <- fit_optim_split(i.7) f3 plot(i.7) \%>\% add_incidence_fit(f3$fit) })} })} } \seealso{ the \code{\link[=incidence]{incidence()}} function to generate the 'incidence' objects. The \code{\link[=get_fit]{get_fit()}} function to flatten \code{incidence_fit_list} objects to a list of \code{incidence_fit} objects. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com}, Zhian N. Kamvar \email{zkamvar@gmail.com}. } incidence/man/subset.Rd0000644000176200001440000000322214621104516014541 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset.R \name{subset.incidence} \alias{subset.incidence} \alias{"subset.incidence"} \alias{"[.incidence"} \alias{[.incidence} \title{Subsetting 'incidence' objects} \usage{ \method{subset}{incidence}(x, ..., from = min(x$dates), to = max(x$dates), groups = TRUE) \method{[}{incidence}(x, i, j) } \arguments{ \item{x}{An incidence object, generated by the function \code{\link[=incidence]{incidence()}}.} \item{...}{Further arguments passed to other methods (not used).} \item{from}{The starting date; data strictly before this date are discarded.} \item{to}{The ending date; data strictly after this date are discarded.} \item{groups}{(optional) The groups to retained, indicated as subsets of the columns of x$counts.} \item{i}{a subset of dates to retain} \item{j}{a subset of groups to retain} } \description{ Two functions can be used to subset incidence objects. The function \code{subset} permits to retain dates within a specified range and, optionally, specific groups. The operator "[" can be used as for matrices, using the syntax \code{x[i,j]} where 'i' is a subset of dates, and 'j' is a subset of groups. } \examples{ ## example using simulated dataset if(require(outbreaks)) { withAutoprint({ onset <- ebola_sim$linelist$date_of_onset ## weekly incidence inc <- incidence(onset, interval = 7) inc inc[1:10] # first 10 weeks plot(inc[1:10]) inc[-c(11:15)] # remove weeks 11-15 plot(inc[-c(11:15)]) })} } \seealso{ The \code{\link[=incidence]{incidence()}} function to generate the 'incidence' objects. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com} } incidence/man/get_fit.Rd0000644000176200001440000000420014621104516014652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_fit.R, R/get_info.R \name{get_fit} \alias{get_fit} \alias{get_fit.incidence_fit} \alias{get_fit.incidence_fit_list} \alias{get_info} \alias{get_info.incidence_fit} \alias{get_info.incidence_fit_list} \title{Accessors for \code{incidence_fit} objects} \usage{ get_fit(x) \method{get_fit}{incidence_fit}(x) \method{get_fit}{incidence_fit_list}(x) get_info(x, what = "r", ...) \method{get_info}{incidence_fit}(x, what = "r", ...) 
\method{get_info}{incidence_fit_list}(x, what = "r", groups = NULL, na.rm = TRUE, ...) } \arguments{ \item{x}{an \code{incidence_fit} or \code{incidence_fit_list} object.} \item{what}{the name of the item in the "info" element of the \code{incidence_fit} object.} \item{...}{currently unused.} \item{groups}{if \code{what = "pred"} and \code{x} is an \code{incidence_fit_list} object, then this indicates what part of the nesting hierarchy becomes the column named "groups". Defaults to \code{NULL}, indicating that no groups column will be added/modified.} \item{na.rm}{when \code{TRUE} (default), missing values will be excluded from the results.} } \value{ a list of \code{incidence_fit} objects. } \description{ Accessors for \code{incidence_fit} objects } \examples{ if (require(outbreaks)) { withAutoprint({ dat <- ebola_sim$linelist$date_of_onset ## EXAMPLE WITH A SINGLE MODEL ## compute weekly incidence sex <- ebola_sim$linelist$gender i.sex <- incidence(dat, interval = 7, group = sex) ## Compute the optimal split for each group separately fits <- fit_optim_split(i.sex, separate_split = TRUE) ## `fits` contains an `incidence_fit_list` object fits$fit ## Grab the list of `incidence_fit` objects get_fit(fits$fit) ## Get the predictions for all groups get_info(fits$fit, "pred", groups = 1) ## Get the predictions, but set `groups` to "before" and "after" get_info(fits$fit, "pred", groups = 2) ## Get the reproduction number get_info(fits$fit, "r") ## Get the doubling confidence interval get_info(fits$fit, "doubling.conf") ## Get the halving confidence interval get_info(fits$fit, "halving.conf") })} } incidence/man/get_dates.Rd0000644000176200001440000000240114621104516015171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_dates.R \name{get_dates} \alias{get_dates} \alias{get_dates.default} \alias{get_dates.incidence} \title{Retrieve dates from an incidence object} \usage{ get_dates(x, ...) \method{get_dates}{default}(x, ...) \method{get_dates}{incidence}(x, position = "left", count_days = FALSE, ...) 
} \arguments{ \item{x}{an \link{incidence} object} \item{...}{Unused} \item{position}{One of "left", "center", "middle", or "right" specifying what side of the bin the date should be drawn from.} \item{count_days}{If \code{TRUE}, the result will be represented as the number of days from the first date.} } \value{ a vector of dates or numerics } \description{ Retrieve dates from an incidence object } \examples{ set.seed(999) dat <- as.Date(Sys.Date()) + sample(-3:50, 100, replace = TRUE) x <- incidence(dat, interval = "month") get_dates(x) get_dates(x, position = "middle") set.seed(999) dat <- as.Date(Sys.Date()) + sample(-3:50, 100, replace = TRUE) x <- incidence(dat, interval = "month") get_dates(x) get_dates(x, "center") get_dates(x, "right") # Return dates by number of days from the first date get_dates(x, count_days = TRUE) get_dates(incidence(-5:5), count_days = TRUE) } \keyword{accessors} incidence/man/plot.incidence.Rd0000644000176200001440000001302114621104516016130 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R, R/scale_x_incidence.R \name{plot.incidence} \alias{plot.incidence} \alias{add_incidence_fit} \alias{plot.incidence_fit} \alias{plot.incidence_fit_list} \alias{scale_x_incidence} \alias{make_breaks} \title{Plot function for incidence objects} \usage{ \method{plot}{incidence}( x, ..., fit = NULL, stack = is.null(fit), color = "black", border = NA, col_pal = incidence_pal1, alpha = 0.7, xlab = "", ylab = NULL, labels_week = !is.null(x$weeks), labels_iso = !is.null(x$isoweeks), show_cases = FALSE, n_breaks = 6 ) add_incidence_fit(p, x, col_pal = incidence_pal1) \method{plot}{incidence_fit}(x, ...) \method{plot}{incidence_fit_list}(x, ...) scale_x_incidence(x, n_breaks = 6, labels_week = TRUE, ...) make_breaks(x, n_breaks = 6L, labels_week = TRUE) } \arguments{ \item{x}{An incidence object, generated by the function \code{\link[=incidence]{incidence()}}.} \item{...}{arguments passed to \code{\link[ggplot2:scale_date]{ggplot2::scale_x_date()}}, \code{\link[ggplot2:scale_date]{ggplot2::scale_x_datetime()}}, or \code{\link[ggplot2:scale_continuous]{ggplot2::scale_x_continuous()}}, depending on how the \verb{$date} element is stored in the incidence object.} \item{fit}{An 'incidence_fit' object as returned by \code{\link[=fit]{fit()}}.} \item{stack}{A logical indicating if bars of multiple groups should be stacked, or displayed side-by-side.} \item{color}{The color to be used for the filling of the bars; NA for invisible bars; defaults to "black".} \item{border}{The color to be used for the borders of the bars; NA for invisible borders; defaults to NA.} \item{col_pal}{The color palette to be used for the groups; defaults to \code{incidence_pal1}. See \code{\link[=incidence_pal1]{incidence_pal1()}} for other palettes implemented in incidence.} \item{alpha}{The alpha level for color transparency, with 1 being fully opaque and 0 fully transparent; defaults to 0.7.} \item{xlab}{The label to be used for the x-axis; empty by default.} \item{ylab}{The label to be used for the y-axis; by default, a label will be generated automatically according to the time interval used in incidence computation.} \item{labels_week}{a logical value indicating whether x-axis tick labels use the week format YYYY-Www when plotting weekly incidence; defaults to TRUE.} \item{labels_iso}{(deprecated) This has been superseded by \code{labels_week}. 
Previously: a logical value indicating whether x-axis tick labels use the ISO 8601 week format yyyy-Www when plotting ISO week-based weekly incidence; defaults to TRUE.} \item{show_cases}{if \code{TRUE} (default: \code{FALSE}), then each observation will be outlined by a border. The border defaults to white unless specified otherwise. This is normally used for outbreaks with a small number of cases. Note: this can only be used if \code{stack = TRUE}} \item{n_breaks}{the ideal number of breaks to be used for the x-axis labeling} \item{p}{An existing incidence plot.} } \value{ \itemize{ \item \code{plot()} a \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} object. \item \code{make_breaks()} a two-element list. The "breaks" element will contain the evenly-spaced breaks as either dates or numbers and the "labels" element will contain either a vector of weeks OR a \code{\link[ggplot2:waiver]{ggplot2::waiver()}} object. \item \code{scale_x_incidence()} a \pkg{ggplot2} "ScaleContinuous" object. } } \description{ This function is used to visualise the output of the \code{\link[=incidence]{incidence()}} function using the package \code{ggplot2}. } \details{ \itemize{ \item \code{plot()} will visualise an incidence object using \code{ggplot2} \item \code{make_breaks()} calculates breaks from an incidence object that always align with the bins and start on the first observed incidence. \item \code{scale_x_incidence()} produces an appropriate \code{ggplot2} scale based on an incidence object. } } \examples{ if(require(outbreaks) && require(ggplot2)) { withAutoprint({ onset <- outbreaks::ebola_sim$linelist$date_of_onset ## daily incidence inc <- incidence(onset) inc plot(inc) ## weekly incidence inc.week <- incidence(onset, interval = 7) inc.week plot(inc.week) # default to label x axis tick marks with isoweeks plot(inc.week, labels_week = FALSE) # label x axis tick marks with dates plot(inc.week, border = "white") # with visible border ## use group information sex <- outbreaks::ebola_sim$linelist$gender inc.week.gender <- incidence(onset, interval = "1 epiweek", groups = sex) plot(inc.week.gender) plot(inc.week.gender, labels_week = FALSE) ## show individual cases at the beginning of the epidemic inc.week.8 <- subset(inc.week.gender, to = "2014-06-01") p <- plot(inc.week.8, show_cases = TRUE, border = "black") p ## update the range of the scale lim <- c(min(get_dates(inc.week.8)) - 7*5, aweek::week2date("2014-W50", "Sunday")) lim p + scale_x_incidence(inc.week.gender, limits = lim) ## customize plot with ggplot2 plot(inc.week.8, show_cases = TRUE, border = "black") + theme_classic(base_size = 16) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) ## adding fit fit <- fit_optim_split(inc.week.gender)$fit plot(inc.week.gender, fit = fit) plot(inc.week.gender, fit = fit, labels_week = FALSE) })} } \seealso{ The \code{\link[=incidence]{incidence()}} function to generate the 'incidence' objects. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com}, Zhian N. 
Kamvar \email{zkamvar@gmail.com} } incidence/man/bootstrap.Rd0000644000176200001440000000250214621104516015251 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bootstrap.R \name{bootstrap} \alias{bootstrap} \title{Bootstrap incidence time series} \usage{ bootstrap(x, randomise_groups = FALSE) } \arguments{ \item{x}{An \code{incidence} object.} \item{randomise_groups}{A \code{logical} indicating whether groups should be randomised as well in the resampling procedure; respective group sizes will be preserved, but this can be used to remove any group-specific temporal dynamics. If \code{FALSE} (default), data are resampled within groups.} } \value{ An \code{incidence} object. } \description{ This function can be used to bootstrap \code{incidence} objects. Bootstrapping is done by sampling with replacement the original input dates. See \code{details} for more information on how this is implemented. } \details{ As original data are not stored in \code{incidence} objects, the bootstrapping is achieved by multinomial sampling of date bins weighted by their relative incidence. } \examples{ if (require(outbreaks) && require(ggplot2)) { withAutoprint({ i <- incidence(fluH7N9_china_2013$date_of_onset) i plot(i) ## one simple bootstrap x <- bootstrap(i) x plot(x) })} } \seealso{ \link{find_peak} to use estimate peak date using bootstrap } \author{ Thibaut Jombart \email{thibautjombart@gmail.com} } incidence/man/accessors.Rd0000644000176200001440000000432514621104516015226 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dim.R, R/get_interval.R, R/get_n.R, % R/get_timespan.R \name{dim.incidence} \alias{dim.incidence} \alias{get_interval} \alias{get_interval.default} \alias{get_interval.incidence} \alias{get_n} \alias{get_n.default} \alias{get_n.incidence} \alias{get_timespan} \alias{get_timespan.default} \alias{get_timespan.incidence} \title{Access various elements of an incidence object} \usage{ \method{dim}{incidence}(x) get_interval(x, ...) \method{get_interval}{default}(x, ...) \method{get_interval}{incidence}(x, integer = TRUE, ...) get_n(x) \method{get_n}{default}(x) \method{get_n}{incidence}(x) get_timespan(x) \method{get_timespan}{default}(x) \method{get_timespan}{incidence}(x) } \arguments{ \item{x}{an \link{incidence} object.} \item{...}{Unused} \item{integer}{When \code{TRUE} (default), the interval will be converted to an integer vector if it is stored as a character in the incidence object.} } \value{ \itemize{ \item \code{dim()} the dimensions in the number of bins and number of groups } \itemize{ \item \code{get_interval()} if \code{integer = TRUE}: an integer vector, otherwise: the value stored in \code{x$interval} } \itemize{ \item \code{get_n()} The total number of cases stored in the object } \itemize{ \item \code{get_timespan()}: an \code{integer} denoting the timespan represented by the incidence object. 
} } \description{ Access various elements of an incidence object } \examples{ set.seed(999) dat <- as.Date(Sys.Date()) + sample(-3:50, 100, replace = TRUE) x <- incidence(dat, interval = "month") # the value stored in the interval element get_interval(x) # the numeric value of the interval in days get_interval(x, integer = FALSE) # the number of observations in the object get_n(x) # the length of time represented get_timespan(x) # the number of groups ncol(x) # the number of bins (intervals) nrow(x) } \seealso{ \itemize{ \item \code{\link[=get_counts]{get_counts()}} to access the matrix of counts \item \code{\link[=get_dates]{get_dates()}} to access the dates on the right, left, and center of the interval. \item \code{\link[=group_names]{group_names()}} to access and possibly re-name the groups } } \keyword{accessors} incidence/man/pool.Rd0000644000176200001440000000151414621104516014207 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pool.R \name{pool} \alias{pool} \title{Pool 'incidence' across groups} \usage{ pool(x) } \arguments{ \item{x}{An 'incidence' object.} } \description{ This function pools incidence across all groups of an \code{incidence} object. The resulting \code{\link[=incidence]{incidence()}} object will contain counts summed over all groups present in the input. } \examples{ dat <- as.integer(c(0,1,2,2,3,5,7)) group <- factor(c(1, 2, 3, 3, 3, 3, 1)) i <- incidence(dat, groups = group) i i$counts ## pool all groups pool(i) pool(i)$counts ## pool only groups 1 and 3 pool(i[,c(1,3)]) pool(i[,c(1,3)])$counts } \seealso{ The \code{\link[=incidence]{incidence()}} function to generate the 'incidence' objects. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com} } incidence/man/group_names.Rd0000644000176200001440000000352014621104516015554 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group_names.R \name{group_names} \alias{group_names} \alias{group_names<-} \alias{group_names.default} \alias{group_names<-.default} \alias{`group_names<-`.default} \alias{group_names.incidence} \alias{group_names<-.incidence} \title{extract and set group names} \usage{ group_names(x, value) group_names(x) <- value \method{group_names}{default}(x, value) \method{group_names}{default}(x) <- value \method{group_names}{incidence}(x, value = NULL) \method{group_names}{incidence}(x) <- value } \arguments{ \item{x}{an \code{\link[=incidence]{incidence()}} object.} \item{value}{character vector used to rename groups} } \value{ a character vector of group names when \code{value} is NULL; otherwise, a copy of \code{x} with the groups renamed. } \description{ extract and set group names } \details{ This accessor will return the group names of an incidence object, i.e. the column names of its \verb{$counts} matrix. The replacement form renames the groups; giving two groups the same name merges them (see the examples). } \examples{ i <- incidence(dates = sample(10, 100, replace = TRUE), interval = 1L, groups = sample(letters[1:3], 100, replace = TRUE)) i group_names(i) # change the names of the groups group_names(i) <- c("Group 1", "Group 2", "Group 3") i # example if there are mistakes in the original data, e.g. 
# something is misspelled set.seed(50) grps <- sample(c("child", "adult", "adlut"), 100, replace = TRUE, prob = c(0.45, 0.45, 0.05)) i <- incidence(dates = sample(10, 100, replace = TRUE), interval = 1L, groups = grps) colSums(get_counts(i)) # If you change the name of the mis-spelled group, it will be merged with the # correctly-spelled group gname <- group_names(i) gname[gname == "adlut"] <- "adult" # without side-effects print(ii <- group_names(i, gname)) colSums(get_counts(i)) # original still has three groups colSums(get_counts(ii)) # with side-effects group_names(i) <- gname colSums(get_counts(i)) } \keyword{accessors} incidence/man/get_counts.Rd0000644000176200001440000000227414621104516015414 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_counts.R \name{get_counts} \alias{get_counts} \alias{get_counts.incidence} \title{Get counts from an incidence object} \usage{ get_counts(x, groups = NULL) \method{get_counts}{incidence}(x, groups = NULL) } \arguments{ \item{x}{an \code{incidence} object.} \item{groups}{if there are groups, use this to specify a group or groups to subset. Defaults to \code{NULL} indicating that all groups are returned.} } \value{ a matrix of counts where each row represents a date bin } \description{ Get counts from an incidence object } \examples{ if (require(outbreaks)) { withAutoprint({ dat <- ebola_sim$linelist$date_of_onset gend <- ebola_sim$linelist$gender i <- incidence(dat, interval = "week", groups = gend) ## Use with an object and no arguments gives the counts matrix head(get_counts(i)) ## Specifying a position or group name will return a matrix subset to that ## group head(get_counts(i, 1L)) head(get_counts(i, "f")) ## Specifying multiple groups allows you to rearrange columns head(get_counts(i, c("m", "f"))) ## If you want a vector, you can use drop drop(get_counts(i, "f")) })} } incidence/man/incidence.Rd0000644000176200001440000002457314621110071015162 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/incidence.R, R/print.R \name{incidence} \alias{incidence} \alias{incidence.default} \alias{incidence.Date} \alias{incidence.character} \alias{incidence.integer} \alias{incidence.numeric} \alias{incidence.POSIXt} \alias{print.incidence} \title{Compute incidence of events from a vector of dates.} \usage{ incidence(dates, interval = 1L, ...) \method{incidence}{default}(dates, interval = 1L, ...) \method{incidence}{Date}( dates, interval = 1L, standard = TRUE, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ... ) \method{incidence}{character}( dates, interval = 1L, standard = TRUE, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ... ) \method{incidence}{integer}( dates, interval = 1L, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ... ) \method{incidence}{numeric}( dates, interval = 1L, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ... ) \method{incidence}{POSIXt}( dates, interval = 1L, standard = TRUE, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ... ) \method{print}{incidence}(x, ...) } \arguments{ \item{dates}{A vector of dates, which can be provided as objects of the class: integer, numeric, Date, POSIXct, POSIXlt, and character. (See Note about \code{numeric} and \code{character} formats)} \item{interval}{An integer or character indicating the (fixed) size of the time interval used for computing the incidence; defaults to 1 day. 
This can also be a text string that corresponds to a valid date interval: day, week, month, quarter, or year. (See Note).} \item{...}{Additional arguments passed to other methods (none are used).} \item{standard}{(Only applicable to Date objects) When \code{TRUE} (default) and the \code{interval} is one of "week", "month", "quarter", or "year", then this will cause the bins for the counts to start at the beginning of the interval (See Note).} \item{groups}{An optional factor defining groups of observations for which incidence should be computed separately.} \item{na_as_group}{A logical value indicating if missing group (NA) should be treated as a separate group.} \item{first_date, last_date}{optional first/last dates to be used in the epicurve. When these are \code{NULL} (default), the first/last dates are taken from the observations. If these dates are provided, the observations will be trimmed to the range of [first_date, last_date].} \item{x}{An 'incidence' object.} } \value{ A list with the class \code{incidence}, which contains the following items: \itemize{ \item \strong{dates}: The dates marking the left side of the bins used for counting events. When \code{standard = TRUE} and the interval represents weeks, months, quarters, or years, the first date will represent the first standard date (See Interval specification, below). \item \strong{counts}: A matrix of incidence counts, with one column per group (and a single column if no groups were used). \item \strong{timespan}: The length of the period for which incidence is computed, in days. \item \strong{interval}: The bin size. If it's an integer, it represents the number of days between each bin. It can also be a character, e.g. "2 weeks" or "6 months". \item \strong{n}: The total number of cases. \item \strong{weeks}: Dates in week format (YYYY-Www), where YYYY corresponds to the year of the given week and ww represents the numeric week of the year. This will be produced by the function \code{\link[aweek:date2week]{aweek::date2week()}}. Note that these will have a special \code{"week_start"} attribute indicating which day of the ISO week the week starts on (see Weeks, below). \item \strong{isoweeks}: ISO 8601 week format YYYY-Www, which is returned only when ISO week-based weekly incidence is computed. } } \description{ This function computes incidence based on dates of events provided in various formats. A fixed interval, provided as numbers of days, is used to define time intervals. Counts within an interval always include the first date, after which they are labeled, and exclude the second. For instance, intervals labeled as 0, 3, 6, ... mean that the first bin includes days 0, 1 and 2, the second interval includes 3, 4 and 5 etc. } \details{ For details about the \verb{incidence class}, see the dedicated vignette:\cr \code{vignette("incidence_class", package = "incidence")} } \note{ \subsection{Input data (\code{dates})}{ \itemize{ \item \strong{Decimal (numeric) dates}: will be truncated with a warning \item \strong{Character dates} should be in the unambiguous \code{yyyy-mm-dd} (ISO 8601) format. Any other format will trigger an error. } } \subsection{Interval specification (\code{interval})}{ If \code{interval} is a valid character (e.g. "week" or "1 month"), then the bin will start at the beginning of the interval just before the first observation by default. For example, if the first case was recorded on Wednesday, 2018-05-09: \itemize{ \item "week" : first day of the week (i.e. 
Monday, 2018-05-07) (defaults to ISO weeks, see "Week intervals", below) \item "month" : first day of the month (i.e. 2018-05-01) \item "quarter" : first day of the quarter (i.e. 2018-04-01) \item "year" : first day of the calendar year (i.e. 2018-01-01) } These default intervals can be overridden with \code{standard = FALSE}, which sets the interval to begin at the first observed case. } \subsection{Week intervals}{ As of \emph{incidence} version 1.7.0, it is possible to construct incidence objects standardized to any day of the week thanks to the \code{\link[aweek:date2week]{aweek::date2week()}} function from the \pkg{aweek} package. The default is to use the ISO 8601 definition of weeks, which start on Monday. You can specify the day of the week an incidence object should be standardised to by using the pattern "\{n\} \{W\} weeks" where "\{W\}" represents the weekday in an English or current locale and "\{n\}" represents the duration, but this can be omitted. Below are examples of specifying weeks starting on different days assuming we had data that started on 2016-09-05, which is ISO week 36 of 2016: \itemize{ \item interval = "2 monday weeks" (Monday 2016-09-05) \item interval = "1 tue week" (Tuesday 2016-08-30) \item interval = "1 Wed week" (Wednesday 2016-08-31) \item interval = "1 Thursday week" (Thursday 2016-09-01) \item interval = "1 F week" (Friday 2016-09-02) \item interval = "1 Saturday week" (Saturday 2016-09-03) \item interval = "Sunday week" (Sunday 2016-09-04) } It's also possible to use something like "3 weeks: Saturday". In addition, there are keywords reserved for specific days of the week: \itemize{ \item interval = "week", standard = TRUE (Default, Monday) \item interval = "ISOweek" (Monday) \item interval = "EPIweek" (Sunday) \item interval = "MMWRweek" (Sunday) } The "EPIweek" specification is not strictly reserved for CDC epiweeks, but can be prefixed (or postfixed) by a day of the week: "1 epiweek: Saturday". } \subsection{The \code{first_date} argument}{ Previous versions of \emph{incidence} had the \code{first_date} argument override \code{standard = TRUE}. It has been changed as of \emph{incidence} version 1.6.0 to be more consistent with the behavior when \code{first_date = NULL}. This, however, may be a change in behaviour, so a warning is now issued once and only once if \code{first_date} is specified, but \code{standard} is not. To never see this warning, use \code{options(incidence.warn.first_date = FALSE)}. } The intervals for "month", "quarter", and "year" will necessarily vary in the number of days they encompass and warnings will be generated when the first date falls outside of a calendar date that is easily represented across the interval. 
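As a brief sketch (assuming \code{onset} is a vector of onset dates, as in the examples below), these interval specifications translate into calls such as: \preformatted{incidence(onset, interval = "1 tue week") # weeks starting on Tuesday
incidence(onset, interval = "epiweek")    # weeks starting on Sunday
incidence(onset, interval = "month")      # calendar months when standard = TRUE
}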
} \examples{ ## toy example incidence(c(1, 5, 8, 3, 7, 2, 4, 6, 9, 2)) incidence(c(1, 5, 8, 3, 7, 2, 4, 6, 9, 2), 2) ## example using simulated dataset if(require(outbreaks)) { withAutoprint({ onset <- outbreaks::ebola_sim$linelist$date_of_onset ## daily incidence inc <- incidence(onset) inc plot(inc) ## weekly incidence inc.week <- incidence(onset, interval = 7, standard = FALSE) inc.week plot(inc.week) plot(inc.week, border = "white") # with visible border # Starting on Monday inc.isoweek <- incidence(onset, interval = "isoweek") inc.isoweek # Starting on Sunday inc.epiweek <- incidence(onset, interval = "epiweek") inc.epiweek # Starting on Saturday inc.epiweek <- incidence(onset, interval = "saturday epiweek") inc.epiweek ## use group information sex <- outbreaks::ebola_sim$linelist$gender inc.week.gender <- incidence(onset, interval = 7, groups = sex, standard = FALSE) inc.week.gender head(inc.week.gender$counts) plot(inc.week.gender, border = "grey90") inc.satweek.gender <- incidence(onset, interval = "2 epiweeks: saturday", groups = sex) inc.satweek.gender plot(inc.satweek.gender, border = "grey90") })} # Use of first_date d <- Sys.Date() + sample(-3:10, 10, replace = TRUE) # `standard` specified, no warning di <- incidence(d, interval = "week", first_date = Sys.Date() - 10, standard = TRUE) # warning issued if `standard` not specified di <- incidence(d, interval = "week", first_date = Sys.Date() - 10) # second instance: no warning issued di <- incidence(d, interval = "week", first_date = Sys.Date() - 10) } \seealso{ The main other functions of the package include: \itemize{ \item \code{\link[=plot.incidence]{plot.incidence()}}: Plot epicurves from an incidence object. \item \code{\link[=fit]{fit()}}: Fit log-linear model to computed incidence. \item \code{\link[=fit_optim_split]{fit_optim_split()}}: Find the optimal peak of the epidemic and fits log-linear models on either side of the peak. \item \code{\link[=subset]{subset()}}: Handling of \code{incidence} objects. \item \code{\link[=pool]{pool()}}: Sum incidence over groups. \item \code{\link[=as.data.frame.incidence]{as.data.frame.incidence()}}: Convert an \code{incidence} object to a \code{data.frame}. } The following vignettes are also available: \itemize{ \item \code{overview}: Provides an overview of the package's features. \item \code{customize_plot}: Provides some tips on finer plot customization. \item \code{incidence_class}: Details the content of the \code{incidence} class. 
} } \author{ Thibaut Jombart, Rich Fitzjohn, Zhian Kamvar } incidence/man/estimate_peak.Rd0000644000176200001440000000476314621104516016062 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/estimate_peak.R \name{estimate_peak} \alias{estimate_peak} \title{Estimate the peak date of an incidence curve using bootstrap} \usage{ estimate_peak(x, n = 100, alpha = 0.05) } \arguments{ \item{x}{An \code{incidence} object.} \item{n}{The number of bootstrap datasets to be generated; defaults to 100.} \item{alpha}{The type 1 error chosen for the confidence interval; defaults to 0.05.} } \value{ A list containing the following items: \itemize{ \item \code{observed}: the peak incidence of the original dataset \item \code{estimated}: the mean peak time of the bootstrap datasets \item \code{ci}: the confidence interval based on bootstrap datasets \item \code{peaks}: the peak times of the bootstrap datasets } } \description{ This function can be used to estimate the peak of an epidemic curve stored as \code{incidence}, using bootstrap. See \link{bootstrap} for more information on the resampling. } \details{ Input dates are resampled with replacement to form bootstrapped datasets; the peak is reported for each, resulting in a distribution of peak times. When there are ties for peak incidence, only the first date is reported. Note that the bootstrapping approach used for estimating the peak time makes the following assumptions: \itemize{ \item the total number of event is known (no uncertainty on total incidence) \item dates with no events (zero incidence) will never be in bootstrapped datasets \item the reporting is assumed to be constant over time, i.e. every case is equally likely to be reported } } \examples{ if (require(outbreaks) && require(ggplot2)) { withAutoprint({ i <- incidence(fluH7N9_china_2013$date_of_onset) i plot(i) ## one simple bootstrap x <- bootstrap(i) x plot(x) ## find 95\% CI for peak time using bootstrap peak_data <- estimate_peak(i) peak_data summary(peak_data$peaks) ## show confidence interval plot(i) + geom_vline(xintercept = peak_data$ci, col = "red", lty = 2) ## show the distribution of bootstrapped peaks df <- data.frame(peak = peak_data$peaks) plot(i) + geom_density(data = df, aes(x = peak, y = 10 * ..scaled..), alpha = .2, fill = "red", color = "red") })} } \seealso{ \link{bootstrap} for the bootstrapping underlying this approach and \link{find_peak} to find the peak in a single \code{incidence} object. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com}, with inputs on caveats from Michael Höhle. } incidence/man/conversions.Rd0000644000176200001440000000521414621104516015607 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conversion.R \name{as.data.frame.incidence} \alias{as.data.frame.incidence} \alias{as.incidence} \alias{as.incidence.matrix} \alias{as.incidence.data.frame} \alias{as.incidence.numeric} \title{Conversion of incidence objects} \usage{ \method{as.data.frame}{incidence}(x, ..., long = FALSE) as.incidence(x, ...) \method{as.incidence}{matrix}( x, dates = NULL, interval = NULL, standard = TRUE, isoweeks = standard, ... ) \method{as.incidence}{data.frame}(x, dates = NULL, interval = NULL, isoweeks = TRUE, ...) \method{as.incidence}{numeric}(x, dates = NULL, interval = NULL, isoweeks = TRUE, ...) 
} \arguments{ \item{x}{An \code{incidence} object, or an object to be converted as \code{incidence} (see details).} \item{...}{Further arguments passed to other functions (no used).} \item{long}{A logical indicating if the output data.frame should be 'long', i.e. where a single column containing 'groups' is added in case of data computed on several groups.} \item{dates}{A vector of dates, each corresponding to the (inclusive) lower limit of the bins.} \item{interval}{An integer indicating the time interval used in the computation of the incidence. If NULL, it will be determined from the first time interval between provided dates. If only one date is provided, it will trigger an error.} \item{standard}{A logical indicating whether standardised dates should be used. Defaults to \code{TRUE}.} \item{isoweeks}{Deprecated. Use standard.} } \description{ These functions convert \code{incidence} objects into other classes. } \details{ Conversion to \code{incidence} objects should only be done when the original dates are not available. In such case, the argument \code{x} should be a matrix corresponding to the \verb{$counts} element of an \code{incidence} object, i.e. giving counts with time intervals in rows and named groups in columns. In the absence of groups, a single unnamed columns should be given. \code{data.frame} and vectors will be coerced to a matrix. } \examples{ ## create fake data data <- c(0,1,1,2,1,3,4,5,5,5,5,4,4,26,6,7,9) sex <- sample(c("m","f"), length(data), replace=TRUE) ## get incidence per group (sex) i <- incidence(data, groups = sex) i plot(i) ## convert to data.frame as.data.frame(i) ## same, 'long format' as.data.frame(i, long = TRUE) ## conversion from a matrix of counts to an incidence object i$counts new_i <- as.incidence(i$counts, i$dates) new_i all.equal(i, new_i) } \seealso{ the \code{\link[=incidence]{incidence()}} function to generate the 'incidence' objects. } \author{ Thibaut Jombart \email{thibautjombart@gmail.com}, Rich Fitzjohn } incidence/DESCRIPTION0000644000176200001440000000447014626320772013717 0ustar liggesusersPackage: incidence Type: Package Title: Compute, Handle, Plot and Model Incidence of Dated Events Version: 1.7.5 Authors@R: c( person("Thibaut", "Jombart", role = c("aut"), email = "thibautjombart@gmail.com"), person("Zhian N.", "Kamvar", role = "aut", email = "zkamvar@gmail.com", comment = c(ORCID = "0000-0003-1458-7108")), person("Rich", "FitzJohn", role = "aut", email = "rich.fitzjohn@gmail.com"), person("Tim", "Taylor", role = "cre", email = "tim.taylor@hiddenelephants.co.uk", comment = c(ORCID = "0000-0002-8587-7113")), person("Jun", "Cai", role = "ctb", email = "cai-j12@mails.tsinghua.edu.cn", comment = c(ORCID = "0000-0001-9495-1226")), person("Sangeeta", "Bhatia", role = "ctb", email = "sangeetabhatia03@gmail.com"), person("Jakob", "Schumacher", role = "ctb"), person("Juliet R.C.", "Pulliam", role = "ctb", email = "pulliam@sun.ac.za", comment = c(ORCID = "0000-0003-3314-8223")) ) Description: Provides functions and classes to compute, handle and visualise incidence from dated events for a defined time interval. Dates can be provided in various standard formats. The class 'incidence' is used to store computed incidence and can be easily manipulated, subsetted, and plotted. In addition, log-linear models can be fitted to 'incidence' objects using 'fit'. This package is part of the RECON () toolkit for outbreak analysis. 
Encoding: UTF-8 License: MIT + file LICENSE URL: https://www.repidemicsconsortium.org/incidence/ BugReports: https://github.com/reconhub/incidence/issues RoxygenNote: 7.3.1 Imports: ggplot2 (>= 3.3.2), aweek (>= 0.2.0) Suggests: magrittr, outbreaks, testthat, vdiffr, knitr, rmarkdown, scales, cowplot VignetteBuilder: knitr Config/runiverse/noindex: true NeedsCompilation: no Packaged: 2024-05-31 09:49:25 UTC; tim Author: Thibaut Jombart [aut], Zhian N. Kamvar [aut] (), Rich FitzJohn [aut], Tim Taylor [cre] (), Jun Cai [ctb] (), Sangeeta Bhatia [ctb], Jakob Schumacher [ctb], Juliet R.C. Pulliam [ctb] () Maintainer: Tim Taylor Repository: CRAN Date/Publication: 2024-05-31 10:10:02 UTC incidence/build/0000755000176200001440000000000014626316445013305 5ustar liggesusersincidence/build/vignette.rds [binary payload omitted] incidence/tests/0000755000176200001440000000000014626316445013350 5ustar liggesusersincidence/tests/testthat/0000755000176200001440000000000014626320772015206 5ustar liggesusersincidence/tests/testthat/test-non-exported.R0000644000176200001440000000400314621104516020714 0ustar liggesuserscontext("Non-exported functions") test_that("check_dates works", { msg <- "NA detected in the dates" expect_error(check_dates(c(1,2,NA), TRUE), msg) msg <- paste0("Flooring from non-integer date caused approximations:\n", "Mean relative difference: 0.1") expect_warning(check_dates(1.1), msg) msg <- paste0("Input could not be converted to date. Accepted formats are:\n", "Date, POSIXct, integer, numeric") expect_error(check_dates(factor("2001-01-01")), msg) x <- list(1L, as.POSIXct("2001-01-01"), as.Date("2001-01-01") + 1:10, 1.0, 100:1) for (e in x) { expect_equal(e, check_dates(e)) } }) test_that("check_interval", { skip_on_cran() expect_error(check_interval(), "Interval is missing or NULL") expect_error(check_interval(NULL), "Interval is missing or NULL") expect_error(check_interval(1:2), "Exactly one value should be provided as interval \\(2 provided\\)") expect_error(check_interval(integer(0)), "Exactly one value should be provided as interval \\(0 provided\\)") expect_error(check_interval(NA), "Interval is not finite") expect_error(check_interval(-Inf), "Interval is not finite") expect_error(check_interval(.1), "Interval must be at least 1 \\(input: 0.100; after rounding: 0\\)") expect_equal(check_interval(1), 1) expect_equal(check_interval(2.4), 2) expect_equal(check_interval(2.7), 3) }) test_that("check_groups", { skip_on_cran() expect_is(check_groups(1, 1, FALSE), "factor") expect_error(check_groups(1, 1:2, FALSE), "'x' does not have the same length as dates \\(1 vs 2\\)") expect_equal(check_groups(NULL, NULL, FALSE), NULL) expect_equal(check_groups(c(1, 1, 2, NA, 2), 1:5, na_as_group = FALSE), factor(c(1, 1, 2, NA, 2))) expect_equal(check_groups(c(1,1,2,NA,2), 1:5, na_as_group = TRUE), factor(c(1, 1, 2, "NA", 2))) }) incidence/tests/testthat/test-cumulate.R0000644000176200001440000000063114621104516020114 0ustar liggesuserscontext("Test cumulate") test_that("Results as expected", { skip_on_cran() dat <- as.Date("2016-01-02") + as.integer(c(7,7,7,0,1,2,2,2,3,3,5,7)) i <- incidence(dat) out <- cumulate(i) expect_identical(cumsum(i$counts), as.vector(out$counts)) expect_true(out$cumulative) expect_identical(cumulate(-3:10), cumsum(-3:10)) expect_error(cumulate(out), "x is already a cumulative incidence") }) 
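# Possible additional check (a sketch, not part of the original test suite):
# cumulate() should leave the dates and the dimensions of its input unchanged,
# so a test along these lines could be added here.
test_that("cumulate preserves dates and dimensions", {
  skip_on_cran()
  dat <- as.Date("2016-01-02") + as.integer(c(7, 7, 7, 0, 1, 2, 2, 2, 3, 3, 5, 7))
  i <- incidence(dat)
  out <- cumulate(i)
  expect_identical(i$dates, out$dates)
  expect_identical(dim(i), dim(out))
})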
incidence/tests/testthat/test-bootstrap.R0000644000176200001440000000431514621104516020315 0ustar liggesuserscontext("Bootstrapping incidence") set.seed(1) dates <- as.integer(sample(-3:100, 50, replace = TRUE)) DATES <- as.Date("2016-09-20") + dates groups <- sample(c("toto", "tata"), 50, replace = TRUE) test_that("Bootstrap needs an incidence object", { expect_error(bootstrap(DATES), "x is not an incidence object") }) test_that("estimate_peak needs an incidence object", { expect_error(estimate_peak(DATES), "x is not an incidence object") }) test_that("Bootstrap incidence with groups", { skip_on_cran() x <- incidence(DATES, 3, groups = groups) y <- bootstrap(x) z <- bootstrap(x, TRUE) expect_identical(colSums(x$counts), colSums(y$counts)) expect_identical(colSums(x$counts), colSums(z$counts)) expect_identical(colnames(x$counts), colnames(y$counts)) expect_identical(colnames(x$counts), colnames(z$counts)) expect_identical(x$interval, y$interval) expect_identical(x$interval, z$interval) }) context("Mountain Climbing") test_that("find_peak can find the peak", { skip_on_cran() x <- incidence(DATES, 3, groups = groups) expect_error(find_peak(1:10), "x is not an incidence object") expect_message(p1 <- find_peak(x), "'x' is stratified by groups\npooling groups before finding peaks") expect_length(p1, 1L) expect_named(find_peak(x, pool = FALSE), c("tata", "toto")) }) test_that("estimate_peak can roughly estimate it", { x <- incidence(DATES, 3, groups = groups) y <- incidence(dates, 3) expect_message(e1 <- estimate_peak(x), "'x' is stratified by groups\npooling groups before finding peaks") e2 <- estimate_peak(y) expect_named(e1, c("observed", "estimated", "ci", "peaks")) expect_named(e2, c("observed", "estimated", "ci", "peaks")) ## The observed is identical to find_peak expect_identical(e1$observed, find_peak(pool(x))) expect_identical(e2$observed, find_peak(pool(y))) ## The number of peaks defaults to 100 expect_length(e1$peaks, 100) expect_length(e2$peaks, 100) ## The observed falls within the confidence interval expect_gt(as.integer(e1$observed), as.integer(e1$ci[1])) expect_lt(as.integer(e1$observed), as.integer(e1$ci[2])) expect_gt(as.integer(e2$observed), as.integer(e2$ci[1])) expect_lt(as.integer(e2$observed), as.integer(e2$ci[2])) }) incidence/tests/testthat/rds/ [binary .rds test fixtures omitted: fit.i.rds, fit.i.sex.rds, print.fit.i.rds, res.g.3.rds, incidence.res1.rds, x.sub5.rds, x.sub4.rds, df5.rds, incidence.res2.rds, print1.rds, res.g.1.rds, df.rds, res.g.2.rds, o.fit.i.sex.rds]
(x!"G:cZRD ahH DDնz޷xG@TTP[o(* (6 V+n~a}3o͛޼yK!alȱP!sQ["ׁ:cz 76Cm|yGfAzCS]6t0"ו"Uv&KLGɝ&pœUÉ2ilTL@-$ k$ewvzZz֣<U+C*:nOi z^<_6l<-T*rBB̒Qɡ[=$u EϴnZ~}'-=6>ejVG0 %=uW?,c2TY- 0 :'؈CKۥ|ֵ[ r7 3wtJgZbtc3V}v'>o>.Թd =Y1W )mKRJT*S~T?U @vK\.%wԏ.Kti]qi|qi|qi|ÍdWߨJG wNNQX=eORz$'Wl; ~)IɆ,HRvE@~GA뜇'e0tBTg*SQMȕ7УYӖV؝w@~>wneuF tjm[ _oRӔɉݺv)\<Td-Μ(#Zz]DF濄U?>\IJIBe+UP+ִA=M5w!fW6N5n] ='Xpy9Pl^`W4{lxZ.du6 !ԇ$>YF <̏na#RPAKht^7yQ2{7?i CϾVBLd”(޿5x'P\1S9hub4yp綩'  ~X}Aԋ]OW,?}u* *{d3T.rVuS<+z, (5yf sY0Gp𗌧G]z-U7=(AفyzCPv)):0nx܄(;T4':r6MB'v'u]O䤣sc> N r Λe r3G\WGDn 僣^'JխckD 5) w@^4A^?`fkTB+1*яSý5T6䳩>f_X~@Wa] GǏfZ4gB9бUg9cW}"ro>;(x1'e S,?6;y!,:y0')s)Z0gؚUhbG^vwֹ~z:p9¼(}饑e(oʈ/KQeǖZT tח!Dy. 6U cO*S!^wk:>H<:~w8/7sU6x&?ٴɘ벟>]vv/C?F7K/AONҧ}P9I E*҆ w ֭\MO#(yVЂ6J%r FN1ۂjxr.Gy3McUχGʤ:&R[ +'AM$|koCk5f1CU]ٟ5(|χ҇aZ& t)FȒnLKP7J'S$tP[ ;DT2"[H\&\SY3!&A#W\+vS f[pIcj`݄#PCl.-w9ew@)qI:hx@Ʈ}+?͠QuFR!W13 ơǥ9,.k𓪅nsЙ3a9^|mu?3n:2;Տ}I} ]gdEOPCGdݷ\iGOPd{'XBV+uIE5IW*" ݚ-ˆVXV|pJȦ9Ha JL"[h{SҥU L:G]=mm_ !#`/:X(#"DB4)Tx(ZJkFC˼હq "fߖ"ܽuǡw_M.cSP֩+CSɧ F"aRh+PإYwh}Haͫ.6ݍp ;Q`27Vm:TB'mq3f)V~vٝ6GWRMc g jr̈^yҘ{ShR8kaohӅV=+ 7޳{g݂:C 54env)y`ZjP|QcGBӱU*!=zAZ#!IDC %ˍBӂ2˝m |nEhx\9<_Ȗ J⪀y]}Y~CTBω^UOْqaP tjx;o]}aF)=Tw :%n4tl?tUYr !<0dÏ=xg|.~j2y:bsy_áKMЩsK6BE7*6Cw0r>t,n_(t}O6u{N6lο2˫SRF@x?>H?qJTKϏ`sv6`o&2=JeLױ8"]QS)͉}cnZ7Zy86LȯЫ/8_`@oEYe~UJfoz2aFP#+FNDJVھeݗH-DH)RhHF7B{EӜ#teⰘ8pbⰘ8La1qXL~d.9LƵ\;nE1uD&1IķMhF |hZc^3HTiI9[Lp,dM$epLw"9#"}DD&H")Ho?5cH":9,$ krNDH$g"T 556c-^gjP$]yQ(#uIx(qsb7ǣԊYS&GJ 5M0*)~O VZVQ#@[iEPe|rYeyu(^h#E1QJQ}G-PU(eI?Ȩn1QbU.Fs6J*?X+©5(UnN 0~ô? -aކ {K{>y_0҇GW_ӯ'lincidence/tests/testthat/rds/print.fit.i.rds0000644000176200001440000000054414621104516020646 0ustar liggesusers]K0 E q7bkIu@0tmEDlcdtINHyΛ!4@'hpjBtc-:Cf{a"g9(~moQ{N- SPRi Kt@&Jg\ 1DzE˭=1M&mSn1Jɭ^4\1azHaڵ M&]8q% Odϰb QC-zɭ4ٮWV@OWGQKp/ aC^єĤ}3/E3Tx|޲\pIƖf kьhincidence/tests/testthat/rds/res.g.3.rds0000644000176200001440000000032514621104516017656 0ustar liggesusers}M0H0$l\zmShH)񇨓2%ʹdV6xP*Xbf2bhnK̯JiS(B1r8՛;Ȉ=7nmp=WFz(_)?gha/<"Ug뼹M`|Q;d+u^faqڄ=GeFincidence/tests/testthat/rds/incidence.res1.rds0000644000176200001440000000026514621104516021274 0ustar liggesusers]OA $Dg*AHli4ҵMHљqIXjw NzPkԺNЬa} paG|0떦/~Fh&?9v3 h=DKIs1#jN iG#hfb Gb_t2bincidence/tests/testthat/rds/x.sub5.rds0000644000176200001440000000025014621104516017620 0ustar liggesusers]O ږUV3*~~ٱ5 N8ķ&"IEUZ_Y#ކ'?_#J&*ttF3d<,dSt<&1fBтmy &]W`^fN> Fincidence/tests/testthat/rds/x.sub4.rds0000644000176200001440000000027414621104516017625 0ustar liggesusers]Q 0DWRK($=FCG@R$B1d<;"*Iոj@l ~GT =Y)k2Ϩ&^U4~wӳWXatdgK7܋Ƽ:[{B,2V{\iF=Ji7Xolnincidence/tests/testthat/rds/df5.rds0000644000176200001440000000032314621104516017153 0ustar liggesusersuO0ܖBDcwHx6X %D~"_nM=L;KF:Nx˱C>ְF`weåD_{}%[\35%3M&Xd銟E/dܼ}-~#I0O;FԲNbRkߵChk7y?y4ah@h֟incidence/tests/testthat/rds/incidence.res2.rds0000644000176200001440000000024614621104516021274 0ustar liggesusers]O `[ 7|m@`߽/0sJrwm.rjH8:RJÔ.Ylw<v Q?C@ofj.MUIȨ)NC6DZU޵P'.?R9[d'&(?;]2incidence/tests/testthat/rds/print1.rds0000644000176200001440000000034714621104516017720 0ustar liggesusersMO=O1 ( t*?C$N1Ll,HZCO(9g?gen91RgEɹ95/oY$ ]e_[p)ÐbnV9h7Uʺ8j/سnP]c\V:wE_#(e%4hX²(_ݡ/rR`iF;azuJAincidence/tests/testthat/rds/res.g.1.rds0000644000176200001440000000030614621104516017653 0ustar liggesusersmPQ -*FM\LE vD2o+.I- syDo Ckhb@Z"|u8?ڙL Pl=^]I#Bq=KZ\YL.Hy*oQPkynjŸu^'f~GI-<:mG /uincidence/tests/testthat/rds/df.rds0000644000176200001440000000023414621104516017067 0ustar liggesusersb```b`fad`b2 10%r@HpF,#c0pܓZ dLI, B80afgQ~ +ۖX &-Q/7NAWincidence/tests/testthat/rds/res.g.2.rds0000644000176200001440000000027014621104516017654 0ustar liggesusers]OI 0 
RyPѫ`8yw^P*/Mf1#$TP뙞r&'\Ÿ"/XO$^ E ezz/$~pX~=l9Js@L:|`C'zbgWQ֍]@]LaW=rD/UT)UM7Oǹ ό70WS1FwaMo3,#/ǔ{SJyl[8t]d4\!ΗqFincidence/tests/testthat/rds/o.fit.i.sex.rds0000644000176200001440000005621414621104516020553 0ustar liggesusers]|Tޒͦ:Y $^.(",&naPlX={>E};gf.I 9s̙3soI&b&U~VBS(IXS64'}}oulm۷!玍Cxyίڞuْ9wtXeFnK??/=x:\[zߗ=r?~/~p9UŬ(%LnodIZry-.9RVuղZXvG_^rVPbrgeS~_}MmDoE1Ͷ 744_hi(IT* +K&rHm+k1 J0U,Q)µ8迪tE# 8ߠ䂉unLJ9͇gM3rvفz x||Ѝw}y漏:,bGv/=2zs?П?  0}\ס@o#v\|;yn?·眒ZvG\z?ggJ`1U\=EHjj6  o]sWhk}x凿?1aZ\ 4˧b-mClt\'͔g6=}]W:e R\7gδh?y=pqǸA?:3k}hׯZ J[>8'>l,ǷOF~_Ɲ5H*p9xI0!!9YSY8nЇwW~p~/8g\yy |nibZ-<'.9m5:k3U}'MEx$97?0˺w[~d[o,7kB.uRE?'~شz9GN l#:bQE:7qϣ7t>tƤ.M~K*0g>،uY z9?]w?{%fK^cхoM l^k[)_Xysm^tn[zO՜nsE74c,4!IK6u?R=Ć Z{ɹ3wh|~I:vI{m]W秖{rl㮻0p:hYul[#21վn+?cunZ&t |jM/=ny#Fv>8ec;oؖk&t||׽bJ2B8=s~jK3:w~ֻ"ᐝ_dA&}MGOcOĖ?ZItf-_ly|:>"?.y^Hxrb 6 4:Hf M~d xx)@)tNglqCًussG|7_WmPYd[r|9Ȳj>#iUZm{|{f?fSn oŴzgk_Q훇BvӓpV8{9ߠzdh796]Vxv9ofsuU0|痗V+|y O,=o?9g\}R]wZ-uqyZmecͱ%֝i?lsZz{F'ɣ))U.[n%}hOR:_nek1[Rwy!cfKzUQG?\u_uk3(Rȿg8l"v@; Hlc~0v*Q;;NELũH㓌On~r@F7܈hƾMlc4gc =jPC~b?԰7ڱj8~!b_mV=AN}}c'~SSlSM@vZl_eĞŲ?cOԚG2 %2Hᢌe(E!\Qe\KN.JȞP&؂+'?^PЊL ~yUE|D%J{K(u8!J;Q:SLĴ% gʾxS3LSLo~GM [nPrָx'Kh73TV/:rAwB'*HRK3T ˟#ԉuK݋h=:_nw E(3?󾕈ϙ_ʨdkt5{htB:,ߋ]`R'Mxw+ms*E2zʦc-11azQRULapZyRI,ϥ=je`ZLfR^ȩqEyTz,UKN8U/~qrYWzC"Dͽ34)˿bA&J%nr ǜnGwS(w-r*bqD! Ds.S]5^_cC,=bu)j*P]Xү];rwu}u5usfQp{ iQ.ZcV_ku/k9-f[b{QZ >19\~F,E|nqq:]X-w#nWRݠ1VwV6"bG\ κE\֋ ]9Ԋze|յuF5;-uW;5ea 2m|^]̂k"KZGK4v1A* ]UܸF_H2:XStce>m%|vv߲z; #pT+is=J74CVɉC!m*G&]M-Y@=W"]])<7rkҎN.k;kaps ٓ+_v0?C}5ۤѓ)2.Wl*~MvnWNbƷDYZ We5r~.q$ꠋ`q 4o)dVUQwٰjwEWw _-Jq\ȏƏ` ]fmnQ[ ,\H^&1窩񮒃(z'_Ax<͠Z/$t]s[Ic7Q/_ΗS#5}hjΰZy#"P iƈX\Q01L_ _zDO1;NeQleOEPp<]HĽ` U.y)H7!қώ^ttY23Fs+P!7FP+B#r -=VNK$"^9!`@g#Vy=Ub]- BGoGvKF.x?!P'g7isLibiTy\DԕCm(9,ɿ/W_ODu~Bd7xup*'ݞK']FnVX.+lc}mP\v/GXtY /':ZQ[*MLo€yquc{H<|D2קyw';"L"BY\j r]QܟW9Ք؋{KV\E 9/JKOW$nMnw9_fa" Elvfyg}"uĢ0"y h'm](WLU.w~p\U~\vYKj \ʻmwa+uvQN(}19jjtBSOFKHe#R^B&9kF`ȰqYyS;V[P#:t9Gz8}sITXSUwFӳ=xn\x[ma>=7fɞ#zn# W]bps!o8-jb>٤xs\u)I}վ&kv~FCyT窪u_/郩+/LgL=q~;Zg8VV ،?d.UuSx+h&ۉ#^DAL>xS赽ۉG$eq,S)(J|'Ⓣ8SO%֤Fw#bߊVQ">f+ 3m-!=S/MuC|aGҗ#%3.RH;@{J^^7B@|Kv({!&qg>g(a ϏRXЬi}9FTl 9">? AC!c HQ89ǃh@2;v&Ma7ٿ_M`oCٿ't3[A_4s (E9R9JHsY|1<Q #y1$Od}ޭ| &8)O}(.>_|a&_}_h_K8g%VC /0~LW_\;.7}[zQgI#{(LWn9۴**;UO>P)Fw%Nrmq {v>w gߗG2>r:9~rW>w\QSsB8V?' ? /25/ %O%^ Q"y ϕܠgWL ZkI@_? xKtB7I(# fQ([UI6QTہBP_> .EA Ϧ&^"ϲ)O$O> JIܾO!oEiaQE,@QTs|¶ߣ%y;@cl~< k>xoq|]'䓝]x4ɓigAgDy䩀?6|@?| oT&y:!/I?/*s9^eB<@kA|@i__eB|7@^ |@mQ!y w>?ܟmߋg Nm|ٿA cǃ )|J(+xsПvS+xAI6@i_-#Y_r<Ot@r3Low$^0w߉xp~i6ߙx߅. WA>(>xSO%+]MӉ%^bw3#^b7೉'>?oO|7ED|wG߃Q|."$8 (.P> L9`Iv40*~Ob1&y@_"}[Q?%MRޑ׏"N>w,~ .4 [+Ky(Nb~٫gN[{XM$o!rq }?lB>9<`kߑ,$d'(' w2=|Hڗ>N'y/@_܋ܾrf2id)>۟|1a ؟/9_) g}?(E+b/B? (K RH eB;x^wN3 w9o+O%y. w(W+ٿS#4Q y $? ހ/w(*x\/dWߗpw#ٿ w(|4 E\g/~ !d\"e$ål2E7 _ȯI5d@5{K[B87| F\&Qn!y@͢oK :QM|g |Sx7 {EyG(ϓ?'+ }<}A ʋ$@\}_~p Q"@M}-EqH@o3!?&(!8@DXxz_m$@oSԏ}O{l@n\1;h(m9_/sNO9?|| FsD6hy'm y~#isg9?v3i_}9߈/IGS)~[ohH. }_LB>_Z=Ov>@߁D;rEҒwkl/x/!dE_X?"f }? C3$Hk6gmB ;*k(w!^섉<9f@Z҃䙀icB|a[_! ˱%*"w2g#,J ~ϖ~$ -C_?0E[k> C}PEDrm3|yu͏~$/+ $OeBO>/2b-aB4@i_-׵LS)"l *3 NWǧ,ǒ<ǧw=!_dr$ɻr|rB?®yE\@n|}\_o/(e$O/ūge'b~RA4@/(KlV`R{$gYIlB̯*b~URC._(\ 9_D%y. okc,Jʯ0E׉L$@nIg'Lu(< TQPӀoE OWc~erh<_Rgb~|{B̯sE9]9EX|!\H\@nlBE?Cl RʯK;-7O<(< w)_",Wߊ/\%5wBբܤ]|B]+$o@_?K.Ծlff\( ~I_S}r' Q }NSy!(w<ۿlB~HOQ$yG@QSDDy9G1hUO*`%GߢdB<,<˳Odϋ >׿ @x;/8$&ӫ'kAN|]- } 7Ey9ܾ ;Y? 
ɻr~m>ϑ |{{_1?c'B |2aBޯ+"Lրl3Q<s};𙀬x;/8$&߈;b+ };}y9ܾ NKg]9~>J=l7]A?xhj!y ghS} [}B5vߢ63~( $ Y?x;/8k'gS_B_ bvuV+k[>ן|@ߚ~Er2 ~&@V}Y;+ W;s {;@| 0%bXX< m)~W acUGrV~')?|پ$o @| < ZLL@_Yہg!y{@_|`x;l_p$C o :yl;l vپI^D_:LaoWi| '^"/(HDgO&5ZKp$O#5t@?N `1@| O< :䙀)go :t;evپsHg gG|@?n=ہgC"B_/dG@oWAD_Dq< ۿX2>wL(S@_p$O˶FݢT5 _!GWAO֥ }ؿH%y m;O κ$.UjQjH{\ OS@?:ଫH[|>!?[ x/8$/2߱w%}L~OL>2)FS id(CAL8?پ|( ?aNyv#E\B$@bQ.SKO&R>NT@|);ؾ|>s[rB BT'8$ !<&w":sHr 7@|Q)@?߭ہg}Y"y  qq'vپO~_BݢC8!>O d'8H>0 q{پkI>? 0Q`1Kgk h{?̳Bm iz~\b<.L$OyΟ/"SZ7/ P{˂ (< 5QTsL7p/jSwXJ=8~>o2>o!Mw"Jw\乀ܿ[Mm+H}O\oe LB??ZacH _|F B~|!߿d|>>xOp$@i_m+<[RH@ "@[ٿ/% }/Nd~0gv?ןҿ]d~$Q~#d@~5g~>M9֤;d_>: LI_rqH ({ .q'Ͽ}1DZ<LJCpv3y|"쟐ť \'S}9~1񼙩oEx?>*>"qY 3L׎9>m>x#5q >@'DG\CtBOg31>]O\w8D?1W<0a1|'?&y RB^?);i~@'IO;>!#i~+&җ>|@i_-b7{XƉX T@/d)G$|) o8} y%?ѤF_ yCWʓyE\)<?qz&<g1DI k2m y,g ߙ0& ;ggߕoe6i7K }?n{rG_@X x?_{"~Crq.a!yXQ)xX~0!လnGT*xߥ'Gh^p] .kZd-#^3/~2ٗHs\{(rQL!)JQN y *QNR'L\$O'Q}O>7\"ʩ v:g*ŝL9̳٦M$8k?xt~_/2'z(7)ˀO"󧸿r9S.ʕ }9i` ~:_ *3 |Azw}>"~]\'$wv5fn6r* }6}03ET@(l.!gEy@EW=Gh_;@ί="ʣ$23!d>.ʓ$@?_xUr'~2E30KQ"y wB|+`k~k~hL,B_Nw0~eg@ߦKp7Q y2 BͪgBݔNs)}/uK.> #mveY|*}y$l-] rEl"l}l?t@/b[U60?۲0JVB򶀲k|;B)_t$yW@/Km\Gs Nt!yw@/T[Bw 'G0g]c^! o7{{@JEKAo@PpE%GK_$O%5@ }?tv`(CHPOR!w" 2yflDl8m(#۟|'BɢL%y Qf*TrMeg TY"?h%MmILb<۪l>il_c[@V<ﶅ }t,B? لt3{߱'X$yŢT)~OSK@ _&9~T*پ~B %yoB>QWlx4_O)/1K2Q y< / adI_'ʉ }dBؿؖ|_aW*l,@Y\|6 or ~g~[@TE|Q4J}\jwmgr6>6Iv(I>w>_Pgs\gK!~VE$We]-ʵ$om -`?\#?lZu@6<]/M }لpcQ;I򎀼N`Qn#y o5o> S_AxpbGHޚ^QR"r/$럢-*3'm ܿߚϿ}l@o(lˢJ$Bޟ" }nre^ r_S>րM @ gI l;:G߻{>|/B<PI^ >JT?NYND\|)W$O!+W'b~~-ʷ$Od }- (? lSHw!^"O؜OqK3bvS8X_H+bgrpOExy~[|'hXƻI^[nE[ |?@a%p@Ο ?"p~%F2ʟ?CW'r'&Aԋ7>_| ! n( J (~('# }D;}y:=}9N>?^}"g? tQU3@Q"p@ϙl,E?BFJrXJ.'!OB΀]$% _) 2E"7^%ڠ+DvB_q((U']բ\K׈rB_ |G`:Qn y@(kls9>7r3~Eο_$OCH^N}.E|p}_ Q_)߯"^"âvBGDyK|!2?e-9 >_ |+tǁ},B>ߔRɳ92gW3 d-aGO>GN߳s?d^ a[eQ^%y;@W ٿW#*(/׷"O~99o'l߻ɀܾ ٿ (Kߤ+)r}(<PDD |"a B?g #Cp߁xOpvnKk)._ y|}m|2!/~y#3y||!w 'uC|23o !?[!p@/$Lzc$Ucފi<>E~SO'= x;$HeO# 5v~&3 _ {/Z R_Žޞ {@|_@X i|\w@Or|:< c>1*>C|=ſ>*X<'tB> @/3;g{?◓D~'7@n`f=v1WG+!;E[Wy$=@SG$g_Dn<ߝo2?eg Aױ"{;ހe墸<@vY5~y76/mI2Ox1BZH~)ƾJx/&}g)x4Y?Yw%1q]. 
|ǻkukե1ӧN7mGM=-7,:~јQS4̸o U sӴQmS:"f_F^/Tf5 K=gVD R);M67M֡&D؇H +K>(:efTP/Jj v@ɑ̛>iF%jP-( &:4HǃLj/֕p6sA¿HCp9N~Ԩݽ}arܿE;߻9-,Q=4ߌr%'m '# 6En)[3 M:˔fxG"{^ݢi/%/c?=,-1r~9Y,͘ɂ'$֢苔?vp6Meµ|azpG7"VǤg '=&s=&mFvNj{+u~ RR^&8?_^r;*<,q%_+<9KݎZ "QZU[U8ꄴ#UnŅn9`mSWj:(W)syEM<ՕKA.ڥzoc*/w;|PXaW_WS_(XW|׹k"KZGK4]Q.͖jjaUVꪲJ$"_HG_铉0:h J-#]m[Wu\G3EQ857Ҵ-[-'rh=!.y&_櫯51$D1ݣ$-6inԔ<Yc#4s?42cr>Q䰉\.(ʺzvQhm6SkT/B8h {4UѸO̞PW5䉍- D_žF3тp =DQEM)+CaO6 XvJ~jj@7=Jč[.4|K\S.u-w>7 o~o5nUKbgr5M69gƹy0N~8%1\>qՉL^R/_?h;"MM7/s>q 2#?{|4>4z/`?^sG' e6N\zf?M TvŐoh|F&#o3߬0zX_U\+:mFq2G_SItFU39o /guݡ+#ssG{[*0MVA㮨pVj*;ͧ7uWr0,nzF4!RMNXahA4nwyS4D$- C )_ 灸9OFmCF:w##M-2&޶8u wov1\~z8mZ-vW/]VG__~x7DCXMmrV́ Z{z(6:i8xacl]qKrA/4/ FDA^.tբnCQ`Ӌ润ѝJghhȟEq^@YS':C؉kw{B bpNsp3S>4?p!=V,*ėzB&*<^wD -6/.+v9Uzr9\&}QzN.w4c  ?i󺗋A#>& G*]1Zc;7&Sk@g܆[uYv۶vډϖTAf($LEdWY;xeTALN%@FtT:)d&ƑDzkZU^~m6Ay/bEk_W[=F9b>Ix_|/NǚIn~9Z͞s/9[KqҥEhI~9ZZY;Z-NO>5MtG'jy˥FNC,cY'.ԇz z C箨yCDq׻6rT.o}h+Bgm[NtUu*jVM$߻hDA nq?w W9}c}IAa6nbwt-2(-<śxwRKgmݴf"~oHkm΅Z/>kcrGM8Z9_?Zss6-ڸġïK&\FX0fI:DO7|ʬ;zþݥ G ˺WN-~oMkK]tUm>X۴a/+˅ڸ) 6Gk9I0ETE*w9zW\{\s)M1N ]uߩAxzYQKг$s?П?~GTo{vsפ6_+x~SKս}W9\yq]8TdZ=Zl^ֺ)n]0ֈ%GLln[g?cv?zv: ]2pӋg=[Q/l[7=Z u} &fpffVc7_1ֹsK75| |)6:-yHj?./yPH;|`(bᕾ<'ćڔu:N38ˡ:ιQǹ#~ΛLEvG {Tio,z9|rOxdSKE4*uW=U)Ά7bZM{m5ίVϨC!}IK+=orWo=c.leZZ ϜM3}r9: *TtK|g<'7ќ3>.;ߖ:8<6ܲؒqUϹcw-y#j攔 -uRR7֒ ~dҌg<8햝s>u_svI < cV.1G }Ur=Mҏ$/}7GDJQ;_7E,'0M5W|˛})7/ݠw]K}3OtY2O\9:7nyK2f՜ް-#9SޠK.tYO9Z.ۼHܖx݀N0@˾I~QZs\ʑh Bv/tN5}_2vď?eڜIZ׷z<5K+/po o[it~0mJ}ң3w-?--+d8CP$jIl(:gGɿ5[WPVt\]9wO: ?;Wi~nYέOοypÿǓu :|{n⺙Qx) ou/n󗜍_;/ٵZƆ<79{lʚ50p}UWٓw+&DC9~m4?y@eHYl'dpx-Σ7]>)sszuDz`cBkqzXcf\qq]L;٧?q_Sg&7yWj Pݓ k7Z>_eOUiGNm+fϽJ{Q!{%#W<<{c3;umD-ƷLK]~S6S3_ڽ۷]ps1!{ǟ/}ڨN='hmS.v [qjZ7svCW̺F,%՚2[5yC?` }7ة mOڊv*sw^y+?8v̈4|L^;g3b s9Cl;g1v9C!v;g3 LNLdB'b?9!dB,WdBlFA'LLdB'ǦB?cd¾27D.1{c @썁=1Y{cd5ZǞEcObObOf{5ƞ<'tMQ{wHW^[)P򊾢 e(D/dQfr("Q:D#'[x |r(EDCCîDx%Q[%%KVR Jo)_q1D!v_R\4%7(,-7Yjq15$Z mc1UȘ&yܵ5. ^f&\Y6sb[8=܂40M[Kp5aE zeo)YX5ΠgPsF3>c2pArXg`QCT YuwM"Z {ӒLa䰅Gb)}.)%  QN {Zֲv{eX/z|յ+ XEi'Emrί+k!a">,l6Qo)_4|ijb6,~5~ ȱc/ ^@Gb{ ȱʓIa+$"+_Yf bܢ3/N0}R%yKq{ԈtagOWDZ-^sjkI}QNincidence/tests/testthat/rds/df3.rds0000644000176200001440000000031414621104516017151 0ustar liggesusersuNA0\BDc;$3!"M('"l$643Pҝ)x7O[i_t+6μGs7ثKTVM>KϢxR;zV-|FvXßaѠĬMrincidence/tests/testthat/rds/incidence.res5.rds0000644000176200001440000000026214621104516021275 0ustar liggesusers]O] 0 NDcZ>*'Ӛ0X!~>\.ZSwc:\P;ֺ` }fc~y2(N`[˭l&}Euih ѽ6VRli17w\iJ1{ gk^<:Jincidence/tests/testthat/rds/print.fit.sex.rds0000644000176200001440000000063414621104516021215 0ustar liggesusersj0ݤ=C Cٖ %ْ@",YݐțY *88;U˟vWS~V]ͯEzeΫi΂]y*wPբZm)@ljW08ccA66 v==xp)5J Qe*L Z R#ؤoE۠AS/:jQ<9)~jH+B sRFNIҚ׈ "Y O hF(]XolXe,L(G̉+[(qhXE?^8kh2NeҫB 6@\X\3w3Ȇ!incidence/tests/testthat/rds/o.fit.i.rds0000644000176200001440000005124514621104516017754 0ustar liggesusers]|[ִ=,'Vl'#{9z$d&Bز-"K,g @B Uf(aN٣ e5%F zHb;:'s=weI&b&U~dJbR,daBF $Mrw݋]ëɵvo}[w;y,轰I;dIf͔(O.5P_ lxj Y$ eA3``}%+c9YںG3J[1 )B%+jisw?~{gYdnCrL4J&jmK {Kr/='вsu=/i)i'_:u{^8uJO~-6.z'̷_,s-nyjH34.$*\aFa K,XuɬmZVr=;8{>t1<-~׻n8/n_mya玽7?uYRzϖU76{Z>TcoIտc%@Os}rqI-3̛g5nf+^yvi8-R'k||A򿴬m_|_{;ULNٞ8{ٸח/rT=|ݮo/9J;yۀvnqf~-|[;b}f󪖿wwuSy3y Cǿ75>k^M\gC`mp˅חS_ kkڇQ hi;0 m}FvFqvc'^oNkŠH̠! 
+;:-:mqIjZfMmڢisC7?sfT{ D~ۛrʭZٰ6Iz꼴=vmLڇϥMy$[WM}õo7"~^2n-vsWsҪ`{Ѻ7h͆7W'9ZGWܤ >-M=hZ{>+EjIO~3:n_:Y _J޺But%O=Yb&mLrmWymuh&MovlCc}coϬbD6n빫[!-8wNၡ(qw9<0[+*.i=teL,kyW:qnǕc>Ȼ_rAooT#YS^ocڳW{BYyG~pL]_JKELXНvm[?/i剱[ygzw_|G 7gޘORo9 \?JۭglOY*{k -o66Ҷj&sŪlGݟNMqt-ϟʋoc?}?yhM[^3c7kIӎG5:4q9/nמ7}˫=?_tlƎm#o:B:B1ut3{Vpif9j_Ŋ=u-_>6}zk i mK~t|j98NvQ렡 M?=O˾Lov '9P46{wt\q*_LMAnզNМ6sՃn׆6׹S{JLԜMOrrxPK0-VhU}Uy*c"~'ν#7a6>oj'_vZ!3vZa^?Y=!-o}Q7ӷ\{6e]ߺwm ,ژwͺy&gIm`ߥ{ߔҺ8l͔|[,x^M`sq=S5M@S+NQ*WhpKF_ [ijWh=an@ƹtxj5U8 4CU.Yu7B%t(C6̹pS|N{MzMmÜ #1Y9֩O?Nwl_f#_;vG^kZyWvܨUhrrfm E_vF0KK'SljzAqo[;g{k?ur#V0rևH{'<(j>_xG^sw%.jMdRʑ½9N^0aW[}ubӁ⍯:l``4M~8i'tk (\6^2`ua̩hS)_,qW+eJ:t%7+;0_SZo3ugs@ZY Yp`pM"Fbۆ".r|ݹ`8#{1gv`抚.ׁ/rD(#&bq&u 87p`"#zWل#T7X(c*Mݞbk]9#AR|[݄J^wxvPF'а1 eԗ;#jgCXU_ZDFp?INC+$">9! i\AW6 b_- ({%Ρΰp%.ğ[}Os6%gw`4FW!Ͳz6 ''R1KJ,űcݧ.T +}@F[hWG}r,ZttaeGl­Ž4V{F{tA7oeɊ +yEt2 75=0e(Ĕ;^,x}i{q~*9'VAy# UQdK>N~DDV)h,.}P<ѩĽo,+P䐻bE"않 ,1.·aqk_mcܝY^YI}aFr i'}S"wL bOQ6h渲`YfXjն,wipn#zYJ> }&a")9mp}^R?'}% /m8}; (J^^7@Ώ]sH?P/539?B8 W{~P"fmLUX(ĥٿ Gr~5E~~ A?? i&!o0RgV/%wsek CEz7!r Ňߗp0g=oPrm|)$>Sd_יkLStlrž[|6!l4E_a/߇@a7ߛW|>RO8x9$D{$K SN~!d~u:Gr\=w:r& !Tس3K8\<~~B4@m^rht'^"e/r}.rž_|2!%w-O]a`(O^rHAj {Cr-r'dž˿<@uBn }_@ Iaoa!Qa_7A|E+?F+x@zO^O ۅ|A19y~ig𰐧#Gf`> Rć>%}!>_dyFs/O\sr~Er/ yŀW WP@ ,-m'j!*x])!? ,B>/}y~Զ{qv(xAIv@_-?'^r<W~&}8#^"_%^b26c '$^b ')|K|6K|.E@8y$~`}>_L|)fg$qPLyw>?mO7E 2=i51`PسŜcJހ4̥|?J<}y(d/ fqOapHy̏>x?F~ȏQBDߙHBCk} {> L?\n>Na'K2~1盧 }i b0/'}! ?"@i/< ~!I2y勵ȼLa|) o HGB+B"}9/*9?KeJ@nˁN8U_)VaW? W@O `_ d:y/B֒G/&ež>LϦ2}d=?ʕHJk:ih^| awoI mž;xރ6;|Bo+7;I {o3pb>Yȩ/y=?C?<~!L Y# | w.I>ɀ>rž?)|#|!~& س b,@ˡ x }Ur?!e~r-r'^=-ń~( w=儣ND1 9Gn~ Ƿ) r+r žM{;H?Eso. Kp> Hs{$CB"@@_~)Gd y#B^UسG繻?& WDžCa tB\gr6g(io b|>?y bw̳']> >>F2/|Ey7=y~%isg}| |?@7r_A2R,& 3otYK?=/m#|6!x-b$' b7g &K"}I"riIQ\~ ;9T L=6Ǘ<_&̿h 31G[a?c҃cZr Ep~ H>5A'^mD!ɀ"S b/aDl@οQC+ W|B'rl9?C 9R !rr|s?2t9ež<[FH{?JQ(BFqɅ]q;̳x~*_ 'D22Iװ,Tس'"/٤O {?tL?Ga\/ ;OQ?K\BrX|/B^_d r~9Fa |PB"} o=?JQ~@ X_#a_n!դOSسj %} /z=>0 )C|ـ~,HK^ؿBINa7Ok&rIQ>uP?@k$#MkI?A {"ɄؿNr2mrž |:a6?EaT/sE,>/EL!$}?@YBSs>ׯC@E΃ԿF/vd>BD@B%} S`tBxr+L6˅\]|wB"J_(?K.R^w'/׹HJ{9_LAMO$s(9O!Bn!} /.Nrž |!(.rߥ(9};E#Bk y{H#Oa!)#'Y%}!??g3e+=Yl ^pHKs"B^ }!_w=B^&}> %{2N@MgrzB~?Gg:[/Bv> Ʉi_ {xZr]O'̀ |& _ȇl@xl/8%)&sb?!(ρnr0gw׊`/8 [{@~ {㈗_[-O6=>0[~)ϰK-j> 'Fl@j| 'g#})=+"|/BX|ȕž Ϥ_.\,{ s9^pޤ'~g!G<\X> u* ac-PGrV~')?"3ٿAh`> g-#}6 žPY+I ߛ/<ua;g&}!  |))x/8җ5E1^X+x? ~ K2I\M=|2! {_I4B_Sdӄ0E7#\ ~^pֹ>?x_p ߇ח;g.&}> _/<u)?<җZ|9 ?JrCC㫊x8V q> JHžO&L)`/8:ҧejO#!ukx\~YpdW>rߝ7_|_pֵ#I]d~!w=Z/)xu# 9/"~wg=;v<or"r?Ai {.DGLdG{YAрӁC8?rٿି$8@& ],ˁgs(iHD~-|'r| Pa> 8 zSy4Sw|gr La_|!*qu sˁNgy8|@I*/sW4 xl/8ͤ/|!?n<~җ{ qEH?翭B@Mm (@SMр??翻/ z{O8 |9>h??9'erm;\y\]?BϏ5KL3d;eb>PڋXSJ{?#&ʙׇI~ yQa dB8߷}ﰔH{x|6!߿,5&C^>?}^>xOp6+ }NddSB$} BVsonL>߆2y}x)d?//!>O!Kg's|@~܇<BB>?g EO 9]r|| 9c  MnS59ogG ׷N5#N"3x-ɀ~lO#;|% BN9"G_[7ΖJ<@$rOBb.񼙭O'~: [);~' ,Bx?Ŗ |"\l=I<@O.P~ئA~{?ψˏ֖O BO_1?Om r~O̕"A| , m(9WYO6QL&L*@%`߉4?ʈ?%:~À/ Pe\Ɛ~ UȕmG}1 ?ZQ>ڏ~0!YWr&?d"׶ ˟H3M`ՠ}g)4~>ǿX/l F,K/ʵ"}!#"n'5u$=y,g6}}ߗ0+d>靀\mp/9) Q}O>7ܛ$dgޡO̡3rF/2y~ lB>_-B$}?@߿5TGz'aSB@nk\O"@nPs/p]'or3SMB+?~/d+-BPسBn%}@nmVE=םr\=%+,o`P/*Hdۿ8Sž4B_|)g'.&D޿񁐏HC!_(G'B|,dy}Dg { tL>?9پ!}!_k7=;g_]C% !ߓ>;afUسO!ԝP9_ڶ)ϗ\~>0;m2xOodU~i4?~ !YO%'-d,{>F$= |a?Nv){ '*#)$E~LwGŵ4!3HJ7]"=|!Bf7̯3,TسYb)d f Ygs7W<_ E~K)XHL'xi_c_Nt@w =_|a?Far[M\BZr;^?V qPrmZ%Aa_ j!SسZG|.z+ǀ5  IS r%( r yAG&!ͤOdM {dBY7g윟@Y|. 
or~g'}@"h2T!ck']iB }" BVsgDϿr~gسW6OqV@9x lϪ/ BLSop+so\E<])= \B8_o"}@yZ!דsߤ/[ED8_Aȭ$&A=B~-mB }o@_ Ga!ޟ)O/G{'Ex&8 (˗_ }B>G??R&GIo'e>"V(w#/|93}>Ͽ}\@n' ӂU3B#}!ߟ?+=9S ~ y\ ٩g/ (eg~` |!?ɳ׀ (yywBA?/$ | C|]!;H_EUSg q]OgO"#} LQس>υ|A6g_߃痯B|@k<||@n_HK#{ {Oa}) oZoQؓiVF'`oR~ {%pH'8A8|!?g $EAΊo /MCa3 HKi~Np11 ?'V>ao1*'+ YEl@n?q?PسU8 |o@VGX Ks|VB|$ >O) WkӾȯ5?d|!!HHYE {B>z!>3(aM;|_/רͤ (˱u3C'J\Sg?υN q"?p~*H]?{sJ8k!+x>ا( h|h/r|)ǥBPسˀO"˅l!}o@WMa߇/H_)dž_|> j!#k / _{ Snr3y|d`ݬB+W)wPl ^"? Bw yIa>o?r+=t h?{?9|)r{k!OĬ; =@||qr|s|$fB|O = gҧvIC}!'!_N}|oB^d||&\/' 7Q^L1v <>5?>Ǘ<_Y' a'}?@W<b%= |) e_+c&sXH?Ưl`OI ۡ蟙I?Pڋ#ID^_1VO#)# B?:Ҁw ΑMހԿggCrŘpC/,_^H8b OO+9?|>/o Gu̸}>͔#ƺi YBss }/@{:!>f'7d|/$CG1dH? |!CH4?'Qs{A~'N:dFЬ!JGrɹS1^s㟠(@뗬`@$rO2EsS~( Thl?Ưԗ?B<o?_"#%}!g ssxIk y(g>=}ߋķPbrh=ǿѱDRr/UG$XI<?7?B`_ XE~~% aX P@nj9!{_ xƍx^p\mId(|< }Ъ=ǛbolRl7"aN"^}2l/8MQsfϟY% @#uJxM7ZǔysLk¢LM=m╓-9c.l AmSϜǜuP Y9}9f/\P -dT4HHEXA%sVN7wbu*S%"7Gm5ˣ_9em/3n}heT6iaJYFlψه%ɢ%DѼo/AWnaӴhke(ZIQ%b!֥eBJGm X״Eq^gO܁WR}po:RjC3mwwpQO?hۻ0XC2ʢCtC]MRH9ڠn߾t{rxPoOeҔKjv9RWs̔[thCjTAt߽+08CuFs͐ FsOy}{x6U5vمvE"2iٳ_Gܖ Dt_fs{9Og',Mb%Ļ{:?pmߟuoЎ԰\9 |>WʝO NvnKt?hܩ %VliwYfw;?ٗ-i:\N1~9ٟg݂':vhr~S9*`r~0 rTAPEV;47Mmu~czKܡd~V]IIA>1O1ikvʴO=l-HI6 q`qzYk)gJt78{5gxOwָ76:ΐІ ] MP\X!*65 @% yug~MS}W\qkj<5Pʚ*C!g L[UԺ}Ri^z7x~a@й(چ97fGbȳ2|sf.hnlrgcJ9FXw,R4<~Q]];t9a7 rUD{5*/R,*raCkUM`suMeΥsI`U)+-/  4/k |⻸&\Ԭkp lr6E=jQmi WDʒ6P+;LJLjaw\6qޯY]¢xr445qzs9ϔ)lǗhodi_t$-wȶ?lzkShW|JSiK4CMF%FCec80NcPxJ.tNq}ё;6"ɷ9<##mwLX'z h46P6 P շiՃ8Feɨ\ȍhҺ~W#Wm61{#=*6Z[/c1g̑{bbK-=+CaO4 X_qnG_хὒh:pˍF`qb+e{'6|6D_<΢V T?)J焺:INTnf.:s_-vkjJ7&q__2|j8Ukź4 2l;׉uJd/pB 6+Yd8Zؑ9)~0yd6\? ["x!u%m;6Yōa"mHdVJSHYEYɳc񩱬m6(IE 47E:qyUpX|I,77xn~j 6u﭂b$66+J;YWto˻C_Xƍ7h\]]\g=0rm|6J2 A K. ?6?bj! Capg_d}nAw le@FBUp*}}[A>$|wh^m#2v6C MOfNUm܁{C0RMe(}qsnL:cJsm2>@I˾ށu W\Ӵ)댊En -^E7jl׭@[:Oaec8İwf?`o"8ߟܫ=XF1Gqd* o#] o7>oQ& :sym:;km 95EZdA-qvsRM|Y!"}'DڗvUop[$qj8|{}p41yQo`;>Q; 7_Gtr vPsCQ&_j״ 1bk*r&_*)|Ɉl_t7+&˚Dx &O+q*/h FTD0  DwLjbokbLW[ w%ǸN?` 4<`n@o ntk<^ D?#&ptg$QdS^0<3QV83o˵0"7@,Ư()v[PXm{LNn቏!s;hXNԢM7wfUʂeMkAOͪ< 4f`dл 9PAkߢQ6' .m酅Φzw^}~qcY0z# bmSGo ;Z6+޲*1̢m(i;(U{6ϣ Tb!QL:Ow/;jN\' A:8ULj a&T_~%B ]<ՁFR)}$%ߩ"TzD(3OMuQzx6DE'E}Ob}ubЈB$6XCD7Lژ>+m>g/>gF->wkުvOْJכٌdv&]L/Nxj)sPS=%z{q,#z+t}6.ƑDzkZ >~m6U+se95m^3>b64U)[hmC*^7'+ЪV-&2ala a:"Oi:Nޮ4>c;EH~]kFPrVxė+FuS[zb)4}>v$iyun_s^<-+k#} %)NU*Zd'm6TڳJw?wzsꗮ_+Y\cnXHoљCH:P36Kn:h3ͬ_35Sا{?o%Z޼Sn0pȆI#Vkc>|.Tm#z6j?}kqk_mkEמVEk6u:y_?j]&mȖiDmoAЪ޻])ڴU Ozё-|*LAdW5v hB 7KJylp+aJ$ŏ-=%?={'eϔ:R/}_.zed6RQ#oj'_vZ!3vZa^?Y=!-o}Q7ӷ\{6e]ߺwm ,ژwͺy&gIm`ߥ{ߔҺ8ly)=,B!d>DɃd3[m7,uCcOp+OȲ>_ %qE|#~7pcDd#搿 W3)j n q+"Ga~QP֛htS* hu2,ҡ)䮳>u¦XGPE%k7GOXiincidence/tests/testthat/rds/incidence.res7.rds0000644000176200001440000000036614621104516021304 0ustar liggesusers]MK0+L *9eýCSsY{ּ=.}-]۫x>S XL;bi"uҳM)ۊ_&܇KSII6uSnsK[E2>X_`bCPincidence/tests/testthat/rds/x.sub2.rds0000644000176200001440000000030114621104516017612 0ustar liggesusers]O[ 4>$%K=F!$4uqWc3p Pq#pX)eN@| EK=g٭Ө5RSyF0Zy w9<Uu!Ҿj}z!`n+͢>I~incidence/tests/testthat/rds/df6.rds0000644000176200001440000000036514621104516017162 0ustar liggesusersP0 \Dcw@Dg^ƈ0x;1]ڥGqP!C@WՖ4;V ΁ .`/ͤ1ZE<Ü/2-tޡ: ڟN™̢V>I6?>чڬƬؓ 1b[ŊyAnñ\ektsӡxe\3udE2XbE-incidence/tests/testthat/rds/print2.rds0000644000176200001440000000035714621104516017722 0ustar liggesusersEON1 ( rTLL ѡ7Ll,H!ڴS%NKrtg?}\)BDj$չ:}td]db*sق53 )!8BU'XXéj6baax-`h/r]xՑc+7ӽaGf8Z<2&f40lB 1`6#Y-8֯&2incidence/tests/testthat/rds/print3.rds0000644000176200001440000000041414621104516017715 0ustar liggesusers]J0@ԋ ГTna a+l:u㶉$ӭf@11s*]:c t/?]W#F(҉ʤJez_Wrr筃Agh.nJt,1qs5 o#hW}.virZZvU?LRhBa6{Z!TX:vP'rnUBtb4 
0{xI}incidence/tests/testthat/rds/dfl.rds0000644000176200001440000000030514621104516017242 0ustar liggesusersPK >մOD[ C[z\O!EC7N  )[yX 5!b!u|79ٻNX܏ N!'s|/7l`?GERBza+2]n MqSR U\fqQW=I%=k7dEݞhi1xvR~Fev>ΐvY5qÞ|6эʰ|e/뾽In{(WJQ'mڂ&/SK A;6 f[F_:inRV5HMym)hKP ;N+ȓxI&QkTCincidence/tests/testthat/rds/incidence.res3.rds0000644000176200001440000000057714621104516021304 0ustar liggesusers]N@aX{޻^x-a ES3%~fd-M2oP5Z8h@V(DF+ .JhFmhG:хn}1a`c&1i`se`kX6m`{qcg8.qkwC=G[rH\HmJT.ɭ%sve% \+-eI6IoϴpRRgJXmR7%)չPwQu5fyԙݨ7$oP&srX~ᓦ>Gߺ)a?W e,{1FEj^ao^EںZV#_Ֆ #?,$f! incidence/tests/testthat/test-accessors.R0000644000176200001440000001711114621104516020263 0ustar liggesuserscontext("incidence object accessor tests") set.seed(999) int <- sample(-3:50, 100, replace = TRUE) dat <- as.Date("2018-01-31") + int grp <- sample(letters[1:3], length(dat), replace = TRUE) xg <- incidence(dat, groups = grp) x.1 <- incidence(int) x.7 <- incidence(int, 7L) x.day <- incidence(dat, "day") x.wee <- incidence(dat, "week") x.mon <- incidence(dat, "month") x.mon2yr <- incidence(c(as.Date("2017-12-15"), dat), "month") x.yer <- incidence(c(dat, dat - 365, dat - 365 * 2), "year") test_that("get_interval works for integers", { expect_equal(get_interval(x.1), 1L) expect_equal(get_interval(x.1, integer = FALSE), 1L) }) test_that("get_timespan works", { expect_equal(get_timespan(x.1), x.1$timespan) expect_equal(get_timespan(x.mon), x.mon$timespan) expect_error(get_timespan("coffee"), "Not implemented for class character") }) test_that("get_n works", { expect_equal(get_n(x.1), x.1$n) expect_equal(get_n(x.7), x.7$n) expect_equal(get_n(x.mon), x.mon$n) expect_error(get_n("coffee"), "Not implemented for class character") }) test_that("group_names works", { expect_identical(group_names(xg), letters[1:3]) expect_null(group_names(x.1)) group_names(xg) <- c("foo", "bar", "baz") expect_identical(group_names(xg), c("foo", "bar", "baz")) expect_error(group_names(letters), "Not implemented for class character") expect_error(group_names(letters) <- 1:10) }) test_that("group_names can collapse groups", { xg2 <- group_names(xg, rep("a", 3)) xg3 <- group_names(xg, c("a", "b", "b")) expect_error(group_names(xg, letters[1:4]), "value must be the same length as the number of groups") expect_error(group_names(xg, c(letters[1:2], NA)), "value must be able to be coerced to a character vector") expect_equal(ncol(xg3), 2L) expect_equal(sum(get_counts(xg3)), sum(get_counts(xg))) expect_equal(colSums(get_counts(xg3))[["b"]], sum(colSums(get_counts(xg))[c("b", "c")])) expect_equivalent(xg2, pool(xg)) }) test_that("ncol works", { expect_equal(ncol(xg), 3L) expect_equal(ncol(x.1), 1L) expect_equal(ncol(subset(xg, groups = 1:2)), 2L) }) test_that("get_dates works for integers", { expect_equal(get_dates(x.1), x.1$dates) expect_equal(get_dates(x.1, count_days = TRUE), seq_along(x.1$dates) - 1.0) expect_equal(get_dates(x.1, "center"), x.1$dates + 0.5) expect_equal(get_dates(x.1, "right"), x.1$dates + 1) }) test_that("get_dates borks correctly", { expect_error(get_dates("grind", "Not implemented for class character")) }) test_that("get_interval works for integer weeks", { expect_equal(get_interval(x.7), 7L) expect_equal(get_interval(x.7, integer = FALSE), 7L) expect_error(get_interval("pizza"), "Not implemented for class character") }) test_that("get_dates works for integer weeks", { expect_equal(get_dates(x.7), x.7$dates) expect_equal(get_dates(x.7, count_days = TRUE), 7 * (seq_along(x.7$dates) - 1.0)) expect_equal(get_dates(x.7, "center"), x.7$dates + 3.5) 
expect_equal(get_dates(x.7, "right"), x.7$dates + 7) }) test_that("get_interval works for character days", { expect_equal(get_interval(x.day), 1L) expect_equal(get_interval(x.day, integer = FALSE), "day") }) test_that("get_dates works for character days", { expect_equal(get_dates(x.day), x.day$dates) expect_equal(get_dates(x.day, count_days = TRUE), (seq_along(x.day$dates) - 1.0)) expect_equal(get_dates(x.day, "center"), x.day$dates + 0.5) expect_equal(get_dates(x.day, "right"), x.day$dates + 1.0) }) test_that("get_interval works for character weeks", { expect_equal(get_interval(x.wee), 7L) expect_equal(get_interval(x.wee, integer = FALSE), "week") }) test_that("get_dates works for character weeks", { expect_equal(get_dates(x.wee), x.wee$dates) expect_equal(get_dates(x.wee, count_days = TRUE), 7 * (seq_along(x.wee$dates) - 1.0)) expect_equal(get_dates(x.wee, "center"), x.wee$dates + 3.5) expect_equal(get_dates(x.wee, "right"), x.wee$dates + 7.0) }) test_that("get_interval works for character months", { expect_equal(get_interval(x.mon), c(31, 28, 31)) expect_equal(get_interval(x.mon, integer = FALSE), "month") expect_equal(get_interval(x.mon2yr), c(31, 31, 28, 31)) }) test_that("get_interval works for character years", { expect_equal(get_interval(x.yer), c(366, 365, 365)) expect_equal(get_interval(x.yer, integer = FALSE), "year") }) test_that("get_dates works for character months", { expect_equal(get_dates(x.mon), x.mon$dates) expect_equal(get_dates(x.mon, count_days = TRUE), c(0, 31, 59)) expect_equal(get_dates(x.mon, "center"), x.mon$dates + c(31, 28, 31) / 2) expect_equal(get_dates(x.mon, "right"), x.mon$dates + c(31, 28, 31)) }) test_that("errors happen", { xx <- x.1 xx$interval <- factor("what") expect_error(get_interval(xx), "factor") }) # Data for the get_info and get_fit accessors set.seed(1) dat2 <- sample(1:50, 200, replace = TRUE, prob = 1 + exp(1:50 * 0.1)) sex <- sample(c("female", "male"), 200, replace = TRUE) i.sex.o <- incidence(c(dat2, abs(dat2 - 45) + 45), 7L, groups = c(sex, rev(sex))) test_that("get_counts works with and without groups", { expect_is(get_counts(i.sex.o), "matrix") expect_identical(get_counts(i.sex.o), i.sex.o$counts) expect_identical(get_counts(i.sex.o, "female"), get_counts(i.sex.o, 1)) expect_message(get_counts(i.sex.o, c("female", "nb")), "The following groups were not recognised: nb") expect_error(suppressMessages(get_counts(i.sex.o, "what")), "No groups matched those present in the data: female, male") }) context("incidence_fit* object accessor tests") i.fitlist <- fit_optim_split(i.sex.o) fits <- get_fit(i.fitlist$fit) # Creating an `incidence_fit_list` object with no groups column fits_list <- fits for (i in names(fits)) { fits_list[[i]]$info$pred$groups <- NULL } class(fits_list) <- "incidence_fit_list" attr(fits_list, "locations") <- as.list(names(fits)) test_that("fit_optim_split() returns an incidence_fit_list", { expect_is(i.fitlist$fit, "incidence_fit_list") }) test_that("get_fit() returns a list of incidence fit objects", { for (i in names(fits)) { expect_is(fits[[i]], "incidence_fit", label = i) } expect_identical(fits[[1]], get_fit(fits[[1]])) }) test_that("get_info() will return a vector for r", { rvec <- get_info(i.fitlist$fit, "r") expect_length(rvec, 4) }) test_that("get_info() will return a vector for doubling/halving", { dvec <- get_info(i.fitlist$fit, "doubling") dvec.na <- get_info(i.fitlist$fit, "doubling", na.rm = FALSE) expect_length(dvec, 2) expect_length(dvec.na, 4) expect_identical(dvec, dvec.na[1:2]) hvec <- 
get_info(i.fitlist$fit, "halving") hvec.na <- get_info(i.fitlist$fit, "halving", na.rm = FALSE) expect_length(hvec, 2) expect_length(hvec.na, 4) expect_identical(hvec, hvec.na[3:4]) }) test_that("get_info() will return a data frame for pred", { # Should have groups be female and male pred.g <- get_info(i.fitlist$fit, "pred") # Should have no groups pred.ng <- get_info(fits_list, "pred", groups = NULL) # Should have groups be female and male pred.g1 <- get_info(fits_list, "pred", groups = 1) # Should have groups be before and after pred.g2 <- get_info(i.fitlist$fit, "pred", groups = 2) expect_null(pred.ng$groups) expect_identical(pred.g$groups, pred.g1$groups) expect_identical(levels(pred.g2$groups), c("before", "after")) }) test_that("get_info() will return matrices for *.conf", { hconf <- get_info(i.fitlist$fit, "halving.conf") hconf.na <- get_info(i.fitlist$fit, "halving.conf", na.rm = FALSE) expect_is(hconf, "matrix") expect_is(hconf.na, "matrix") expect_length(hconf, 4) expect_identical(hconf, hconf.na[-(1:2), ]) }) incidence/tests/testthat/test-standards.R0000644000176200001440000000331614621104516020263 0ustar liggesuserscontext("standardisation tests") d <- c('2019-04-18', '2019-04-14', '2019-03-31', '2019-04-03', '2019-03-30', '2019-04-05', '2019-03-12', '2019-04-07', '2019-04-02', '2019-03-09', '2019-04-20', '2019-04-23', '2019-03-07', '2019-03-25', '2019-03-27', '2019-04-13', '2019-04-15', '2019-04-04', '2019-03-30', '2019-03-19') test_that("standard will override first_date", { expect_output(print(incidence(d, interval = "week", standard = TRUE)), "2019-03-04") expect_output(print(incidence(d, interval = "week", standard = TRUE)), "2019-W10") expect_output(print(incidence(d, interval = "isoweek", standard = TRUE)), "2019-W10") expect_output(print(incidence(d, interval = "1 isoweek", standard = TRUE)), "2019-W10") expect_output(print(incidence(d, interval = "monday week", standard = TRUE)), "2019-W10") expect_output(print(incidence(d, interval = "week", standard = FALSE)), "2019-03-07") expect_error(incidence(d, interval = "isoweek", standard = FALSE), "The interval 'isoweek' implies a standard and cannot be used with `standard = FALSE`") expect_error(incidence(d, interval = "monday week", standard = FALSE), "The interval 'monday week' implies a standard and cannot be used with `standard = FALSE`") expect_output(print(incidence(d, interval = "month", standard = TRUE)), "2019-03-01") expect_output(print(incidence(d, interval = "month", standard = FALSE)), "2019-03-07") expect_output(print(incidence(d, interval = "year", standard = TRUE)), "2019-01-01") expect_output(print(incidence(d, interval = "year", standard = FALSE)), "2019-03-07") }) incidence/tests/testthat/data_cache/0000755000176200001440000000000014621104516017231 5ustar liggesusersincidence/tests/testthat/data_cache/mfcat.rds0000644000176200001440000000032614621104516021036 0ustar liggesusersW10 SL}+C_x2@/I6sD,rP0 ʸ၊jz_6jٻ[o10z\x˭2ӞW~ fh>.jM-7_Q镋Ilv5,feB_Iu sRo}i2sEO۵ >Zincidence/tests/testthat/data_cache/mfdat.rds0000644000176200001440000000077414621104516021046 0ustar liggesusers]m/Q >x-R[[V[ZUDFO6#ĤNgΜB)OBi\a&9SA", `N#M9W*ty|orr5N寣Iݶ`OV1&x5 ޭ-#ruq]3˽ʙ  sݜ5G nOk'ǿ@_n;ϩyaǼi7'7p:֫構NƏyoHzCyhؑCۙt7=GWy/+46w") expect_output(print(i.fit.sex$fit), "[^N][^A]") expect_true(any(is.na(get_info(i.fit.sex$fit, "halving", na.rm = FALSE)))) ## errors expect_error(fit_optim_split(i, window = -1), "No date left to try after defining splits to try.") }) test_that("internals for fitting", { skip_on_cran() expect_null(extract_info(NULL)) 
}) test_that("fitting results are the same for incidence fits on Dates and POSIXct", { days <- 1:14 dat_cases <- round(exp(.2*(days))) dat_dates_Date <- rep(as.Date(Sys.Date() + days), dat_cases) dat_dates_POSIXct <- as.POSIXct(dat_dates_Date) iD <- incidence(dat_dates_Date) iP <- incidence(dat_dates_POSIXct) expect_equal(fit(iP), fit(iD)) }) incidence/tests/testthat/test-plot.R0000644000176200001440000002146514621104516017263 0ustar liggesuserscontext("Test plotting") test_that("plot for incidence object", { skip_on_cran() set.seed(1) # dat <- sample(1:50, 200, replace = TRUE, prob = 1 + exp(1:50 * 0.1)) dat <- readRDS("data_cache/mfdat.rds")[1:200] dat2 <- as.Date("2016-01-02") + dat dat3 <- as.POSIXct(dat2) sex <- c(1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1) sex <- ifelse(sex == 1, "female", "male") dat4 <- c(dat2, sample(dat2, replace = TRUE) + 50, sample(dat2, replace = TRUE) + 100 ) # constructing data i <- incidence(dat) iog <- incidence(dat, groups = rep("this group", 200)) i.3 <- incidence(dat, 3L) i.14 <- incidence(dat, 14L) i.sex <- incidence(dat, 7L, groups = sex) # Dates --------------------------------------------------------------------- i.POSIX <- incidence(as.POSIXct(dat2, tz = "GMT")) i.isoweek <- incidence(dat2, 7L, standard = TRUE) # character intervals ------------------------------------------------------- # Weekly intervals ------------------------------------------ i.epiweek <- incidence(dat2, "1 epiweek", standard = TRUE) i.twoepiweek <- incidence(dat2, "2 epiweeks", standard = TRUE) i.sunweek <- incidence(dat2, "1 sunday week") i.monweek <- incidence(dat2, "1 monday week") i.tueweek <- incidence(dat2, "1 tuesday week") i.wedweek <- incidence(dat2, "1 wednesday week") i.thuweek <- incidence(dat2, "1 thursday week") i.friweek <- incidence(dat2, "1 friday week") i.satweek <- incidence(dat2, "1 saturday week") expect_identical(i.epiweek, i.sunweek) expect_identical(i.isoweek$weeks, i.monweek$weeks) expect_identical(get_counts(i.isoweek), get_counts(i.monweek)) # months and quarters --------------------------------------- i.sexmonth <- incidence(dat4, "1 month", groups = rep(sex, 3)) i.sexquarter <- incidence(dat4, "1 quarter", groups = rep(sex, 3)) # special case for fit_optim_split: i.sex.o <- incidence(c(dat, abs(dat - 45) + 45), 7L, groups = c(sex, rev(sex))) fit.i <- suppressWarnings(fit(i)) fit.i.2 <- suppressWarnings(fit(i, split = 30)) fit.i.3 <- suppressWarnings(fit(i.3[5:13])) fit.POSIX <- suppressWarnings(fit(i.POSIX)) fit.sex <- suppressWarnings(fit(i.sex)) fit.sex.o <- suppressWarnings(fit_optim_split(i.sex.o)) fit.o <- suppressWarnings(fit_optim_split(pool(i.sex.o))) p.fit.i <- plot(fit.i) p.fit.i.2 <- plot(i, fit = fit.i.2, color = "lightblue") p.fit.sex <- plot(fit.sex) p.optim.sex <- fit.sex.o$plot p.optim.sex.fit <- plot(fit.sex.o$fit) p.optim <- fit.o$plot p.i <- plot(i) p.i.cum <- plot(cumulate(i)) p.i.square <- plot(i, show_cases = TRUE) p.i.14 <- plot(i.14) p.i.2 <- plot(i, color = "blue", alpha = .2) 
p.i.3 <- plot(i.3, fit = fit.i.3, color = "red") p.sex <- plot(i.sex) p.sex.cum <- plot(cumulate(i.sex)) p.sex.2 <- plot(i.sex, fit = fit.sex) suppressMessages(p.sex.o <- plot(i.sex, fit = fit.sex.o$fit)) p.sex.3 <- plot(i.sex, fit = fit.sex, col_pal = rainbow) p.sex.4 <- plot(i.sex, fit = fit.sex, color = c(male = "salmon3", female = "gold2")) p.isoweek <- plot(i.isoweek) p.isoweek.2 <- plot(i.isoweek, labels_week = FALSE) p.epiweek <- plot(i.epiweek) p.epiweek.2 <- plot(i.epiweek, labels_week = FALSE) p.epiweek.b <- plot(i.epiweek, labels_week = FALSE, n_breaks = nrow(i.epiweek)) p.twoepiweek <- plot(i.twoepiweek, n_breaks = nrow(i.twoepiweek)) p.sunweek <- plot(i.sunweek) expect_warning({ p.sunweek.2 <- plot(i.sunweek, labels_iso = FALSE) }, "labels_iso is deprecated. Use `labels_week` instead") p.monweek <- plot(i.monweek) expect_warning({ p.monweek.2 <- plot(i.monweek, labels_week = FALSE, labels_iso = TRUE) }, "labels_iso is deprecated. The value of `labels_week` will be used") p.tueweek <- plot(i.tueweek) p.tueweek.2 <- plot(i.tueweek, labels_week = FALSE) p.wedweek <- plot(i.wedweek) p.wedweek.2 <- plot(i.wedweek, labels_week = FALSE) p.thuweek <- plot(i.thuweek) p.thuweek.2 <- plot(i.thuweek, labels_week = FALSE) p.friweek <- plot(i.friweek) p.friweek.2 <- plot(i.friweek, labels_week = FALSE) p.satweek <- plot(i.satweek) p.satweek.2 <- plot(i.satweek, labels_week = FALSE) p.POSIX <- plot(i.POSIX) p.POSIX.f <- plot(i.POSIX, fit = fit.POSIX) p.month <- plot(i.sexmonth) p.quarter <- plot(i.sexquarter) ## messages expect_message(plot(i.sex, show_cases = TRUE, stack = FALSE), "`show_cases` requires the argument `stack = TRUE`") expect_message(p.iog <- plot(iog, color = c("this group" = "blue", "that group" = "red")), "1 colors were not used: \"that group\" = \"red\"") ## errors expect_error(plot(i, fit = "tamere"), "Fit must be a 'incidence_fit' object, or a list of these") expect_error(plot(i, fit = list(fit.i, "tamere")), "The 2-th item in 'fit' is not an 'incidence_fit' object, but a character") ## Normal plots vdiffr::expect_doppelganger("incidence fit", p.fit.i) vdiffr::expect_doppelganger("incidence plot with two fitting models", p.fit.i.2) vdiffr::expect_doppelganger("grouped incidence fit", p.fit.sex) vdiffr::expect_doppelganger("incidence plot with default interval", p.i) vdiffr::expect_doppelganger("incidence plot with default interval, cumulative", p.i.cum) vdiffr::expect_doppelganger("incidence plot with interval of 14 days", p.i.14) vdiffr::expect_doppelganger("incidence plot with specified color and alpha", p.i.2) vdiffr::expect_doppelganger("incidence plot with interval of 3 days, fit and specified color", p.i.3) vdiffr::expect_doppelganger("grouped incidence plot", p.sex) vdiffr::expect_doppelganger("grouped incidence plot with one group", p.iog) vdiffr::expect_doppelganger("grouped incidence plot, cumulative", p.sex.cum) vdiffr::expect_doppelganger("grouped incidence plot with fit", p.sex.2) vdiffr::expect_doppelganger("grouped incidence plot with color palette", p.sex.3) vdiffr::expect_doppelganger("grouped incidence plot with specified color", p.sex.4) vdiffr::expect_doppelganger("incidence plot from POSIXct data", p.POSIX) vdiffr::expect_doppelganger("incidence plot from POSIXct data with fit", p.POSIX.f) vdiffr::expect_doppelganger("incidence plot with isoweek labels", p.isoweek) vdiffr::expect_doppelganger("incidence plot without isoweek labels", p.isoweek.2) vdiffr::expect_doppelganger("incidence plot by month", p.month) vdiffr::expect_doppelganger("incidence 
plot by quarter", p.quarter) vdiffr::expect_doppelganger("incidence fit plot with split", p.sex.o) vdiffr::expect_doppelganger("incidence fit list plot with split", p.optim.sex.fit) vdiffr::expect_doppelganger("split optimum plot", p.optim.sex) vdiffr::expect_doppelganger("split optimum plot pooled", p.optim) vdiffr::expect_doppelganger("epiquares single plot", p.i.square) ## Weekly plots vdiffr::expect_doppelganger("sun weekly incidence with labels", p.sunweek) vdiffr::expect_doppelganger("sun weekly incidence with labels", p.epiweek) vdiffr::expect_doppelganger("sun weekly incidence with dates", p.sunweek.2) vdiffr::expect_doppelganger("sun weekly incidence with dates", p.epiweek.2) vdiffr::expect_doppelganger("sun weekly incidence with dates and full breaks", p.epiweek.b) vdiffr::expect_doppelganger("sun semi-weekly incidence with dates and full breaks", p.twoepiweek) vdiffr::expect_doppelganger("mon weekly incidence with labels", p.monweek) vdiffr::expect_doppelganger("mon weekly incidence with dates", p.monweek.2) vdiffr::expect_doppelganger("tue weekly incidence with labels", p.tueweek) vdiffr::expect_doppelganger("tue weekly incidence with dates", p.tueweek.2) vdiffr::expect_doppelganger("wed weekly incidence with labels", p.wedweek) vdiffr::expect_doppelganger("wed weekly incidence with dates", p.wedweek.2) vdiffr::expect_doppelganger("thu weekly incidence with labels", p.thuweek) vdiffr::expect_doppelganger("thu weekly incidence with dates", p.thuweek.2) vdiffr::expect_doppelganger("fri weekly incidence with labels", p.friweek) vdiffr::expect_doppelganger("fri weekly incidence with dates", p.friweek.2) vdiffr::expect_doppelganger("sat weekly incidence with labels", p.satweek) vdiffr::expect_doppelganger("sat weekly incidence with dates", p.satweek.2) }) incidence/tests/testthat/test-palettes.R0000644000176200001440000000110714621104516020115 0ustar liggesuserscontext("Color palettes") test_that("incidence_pal1", { skip_on_cran() expect_error(incidence_pal1(NULL), "n is not a number") for (n in 1:30) { expect_length(incidence_pal1(n), n) } }) test_that("incidence_pal1_light", { skip_on_cran() expect_error(incidence_pal1_light(NULL), "n is not a number") for (n in 1:30) { expect_length(incidence_pal1_light(n), n) } }) test_that("incidence_pal1_dark", { skip_on_cran() expect_error(incidence_pal1_dark(NULL), "n is not a number") for (n in 1:30) { expect_length(incidence_pal1_dark(n), n) } }) incidence/tests/testthat/test-subset.R0000644000176200001440000000650714621104516017612 0ustar liggesuserscontext("Subset of incidence objects") test_that("[ operator for incidence objects", { skip_on_cran() dat <- c(1L, 8L, 2L, 9L, 10L, -3L, 4L, 9L, 4L, 3L, 10L, 3L, 6L, 5L, -2L, 9L, 0L, -3L, 1L, 10L, 9L, 6L, 5L, 10L, 6L, 6L, 4L, 5L, 1L, -1L, 10L, 9L, 6L, 8L, -3L, 3L, 7L, 0L, 1L, 0L, -2L, 2L, 2L, 2L, -1L, -2L, 0L, 3L, 0L, 9L) # set.seed(123) # dat <- as.integer(sample(-3:10, 50, replace = TRUE)) x <- incidence(dat) y <- incidence(dat + as.Date("2016-01-12"), 7L) z <- incidence(dat + as.Date("2016-01-12"), "epiweek") s <- incidence(dat + as.Date("2016-01-12"), "sunday week") m <- incidence(dat + as.Date("2016-01-12"), "MMWR week") # epiweeks and MMWR weeks start on Sunday expect_identical(z[1:2], s[1:2]) expect_identical(z[], m[]) x.sub1 <- x[c(3,5,7,8)] expect_equal_to_reference(x.sub1, file = "rds/x.sub1.rds") x.sub2 <- x[-c(5,1,2)] expect_equal_to_reference(x.sub2, file = "rds/x.sub2.rds") expect_equal(y[1:2]$weeks, y$weeks[1:2]) expect_equal(z[1:2]$weeks, z$weeks[1:2]) y.sub1 <- y[1:2] 
expect_equal_to_reference(y.sub1, file = "rds/y.sub1.rds") }) test_that("subset for incidence objects", { skip_on_cran() dat <- c(1L, 8L, 2L, 9L, 10L, -3L, 4L, 9L, 4L, 3L, 10L, 3L, 6L, 5L, -2L, 9L, 0L, -3L, 1L, 10L, 9L, 6L, 5L, 10L, 6L, 6L, 4L, 5L, 1L, -1L, 10L, 9L, 6L, 8L, -3L, 3L, 7L, 0L, 1L, 0L, -2L, 2L, 2L, 2L, -1L, -2L, 0L, 3L, 0L, 9L) # set.seed(123) # dat <- as.integer(sample(-3:10, 50, replace = TRUE)) x <- incidence(dat) x.sub3 <- subset(x, from = 0) expect_equal_to_reference(x.sub3, file = "rds/x.sub3.rds") x.sub4 <- subset(x, to = 5) expect_equal_to_reference(x.sub4, file = "rds/x.sub4.rds") x.sub5 <- subset(x, from = 1, to = 4) expect_equal_to_reference(x.sub5, file = "rds/x.sub5.rds") ## round trip expect_identical(x, x[]) ## corner cases expect_error(subset(x, from = 19), "No data retained.") expect_error(subset(x, to = -2319), "No data retained.") expect_error(subset(x, from = Inf, to = -2319), "No data retained.") }) test_that("numeric subset works with dates ", { skip_on_cran() x <- incidence(as.Date("2001-01-01") + 1:10, 2L) expect_equal(subset(x, from = 1, to = 2)$dates, x$dates[1:2]) expect_equal(subset(x, from = -10, to = 2)$dates, x$dates[1:2]) expect_equal(as.data.frame(subset(x, from = 0, to = 1e3)), as.data.frame(x)) expect_identical(x, x[]) }) test_that("numeric subset works with dates and character strings", { x <- incidence(as.Date("2001-01-01") + 1:100, "month") expect_equal(subset(x, from = 1, to = -1)$dates, x$dates[1]) expect_equal(subset(x, from = 1, to = 2)$dates, x$dates[1:2]) expect_equal(subset(x, from = -10, to = 2)$dates, x$dates[1:2]) expect_equal(subset(x, from = 5, to = 5)$dates, x$dates[4]) expect_equal(as.data.frame(subset(x, from = 0, to = 1e3)), as.data.frame(x)) expect_identical(x, x[]) }) test_that("an erroneous group will give a sensible error", { i <- incidence(sample(1:10, 100, replace = TRUE), groups = rep(c("a", "b"), length.out = 100)) expect_error(i[, "c"], "The following group does not exist: 'c'") expect_error(i[, c("grind", "core")], "The following groups do not exist: 'grind', 'core'") }) incidence/tests/testthat/test-conversions.R0000644000176200001440000001310514621104516020645 0ustar liggesuserscontext("Conversions of incidence objects") test_that("as.data.frame works", { skip_on_cran() # skip("re-write with aweek in place") dat <- as.integer(c(0,1,2,2,3,5,7)) dat2 <- as.Date("2016-01-02") + dat fac <- factor(c(1, 2, 3, 3, 3, 3, 1)) one_group <- rep("a", 7) i_group_df <- data.frame( dates = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), "1" = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L), "2" = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), "3" = c(0L, 0L, 2L, 1L, 0L, 1L, 0L, 0L), check.names = FALSE, stringsAsFactors = TRUE ) # one group i_og_df <- i_group_df[, 1, drop = FALSE] i_og_df$a <- rowSums(i_group_df[-1]) i_group_df_long <- data.frame( dates = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), counts = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 0L, 1L, 0L, 0L), groups = as.factor(c("1", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3")), stringsAsFactors = TRUE ) i <- incidence(dat, groups = fac) iog <- incidence(dat, groups = one_group) i.7 <- incidence(dat2, 7L, standard = TRUE) i.7.group <- incidence(dat2, 7L, standard = TRUE, groups = fac) df <- as.data.frame(i) df1 <- as.data.frame(i, long = TRUE) df2 <- as.data.frame(incidence(1:2)) df2res <- data.frame( dates = c(1L, 2L), counts = c(1L, 1L) ) df3 
<- as.data.frame(i.7) df3res <- data.frame( dates = as.Date(c("2015-12-28", "2016-01-04")), weeks = c("2015-W53", "2016-W01"), isoweeks = as.factor(c("2015-W53", "2016-W01")), counts = c(2L, 5L) ) df3res$weeks <- aweek::date2week(df3res$dates, 1L, floor_day = TRUE, factor = TRUE) df4 <- as.data.frame(i.7, long = TRUE) df5 <- as.data.frame(i.7.group) df5res <- data.frame( dates = as.Date(c("2015-12-28", "2016-01-04")), weeks = c("2015-W53", "2016-W01"), isoweeks = as.factor(c("2015-W53", "2016-W01")), "1" = c(1L, 1L), "2" = c(1L, 0L), "3" = c(0L, 4L), check.names = FALSE ) df5res$weeks <- aweek::date2week(df5res$dates, 1L, floor_day = TRUE, factor = TRUE) df6 <- as.data.frame(i.7.group, long = TRUE) df6res <- data.frame( dates = as.Date(c("2015-12-28", "2016-01-04", "2015-12-28", "2016-01-04", "2015-12-28", "2016-01-04")), weeks = c("2015-W53", "2016-W01", "2015-W53", "2016-W01", "2015-W53", "2016-W01"), isoweeks = as.factor(c("2015-W53", "2016-W01", "2015-W53", "2016-W01", "2015-W53", "2016-W01")), counts = c(1L, 1L, 1L, 0L, 0L, 4L), groups = as.factor(c("1", "1", "2", "2", "3", "3")) ) df6res$weeks <- aweek::date2week(df6res$dates, 1L, floor_day = TRUE, factor = TRUE) df7 <- as.data.frame(iog) df7res <- data.frame( dates = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), a = c(1L, 1L, 2L, 1L, 0L, 1L, 0L, 1L) ) df8 <- as.data.frame(iog, long = TRUE) df8res <- data.frame( dates = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), counts = c(1L, 1L, 2L, 1L, 0L, 1L, 0L, 1L), groups = as.factor(c("a", "a", "a", "a", "a", "a", "a", "a")) ) # expect_equal_to_reference(df, file = "rds/df.rds") expect_identical(df, i_group_df) # expect_equal_to_reference(dfl, file = "rds/dfl.rds") expect_identical(df1, i_group_df_long) # expect_equal_to_reference(df2, file = "rds/df2.rds") expect_identical(df2, df2res) # expect_equal_to_reference(df3, file = "rds/df3.rds") expect_identical(df3, df3res) expect_equal(df3, df4) # expect_equal_to_reference(df5, file = "rds/df5.rds") expect_identical(df5, df5res) # expect_equal_to_reference(df6, file = "rds/df6.rds") expect_identical(df6, df6res) expect_named(df7, c("dates", "a")) expect_named(df8, c("dates", "counts", "groups")) }) test_that("as.incidence works", { skip_on_cran() dates_int <- sample(1:15, 100, replace = TRUE) dates <- as.Date("2017-04-01") + dates_int groups <- sample(letters[1:3], 100, replace = TRUE) i1 <- incidence(dates, interval = 2) i2 <- incidence(dates_int) i3 <- incidence(dates, interval = 7, groups = groups) i4 <- incidence(dates_int, interval = 7, groups = groups) expect_equal(as.incidence(i1$counts, i1$dates), i1) expect_equal(as.incidence(as.vector(i1$counts), i1$dates), i1) expect_equal(as.incidence(i2$counts, i2$dates), i2) expect_equal(as.incidence(i3$counts, i3$dates), i3) expect_equal(as.incidence(rep(1,10)), incidence(1:10)) expect_equal(as.incidence(get_counts(i4), interval = 7L), i4) expect_equal(as.incidence(as.data.frame(get_counts(i4)), interval = 7L), i4) msg <- "Interval needs to be specified if there is only one date." expect_error(as.incidence(i3$counts[1,,drop = FALSE], i3$dates[1]), msg) msg <- "Columns should be named to label groups." 
expect_error(as.incidence(unname(i3$counts), i3$dates), msg) }) incidence/tests/testthat/test-pool.R0000644000176200001440000000057314621104516017253 0ustar liggesuserscontext("Pool") test_that("Pool works", { skip_on_cran() expect_error(pool(1), "x should be an 'incidence' object \\(its class is: numeric\\)") expect_identical(incidence(1L), pool(incidence(1L))) dat <- as.integer(c(0, 1, 2, 2, 3, 5, 7)) fac <- factor(c(1, 2, 3, 3, 3, 3, 1)) expect_identical(pool(incidence(dat, groups = fac)), incidence(dat)) }) incidence/tests/testthat.R0000644000176200001440000000007614621104516015323 0ustar liggesuserslibrary(testthat) library(incidence) test_check("incidence") incidence/vignettes/0000755000176200001440000000000014626316445014216 5ustar liggesusersincidence/vignettes/overview.Rmd0000644000176200001440000002237014621107763016530 0ustar liggesusers--- title: "Overview of the incidence package" author: "Thibaut Jombart, Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 2 vignette: > %\VignetteIndexEntry{Overview} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` *incidence* implements functions and classes to compute, handle, visualise and model incidences from dates data. This vignette provides an overview of current features. It largely reproduces the content of `REAME.md`.
# Installing the package

To install the current stable, CRAN version of the package, type:

```{r install, eval=FALSE}
install.packages("incidence")
```

To benefit from the latest features and bug fixes, install the development (*GitHub*) version of the package using:

```{r install2, eval=FALSE}
devtools::install_github("reconhub/incidence")
```

Note that this requires the package *devtools* to be installed.
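Once installed from either source, the package can be loaded and its version checked in the usual way; a minimal sketch using base R utilities only:

```{r, eval = FALSE}
## load the package and check which version is installed
library(incidence)
packageVersion("incidence")
```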
# What does it do?

The main functions of the package include:

- **`incidence`**: compute incidence from dates in various formats; any fixed time interval can be used; the returned object is an instance of the (S3) class *incidence*.

- **`plot`**: this method (see `?plot.incidence` for details) plots *incidence* objects, and can also add predictions of the model(s) contained in an *incidence_fit* object (or a list of such objects).

- **`fit`**: fit one or two exponential models (i.e. linear regression on log-incidence) to an *incidence* object; two models are calibrated only if a date is provided to split the time series in two (argument `split`); this is typically useful to model the two phases of exponential growth and decrease of an outbreak; each model returned is an instance of the (S3) class *incidence_fit*, each of which contains various useful information (e.g. growth rate *r*, doubling/halving time, predictions and confidence intervals); results can be plotted using `plot`, or added to an existing *incidence* plot using the piping-friendly function `add_incidence_fit`.

- **`fit_optim_split`**: finds the optimal date to split the time series in two, typically around the peak of the epidemic.

- **`[`**: lower-level subsetting of *incidence* objects, allowing one to specify which dates and groups to retain; uses a syntax similar to matrices, i.e. `x[i, j]`, where `x` is the *incidence* object, `i` a subset of dates, and `j` a subset of groups.

- **`subset`**: subset an *incidence* object by specifying a time window.

- **`pool`**: pool incidence from different groups into one global incidence time series.

- **`cumulate`**: computes cumulative incidence over time from an `incidence` object.

- **`as.data.frame`**: converts an *incidence* object into a `data.frame` containing dates and incidence values.

- **`bootstrap`**: generates a bootstrapped *incidence* object by re-sampling, with replacement, the original dates of events.

- **`find_peak`**: locates the peak time of the epicurve.

- **`estimate_peak`**: uses bootstrap to estimate the peak time (and related confidence interval) of a partially observed outbreak.

# Worked example: simulated Ebola outbreak

## Loading the data

This example uses the simulated Ebola Virus Disease (EVD) outbreak from the package [*outbreaks*](https://cran.r-project.org/package=outbreaks). We will compute incidence for various time steps, calibrate two exponential models around the peak of the epidemic, and analyse the results.

First, we load the data:

```{r, data}
library(outbreaks)
library(ggplot2)
library(incidence)
dat <- ebola_sim$linelist$date_of_onset
class(dat)
head(dat)
```

## Computing and plotting incidence

We compute the daily incidence:

```{r, incid1}
i <- incidence(dat)
i
plot(i)
```

The daily incidence is quite noisy, but we can easily compute other incidences using larger time intervals:

```{r, interv}
# weekly, starting on Monday (ISO week, default)
i.7 <- incidence(dat, interval = "1 week")
plot(i.7)
# semi-weekly, starting on Saturday
i.14 <- incidence(dat, interval = "2 saturday weeks")
plot(i.14, border = "white")
## monthly
i.month <- incidence(dat, interval = "1 month")
plot(i.month, border = "white")
```

`incidence` can also compute incidence by specified groups using the `groups` argument.
For instance, we can compute incidence by gender: ```{r, gender} i.7.sex <- incidence(dat, interval = "1 week", groups = ebola_sim$linelist$gender) i.7.sex plot(i.7.sex, stack = TRUE, border = "grey") ``` We can do the same for hospitals, using the 'clean' version of the dataset, with some customization of the legend: ```{r, hosp} i.7.hosp <- with(ebola_sim_clean$linelist, incidence(date_of_onset, interval = "week", groups = hospital)) i.7.hosp head(get_counts(i.7.hosp)) plot(i.7.hosp, stack=TRUE) + theme(legend.position= "top") + labs(fill="") ``` ## Handling `incidence` objects `incidence` objects can be manipulated easily. The `[` operator implements subsetting of dates (first argument) and groups (second argument). For instance, to keep only the peak of the distribution: ```{r, middle} i[100:250] plot(i[100:250]) ``` Or to keep every other week: ```{r, stripes} i.7[c(TRUE,FALSE)] plot(i.7[c(TRUE,FALSE)]) ``` Some temporal subsetting can be even simpler using `subset`, which permits to retain data within a specified time window: ```{r, tail} i.tail <- subset(i, from=as.Date("2015-01-01")) i.tail plot(i.tail, border="white") ``` Subsetting groups can also matter. For instance, let's try and visualise the incidence based on onset of symptoms by outcome: ```{r, i7outcome} i.7.outcome <- incidence(dat, 7, groups=ebola_sim$linelist$outcome) i.7.outcome plot(i.7.outcome, stack = TRUE, border = "grey") ``` By default, `incidence` treats missing data (NA) as a separate group (see argument `na_as_group`). We could disable this to retain only known outcomes, but alternatively we can simply subset the object to exclude the last (3rd) group: ```{r, groupsub} i.7.outcome[,1:2] plot(i.7.outcome[,1:2], stack = TRUE, border = "grey") ``` Groups can also be collapsed into a single time series using `pool`: ```{r, pool} i.pooled <- pool(i.7.outcome) i.pooled identical(i.7$counts, i.pooled$counts) ``` ## Modelling incidence Incidence data, excluding zeros, can be modelled using log-linear regression of the form: log(*y*) = *r* x *t* + *b* where *y* is the incidence, *r* is the growth rate, *t* is the number of days since a specific point in time (typically the start of the outbreak), and *b* is the intercept. Such model can be fitted to any incidence object using `fit`. Of course, a single log-linear model is not sufficient for modelling our epidemic curve, as there is clearly an growing and a decreasing phase. As a start, we can calibrate a model on the first 20 weeks of the epidemic: ```{r, fit1} plot(i.7[1:20]) early.fit <- fit(i.7[1:20]) early.fit ``` The resulting objects (known as `incidence_fit` objects) can be plotted, in which case the prediction and its confidence interval is displayed: ```{r} plot(early.fit) ``` However, a better way to display these predictions is adding them to the incidence plot using the argument `fit`: ```{r} plot(i.7[1:20], fit = early.fit) ``` In this case, we would ideally like to fit two models, before and after the peak of the epidemic. This is possible using the following approach, if you know what date to use to split the data in two phases: ```{r, fit.both} fit.both <- fit(i.7, split=as.Date("2014-10-15")) fit.both plot(i.7, fit=fit.both) ``` This is much better, but the splitting date is not completely optimal. To look for the best possible splitting date (i.e. 
the one maximizing the average fit of both models), we use:

```{r, optim}
best.fit <- fit_optim_split(i.7)
best.fit
plot(i.7, fit=best.fit$fit)
```

These models are very good approximations of these data, showing a doubling time of `r round(get_info(best.fit$fit, "doubling"), 1)` days during the first phase, and a halving time of `r round(get_info(best.fit$fit, "halving"), 1)` days during the second.

To access these parameters, you can use the `get_info()` function. The possible parameters are:

- "r", the daily growth rate
- "doubling", the rate of doubling in days (if "r" is positive)
- "halving", the rate of halving in days (if "r" is negative)
- "pred", a data frame of incidence predictions

For "r", "doubling", and "halving", you can also add ".conf" to get the confidence intervals. Here's how you can get the doubling and halving times of the above epi curve:

```{r, get_info}
get_info(best.fit$fit, "doubling") # doubling time
get_info(best.fit$fit, "doubling.conf") # confidence interval
get_info(best.fit$fit, "halving")
get_info(best.fit$fit, "halving.conf")
```

Note that `fit` will also take groups into account if incidence has been computed for several groups:

```{r, optim2}
best.fit2 <- fit_optim_split(i.7.sex)
best.fit2
plot(i.7.sex, fit=best.fit2$fit)
```

Using `get_info()` on this fit object will return all groups together:

```{r, get_info_groups}
get_info(best.fit2$fit, "doubling") # doubling time
get_info(best.fit2$fit, "doubling.conf") # confidence interval
get_info(best.fit2$fit, "halving")
get_info(best.fit2$fit, "halving.conf")
```
incidence/vignettes/conversions.Rmd0000644000176200001440000001061314621104516017220 0ustar liggesusers---
title: "Conversions to and from the incidence class"
author: "Thibaut Jombart, Zhian N. Kamvar"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 2
vignette: >
  %\VignetteIndexEntry{Conversions}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, echo = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width=7,
  fig.height=5
)
```

This vignette documents two types of conversion which can be made using the *incidence* class:

- *'exports'*: conversion from an *incidence* object to another type of object; this can be useful for processing incidence data in other software, or for reporting results.

- *'imports'*: conversion from already computed incidence into an *incidence* object; this can be useful for using features of the *incidence* package for data handling and plotting with incidence data computed elsewhere.
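Both directions are illustrated in detail in the sections below. As a compact preview, the schematic sketch below shows the two conversions side by side; `my_dates` is only a hypothetical placeholder for a vector of dates, not an object defined in this vignette:

```{r, eval = FALSE}
## export: incidence object -> data.frame
x  <- incidence(my_dates)   # 'my_dates' is a placeholder vector of dates
df <- as.data.frame(x)

## import: pre-computed counts -> incidence object
y  <- as.incidence(df$counts, dates = df$dates)
```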
# Exporting results To export results, we first compute semi-weekly incidence (with weeks starting on Sunday, the beginning of the CDC epiweek) by gender from the simulated Ebola data used in the [overview vignette](https://www.repidemicsconsortium.org/incidence/articles/overview.html): ```{r example} library(outbreaks) library(incidence) dat <- ebola_sim$linelist$date_of_onset i_14 <- incidence(dat, interval = "2 epiweeks", groups = ebola_sim$linelist$gender) i_14 plot(i_14, border = "white") ``` To export the data to a `data.frame`, one simply needs: ```{r} as.data.frame(i_14) ``` The first column contains the dates marking the (inclusive) left side of the time intervals used for computing incidence, and the other columns give counts for the different groups. This function also has an option for exporting data as a 'long' format, i.e. with a column for 'groups' and a column for counts. This format can be useful especially when working with *ggplot2*, which expect data in this shape: ```{r, long} df <- as.data.frame(i_14, long = TRUE) head(df) tail(df) ## example of custom plot using steps: library(ggplot2) ggplot(df, aes(x = dates, y = counts)) + geom_step(aes(color = groups)) ``` Finally, note that when ISO weeks are used, these are also reported in the output: ```{r, iso} i_7 <- incidence(dat, interval = "week") i_7 plot(i_7, border = "white") head(as.data.frame(i_7)) tail(as.data.frame(i_7)) ```
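Whichever format is chosen, the resulting `data.frame` can also be written to disk for further processing in other software. Below is a minimal sketch using base R; the file names are arbitrary and the chunk is not evaluated here:

```{r, export_csv, eval = FALSE}
## wide format: one column of counts per group
write.csv(as.data.frame(i_14), file = "incidence_by_gender.csv", row.names = FALSE)

## long format: a 'groups' column and a 'counts' column
write.csv(as.data.frame(i_14, long = TRUE), file = "incidence_by_gender_long.csv", row.names = FALSE)
```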
# Importing pre-computed incidence The function `as.incidence` facilitates the conversion of pre-computed incidences to an *incidence* object. Typically, the input will be imported into R from a *.csv* file or other spreadsheet formats. `as.incidence` is a generic with methods for several types of objects (see `?as.incidence`). The main method is `matrix`, as other types are coerced to `matrix` first and then passed to `as.incidence.matrix`: ```{r, conversions} args(incidence:::as.incidence.matrix) ``` The only mandatory argument `x` is a table of counts, with time intervals in rows and groups in columns; if there are no groups, then the column doesn't need a name; but if there are several groups, then columns should be named to indicate group labels. Optionally, `dates` can be provided to indicate the (inclusive) lower bounds of the time intervals, corresponding to the rows of `x`; most sensible date formats will do; if indicated as a character string, make sure the format is `YYYY-mm-dd`, e.g. `2017-04-01` for the 1st April 2017. Let us illustrate the conversion using a simple vector of incidence: ```{r} vec <- c(1,2,3,0,3,2,4,1,2,1) i <- as.incidence(vec) i plot(vec, type = "s") plot(i, border = "white") ``` Assuming the above incidences are computed weekly, we would then use: ```{r} i <- as.incidence(vec, interval = 7) i plot(i, border = "white") ``` Note that in this case, incidences have been treated as per week, and corresponding dates in days have been computed during the conversion (the first day is always '1'), so that the first days of weeks 1, 2, 3... are: ```{r} i$dates ``` In practice, it is best to provide the actual dates marking the lower bounds of the time intervals. We can illustrate this by a round trip using the example of the previous section: ```{r, round_trip} ## convertion: incidence --> data.frame: i_14 df <- as.data.frame(i_14) head(df) tail(df) ## conversion: data.frame --> incidence new_i <- as.incidence(df[group_names(i_14)], df$dates, interval = "2 epiweeks") new_i ## check round trip identical(new_i, i_14) ``` incidence/vignettes/customize_plot.Rmd0000644000176200001440000002012014621106437017726 0ustar liggesusers--- title: "Customize plots of incidence" author: "Thibaut Jombart, Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 4 vignette: > %\VignetteIndexEntry{Customise graphics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette provides some tips for the most common customisations of graphics produced by `plot.incidence`. Our graphics use *ggplot2*, which is a distinct graphical system from base graphics. If you want advanced customisation of your incidence plots, we recommend following an introduction to *ggplot2*.
# Example data: simulated Ebola outbreak This example uses the simulated Ebola Virus Disease (EVD) outbreak from the package [*outbreaks*](https://cran.r-project.org/package=outbreaks): `ebola_sim_clean`. First, we load the data: ```{r, data} library(outbreaks) library(ggplot2) library(incidence) onset <- ebola_sim_clean$linelist$date_of_onset class(onset) head(onset) ``` We compute the weekly incidence: ```{r, incid1} i <- incidence(onset, interval = 7) i i.sex <- incidence(onset, interval = 7, group = ebola_sim_clean$linelist$gender) i.sex i.hosp <- incidence(onset, interval = 7, group = ebola_sim_clean$linelist$hospital) i.hosp ```
# The `plot.incidence` function When calling `plot` on an *incidence* object, the function `plot.incidence` is implicitly used. To access its documentation, use `?plot.incidence`. In this section, we illustrate existing customisations. ## Default behaviour By default, the function uses grey for single time series, and colors from the color palette `incidence_pal1` when incidence is computed by groups: ```{r, default} plot(i) plot(i.sex) plot(i.hosp) ``` However, some of these defaults can be altered through the various arguments of the function: ```{r, args} args(incidence:::plot.incidence) ``` ## Changing colors ### The default palette A color palette is a function which outputs a specified number of colors. By default, the color used in *incidence* is called `incidence_pal1`. Its behaviour is different from usual palettes, in the sense that the first 4 colours are not interpolated: ```{r, incidence_pal1, fig.height = 8} par(mfrow = c(3, 1), mar = c(4,2,1,1)) barplot(1:2, col = incidence_pal1(2)) barplot(1:4, col = incidence_pal1(4)) barplot(1:20, col = incidence_pal1(20)) ``` This palette also has a light and a dark version: ```{r, pal2, fig.height = 8} par(mfrow = c(3,1)) barplot(1:20, col = incidence_pal1_dark(20), main = "palette: incidence_pal1_dark") barplot(1:20, col = incidence_pal1(20), main = "palette: incidence_pal1") barplot(1:20, col = incidence_pal1_light(20), main = "palette: incidence_pal1_light") ``` ### Using different palettes Other color palettes can be provided via `col_pal`. Various palettes are part of the base R distribution, and many more are provided in additional packages. We provide a couple of examples: ```{r, palettes} plot(i.hosp, col_pal = rainbow) plot(i.sex, col_pal = cm.colors) ``` ### Specifying colors manually Colors can be specified manually using the argument `color`; note that whenever incidence is computed by groups, the number of colors must match the number of groups, otherwise `color` is ignored. #### Example 1: changing a single color ```{r, colors1} plot(i, color = "darkred") ``` #### Example 2: changing several colors (note that naming colors is optional) ```{r, colors2} plot(i.sex, color = c(m = "orange2", f = "purple3")) ``` #### Example 3: using color to highlight specific groups ```{r, colors3} plot(i.hosp, color = c("#ac3973", "#6666ff", "white", "white", "white", "white")) ```
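When incidence is computed for many groups, it can be easier to build the colour vector from the group names themselves, which guarantees that its length matches the number of groups. The sketch below shows one way of doing this; the grey base colour and the highlight colour are arbitrary choices:

```{r, colors4}
grp <- group_names(i.hosp)
## one colour per group, named after the groups to make the mapping explicit
my_cols <- setNames(rep("grey80", length(grp)), grp)
my_cols[1] <- "#C93312" # highlight the first group
plot(i.hosp, color = my_cols)
```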
# Useful *ggplot2* tweaks Numerous tweaks for *ggplot2* are documented online. In the following, we merely provide a few useful tips in the context of *incidence*. ## Changing dates on the *x*-axis ### Changing date format By default, the dates indicated on the *x*-axis of an incidence plot may not have the suitable format. The package *scales* can be used to change the way dates are labeled (see `?strptime` for possible formats): ```{r, scales1} library(scales) plot(i, labels_week = FALSE) + scale_x_date(labels = date_format("%d %b %Y")) ``` Notice how the labels are all situated at the first of the month? If you want to make sure the labels are situated in a different orientation, you can use the `make_breaks()` function to calculate breaks for the plot: ```{r scales_breaks} b <- make_breaks(i, labels_week = FALSE) b plot(i) + scale_x_date(breaks = b$breaks, labels = date_format("%d %b %Y")) ``` And for another example, with a subset of the data (first 50 weeks), using more detailed dates and rotating the annotations: ```{r, scales2} plot(i[1:50]) + scale_x_date(breaks = b$breaks, labels = date_format("%a %d %B %Y")) + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) ``` Note that you can save customisations for later use: ```{r, scales3} rotate.big <- theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) ``` ### Changing the grid The last example above illustrates that it can be useful to have denser annotations of the *x*-axis, especially over short time periods. Here, we provide an example where we try to zoom on the peak of the epidemic, using the data by hospital: ```{r, grid1} plot(i.hosp) ``` Let us look at the data 40 days before and after the 1st of October: ```{r, grid2} period <- as.Date("2014-10-01") + c(-40, 40) i.zoom <- subset(i.hosp, from = period[1], to = period[2]) detailed.x <- scale_x_date(labels = date_format("%a %d %B %Y"), date_breaks = "2 weeks", date_minor_breaks = "week") plot(i.zoom, border = "black") + detailed.x + rotate.big ``` ### Handling non-ISO weeks If you have weekly incidence that starts on a day other than monday, then the above solution may produce breaks that fall inside of the bins: ```{r, saturday-epiweek} i.sat <- incidence(onset, interval = "1 week: saturday", groups = ebola_sim_clean$linelist$hospital) i.szoom <- subset(i.sat, from = period[1], to = period[2]) plot(i.szoom, border = "black") + detailed.x + rotate.big ``` In this case, you may want to either calculate breaks using `make_breaks()` or use the `scale_x_incidence()` function to automatically calculate these for you: ```{r, saturday-epiweek2} plot(i.szoom, border = "black") + scale_x_incidence(i.szoom, n_breaks = nrow(i.szoom)/2, labels_week = FALSE) + rotate.big ``` ```{r, saturday-epiweek3} sat_breaks <- make_breaks(i.szoom, n_breaks = nrow(i.szoom)/2) plot(i.szoom, border = "black") + scale_x_date(breaks = sat_breaks$breaks, labels = date_format("%a %d %B %Y")) + rotate.big ``` ### Labelling every bin Sometimes you may want to label every bin of the incidence object. To do this, you can simply set `n_breaks` to the number of rows in your incidence object: ```{r label-bins} plot(i.szoom, n_breaks = nrow(i.szoom), border = "black") + rotate.big ``` ## Changing the legend The previous plot has a fairly large legend which we may want to move around. 
Let us save the plot as a new object `p` and customize the legend: ```{r, legend1} p <- plot(i.zoom, border = "black") + detailed.x + rotate.big p + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), legend.position = "top", legend.direction = "horizontal", legend.title = element_blank()) ``` ## Applying the style of European Programme for Intervention Epidemiology Training (EPIET) ### Display individual cases For small datasets it is convention of EPIET to display individual cases as rectangles. It can be done by doing two things: first, adding using the option `show_cases = TRUE` with a white border and second, setting the background to white. We also add `coord_equal()` which forces each case to be a square. ```{r, EPIET1} i.small <- incidence(onset[160:180]) plot(i.small, border = "white", show_cases = TRUE) + theme(panel.background = element_rect(fill = "white")) + rotate.big + coord_equal() ``` incidence/vignettes/incidence_fit_class.Rmd0000644000176200001440000002266214621104516020627 0ustar liggesusers--- title: "Details of the incidence_fit class" author: "Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Incidence fit class} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette details the structure and construction of the `incidence_fit` and `incidence_fit_list` classes, which are produced by the `fit()` and `fit_optim_split()` functions, respectively. By the end of this tutorial, you should be able to construct `incidence_fit` and `incidence_fit_list` objects for use with your own models. # Structure of an `incidence_fit` object An `incidence_fit` object contains three elements: - `$model`: The model fit to an `incidence` object. Currently, this represents a log-linear model, but it can be any model. - `$info`: Information derived from the model - `r` The growth rate - `r.conf` the confidence interval of `r` - `pred` a data frame containing the predictions of the model using the true dates (`dates`), their numeric version used in the model (`dates.x`), the predicted value (`fit`), and the lower (`lwr`) and upper (`upr`) bounds of the associated confidence interval. - `doubling` the predicted doubling time in days (only if `r` is positive) - `doubling.conf` the confidence interval of the doubling time - `halving` the predicted halving time in days (only if `r` is negative) - `halving.conf` the confidence interval of the halving time - `$origin`: the date corresponding to day '0' Internally, when `fit()` is run, these elements are constructed by function `incidence:::extract_info()`. First we need to setup data. We will use simulated Ebola outbreak data from the *outbreaks* package over weekly intervals and calculate the fit for the first 20 weeks: ```{r fit_dates} library(outbreaks) library(incidence) dat <- ebola_sim$linelist$date_of_onset i <- incidence(dat, interval = "week") i f <- fit(i[1:20]) f plot(i, fit = f) ``` As you can see, the `incidence_fit` object has a print method and a plot method. If you want to access individual elements in the `$info` element, you can use the `get_info()` function: ```{r get_info} get_info(f, "r") get_info(f, "r.conf") get_info(f, "doubling.conf") ``` This will be important later when we combine several `incidence_fit` objects into a single `incidence_fit_list`. 
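Before moving on, note that an `incidence_fit` object is ultimately a named list with the three components described above, so these can also be inspected directly:

```{r inspect_fit}
## top-level structure: $model, $info and $origin
str(f, max.level = 1)

## the data frame of predictions stored in $info
head(f$info$pred)
```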
# Building an `incidence_fit` object from scratch

The `incidence_fit` object can be constructed from any model from which you can derive the daily growth rate, doubling/halving times, predictions, and confidence intervals. The following three steps show roughly how it is done, from model fitting to construction.

### Step 1: create the model

The default model for `fit()` is a log-linear model on the intervals between dates. To fit this model, we will need to create a data frame with the counts and the midpoints of the intervals:

```{r create_model}
# ensure all dates have at least one incidence
i2 <- i[1:20]
i2 <- i2[apply(get_counts(i2), 1, min) > 0]
df <- as.data.frame(i2, long = TRUE)
df$dates.x <- get_dates(i2, position = "center", count_days = TRUE)
head(df)
lm1 <- stats::lm(log(counts) ~ dates.x, data = df)
lm1
```

If we compare that to the `$model` element produced from `fit()`, we can see that it is identical:

```{r fit_model}
all.equal(f$model, lm1)
```

### Step 2: creation of the `$info` list:

The `$info` list is created directly from the model itself:

```{r make_info}
r <- stats::coef(lm1)["dates.x"]
r.conf <- stats::confint(lm1, "dates.x", 0.95)
new.data <- data.frame(dates.x = sort(unique(lm1$model$dates.x)))
pred <- exp(stats::predict(lm1, newdata = new.data, interval = "confidence", level = 0.95))
pred <- cbind.data.frame(new.data, pred)
info_list <- list(
  r = r,
  r.conf = r.conf,
  doubling = log(2) / r,
  doubling.conf = log(2) / r.conf,
  pred = pred
)
info_list
```

### Step 3: combine lists and create object

The last step is to combine everything into a list and create the object.

```{r combine}
origin <- min(get_dates(i2))
info_list$pred$dates <- origin + info_list$pred$dates.x
the_fit <- list(
  model = lm1,
  info = info_list,
  origin = min(get_dates(i2))
)
class(the_fit) <- "incidence_fit"
the_fit
plot(i, fit = the_fit)
```

# Structure of an `incidence_fit_list` object

There are several reasons for having multiple fits to a single `incidence` object. One may want to have a separate fit for different groups represented in the object, or one may want to split the fits at the peak of the epidemic. To aid in plotting and summarizing the different fits, we've created the `incidence_fit_list` class. This class has two defining features:

- It consists of a named list containing one or more `incidence_fit` objects or lists containing `incidence_fit` objects.
- An attribute called "locations" contains a list whose length is equal to the number of `incidence_fit` objects in the object. Each list element contains a vector that defines where an `incidence_fit` object is within the `incidence_fit_list`.

The reason for this structure is that it is sometimes necessary to nest lists of `incidence_fit` objects within lists. When this happens, accessing individual elements of the objects is cumbersome.
To alleviate this, each object has a distinct path within the named list in the "locations" attribute that allows one to access the object directly since R allows you to traverse the elements of a nested list by subsetting with a vector: ```{r nest} l <- list(a = list(b = 1, c = 2),d = list(e = list(f = 3, g = 4), h = 5)) str(l) l[[c("a", "b")]] l[[c("d", "e", "f")]] ``` ## Example: A tale of two fits The function `fit_optim_split()` attempts to find the optimal split point in an epicurve, producing an `incidence_fit_list` object in the `$fit` element of the returned list: ```{r incidence_fit_list} fl <- fit_optim_split(i) fl$fit plot(i, fit = fl$fit) ``` Here you can see that the object looks very similar to the `incidence_fit` object, but it has extra information. The first thing you may notice is the fact that both "doubling" and "halving" are shown. This is because the two fits have different signs for the daily growth rate. The second thing you may notice is the fact that there is something called `attr(x, 'locations')`. This attribute gives the location of the `incidence_fit` objects within the list. We can illustrate how this works if we look at the structure of the object: ```{r incidence_fit_list_str} str(fl$fit, max.level = 2) ``` Internally, all of the methods for `incidence_fit_list` use the 'locations' attribute to navigate: ```{r incidence_fit_methods} methods(class = "incidence_fit_list") ``` For example, it's often useful to extract the growth rate for all models at once. The `get_info()` method allows us to do this easily: ```{r get_info_incidence_fit_list} get_info(fl$fit, "r") get_info(fl$fit, "r.conf") ``` Because doubling or halving is determined by whether or not `r` is negative, we automatically filter out the results that don't make sense, but you can include them with `na.rm = FALSE`: ```{r get_doubling} get_info(fl$fit, "doubling.conf") get_info(fl$fit, "doubling.conf", na.rm = FALSE) ``` ## Example: Nested incidence_fit Above, we showed the example of a basic `incidence_fit_list` class with two objects representing the fits before and after the peak of an epicurve. However, it is often useful evaluate fits for different groups separately. Here, we will construct an incidence object, but define groups by gender: ```{r incidence_by_gender} gen <- ebola_sim$linelist$gender ig <- incidence(dat, interval = "week", group = gen) plot(ig, border = "grey98") ``` Now if we calculate an optimal fit split, we will end up with four different fits: two for each defined gender. ```{r fit_gender} fg <- fit_optim_split(ig) plot(ig, fit = fg$fit, border = "grey98", stack = FALSE) ``` If we look at the fit object, we can see again that it is an `incidence_fit_list` but this time with four fits defined. ```{r fit_gender_print} fg$fit str(fg$fit, max.level = 3) ``` > Notice that the nested lists themselves are of class `incidence_fit_list`. 
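Because the object is simply a nested named list, a single fit can also be pulled out by hand with a vector of names, exactly as in the small list example above (here assuming the gender groups are labelled `"f"` and `"m"`):

```{r nested_extract}
## extract the pre-peak fit of one group directly
fg$fit[[c("f", "before")]]
```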
Now, even though the fits within nested lists, the 'locations' attributes still defines where they are within the object so that the `get_info()` function still operates normally: ```{r get_info_gender} get_info(fg$fit, "r.conf") ``` If you need to access all the fits easily, a convenience function to flatten the list is available in `get_fit()`: ```{r get_fit} str(get_fit(fg$fit), max.level = 2) ``` Because all that defines an `incidence_fit_list` is the class definition and the 'locations' attribute that defines the positions of the `incidence_fit` objects within the nesting, then it's also possible to define the output of `fit_optim_split()` as an `incidence_fit_list` class: ```{r incidence_fit_listify} print(locs <- attributes(fg$fit)$locations) for (i in seq_along(locs)) { locs[[i]] <- c("fit", locs[[i]]) } print(locs) fg.ifl <- fg attributes(fg.ifl)$locations<- locs class(fg.ifl) <- "incidence_fit_list" ``` Now when we print the object, we can see that it prints only the information related to the `incidence_fit_list`: ```{r new_fit_list_print} fg.ifl ``` But, we still retain all of the extra information in the list: ```{r list_stuff} str(fg.ifl, max.level = 1) fg.ifl$split fg.ifl$df fg.ifl$plot ``` incidence/vignettes/incidence_class.Rmd0000644000176200001440000001263514621104516017764 0ustar liggesusers--- title: "Details of the incidence class" author: "Thibaut Jombart, Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Incidence class} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, options, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette details the structure of *incidence* objects, as produced by the `incidence` function.
# Structure of an *incidence* object. We generate a toy dataset of dates to examine the content of *incidence* objects. ```{r, data} library(incidence) set.seed(1) dat <- sample(1:50, 200, replace = TRUE, prob = 1 + exp(1:50 * 0.1)) sex <- sample(c("female", "male"), 200, replace = TRUE) ``` The incidence by 48h period is computed as: ```{r, i} i <- incidence(dat, interval = 2) i plot(i) ``` We also compute incidence by gender: ```{r, sex} i.sex <- incidence(dat, interval = 2, group = sex) i.sex plot(i.sex) ``` The object `i` is a `list` with the class *incidence*: ```{r, names} class(i) is.list(i) names(i) ``` Items in `i` can be accessed using the same indexing as any lists, but it's safer to use the accessors for each item: ```{r, access} ## use name head(i$dates) head(get_dates(i)) ``` In the following sections, we examine each of the components of the object. ## `$dates` The `$dates` component contains a vector for all the dates for which incidence have been computed, in the format of the input dataset (e.g. `Date`, `numeric`, `integer`). ```{r, dates1} date_bins <- get_dates(i) class(date_bins) class(dat) date_bins ``` The dates correspond to the lower bounds of the time intervals used as bins for the incidence. Bins always include the lower bound and exclude the upper bound. In the example provided above, this means that the first bin counts events that happened at day 5-6, the second bin counts events from 7-8, etc. Note that if we had actual `Date`-class dates, they would be returned as dates ```{r date-dates1} dat_Date <- as.Date("2018-10-31") + dat head(dat_Date) i.date <- incidence(dat_Date, interval = 2, group = sex) i.date get_dates(i.date) class(get_dates(i.date)) ``` These can be converted to integers, counting the number of days from the first date. ```{r get-dates-integer} get_dates(i.date, count_days = TRUE) get_dates(i, count_days = TRUE) ``` To facilitate modelling, it's also possible to get the center of the interval by using the `position = "center"` argument: ```{r get-dates-center} get_dates(i.date, position = "center") get_dates(i.date, position = "center", count_days = TRUE) ``` ## `$counts` The `$counts` component contains the actual incidence, i.e. counts of events for the defined bins. It is a `matrix` of `integers` where rows correspond to time intervals, with one column for each group for which incidence is computed (a single, unnamed column if no groups were provided). If groups were provided, columns are named after the groups. 
We illustrate the difference comparing the two objects `i` and `i.sex`: ```{r, counts1} counts <- get_counts(i) class(counts) storage.mode(counts) counts get_counts(i.sex) ``` You can see the dimensions of the incidence object by using `dim()`, `ncol()`, and `nrow()`, which returns the dimensions of the counts matrix: ```{r counts1.1} dim(get_counts(i.sex)) dim(i.sex) nrow(i.sex) # number of date bins ncol(i.sex) # number of groups ``` There are also accessors for handling groups: ```{r groups} # Number of groups ncol(i.sex) ncol(i) # Names of groups group_names(i.sex) group_names(i) # You can also rename the groups group_names(i.sex) <- c("F", "M") group_names(i.sex) ``` Note that a `data.frame` containing *dates* and *counts* can be obtained using `as.data.frame`: ```{r, as.data.frame} ## basic conversion as.data.frame(i) as.data.frame(i.sex) ## long format for ggplot2 as.data.frame(i.sex, long = TRUE) ``` Note that `incidence` has an argument called `na_as_group` which is `TRUE` by default, which will pool all missing groups into a separate group, in which case it will be a separate column in `$counts`. ## `$timespan` The `$timespan` component stores the length of the time period covered by the object: ```{r, timespan} get_timespan(i) print(date_range <- range(get_dates(i))) diff(date_range) + 1 ``` ## `$interval` The `$interval` component contains the length of the time interval for the bins: ```{r, interval} get_interval(i) diff(get_dates(i)) ``` ## `$n` The `$n` component stores the total number of events in the data: ```{r, n} get_n(i) ``` Note that to obtain the number of cases by groups, one can use: ```{r, n2} colSums(get_counts(i.sex)) ``` ## `$weeks` The `$weeks` component is optional, and used to store [aweek](https://www.repidemicsconsortium.org/aweek/) objects whenever they have been used. Weeks are used by default when weekly incidence is computed from dates (see argument `standard` in `?incidence`). ```{r, isoweek} library(outbreaks) dat <- ebola_sim$linelist$date_of_onset i.7 <- incidence(dat, "1 epiweek", standard = TRUE) i.7 i.7$weeks ``` Because `$weeks` is an optional element, it does not have a dedicated accessor. If the element is not present, attempting to access it will result in a `NULL`: ```{r isoweek-null} i$weeks ``` Both dates and weeks are returned when converting an `incidence` object to `data.frame`: ```{r, isoweek3} head(as.data.frame(i.7)) ``` incidence/R/0000755000176200001440000000000014621104516012374 5ustar liggesusersincidence/R/check_interval.R0000644000176200001440000000155614621104516015507 0ustar liggesusers#' Check the interval between bins #' #' This enforces that an interval is: #' - strictly positive #' - integer (rounded) OR compatibile with date #' - finite #' - of length 1 #' #' @param x an integer or numeric interval #' @return an integer interval #' @noRd check_interval <- function(x, standard = TRUE){ if (missing(x) || is.null(x)) { stop("Interval is missing or NULL") } if (length(x) != 1L) { stop(sprintf( "Exactly one value should be provided as interval (%d provided)", length(x))) } if (!is.finite(x)) { if (is.character(x)) { x <- valid_interval_character(x, standard) } else { stop("Interval is not finite") } } if (is.numeric(x)) { x <- as.integer(round(old <- x)) } if (x < 1L) { stop(sprintf( "Interval must be at least 1 (input: %.3f; after rounding: %d)", old, x)) } x } incidence/R/print.R0000644000176200001440000000665114621104516013663 0ustar liggesusers#' @export #' @rdname incidence #' @param x An 'incidence' object. 
print.incidence <- function(x, ...) { cat("\n") cat(sprintf("[%d cases from days %s to %s]\n", sum(x$n), min(x$dates), max(x$dates))) if ("weeks" %in% names(x)) { type_of_week <- get_type_of_week(x) cat(sprintf("[%d cases from %s weeks %s to %s]\n", sum(x$n), type_of_week, head(x$weeks, 1), tail(x$weeks, 1))) } if (!is.null(group_names(x))) { groups.txt <- paste(group_names(x), collapse = ", ") cat(sprintf("[%d groups: %s]\n", ncol(x), groups.txt)) } cat(sprintf("\n$counts: matrix with %d rows and %d columns\n", nrow(x$counts), ncol(x$counts))) cat(sprintf("$n: %d cases in total\n", x$n)) cat(sprintf("$dates: %d dates marking the left-side of bins\n", length(x$dates))) if (is.integer(x$interval)) { cat(sprintf("$interval: %d %s\n", x$interval, ifelse(x$interval < 2, "day", "days"))) } else if (grepl("\\d", x$interval)) { cat(sprintf("$interval: %s\n", x$interval)) } else { cat(sprintf("$interval: 1 %s\n", x$interval)) } cat(sprintf("$timespan: %d days\n", x$timespan)) if (!is.null(x$cumulative)) { cat(sprintf("$cumulative: %s\n", x$cumulative)) } cat("\n") invisible(x) } #' @export #' @rdname fit #' @param ... currently unused. print.incidence_fit <- function(x, ...) { cat("\n\n") cat("$model: regression of log-incidence over time\n\n") cat("$info: list containing the following items:\n") cat(" $r (daily growth rate):\n") print(x$info$r) cat("\n $r.conf (confidence interval):\n") print(x$info$r.conf) if (x$info$r[1] > 0) { cat("\n $doubling (doubling time in days):\n") print(x$info$doubling) cat("\n $doubling.conf (confidence interval):\n") print(x$info$doubling.conf) } else { cat("\n $halving (halving time in days):\n") print(x$info$halving) cat("\n $halving.conf (confidence interval):\n") print(x$info$halving.conf) } cat(sprintf( "\n $pred: data.frame of incidence predictions (%d rows, %d columns)\n", nrow(x$info$pred), ncol(x$info$pred))) invisible(x) } #' @export #' @rdname fit print.incidence_fit_list <- function(x, ...) { cat("\n\n") cat("attr(x, 'locations'): list of vectors with the locations of each incidence_fit object\n\n") locations <- attr(x, "locations") cat(sprintf("'%s'", vapply(locations, paste, character(1), collapse = "', '")), sep = "\n") cat("\n") cat("$model: regression of log-incidence over time\n\n") cat("$info: list containing the following items:\n") cat(" $r (daily growth rate):\n") print(get_info(x, "r")) cat("\n $r.conf (confidence interval):\n") print(get_info(x, "r.conf")) if (any(get_info(x, "r") > 0)) { cat("\n $doubling (doubling time in days):\n") print(get_info(x, "doubling", na.rm = TRUE)) cat("\n $doubling.conf (confidence interval):\n") print(get_info(x, "doubling.conf", na.rm = TRUE)) } if (any(get_info(x, "r") < 0)) { cat("\n $halving (halving time in days):\n") print(get_info(x, "halving", na.rm = TRUE)) cat("\n $halving.conf (confidence interval):\n") print(get_info(x, "halving.conf", na.rm = TRUE)) } preds <- get_info(x, "pred") cat(sprintf( "\n $pred: data.frame of incidence predictions (%d rows, %d columns)\n", nrow(preds), ncol(preds))) invisible(x) } incidence/R/group_names.R0000644000176200001440000000541014621104516015036 0ustar liggesusers#' extract and set group names #' @param x an [incidence()] object. #' @param value character vector used to rename groups #' @return an integer indicating the number of groups present in the incidence #' object. 
#' @details This accessor will return a #' @export #' @examples #' i <- incidence(dates = sample(10, 100, replace = TRUE), #' interval = 1L, #' groups = sample(letters[1:3], 100, replace = TRUE)) #' i #' group_names(i) #' #' # change the names of the groups #' group_names(i) <- c("Group 1", "Group 2", "Group 3") #' i #' #' # example if there are mistakes in the original data, e.g. #' # something is misspelled #' set.seed(50) #' grps <- sample(c("child", "adult", "adlut"), 100, replace = TRUE, prob = c(0.45, 0.45, 0.05)) #' i <- incidence(dates = sample(10, 100, replace = TRUE), #' interval = 1L, #' groups = grps) #' colSums(get_counts(i)) #' #' # If you change the name of the mis-spelled group, it will be merged with the #' # correctly-spelled group #' gname <- group_names(i) #' gname[gname == "adlut"] <- "adult" #' # without side-effects #' print(ii <- group_names(i, gname)) #' colSums(get_counts(i)) # original still has three groups #' colSums(get_counts(ii)) #' # with side-effects #' group_names(i) <- gname #' colSums(get_counts(i)) group_names <- function(x, value) { UseMethod("group_names", x) } #' @export #' @rdname group_names "group_names<-" <- function(x, value) { UseMethod("group_names<-", x) } #' @rdname group_names #' @export #' @aliases group_names.default group_names.default <- function(x, value) { stop(sprintf("Not implemented for class %s", paste(class(x), collapse = ", "))) } #' @rdname group_names #' @export #' @aliases `group_names<-`.default "group_names<-.default" <- function(x, value) { stop(sprintf("Not implemented for class %s", paste(class(x), collapse = ", "))) } #' @rdname group_names #' @export #' @keywords accessors group_names.incidence <- function(x, value = NULL){ if (is.null(value)) { colnames(x$counts) } else { `group_names<-`(x, value) } } #' @rdname group_names #' @export "group_names<-.incidence" <- function(x, value) { if (length(value) != ncol(x)) { stop("value must be the same length as the number of groups.") } if (anyNA(value <- as.character(value))) { stop("value must be able to be coerced to a character vector") } uval <- unique(value) if (identical(uval, value)) { colnames(x$counts) <- value } else { the_counts <- x$counts out <- matrix(integer(nrow(the_counts)*length(uval)), nrow = nrow(the_counts), ncol = length(uval) ) colnames(out) <- uval for (i in uval) { out[, i] <- rowSums(the_counts[, value == i, drop = FALSE]) } x$counts <- out } x } incidence/R/get_fit.R0000644000176200001440000000277314621104516014151 0ustar liggesusers#' Accessors for `incidence_fit` objects #' #' @param x an `incidence_fit` or `incidence_fit_list` #' object. #' @return a list of `incidence_fit` objects. 
#' @export #' @examples #' #' if (require(outbreaks)) { withAutoprint({ #' #' dat <- ebola_sim$linelist$date_of_onset #' #' ## EXAMPLE WITH A SINGLE MODEL #' #' ## compute weekly incidence #' sex <- ebola_sim$linelist$gender #' i.sex <- incidence(dat, interval = 7, group = sex) #' #' ## Compute the optimal split for each group separately #' fits <- fit_optim_split(i.sex, separate_split = TRUE) #' #' ## `fits` contains an `incidence_fit_list` object #' fits$fit #' #' ## Grab the list of `incidence_fit` objects #' get_fit(fits$fit) #' #' ## Get the predictions for all groups #' get_info(fits$fit, "pred", groups = 1) #' #' ## Get the predictions, but set `groups` to "before" and "after" #' get_info(fits$fit, "pred", groups = 2) #' #' ## Get the reproduction number #' get_info(fits$fit, "r") #' #' ## Get the doubling confidence interval #' get_info(fits$fit, "doubling.conf") #' #' ## Get the halving confidence interval #' get_info(fits$fit, "halving.conf") #' })} get_fit <- function(x) { UseMethod("get_fit") } #' @rdname get_fit #' @export get_fit.incidence_fit <- function(x) { x } #' @rdname get_fit #' @export get_fit.incidence_fit_list <- function(x) { locations <- attr(x, "locations") res <- lapply(locations, function(i) x[[i]]) names(res) <- vapply(locations, paste, character(1), collapse = "_") res } incidence/R/get_counts.R0000644000176200001440000000343714621104516014700 0ustar liggesusers#' Get counts from an incidence object #' #' @param x an `incidence` object. #' @param groups if there are groups, use this to specify a group or groups to #' subset. Defaults to `NULL` indicating that all groups are returned. #' #' @return a matrix of counts where each row represents a date bin #' @export #' @examples #' if (require(outbreaks)) { withAutoprint({ #' dat <- ebola_sim$linelist$date_of_onset #' gend <- ebola_sim$linelist$gender #' i <- incidence(dat, interval = "week", groups = gend) #' #' ## Use with an object and no arguments gives the counts matrix #' head(get_counts(i)) #' #' ## Specifying a position or group name will return a matrix subset to that #' ## group #' head(get_counts(i, 1L)) #' head(get_counts(i, "f")) #' #' ## Specifying multiple groups allows you to rearrange columns #' head(get_counts(i, c("m", "f"))) #' #' ## If you want a vector, you can use drop #' drop(get_counts(i, "f")) #' })} get_counts <- function(x, groups = NULL) { UseMethod("get_counts") } #' @rdname get_counts #' @export get_counts.incidence <- function(x, groups = NULL){ if (is.null(groups) || ncol(x$counts) == 1) { return(x$counts) } if (is.character(groups)) { correct_groups <- groups[groups %in% colnames(x$counts)] } if (is.numeric(groups)) { correct_groups <- groups[groups %in% seq(ncol(x$counts))] } if (!identical(correct_groups, groups)) { grps <- paste(setdiff(groups, correct_groups), collapse = ", ") msg <- sprintf("The following groups were not recognised: %s", grps) message(msg) } if (length(correct_groups) == 0) { grps <- paste(colnames(x$counts), collapse = ", ") stop(sprintf("No groups matched those present in the data: %s", grps)) } return(x$counts[, correct_groups, drop = FALSE]) } incidence/R/check_groups.R0000644000176200001440000000157614621104516015204 0ustar liggesusers#' Enforce cromulence of groups #' #' This enforces that groups is either: #' #' - NULL #' - a factor and the same length as dates #' It also treats missing groups (NA) as a separate group is needed. 
#' #' @param x a vector denoting groups #' @param dates a vector representing dates #' @param na_as_group a logical indicating whether or not NA should be #' considered a separate group. #' @noRd check_groups <- function(x, dates, na_as_group){ if (is.null(x)) { return(NULL) } x <- factor(x) lev <- levels(x) if (na_as_group && any(is.na(x))) { x <- as.character(x) x[is.na(x)] <- "NA" lev <- c(lev, "NA") } if (length(x) != length(dates)) { stop(sprintf( "'x' does not have the same length as dates (%d vs %d)", length(x), length(dates) ) ) } factor(x, levels = lev) } incidence/R/zzz.R0000644000176200001440000000036114621104516013354 0ustar liggesusers.onLoad <- function(...) { op <- options() op.incidence <- list(incidence.max.days = 18262, incidence.warn.first_date = TRUE) toset <- !names(op.incidence) %in% op if (any(toset)) options(op.incidence[toset]) } incidence/R/subset.R0000644000176200001440000000730514621104516014031 0ustar liggesusers##' Subsetting 'incidence' objects ##' ##' Two functions can be used to subset incidence objects. The function ##' `subset` permits to retain dates within a specified range and, ##' optionally, specific groups. The operator "[" can be used as for matrices, ##' using the syntax `x[i,j]` where 'i' is a subset of dates, and 'j' is a ##' subset of groups. ##' ##' @author Thibaut Jombart \email{thibautjombart@@gmail.com} ##' ##' @export ##' ##' @rdname subset ##' ##' @aliases "subset.incidence" "[.incidence" ##' ##' @seealso The [incidence()] function to generate the 'incidence' ##' objects. ##' ##' @param x An incidence object, generated by the function ##' [incidence()]. ##' ##' @param from The starting date; data strictly before this date are discarded. ##' ##' @param to The ending date; data strictly after this date are discarded. ##' ##' @param groups (optional) The groups to retained, indicated as subsets of the ##' columns of x$counts. ##' ##' @param ... Further arguments passed to other methods (not used). ##' ##' @examples ##' ## example using simulated dataset ##' if(require(outbreaks)) { withAutoprint({ ##' onset <- ebola_sim$linelist$date_of_onset ##' ##' ## weekly incidence ##' inc <- incidence(onset, interval = 7) ##' inc ##' inc[1:10] # first 10 weeks ##' plot(inc[1:10]) ##' inc[-c(11:15)] # remove weeks 11-15 ##' plot(inc[-c(11:15)]) ##' })} ##' subset.incidence <- function(x, ..., from = min(x$dates), to = max(x$dates), groups = TRUE){ ## We need to make sure the comparison with dates is going to work. As for the ## [ operator, 'from' and 'to' are assumed to be expressed in the same way as ## the x$dates. 
is_date <- inherits(x$dates, "Date") numeric_from <- is.numeric(from) numeric_to <- is.numeric(to) if (is_date && (numeric_from || numeric_to)) { the_intervals <- get_interval(x, integer = TRUE) if (length(the_intervals) == 1L) { the_intervals <- rep(the_intervals, length(x$dates)) } the_intervals <- cumsum(c(0, the_intervals)) } if (is_date && numeric_from) { if (from <= 0) { from <- 0L } else if (from >= length(the_intervals) - 1L) { from <- the_intervals[length(the_intervals) - 1L] } else { from <- the_intervals[from] } from <- min(x$dates) + from } if (is_date && numeric_to) { if (to <= 0) { to <- 0L } else if (to >= length(the_intervals) - 1L) { to <- the_intervals[length(the_intervals) - 1L] } else { to <- the_intervals[to] } to <- min(x$dates) + to } to.keep <- x$dates >= from & x$dates <= to if (sum(to.keep) < 1) { stop("No data retained.") } x[to.keep, groups] } ##' @export ##' @rdname subset ##' @param i a subset of dates to retain ##' @param j a subset of groups to retain "[.incidence" <- function(x, i, j){ if (missing(i)) { i <- TRUE } if (missing(j)) { j <- TRUE } out <- x if (is.character(j) && !all(j %in% group_names(x))) { odd_names <- j[!j %in% group_names(x)] groups <- if (length(odd_names) > 1) "groups do" else "group does" odd_names <- paste(j[!j %in% group_names(x)], collapse = "', '") msg <- sprintf("The following %s not exist: '%s'", groups, odd_names) stop(msg) } out$counts <- out$counts[i, j, drop = FALSE] out$dates <- out$dates[i] if ("weeks" %in% names(x)) { out$weeks <- out$weeks[i] out$isoweeks <- out$isoweeks[i] } # Need to use 1L here to keep things type-stable: # double + integer = double # integer + integer = integer # Date + integer = Date out$timespan <- diff(range(out$dates, na.rm = TRUE)) + 1L out$n <- sum(out$counts) out } incidence/R/get_week.R0000644000176200001440000000331314621104516014311 0ustar liggesusers#' translate user input to the start date of the week #' #' @param a weekday specification: ISOweek, MMWRweek, EPIweek, Mon-week, Tue-week, etc. 
#' #' @return the corresponding weekday #' @keywords internal #' @noRd #' @examples #' get_week_start("ISOweek") #' get_week_start("MMWRweek") #' get_week_start("EPIweek") #' #' # weeks that start on saturday #' #' get_week_start("Sat-week") #' get_week_start("week: Saturday") #' get_week_start("2 weeks: Saturday") #' get_week_start("epiweek: Saturday") get_week_start <- function(weekday) { wkdy <- gsub("weeks?", "", tolower(weekday)) wkdy <- gsub('[[:punct:][:blank:][:digit:]]*', "", wkdy) wkdy <- if (wkdy == "") "monday" else wkdy # the input was "weeks" res <- switch(wkdy, "mmwr" = "sunday", # MMWR == CDC epiweek "epi" = "sunday", # CDC epiweek "iso" = "monday", # ISOweek == WHO epiweek wkdy # all others ) gsub("epi", "", res) # if they specify something like "epiweek:saturday" } #' Translate a custom interval to a valid interval #' #' @param the_interval an interval like 2 epiweeks or 1 ISOweek #' @return an interval compatible with `seq.Date()` #' @keywords internal #' @noRd #' @examples #' get_week_duration("2 weeks (wednesday)") # 2 weeks #' get_week_duration("2 epiweeks") # 2 weeks get_week_duration <- function(the_interval) { if (the_interval == 7) return(the_interval) res <- gsub('^(\\d*) ?.*(weeks?).*$', '\\1 \\2', tolower(the_interval), perl = TRUE) trimws(res) } get_type_of_week <- function(x) { switch(as.character(attr(x$weeks, "week_start")), "1" = "ISO", "7" = "MMWR", sprintf("(%s)", weekdays(x$dates[1])) ) } incidence/R/check_boundaries.R0000644000176200001440000000223714621104516016013 0ustar liggesusers#' A cromulence check for first_date and _last date #' #' This will create a boundary from the data if there is none provided. #' #' @param dates a vector of dates, integers, or numerics #' @param boundary a date, integer, numeric, or character string that can resolve to a date #' @param what "first" or "last" for the first_date and last_date arguments #' #' @return a date or integer #' @noRd #' @keywords internal check_boundaries <- function(dates, boundary = NULL, what = "first") { if (is.null(boundary)) { MINMAX <- if (what == "first") min else max boundary <- MINMAX(dates, na.rm = TRUE) } msg <- "%s_date (%s) could not be converted to Date." if (is.character(boundary) && !grepl("^[0-9]{4}-[01][0-9]-[0-3][0-9]$", boundary)) { msg <- paste(msg, 'Dates must be in ISO 8601 standard format (yyyy-mm-dd).') stop(sprintf(msg, what, boundary), call. = FALSE) } res <- try(check_dates(boundary), silent = TRUE) if (inherits(res, "try-error")) { msg <- paste(msg, "Accepted formats are:", "\n Date, POSIXct, integer, numeric, character.") stop(sprintf(msg, what, deparse(substitute(boundary))), call. = FALSE) } res } incidence/R/cumulate.R0000644000176200001440000000242014621104516014334 0ustar liggesusers#' Compute cumulative 'incidence' #' #' `cumulate` is an S3 generic to compute cumulative numbers, with methods #' for different types of objects: #' #' \itemize{ #' #' \item default method is a wrapper for `cumsum` #' #' \item `incidence` objects: computes cumulative incidence over time #' #' \item `projections` objects: same, for `projections` objects, #' implemented in the similarly named package; see `?cumulate.projections` #' for more information, after loading the package #' #' } #' #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com} #' #' @seealso The [incidence()] function to generate the 'incidence' #' objects. #' #' @param x An incidence object. 
#' #' @export #' #' @examples #' dat <- as.integer(c(0,1,2,2,3,5,7)) #' group <- factor(c(1, 2, 3, 3, 3, 3, 1)) #' i <- incidence(dat, groups = group) #' i #' plot(i) #' #' i_cum <- cumulate(i) #' i_cum #' plot(i_cum) #' #' @rdname cumulate cumulate <- function(x) { UseMethod("cumulate", x) } #' @rdname cumulate #' @export cumulate.default <- function(x) { cumsum(x) } #' @rdname cumulate #' @export cumulate.incidence <- function(x) { if (isTRUE(x$cumulative)) { stop("x is already a cumulative incidence") } out <- x out$counts <- apply(x$counts, 2, cumsum) out$cumulative <- TRUE out } incidence/R/scale_x_incidence.R0000644000176200001440000000637214621104516016146 0ustar liggesusers#' @param ... arguments passed to [ggplot2::scale_x_date()], #' [ggplot2::scale_x_datetime()], or [ggplot2::scale_x_continuous()], depending #' on how the `$date` element is stored in the incidence object. #' @export #' @rdname plot.incidence scale_x_incidence <- function(x, n_breaks = 6, labels_week = TRUE, ...) { breaks <- make_breaks(x, n_breaks, labels_week) if (inherits(x$dates, "Date")) { out <- ggplot2::scale_x_date(breaks = breaks$breaks, labels = breaks$labels, ...) } else if (inherits(x$dates, "POSIXt")) { breaks$breaks <- as.POSIXct(as.POSIXlt(breaks$breaks)) out <- ggplot2::scale_x_datetime(breaks = breaks$breaks, labels = breaks$labels, timezone = "UTC", ... ) } else { out <- ggplot2::scale_x_continuous(breaks = breaks$breaks, ...) } out } #' @export #' @rdname plot.incidence make_breaks <- function(x, n_breaks = 6L, labels_week = TRUE) { stopifnot(inherits(x, "incidence"), is.logical(labels_week), is.numeric(n_breaks)) ## Defining breaks for the x axis -------------------------------------------- ## ## The x axis can either be integers, Dates, or POSIXt scales. Moreover, ## we need to make sure that the breaks align with the left-hand side of the ## bins (for now). This section first defines what the breaks should be ## and then treats them according to whether or not the interval was specified ## as a character. if (n_breaks == nrow(x)) { # The number of breaks are equal to the number of dates... don't worry about # adjusting breaks <- x$dates } else { # adjust breaks to force first date to beginning. 
breaks <- pretty(x$dates, n_breaks) breaks <- breaks + (x$dates[1] - breaks[1]) } ## Defining the x axis scale ------------------------------------------------- ## ## Choosing between scale_x_date, scale_x_datetime, and scale_x_continuous # labels should be dates or numbers if (is.character(x$interval)) { # The interval is a character like "2 weeks" and we have to figure out how # to split these manually has_number <- grepl("\\d", x$interval) tims <- ceiling(x$timespan/(n_breaks*mean(get_interval(x, integer = TRUE)))) if (has_number) { ni <- as.integer(strsplit(x$interval, " ", fixed = TRUE)[[1L]][1L]) # the replacement should be a multiple of the number replacement <- if (tims <= ni) ni else ceiling(tims/ni)*ni db <- gsub("\\d+", replacement, x$interval) } else if (x$interval == "quarter") { db <- paste(tims * 3, "months") } else { db <- sprintf("%d %s", tims, x$interval) } breaks <- seq(x$dates[1], x$dates[nrow(x)], by = db) } if (!is.null(x$weeks)) { # If the data are in weeks, we should make sure that the line up correctly w <- aweek::date2week(breaks, week_start = attr(x$weeks, "week_start"), floor_day = TRUE) breaks <- aweek::week2date(w) labels <- if (labels_week) w else ggplot2::waiver() } else { labels <- ggplot2::waiver() } list(breaks = breaks, labels = labels) } incidence/R/check_dots.R0000644000176200001440000000311314621104516014623 0ustar liggesusers#' Check the user-fed arguments and give warnings if they are wrong. #' #' @param dots a list of user-supplied arguments #' @param args names of arguments appropriate for the function. #' #' @return dots, modified if necessary #' @noRd #' @keywords internal #' #' @examples #' dots <- c(hAy = 1, "lsdflk" = 1, "stuperman" = TRUE, iso_week = TRUE) #' args <- c("superman", "hey", "ho", "lets", "go", "standard") #' check_dots(dots, args) check_dots <- function(dots, args) { if (length(dots) == 0) { return(dots) } dnames <- names(dots) scores <- utils::adist(paste0("^", dnames), args, fixed = FALSE) < 2 recognized <- rowSums(scores) > 0 msg <- "" if (sum(scores) > 0) { words <- apply(scores[recognized, , drop = FALSE], 1, function(i) paste(args[i], collapse = ", ")) errs <- paste(format(dnames[recognized]), format(words), sep = " : ") errs <- paste(errs, collapse = "\n\t") msg <- sprintf("\n\nPotentially misspelled options:\n\t%s", errs) } if (sum(!recognized) > 0) { dre <- paste(dnames[!recognized & dnames != "iso_week"], collapse = ", ") msg <- if (dre != "") paste0(msg, "\n\nUnrecognised options:\n\t", dre) else msg } if ("iso_week" %in% dnames) { warning(paste("The parameter `iso_week` has been deprecated as of incidence", "version 1.3. Please use `standard` instead."), call. = FALSE ) names(dots)[dnames == "iso_week"] <- "standard" } MSG <- "Misspelled or unrecognized options were found." if (msg != "") { stop(paste(MSG, msg), call. = FALSE) } dots } incidence/R/make_breaks.R0000644000176200001440000000707414621104516014773 0ustar liggesusers#' Make breaks with dates #' #' Because Date objects have a specific `seq` method, it's possible #' to make breaks with both integers and date objects. This function #' will check to make sure that the interval is valid. 
#' #' @param date an integer, numeric, or Date vector #' @param the_interval an integer or character #' @param last_date an integer, numeric, or Date #' @param first_date an integer, numeric, or Date #' @param dots a named list of options #' #' @author Zhian Kamvar #' @return a vector of integers or Dates #' @noRd #' @examples #' #' set.seed(999) #' d <- sample(10, replace = TRUE) #' make_breaks_easier(d, 2L) make_breaks_easier <- function(dates, the_interval, first_date = NULL, last_date = NULL, dots = 1L) { the_interval <- valid_interval_character(the_interval) date_interval <- is.character(the_interval) && is_date_interval(the_interval) is_month <- grepl("month", the_interval, ignore.case = TRUE) is_quarter <- grepl("quarter", the_interval, ignore.case = TRUE) is_year <- grepl("year", the_interval, ignore.case = TRUE) uneven_interval <- date_interval && (is_month || is_quarter || is_year) # getting information about the date fd <- as.character(first_date) the_day <- as.integer(substr(fd, 9, 10)) the_month <- as.integer(substr(fd, 6, 7)) if ("standard" %in% names(dots)) { if (isTRUE(dots$standard)) { is_a_week <- !uneven_interval && check_week(the_interval) if (is_a_week) { week_start <- get_week_start(the_interval) the_interval <- get_week_duration(the_interval) # This returns something like 2018-W29 first_isoweek <- aweek::date2week(first_date, week_start, floor_day = TRUE) # here we convert it back to a date first_date <- aweek::week2date(first_isoweek) } if (uneven_interval) { # Replace the day with the first day of the month substr(fd, 9, 10) <- "01" if (is_quarter) { # Replace the month with the first month of the quarter m <- (as.integer(substr(fd, 6, 7)) - 1L) %/% 3L substr(fd, 6, 7) <- sprintf("%02d", (m * 3) + 1L) } if (is_year) { # Replace the month with the first month of the year substr(fd, 6, 7) <- "01" } # re-cast the date first_date <- as.Date(fd) } } else { if (uneven_interval && !is_year && the_day > 28) { # The first date represents a day that doesn't occur in all months msg <- paste("The first_date (%s) represents a day that does not", "occur in all months. Because of this, bins may not", "conform to monthly boundaries. To prevent this", "behavior, plese specify a different first_date that", "represents a day within [1, 28]." ) msg <- paste(strwrap(msg), collapse = "\n") warning(sprintf(msg, fd), call. = FALSE) } if (is_year && the_month == 2 && the_day == 29) { # The first date occurs on a leap day. msg <- paste("The first_date (%s) represents a day that does not", "occur in all years. Because of this, bins may not", "fall on the same day. To prevent this behavior, please", "specify a first_date that represents a different day." ) msg <- paste(strwrap(msg), collapse = "\n") warning(sprintf(msg, fd), call. = FALSE) } } } seq(first_date, last_date, by = the_interval) } incidence/R/get_timespan.R0000644000176200001440000000106214621104516015175 0ustar liggesusers#' @return #' - `get_timespan()`: an `integer` denoting the timespan represented by the #' incidence object. 
#' @export #' @rdname accessors #' @aliases get_timespan get_timespan <- function(x) { UseMethod("get_timespan") } #' @export #' @rdname accessors #' @aliases get_timespan.default get_timespan.default <- function(x) { stop(sprintf("Not implemented for class %s", paste(class(x), collapse = ", "))) } #' @export #' @rdname accessors #' @aliases get_timespan.incidence get_timespan.incidence <- function(x) { x$timespan } incidence/R/fit_optim_split.R0000644000176200001440000001102314621104516015721 0ustar liggesusers#' @export #' @rdname fit #' #' @param window The size, in days, of the time window either side of the #' split. #' #' @param plot A logical indicating whether a plot should be added to the #' output (`TRUE`, default), showing the mean R2 for various splits. #' #' @param separate_split If groups are present, should separate split dates be #' determined for each group? Defaults to `TRUE`, in which separate split dates #' and thus, separate models will be constructed for each group. When `FALSE`, #' the split date will be determined from the pooled data and modelled with the #' groups as main effects and interactions with date. #' fit_optim_split <- function(x, window = x$timespan/4, plot = TRUE, quiet = TRUE, separate_split = TRUE) { if (ncol(x$counts) > 1 && separate_split) { # Calculate split for each group separately -------------------------------- res <- vector(mode = "list", length = ncol(x$counts)) names(res) <- colnames(x$counts) for (i in names(res)) { res[[i]] <- fit_optim_split(x[, i], separate_split = FALSE, plot = FALSE) } # Rearrange data ----------------------------------------------------------- # The resulting object will have the follwing structure # $df # $plot # $group1 # $group2 # $split # $fit # $group1 # $before # $after # $group2 # $before # $after dates <- get_dates(x)[[1]] dfrows <- vapply(res, function(i) nrow(i$df), integer(1)) out <- list( df = data.frame(dates = seq(dates, by = 1, length.out = sum(dfrows)), mean.R2 = vector(mode = "numeric", length = sum(dfrows)), groups = factor(rep(names(res), dfrows), names(res)), stringsAsFactors = TRUE ), plot = ggplot2::ggplot(), split = seq(dates, by = 1, length.out = length(res)), fit = vector(mode = "list", length = length(res)) ) names(out$fit) <- names(res) names(out$plot) <- names(res) names(out$split) <- names(res) for (i in names(res)) { n <- factor(i, names(res)) out$fit[[i]] <- res[[i]]$fit out$plot[[i]] <- res[[i]]$plot out$split[[i]] <- res[[i]]$split out$fit[[i]]$after$info$pred$groups <- n out$fit[[i]]$before$info$pred$groups <- n out$df[out$df$groups == i, ]$dates <- res[[i]]$df$dates out$df[out$df$groups == i, ]$mean.R2 <- res[[i]]$df$mean.R2 } if (plot) { out$plot <- ggplot2::ggplot( out$df, ggplot2::aes_string(x = "dates", y = "mean.R2", color = "groups") ) + ggplot2::geom_point() + ggplot2::geom_line() + ggplot2::geom_text(ggplot2::aes_string(label="dates"), hjust = -0.1, angle = 35 ) + ggplot2::ylim(min = min(out$df$mean.R2) - 0.1, max = 1) } else { out$plot <- NULL } # Adding attributes for incidence_fit_list --------------------------------- attr(out$fit, "locations") <- c( lapply(names(res), c, "before"), lapply(names(res), c, "after") ) class(out$fit) <- "incidence_fit_list" return(out) } date.peak <- x$dates[which.max(pool(x)$counts)] try.since <- date.peak - window / 2 try.until <- date.peak + window / 2 to.keep <- x$dates >= try.since & x$dates <= try.until if (sum(to.keep) < 1) { stop("No date left to try after defining splits to try.") } splits.to.try <- x$dates[to.keep] need.to.try <- 
length(splits.to.try) > 1 f <- function(split) { fits <- fit(x, split = split, quiet = quiet) mean(vapply(fits, function(e) summary(e$model)$`adj.r.squared`, double(1)), na.rm = TRUE) } results <- vapply(splits.to.try, f, double(1)) ## shape output df <- data.frame(dates = splits.to.try, mean.R2 = results, stringsAsFactors = TRUE) split <- if (need.to.try) splits.to.try[which.max(results)] else splits.to.try fit <- suppressWarnings(fit(x, split = split)) out <- list(df = df, split = split, fit = fit) if (plot) { out$plot <- ggplot2::ggplot( df, ggplot2::aes_string(x = "dates", y = "mean.R2")) + ggplot2::geom_point() + ggplot2::geom_line() + ggplot2::geom_text(ggplot2::aes_string(label="dates"), hjust=-.1, angle=35) + ggplot2::ylim(min=min(results)-.1, max=1) } out } incidence/R/get_info.R0000644000176200001440000000373614621104516014322 0ustar liggesusers#' @rdname get_fit #' @param what the name of the item in the "info" element of the `incidence_fit` #' object. #' @param groups if `what = "pred"` and `x` is an `incidence_fit_list` object, #' then this indicates what part of the nesting hierarchy becomes the column #' named "groups". Defaults to `NULL`, indicating that no groups column will #' be added/modified. #' @param na.rm when `TRUE` (default), missing values will be excluded from the #' results. #' @param ... currently unused. #' @export get_info <- function(x, what = "r", ...) { UseMethod("get_info") } #' @rdname get_fit #' @export get_info.incidence_fit <- function(x, what = "r", ...) { x$info[[what]] } #' @rdname get_fit #' @export get_info.incidence_fit_list <- function(x, what = "r", groups = NULL, na.rm = TRUE, ...) { locations <- attr(x, "locations") n <- length(locations) if (what == "pred") { fits <- get_fit(x) for (i in names(fits)) { fits[[i]] <- fits[[i]]$info$pred fits[[i]]$location <- i if (!is.null(groups)) { tmp <- strsplit(i, "_")[[1]][[groups]] fits[[i]]$groups <- factor(tmp, tmp) } } res <- do.call("rbind", fits) return(res) } is_matrix <- grepl("conf", what) the_names <- vapply(locations, paste, character(1), collapse = "_") need_col_names <- TRUE if (is_matrix) { res <- matrix(0.0, nrow = n, ncol = 2L) } else { res <- numeric(n) } for (i in seq_len(n)) { tmp <- x[[locations[[i]]]]$info[[what]] tmp <- if (is.null(tmp)) NA_real_ else tmp if (is_matrix) { if (need_col_names && all(!is.na(tmp))) { colnames(res) <- colnames(tmp) need_col_names <- FALSE } res[i, ] <- tmp } else { res[[i]] <- tmp } } if (is_matrix) { rownames(res) <- the_names } else { names(res) <- the_names } if (na.rm) { nonas <- stats::complete.cases(res) res <- if (is_matrix) res[nonas, , drop = FALSE] else res[nonas] } res } incidence/R/fit.R0000644000176200001440000001676414621104516013317 0ustar liggesusers#' Fit exponential models to incidence data #' #' The function `fit` fits two exponential models to incidence data, of the #' form: \eqn{log(y) = r * t + b} \cr where 'y' is the incidence, 't' is time #' (in days), 'r' is the growth rate, and 'b' is the origin. The function `fit` #' will fit one model by default, but will fit two models on either side of a #' splitting date (typically the peak of the epidemic) if the argument `split` #' is provided. When groups are present, these are included in the model as main #' effects and interactions with dates. The function `fit_optim_split()` can be #' used to find the optimal 'splitting' date, defined as the one for which the #' best average R2 of the two models is obtained. 
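#' As a rough sketch (assuming a weekly incidence object `i.7` built as in the
#' examples below), a two-phase fit can be obtained with either a manual or an
#' optimised split:
#' ```
#' fit(i.7, split = as.Date("2014-10-15"))  # split chosen manually
#' fit_optim_split(i.7)                     # split chosen by optimisation
#' ```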
Plotting can be done using #' `plot`, or added to an existing incidence plot by the piping-friendly #' function `add_incidence_fit()`. #' #' @export #' #' @rdname fit #' #' @return For `fit()`, a list with the class `incidence_fit` (for a #' single model), or a list containing two `incidence_fit` objects (when #' fitting two models). `incidence_fit` objects contain: #' #' - `$model`: the fitted linear model #' - `$info`: a list containing various information extracted from the model #' (detailed further) #' - `$origin`: the date corresponding to day '0' #' #' The `$info` item is a list containing: #' #' - `r`: the growth rate #' - `r.conf`: the confidence interval of 'r' #' - `pred`: a `data.frame` containing predictions of the model, #' including the true dates (`dates`), their numeric version used in the #' model (`dates.x`), the predicted value (`fit`), and the lower #' (`lwr`) and upper (`upr`) bounds of the associated confidence #' interval. #' #' - `doubling`: the predicted doubling time in days; exists only if 'r' is #' positive #' - `doubling.conf`: the confidence interval of the doubling time #' - `halving`: the predicted halving time in days; exists only if 'r' is #' negative #' - `halving.conf`: the confidence interval of the halving time #' #' For `fit_optim_split`, a list containing: #' #' - `df`: a `data.frame` of dates that were used in the optimization #' procedure, and the corresponding average R2 of the resulting models. #' - `split`: the optimal splitting date #' - `fit`: an `incidence_fit_list` object containing the fit for each split. #' If the `separate_split = TRUE`, then the `incidence_fit_list` object will #' contain these splits nested within each group. All of the `incidence_fit` #' objects can be retrieved with [get_fit()]. #' - `plot`: a plot showing the content of `df` (ggplot2 object) #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com}, Zhian N. Kamvar #' \email{zkamvar@@gmail.com}. #' #' @seealso the [incidence()] function to generate the 'incidence' #' objects. The [get_fit()] function to flatten `incidence_fit_list` objects to #' a list of `incidence_fit` objects. #' #' @param x An incidence object, generated by the function #' [incidence()]. For the plotting function, an `incidence_fit` #' object. #' #' @param split An optional time point identifying the separation between the #' two models. If NULL, a single model is fitted. If provided, two models would #' be fitted on the time periods on either side of the split. #' #' @param level The confidence interval to be used for predictions; defaults to #' 95%. #' #' @param quiet A logical indicating if warnings from `fit` should be #' hidden; FALSE by default. Warnings typically indicate some zero incidence, #' which are removed before performing the log-linear regression. 
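#' A minimal sketch (reusing the `i.7` object defined in the examples below):
#' ```
#' fit(i.7[1:20], quiet = TRUE)  # silence the zero-incidence warning
#' ```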
#' #' @examples #' #' if (require(outbreaks)) { withAutoprint({ #' dat <- ebola_sim$linelist$date_of_onset #' #' ## EXAMPLE WITH A SINGLE MODEL #' #' ## compute weekly incidence #' i.7 <- incidence(dat, interval=7) #' plot(i.7) #' plot(i.7[1:20]) #' #' ## fit a model on the first 20 weeks #' f <- fit(i.7[1:20]) #' f #' names(f) #' head(get_info(f, "pred")) #' #' ## plot model alone (not recommended) #' plot(f) #' #' ## plot data and model (recommended) #' plot(i.7, fit = f) #' plot(i.7[1:25], fit = f) #' #' ## piping versions #' if (require(magrittr)) { withAutoprint({ #' plot(i.7) %>% add_incidence_fit(f) #' #' #' ## EXAMPLE WITH 2 PHASES #' ## specifying the peak manually #' f2 <- fit(i.7, split = as.Date("2014-10-15")) #' f2 #' plot(i.7) %>% add_incidence_fit(f2) #' #' ## finding the best 'peak' date #' f3 <- fit_optim_split(i.7) #' f3 #' plot(i.7) %>% add_incidence_fit(f3$fit) #' })} #' #' })} #' ## The model fitted is a simple linear regression on the log-incidence. ## Non-trivial bits involve: ## 1) Fitting several models ## I.e. in case there is an increasing and a decreasing phase, we fit one ## model for each phase separately. ## 2) log(0) ## No satisfying solutions so far; for now removing the NAs ## 3) Several groups ## In this case, the number of models does not change, but models automatically ## include groups with interaction, whether or not it is significant. ## 4) Values of dates used as 'x' ## To retain generality, we need to use numbers (not Date or POSIXct) as 'x' ## axis for the model. Therefore, all dates are expressed as numbers of days ## since the first case (aka 'day 0' or 'origin'), picking the middle of each ## time interval. We also keep track of the origin, so that actual dates can be ## reconstructed during the plotting. Each 'fit' object has its own origin. fit <- function(x, split = NULL, level = 0.95, quiet = FALSE){ n.groups <- ncol(x$counts) ## remove dates with at least one incidence of zero to.keep <- apply(x$counts, 1, min) > 0 if (!quiet && !all(to.keep)) { warning(sprintf("%d dates with incidence of 0 ignored for fitting", sum(!to.keep))) } x <- x[to.keep] ## If there is only one date with non-zero incidence ## then no model can be fit. If there are no days with ## non-zero incidence, creation of incidence object throws ## error anyway. if (x$timespan == 1) { stop("Only 1 date with non-zero incidence.
Cannot fit model to 1 data point.") } ## Ensure that model coefficients are based on a daily timestep (not seconds) if (inherits(x$dates, 'POSIXt')) { x$dates <- as.Date(x$dates) } ## Constructing the model based on number of groups present if (n.groups == 1) { the_model <- "log(counts) ~ dates.x" } else { the_model <- "log(counts) ~ dates.x * groups" } the_model <- stats::formula(the_model) ## model without split (1 model) if (is.null(split)) { df <- as.data.frame(x, long = TRUE) ## exact dates df$dates.x <- get_dates(x, position = "center", count_days = TRUE) lm1 <- stats::lm(the_model, data = df) # updating the call for easier inspection by the user lm1$call[[2]] <- the_model out <- extract_info(lm1, min(get_dates(x)), level) } else { x1 <- x[x$dates <= split] x2 <- x[x$dates >= split] df1 <- as.data.frame(x1, long = TRUE) df2 <- as.data.frame(x2, long = TRUE) ## exact dates df1$dates.x <- get_dates(x1, position = "center", count_days = TRUE) df2$dates.x <- get_dates(x2, position = "center", count_days = TRUE) lm1 <- stats::lm(the_model, data = df1) lm2 <- stats::lm(the_model, data = df2) # updating the call for easier inspection by the user lm1$call[[2]] <- the_model -> lm2$call[[2]] before <- extract_info(lm1, min(get_dates(x1)), level) after <- extract_info(lm2, min(get_dates(x2)), level) out <- list(before = before, after = after ) attr(out, "locations") <- list("before", "after") class(out) <- "incidence_fit_list" } out } incidence/R/check_timespan.R0000644000176200001440000000131614621104516015475 0ustar liggesuserscheck_timespan <- function(x) { max_days <- as.difftime(getOption("incidence.max.days"), units = "days") my_range <- range(x, na.rm = TRUE) if (diff(my_range) > max_days) { msg <- paste("The data has a date range of greater than %d days [%s to %s].", "Please check your data to ensure this is accurate.\n", "To remove this warning, set the `incidence.max.days` option", "to Inf:\n\n\toptions(incidence.max.days = Inf)" ) msg <- sprintf(msg, max_days, as.character(my_range[1]), as.character(my_range[2])) warning(msg, immediate. = TRUE, call. = FALSE) } invisible() } incidence/R/check_dates.R0000644000176200001440000000310114621104516014747 0ustar liggesusers#' Check date cromulence #' #' This function checks that usable dates are provided, and set non-finite #' values to NA. It also makes a few trivial conversions on the fly. #' #' @param x a vector that represents dates. Can be in almost any format #' @param error_on_NA a logical specifing whether or not an error should be #' thrown if NAs are present in the dates. Defaults to FALSE. #' @return an object in either integer, Date, or POSIXt #' @noRd check_dates <- function(x, error_on_NA = FALSE, ...) { if (is.null(x)) { stop("dates is NULL", call. = FALSE) } if (is.character(x)) { x <- as.Date(x, ...) } not_finite <- !is.finite(x) if (sum(not_finite) > 0) { x[not_finite] <- NA } if (any(is.na(x)) && error_on_NA) { msg <- "NA detected in the dates" stop(msg, call. = FALSE) } if (sum(!is.na(x)) < 1) { stop("At least one (non-NA) date must be provided", call. = FALSE) } if (inherits(x, "Date")) { check_timespan(x) return(x) } if (inherits(x, "POSIXt")) { check_timespan(x) return(x) } if (is.integer(x)) { return(x) } if (is.numeric(x)) { x_ori <- x x <- as.integer(floor(x)) if (!isTRUE(note <- all.equal(x, x_ori))) { msg <- paste0( "Flooring from non-integer date caused approximations:\n", note) warning(msg, call. 
= FALSE) } return(x) } formats <- c("Date", "POSIXct", "integer", "numeric", "character") msg <- paste0( "Input could not be converted to date. Accepted formats are:\n", paste(formats, collapse = ", ")) stop(msg) } incidence/R/incidence.R0000644000176200001440000003617314621110064014445 0ustar liggesusers#' Compute incidence of events from a vector of dates. #' #' This function computes incidence based on dates of events provided in #' various formats. A fixed interval, provided as numbers of days, is used to #' define time intervals. Counts within an interval always include the first #' date, after which they are labeled, and exclude the second. For instance, #' intervals labeled as 0, 3, 6, ... mean that the first bin includes days 0, 1 #' and 2, the second interval includes 3, 4 and 5 etc. #' #' @param dates A vector of dates, which can be provided as objects of the #' class: integer, numeric, Date, POSIXct, POSIXlt, and character. (See Note #' about `numeric` and `character` formats) #' #' @param interval An integer or character indicating the (fixed) size of the #' time interval used for computing the incidence; defaults to 1 day. This can #' also be a text string that corresponds to a valid date interval: day, week, #' month, quarter, or year. (See Note). #' #' @param groups An optional factor defining groups of observations for which #' incidence should be computed separately. #' #' @param na_as_group A logical value indicating if missing group (NA) should be #' treated as a separate group. #' #' @param first_date,last_date optional first/last dates to be used in the #' epicurve. When these are `NULL` (default), the first/last #' dates are taken from the observations. If these dates are provided, the #' observations will be trimmed to the range of \[first_date, last_date\]. #' #' @param ... Additional arguments passed to other methods (none are used). #' #' @return A list with the class `incidence`, which contains the #' following items: #' #' #' - **dates**: The dates marking the left side of the bins used for counting #' events. When `standard = TRUE` and the interval represents weeks, months, #' quarters, or years, the first date will represent the first standard date #' (See Interval specification, below). #' #' - **counts**: A matrix of incidence counts, with one column per group (and #' a single column if no groups were used). #' #' - **timespan**: The length of the period for which incidence is computed, in #' days. #' #' - **interval**: The bin size. If it's an integer, it represents the number #' of days between each bin. It can also be a character, e.g. "2 weeks" or #' "6 months". #' #' - **n**: The total number of cases. #' #' - **weeks**: Dates in week format (YYYY-Www), where YYYY corresponds to the #' year of the given week and ww represents the numeric week of the year. #' This will be produced by the function [aweek::date2week()]. Note that #' these will have a special `"week_start"` attribute indicating which day of #' the ISO week the week starts on (see Weeks, below). #' #' - **isoweeks**: ISO 8601 week format YYYY-Www, which is returned only when #' ISO week-based weekly incidence is computed. #' #' #' @details For details about the `incidence class`, see the dedicated #' vignette:\cr `vignette("incidence_class", package = "incidence")` #' #' @note \subsection{Input data (`dates`)}{ #' - **Decimal (numeric) dates**: will be truncated with a warning #' - **Character dates** should be in the unambiguous `yyyy-mm-dd` (ISO 8601) #' format.
Any other format will trigger an error. #' } #' #' \subsection{Interval specification (`interval`)}{ #' If `interval` is a valid character (e.g. "week" or "1 month"), then #' the bin will start at the beginning of the interval just before the first #' observation by default. For example, if the first case was recorded on #' Wednesday, 2018-05-09: #' #' - "week" : first day of the week (i.e. Monday, 2018-05-07) (defaults to ISO weeks, see "Week intervals", below) #' - "month" : first day of the month (i.e. 2018-05-01) #' - "quarter" : first day of the quarter (i.e. 2018-04-01) #' - "year" : first day of the calendar year (i.e. 2018-01-01) #' #' These default intervals can be overridden with `standard = FALSE`, which #' sets the interval to begin at the first observed case. #' } #' #' \subsection{Week intervals}{ #' #' As of _incidence_ version 1.7.0, it is possible to construct standardized #' incidence objects standardized to any day of the week thanks to the #' [aweek::date2week()] function from the \pkg{aweek} package. The default #' state is to use the ISO 8601 definition of weeks, which start on Monday. You can #' specify the day of the week an incidence object should be standardised to by #' using the pattern "\{n\} \{W\} weeks" where "\{W\}" represents the weekday in an #' English or current locale and "\{n\}" represents the duration, but this can be #' omitted. Below are examples of specifying weeks starting on different days #' assuming we had data that started on 2016-09-05, which is ISO week 36 of #' 2016: #' #' - interval = "2 monday weeks" (Monday 2016-09-05) #' - interval = "1 tue week" (Tuesday 2016-08-30) #' - interval = "1 Wed week" (Wednesday 2016-08-31) #' - interval = "1 Thursday week" (Thursday 2016-09-01) #' - interval = "1 F week" (Friday 2016-09-02) #' - interval = "1 Saturday week" (Saturday 2016-09-03) #' - interval = "Sunday week" (Sunday 2016-09-04) #' #' It's also possible to use something like "3 weeks: Saturday"; in addition, #' there are keywords reserved for specific days of the week: #' #' - interval = "week", standard = TRUE (Default, Monday) #' - interval = "ISOweek" (Monday) #' - interval = "EPIweek" (Sunday) #' - interval = "MMWRweek" (Sunday) #' #' The "EPIweek" specification is not strictly reserved for CDC epiweeks, but #' can be prefixed (or postfixed) by a day of the week: "1 epiweek: Saturday". #' #' } #' #' \subsection{The `first_date` argument}{ #' Previous versions of _incidence_ had the `first_date` argument override #' `standard = TRUE`. It has been changed as of _incidence_ version 1.6.0 to #' be more consistent with the behavior when `first_date = NULL`. This, however, #' may be a change in behaviour, so a warning is now issued once and only once #' if `first_date` is specified, but `standard` is not. To never see this #' warning, use `options(incidence.warn.first_date = FALSE)`. #' } #' #' The intervals for "month", "quarter", and "year" will necessarily vary in the #' number of days they encompass and warnings will be generated when the first #' date falls outside of a calendar date that is easily represented across the #' interval. #' #' #' @seealso #' The main other functions of the package include: #' #' - [incidence::plot.incidence()]: Plot epicurves from an incidence object. #' #' - [incidence::fit()]: Fit log-linear model to computed incidence. #' #' - [incidence::fit_optim_split()]: Find the optimal peak of the epidemic #' and fit log-linear models on either side of the peak.
#' #' - [incidence::subset()]: Handling of `incidence` #' objects. #' #' - [incidence::pool()]: Sum incidence over groups. #' #' - [incidence::as.data.frame.incidence()]: Convert an `incidence` object to a #' `data.frame`. #' #' The following vignettes are also available: #' #' - `overview`: Provides an overview of the package's features. #' #' - `customize_plot`: Provides some tips on finer plot customization. #' #' - `incidence_class`: Details the content of the `incidence` #' class. #' #' #' #' #' @author Thibaut Jombart, Rich Fitzjohn, Zhian Kamvar #' #' @rdname incidence #' #' @importFrom utils head tail #' #' @export #' #' @examples #' ## toy example #' incidence(c(1, 5, 8, 3, 7, 2, 4, 6, 9, 2)) #' incidence(c(1, 5, 8, 3, 7, 2, 4, 6, 9, 2), 2) #' #' ## example using simulated dataset #' if(require(outbreaks)) { withAutoprint({ #' onset <- outbreaks::ebola_sim$linelist$date_of_onset #' #' ## daily incidence #' inc <- incidence(onset) #' inc #' plot(inc) #' #' ## weekly incidence #' inc.week <- incidence(onset, interval = 7, standard = FALSE) #' inc.week #' plot(inc.week) #' plot(inc.week, border = "white") # with visible border #' #' # Starting on Monday #' inc.isoweek <- incidence(onset, interval = "isoweek") #' inc.isoweek #' #' # Starting on Sunday #' inc.epiweek <- incidence(onset, interval = "epiweek") #' inc.epiweek #' #' # Starting on Saturday #' inc.epiweek <- incidence(onset, interval = "saturday epiweek") #' inc.epiweek #' #' ## use group information #' sex <- outbreaks::ebola_sim$linelist$gender #' inc.week.gender <- incidence(onset, interval = 7, #' groups = sex, standard = FALSE) #' inc.week.gender #' head(inc.week.gender$counts) #' plot(inc.week.gender, border = "grey90") #' inc.satweek.gender <- incidence(onset, interval = "2 epiweeks: saturday", #' groups = sex) #' inc.satweek.gender #' plot(inc.satweek.gender, border = "grey90") #' #' })} #' #' # Use of first_date #' d <- Sys.Date() + sample(-3:10, 10, replace = TRUE) #' #' # `standard` specified, no warning #' di <- incidence(d, interval = "week", first_date = Sys.Date() - 10, standard = TRUE) #' #' # warning issued if `standard` not specified #' di <- incidence(d, interval = "week", first_date = Sys.Date() - 10) #' #' # second instance: no warning issued #' di <- incidence(d, interval = "week", first_date = Sys.Date() - 10) #' #' incidence <- function(dates, interval = 1L, ...) { UseMethod("incidence") } #' @export #' @rdname incidence incidence.default <- function(dates, interval = 1L, ...) { incidence(check_dates(dates), interval = interval, ...) } #' @export #' @rdname incidence #' #' @param standard (Only applicable to Date objects) When `TRUE` (default) and the #' `interval` one of "week", "month", "quarter", or "year", then this will #' cause the bins for the counts to start at the beginning of the interval #' (See Note). incidence.Date <- function(dates, interval = 1L, standard = TRUE, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ...) { the_call <- match.call() warnme <- getOption('incidence.warn.first_date', FALSE) if (warnme && !is.null(the_call[["first_date"]]) && is.null(the_call[["standard"]])) { fd <- as.character(deparse(the_call[["first_date"]])) msg <- "\n\nAs of incidence version 1.6.0, the default behavior has been" msg <- paste(msg, "modified so that `first_date` no longer overrides") msg <- paste(msg, "`standard`. 
This means that the first date will be") msg <- paste(msg, "either on or before %s.\nIf you want to use %s as the precise") msg <- paste(msg, "`first_date`, set `standard = FALSE`.") msg <- paste(msg, "To remove this warning in the future, explicitly set the `standard` argument OR use `options(incidence.warn.first_date = FALSE)`\n", sep = "\n\n") warning(sprintf(msg, first_date, fd)) # turn the warning off so that it's not so noisy options(incidence.warn.first_date = FALSE) } dots <- check_dots(list(...), names(formals(incidence.Date))) ## make sure input can be used if (!is.logical(standard)) { stop("The argument `standard` must be either `TRUE` or `FALSE`.") } if ("standard" %in% names(dots)) { # the user specified iso_week and was given a warning. standard <- dots$standard } out <- make_incidence(dates = dates, interval = interval, groups = groups, na_as_group = na_as_group, first_date = first_date, last_date = last_date, standard = standard, ...) if (check_week(interval) && standard) { # dates are the first days of corresponding ISOweeks. week_start <- get_week_start(interval) out$weeks <- aweek::date2week(out$dates, week_start, floor_day = TRUE) if (attr(out$weeks, "week_start") == 1) { out$isoweeks <- as.character(out$weeks) } } out } #' @export #' @rdname incidence incidence.character <- function(dates, interval = 1L, standard = TRUE, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ...) { iso_std <- grepl("^[0-9]{4}-[01][0-9]-[0-3][0-9]$", trimws(dates)) iso_std[is.na(dates)] <- TRUE # prevent false alarms if (!all(iso_std)) { msg <- paste("Not all dates are in ISO 8601 standard format (yyyy-mm-dd).", "The first incorrect date is %s" ) stop(sprintf(msg, dates[!iso_std][1])) } dots <- check_dots(list(...), names(formals(incidence.Date))) dates <- check_dates(dates) ret <- incidence(as.Date(trimws(dates)), interval = interval, standard = standard, groups = groups, na_as_group = na_as_group, first_date = first_date, last_date = last_date, ...) ret } ## The default incidence is designed for dates provided as integers, and a fixed ## time interval defaulting to 1. 'bins' are time intervals, identified by the ## left date, left-inclusive and right-exclusive, i.e. the time interval defined ## by d1 and d2 is [d1, d2[. #' @export #' @rdname incidence incidence.integer <- function(dates, interval = 1L, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ...) { dots <- check_dots(list(...), names(formals(incidence.integer))) interval <- valid_interval_integer(interval) out <- make_incidence(dates = dates, interval = interval, groups = groups, na_as_group = na_as_group, first_date = first_date, last_date = last_date, ...) out$dates <- as.integer(out$dates) out$timespan <- as.integer(out$timespan) out$interval <- as.integer(out$interval) out } #' @export #' @rdname incidence incidence.numeric <- function(dates, interval = 1L, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ...) { dots <- check_dots(list(...), names(formals(incidence.numeric))) interval <- valid_interval_integer(interval) ## make sure input can be used out <- make_incidence(dates = dates, interval = interval, groups = groups, na_as_group = na_as_group, first_date = first_date, last_date = last_date, ...) out$dates <- as.numeric(out$dates) out } #' @export #' @rdname incidence incidence.POSIXt <- function(dates, interval = 1L, standard = TRUE, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ...) 
{ ## make sure input can be used dots <- check_dots(list(...), names(formals(incidence.Date))) dates <- check_dates(as.POSIXct(dates)) ret <- incidence(as.Date(dates), interval = interval, standard = standard, groups = groups, na_as_group = na_as_group, first_date = first_date, last_date = last_date, ...) ret$dates <- as.POSIXlt(ret$dates) if (inherits(dates, "POSIXct")) { ret$dates <- as.POSIXct(ret$dates) } ret } incidence/R/dim.R0000644000176200001440000000024614621104516013272 0ustar liggesusers#' @rdname accessors #' @export #' @return #' - `dim()` the dimensions in the number of bins and number of groups dim.incidence <- function(x) { dim(x$counts) } incidence/R/pool.R0000644000176200001440000000177214621104516013477 0ustar liggesusers##' Pool 'incidence' across groups ##' ##' This function pools incidence across all groups of an `incidence` ##' object. The resulting [incidence()] object will contains counts ##' summed over all groups present in the input. ##' ##' @author Thibaut Jombart \email{thibautjombart@@gmail.com} ##' ##' @seealso The [incidence()] function to generate the 'incidence' ##' objects. ##' ##' @inheritParams incidence ##' ##' @export ##' ##' @examples ##' dat <- as.integer(c(0,1,2,2,3,5,7)) ##' group <- factor(c(1, 2, 3, 3, 3, 3, 1)) ##' i <- incidence(dat, groups = group) ##' i ##' i$counts ##' ##' ## pool all groups ##' pool(i) ##' pool(i)$counts ##' ##' ## pool only groups 1 and 3 ##' pool(i[,c(1,3)]) ##' pool(i[,c(1,3)])$counts ##' pool <- function(x){ if (!inherits(x, "incidence")) { stop(sprintf( "x should be an 'incidence' object (its class is: %s)", class(x))) } if (ncol(x$counts) == 1) return(x) x$counts <- matrix(apply(x$counts, 1 , sum), ncol = 1) x } incidence/R/get_dates.R0000644000176200001440000000362114621104516014460 0ustar liggesusers#' Retrieve dates from an incidence object #' #' @param x an [incidence] object #' @param ... Unused #' #' @return a vector of dates or numerics #' @export #' #' @examples #' #' set.seed(999) #' dat <- as.Date(Sys.Date()) + sample(-3:50, 100, replace = TRUE) #' x <- incidence(dat, interval = "month") #' get_dates(x) #' get_dates(x, position = "middle") get_dates <- function(x, ...) { UseMethod("get_dates") } #' @rdname get_dates #' @export #' @aliases get_dates.default get_dates.default <- function(x, ...) { stop(sprintf("Not implemented for class %s", paste(class(x), collapse = ", "))) } #' @param position One of "left", "center", "middle", or "right" specifying what #' side of the bin the date should be drawn from. #' @param count_days If `TRUE`, the result will be represented as the number of #' days from the first date. #' #' @rdname get_dates #' @keywords accessors #' @export #' #' @examples #' set.seed(999) #' dat <- as.Date(Sys.Date()) + sample(-3:50, 100, replace = TRUE) #' x <- incidence(dat, interval = "month") #' get_dates(x) #' get_dates(x, "center") #' get_dates(x, "right") #' #' # Return dates by number of days from the first date #' get_dates(x, count_days = TRUE) #' get_dates(incidence(-5:5), count_days = TRUE) get_dates.incidence <- function(x, position = "left", count_days = FALSE, ...) 
{ POSITION <- match.arg(position, c("left", "center", "middle", "right")) if (!count_days && POSITION == "left") return(x$dates) # Default: left side of bins first_date <- min(x$dates) res <- x$dates - first_date if (POSITION %in% c("center", "middle", "right")) { divisor <- if (POSITION == "right") 1L else 2L res <- res + get_interval(x, integer = TRUE)/divisor } # This part is necessary to avoid the Date class rounding the result -_- res <- as.numeric(res) if (count_days) { return(res) } else { return(first_date + res) } } incidence/R/make_incidence.R0000644000176200001440000000567614621104516015453 0ustar liggesusers##' Default internal constructor for incidence objects. ##' ##' ##' ##' @param dates A vector of dates, which can be provided as objects of the ##' class: integer, numeric, Date, POSIXct. Note that decimal numbers will be ##' floored with a warning. ##' ##' @param interval An integer indicating the (fixed) size of the time interval ##' used for computing the incidence; defaults to 1 day. ##' ##' @param groups An optional factor defining groups of observations for which ##' incidence should be computed separately. ##' ##' @param na_as_group A logical value indicating if missing group (NA) should be ##' treated as a separate group. ##' ##' @param last_date The last date to be included in the produced epicurve. If ##' `NULL` (default), the last date will be the most recent provided in ##' `dates`. ##' ##' @param ... Additional arguments passed to other methods (none are used). ##' ##' @author Zhian Kamvar ##' @return an incidence object ##' @noRd make_incidence <- function(dates, interval = 1L, groups = NULL, na_as_group = TRUE, first_date = NULL, last_date = NULL, ...) { dots <- list(...) ## make sure input can be used dates <- check_dates(dates) interval <- check_interval(interval, if (is.null(dots$standard)) TRUE else dots$standard) groups <- check_groups(groups, dates, na_as_group) ## Check the interval and arrange the breaks first_date <- check_boundaries(dates, first_date, "first") last_date <- check_boundaries(dates, last_date, "last") breaks <- make_breaks_easier(dates, the_interval = interval, first_date = first_date, last_date = last_date, dots = dots ) if (!is.numeric(interval) && grepl("week", interval)) { interval <- get_week_duration(interval) } ## Trim the dates and groups as necessary trimmed <- trim_observations(dates, first_date, last_date) dates <- dates[trimmed] groups <- groups[trimmed] ## compute counts within bins defined by the breaks if (!is.null(groups)) { counts <- tapply(dates, groups, count.dates, breaks) counts <- matrix(as.integer(unlist(counts, use.names = FALSE)), ncol = length(levels(groups))) colnames(counts) <- levels(groups) } else { counts <- count.dates(dates, breaks) counts <- matrix(as.integer(counts), ncol = 1L) } out <- list(dates = breaks, # left side of bins (incl left, excl right) counts = counts, # computed incidence, 1 col / group timespan = diff(range(breaks, na.rm = TRUE)) + 1, interval = interval, # fixed bin size n = sum(counts), # total number of cases cumulative = FALSE) # not cumulative at creation class(out) <- "incidence" out } incidence/R/conversion.R0000644000176200001440000001437214621104516014713 0ustar liggesusers#' Conversion of incidence objects #' #' These functions convert `incidence` objects into other classes. 
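#' A rough sketch of both directions (mirroring the examples below):
#' ```
#' i <- incidence(c(1, 5, 8, 3, 7, 2))  # toy incidence object
#' as.data.frame(i)                     # counts per date as a data.frame
#' as.incidence(i$counts, i$dates)      # rebuild an incidence object
#' ```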
#' #' @rdname conversions #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com}, Rich Fitzjohn #' #' @importFrom stats as.ts #' #' @export #' #' @param x An `incidence` object, or an object to be converted to #' `incidence` (see details). #' #' @param ... Further arguments passed to other functions (not used). #' #' @param long A logical indicating if the output data.frame should be 'long', i.e. where a single #' column containing 'groups' is added in case of data computed on several groups. #' #' #' @export #' #' #' @seealso the [incidence()] function to generate the 'incidence' objects. #' #' #' @details Conversion to `incidence` objects should only be done when the #' original dates are not available. In such a case, the argument `x` #' should be a matrix corresponding to the `$counts` element of an #' `incidence` object, i.e. giving counts with time intervals in rows and #' named groups in columns. In the absence of groups, a single unnamed column #' should be given. `data.frame` and vectors will be coerced to a matrix. #' #' #' @examples #' ## create fake data #' data <- c(0,1,1,2,1,3,4,5,5,5,5,4,4,26,6,7,9) #' sex <- sample(c("m","f"), length(data), replace=TRUE) #' #' ## get incidence per group (sex) #' i <- incidence(data, groups = sex) #' i #' plot(i) #' #' ## convert to data.frame #' as.data.frame(i) #' #' ## same, 'long format' #' as.data.frame(i, long = TRUE) #' #' #' #' ## conversion from a matrix of counts to an incidence object #' i$counts #' new_i <- as.incidence(i$counts, i$dates) #' new_i #' all.equal(i, new_i) #' as.data.frame.incidence <- function(x, ..., long = FALSE){ counts <- x$counts gnames <- group_names(x) unnamed <- is.null(gnames) && ncol(counts) == 1L if (unnamed) { colnames(counts) <- "counts" } ws <- attr(x$weeks, "week_start") if ("weeks" %in% names(x)) { out <- data.frame(dates = x$dates, weeks = as.character(x$weeks), isoweeks = as.character(x$weeks), counts, check.names = FALSE, stringsAsFactors = TRUE) out$weeks <- aweek::date2week(out$dates, ws, floor_day = TRUE, factor = TRUE) } else { out <- data.frame(dates = x$dates, counts, check.names = FALSE, stringsAsFactors = TRUE) } ## handle the long format here if (long && !unnamed) { groups <- factor(rep(gnames, each = nrow(out)), levels = gnames) counts <- as.vector(x$counts) if ("weeks" %in% names(x)) { out <- data.frame(dates = out$dates, weeks = as.character(out$weeks), isoweeks = out$isoweeks, counts = counts, groups = groups, check.names = FALSE, stringsAsFactors = TRUE) out$weeks <- aweek::date2week(out$dates, ws, floor_day = TRUE, factor = TRUE) } else { out <- data.frame(dates = out$dates, counts = counts, groups = groups, check.names = FALSE, stringsAsFactors = TRUE) } } if (all(names(x) != "isoweeks")) out$isoweeks <- NULL out } ## Conversion to 'incidence' class can be handy to plot and handle data for ## which incidence has already been computed. To ensure that the output is a ## correct object, we use the 'incidence' function on fake data that match the ## counts inputs. This avoids potential issues such as non-regular intervals ## (the first time interval is used for the entire data). #' @export #' @rdname conversions as.incidence <- function(x, ...) { UseMethod("as.incidence", x) } #' @export #' #' @rdname conversions #' #' @param dates A vector of dates, each corresponding to the (inclusive) lower #' limit of the bins. #' #' @param interval An integer indicating the time interval used in the #' computation of the incidence.
If NULL, it will be determined from the first #' time interval between provided dates. If only one date is provided, it will #' trigger an error. #' #' @param standard A logical indicating whether standardised dates should be #' used. Defaults to `TRUE`. #' #' @param isoweeks Deprecated. Use standard. #' as.incidence.matrix <- function(x, dates = NULL, interval = NULL, standard = TRUE, isoweeks = standard, ...) { if (is.null(dates)) { if (!is.null(interval)) { interval <- check_interval(interval) dates <- seq(1, length = nrow(x), by = interval) } else{ dates <- seq(1, length = nrow(x), by = 1L) } } dates <- check_dates(dates, error_on_NA = TRUE) last_date <- max(dates) ## determine interval if (is.null(interval)) { if (length(dates) < 2L) { msg <- "Interval needs to be specified if there is only one date." stop(msg) } else { interval <- as.integer(diff(dates[1:2])) } } else { interval <- check_interval(interval) } ## generate fake dates x_vector <- as.vector(x) fake_dates <- rep(rep(dates, ncol(x)), x_vector) ## determine groups if (ncol(x) > 1L) { x_groups <- colnames(x) if (is.null(x_groups)) { msg <- "Columns should be named to label groups." stop(msg) } group_sizes <- colSums(x) fake_groups <- rep(x_groups, group_sizes) } else { fake_groups <- NULL } if (inherits(fake_dates, c("Date", "POSIXt"))) { incidence(fake_dates, interval = interval, groups = fake_groups, standard = isoweeks, last_date = last_date) } else { incidence(fake_dates, interval = interval, groups = fake_groups, last_date = last_date) } } #' @export #' #' @rdname conversions as.incidence.data.frame <- function(x, dates = NULL, interval = NULL, isoweeks = TRUE, ...) { as.incidence(as.matrix(x), dates, interval, isoweeks, ...) } #' @export #' #' @rdname conversions as.incidence.numeric <- function(x, dates = NULL, interval = NULL, isoweeks = TRUE, ...) { as.incidence(as.matrix(x), dates, interval, isoweeks, ...) } incidence/R/internals.R0000644000176200001440000000226414621104516014522 0ustar liggesusers ## These functions are meant for internal use only, and are not ## exported. Functions which check content return it, after potential trivial ## conversions. #' Count dates within bins #' #' @param dates a vector of dates, integers, or numerics #' @param breaks an ordered vector of dates or integers #' #' @author Thibaut Jombart #' @return an integer vector of the number of incidences per date #' @noRd #' count.dates <- function(dates, breaks){ counts <- table(cut(as.integer(dates), breaks = c(breaks, Inf), right = FALSE)) as.integer(counts) } ## Implement isTRUE and isFALSE to avoid dep on R 3.5.0 isFALSE <- function(x) { is.logical(x) && length(x) == 1L && !is.na(x) && !x } isTRUE <- function(x) { is.logical(x) && length(x) == 1L && !is.na(x) && x } ## A fix for the nonesensical behaviour of `sample` when first argument is of ## length 1. sample_ <- function(x, ...) { x[sample.int(length(x), ...)] } ## quantiles for Date objects quantile_Date <- function(x, ...) { if (!inherits(x, "Date")) { stop("'x' is not a 'Date' object") } first_date <- min(x, na.rm = TRUE) x_num <- as.numeric(x - min(x)) out <- stats::quantile(x_num, ...) first_date + out } incidence/R/plot.R0000644000176200001440000003405514621104516013504 0ustar liggesusers#' Plot function for incidence objects #' #' This function is used to visualise the output of the [incidence()] #' function using the package `ggplot2`. #' #' #' @export #' #' @importFrom graphics plot #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com} #' Zhian N. 
Kamvar \email{zkamvar@@gmail.com} #' #' @seealso The [incidence()] function to generate the 'incidence' #' objects. #' #' @param x An incidence object, generated by the function #' [incidence()]. #' #' @param fit An 'incidence_fit' object as returned by [fit()]. #' #' @param stack A logical indicating if bars of multiple groups should be #' stacked, or displayed side-by-side. #' #' @param color The color to be used for the filling of the bars; NA for #' invisible bars; defaults to "black". #' #' @param border The color to be used for the borders of the bars; NA for #' invisible borders; defaults to NA. #' #' @param col_pal The color palette to be used for the groups; defaults to #' `incidence_pal1`. See [incidence_pal1()] for other palettes implemented in #' incidence. #' #' @param alpha The alpha level for color transparency, with 1 being fully #' opaque and 0 fully transparent; defaults to 0.7. #' #' @param xlab The label to be used for the x-axis; empty by default. #' #' @param ylab The label to be used for the y-axis; by default, a label will be #' generated automatically according to the time interval used in incidence #' computation. #' #' @param labels_week a logical value indicating whether the labels of x axis tick #' marks are in week format YYYY-Www when plotting weekly incidence; defaults to #' TRUE. #' #' @param labels_iso (deprecated) This has been superseded by `labels_week`. #' Previously: a logical value indicating whether the labels of x axis tick marks are #' in ISO 8601 week format yyyy-Www when plotting ISO week-based weekly #' incidence; defaults to TRUE. #' #' @param show_cases if `TRUE` (default: `FALSE`), then each observation will be #' outlined by a border. The border defaults to a white border unless specified #' otherwise. This is normally used for outbreaks with a small number of cases. #' Note: this can only be used if `stack = TRUE`. #' #' @param n_breaks the ideal number of breaks to be used for the x-axis #' labeling #' #' @return #' - `plot()` a [ggplot2::ggplot()] object. #' - `make_breaks()` a two-element list. The "breaks" element will contain the #' evenly-spaced breaks as either dates or numbers and the "labels" element #' will contain either a vector of weeks OR a [ggplot2::waiver()] object. #' - `scale_x_incidence()` a \pkg{ggplot2} "ScaleContinuous" object. #' #' @details #' - `plot()` will visualise an incidence object using `ggplot2` #' - `make_breaks()` calculates breaks from an incidence object that always #' align with the bins and start on the first observed incidence. #' - `scale_x_incidence()` produces an appropriate `ggplot2` scale based on #' an incidence object.
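#' For instance (a sketch assuming a weekly incidence object `inc.week` as in
#' the examples below, and that `make_breaks()` is called with its default
#' arguments):
#' ```
#' b <- make_breaks(inc.week)                    # list with $breaks and $labels
#' plot(inc.week) + scale_x_incidence(inc.week)  # re-apply the matching x scale
#' ```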
#' #' @examples #' #' if(require(outbreaks) && require(ggplot2)) { withAutoprint({ #' onset <- outbreaks::ebola_sim$linelist$date_of_onset #' #' ## daily incidence #' inc <- incidence(onset) #' inc #' plot(inc) #' #' ## weekly incidence #' inc.week <- incidence(onset, interval = 7) #' inc.week #' plot(inc.week) # default to label x axis tick marks with isoweeks #' plot(inc.week, labels_week = FALSE) # label x axis tick marks with dates #' plot(inc.week, border = "white") # with visible border #' #' ## use group information #' sex <- outbreaks::ebola_sim$linelist$gender #' inc.week.gender <- incidence(onset, interval = "1 epiweek", groups = sex) #' plot(inc.week.gender) #' plot(inc.week.gender, labels_week = FALSE) #' #' ## show individual cases at the beginning of the epidemic #' inc.week.8 <- subset(inc.week.gender, to = "2014-06-01") #' p <- plot(inc.week.8, show_cases = TRUE, border = "black") #' p #' #' ## update the range of the scale #' lim <- c(min(get_dates(inc.week.8)) - 7*5, #' aweek::week2date("2014-W50", "Sunday")) #' lim #' p + scale_x_incidence(inc.week.gender, limits = lim) #' #' ## customize plot with ggplot2 #' plot(inc.week.8, show_cases = TRUE, border = "black") + #' theme_classic(base_size = 16) + #' theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) #' #' ## adding fit #' fit <- fit_optim_split(inc.week.gender)$fit #' plot(inc.week.gender, fit = fit) #' plot(inc.week.gender, fit = fit, labels_week = FALSE) #' #' })} #' plot.incidence <- function(x, ..., fit = NULL, stack = is.null(fit), color = "black", border = NA, col_pal = incidence_pal1, alpha = .7, xlab = "", ylab = NULL, labels_week = !is.null(x$weeks), labels_iso = !is.null(x$isoweeks), show_cases = FALSE, n_breaks = 6) { stopifnot(is.logical(labels_iso), is.logical(labels_week)) the_call <- match.call() if (any(names(the_call) == "labels_iso")) { if (any(names(the_call) == "labels_week")) { warning("labels_iso is deprecated. The value of `labels_week` will be used.") } else { warning("labels_iso is deprecated. Use `labels_week` instead.") labels_week <- labels_iso } } ## extract data in suitable format for ggplot2 df <- as.data.frame(x, long = TRUE, stringsAsFactors = TRUE) n.groups <- ncol(x$counts) gnames <- group_names(x) ## Use custom labels for usual time intervals if (is.null(ylab)) { if (is.numeric(x$interval)) { if (x$interval == 1) { ylab <- "daily incidence" } else if (x$interval == 7) { ylab <- "weekly incidence" } else if (x$interval == 14) { ylab <- "semi-weekly incidence" } else { ylab <- sprintf("incidence by period of %d days", x$interval) } } else if (is.character(x$interval)) { # capturing the number and type p <- "(\\d*)\\s?([a-z]+?)s?$" num <- gsub(p, "\\1", tolower(x$interval)) itype <- gsub(p, "\\2", tolower(x$interval)) if (num == "" || num == "1") { ylab <- sprintf("%sly incidence", itype) } else { ylab <- sprintf("incidence by a period of %s %ss", num, itype) } } if (!is.null(x$weeks)) { type_of_week <- get_type_of_week(x) ylab <- gsub("(weekl?y?)", sprintf("%s \\1", type_of_week), ylab) } if (isTRUE(x$cumulative)) { ylab <- sub("incidence", "cumulative incidence", ylab) } first_letter <- substring(ylab, 1, 1) substring(ylab, 1, 1) <- toupper(first_letter) } ## Handle stacking stack.txt <- if (stack) "stack" else "dodge" ## By default, the annotation of bars in geom_bar puts the label in the ## middle of the bar. This is wrong in our case as the annotation of a time ## interval is the lower (left) bound, and should therefore be left-aligned ## with the bar. 
Note that we cannot use position_nudge to create the ## x-offset as we need the 'position' argument for stacking. This can be ## addressed by adding interval/2 to the x-axis, but this only works until we ## have an interval such as "month", "quarter", or "year" where the number of ## days for each can vary. To alleviate this, we can create a new column that ## counts the number of days within each interval. ## Adding a variable for width in ggplot df$interval_days <- get_interval(x, integer = TRUE) ## if the date type is POSIXct, then the interval is actually interval seconds ## and needs to be converted to days if (inherits(df$dates, "POSIXct")) { df$interval_days <- df$interval_days * 86400 # 24h * 60m * 60s } ## Important note: it seems safest to specify the aes() as part of the geom, ## not in ggplot(), as it interacts badly with some other geoms like ## geom_ribbon - used e.g. in projections::add_projections(). ## add mid-interval positions for x-axis ## THIS BREAKS THE PLOT WITH ggplot2_3.3.0 ## See bug reports at: ## https://github.com/reconhub/incidence/issues/119 ## https://github.com/tidyverse/ggplot2/issues/3873 ## Temporary fix: changing placement to default of ggplot2::scale_x_date x_axis <- "dates + (interval_days/2)" y_axis <- "counts" out <- ggplot2::ggplot(df) + ggplot2::geom_col(ggplot2::aes_string( x = x_axis, y = y_axis ), width = df$interval_days, position = stack.txt, color = border, alpha = alpha) + ggplot2::labs(x = xlab, y = ylab) ## Handle show_cases here if (show_cases && stack) { squaredf <- df[rep(seq.int(nrow(df)), df$counts), ] squaredf$counts <- 1 squares <- ggplot2::geom_col(ggplot2::aes_string( x = x_axis, y = y_axis ), color = if (is.na(border)) "white" else border, fill = NA, position = "stack", data = squaredf, width = squaredf$interval_days ) out <- out + squares } if (show_cases && !stack) { message("the argument `show_cases` requires the argument `stack = TRUE`") } ## Handle fit objects here; 'fit' can be either an 'incidence_fit' object, ## or a list of these. In the case of a list, we add geoms one after the ## other. if (!is.null(fit)) { if (inherits(fit, "incidence_fit")) { out <- add_incidence_fit(out, fit) } else if (is.list(fit)) { for (i in seq_along(fit)) { fit.i <- fit[[i]] if (!inherits(fit.i, c("incidence_fit", "incidence_fit_list"))) { stop(sprintf( "The %d-th item in 'fit' is not an 'incidence_fit' object, but a %s", i, class(fit.i))) } out <- add_incidence_fit(out, fit.i) } } else { stop("Fit must be a 'incidence_fit' object, or a list of these") } } ## Handle colors ## Note 1: because of the way 'fill' works, we need to specify it through ## 'aes' if not directly in the geom. This causes the kludge below, where we ## make a fake constant group to specify the color and remove the legend. ## Note 2: when there are groups, and the 'color' argument does not have one ## value per group, we generate colors from a color palette. This means that ## by default, the palette is used, but the user can manually specify the ## colors. 
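  ## In short: a single unnamed group uses 'color' as-is (with the legend
  ## hidden); named colors are matched to group names (unused names trigger a
  ## message); and if the number of colors does not match the number of groups,
  ## colors are drawn from 'col_pal' instead.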
if (n.groups < 2 && is.null(gnames)) { out <- out + ggplot2::aes(fill = 'a') + ggplot2::scale_fill_manual(values = color, guide = FALSE) } else { if (!is.null(names(color))) { tmp <- color[gnames] matched <- names(color) %in% names(tmp) if (!all(matched)) { removed <- paste(names(color)[!matched], color[!matched], sep = '" = "', collapse = '", "') message(sprintf("%d colors were not used: \"%s\"", sum(!matched), removed)) } color <- tmp } ## find group colors if (length(color) != n.groups) { msg <- "The number of colors (%d) did not match the number of groups (%d)" msg <- paste0(msg, ".\nUsing `col_pal` instead.") default_color <- length(color) == 1L && color == "black" if (!default_color) { message(sprintf(msg, length(color), n.groups)) } group.colors <- col_pal(n.groups) } else { group.colors <- color } ## add colors to the plot out <- out + ggplot2::aes_string(fill = "groups") + ggplot2::scale_fill_manual(values = group.colors) if (!is.null(fit)) { out <- out + ggplot2::aes_string(color = "groups") + ggplot2::scale_color_manual(values = group.colors) } } out <- out + scale_x_incidence(x, n_breaks, labels_week, ...) out } ## This function will take an existing 'incidence' plot object ('p') and add lines from an ## 'incidence_fit' object ('x') #' @export #' @rdname plot.incidence #' #' @param p An existing incidence plot. add_incidence_fit <- function(p, x, col_pal = incidence_pal1){ if (inherits(x, "incidence_fit_list")) { x <- get_fit(x) } ## 'x' could be a list of fit, in which case all fits are added to the plot if (is.list(x) && !inherits(x, "incidence_fit")) { out <- p for (e in x) { if (inherits(e, "incidence_fit")) { out <- add_incidence_fit(out, e, col_pal) } } return(out) } df <- get_info(x, "pred") # In the case that the incidence object is of the type POSIXt, the data from # the fit object must be presented as POSIXt or ggplot2 will throw a fit. if (inherits(p$data$dates, "POSIXt")) { # I add half a day here because any fractional days (0.5) will be thrown out # on conversion and I'm not quite sure why that is df$dates <- as.POSIXlt(df$dates) + 43200 # adding half a day if (inherits(p$data$dates, "POSIXct")) { df$dates <- as.POSIXct(df$dates) } } out <- suppressMessages( p + ggplot2::geom_line( data = df, ggplot2::aes_string(x = "dates", y = "fit"), linetype = 1) + ggplot2::geom_line( data = df, ggplot2::aes_string(x = "dates", y = "lwr"), linetype = 2) + ggplot2::geom_line( data = df, ggplot2::aes_string(x = "dates", y = "upr"), linetype = 2) ) if ("groups" %in% names(df)) { n.groups <- length(levels(df$groups)) out <- out + ggplot2::aes_string(color = "groups") + ggplot2::scale_color_manual(values = col_pal(n.groups)) } out } #' @export #' @rdname plot.incidence plot.incidence_fit <- function(x, ...){ base <- ggplot2::ggplot() out <- add_incidence_fit(base, x, ...) + ggplot2::labs(x = "", y = "Predicted incidence") out } #' @export #' @rdname plot.incidence plot.incidence_fit_list <- function(x, ...){ base <- ggplot2::ggplot() fits <- get_fit(x) out <- add_incidence_fit(base, fits, ...) 
+ ggplot2::labs(x = "", y = "Predicted incidence") out } incidence/R/trim_observations.R0000644000176200001440000000201014621104516016261 0ustar liggesusers#' Trim observations based on the first and last dates #' #' @param observations a vector of dates or integers #' @param first_date a single date or integer #' @param last_date a single date or integer #' #' @return the trimmed observations as a logical vector #' @noRd #' @keywords internal trim_observations <- function(dates, first_date = NULL, last_date = NULL) { # Remove the missing observations -------------------- res <- !is.na(dates) if (sum(res) < length(dates)) { message(sprintf("%d missing observations were removed.", length(dates) - sum(res) ) ) } dates <- dates[res] # Trim ends ------------------------------------------ res <- dates >= first_date & dates <= last_date if (sum(res) < length(dates)) { message(sprintf("%d observations outside of [%s, %s] were removed.", length(dates) - sum(res), format(first_date), format(last_date) ) ) } res } incidence/R/get_interval.R0000644000176200001440000000743014621104516015206 0ustar liggesusers#' Access various elements of an incidence object #' #' @param x an [incidence] object. #' @param ... Unused #' #' @return #' - `get_interval()` if `integer = TRUE`: an integer vector, otherwise: the #' value stored in `x$interval` #' @export #' @keywords accessors #' #' @rdname accessors #' @aliases get_interval #' @seealso #' - [get_counts()] to access the matrix of counts #' - [get_dates()] to access the dates on the right, left, and center of the #' interval. #' - [group_names()] to access and possibly re-name the groups #' @examples #' #' set.seed(999) #' dat <- as.Date(Sys.Date()) + sample(-3:50, 100, replace = TRUE) #' x <- incidence(dat, interval = "month") #' #' # the value stored in the interval element #' get_interval(x) #' #' # the numeric value of the interval in days #' get_interval(x, integer = FALSE) #' #' # the number of observations in the object #' get_n(x) #' #' # the length of time represented #' get_timespan(x) #' #' # the number of groups #' ncol(x) #' #' # the number of bins (intervals) #' nrow(x) get_interval <- function(x, ...) { UseMethod("get_interval") } #' @export #' @rdname accessors #' @aliases get_interval.default get_interval.default <- function(x, ...) { stop(sprintf("Not implemented for class %s", paste(class(x), collapse = ", "))) } #' @param integer When `TRUE` (default), the interval will be converted to an #' integer vector if it is stored as a character in the incidence object. #' @export #' @rdname accessors #' @aliases get_interval.incidence get_interval.incidence <- function(x, integer = TRUE, ...) 
{ if (!integer || is.numeric(x$interval)) { return(x$interval) } if (is.character(x$interval)) { res <- get_interval_type(x$interval) n <- get_interval_number(x$interval) res <- switch(res, day = 1L * n, week = 7L * n, month = get_days_in_month(x$dates, n), quarter = get_days_in_quarter(x$dates, n), year = get_days_in_year(x$dates, n) ) return(res) } else { stop(sprintf("I don't know how to convert a %s to an integer", paste(class(x$interval), collapse = ", "))) } } get_interval_type <- function(x) { res <- NULL res <- if (grepl("day", x, ignore.case = TRUE)) "day" else res res <- if (grepl("week", x, ignore.case = TRUE)) "week" else res res <- if (grepl("month", x, ignore.case = TRUE)) "month" else res res <- if (grepl("quarter", x, ignore.case = TRUE)) "quarter" else res res <- if (grepl("year", x, ignore.case = TRUE)) "year" else res res } get_interval_number <- function(x) { if (!grepl("^\\d", x)) return(1L) as.integer(gsub("^(\\d*).+$", "\\1", x)) } get_days_in_month <- function(dates, m = 1L) { dates <- floor_month(dates) res <- vapply(strsplit(format(dates), "-"), add_months, character(1), months = m) as.integer(as.Date(res) - dates) } get_days_in_quarter <- function(dates, m = 1L) { dates <- floor_month(dates) res <- vapply(strsplit(format(dates), "-"), FUN = add_months, FUN.VALUE = character(1), months = 3L * m) as.integer(as.Date(res) - dates) } get_days_in_year <- function(dates, m = 1L) { dates <- floor_month(dates) res <- vapply(strsplit(format(dates), "-"), FUN = add_months, FUN.VALUE = character(1), months = 12L * m) as.integer(as.Date(res) - dates) } floor_month <- function(x) { x - as.integer(format(x, "%d")) + 1L } add_months <- function(x, months = 1L) { i <- as.integer(x[2]) + months if (i > 12L) { x[1] <- as.character(as.integer(x[1]) + 1L) i <- i - 12L } x[2] <- sprintf("%02d", i) paste(x, collapse = "-") } incidence/R/palettes.R0000644000176200001440000000272314621104516014344 0ustar liggesusers#' Color palettes used in incidence #' #' These functions are color palettes used in incidence. 
#' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com} #' #' @param n a number of colors #' #' @rdname palettes #' @aliases palettes incidence_pal1 incidence_pal1_light incidence_pal1_dark #' #' @export #' @importFrom grDevices colorRampPalette #' #' @examples #' #' plot(1:4, cex=8, pch=20, col = incidence_pal1(4), #' main = "palette: incidence_pal1") #' plot(1:100, cex=8, pch=20, col = incidence_pal1(100), #' main ="palette: incidence_pal1") #' plot(1:100, cex=8, pch=20, col = incidence_pal1_light(100), #' main="palette: incidence_pal1_light") #' plot(1:100, cex=8, pch=20, col = incidence_pal1_dark(100), #' main="palette: incidence_pal1_dark") #' incidence_pal1 <- function(n){ if(!is.numeric(n)) stop("n is not a number") colors <- c("#aa3939", "#4a6a8a", "#d4aa6a","#499371") if (n < 4) return(colors[1:n]) return(colorRampPalette(colors)(n)) } #' @export #' @rdname palettes incidence_pal1_light <- function(n){ if(!is.numeric(n)) stop("n is not a number") colors <- c("#d46a6a", "#738ca6", "#ffddaa","#76b096") if (n < 4) return(colors[1:n]) return(colorRampPalette(colors)(n)) } #' @export #' @rdname palettes incidence_pal1_dark <- function(n){ if(!is.numeric(n)) stop("n is not a number") colors <- c("#801515", "#2b4c6f", "#aa7d39","#277552") if (n < 4) return(colors[1:n]) return(colorRampPalette(colors)(n)) } incidence/R/find_peak.R0000644000176200001440000000277314621104516014450 0ustar liggesusers#' Find the peak date of an incidence curve #' #' This function can be used to find the peak of an epidemic curve stored as an #' `incidence` object. #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com}, Zhian N. Kamvar #' \email{zkamvar@@gmail.com} #' #' @md #' #' @export #' #' @param x An `incidence` object. #' @param pool If `TRUE` (default), any groups will be pooled before finding #' a peak. If `FALSE`, separate peaks will be found for each group. #' #' @return The date of the (first) highest incidence in the data. 
#' #' @seealso [estimate_peak()] for bootstrap estimates of the peak time #' #' @examples #' #' if (require(outbreaks) && require(ggplot2)) { withAutoprint({ #' i <- incidence(fluH7N9_china_2013$date_of_onset) #' i #' plot(i) #' #' ## one simple bootstrap #' x <- bootstrap(i) #' x #' plot(x) #' #' ## find 95% CI for peak time using bootstrap #' find_peak(i) #' #' #' ## show confidence interval #' plot(i) + geom_vline(xintercept = find_peak(i), col = "red", lty = 2) #' #' })} #' find_peak <- function(x, pool = TRUE) { if (!inherits(x, "incidence")) { stop("x is not an incidence object") } if (ncol(x$counts) > 1L && pool) { msg <- paste("'x' is stratified by groups", "pooling groups before finding peaks", sep = "\n") message(msg) x <- pool(x) } the_max <- apply(get_counts(x), MARGIN = 2L, FUN = which.max ) out <- stats::setNames(x$dates[the_max], colnames(x$counts)) out } incidence/R/extract_info.R0000644000176200001440000000513514621104516015210 0ustar liggesusers## Non-exported function extracting info and predictions from a lm object ## - reg is a lm object ## - origin is the first date of the incidence ## - level is a confidence level, defaulting to .95 extract_info <- function(reg, origin, level){ if (is.null(reg)) { return(NULL) } ## extract growth rates (r) ## here we need to keep all coefficients when there are interactions to.keep <- grep("^dates.x.*$", names(stats::coef(reg)), value = TRUE) r <- stats::coef(reg)[to.keep] use.groups <- length(r) > 1 if (use.groups) { names(r) <- reg$xlevels[[1]] # names = levels if groups } else { names(r) <- NULL # no names otherwise } r.conf <- stats::confint(reg, to.keep, level) rownames(r.conf) <- names(r) if (use.groups) { r[-1] <- r[-1] + r[1] # add coefs to intercept r.conf[-1,] <- r.conf[-1,] + r.conf[1,] # add coefs to intercept } ## need to pass new data spanning all dates and groups here if (use.groups) { new.data <- expand.grid(sort(unique(reg$model$dates.x)), levels(reg$model$groups)) names(new.data) <- c("dates.x", "groups") } else { new.data <- data.frame(dates.x = sort(unique(reg$model$dates.x))) } pred <- exp(stats::predict(reg, newdata = new.data, interval = "confidence", level = level)) ## keep track of dates and groups for plotting pred <- cbind.data.frame(new.data, pred) info <- list(r = r, r.conf = r.conf, pred = pred) if (r[1] > 0 ) { # note: choice of doubling vs halving only based on 1st group info$doubling <- log(2) / r info$doubling.conf <- log(2) / r.conf o.names <- colnames(info$doubling.conf) info$doubling.conf <- info$doubling.conf[, rev(seq_along(o.names)), drop = FALSE] colnames(info$doubling.conf) <- o.names } else { info$halving <- log(0.5) / r info$halving.conf <- log(0.5) / r.conf } ## We need to store the date corresponding to 'day 0', as this will be used ## to create actual dates afterwards (as opposed to mere numbers of days). ## origin <- min(x$dates) ## Dates are reconstructed from info$pred$dates.x and origin). Note that ## this is approximate, as dates are forced to be integers. A better option ## would be to convert the dates to numbers, but ggplot2 is no longer ## consistent when mixing up Date and decimal numbers (it works only in some ## cases / geom). 
dates <- origin + pred$dates.x info$pred <- cbind.data.frame(dates, info$pred) out <- list(model = reg, info = info, origin = origin) class(out) <- "incidence_fit" out } incidence/R/valid_interval.R0000644000176200001440000000435514621104516015531 0ustar liggesusers#' Return TRUE if the interval is a valid date character #' #' @param the_interval an interval string #' #' @return a logical value #' @noRd #' @keywords internal is_date_interval <- function(the_interval) { valid_intervals <- "day|week|month|quarter|year" grepl(valid_intervals, the_interval, ignore.case = TRUE) } #' Validate potential character values for interval #' #' Characters are valid for intervals if they are of the #' form "day", "week", "month", etc. They can ALSO be #' valid if they are characters that convert to numbers. #' #' @param the_interval a character string of length one #' #' @author Zhian Kamvar #' @return the character string OR a numeric value. #' @noRd valid_interval_character <- function(the_interval, standard = TRUE) { if (is.character(the_interval)) { if (!is_date_interval(the_interval)) { suppressWarnings({ the_interval <- as.numeric(the_interval) }) if (is.na(the_interval)) { stop('The interval must be a number or one of the following: "day", "week", "month", "quarter" or "year"', call. = FALSE) } } else { valid_intervals <- "^\\d?\\s?(day|week|month|quarter|year)s?$" must_be_standard <- !grepl(valid_intervals, the_interval, ignore.case = TRUE) if (!standard && must_be_standard) { stop(sprintf("The interval '%s' implies a standard and cannot be used with `standard = FALSE`", the_interval)) } } } the_interval } #' Check to make sure an interval is valid for integer dates #' #' This will try to convert the interval if its a character, but complain if #' it doesn't pass check. #' #' @param interval either an integer or character #' #' @return interval or it will stop #' @noRd #' @keywords internal valid_interval_integer <- function(interval) { if (is.character(interval)) { res <- try(valid_interval_character(interval), silent = TRUE) if (inherits(res, "try-error")) { msg <- sprintf("The interval '%s' is not valid. Please supply an integer.", interval) stop(msg, call. = FALSE) } else if (is.character(res)) { msg <- sprintf("The interval '%s' can only be used for Dates, not integers or numerics.", interval) stop(msg, call. 
= FALSE) } } interval } incidence/R/check_week.R0000644000176200001440000000074214621104516014612 0ustar liggesusers#' Check for a valid week interval #' #' @param the_interval character, integer, or numeric #' #' @return a logical value indicating if any of the tests pass #' @noRd #' @keywords internal check_week <- function(the_interval) { num_week <- is.numeric(the_interval) && the_interval == 7 int_week <- is.integer(the_interval) && the_interval == 7L char_week <- is.character(the_interval) && grepl("week", the_interval, ignore.case = TRUE) num_week || int_week || char_week } incidence/R/get_n.R0000644000176200001440000000072314621104516013615 0ustar liggesusers#' @return #' - `get_n()` The total number of cases stored in the object #' @export #' @rdname accessors #' @aliases get_n get_n <- function(x) { UseMethod("get_n") } #' @export #' @rdname accessors #' @aliases get_n.default get_n.default <- function(x) { stop(sprintf("Not implemented for class %s", paste(class(x), collapse = ", "))) } #' @export #' @rdname accessors #' @aliases get_n.incidence get_n.incidence <- function(x) { x$n } incidence/R/estimate_peak.R0000644000176200001440000000651014621104516015334 0ustar liggesusers#' Estimate the peak date of an incidence curve using bootstrap #' #' This function can be used to estimate the peak of an epidemic curve stored as #' `incidence`, using bootstrap. See [incidence::bootstrap] for more information #' on the resampling. #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com}, with inputs on #' caveats from Michael Höhle. #' #' @md #' #' @export #' #' @details Input dates are resampled with replacement to form bootstrapped #' datasets; the peak is reported for each, resulting in a distribution of #' peak times. When there are ties for peak incidence, only the first date is #' reported. #' #' Note that the bootstrapping approach used for estimating the peak time makes #' the following assumptions: #' #' - the total number of event is known (no uncertainty on total incidence) #' - dates with no events (zero incidence) will never be in bootstrapped datasets #' - the reporting is assumed to be constant over time, i.e. every case is #' equally likely to be reported #' #' @param x An `incidence` object. #' #' @param n The number of bootstrap datasets to be generated; defaults to 100. #' #' @param alpha The type 1 error chosen for the confidence interval; defaults to #' 0.05. #' #' @return A list containing the following items: #' #' - `observed`: the peak incidence of the original dataset #' - `estimated`: the mean peak time of the bootstrap datasets #' - `ci`: the confidence interval based on bootstrap datasets #' - `peaks`: the peak times of the bootstrap datasets #' #' @seealso [incidence::bootstrap] for the bootstrapping underlying this #' approach and [incidence::find_peak] to find the peak in a single #' `incidence` object. 
#' #' @examples #' #' if (require(outbreaks) && require(ggplot2)) { withAutoprint({ #' i <- incidence(fluH7N9_china_2013$date_of_onset) #' i #' plot(i) #' #' ## one simple bootstrap #' x <- bootstrap(i) #' x #' plot(x) #' #' ## find 95% CI for peak time using bootstrap #' peak_data <- estimate_peak(i) #' peak_data #' summary(peak_data$peaks) #' #' ## show confidence interval #' plot(i) + geom_vline(xintercept = peak_data$ci, col = "red", lty = 2) #' #' ## show the distribution of bootstrapped peaks #' df <- data.frame(peak = peak_data$peaks) #' plot(i) + geom_density(data = df, #' aes(x = peak, y = 10 * ..scaled..), #' alpha = .2, fill = "red", color = "red") #' #' })} #' estimate_peak <- function(x, n = 100, alpha = 0.05) { if (!inherits(x, "incidence")) { stop("x is not an incidence object") } if (ncol(x$counts) > 1L) { msg <- paste("'x' is stratified by groups", "pooling groups before finding peaks", sep = "\n") message(msg) x <- pool(x) } out <- list() ## use it to find CI for epidemic peak out$observed <- find_peak(x) ## peaks on 'n' bootstrap samples peak_boot <- replicate(n, find_peak(bootstrap(x)), simplify = FALSE) ## convert to vector without losing Date class peak_boot <- do.call(c, peak_boot) ## store relevant stats and sod off out$estimated <- mean(peak_boot) QUANTILE <- if(inherits(peak_boot, c("Date", "POSIX"))) quantile_Date else stats::quantile out$ci <- QUANTILE(peak_boot, c(alpha / 2, 1 - alpha / 2)) out$peaks <- peak_boot out } incidence/R/bootstrap.R0000644000176200001440000000362414621104516014541 0ustar liggesusers#' Bootstrap incidence time series #' #' This function can be used to bootstrap `incidence` objects. Bootstrapping is #' done by sampling with replacement the original input dates. See `details` for #' more information on how this is implemented. #' #' @author Thibaut Jombart \email{thibautjombart@@gmail.com} #' #' @md #' #' @export #' #' @details As original data are not stored in `incidence` objects, the #' bootstrapping is achieved by multinomial sampling of date bins weighted by #' their relative incidence. #' #' @param x An `incidence` object. #' #' @param randomise_groups A `logical` indicating whether groups should be #' randomised as well in the resampling procedure; respective group sizes will #' be preserved, but this can be used to remove any group-specific temporal #' dynamics. If `FALSE` (default), data are resampled within groups. #' #' @return An `incidence` object. #' #' @seealso [incidence::find_peak] to use estimate peak date using bootstrap #' #' @examples #' #' if (require(outbreaks) && require(ggplot2)) { withAutoprint({ #' i <- incidence(fluH7N9_china_2013$date_of_onset) #' i #' plot(i) #' #' ## one simple bootstrap #' x <- bootstrap(i) #' x #' plot(x) #' #' })} #' bootstrap <- function(x, randomise_groups = FALSE) { if (!inherits(x, "incidence")) { stop("x is not an incidence object") } ## `counts` is a vector of event counts, meant to be a column of x$counts boot_one_group <- function(counts) { sample_(x$dates, size = sum(counts), replace = TRUE, prob = counts) } new_dates <- do.call(c, lapply(seq.int(ncol(x$counts)), function(i) boot_one_group(x$counts[, i]))) group_sizes <- colSums(x$counts) new_groups <- rep(colnames(x$counts), group_sizes) if (randomise_groups) { new_groups <- sample_(new_groups) } incidence(new_dates, interval = x$interval, groups = new_groups) } incidence/NEWS.md0000644000176200001440000003143014626313255013301 0ustar liggesusersincidence 1.7.5 ============================ * No user facing changes. 
incidence 1.7.4 ============================ * No user facing changes. * Documentation tweaks and general upkeep for CRAN. incidence 1.7.3 ============================ * change of maintainer only. incidence 1.7.2 ============================ ### BUG FIXES * `plot.incidence()` now reverts to the previous behaviour of plotting ticks on either side of the interval as opposed to centering within the interval. * The default version for {ggplot2} has been set to 3.3.2. incidence 1.7.1 ============================ ### BUG FIXES * Fix for a [bug in](https://github.com/reconhub/incidence/issues/119) `plot.incidence()` introduced with new release of *ggplot2* ([bug report](https://github.com/tidyverse/ggplot2/issues/3873)) As a temporary fix, dates are now centered within the interval instead of to the left of the interval. incidence 1.7.0 ============================ ### NEW FEATURES * Any interval `seq.Date()` can handle (e.g. "5 weeks") can be handled by `incidence()` (see https://github.com/reconhub/incidence/issues/67) * Weekly intervals can start on any day of the week by allowing things like "epiweek", "isoweek", "wednesday week", "2 Saturday weeks", etc. (see https://github.com/reconhub/incidence/issues/55#issuecomment-405297526) * the item `$weeks` is now added to the incidence object, which contains an "aweek" class * plotting will now force the first tick to be the starting point of the incidence curve ### NEW FUNCTIONS * `make_breaks()` will automatically calculate breaks from an incidence object for plotting. * `scale_x_incidence()` will produce a ggplot2 "ScaleContinuous" object to add to a ggplot. ### DEPRECATED * `plot.incidence()` argument `labels_iso` is deprecated in favor of `labels_week` * Incidence objects will still have `$isoweeks` if the weeks are ISO 8601 standard, but users should rely intead on `$weeks` instead. The `$isoweeks` element will be removed in a future version of incidence. * `as.incidence()` argument `isoweeks` has been deprecated in favour of `standard` ### DEPENDENCIES - ISOweek import changed to [aweek](https://www.repidemicsconsortium.org/aweek/) ### Documentation - Vignettes have been updated with examples. incidence 1.6.0 (2019-03-05) ============================ ### BEHAVIORAL CHANGE * `incidence()` will no longer allow a non-standard `first_date` to override `standard = TRUE. The first call to `incidence()` specifying `first_date` without `standard` will issue a warning. To use non-standard first dates, specify `standard = FALSE`. To remove the warning, use `options(incidence.warn.first_date = FALSE)`. See https://github.com/reconhub/incidence/issues/87 for details. ### MISC * `citation("incidence")` will now give the proper citation for our article in F1000 research and the global DOI for archived code. See https://github.com/reconhub/incidence/pulls/106 * Tests have been updated to avoid randomisation errors on R 3.6.0 See https://github.com/reconhub/incidence/issues/107 incidence 1.5.4 (2019-01-15) ============================ ### BUG FIX * `incidence()` now returns an error when supplied a character vector that is not formatted as (yyyy-mm-dd). (See https://github.com/reconhub/incidence/issues/88) * `fit()` now returns correct coefficients when dates is POSIXt by converting to Date. (See https://github.com/reconhub/incidence/issues/91) * `plot.incidence()` now plots in UTC by default for POSIXt incidence objects. this prevents a bug where different time zones would cause a shift in the bars (See https://github.com/reconhub/incidence/issues/99). 
### MISC * A test that randomly failed on CRAN has been fixed. (See https://github.com/reconhub/incidence/issues/95). * Plotting tests have been updated for new version of vdiffr (See https://github.com/reconhub/incidence/issues/96). * POSIXct incidence are first passed through POSIXlt when initialized. * A more informative error message is generated for non ISO 8601 formatted `first_date` and `last_date` parameters. incidence 1.5.3 (2018-12-07) ============================ ### BUG FIX * `plot.incidence()` will now respect single groups. (See https://github.com/reconhub/incidence/issues/84) * `as.data.frame.incidence()` will now respect single groups. (See https://github.com/reconhub/incidence/issues/84) ### MISC * `demo("incidence-demo" package = "incidence")` has been updated to show use of custom colors. incidence 1.5.2 (2018-11-30) ============================ ### BUG FIX * `print.incidence()` will now print isoweeks even if the `$interval` element is "week". ### MISC * `subset.incidence()` will now give a more informative error message when the user specifies a group that does not exist. * `demo('incidence-demo', package = 'incidence')` now shows plotting with `show_cases = TRUE`. * In the the case where a date is accidentally mis-typed leading to a gross mis-calculation of the date range (i.e. 2018 is mis-typed as 3018), a warning will be issued. The default threshold is set at 18262 days (50 years), but the user can define their own threshold by setting the `incidence.max.days` option incidence 1.5.1 (2018-11-14) ============================ ### BUG FIX * Two bugs regarding the ordering of groups when the user specifies a factor/ column order have been fixed. This affects `plot.incidence()`, `incidence()`, and `as.data.frame.incidence()` For details, see https://github.com/reconhub/incidence/issues/79 incidence 1.5.0 (2018-11-01) ============================ ### NEW FUNCTIONS * `group_names()` allows the user to retrieve and set the group names. * `get_timespan()` returns the `$timespan` element. * `get_n()` returns the `$n` element. * `dim()`, `nrow()`, and `ncol()` are now available for incidence objects, returning the dimensions of the number of bins and the number of groups. ### NEW FEATURES * A new argument to `plot()` called `show_cases` has been added to draw borders around individual cases for EPIET-style curves. See https://github.com/reconhub/incidence/pull/72 for details. ### DOCUMENTATION UPDATES * An example of EPIET-style bars for small data sets has been added to the plot customisation vignette by @jakobschumacher. See https://github.com/reconhub/incidence/pull/68 for details. * The incidence class vignette has been updated to use the available accessors. ### BUG FIX * `estimate_peak()` no longer fails with integer dates * `incidence()` no longer fails when providing both group information and a `first_date` or `last_date` parameter that is inside the bounds of the observed dates. Thanks to @mfaber for reporting this bug. See https://github.com/reconhub/incidence/issues/70 for details. ### MISC * code has been spread out into a more logical file structure where the `internal_checks.R` file has been split into the relative components. * A message is now printed if missing observations are present when creating the incidence object. incidence 1.4.1 (2018-08-24) ============================ ### BEHAVIORAL CHANGES * The `$lm` field of the `incidence_fit` class is now named `$model` to clearly indicate that this can contain any model. 
### NEW FEATURES * `incidence()` will now accept text-based intervals that are valid date intervals: day, week, month, quarter, and year. * `incidence()` now verifies that all user-supplied arguments are accurate and spelled correctly. * `fit_optim_split()` now gains a `separate_split` argument that will determine the optimal split separately for groups. * A new class, `incidence_fit_list`, has been implemented to store and summarise `incidence_fit` objects within a nested list. This is the class returned by in the `$fit` element of `fit_optim_split()`. ### NEW FUNCTIONS * `bootstrap()` will bootstrap epicurves stored as `incidence` objects. * `find_peak()` identifies the peak date of an `incidence` objects. * `estimate_peak()` uses bootstrap to estimate the peak time of a partially observed outbreak. * `get_interval()` will return the numeric interval or several intervals in the case of intervals that can't be represented in a fixed number of days (e.g. months). * `get_dates()` returns the dates or counts of days on the right, center, or left of the interval. * `get_counts()` returns the matrix of case counts for each date. * `get_fit()` returns a list of `incidence_fit` objects from an `incidence_fit_list` object. * `get_info()` returns information stored in the `$info` element of an `incidence_fit`/`incidence_fit_list` object. ### DOCUMENTATION * The new vignette `incidence_fit_class` instructs the user on how `incidence_fit` and `incidence_fit_list` objects are created and accessed. ### DEPRECATED * In the `incidence()` function, the `iso_week` parameter is deprecated in favor of `standard` for a more general way of indicating that the interval should start at the beginning of a valid date timeframe. ### BUG FIXES * The `$timespan` item in the incidence object from Dates was not type-stable and would change if subsetted. A re-working of the incidence constructor fixed this issue. * Misspelled or unrecgonized parameters passed to `incidence()` will now cause an error instead of being silently ignored. * Plotting for POSIXct data has been fixed. incidence 1.3.1 (2018-06-11) ============================ ### BUG FIXES * tweak of the plotting of `incidence` object to avoid conflicts with additional geoms such as `geom_ribbon`, now used in `projections::add_projections`. incidence 1.3.0 (2018-06-01) ============================ ### BUG FIXES * fixed [issue](https://github.com/reconhub/incidence/issues/34) caused by new version of `ggplot2` ### NEW FEATURES * the argument `n_breaks` has been added to `plot.incidence`, to specify the ideal number of breaks for the date legends; will work with ggplot2 > 2.2.1 * added the internal function `make_iso_weeks_breaks` to generate dates and labels for date x-axis legends using ISO weeks * added a function `add_incidence_fit`, which can be used for adding fits to epicurves in a piping-friendly way * added a function `cumulate`, which computes cumulative incidence and returns an `incidence` object incidence 1.2.1 (2017-10-19) ============================ ### BUG FIXES * fixed issues in testing incidence plots by employing *vdiffr* package. incidence 1.2.0 (2017-04-03) ============================ ### NEW FEATURES * new generic *as.incidence*, to create incidence objects from already computed incidences. 
Methods for: matrix, data.frame, numeric vectors * better processing of input dates, including: automatic conversion from characters, issuing errors for factors, and silently converting numeric vectors which are essentially integers (issuing a warning otherwise) * new vignette on [*conversions*](https://www.repidemicsconsortium.org/incidence/articles/conversions.html) to and from *incidence* objects * new tests ### BUG FIXES * fixed issues caused by variables which changed names in some datasets of the *outbreaks* package, used in the documentation * disabled by default the isoweeks in `incidence`; this part of the code will break with changes made in the devel version of *ggplot2*, which is now required by *plotly* incidence 1.1.2 (2017-03-24) ================== ### BUG FIXES * it is now possible to subset an incidence object based on `Date` dates using numeric values, which are interpreted as number of intervals since the first date (origin = 1) * NAs are no longer removed from the input dates, as it would cause mismatches with grouping factors. incidence 1.1.1 (2017-02-15) ================== ### BUG FIXES * adapting to new names of datasets in [*outbreaks*](https://cran.r-project.org/package=outbreaks): `ebola.sim` -> `ebola_sim` and `ebola.sim.clean` -> `ebola_sim_clean` incidence 1.1.0 (2016-12-13) ================== ### NEW FEATURES * add an argument `iso_week` to incidence.Date() and incidence.POSIXt() to support ISO week-based incidence when computing weekly incidence. * add an argument `labels_iso_week` to plot.incidence() to label x axis tick marks with ISO weeks when plotting ISO week-based weekly incidence.
incidence 1.0.1 (2016-11-23) ================== ### NEW FEATURES * The README.Rmd / README.md now contains information about various websites for *incidence* as well as guidelines for posting questions on the RECON forum. * incidence now has a dedicated website [https://www.repidemicsconsortium.org/incidence/](https://www.repidemicsconsortium.org/incidence/) generated with *pkgdown* ### MINOR IMPROVEMENTS * Vignettes titles are now correctly displayed on CRAN (they read '*Vignette title*').
incidence 1.0.0 (2016-11-03) ================== First release of the incidence package on CRAN! incidence/MD50000644000176200001440000001635314626320772012524 0ustar liggesuserse4802e2236d5a4e29875f3fc5e7e2df1 *DESCRIPTION eca4b40065e4dfda3e24fd8669252c25 *LICENSE c267d6e4ac6af6199ed6ef51ffc13a79 *NAMESPACE 46791eacba437b4b03d02b11e733eb63 *NEWS.md 1b60f3806bf4ebc9555d460586d4d9ef *R/bootstrap.R 700af6fa1ffb4dee9a4a3f6246495214 *R/check_boundaries.R 0332144a9ca17f79421356b4bb61c6c0 *R/check_dates.R 855adc028e1f007f1cf7e3bca469da2c *R/check_dots.R 11d91173d7bb9570c8e6d4c27844c03d *R/check_groups.R 52aa69c4ef51561f342d307b3215287c *R/check_interval.R bba33e7776e720a07b3287775852c399 *R/check_timespan.R 4580363cc816383080b98927b3a905f8 *R/check_week.R 37f049a1a6f2e823db40df470dbbbd57 *R/conversion.R 0cdc9bb86130b44a85b9bc232668cd5c *R/cumulate.R a7ef1071b64eef80ea248a634e3e3e25 *R/dim.R bac7bdf837ff3713502d658149039415 *R/estimate_peak.R 8b8215e236816de5fbbd9da0e0c4c26e *R/extract_info.R 88b3307c0d2077f966bbf262a0d3555f *R/find_peak.R 36cecaf104c395e1fa0b62eddcf67f91 *R/fit.R 126f4983aaa7388220c5258caff55207 *R/fit_optim_split.R 95d41836d27a2afda0b7b5d61794e70a *R/get_counts.R 852326cf7498110334ce88cd85241f21 *R/get_dates.R 92d1dfae0fe75878e90bcde1ad6f657a *R/get_fit.R 36a425a9828a536ead27a8d2fd54ab36 *R/get_info.R 8af487e3806f47cac06bcc11c4bd92c6 *R/get_interval.R cc05e33bb43da9323929ff1f8cfee523 *R/get_n.R e58abd179a572e9febf60b993d8f2448 *R/get_timespan.R dd9d23d317d221f4f597b85fc4553a17 *R/get_week.R faacaeaf8d4560f5ad974c7174156623 *R/group_names.R 3c0c8d3a08b1e8be3d9f9d5ee176af0c *R/incidence.R 90ff6f62bed5f64c5ff8a75c236361eb *R/internals.R 3624191ec26b75f37dd00e2d3d9a4ac4 *R/make_breaks.R 7ddc2a055095574e6f36724ee54392b4 *R/make_incidence.R 8fc2a044587c969dbff4fc95a1a5128e *R/palettes.R 667f11a8df0d7fda53b4f38e7b15837c *R/plot.R 7f52e65e748732b185a2968da0d9dc2a *R/pool.R 2663204ed57f0ad3aabde34c800f7033 *R/print.R 7a979a48ad26e41259418c755e173ce5 *R/scale_x_incidence.R f98beeee30931baa02999379845b8e8b *R/subset.R 3b3f2f856f216280566a7ca4e45e3182 *R/trim_observations.R e72baa216b6091bfd7f168472b43ccc5 *R/valid_interval.R 390deccd3e91a620e72854d14da41520 *R/zzz.R 176953a23c9250aecf3f2493660faae7 *build/vignette.rds 04de2be6f208bba13353da3f06da1e5a *demo/00Index e4d55577beddbb60225f4624edfddefb *demo/incidence-demo.R 26251717636cb3f700e094d800e0318c *inst/CITATION 864c55119b696b088339e854a868814d *inst/WORDLIST a88bdf7162e7b299b7c7340a08b0e27b *inst/doc/conversions.R 18670cb185ae7cb42cc4d5adc2a48971 *inst/doc/conversions.Rmd b9de99191bd9658fbbd42b3dbf1e383f *inst/doc/conversions.html 8b8322e03888b6adcdf88343db0440d8 *inst/doc/customize_plot.R 93312c28fa69277b756be634a9527f49 *inst/doc/customize_plot.Rmd 9fd96db4db10eec5dd4c156cadcaf207 *inst/doc/customize_plot.html 1a4b44af9a30de2155edb7646fb37bc7 *inst/doc/incidence_class.R 762b11741e3fdf20c87bff3322b421ba *inst/doc/incidence_class.Rmd cae252be69f2b3f628de7d5db9599317 *inst/doc/incidence_class.html d987907fbbe733e777abba57086a42fe *inst/doc/incidence_fit_class.R c1fa6b93b93f12b855df217ad70a08ec *inst/doc/incidence_fit_class.Rmd acd2cd3768c3b8224aa406863ff049f5 *inst/doc/incidence_fit_class.html 7eacdbc25831cc8e9c09c8e8b7902f35 *inst/doc/overview.R cde40f2dd28872a787e76566b604e86f *inst/doc/overview.Rmd 0332d7e3806420e46b98ec1a6bae66fe *inst/doc/overview.html 9053939178bef4f18c504366a254a21b *man/accessors.Rd c8d463ebb00eb71b05acfc68293301ff *man/bootstrap.Rd b48296d578a2edea7236360debb1c9b3 *man/conversions.Rd 
50f8330f97d8199b7c38aa50c0c33fbb *man/cumulate.Rd 54ca31f140cbb44bf43ea20ad703236a *man/estimate_peak.Rd 9e19272466b9440bd152f234defde4d1 *man/find_peak.Rd 3b86d9a9f5f52b0b8c625f53fba4be6c *man/fit.Rd d6a9b63cbb04dbd29990e69cb355cfd6 *man/get_counts.Rd 74e94e869f8ac8272b0cbbfee14e6090 *man/get_dates.Rd debd0c93057985e7e23b2e9892835cb4 *man/get_fit.Rd 4156c32ac13f582ec1ec1681806b435b *man/group_names.Rd daff75878d0dadaddf7b5797cf2e56e6 *man/incidence.Rd f5ec27dd1ad381482271838783d3b457 *man/palettes.Rd 8a0bd65d186a093cd30229dff8e2cc31 *man/plot.incidence.Rd 2f1ed63649f5fba0f54c20fbda26c1b7 *man/pool.Rd b484cc22c537b25f858db9d52e161da8 *man/subset.Rd 3d28024bfb7b679ea6bf1f4814a32697 *tests/testthat.R 49749a093e92a1ff7b0372f91205e4f7 *tests/testthat/data_cache/mfcat.rds 04c435031d599d6975cb158a52d538fe *tests/testthat/data_cache/mfdat.rds 8b5e1df8a711a9500070045381f925b7 *tests/testthat/rds/df.rds 31fdfb9f33c66b91907925188fc44162 *tests/testthat/rds/df2.rds d8ac02fa57da9a6e9fb8523bde7b20dc *tests/testthat/rds/df3.rds 7750a0ea82256a95f270a3b69ebfe40d *tests/testthat/rds/df5.rds e12b745a407a4a81d337b9e6ae0f1415 *tests/testthat/rds/df6.rds 7d3405e64c951a3b5d932dec9f7659a5 *tests/testthat/rds/dfl.rds 2e93158c0255b2cdd90844835dc74b3a *tests/testthat/rds/fit.i.rds da39a68587d56ae83a373a33fa1196d5 *tests/testthat/rds/fit.i.sex.rds 6fd3d6377b5ff0270537ad8a2257b896 *tests/testthat/rds/incidence.res1.rds fa1aab2b57af9818659e9662aa4f4d04 *tests/testthat/rds/incidence.res2.rds 51ebe13ae57c2469ac86c56c80e563cc *tests/testthat/rds/incidence.res3.rds afc0318670a26bbbea7ad3634279d58c *tests/testthat/rds/incidence.res4.rds df4de2db2a6504b71c241c10564b7486 *tests/testthat/rds/incidence.res5.rds 5057091a4b224746cc54385d59d57d30 *tests/testthat/rds/incidence.res6.rds 1d7b009cc985909b55962b19cf60de4b *tests/testthat/rds/incidence.res7.rds 7909f6ce2d63d0580b6cca636722aff5 *tests/testthat/rds/incidence.res8.rds a24b4e372b592b386fb2f78b2ac6d071 *tests/testthat/rds/o.fit.i.rds 0242a9b30adbdb07ce2015039a360d63 *tests/testthat/rds/o.fit.i.sex.rds 31961b4142df30d5e012ad8fce06f67d *tests/testthat/rds/print.fit.i.rds 269fc45cd4b24b85bde0112081699cf2 *tests/testthat/rds/print.fit.sex.rds 95c32b7dce302d5f18d62f03fec06bf4 *tests/testthat/rds/print1.rds 61a8a650741b3736b00f1ae1c68f9cc2 *tests/testthat/rds/print2.rds bcbaaadac5c9080f468917483d65b947 *tests/testthat/rds/print3.rds 0f43d7d91754679e97dab57990ab5f87 *tests/testthat/rds/res.g.1.rds e5b0554f70f67164cf0169d17f83e347 *tests/testthat/rds/res.g.2.rds b451537eec4575210bbbb9943265cf1c *tests/testthat/rds/res.g.3.rds fc4ba9a7cfc3339606bdbb3d082a6157 *tests/testthat/rds/x.sub1.rds dbc17ce8fb86a619c7e930d003004754 *tests/testthat/rds/x.sub2.rds 11970747d47061b824d769ca9a49c8ad *tests/testthat/rds/x.sub3.rds 5231f0421410ad4f62801766e2310d16 *tests/testthat/rds/x.sub4.rds 3b5c7ace07b16ab5b5b5e51bb553c85b *tests/testthat/rds/x.sub5.rds 82c1f82d508e3efcf82c46cdeab7f164 *tests/testthat/rds/y.sub1.rds 23afa37e8b21b2508e7a57109d6bdc31 *tests/testthat/test-accessors.R 8401035ce255073f72e06a270a94d688 *tests/testthat/test-bootstrap.R 4859d51e124340a47fbd41f83f1b6c61 *tests/testthat/test-conversions.R e79e8c8b2580df95b34df430a0ae63c7 *tests/testthat/test-cumulate.R 4c8cc0c64d43aa4fad98ce96ea486d2c *tests/testthat/test-fit.R 19ca8f4ed2ff7dc93e88c2b85708902d *tests/testthat/test-incidence.R 9e4507a2c852124961f5c1d82f995395 *tests/testthat/test-non-exported.R 435e145a6bc93b732218e55ec2d0eec6 *tests/testthat/test-palettes.R 5498c9b6c633d7849178c1969b151688 *tests/testthat/test-plot.R 
66c8dd3934e1340a4c4ff392451ec55e *tests/testthat/test-pool.R 21bd181b6c1e2a39d1220cb9271102bf *tests/testthat/test-standards.R 820e2a55ce144a2141e708210f2e2e15 *tests/testthat/test-subset.R 18670cb185ae7cb42cc4d5adc2a48971 *vignettes/conversions.Rmd 93312c28fa69277b756be634a9527f49 *vignettes/customize_plot.Rmd 762b11741e3fdf20c87bff3322b421ba *vignettes/incidence_class.Rmd c1fa6b93b93f12b855df217ad70a08ec *vignettes/incidence_fit_class.Rmd cde40f2dd28872a787e76566b604e86f *vignettes/overview.Rmd incidence/inst/0000755000176200001440000000000014626316445013163 5ustar liggesusersincidence/inst/doc/0000755000176200001440000000000014626316445013730 5ustar liggesusersincidence/inst/doc/customize_plot.html0000644000176200001440000267234614626316440017715 0ustar liggesusers Customize plots of incidence

Customize plots of incidence

Thibaut Jombart, Zhian N. Kamvar

2024-05-31

This vignette provides some tips for the most common customisations of graphics produced by plot.incidence. Our graphics use ggplot2, which is a distinct graphical system from base graphics. If you want advanced customisation of your incidence plots, we recommend following an introduction to ggplot2.


Example data: simulated Ebola outbreak

This example uses the simulated Ebola Virus Disease (EVD) outbreak from the package outbreaks: ebola_sim_clean.

First, we load the data:

library(outbreaks)
library(ggplot2)
library(incidence)

onset <- ebola_sim_clean$linelist$date_of_onset
class(onset)
#> [1] "Date"
head(onset)
#> [1] "2014-04-07" "2014-04-15" "2014-04-21" "2014-04-27" "2014-04-26"
#> [6] "2014-04-25"

We compute the weekly incidence:

i <- incidence(onset, interval = 7)
i
#> <incidence object>
#> [5829 cases from days 2014-04-07 to 2015-04-27]
#> [5829 cases from ISO weeks 2014-W15 to 2015-W18]
#> 
#> $counts: matrix with 56 rows and 1 columns
#> $n: 5829 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 386 days
#> $cumulative: FALSE

i.sex <- incidence(onset, interval = 7, group =  ebola_sim_clean$linelist$gender)
i.sex
#> <incidence object>
#> [5829 cases from days 2014-04-07 to 2015-04-27]
#> [5829 cases from ISO weeks 2014-W15 to 2015-W18]
#> [2 groups: f, m]
#> 
#> $counts: matrix with 56 rows and 2 columns
#> $n: 5829 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 386 days
#> $cumulative: FALSE

i.hosp <- incidence(onset, interval = 7, group =  ebola_sim_clean$linelist$hospital)
i.hosp
#> <incidence object>
#> [5829 cases from days 2014-04-07 to 2015-04-27]
#> [5829 cases from ISO weeks 2014-W15 to 2015-W18]
#> [6 groups: Connaught Hospital, Military Hospital, other, Princess Christian Maternity Hospital (PCMH), Rokupa Hospital, NA]
#> 
#> $counts: matrix with 56 rows and 6 columns
#> $n: 5829 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 386 days
#> $cumulative: FALSE


The plot.incidence function

When calling plot on an incidence object, the function plot.incidence is implicitly used. To access its documentation, use ?plot.incidence. In this section, we illustrate existing customisations.

Default behaviour

By default, the function uses grey for single time series, and colors from the color palette incidence_pal1 when incidence is computed by groups:

plot(i)

plot(i.sex)

plot(i.hosp)

However, some of these defaults can be altered through the various arguments of the function:

args(incidence:::plot.incidence)
#> function (x, ..., fit = NULL, stack = is.null(fit), color = "black", 
#>     border = NA, col_pal = incidence_pal1, alpha = 0.7, xlab = "", 
#>     ylab = NULL, labels_week = !is.null(x$weeks), labels_iso = !is.null(x$isoweeks), 
#>     show_cases = FALSE, n_breaks = 6) 
#> NULL
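
For instance, the border, transparency and y-axis label could be changed as follows (the values chosen here are arbitrary):

plot(i, border = "white", alpha = 1, ylab = "weekly incidence")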

Changing colors

The default palette

A color palette is a function which outputs a specified number of colors. By default, the palette used in incidence is called incidence_pal1. Its behaviour differs from usual palettes in that the first 4 colours are not interpolated:

par(mfrow = c(3, 1), mar = c(4,2,1,1))
barplot(1:2, col = incidence_pal1(2))
barplot(1:4, col = incidence_pal1(4))
barplot(1:20, col = incidence_pal1(20))

This palette also has a light and a dark version:

par(mfrow = c(3,1))
barplot(1:20, col = incidence_pal1_dark(20), main = "palette:  incidence_pal1_dark")
barplot(1:20, col = incidence_pal1(20), main = "palette:  incidence_pal1")
barplot(1:20, col = incidence_pal1_light(20), main = "palette:  incidence_pal1_light")

Using different palettes

Other color palettes can be provided via col_pal. Various palettes are part of the base R distribution, and many more are provided in additional packages. We provide a couple of examples:

plot(i.hosp, col_pal = rainbow)

plot(i.sex, col_pal = cm.colors)
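
Any function returning n colors can be passed to col_pal; for instance, a custom palette built with colorRampPalette() (the colors below are an arbitrary choice):

my_pal <- colorRampPalette(c("navy", "white", "firebrick"))
plot(i.hosp, col_pal = my_pal)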

Specifying colors manually

Colors can be specified manually using the argument color; note that whenever incidence is computed by groups, the number of colors must match the number of groups, otherwise color is ignored.

Example 1: changing a single color

plot(i, color = "darkred")

Example 2: changing several colors (note that naming colors is optional)

plot(i.sex, color = c(m = "orange2", f = "purple3"))

Example 3: using color to highlight specific groups

plot(i.hosp, 
     color = c("#ac3973", "#6666ff", "white", "white", "white", "white"))
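
If you prefer not to count groups by hand, the number of colors can be derived from the object itself, e.g. using ncol() with any palette function (rainbow() here is an arbitrary choice):

plot(i.hosp, color = rainbow(ncol(i.hosp)))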


Useful ggplot2 tweaks

Numerous tweaks for ggplot2 are documented online. In the following, we merely provide a few useful tips in the context of incidence.

Changing dates on the x-axis

Changing date format

By default, the dates indicated on the x-axis of an incidence plot may not have a suitable format. The package scales can be used to change the way dates are labeled (see ?strptime for possible formats):

library(scales)
plot(i, labels_week = FALSE) +
   scale_x_date(labels = date_format("%d %b %Y"))
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Notice how the labels all fall on the first of the month? If you want the labels placed at different positions, for instance aligned with the incidence bins, you can use the make_breaks() function to calculate breaks for the plot:

b <- make_breaks(i, labels_week = FALSE)
b
#> $breaks
#> [1] "2014-04-07" "2014-07-07" "2014-10-06" "2015-01-05" "2015-04-06"
#> [6] "2015-07-06"
#> 
#> $labels
#> list()
#> attr(,"class")
#> [1] "waiver"
plot(i) +
  scale_x_date(breaks = b$breaks, 
               labels = date_format("%d %b %Y"))
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Here is another example with a subset of the data (the first 50 weeks), using more detailed date labels and rotated annotations:

plot(i[1:50]) +  
  scale_x_date(breaks = b$breaks, labels = date_format("%a %d %B %Y")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12))
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Note that you can save customisations for later use:

rotate.big <- theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12))

Changing the grid

The last example above illustrates that it can be useful to have denser annotations of the x-axis, especially over short time periods. Here, we provide an example where we zoom in on the peak of the epidemic, using the data by hospital:

plot(i.hosp)

Let us look at the data 40 days before and after the 1st of October:

period <- as.Date("2014-10-01") + c(-40, 40)
i.zoom <- subset(i.hosp, from = period[1], to = period[2])
detailed.x <- scale_x_date(labels = date_format("%a %d %B %Y"), 
                           date_breaks = "2 weeks", 
                           date_minor_breaks = "week")

plot(i.zoom, border = "black") + detailed.x + rotate.big
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Handling non-ISO weeks

If you have weekly incidence that starts on a day other than Monday, then the above solution may produce breaks that fall inside the bins:

i.sat <- incidence(onset, interval = "1 week: saturday", groups = ebola_sim_clean$linelist$hospital)
i.szoom <- subset(i.sat, from = period[1], to = period[2])

plot(i.szoom, border = "black") + detailed.x + rotate.big
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

In this case, you may want to either calculate breaks using make_breaks() or use the scale_x_incidence() function to automatically calculate these for you:

plot(i.szoom, border = "black") + 
  scale_x_incidence(i.szoom, n_breaks = nrow(i.szoom)/2, labels_week = FALSE) +
  rotate.big
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

sat_breaks <- make_breaks(i.szoom, n_breaks = nrow(i.szoom)/2)
plot(i.szoom, border = "black") + 
  scale_x_date(breaks = sat_breaks$breaks, labels = date_format("%a %d %B %Y")) +
  rotate.big
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Labelling every bin

Sometimes you may want to label every bin of the incidence object. To do this, you can simply set n_breaks to the number of rows in your incidence object:

plot(i.szoom, n_breaks = nrow(i.szoom), border = "black") + rotate.big

Changing the legend

The previous plot has a fairly large legend which we may want to move around. Let us save the plot as a new object p and customize the legend:

p <- plot(i.zoom, border = "black") + detailed.x + rotate.big
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.
p + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), 
          legend.position = "top", legend.direction = "horizontal", 
          legend.title = element_blank())

Applying the style of the European Programme for Intervention Epidemiology Training (EPIET)

Display individual cases

For small datasets it is a convention of EPIET to display individual cases as rectangles. This can be done in two steps: first, using the option show_cases = TRUE with a white border, and second, setting the background to white. We also add coord_equal(), which forces each case to be a square.

i.small <- incidence(onset[160:180])

plot(i.small, border = "white", show_cases = TRUE) +
  theme(panel.background = element_rect(fill = "white")) + 
  rotate.big +
  coord_equal() 

incidence/inst/doc/overview.Rmd0000644000176200001440000002237014621107763016242 0ustar liggesusers--- title: "Overview of the incidence package" author: "Thibaut Jombart, Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 2 vignette: > %\VignetteIndexEntry{Overview} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` *incidence* implements functions and classes to compute, handle, visualise and model incidences from dates data. This vignette provides an overview of current features. It largely reproduces the content of `README.md`.
# Installing the package To install the current stable, CRAN version of the package, type: ```{r install, eval=FALSE} install.packages("incidence") ``` To benefit from the latest features and bug fixes, install the development, *github* version of the package using: ```{r install2, eval=FALSE} devtools::install_github("reconhub/incidence") ``` Note that this requires the package *devtools* installed.
# What does it do? The main functions of the package include: - **`incidence`**: compute incidence from dates in various formats; any fixed time interval can be used; the returned object is an instance of the (S3) class *incidence*. - **`plot`**: this method (see `?plot.incidence` for details) plots *incidence* objects, and can also add predictions of the model(s) contained in an *incidence_fit* object (or a list of such objects). - **`fit`**: fit one or two exponential models (i.e. linear regression on log-incidence) to an *incidence* object; two models are calibrated only if a date is provided to split the time series in two (argument `split`); this is typically useful to model the two phases of exponential growth, and decrease of an outbreak; each model returned is an instance of the (S3) class *incidence_fit*, each of which contains various useful information (e.g. growth rate *r*, doubling/halving time, predictions and confidence intervals); results can be plotted using `plot`, or added to an existing `uncudence` plot using the piping-friendly function `add_incidence_fit`. - **`fit_optim_split`**: finds the optimal date to split the time series in two, typically around the peak of the epidemic. - **`[`**: lower-level subsetting of *incidence* objects, permitting to specify which dates and groups to retain; uses a syntax similar to matrices, i.e. `x[i, j]`, where `x` is the *incidence* object, `i` a subset of dates, and `j` a subset of groups. - **`subset`**: subset an *incidence* object by specifying a time window. - **`pool`**: pool incidence from different groups into one global incidence time series. - **`cumulate`**: computes cumulative incidence over time from and `incidence` object. - **`as.data.frame`**: converts an *incidence* object into a `data.frame` containing dates and incidence values. - **`bootstrap`**: generates a bootstrapped *incidence* object by re-sampling, with replacement, the original dates of events. - **`find_peak`**: locates the peak time of the epicurve. - **`estimate_peak`**: uses bootstrap to estimate the peak time (and related confidence interval) of a partially observed outbreak. # Worked example: simulated Ebola outbreak ## Loading the data This example uses the simulated Ebola Virus Disease (EVD) outbreak from the package [*outbreaks*](https://cran.r-project.org/package=outbreaks). We will compute incidence for various time steps, calibrate two exponential models around the peak of the epidemic, and analyse the results. First, we load the data: ```{r, data} library(outbreaks) library(ggplot2) library(incidence) dat <- ebola_sim$linelist$date_of_onset class(dat) head(dat) ``` ## Computing and plotting incidence We compute the daily incidence: ```{r, incid1} i <- incidence(dat) i plot(i) ``` The daily incidence is quite noisy, but we can easily compute other incidence using larger time intervals: ```{r, interv} # weekly, starting on Monday (ISO week, default) i.7 <- incidence(dat, interval = "1 week") plot(i.7) # semi-weekly, starting on Saturday i.14 <- incidence(dat, interval = "2 saturday weeks") plot(i.14, border = "white") ## monthly i.month <- incidence(dat, interval = "1 month") plot(i.month, border = "white") ``` `incidence` can also compute incidence by specified groups using the `groups` argument. 
For instance, we can compute incidence by gender: ```{r, gender} i.7.sex <- incidence(dat, interval = "1 week", groups = ebola_sim$linelist$gender) i.7.sex plot(i.7.sex, stack = TRUE, border = "grey") ``` We can do the same for hospitals, using the 'clean' version of the dataset, with some customization of the legend: ```{r, hosp} i.7.hosp <- with(ebola_sim_clean$linelist, incidence(date_of_onset, interval = "week", groups = hospital)) i.7.hosp head(get_counts(i.7.hosp)) plot(i.7.hosp, stack=TRUE) + theme(legend.position= "top") + labs(fill="") ``` ## Handling `incidence` objects `incidence` objects can be manipulated easily. The `[` operator implements subsetting of dates (first argument) and groups (second argument). For instance, to keep only the peak of the distribution: ```{r, middle} i[100:250] plot(i[100:250]) ``` Or to keep every other week: ```{r, stripes} i.7[c(TRUE,FALSE)] plot(i.7[c(TRUE,FALSE)]) ``` Some temporal subsetting can be even simpler using `subset`, which permits to retain data within a specified time window: ```{r, tail} i.tail <- subset(i, from=as.Date("2015-01-01")) i.tail plot(i.tail, border="white") ``` Subsetting groups can also matter. For instance, let's try and visualise the incidence based on onset of symptoms by outcome: ```{r, i7outcome} i.7.outcome <- incidence(dat, 7, groups=ebola_sim$linelist$outcome) i.7.outcome plot(i.7.outcome, stack = TRUE, border = "grey") ``` By default, `incidence` treats missing data (NA) as a separate group (see argument `na_as_group`). We could disable this to retain only known outcomes, but alternatively we can simply subset the object to exclude the last (3rd) group: ```{r, groupsub} i.7.outcome[,1:2] plot(i.7.outcome[,1:2], stack = TRUE, border = "grey") ``` Groups can also be collapsed into a single time series using `pool`: ```{r, pool} i.pooled <- pool(i.7.outcome) i.pooled identical(i.7$counts, i.pooled$counts) ``` ## Modelling incidence Incidence data, excluding zeros, can be modelled using log-linear regression of the form: log(*y*) = *r* x *t* + *b* where *y* is the incidence, *r* is the growth rate, *t* is the number of days since a specific point in time (typically the start of the outbreak), and *b* is the intercept. Such model can be fitted to any incidence object using `fit`. Of course, a single log-linear model is not sufficient for modelling our epidemic curve, as there is clearly an growing and a decreasing phase. As a start, we can calibrate a model on the first 20 weeks of the epidemic: ```{r, fit1} plot(i.7[1:20]) early.fit <- fit(i.7[1:20]) early.fit ``` The resulting objects (known as `incidence_fit` objects) can be plotted, in which case the prediction and its confidence interval is displayed: ```{r} plot(early.fit) ``` However, a better way to display these predictions is adding them to the incidence plot using the argument `fit`: ```{r} plot(i.7[1:20], fit = early.fit) ``` In this case, we would ideally like to fit two models, before and after the peak of the epidemic. This is possible using the following approach, if you know what date to use to split the data in two phases: ```{r, fit.both} fit.both <- fit(i.7, split=as.Date("2014-10-15")) fit.both plot(i.7, fit=fit.both) ``` This is much better, but the splitting date is not completely optimal. To look for the best possible splitting date (i.e. 
the one maximizing the average fit of both models), we use: ```{r, optim} best.fit <- fit_optim_split(i.7) best.fit plot(i.7, fit=best.fit$fit) ``` These models are very good approximation of these data, showing a doubling time of `r round(get_info(best.fit$fit, "doubling"), 1)` days during the first phase, and a halving time of `r round(get_info(best.fit$fit, "halving"), 1)` days during the second. To access these parameters, you can use the `get_info()` function. The possible parameters are: - "r", the daily growth rate - "doubling" the rate of doubling in days (if "r" is positive) - "halving" the rate of halving in days (if "r" is negative) - "pred" a data frame of incidence predictions For "r", "doubling", and "halving", you can also add ".conf" to get the confidence intervals. Here's how you can get the doubling and halving times of the above epi curve: ```{r, get_info} get_info(best.fit$fit, "doubling") # doubling time get_info(best.fit$fit, "doubling.conf") # confidence interval get_info(best.fit$fit, "halving") get_info(best.fit$fit, "halving.conf") ``` Note that `fit` will also take groups into account if incidence has been computed for several groups: ```{r, optim2} best.fit2 <- fit_optim_split(i.7.sex) best.fit2 plot(i.7.sex, fit=best.fit2$fit) ``` Using `get_info()` on this fit object will return all groups together: ```{r, get_info_groups} get_info(best.fit2$fit, "doubling") # doubling time get_info(best.fit2$fit, "doubling.conf") # confidence interval get_info(best.fit2$fit, "halving") get_info(best.fit2$fit, "halving.conf") ``` incidence/inst/doc/conversions.R0000644000176200001440000000360114626316436016423 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ## ----example------------------------------------------------------------------ library(outbreaks) library(incidence) dat <- ebola_sim$linelist$date_of_onset i_14 <- incidence(dat, interval = "2 epiweeks", groups = ebola_sim$linelist$gender) i_14 plot(i_14, border = "white") ## ----------------------------------------------------------------------------- as.data.frame(i_14) ## ----long--------------------------------------------------------------------- df <- as.data.frame(i_14, long = TRUE) head(df) tail(df) ## example of custom plot using steps: library(ggplot2) ggplot(df, aes(x = dates, y = counts)) + geom_step(aes(color = groups)) ## ----iso---------------------------------------------------------------------- i_7 <- incidence(dat, interval = "week") i_7 plot(i_7, border = "white") head(as.data.frame(i_7)) tail(as.data.frame(i_7)) ## ----conversions-------------------------------------------------------------- args(incidence:::as.incidence.matrix) ## ----------------------------------------------------------------------------- vec <- c(1,2,3,0,3,2,4,1,2,1) i <- as.incidence(vec) i plot(vec, type = "s") plot(i, border = "white") ## ----------------------------------------------------------------------------- i <- as.incidence(vec, interval = 7) i plot(i, border = "white") ## ----------------------------------------------------------------------------- i$dates ## ----round_trip--------------------------------------------------------------- ## convertion: incidence --> data.frame: i_14 df <- as.data.frame(i_14) head(df) tail(df) ## conversion: data.frame --> incidence new_i <- as.incidence(df[group_names(i_14)], df$dates, interval = "2 epiweeks") new_i ## check round trip identical(new_i, 
i_14) incidence/inst/doc/incidence_class.html0000644000176200001440000026532414626316441017734 0ustar liggesusers Details of the incidence class

Details of the incidence class

Thibaut Jombart, Zhian N. Kamvar

2024-05-31

This vignette details the structure of incidence objects, as produced by the incidence function.


Structure of an incidence object.

We generate a toy dataset of dates to examine the content of incidence objects.

library(incidence)
set.seed(1)
dat <- sample(1:50, 200, replace = TRUE, prob = 1 + exp(1:50 * 0.1))
sex <- sample(c("female", "male"), 200, replace = TRUE)

The incidence by 48h period is computed as:

i <- incidence(dat, interval = 2)
i
#> <incidence object>
#> [200 cases from days 5 to 49]
#> 
#> $counts: matrix with 23 rows and 1 columns
#> $n: 200 cases in total
#> $dates: 23 dates marking the left-side of bins
#> $interval: 2 days
#> $timespan: 45 days
#> $cumulative: FALSE
plot(i)

We also compute incidence by gender:

i.sex <- incidence(dat, interval = 2, group = sex)
i.sex
#> <incidence object>
#> [200 cases from days 5 to 49]
#> [2 groups: female, male]
#> 
#> $counts: matrix with 23 rows and 2 columns
#> $n: 200 cases in total
#> $dates: 23 dates marking the left-side of bins
#> $interval: 2 days
#> $timespan: 45 days
#> $cumulative: FALSE
plot(i.sex)

The object i is a list with the class incidence:

class(i)
#> [1] "incidence"
is.list(i)
#> [1] TRUE
names(i)
#> [1] "dates"      "counts"     "timespan"   "interval"   "n"         
#> [6] "cumulative"

Items in i can be accessed using the same indexing as any list, but it’s safer to use the accessors for each item:

## use name
head(i$dates)
#> [1]  5  7  9 11 13 15

head(get_dates(i))
#> [1]  5  7  9 11 13 15
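
The interval, total number of cases and timespan have accessors too; as a quick illustration (the values match the summary printed above):

get_interval(i)
#> [1] 2
get_n(i)
#> [1] 200
get_timespan(i)
#> [1] 45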

In the following sections, we examine each of the components of the object.

$dates

The $dates component contains a vector of all the dates for which incidence has been computed, in the format of the input dataset (e.g. Date, numeric, integer).

date_bins <- get_dates(i)
class(date_bins)
#> [1] "integer"
class(dat)
#> [1] "integer"

date_bins
#>  [1]  5  7  9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49

The dates correspond to the lower bounds of the time intervals used as bins for the incidence. Bins always include the lower bound and exclude the upper bound. In the example provided above, this means that the first bin counts events that happened on days 5-6, the second bin counts events on days 7-8, etc.
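
As a quick sanity check, tallying the toy data by hand for the first two bins, [5, 7) and [7, 9), matches the first two values of the $counts matrix shown below:

sum(dat >= 5 & dat < 7)
#> [1] 3
sum(dat >= 7 & dat < 9)
#> [1] 0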

Note that if we had actual Date-class dates, they would be returned as dates:

dat_Date <- as.Date("2018-10-31") + dat
head(dat_Date)
#> [1] "2018-12-17" "2018-12-16" "2018-12-12" "2018-11-26" "2018-12-18"
#> [6] "2018-11-27"
i.date <- incidence(dat_Date, interval = 2, group = sex)
i.date
#> <incidence object>
#> [200 cases from days 2018-11-05 to 2018-12-19]
#> [2 groups: female, male]
#> 
#> $counts: matrix with 23 rows and 2 columns
#> $n: 200 cases in total
#> $dates: 23 dates marking the left-side of bins
#> $interval: 2 days
#> $timespan: 45 days
#> $cumulative: FALSE
get_dates(i.date)
#>  [1] "2018-11-05" "2018-11-07" "2018-11-09" "2018-11-11" "2018-11-13"
#>  [6] "2018-11-15" "2018-11-17" "2018-11-19" "2018-11-21" "2018-11-23"
#> [11] "2018-11-25" "2018-11-27" "2018-11-29" "2018-12-01" "2018-12-03"
#> [16] "2018-12-05" "2018-12-07" "2018-12-09" "2018-12-11" "2018-12-13"
#> [21] "2018-12-15" "2018-12-17" "2018-12-19"
class(get_dates(i.date))
#> [1] "Date"

These can be converted to integers, counting the number of days from the first date.

get_dates(i.date, count_days = TRUE)
#>  [1]  0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44
get_dates(i, count_days = TRUE)
#>  [1]  0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44

To facilitate modelling, it’s also possible to get the center of the interval by using the position = "center" argument:

get_dates(i.date, position = "center")
#>  [1] "2018-11-06" "2018-11-08" "2018-11-10" "2018-11-12" "2018-11-14"
#>  [6] "2018-11-16" "2018-11-18" "2018-11-20" "2018-11-22" "2018-11-24"
#> [11] "2018-11-26" "2018-11-28" "2018-11-30" "2018-12-02" "2018-12-04"
#> [16] "2018-12-06" "2018-12-08" "2018-12-10" "2018-12-12" "2018-12-14"
#> [21] "2018-12-16" "2018-12-18" "2018-12-20"
get_dates(i.date, position = "center", count_days = TRUE)
#>  [1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45
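
These numeric positions are convenient for modelling. As a minimal sketch (added for illustration, not part of the original vignette), a rough log-linear trend can be estimated from the day counts, dropping empty bins so that log() stays finite:

day  <- get_dates(i, count_days = TRUE)
n    <- as.integer(get_counts(i))
keep <- n > 0
coef(stats::lm(log(n[keep]) ~ day[keep])) # the slope approximates the daily growth rate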

$counts

The $counts component contains the actual incidence, i.e. counts of events for the defined bins. It is a matrix of integers where rows correspond to time intervals, with one column for each group for which incidence is computed (a single, unnamed column if no groups were provided). If groups were provided, columns are named after the groups. We illustrate the difference comparing the two objects i and i.sex:

counts <- get_counts(i)
class(counts)
#> [1] "matrix" "array"
storage.mode(counts)
#> [1] "integer"

counts
#>       [,1]
#>  [1,]    3
#>  [2,]    0
#>  [3,]    1
#>  [4,]    0
#>  [5,]    1
#>  [6,]    0
#>  [7,]    1
#>  [8,]    0
#>  [9,]    3
#> [10,]    3
#> [11,]    3
#> [12,]    5
#> [13,]    9
#> [14,]    4
#> [15,]    5
#> [16,]   12
#> [17,]   13
#> [18,]   16
#> [19,]   15
#> [20,]   25
#> [21,]   28
#> [22,]   28
#> [23,]   25
get_counts(i.sex)
#>       female male
#>  [1,]      2    1
#>  [2,]      0    0
#>  [3,]      0    1
#>  [4,]      0    0
#>  [5,]      1    0
#>  [6,]      0    0
#>  [7,]      0    1
#>  [8,]      0    0
#>  [9,]      2    1
#> [10,]      1    2
#> [11,]      1    2
#> [12,]      2    3
#> [13,]      3    6
#> [14,]      3    1
#> [15,]      0    5
#> [16,]      6    6
#> [17,]      8    5
#> [18,]      7    9
#> [19,]      7    8
#> [20,]     17    8
#> [21,]     15   13
#> [22,]     12   16
#> [23,]     12   13

You can see the dimensions of the incidence object by using dim(), ncol(), and nrow(), which operate on the underlying counts matrix:

dim(get_counts(i.sex))
#> [1] 23  2
dim(i.sex)
#> [1] 23  2
nrow(i.sex) # number of date bins
#> [1] 23
ncol(i.sex) # number of groups
#> [1] 2

There are also accessors for handling groups:

# Number of groups
ncol(i.sex)
#> [1] 2
ncol(i)
#> [1] 1

# Names of groups
group_names(i.sex)
#> [1] "female" "male"
group_names(i)
#> NULL

# You can also rename the groups
group_names(i.sex) <- c("F", "M")
group_names(i.sex)
#> [1] "F" "M"

Note that a data.frame containing dates and counts can be obtained using as.data.frame:

## basic conversion
as.data.frame(i)
#>    dates counts
#> 1      5      3
#> 2      7      0
#> 3      9      1
#> 4     11      0
#> 5     13      1
#> 6     15      0
#> 7     17      1
#> 8     19      0
#> 9     21      3
#> 10    23      3
#> 11    25      3
#> 12    27      5
#> 13    29      9
#> 14    31      4
#> 15    33      5
#> 16    35     12
#> 17    37     13
#> 18    39     16
#> 19    41     15
#> 20    43     25
#> 21    45     28
#> 22    47     28
#> 23    49     25
as.data.frame(i.sex)
#>    dates  F  M
#> 1      5  2  1
#> 2      7  0  0
#> 3      9  0  1
#> 4     11  0  0
#> 5     13  1  0
#> 6     15  0  0
#> 7     17  0  1
#> 8     19  0  0
#> 9     21  2  1
#> 10    23  1  2
#> 11    25  1  2
#> 12    27  2  3
#> 13    29  3  6
#> 14    31  3  1
#> 15    33  0  5
#> 16    35  6  6
#> 17    37  8  5
#> 18    39  7  9
#> 19    41  7  8
#> 20    43 17  8
#> 21    45 15 13
#> 22    47 12 16
#> 23    49 12 13

## long format for ggplot2
as.data.frame(i.sex, long = TRUE)
#>    dates counts groups
#> 1      5      2      F
#> 2      7      0      F
#> 3      9      0      F
#> 4     11      0      F
#> 5     13      1      F
#> 6     15      0      F
#> 7     17      0      F
#> 8     19      0      F
#> 9     21      2      F
#> 10    23      1      F
#> 11    25      1      F
#> 12    27      2      F
#> 13    29      3      F
#> 14    31      3      F
#> 15    33      0      F
#> 16    35      6      F
#> 17    37      8      F
#> 18    39      7      F
#> 19    41      7      F
#> 20    43     17      F
#> 21    45     15      F
#> 22    47     12      F
#> 23    49     12      F
#> 24     5      1      M
#> 25     7      0      M
#> 26     9      1      M
#> 27    11      0      M
#> 28    13      0      M
#> 29    15      0      M
#> 30    17      1      M
#> 31    19      0      M
#> 32    21      1      M
#> 33    23      2      M
#> 34    25      2      M
#> 35    27      3      M
#> 36    29      6      M
#> 37    31      1      M
#> 38    33      5      M
#> 39    35      6      M
#> 40    37      5      M
#> 41    39      9      M
#> 42    41      8      M
#> 43    43      8      M
#> 44    45     13      M
#> 45    47     16      M
#> 46    49     13      M

Note that incidence has an argument called na_as_group, TRUE by default, which pools all observations with missing group information into a separate group; this group then appears as an additional column in $counts.
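
For instance (a toy illustration added here, not from the original vignette), setting a few group labels to missing shows the extra group:

sex_na <- sex
sex_na[1:5] <- NA # pretend the group is unknown for five cases
i.na <- incidence(dat, interval = 2, groups = sex_na)
group_names(i.na) # the missing values should show up as an additional group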

$timespan

The $timespan component stores the length of the time period covered by the object:

get_timespan(i)
#> [1] 45
print(date_range <- range(get_dates(i)))
#> [1]  5 49
diff(date_range) + 1
#> [1] 45

$interval

The $interval component contains the length of the time interval for the bins:

get_interval(i)
#> [1] 2
diff(get_dates(i))
#>  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2

$n

The $n component stores the total number of events in the data:

get_n(i)
#> [1] 200

Note that to obtain the number of cases by groups, one can use:

colSums(get_counts(i.sex))
#>   F   M 
#>  99 101
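
The same idea gives the proportion of cases in each group (a small addition for illustration):

round(prop.table(colSums(get_counts(i.sex))), 2)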

$weeks

The $weeks component is optional and is used to store aweek objects whenever they have been used. Weeks are used by default when weekly incidence is computed from dates (see the standard argument in ?incidence).

library(outbreaks)
dat <- ebola_sim$linelist$date_of_onset
i.7 <- incidence(dat, "1 epiweek", standard = TRUE)
i.7
#> <incidence object>
#> [5888 cases from days 2014-04-06 to 2015-04-26]
#> [5888 cases from MMWR weeks 2014-W15 to 2015-W17]
#> 
#> $counts: matrix with 56 rows and 1 columns
#> $n: 5888 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 1 week
#> $timespan: 386 days
#> $cumulative: FALSE
i.7$weeks
#> <aweek start: Sunday>
#>  [1] "2014-W15" "2014-W16" "2014-W17" "2014-W18" "2014-W19" "2014-W20"
#>  [7] "2014-W21" "2014-W22" "2014-W23" "2014-W24" "2014-W25" "2014-W26"
#> [13] "2014-W27" "2014-W28" "2014-W29" "2014-W30" "2014-W31" "2014-W32"
#> [19] "2014-W33" "2014-W34" "2014-W35" "2014-W36" "2014-W37" "2014-W38"
#> [25] "2014-W39" "2014-W40" "2014-W41" "2014-W42" "2014-W43" "2014-W44"
#> [31] "2014-W45" "2014-W46" "2014-W47" "2014-W48" "2014-W49" "2014-W50"
#> [37] "2014-W51" "2014-W52" "2014-W53" "2015-W01" "2015-W02" "2015-W03"
#> [43] "2015-W04" "2015-W05" "2015-W06" "2015-W07" "2015-W08" "2015-W09"
#> [49] "2015-W10" "2015-W11" "2015-W12" "2015-W13" "2015-W14" "2015-W15"
#> [55] "2015-W16" "2015-W17"

Because $weeks is an optional element, it does not have a dedicated accessor. If the element is not present, attempting to access it will result in a NULL:

i$weeks
#> NULL

Both dates and weeks are returned when converting an incidence object to data.frame:

head(as.data.frame(i.7))
#>        dates    weeks counts
#> 1 2014-04-06 2014-W15      1
#> 2 2014-04-13 2014-W16      1
#> 3 2014-04-20 2014-W17      4
#> 4 2014-04-27 2014-W18      4
#> 5 2014-05-04 2014-W19     12
#> 6 2014-05-11 2014-W20     15
incidence/inst/doc/conversions.Rmd0000644000176200001440000001061314621104516016732 0ustar liggesusers--- title: "Conversions to and from the incidence class" author: "Thibaut Jombart, Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 2 vignette: > %\VignetteIndexEntry{Conversions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette documents two types of conversion which can be made using the *incidence* class: - *'exports'*: conversion from an *incidence* object to another type of object; this can be useful for processing incidence data in other software, or for reporting results. - *'imports'*: conversion from already computed incidence into an *incidence* object; this can be useful for using features of the *incidence* package for data handling and plotting with incidence data computed elsewhere.
# Exporting results To export results, we first compute semi-weekly incidence (with weeks starting on Sunday, the beginning of the CDC epiweek) by gender from the simulated Ebola data used in the [overview vignette](https://www.repidemicsconsortium.org/incidence/articles/overview.html): ```{r example} library(outbreaks) library(incidence) dat <- ebola_sim$linelist$date_of_onset i_14 <- incidence(dat, interval = "2 epiweeks", groups = ebola_sim$linelist$gender) i_14 plot(i_14, border = "white") ``` To export the data to a `data.frame`, one simply needs: ```{r} as.data.frame(i_14) ``` The first column contains the dates marking the (inclusive) left side of the time intervals used for computing incidence, and the other columns give counts for the different groups. This function also has an option for exporting data as a 'long' format, i.e. with a column for 'groups' and a column for counts. This format can be useful especially when working with *ggplot2*, which expect data in this shape: ```{r, long} df <- as.data.frame(i_14, long = TRUE) head(df) tail(df) ## example of custom plot using steps: library(ggplot2) ggplot(df, aes(x = dates, y = counts)) + geom_step(aes(color = groups)) ``` Finally, note that when ISO weeks are used, these are also reported in the output: ```{r, iso} i_7 <- incidence(dat, interval = "week") i_7 plot(i_7, border = "white") head(as.data.frame(i_7)) tail(as.data.frame(i_7)) ```
# Importing pre-computed incidence The function `as.incidence` facilitates the conversion of pre-computed incidences to an *incidence* object. Typically, the input will be imported into R from a *.csv* file or other spreadsheet formats. `as.incidence` is a generic with methods for several types of objects (see `?as.incidence`). The main method is `matrix`, as other types are coerced to `matrix` first and then passed to `as.incidence.matrix`: ```{r, conversions} args(incidence:::as.incidence.matrix) ``` The only mandatory argument `x` is a table of counts, with time intervals in rows and groups in columns; if there are no groups, then the column doesn't need a name; but if there are several groups, then columns should be named to indicate group labels. Optionally, `dates` can be provided to indicate the (inclusive) lower bounds of the time intervals, corresponding to the rows of `x`; most sensible date formats will do; if indicated as a character string, make sure the format is `YYYY-mm-dd`, e.g. `2017-04-01` for the 1st April 2017. Let us illustrate the conversion using a simple vector of incidence: ```{r} vec <- c(1,2,3,0,3,2,4,1,2,1) i <- as.incidence(vec) i plot(vec, type = "s") plot(i, border = "white") ``` Assuming the above incidences are computed weekly, we would then use: ```{r} i <- as.incidence(vec, interval = 7) i plot(i, border = "white") ``` Note that in this case, incidences have been treated as per week, and corresponding dates in days have been computed during the conversion (the first day is always '1'), so that the first days of weeks 1, 2, 3... are: ```{r} i$dates ``` In practice, it is best to provide the actual dates marking the lower bounds of the time intervals. We can illustrate this by a round trip using the example of the previous section: ```{r, round_trip} ## convertion: incidence --> data.frame: i_14 df <- as.data.frame(i_14) head(df) tail(df) ## conversion: data.frame --> incidence new_i <- as.incidence(df[group_names(i_14)], df$dates, interval = "2 epiweeks") new_i ## check round trip identical(new_i, i_14) ``` incidence/inst/doc/customize_plot.Rmd0000644000176200001440000002012014621106437017440 0ustar liggesusers--- title: "Customize plots of incidence" author: "Thibaut Jombart, Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 4 vignette: > %\VignetteIndexEntry{Customise graphics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette provides some tips for the most common customisations of graphics produced by `plot.incidence`. Our graphics use *ggplot2*, which is a distinct graphical system from base graphics. If you want advanced customisation of your incidence plots, we recommend following an introduction to *ggplot2*.
# Example data: simulated Ebola outbreak This example uses the simulated Ebola Virus Disease (EVD) outbreak from the package [*outbreaks*](https://cran.r-project.org/package=outbreaks): `ebola_sim_clean`. First, we load the data: ```{r, data} library(outbreaks) library(ggplot2) library(incidence) onset <- ebola_sim_clean$linelist$date_of_onset class(onset) head(onset) ``` We compute the weekly incidence: ```{r, incid1} i <- incidence(onset, interval = 7) i i.sex <- incidence(onset, interval = 7, group = ebola_sim_clean$linelist$gender) i.sex i.hosp <- incidence(onset, interval = 7, group = ebola_sim_clean$linelist$hospital) i.hosp ```
# The `plot.incidence` function When calling `plot` on an *incidence* object, the function `plot.incidence` is implicitly used. To access its documentation, use `?plot.incidence`. In this section, we illustrate existing customisations. ## Default behaviour By default, the function uses grey for single time series, and colors from the color palette `incidence_pal1` when incidence is computed by groups: ```{r, default} plot(i) plot(i.sex) plot(i.hosp) ``` However, some of these defaults can be altered through the various arguments of the function: ```{r, args} args(incidence:::plot.incidence) ``` ## Changing colors ### The default palette A color palette is a function which outputs a specified number of colors. By default, the color used in *incidence* is called `incidence_pal1`. Its behaviour is different from usual palettes, in the sense that the first 4 colours are not interpolated: ```{r, incidence_pal1, fig.height = 8} par(mfrow = c(3, 1), mar = c(4,2,1,1)) barplot(1:2, col = incidence_pal1(2)) barplot(1:4, col = incidence_pal1(4)) barplot(1:20, col = incidence_pal1(20)) ``` This palette also has a light and a dark version: ```{r, pal2, fig.height = 8} par(mfrow = c(3,1)) barplot(1:20, col = incidence_pal1_dark(20), main = "palette: incidence_pal1_dark") barplot(1:20, col = incidence_pal1(20), main = "palette: incidence_pal1") barplot(1:20, col = incidence_pal1_light(20), main = "palette: incidence_pal1_light") ``` ### Using different palettes Other color palettes can be provided via `col_pal`. Various palettes are part of the base R distribution, and many more are provided in additional packages. We provide a couple of examples: ```{r, palettes} plot(i.hosp, col_pal = rainbow) plot(i.sex, col_pal = cm.colors) ``` ### Specifying colors manually Colors can be specified manually using the argument `color`; note that whenever incidence is computed by groups, the number of colors must match the number of groups, otherwise `color` is ignored. #### Example 1: changing a single color ```{r, colors1} plot(i, color = "darkred") ``` #### Example 2: changing several colors (note that naming colors is optional) ```{r, colors2} plot(i.sex, color = c(m = "orange2", f = "purple3")) ``` #### Example 3: using color to highlight specific groups ```{r, colors3} plot(i.hosp, color = c("#ac3973", "#6666ff", "white", "white", "white", "white")) ```
# Useful *ggplot2* tweaks Numerous tweaks for *ggplot2* are documented online. In the following, we merely provide a few useful tips in the context of *incidence*. ## Changing dates on the *x*-axis ### Changing date format By default, the dates indicated on the *x*-axis of an incidence plot may not have the suitable format. The package *scales* can be used to change the way dates are labeled (see `?strptime` for possible formats): ```{r, scales1} library(scales) plot(i, labels_week = FALSE) + scale_x_date(labels = date_format("%d %b %Y")) ``` Notice how the labels are all situated at the first of the month? If you want to make sure the labels are situated in a different orientation, you can use the `make_breaks()` function to calculate breaks for the plot: ```{r scales_breaks} b <- make_breaks(i, labels_week = FALSE) b plot(i) + scale_x_date(breaks = b$breaks, labels = date_format("%d %b %Y")) ``` And for another example, with a subset of the data (first 50 weeks), using more detailed dates and rotating the annotations: ```{r, scales2} plot(i[1:50]) + scale_x_date(breaks = b$breaks, labels = date_format("%a %d %B %Y")) + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) ``` Note that you can save customisations for later use: ```{r, scales3} rotate.big <- theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) ``` ### Changing the grid The last example above illustrates that it can be useful to have denser annotations of the *x*-axis, especially over short time periods. Here, we provide an example where we try to zoom on the peak of the epidemic, using the data by hospital: ```{r, grid1} plot(i.hosp) ``` Let us look at the data 40 days before and after the 1st of October: ```{r, grid2} period <- as.Date("2014-10-01") + c(-40, 40) i.zoom <- subset(i.hosp, from = period[1], to = period[2]) detailed.x <- scale_x_date(labels = date_format("%a %d %B %Y"), date_breaks = "2 weeks", date_minor_breaks = "week") plot(i.zoom, border = "black") + detailed.x + rotate.big ``` ### Handling non-ISO weeks If you have weekly incidence that starts on a day other than monday, then the above solution may produce breaks that fall inside of the bins: ```{r, saturday-epiweek} i.sat <- incidence(onset, interval = "1 week: saturday", groups = ebola_sim_clean$linelist$hospital) i.szoom <- subset(i.sat, from = period[1], to = period[2]) plot(i.szoom, border = "black") + detailed.x + rotate.big ``` In this case, you may want to either calculate breaks using `make_breaks()` or use the `scale_x_incidence()` function to automatically calculate these for you: ```{r, saturday-epiweek2} plot(i.szoom, border = "black") + scale_x_incidence(i.szoom, n_breaks = nrow(i.szoom)/2, labels_week = FALSE) + rotate.big ``` ```{r, saturday-epiweek3} sat_breaks <- make_breaks(i.szoom, n_breaks = nrow(i.szoom)/2) plot(i.szoom, border = "black") + scale_x_date(breaks = sat_breaks$breaks, labels = date_format("%a %d %B %Y")) + rotate.big ``` ### Labelling every bin Sometimes you may want to label every bin of the incidence object. To do this, you can simply set `n_breaks` to the number of rows in your incidence object: ```{r label-bins} plot(i.szoom, n_breaks = nrow(i.szoom), border = "black") + rotate.big ``` ## Changing the legend The previous plot has a fairly large legend which we may want to move around. 
Let us save the plot as a new object `p` and customize the legend: ```{r, legend1} p <- plot(i.zoom, border = "black") + detailed.x + rotate.big p + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), legend.position = "top", legend.direction = "horizontal", legend.title = element_blank()) ``` ## Applying the style of European Programme for Intervention Epidemiology Training (EPIET) ### Display individual cases For small datasets it is convention of EPIET to display individual cases as rectangles. It can be done by doing two things: first, adding using the option `show_cases = TRUE` with a white border and second, setting the background to white. We also add `coord_equal()` which forces each case to be a square. ```{r, EPIET1} i.small <- incidence(onset[160:180]) plot(i.small, border = "white", show_cases = TRUE) + theme(panel.background = element_rect(fill = "white")) + rotate.big + coord_equal() ``` incidence/inst/doc/customize_plot.R0000644000176200001440000001146214626316440017132 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ## ----data--------------------------------------------------------------------- library(outbreaks) library(ggplot2) library(incidence) onset <- ebola_sim_clean$linelist$date_of_onset class(onset) head(onset) ## ----incid1------------------------------------------------------------------- i <- incidence(onset, interval = 7) i i.sex <- incidence(onset, interval = 7, group = ebola_sim_clean$linelist$gender) i.sex i.hosp <- incidence(onset, interval = 7, group = ebola_sim_clean$linelist$hospital) i.hosp ## ----default------------------------------------------------------------------ plot(i) plot(i.sex) plot(i.hosp) ## ----args--------------------------------------------------------------------- args(incidence:::plot.incidence) ## ----incidence_pal1, fig.height = 8----------------------------------------- par(mfrow = c(3, 1), mar = c(4,2,1,1)) barplot(1:2, col = incidence_pal1(2)) barplot(1:4, col = incidence_pal1(4)) barplot(1:20, col = incidence_pal1(20)) ## ----pal2, fig.height = 8----------------------------------------------------- par(mfrow = c(3,1)) barplot(1:20, col = incidence_pal1_dark(20), main = "palette: incidence_pal1_dark") barplot(1:20, col = incidence_pal1(20), main = "palette: incidence_pal1") barplot(1:20, col = incidence_pal1_light(20), main = "palette: incidence_pal1_light") ## ----palettes----------------------------------------------------------------- plot(i.hosp, col_pal = rainbow) plot(i.sex, col_pal = cm.colors) ## ----colors1------------------------------------------------------------------ plot(i, color = "darkred") ## ----colors2------------------------------------------------------------------ plot(i.sex, color = c(m = "orange2", f = "purple3")) ## ----colors3------------------------------------------------------------------ plot(i.hosp, color = c("#ac3973", "#6666ff", "white", "white", "white", "white")) ## ----scales1------------------------------------------------------------------ library(scales) plot(i, labels_week = FALSE) + scale_x_date(labels = date_format("%d %b %Y")) ## ----scales_breaks------------------------------------------------------------ b <- make_breaks(i, labels_week = FALSE) b plot(i) + scale_x_date(breaks = b$breaks, labels = date_format("%d %b %Y")) ## ----scales2------------------------------------------------------------------ plot(i[1:50]) + scale_x_date(breaks 
= b$breaks, labels = date_format("%a %d %B %Y")) + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) ## ----scales3------------------------------------------------------------------ rotate.big <- theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) ## ----grid1-------------------------------------------------------------------- plot(i.hosp) ## ----grid2-------------------------------------------------------------------- period <- as.Date("2014-10-01") + c(-40, 40) i.zoom <- subset(i.hosp, from = period[1], to = period[2]) detailed.x <- scale_x_date(labels = date_format("%a %d %B %Y"), date_breaks = "2 weeks", date_minor_breaks = "week") plot(i.zoom, border = "black") + detailed.x + rotate.big ## ----saturday-epiweek--------------------------------------------------------- i.sat <- incidence(onset, interval = "1 week: saturday", groups = ebola_sim_clean$linelist$hospital) i.szoom <- subset(i.sat, from = period[1], to = period[2]) plot(i.szoom, border = "black") + detailed.x + rotate.big ## ----saturday-epiweek2-------------------------------------------------------- plot(i.szoom, border = "black") + scale_x_incidence(i.szoom, n_breaks = nrow(i.szoom)/2, labels_week = FALSE) + rotate.big ## ----saturday-epiweek3-------------------------------------------------------- sat_breaks <- make_breaks(i.szoom, n_breaks = nrow(i.szoom)/2) plot(i.szoom, border = "black") + scale_x_date(breaks = sat_breaks$breaks, labels = date_format("%a %d %B %Y")) + rotate.big ## ----label-bins--------------------------------------------------------------- plot(i.szoom, n_breaks = nrow(i.szoom), border = "black") + rotate.big ## ----legend1------------------------------------------------------------------ p <- plot(i.zoom, border = "black") + detailed.x + rotate.big p + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), legend.position = "top", legend.direction = "horizontal", legend.title = element_blank()) ## ----EPIET1------------------------------------------------------------------- i.small <- incidence(onset[160:180]) plot(i.small, border = "white", show_cases = TRUE) + theme(panel.background = element_rect(fill = "white")) + rotate.big + coord_equal() incidence/inst/doc/incidence_fit_class.html0000644000176200001440000132257214626316442020577 0ustar liggesusers Details of the incidence_fit class

Details of the incidence_fit class

Zhian N. Kamvar

2024-05-31

This vignette details the structure and construction of the incidence_fit and incidence_fit_list classes, which are produced by the fit() and fit_optim_split() functions, respectively. By the end of this tutorial, you should be able to construct incidence_fit and incidence_fit_list objects for use with your own models.

Structure of an incidence_fit object

An incidence_fit object contains three elements:

  • $model: The model fit to an incidence object. Currently, this represents a log-linear model, but it can be any model.
  • $info: Information derived from the model
    • r The growth rate
    • r.conf the confidence interval of r
    • pred a data frame containing the predictions of the model using the true dates (dates), their numeric version used in the model (dates.x), the predicted value (fit), and the lower (lwr) and upper (upr) bounds of the associated confidence interval.
    • doubling the predicted doubling time in days (only if r is positive)
    • doubling.conf the confidence interval of the doubling time
    • halving the predicted halving time in days (only if r is negative)
    • halving.conf the confidence interval of the halving time
  • $origin: the date corresponding to day ‘0’

Internally, when fit() is run, these elements are constructed by the function incidence:::extract_info(). First we need to set up the data. We will use simulated Ebola outbreak data from the outbreaks package, aggregated over weekly intervals, and calculate the fit for the first 20 weeks:

library(outbreaks)
library(incidence)
dat <- ebola_sim$linelist$date_of_onset
i <- incidence(dat, interval = "week")
i
#> <incidence object>
#> [5888 cases from days 2014-04-07 to 2015-04-27]
#> [5888 cases from ISO weeks 2014-W15 to 2015-W18]
#> 
#> $counts: matrix with 56 rows and 1 columns
#> $n: 5888 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 1 week
#> $timespan: 386 days
#> $cumulative: FALSE
f <- fit(i[1:20])
f
#> <incidence_fit object>
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#> [1] 0.03175771
#> 
#>   $r.conf (confidence interval):
#>           2.5 %     97.5 %
#> [1,] 0.02596229 0.03755314
#> 
#>   $doubling (doubling time in days):
#> [1] 21.8261
#> 
#>   $doubling.conf (confidence interval):
#>         2.5 %   97.5 %
#> [1,] 18.45777 26.69823
#> 
#>   $pred: data.frame of incidence predictions (20 rows, 5 columns)
plot(i, fit = f)

As you can see, the incidence_fit object has a print method and a plot method. If you want to access individual elements in the $info element, you can use the get_info() function:

get_info(f, "r")
#> [1] 0.03175771
get_info(f, "r.conf")
#>           2.5 %     97.5 %
#> [1,] 0.02596229 0.03755314
get_info(f, "doubling.conf")
#>         2.5 %   97.5 %
#> [1,] 18.45777 26.69823

This will be important later when we combine several incidence_fit objects into a single incidence_fit_list.
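
The model predictions stored in the fit can be retrieved the same way, which is convenient for custom plots or tables (a small addition for illustration):

head(get_info(f, "pred")) # data frame of dates, fitted values and confidence bounds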

Building an incidence_fit object from scratch

The incidence_fit object can be constructed from any model from which you can derive the daily growth rate, doubling/halving times, predictions, and confidence intervals. The following three steps show roughly how it is done from model fitting to construction.

Step 1: create the model

The default model for fit() is a log-linear model on the intervals between dates. To fit this model, we will need to create a data frame with the counts and the midpoints of the intervals:

# ensure all dates have at least one incidence
i2 <- i[1:20]
i2 <- i2[apply(get_counts(i2), 1, min) > 0]
df <- as.data.frame(i2, long = TRUE)
df$dates.x <- get_dates(i2, position = "center", count_days = TRUE)
head(df)
#>        dates    weeks isoweeks counts dates.x
#> 1 2014-04-07 2014-W15 2014-W15      1     3.5
#> 2 2014-04-14 2014-W16 2014-W16      1    10.5
#> 3 2014-04-21 2014-W17 2014-W17      5    17.5
#> 4 2014-04-28 2014-W18 2014-W18      4    24.5
#> 5 2014-05-05 2014-W19 2014-W19     12    31.5
#> 6 2014-05-12 2014-W20 2014-W20     18    38.5
lm1 <- stats::lm(log(counts) ~ dates.x, data = df)
lm1
#> 
#> Call:
#> stats::lm(formula = log(counts) ~ dates.x, data = df)
#> 
#> Coefficients:
#> (Intercept)      dates.x  
#>     0.81660      0.03176

If we compare that to the $model element produced from fit(), we can see that it is identical:

all.equal(f$model, lm1)
#> [1] TRUE

Step 2: creation of the $info list:

The $info list is created directly from the model itself:

r <- stats::coef(lm1)["dates.x"]
r.conf <- stats::confint(lm1, "dates.x", 0.95)
new.data <- data.frame(dates.x = sort(unique(lm1$model$dates.x)))
pred     <- exp(stats::predict(lm1, newdata = new.data, interval = "confidence",
                               level = 0.95))
pred <- cbind.data.frame(new.data, pred)
info_list <- list(
  r = r,
  r.conf = r.conf,
  doubling = log(2) / r,
  doubling.conf = log(2) / r.conf,
  pred = pred
)
info_list
#> $r
#>    dates.x 
#> 0.03175771 
#> 
#> $r.conf
#>              2.5 %     97.5 %
#> dates.x 0.02596229 0.03755314
#> 
#> $doubling
#> dates.x 
#> 21.8261 
#> 
#> $doubling.conf
#>            2.5 %   97.5 %
#> dates.x 26.69823 18.45777
#> 
#> $pred
#>    dates.x        fit        lwr        upr
#> 1      3.5   2.528815   1.611099   3.969283
#> 2     10.5   3.158367   2.082082   4.791013
#> 3     17.5   3.944645   2.687383   5.790104
#> 4     24.5   4.926668   3.463102   7.008763
#> 5     31.5   6.153167   4.453513   8.501484
#> 6     38.5   7.685004   5.711842  10.339797
#> 7     45.5   9.598194   7.300398  12.619220
#> 8     52.5  11.987674   9.289763  15.469105
#> 9     59.5  14.972017  11.757263  19.065771
#> 10    66.5  18.699315  14.786028  23.648299
#> 11    73.5  23.354529  18.467024  29.535566
#> 12    80.5  29.168662  22.905640  37.144163
#> 13    87.5  36.430229  28.231348  47.010209
#> 14    94.5  45.499570  34.607028  59.820534
#> 15   101.5  56.826734  42.236196  76.457588
#> 16   108.5  70.973805  51.369116  98.060497
#> 17   115.5  88.642805  62.309665 126.104782
#> 18   122.5 110.710519  75.424156 162.505217
#> 19   129.5 138.272012  91.152689 209.748604
#> 20   136.5 172.694966 110.023332 271.065700

Step 3: combine lists and create object

The last step is to combine everything into a list and create the object.

origin <- min(get_dates(i2))
info_list$pred$dates <- origin + info_list$pred$dates.x
the_fit <- list(
  lm = lm1,
  info = info_list,
  origin = min(get_dates(i2))
)
class(the_fit) <- "incidence_fit"
the_fit
#> <incidence_fit object>
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#>    dates.x 
#> 0.03175771 
#> 
#>   $r.conf (confidence interval):
#>              2.5 %     97.5 %
#> dates.x 0.02596229 0.03755314
#> 
#>   $doubling (doubling time in days):
#> dates.x 
#> 21.8261 
#> 
#>   $doubling.conf (confidence interval):
#>            2.5 %   97.5 %
#> dates.x 26.69823 18.45777
#> 
#>   $pred: data.frame of incidence predictions (20 rows, 5 columns)
plot(i, fit = the_fit)

Structure of an incidence_fit_list object

There are several reasons for having multiple fits to a single incidence object. One may want to have a separate fit for different groups represented in the object, or one may want to split the fits at the peak of the epidemic. To aid in plotting and summarizing the different fits, we’ve created the incidence_fit_list class. This class has two defining features:

  • It consists of a named list containing one or more incidence_fit objects or lists containing incidence_fit objects.
  • An attribute called “locations” contains a list whose length is equal to the number of incidence_fit objects in the object. Each list element contains a vector that defines where an incidence_fit object is within the incidence_fit_list.

The reason for this structure is that it is sometimes necessary to nest lists of incidence_fit objects within lists. When this happens, accessing individual elements of the objects becomes cumbersome. To alleviate this, each object has a distinct path recorded in the “locations” attribute, which allows one to access the object directly, since R allows you to traverse the elements of a nested list by subsetting with a vector:

l <- list(a = list(b = 1, c = 2),d = list(e = list(f = 3, g = 4), h = 5))
str(l)
#> List of 2
#>  $ a:List of 2
#>   ..$ b: num 1
#>   ..$ c: num 2
#>  $ d:List of 2
#>   ..$ e:List of 2
#>   .. ..$ f: num 3
#>   .. ..$ g: num 4
#>   ..$ h: num 5
l[[c("a", "b")]]
#> [1] 1
l[[c("d", "e", "f")]]
#> [1] 3
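
Storing such paths in a list makes it easy to pull out every leaf programmatically, which is exactly what the ‘locations’ attribute enables (a toy sketch with made-up paths, continuing the example above):

paths <- list(c("a", "b"), c("a", "c"), c("d", "e", "f"), c("d", "e", "g"), c("d", "h"))
vapply(paths, function(p) l[[p]], numeric(1)) # retrieves every numeric leaf of l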

Example: A tale of two fits

The function fit_optim_split() attempts to find the optimal split point in an epicurve, producing an incidence_fit_list object in the $fit element of the returned list:

fl <- fit_optim_split(i)
fl$fit
#> <list of incidence_fit objects>
#> 
#> attr(x, 'locations'): list of vectors with the locations of each incidence_fit object
#> 
#> 'before'
#> 'after'
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#>      before       after 
#>  0.02982209 -0.01016191 
#> 
#>   $r.conf (confidence interval):
#>              2.5 %       97.5 %
#> before  0.02608945  0.033554736
#> after  -0.01102526 -0.009298561
#> 
#>   $doubling (doubling time in days):
#>   before 
#> 23.24274 
#> 
#>   $doubling.conf (confidence interval):
#>           2.5 %  97.5 %
#> before 20.65721 26.5681
#> 
#>   $halving (halving time in days):
#>    after 
#> 68.21031 
#> 
#>   $halving.conf (confidence interval):
#>          2.5 %   97.5 %
#> after 62.86899 74.54349
#> 
#>   $pred: data.frame of incidence predictions (57 rows, 6 columns)
plot(i, fit = fl$fit)

Here you can see that the object looks very similar to an incidence_fit object, but it carries extra information. The first thing you may notice is that both “doubling” and “halving” are shown; this is because the two fits have daily growth rates of opposite sign. The second is that there is an attribute called attr(x, 'locations'), which gives the location of each incidence_fit object within the list. We can illustrate how this works by looking at the structure of the object:

str(fl$fit, max.level = 2)
#> List of 2
#>  $ before:List of 3
#>   ..$ model :List of 12
#>   .. ..- attr(*, "class")= chr "lm"
#>   ..$ info  :List of 5
#>   ..$ origin: Date[1:1], format: "2014-04-07"
#>   ..- attr(*, "class")= chr "incidence_fit"
#>  $ after :List of 3
#>   ..$ model :List of 12
#>   .. ..- attr(*, "class")= chr "lm"
#>   ..$ info  :List of 5
#>   ..$ origin: Date[1:1], format: "2014-09-22"
#>   ..- attr(*, "class")= chr "incidence_fit"
#>  - attr(*, "locations")=List of 2
#>   ..$ : chr "before"
#>   ..$ : chr "after"
#>  - attr(*, "class")= chr "incidence_fit_list"

Internally, all of the methods for incidence_fit_list use the ‘locations’ attribute to navigate:

methods(class = "incidence_fit_list")
#> [1] get_fit  get_info plot     print   
#> see '?methods' for accessing help and source code

For example, it’s often useful to extract the growth rate for all models at once. The get_info() method allows us to do this easily:

get_info(fl$fit, "r")
#>      before       after 
#>  0.02982209 -0.01016191
get_info(fl$fit, "r.conf")
#>              2.5 %       97.5 %
#> before  0.02608945  0.033554736
#> after  -0.01102526 -0.009298561

Because doubling or halving is determined by whether or not r is negative, we automatically filter out the results that don’t make sense, but you can include them with na.rm = FALSE:

get_info(fl$fit, "doubling.conf")
#>           2.5 %  97.5 %
#> before 20.65721 26.5681
get_info(fl$fit, "doubling.conf", na.rm = FALSE)
#>           2.5 %  97.5 %
#> before 20.65721 26.5681
#> after        NA      NA
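
The same traversal can be done by hand using the ‘locations’ attribute described above (a sketch using the objects already defined):

locs <- attr(fl$fit, "locations")
vapply(locs, function(loc) get_info(fl$fit[[loc]], "r"), numeric(1)) # growth rate of each fit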

Example: Nested incidence_fit

Above, we showed the example of a basic incidence_fit_list with two objects representing the fits before and after the peak of an epicurve. However, it is often useful to evaluate fits for different groups separately. Here, we will construct an incidence object, but define groups by gender:

gen <- ebola_sim$linelist$gender
ig <- incidence(dat, interval = "week", group = gen)
plot(ig, border = "grey98")

Now if we calculate an optimal fit split, we will end up with four different fits: two for each defined gender.

fg <- fit_optim_split(ig)
plot(ig, fit = fg$fit, border = "grey98", stack = FALSE)
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.

If we look at the fit object, we can see again that it is an incidence_fit_list but this time with four fits defined.

fg$fit
#> <list of incidence_fit objects>
#> 
#> attr(x, 'locations'): list of vectors with the locations of each incidence_fit object
#> 
#> 'f', 'before'
#> 'm', 'before'
#> 'f', 'after'
#> 'm', 'after'
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#>    f_before    m_before     f_after     m_after 
#>  0.02570604  0.02883607 -0.01002297 -0.01038307 
#> 
#>   $r.conf (confidence interval):
#>                2.5 %       97.5 %
#> f_before  0.02289333  0.028518743
#> m_before  0.02502254  0.032649606
#> f_after  -0.01102735 -0.009018595
#> m_after  -0.01138910 -0.009377034
#> 
#>   $doubling (doubling time in days):
#> f_before m_before 
#> 26.96437 24.03750 
#> 
#>   $doubling.conf (confidence interval):
#>             2.5 %   97.5 %
#> f_before 24.30497 30.27725
#> m_before 21.22988 27.70091
#> 
#>   $halving (halving time in days):
#>  f_after  m_after 
#> 69.15586 66.75746 
#> 
#>   $halving.conf (confidence interval):
#>            2.5 %   97.5 %
#> f_after 62.85711 76.85756
#> m_after 60.86059 73.91966
#> 
#>   $pred: data.frame of incidence predictions (111 rows, 7 columns)
str(fg$fit, max.level = 3)
#> List of 2
#>  $ f:List of 2
#>   ..$ before:List of 3
#>   .. ..$ model :List of 12
#>   .. .. ..- attr(*, "class")= chr "lm"
#>   .. ..$ info  :List of 5
#>   .. ..$ origin: Date[1:1], format: "2014-04-07"
#>   .. ..- attr(*, "class")= chr "incidence_fit"
#>   ..$ after :List of 3
#>   .. ..$ model :List of 12
#>   .. .. ..- attr(*, "class")= chr "lm"
#>   .. ..$ info  :List of 5
#>   .. ..$ origin: Date[1:1], format: "2014-09-22"
#>   .. ..- attr(*, "class")= chr "incidence_fit"
#>   ..- attr(*, "locations")=List of 2
#>   .. ..$ : chr "before"
#>   .. ..$ : chr "after"
#>   ..- attr(*, "class")= chr "incidence_fit_list"
#>  $ m:List of 2
#>   ..$ before:List of 3
#>   .. ..$ model :List of 12
#>   .. .. ..- attr(*, "class")= chr "lm"
#>   .. ..$ info  :List of 5
#>   .. ..$ origin: Date[1:1], format: "2014-04-14"
#>   .. ..- attr(*, "class")= chr "incidence_fit"
#>   ..$ after :List of 3
#>   .. ..$ model :List of 12
#>   .. .. ..- attr(*, "class")= chr "lm"
#>   .. ..$ info  :List of 5
#>   .. ..$ origin: Date[1:1], format: "2014-09-15"
#>   .. ..- attr(*, "class")= chr "incidence_fit"
#>   ..- attr(*, "locations")=List of 2
#>   .. ..$ : chr "before"
#>   .. ..$ : chr "after"
#>   ..- attr(*, "class")= chr "incidence_fit_list"
#>  - attr(*, "locations")=List of 4
#>   ..$ : chr [1:2] "f" "before"
#>   ..$ : chr [1:2] "m" "before"
#>   ..$ : chr [1:2] "f" "after"
#>   ..$ : chr [1:2] "m" "after"
#>  - attr(*, "class")= chr "incidence_fit_list"

Notice that the nested lists themselves are of class incidence_fit_list.

Now, even though the fits sit within nested lists, the ‘locations’ attribute still defines where they are within the object, so the get_info() function still operates normally:

get_info(fg$fit, "r.conf")
#>                2.5 %       97.5 %
#> f_before  0.02289333  0.028518743
#> m_before  0.02502254  0.032649606
#> f_after  -0.01102735 -0.009018595
#> m_after  -0.01138910 -0.009377034

If you need to access all the fits easily, a convenience function to flatten the list is available in get_fit():

str(get_fit(fg$fit), max.level = 2)
#> List of 4
#>  $ f_before:List of 3
#>   ..$ model :List of 12
#>   .. ..- attr(*, "class")= chr "lm"
#>   ..$ info  :List of 5
#>   ..$ origin: Date[1:1], format: "2014-04-07"
#>   ..- attr(*, "class")= chr "incidence_fit"
#>  $ m_before:List of 3
#>   ..$ model :List of 12
#>   .. ..- attr(*, "class")= chr "lm"
#>   ..$ info  :List of 5
#>   ..$ origin: Date[1:1], format: "2014-04-14"
#>   ..- attr(*, "class")= chr "incidence_fit"
#>  $ f_after :List of 3
#>   ..$ model :List of 12
#>   .. ..- attr(*, "class")= chr "lm"
#>   ..$ info  :List of 5
#>   ..$ origin: Date[1:1], format: "2014-09-22"
#>   ..- attr(*, "class")= chr "incidence_fit"
#>  $ m_after :List of 3
#>   ..$ model :List of 12
#>   .. ..- attr(*, "class")= chr "lm"
#>   ..$ info  :List of 5
#>   ..$ origin: Date[1:1], format: "2014-09-15"
#>   ..- attr(*, "class")= chr "incidence_fit"
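
Once flattened, the individual fits can be handled with ordinary list tools; for instance, to list the origin date of each fit (a small sketch added for illustration):

vapply(get_fit(fg$fit), function(f) format(f$origin), character(1))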

Because all that defines an incidence_fit_list is the class definition and the ‘locations’ attribute giving the positions of the incidence_fit objects within the nesting, it is also possible to turn the output of fit_optim_split() into an incidence_fit_list:

print(locs <- attributes(fg$fit)$locations)
#> [[1]]
#> [1] "f"      "before"
#> 
#> [[2]]
#> [1] "m"      "before"
#> 
#> [[3]]
#> [1] "f"     "after"
#> 
#> [[4]]
#> [1] "m"     "after"

for (i in seq_along(locs)) {
    locs[[i]] <- c("fit", locs[[i]])
}
print(locs)
#> [[1]]
#> [1] "fit"    "f"      "before"
#> 
#> [[2]]
#> [1] "fit"    "m"      "before"
#> 
#> [[3]]
#> [1] "fit"   "f"     "after"
#> 
#> [[4]]
#> [1] "fit"   "m"     "after"
fg.ifl <- fg
attributes(fg.ifl)$locations<- locs
class(fg.ifl) <- "incidence_fit_list"

Now when we print the object, we can see that it prints only the information related to the incidence_fit_list:

fg.ifl
#> <list of incidence_fit objects>
#> 
#> attr(x, 'locations'): list of vectors with the locations of each incidence_fit object
#> 
#> 'fit', 'f', 'before'
#> 'fit', 'm', 'before'
#> 'fit', 'f', 'after'
#> 'fit', 'm', 'after'
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#> fit_f_before fit_m_before  fit_f_after  fit_m_after 
#>   0.02570604   0.02883607  -0.01002297  -0.01038307 
#> 
#>   $r.conf (confidence interval):
#>                    2.5 %       97.5 %
#> fit_f_before  0.02289333  0.028518743
#> fit_m_before  0.02502254  0.032649606
#> fit_f_after  -0.01102735 -0.009018595
#> fit_m_after  -0.01138910 -0.009377034
#> 
#>   $doubling (doubling time in days):
#> fit_f_before fit_m_before 
#>     26.96437     24.03750 
#> 
#>   $doubling.conf (confidence interval):
#>                 2.5 %   97.5 %
#> fit_f_before 24.30497 30.27725
#> fit_m_before 21.22988 27.70091
#> 
#>   $halving (halving time in days):
#> fit_f_after fit_m_after 
#>    69.15586    66.75746 
#> 
#>   $halving.conf (confidence interval):
#>                2.5 %   97.5 %
#> fit_f_after 62.85711 76.85756
#> fit_m_after 60.86059 73.91966
#> 
#>   $pred: data.frame of incidence predictions (111 rows, 7 columns)

But, we still retain all of the extra information in the list:

str(fg.ifl, max.level = 1)
#> List of 4
#>  $ df   :'data.frame':   26 obs. of  3 variables:
#>  $ plot :List of 11
#>   ..- attr(*, "class")= chr [1:2] "gg" "ggplot"
#>  $ split: Date[1:2], format: "2014-09-22" "2014-09-15"
#>   ..- attr(*, "names")="f" "m"
#>  $ fit  :List of 2
#>   ..- attr(*, "locations")=List of 4
#>   ..- attr(*, "class")= chr "incidence_fit_list"
#>  - attr(*, "locations")=List of 4
#>  - attr(*, "class")= chr "incidence_fit_list"
fg.ifl$split
#>            f            m 
#> "2014-09-22" "2014-09-15"
fg.ifl$df
#>         dates   mean.R2 groups
#> 1  2014-08-04 0.7546016      f
#> 2  2014-08-11 0.8096672      f
#> 3  2014-08-18 0.8513743      f
#> 4  2014-08-25 0.8864424      f
#> 5  2014-09-01 0.9165063      f
#> 6  2014-09-08 0.9270248      f
#> 7  2014-09-15 0.9345352      f
#> 8  2014-09-22 0.9350323      f
#> 9  2014-09-29 0.9339121      f
#> 10 2014-10-06 0.9288956      f
#> 11 2014-10-13 0.9226037      f
#> 12 2014-10-20 0.9122727      f
#> 13 2014-10-27 0.9027890      f
#> 14 2014-08-04 0.7566712      m
#> 15 2014-08-11 0.8164693      m
#> 16 2014-08-18 0.8567850      m
#> 17 2014-08-25 0.8820669      m
#> 18 2014-09-01 0.9006668      m
#> 19 2014-09-08 0.9166004      m
#> 20 2014-09-15 0.9271862      m
#> 21 2014-09-22 0.9263339      m
#> 22 2014-09-29 0.9260695      m
#> 23 2014-10-06 0.9216350      m
#> 24 2014-10-13 0.9144120      m
#> 25 2014-10-20 0.9077086      m
#> 26 2014-10-27 0.8966333      m
fg.ifl$plot

incidence/inst/doc/conversions.html0000644000176200001440000047375414626316436017212 0ustar liggesusers Conversions to and from the incidence class

Conversions to and from the incidence class

Thibaut Jombart, Zhian N. Kamvar

2024-05-31

This vignette documents two types of conversion which can be made using the incidence class:

  • ‘exports’: conversion from an incidence object to another type of object; this can be useful for processing incidence data in other software, or for reporting results.

  • ‘imports’: conversion from already computed incidence into an incidence object; this can be useful for using features of the incidence package for data handling and plotting with incidence data computed elsewhere.


Exporting results

To export results, we first compute incidence over two-week periods (starting on Sunday, the beginning of the CDC epiweek) by gender from the simulated Ebola data used in the overview vignette:

library(outbreaks)
library(incidence)
dat <- ebola_sim$linelist$date_of_onset
i_14 <- incidence(dat, interval = "2 epiweeks", groups = ebola_sim$linelist$gender)
i_14
#> <incidence object>
#> [5888 cases from days 2014-04-06 to 2015-04-19]
#> [5888 cases from MMWR weeks 2014-W15 to 2015-W16]
#> [2 groups: f, m]
#> 
#> $counts: matrix with 28 rows and 2 columns
#> $n: 5888 cases in total
#> $dates: 28 dates marking the left-side of bins
#> $interval: 2 weeks
#> $timespan: 379 days
#> $cumulative: FALSE
plot(i_14, border = "white")

To export the data to a data.frame, one simply needs:

as.data.frame(i_14)
#>         dates    weeks   f   m
#> 1  2014-04-06 2014-W15   1   1
#> 2  2014-04-20 2014-W17   7   1
#> 3  2014-05-04 2014-W19  16  11
#> 4  2014-05-18 2014-W21  19  20
#> 5  2014-06-01 2014-W23  18  22
#> 6  2014-06-15 2014-W25  27  28
#> 7  2014-06-29 2014-W27  26  43
#> 8  2014-07-13 2014-W29  72  43
#> 9  2014-07-27 2014-W31  89  78
#> 10 2014-08-10 2014-W33 115 133
#> 11 2014-08-24 2014-W35 181 188
#> 12 2014-09-07 2014-W37 280 266
#> 13 2014-09-21 2014-W39 319 282
#> 14 2014-10-05 2014-W41 279 282
#> 15 2014-10-19 2014-W43 249 265
#> 16 2014-11-02 2014-W45 211 202
#> 17 2014-11-16 2014-W47 152 166
#> 18 2014-11-30 2014-W49 138 148
#> 19 2014-12-14 2014-W51 122 127
#> 20 2014-12-28 2014-W53  94 101
#> 21 2015-01-11 2015-W02 112  96
#> 22 2015-01-25 2015-W04  85  83
#> 23 2015-02-08 2015-W06  76  71
#> 24 2015-02-22 2015-W08  75  81
#> 25 2015-03-08 2015-W10  59  70
#> 26 2015-03-22 2015-W12  64  45
#> 27 2015-04-05 2015-W14  44  40
#> 28 2015-04-19 2015-W16  32  33

The first column contains the dates marking the (inclusive) left side of the time intervals used for computing incidence, and the other columns give counts for the different groups. This function also has an option for exporting data in a ‘long’ format, i.e. with a column for ‘groups’ and a column for counts. This format is especially useful when working with ggplot2, which expects data in this shape:

df <- as.data.frame(i_14, long = TRUE)
head(df)
#>        dates    weeks counts groups
#> 1 2014-04-06 2014-W15      1      f
#> 2 2014-04-20 2014-W17      7      f
#> 3 2014-05-04 2014-W19     16      f
#> 4 2014-05-18 2014-W21     19      f
#> 5 2014-06-01 2014-W23     18      f
#> 6 2014-06-15 2014-W25     27      f
tail(df)
#>         dates    weeks counts groups
#> 51 2015-02-08 2015-W06     71      m
#> 52 2015-02-22 2015-W08     81      m
#> 53 2015-03-08 2015-W10     70      m
#> 54 2015-03-22 2015-W12     45      m
#> 55 2015-04-05 2015-W14     40      m
#> 56 2015-04-19 2015-W16     33      m

## example of custom plot using steps:
library(ggplot2)
ggplot(df, aes(x = dates, y = counts)) + geom_step(aes(color = groups))

Finally, note that when ISO weeks are used, these are also reported in the output:

i_7 <- incidence(dat, interval = "week")
i_7
#> <incidence object>
#> [5888 cases from days 2014-04-07 to 2015-04-27]
#> [5888 cases from ISO weeks 2014-W15 to 2015-W18]
#> 
#> $counts: matrix with 56 rows and 1 columns
#> $n: 5888 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 1 week
#> $timespan: 386 days
#> $cumulative: FALSE
plot(i_7, border = "white")
#> Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
#> ggplot2 3.3.4.
#> ℹ Please use "none" instead.
#> ℹ The deprecated feature was likely used in the incidence package.
#>   Please report the issue at <https://github.com/reconhub/incidence/issues>.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

head(as.data.frame(i_7))
#>        dates    weeks isoweeks counts
#> 1 2014-04-07 2014-W15 2014-W15      1
#> 2 2014-04-14 2014-W16 2014-W16      1
#> 3 2014-04-21 2014-W17 2014-W17      5
#> 4 2014-04-28 2014-W18 2014-W18      4
#> 5 2014-05-05 2014-W19 2014-W19     12
#> 6 2014-05-12 2014-W20 2014-W20     18
tail(as.data.frame(i_7))
#>         dates    weeks isoweeks counts
#> 51 2015-03-23 2015-W13 2015-W13     51
#> 52 2015-03-30 2015-W14 2015-W14     54
#> 53 2015-04-06 2015-W15 2015-W15     42
#> 54 2015-04-13 2015-W16 2015-W16     45
#> 55 2015-04-20 2015-W17 2015-W17     40
#> 56 2015-04-27 2015-W18 2015-W18     19


Importing pre-computed incidence

The function as.incidence facilitates the conversion of pre-computed incidences to an incidence object. Typically, the input will be imported into R from a .csv file or other spreadsheet formats.

as.incidence is a generic with methods for several types of objects (see ?as.incidence). The main method is matrix, as other types are coerced to matrix first and then passed to as.incidence.matrix:

args(incidence:::as.incidence.matrix)
#> function (x, dates = NULL, interval = NULL, standard = TRUE, 
#>     isoweeks = standard, ...) 
#> NULL

The only mandatory argument x is a table of counts, with time intervals in rows and groups in columns; if there are no groups, then the column doesn’t need a name; but if there are several groups, then columns should be named to indicate group labels. Optionally, dates can be provided to indicate the (inclusive) lower bounds of the time intervals, corresponding to the rows of x; most sensible date formats will do; if indicated as a character string, make sure the format is YYYY-mm-dd, e.g. 2017-04-01 for the 1st April 2017.
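
To make these arguments concrete, here is a minimal sketch (with made-up counts, not data from this vignette) building an incidence object from a two-group table and explicit weekly dates:

counts <- matrix(c(2, 5, 9, 1, 4, 7), ncol = 2, dimnames = list(NULL, c("f", "m")))
first_days <- as.Date("2017-04-03") + 7 * (0:2) # Mondays marking the start of each week
as.incidence(counts, dates = first_days, interval = 7)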

Let us illustrate the conversion using a simple vector of incidence:

vec <- c(1,2,3,0,3,2,4,1,2,1)

i <- as.incidence(vec)
i
#> <incidence object>
#> [19 cases from days 1 to 10]
#> 
#> $counts: matrix with 10 rows and 1 columns
#> $n: 19 cases in total
#> $dates: 10 dates marking the left-side of bins
#> $interval: 1 day
#> $timespan: 10 days
#> $cumulative: FALSE

plot(vec, type = "s")

plot(i, border = "white")

Assuming the above incidences are computed weekly, we would then use:

i <- as.incidence(vec, interval = 7)
i
#> <incidence object>
#> [19 cases from days 1 to 64]
#> 
#> $counts: matrix with 10 rows and 1 columns
#> $n: 19 cases in total
#> $dates: 10 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 64 days
#> $cumulative: FALSE
plot(i, border = "white")

Note that in this case, incidences have been treated as weekly counts, and the corresponding dates in days have been computed during the conversion (the first day is always ‘1’), so that the first days of weeks 1, 2, 3… are:

i$dates
#>  [1]  1  8 15 22 29 36 43 50 57 64

In practice, it is best to provide the actual dates marking the lower bounds of the time intervals. We can illustrate this by a round trip using the example of the previous section:

## conversion: incidence --> data.frame:
i_14
#> <incidence object>
#> [5888 cases from days 2014-04-06 to 2015-04-19]
#> [5888 cases from MMWR weeks 2014-W15 to 2015-W16]
#> [2 groups: f, m]
#> 
#> $counts: matrix with 28 rows and 2 columns
#> $n: 5888 cases in total
#> $dates: 28 dates marking the left-side of bins
#> $interval: 2 weeks
#> $timespan: 379 days
#> $cumulative: FALSE
df <- as.data.frame(i_14)
head(df)
#>        dates    weeks  f  m
#> 1 2014-04-06 2014-W15  1  1
#> 2 2014-04-20 2014-W17  7  1
#> 3 2014-05-04 2014-W19 16 11
#> 4 2014-05-18 2014-W21 19 20
#> 5 2014-06-01 2014-W23 18 22
#> 6 2014-06-15 2014-W25 27 28
tail(df)
#>         dates    weeks  f  m
#> 23 2015-02-08 2015-W06 76 71
#> 24 2015-02-22 2015-W08 75 81
#> 25 2015-03-08 2015-W10 59 70
#> 26 2015-03-22 2015-W12 64 45
#> 27 2015-04-05 2015-W14 44 40
#> 28 2015-04-19 2015-W16 32 33

## conversion: data.frame --> incidence
new_i <- as.incidence(df[group_names(i_14)], df$dates, interval = "2 epiweeks")
new_i
#> <incidence object>
#> [5888 cases from days 2014-04-06 to 2015-04-19]
#> [5888 cases from MMWR weeks 2014-W15 to 2015-W16]
#> [2 groups: f, m]
#> 
#> $counts: matrix with 28 rows and 2 columns
#> $n: 5888 cases in total
#> $dates: 28 dates marking the left-side of bins
#> $interval: 2 weeks
#> $timespan: 379 days
#> $cumulative: FALSE

## check round trip
identical(new_i, i_14)
#> [1] TRUE
incidence/inst/doc/incidence_class.R0000644000176200001440000000642114626316441017160 0ustar liggesusers## ----options, echo = FALSE---------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ## ----data--------------------------------------------------------------------- library(incidence) set.seed(1) dat <- sample(1:50, 200, replace = TRUE, prob = 1 + exp(1:50 * 0.1)) sex <- sample(c("female", "male"), 200, replace = TRUE) ## ----i------------------------------------------------------------------------ i <- incidence(dat, interval = 2) i plot(i) ## ----sex---------------------------------------------------------------------- i.sex <- incidence(dat, interval = 2, group = sex) i.sex plot(i.sex) ## ----names-------------------------------------------------------------------- class(i) is.list(i) names(i) ## ----access------------------------------------------------------------------- ## use name head(i$dates) head(get_dates(i)) ## ----dates1------------------------------------------------------------------- date_bins <- get_dates(i) class(date_bins) class(dat) date_bins ## ----date-dates1-------------------------------------------------------------- dat_Date <- as.Date("2018-10-31") + dat head(dat_Date) i.date <- incidence(dat_Date, interval = 2, group = sex) i.date get_dates(i.date) class(get_dates(i.date)) ## ----get-dates-integer-------------------------------------------------------- get_dates(i.date, count_days = TRUE) get_dates(i, count_days = TRUE) ## ----get-dates-center--------------------------------------------------------- get_dates(i.date, position = "center") get_dates(i.date, position = "center", count_days = TRUE) ## ----counts1------------------------------------------------------------------ counts <- get_counts(i) class(counts) storage.mode(counts) counts get_counts(i.sex) ## ----counts1.1---------------------------------------------------------------- dim(get_counts(i.sex)) dim(i.sex) nrow(i.sex) # number of date bins ncol(i.sex) # number of groups ## ----groups------------------------------------------------------------------- # Number of groups ncol(i.sex) ncol(i) # Names of groups group_names(i.sex) group_names(i) # You can also rename the groups group_names(i.sex) <- c("F", "M") group_names(i.sex) ## ----as.data.frame------------------------------------------------------------ ## basic conversion as.data.frame(i) as.data.frame(i.sex) ## long format for ggplot2 as.data.frame(i.sex, long = TRUE) ## ----timespan----------------------------------------------------------------- get_timespan(i) print(date_range <- range(get_dates(i))) diff(date_range) + 1 ## ----interval----------------------------------------------------------------- get_interval(i) diff(get_dates(i)) ## ----n------------------------------------------------------------------------ get_n(i) ## ----n2----------------------------------------------------------------------- colSums(get_counts(i.sex)) ## ----isoweek------------------------------------------------------------------ library(outbreaks) dat <- ebola_sim$linelist$date_of_onset i.7 <- incidence(dat, "1 epiweek", standard = TRUE) i.7 i.7$weeks ## ----isoweek-null------------------------------------------------------------- i$weeks ## ----isoweek3----------------------------------------------------------------- head(as.data.frame(i.7)) incidence/inst/doc/overview.R0000644000176200001440000000763514626316444015733 0ustar liggesusers## ----echo = 
FALSE------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ## ----install, eval=FALSE------------------------------------------------------ # install.packages("incidence") ## ----install2, eval=FALSE----------------------------------------------------- # devtools::install_github("reconhub/incidence") ## ----data--------------------------------------------------------------------- library(outbreaks) library(ggplot2) library(incidence) dat <- ebola_sim$linelist$date_of_onset class(dat) head(dat) ## ----incid1------------------------------------------------------------------- i <- incidence(dat) i plot(i) ## ----interv------------------------------------------------------------------- # weekly, starting on Monday (ISO week, default) i.7 <- incidence(dat, interval = "1 week") plot(i.7) # semi-weekly, starting on Saturday i.14 <- incidence(dat, interval = "2 saturday weeks") plot(i.14, border = "white") ## monthly i.month <- incidence(dat, interval = "1 month") plot(i.month, border = "white") ## ----gender------------------------------------------------------------------- i.7.sex <- incidence(dat, interval = "1 week", groups = ebola_sim$linelist$gender) i.7.sex plot(i.7.sex, stack = TRUE, border = "grey") ## ----hosp--------------------------------------------------------------------- i.7.hosp <- with(ebola_sim_clean$linelist, incidence(date_of_onset, interval = "week", groups = hospital)) i.7.hosp head(get_counts(i.7.hosp)) plot(i.7.hosp, stack=TRUE) + theme(legend.position= "top") + labs(fill="") ## ----middle------------------------------------------------------------------- i[100:250] plot(i[100:250]) ## ----stripes------------------------------------------------------------------ i.7[c(TRUE,FALSE)] plot(i.7[c(TRUE,FALSE)]) ## ----tail--------------------------------------------------------------------- i.tail <- subset(i, from=as.Date("2015-01-01")) i.tail plot(i.tail, border="white") ## ----i7outcome---------------------------------------------------------------- i.7.outcome <- incidence(dat, 7, groups=ebola_sim$linelist$outcome) i.7.outcome plot(i.7.outcome, stack = TRUE, border = "grey") ## ----groupsub----------------------------------------------------------------- i.7.outcome[,1:2] plot(i.7.outcome[,1:2], stack = TRUE, border = "grey") ## ----pool--------------------------------------------------------------------- i.pooled <- pool(i.7.outcome) i.pooled identical(i.7$counts, i.pooled$counts) ## ----fit1--------------------------------------------------------------------- plot(i.7[1:20]) early.fit <- fit(i.7[1:20]) early.fit ## ----------------------------------------------------------------------------- plot(early.fit) ## ----------------------------------------------------------------------------- plot(i.7[1:20], fit = early.fit) ## ----fit.both----------------------------------------------------------------- fit.both <- fit(i.7, split=as.Date("2014-10-15")) fit.both plot(i.7, fit=fit.both) ## ----optim-------------------------------------------------------------------- best.fit <- fit_optim_split(i.7) best.fit plot(i.7, fit=best.fit$fit) ## ----get_info----------------------------------------------------------------- get_info(best.fit$fit, "doubling") # doubling time get_info(best.fit$fit, "doubling.conf") # confidence interval get_info(best.fit$fit, "halving") get_info(best.fit$fit, "halving.conf") ## ----optim2------------------------------------------------------------------- best.fit2 <- 
fit_optim_split(i.7.sex) best.fit2 plot(i.7.sex, fit=best.fit2$fit) ## ----get_info_groups---------------------------------------------------------- get_info(best.fit2$fit, "doubling") # doubling time get_info(best.fit2$fit, "doubling.conf") # confidence interval get_info(best.fit2$fit, "halving") get_info(best.fit2$fit, "halving.conf") incidence/inst/doc/incidence_fit_class.Rmd0000644000176200001440000002266214621104516020341 0ustar liggesusers--- title: "Details of the incidence_fit class" author: "Zhian N. Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Incidence fit class} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette details the structure and construction of the `incidence_fit` and `incidence_fit_list` classes, which are produced by the `fit()` and `fit_optim_split()` functions, respectively. By the end of this tutorial, you should be able to construct `incidence_fit` and `incidence_fit_list` objects for use with your own models. # Structure of an `incidence_fit` object An `incidence_fit` object contains three elements: - `$model`: The model fit to an `incidence` object. Currently, this represents a log-linear model, but it can be any model. - `$info`: Information derived from the model - `r` The growth rate - `r.conf` the confidence interval of `r` - `pred` a data frame containing the predictions of the model using the true dates (`dates`), their numeric version used in the model (`dates.x`), the predicted value (`fit`), and the lower (`lwr`) and upper (`upr`) bounds of the associated confidence interval. - `doubling` the predicted doubling time in days (only if `r` is positive) - `doubling.conf` the confidence interval of the doubling time - `halving` the predicted halving time in days (only if `r` is negative) - `halving.conf` the confidence interval of the halving time - `$origin`: the date corresponding to day '0' Internally, when `fit()` is run, these elements are constructed by function `incidence:::extract_info()`. First we need to setup data. We will use simulated Ebola outbreak data from the *outbreaks* package over weekly intervals and calculate the fit for the first 20 weeks: ```{r fit_dates} library(outbreaks) library(incidence) dat <- ebola_sim$linelist$date_of_onset i <- incidence(dat, interval = "week") i f <- fit(i[1:20]) f plot(i, fit = f) ``` As you can see, the `incidence_fit` object has a print method and a plot method. If you want to access individual elements in the `$info` element, you can use the `get_info()` function: ```{r get_info} get_info(f, "r") get_info(f, "r.conf") get_info(f, "doubling.conf") ``` This will be important later when we combine several `incidence_fit` objects into a single `incidence_fit_list`. # Building an `incidence_fit` object from scratch The `incidence_fit` object can be constructed from any model from which you can derive the daily growth rate, doubling/halving times, predictions, and confidence intervals. The following three steps show roughly how it is done from model fitting to construction. ### Step 1: create the model The default model for `fit()` is a log-linear model on the intervals between dates. 
To fit this model, we will need to create a data frame with the counts and the midpoints of the intervals: ```{r create_model} # ensure all dates have at least one incidence i2 <- i[1:20] i2 <- i2[apply(get_counts(i2), 1, min) > 0] df <- as.data.frame(i2, long = TRUE) df$dates.x <- get_dates(i2, position = "center", count_days = TRUE) head(df) lm1 <- stats::lm(log(counts) ~ dates.x, data = df) lm1 ``` If we compare that to the `$model` element produced from `fit()`, we can see that it is identical: ```{r fit_model} all.equal(f$model, lm1) ``` ### Step 2: creation of the `$info` list: The `$info` list is created directly from the model itself: ```{r make_info} r <- stats::coef(lm1)["dates.x"] r.conf <- stats::confint(lm1, "dates.x", 0.95) new.data <- data.frame(dates.x = sort(unique(lm1$model$dates.x))) pred <- exp(stats::predict(lm1, newdata = new.data, interval = "confidence", level = 0.95)) pred <- cbind.data.frame(new.data, pred) info_list <- list( r = r, r.conf = r.conf, doubling = log(2) / r, doubling.conf = log(2) / r.conf, pred = pred ) info_list ``` ### Step 3: combine lists and create object the last step is to combine everything into a list and create the object. ```{r combine} origin <- min(get_dates(i2)) info_list$pred$dates <- origin + info_list$pred$dates.x the_fit <- list( lm = lm1, info = info_list, origin = min(get_dates(i2)) ) class(the_fit) <- "incidence_fit" the_fit plot(i, fit = the_fit) ``` # Structure of an `incidence_fit_list` object There are several reasons for having multiple fits to a single `incidence` object. One may want to have a separate fit for different groups represented in the object, or one may want to split the fits at the peak of the epidemic. To aid in plotting and summarizing the different fits, we've created the `incidence_fit_list` class. This class has two defining features: - It consists of a named list containing one or more `incidence_fit` objects or lists containing `incidence_fit` objects. - An attribute called "locations" contains a list whose length is equal to the number of `incidence_fit` objects in the object. Each list element contains a vector that defines where an `incidence_fit` object is within the `incidence_fit_list`. The reason for this structure is because it is sometimes necessary to nest lists of `incidence_fit` objects within lists. When this happens, accessing individual elements of the objects cumbersome. To alleviate this, each object has a distinct path within the named list in the "locations" attribute that allows one to access the object directly since R allows you to traverse the elements of a nested list by subsetting with a vector: ```{r nest} l <- list(a = list(b = 1, c = 2),d = list(e = list(f = 3, g = 4), h = 5)) str(l) l[[c("a", "b")]] l[[c("d", "e", "f")]] ``` ## Example: A tale of two fits The function `fit_optim_split()` attempts to find the optimal split point in an epicurve, producing an `incidence_fit_list` object in the `$fit` element of the returned list: ```{r incidence_fit_list} fl <- fit_optim_split(i) fl$fit plot(i, fit = fl$fit) ``` Here you can see that the object looks very similar to the `incidence_fit` object, but it has extra information. The first thing you may notice is the fact that both "doubling" and "halving" are shown. This is because the two fits have different signs for the daily growth rate. The second thing you may notice is the fact that there is something called `attr(x, 'locations')`. This attribute gives the location of the `incidence_fit` objects within the list. 
We can illustrate how this works if we look at the structure of the object: ```{r incidence_fit_list_str} str(fl$fit, max.level = 2) ``` Internally, all of the methods for `incidence_fit_list` use the 'locations' attribute to navigate: ```{r incidence_fit_methods} methods(class = "incidence_fit_list") ``` For example, it's often useful to extract the growth rate for all models at once. The `get_info()` method allows us to do this easily: ```{r get_info_incidence_fit_list} get_info(fl$fit, "r") get_info(fl$fit, "r.conf") ``` Because doubling or halving is determined by whether or not `r` is negative, we automatically filter out the results that don't make sense, but you can include them with `na.rm = FALSE`: ```{r get_doubling} get_info(fl$fit, "doubling.conf") get_info(fl$fit, "doubling.conf", na.rm = FALSE) ``` ## Example: Nested incidence_fit Above, we showed the example of a basic `incidence_fit_list` class with two objects representing the fits before and after the peak of an epicurve. However, it is often useful evaluate fits for different groups separately. Here, we will construct an incidence object, but define groups by gender: ```{r incidence_by_gender} gen <- ebola_sim$linelist$gender ig <- incidence(dat, interval = "week", group = gen) plot(ig, border = "grey98") ``` Now if we calculate an optimal fit split, we will end up with four different fits: two for each defined gender. ```{r fit_gender} fg <- fit_optim_split(ig) plot(ig, fit = fg$fit, border = "grey98", stack = FALSE) ``` If we look at the fit object, we can see again that it is an `incidence_fit_list` but this time with four fits defined. ```{r fit_gender_print} fg$fit str(fg$fit, max.level = 3) ``` > Notice that the nested lists themselves are of class `incidence_fit_list`. Now, even though the fits within nested lists, the 'locations' attributes still defines where they are within the object so that the `get_info()` function still operates normally: ```{r get_info_gender} get_info(fg$fit, "r.conf") ``` If you need to access all the fits easily, a convenience function to flatten the list is available in `get_fit()`: ```{r get_fit} str(get_fit(fg$fit), max.level = 2) ``` Because all that defines an `incidence_fit_list` is the class definition and the 'locations' attribute that defines the positions of the `incidence_fit` objects within the nesting, then it's also possible to define the output of `fit_optim_split()` as an `incidence_fit_list` class: ```{r incidence_fit_listify} print(locs <- attributes(fg$fit)$locations) for (i in seq_along(locs)) { locs[[i]] <- c("fit", locs[[i]]) } print(locs) fg.ifl <- fg attributes(fg.ifl)$locations<- locs class(fg.ifl) <- "incidence_fit_list" ``` Now when we print the object, we can see that it prints only the information related to the `incidence_fit_list`: ```{r new_fit_list_print} fg.ifl ``` But, we still retain all of the extra information in the list: ```{r list_stuff} str(fg.ifl, max.level = 1) fg.ifl$split fg.ifl$df fg.ifl$plot ``` incidence/inst/doc/incidence_class.Rmd0000644000176200001440000001263514621104516017476 0ustar liggesusers--- title: "Details of the incidence class" author: "Thibaut Jombart, Zhian N. 
Kamvar" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Incidence class} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, options, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ``` This vignette details the structure of *incidence* objects, as produced by the `incidence` function.
# Structure of an *incidence* object. We generate a toy dataset of dates to examine the content of *incidence* objects. ```{r, data} library(incidence) set.seed(1) dat <- sample(1:50, 200, replace = TRUE, prob = 1 + exp(1:50 * 0.1)) sex <- sample(c("female", "male"), 200, replace = TRUE) ``` The incidence by 48h period is computed as: ```{r, i} i <- incidence(dat, interval = 2) i plot(i) ``` We also compute incidence by gender: ```{r, sex} i.sex <- incidence(dat, interval = 2, group = sex) i.sex plot(i.sex) ``` The object `i` is a `list` with the class *incidence*: ```{r, names} class(i) is.list(i) names(i) ``` Items in `i` can be accessed using the same indexing as any lists, but it's safer to use the accessors for each item: ```{r, access} ## use name head(i$dates) head(get_dates(i)) ``` In the following sections, we examine each of the components of the object. ## `$dates` The `$dates` component contains a vector for all the dates for which incidence have been computed, in the format of the input dataset (e.g. `Date`, `numeric`, `integer`). ```{r, dates1} date_bins <- get_dates(i) class(date_bins) class(dat) date_bins ``` The dates correspond to the lower bounds of the time intervals used as bins for the incidence. Bins always include the lower bound and exclude the upper bound. In the example provided above, this means that the first bin counts events that happened at day 5-6, the second bin counts events from 7-8, etc. Note that if we had actual `Date`-class dates, they would be returned as dates ```{r date-dates1} dat_Date <- as.Date("2018-10-31") + dat head(dat_Date) i.date <- incidence(dat_Date, interval = 2, group = sex) i.date get_dates(i.date) class(get_dates(i.date)) ``` These can be converted to integers, counting the number of days from the first date. ```{r get-dates-integer} get_dates(i.date, count_days = TRUE) get_dates(i, count_days = TRUE) ``` To facilitate modelling, it's also possible to get the center of the interval by using the `position = "center"` argument: ```{r get-dates-center} get_dates(i.date, position = "center") get_dates(i.date, position = "center", count_days = TRUE) ``` ## `$counts` The `$counts` component contains the actual incidence, i.e. counts of events for the defined bins. It is a `matrix` of `integers` where rows correspond to time intervals, with one column for each group for which incidence is computed (a single, unnamed column if no groups were provided). If groups were provided, columns are named after the groups. 
We illustrate the difference comparing the two objects `i` and `i.sex`: ```{r, counts1} counts <- get_counts(i) class(counts) storage.mode(counts) counts get_counts(i.sex) ``` You can see the dimensions of the incidence object by using `dim()`, `ncol()`, and `nrow()`, which returns the dimensions of the counts matrix: ```{r counts1.1} dim(get_counts(i.sex)) dim(i.sex) nrow(i.sex) # number of date bins ncol(i.sex) # number of groups ``` There are also accessors for handling groups: ```{r groups} # Number of groups ncol(i.sex) ncol(i) # Names of groups group_names(i.sex) group_names(i) # You can also rename the groups group_names(i.sex) <- c("F", "M") group_names(i.sex) ``` Note that a `data.frame` containing *dates* and *counts* can be obtained using `as.data.frame`: ```{r, as.data.frame} ## basic conversion as.data.frame(i) as.data.frame(i.sex) ## long format for ggplot2 as.data.frame(i.sex, long = TRUE) ``` Note that `incidence` has an argument called `na_as_group` which is `TRUE` by default, which will pool all missing groups into a separate group, in which case it will be a separate column in `$counts`. ## `$timespan` The `$timespan` component stores the length of the time period covered by the object: ```{r, timespan} get_timespan(i) print(date_range <- range(get_dates(i))) diff(date_range) + 1 ``` ## `$interval` The `$interval` component contains the length of the time interval for the bins: ```{r, interval} get_interval(i) diff(get_dates(i)) ``` ## `$n` The `$n` component stores the total number of events in the data: ```{r, n} get_n(i) ``` Note that to obtain the number of cases by groups, one can use: ```{r, n2} colSums(get_counts(i.sex)) ``` ## `$weeks` The `$weeks` component is optional, and used to store [aweek](https://www.repidemicsconsortium.org/aweek/) objects whenever they have been used. Weeks are used by default when weekly incidence is computed from dates (see argument `standard` in `?incidence`). ```{r, isoweek} library(outbreaks) dat <- ebola_sim$linelist$date_of_onset i.7 <- incidence(dat, "1 epiweek", standard = TRUE) i.7 i.7$weeks ``` Because `$weeks` is an optional element, it does not have a dedicated accessor. If the element is not present, attempting to access it will result in a `NULL`: ```{r isoweek-null} i$weeks ``` Both dates and weeks are returned when converting an `incidence` object to `data.frame`: ```{r, isoweek3} head(as.data.frame(i.7)) ``` incidence/inst/doc/overview.html0000644000176200001440000260743514626316445016505 0ustar liggesusers Overview of the incidence package

Overview of the incidence package

Thibaut Jombart, Zhian N. Kamvar

2024-05-31

incidence implements functions and classes to compute, handle, visualise and model incidence from date data. This vignette provides an overview of current features. It largely reproduces the content of README.md.


Installing the package

To install the current stable, CRAN version of the package, type:

install.packages("incidence")

To benefit from the latest features and bug fixes, install the development (GitHub) version of the package using:

devtools::install_github("reconhub/incidence")

Note that this requires the devtools package to be installed.


What does it do?

The main functions of the package include:

  • incidence: compute incidence from dates in various formats; any fixed time interval can be used; the returned object is an instance of the (S3) class incidence.

  • plot: this method (see ?plot.incidence for details) plots incidence objects, and can also add predictions of the model(s) contained in an incidence_fit object (or a list of such objects).

  • fit: fit one or two exponential models (i.e. linear regression on log-incidence) to an incidence object; two models are calibrated only if a date is provided to split the time series in two (argument split); this is typically useful to model the two phases of an outbreak, exponential growth followed by decline; each model returned is an instance of the (S3) class incidence_fit, which contains various useful information (e.g. growth rate r, doubling/halving time, predictions and confidence intervals); results can be plotted using plot, or added to an existing incidence plot using the piping-friendly function add_incidence_fit.

  • fit_optim_split: finds the optimal date to split the time series in two, typically around the peak of the epidemic.

  • [: lower-level subsetting of incidence objects, allowing you to specify which dates and groups to retain; uses a syntax similar to matrices, i.e. x[i, j], where x is the incidence object, i a subset of dates, and j a subset of groups.

  • subset: subset an incidence object by specifying a time window.

  • pool: pool incidence from different groups into one global incidence time series.

  • cumulate: computes cumulative incidence over time from an incidence object.

  • as.data.frame: converts an incidence object into a data.frame containing dates and incidence values.

  • bootstrap: generates a bootstrapped incidence object by re-sampling, with replacement, the original dates of events.

  • find_peak: locates the peak time of the epicurve.

  • estimate_peak: uses bootstrap to estimate the peak time (and related confidence interval) of a partially observed outbreak.
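
Three of the functions listed above (bootstrap, find_peak and estimate_peak) are not shown in the worked example below. The following sketch illustrates how they are typically called on an existing incidence object; it assumes an object named i (as built in the next section) and default arguments, so treat it as an illustration rather than a definitive recipe:

i.boot <- bootstrap(i)   # new incidence object with dates re-sampled with replacement
find_peak(i)             # date of the bin with the highest observed incidence
estimate_peak(i)         # bootstrap-based peak estimate with a confidence interval
cumulate(i)              # cumulative incidence over time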

Worked example: simulated Ebola outbreak

Loading the data

This example uses the simulated Ebola Virus Disease (EVD) outbreak from the package outbreaks. We will compute incidence for various time steps, calibrate two exponential models around the peak of the epidemic, and analyse the results.

First, we load the data:

library(outbreaks)
library(ggplot2)
library(incidence)

dat <- ebola_sim$linelist$date_of_onset
class(dat)
#> [1] "Date"
head(dat)
#> [1] "2014-04-07" "2014-04-15" "2014-04-21" "2014-04-27" "2014-04-26"
#> [6] "2014-04-25"

Computing and plotting incidence

We compute the daily incidence:

i <- incidence(dat)
i
#> <incidence object>
#> [5888 cases from days 2014-04-07 to 2015-04-30]
#> 
#> $counts: matrix with 389 rows and 1 columns
#> $n: 5888 cases in total
#> $dates: 389 dates marking the left-side of bins
#> $interval: 1 day
#> $timespan: 389 days
#> $cumulative: FALSE
plot(i)

The daily incidence is quite noisy, but we can easily compute incidence over larger time intervals:

# weekly, starting on Monday (ISO week, default)
i.7 <- incidence(dat, interval = "1 week")
plot(i.7)


# bi-weekly, starting on Saturday
i.14 <- incidence(dat, interval = "2 saturday weeks")
plot(i.14, border = "white")


## monthly
i.month <- incidence(dat, interval = "1 month")
plot(i.month, border = "white")

incidence can also compute incidence by specified groups using the groups argument. For instance, we can compute incidence by gender:

i.7.sex <- incidence(dat, interval = "1 week", groups = ebola_sim$linelist$gender)
i.7.sex
#> <incidence object>
#> [5888 cases from days 2014-04-07 to 2015-04-27]
#> [5888 cases from ISO weeks 2014-W15 to 2015-W18]
#> [2 groups: f, m]
#> 
#> $counts: matrix with 56 rows and 2 columns
#> $n: 5888 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 1 week
#> $timespan: 386 days
#> $cumulative: FALSE
plot(i.7.sex, stack = TRUE, border = "grey")

We can do the same for hospitals, using the ‘clean’ version of the dataset, with some customization of the legend:

i.7.hosp <- with(ebola_sim_clean$linelist, 
     incidence(date_of_onset, interval = "week", groups = hospital))
i.7.hosp
#> <incidence object>
#> [5829 cases from days 2014-04-07 to 2015-04-27]
#> [5829 cases from ISO weeks 2014-W15 to 2015-W18]
#> [6 groups: Connaught Hospital, Military Hospital, other, Princess Christian Maternity Hospital (PCMH), Rokupa Hospital, NA]
#> 
#> $counts: matrix with 56 rows and 6 columns
#> $n: 5829 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 1 week
#> $timespan: 386 days
#> $cumulative: FALSE
head(get_counts(i.7.hosp))
#>      Connaught Hospital Military Hospital other
#> [1,]                  0                 1     0
#> [2,]                  1                 0     0
#> [3,]                  0                 0     3
#> [4,]                  1                 0     0
#> [5,]                  3                 5     1
#> [6,]                  2                 4     5
#>      Princess Christian Maternity Hospital (PCMH) Rokupa Hospital NA
#> [1,]                                            0               0  0
#> [2,]                                            0               0  0
#> [3,]                                            0               0  2
#> [4,]                                            1               1  1
#> [5,]                                            1               1  1
#> [6,]                                            1               1  4
plot(i.7.hosp, stack=TRUE) + 
    theme(legend.position= "top") + 
    labs(fill="")

Handling incidence objects

incidence objects can be manipulated easily. The [ operator implements subsetting of dates (first argument) and groups (second argument). For instance, to keep only the peak of the distribution:

i[100:250]
#> <incidence object>
#> [4103 cases from days 2014-07-15 to 2014-12-12]
#> 
#> $counts: matrix with 151 rows and 1 columns
#> $n: 4103 cases in total
#> $dates: 151 dates marking the left-side of bins
#> $interval: 1 day
#> $timespan: 151 days
#> $cumulative: FALSE
plot(i[100:250])

Or to keep every other week:

i.7[c(TRUE,FALSE)]
#> <incidence object>
#> [2891 cases from days 2014-04-07 to 2015-04-20]
#> [2891 cases from ISO weeks 2014-W15 to 2015-W17]
#> 
#> $counts: matrix with 28 rows and 1 columns
#> $n: 2891 cases in total
#> $dates: 28 dates marking the left-side of bins
#> $interval: 1 week
#> $timespan: 379 days
#> $cumulative: FALSE
plot(i.7[c(TRUE,FALSE)])

Some temporal subsetting can be even simpler using subset, which retains data within a specified time window:

i.tail <- subset(i, from=as.Date("2015-01-01"))
i.tail
#> <incidence object>
#> [1205 cases from days 2015-01-01 to 2015-04-30]
#> 
#> $counts: matrix with 120 rows and 1 columns
#> $n: 1205 cases in total
#> $dates: 120 dates marking the left-side of bins
#> $interval: 1 day
#> $timespan: 120 days
#> $cumulative: FALSE
plot(i.tail, border="white")
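
Both ends of the time window can also be given at once. This is a sketch (not run here) assuming the from and to arguments documented in ?subset.incidence:

i.window <- subset(i, from = as.Date("2014-10-01"), to = as.Date("2014-12-31"))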

Subsetting groups can also matter. For instance, let’s try and visualise the incidence based on onset of symptoms by outcome:

i.7.outcome <- incidence(dat, 7, groups=ebola_sim$linelist$outcome)
i.7.outcome
#> <incidence object>
#> [5888 cases from days 2014-04-07 to 2015-04-27]
#> [5888 cases from ISO weeks 2014-W15 to 2015-W18]
#> [3 groups: Death, Recover, NA]
#> 
#> $counts: matrix with 56 rows and 3 columns
#> $n: 5888 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 386 days
#> $cumulative: FALSE
plot(i.7.outcome, stack = TRUE, border = "grey")

By default, incidence treats missing data (NA) as a separate group (see argument na_as_group). We could disable this to retain only known outcomes, but alternatively we can simply subset the object to exclude the last (3rd) group:

i.7.outcome[,1:2]
#> <incidence object>
#> [4565 cases from days 2014-04-07 to 2015-04-27]
#> [4565 cases from ISO weeks 2014-W15 to 2015-W18]
#> [2 groups: Death, Recover]
#> 
#> $counts: matrix with 56 rows and 2 columns
#> $n: 4565 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 386 days
#> $cumulative: FALSE
plot(i.7.outcome[,1:2], stack = TRUE, border = "grey")
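
The first option mentioned above, dropping missing outcomes when the object is built, would be a one-liner. This sketch (not run here) assumes the na_as_group argument behaves as described above:

i.7.known <- incidence(dat, 7, groups = ebola_sim$linelist$outcome, na_as_group = FALSE)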

Groups can also be collapsed into a single time series using pool:

i.pooled <- pool(i.7.outcome)
i.pooled
#> <incidence object>
#> [5888 cases from days 2014-04-07 to 2015-04-27]
#> [5888 cases from ISO weeks 2014-W15 to 2015-W18]
#> 
#> $counts: matrix with 56 rows and 1 columns
#> $n: 5888 cases in total
#> $dates: 56 dates marking the left-side of bins
#> $interval: 7 days
#> $timespan: 386 days
#> $cumulative: FALSE
identical(i.7$counts, i.pooled$counts)
#> [1] TRUE

Modelling incidence

Incidence data, excluding zeros, can be modelled using log-linear regression of the form: log(y) = r × t + b

where y is the incidence, r is the growth rate, t is the number of days since a specific point in time (typically the start of the outbreak), and b is the intercept.
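
Under this model, the doubling time (when r is positive) is log(2)/r days, and the halving time (when r is negative) is log(2)/|r| days; these are the quantities reported by the fits further down. A minimal arithmetic sketch, using a hypothetical growth rate of 0.03 per day:

r <- 0.03     # hypothetical daily growth rate
log(2) / r    # doubling time in days, roughly 23 days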

Such a model can be fitted to any incidence object using fit. Of course, a single log-linear model is not sufficient for modelling our epidemic curve, as there are clearly a growing phase and a decreasing phase. As a start, we can calibrate a model on the first 20 weeks of the epidemic:

plot(i.7[1:20])

early.fit <- fit(i.7[1:20])
early.fit
#> <incidence_fit object>
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#> [1] 0.03175771
#> 
#>   $r.conf (confidence interval):
#>           2.5 %     97.5 %
#> [1,] 0.02596229 0.03755314
#> 
#>   $doubling (doubling time in days):
#> [1] 21.8261
#> 
#>   $doubling.conf (confidence interval):
#>         2.5 %   97.5 %
#> [1,] 18.45777 26.69823
#> 
#>   $pred: data.frame of incidence predictions (20 rows, 5 columns)

The resulting objects (known as incidence_fit objects) can be plotted, in which case the prediction and its confidence interval are displayed:

plot(early.fit)

However, a better way to display these predictions is adding them to the incidence plot using the argument fit:

plot(i.7[1:20], fit = early.fit)

In this case, we would ideally like to fit two models, before and after the peak of the epidemic. This is possible using the following approach, if you know which date to use to split the data into two phases:

fit.both <- fit(i.7, split=as.Date("2014-10-15"))
fit.both
#> <list of incidence_fit objects>
#> 
#> attr(x, 'locations'): list of vectors with the locations of each incidence_fit object
#> 
#> 'before'
#> 'after'
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#>      before       after 
#>  0.02741985 -0.01014465 
#> 
#>   $r.conf (confidence interval):
#>              2.5 %       97.5 %
#> before  0.02407933  0.030760379
#> after  -0.01127733 -0.009011981
#> 
#>   $doubling (doubling time in days):
#>   before 
#> 25.27902 
#> 
#>   $doubling.conf (confidence interval):
#>           2.5 %   97.5 %
#> before 22.53377 28.78598
#> 
#>   $halving (halving time in days):
#>    after 
#> 68.32636 
#> 
#>   $halving.conf (confidence interval):
#>          2.5 %   97.5 %
#> after 61.46379 76.91397
#> 
#>   $pred: data.frame of incidence predictions (56 rows, 6 columns)
plot(i.7, fit=fit.both)

This is much better, but the splitting date is not completely optimal. To look for the best possible splitting date (i.e. the one maximizing the average fit of both models), we use:

best.fit <- fit_optim_split(i.7)
best.fit
#> $df
#>         dates   mean.R2
#> 1  2014-08-04 0.7650406
#> 2  2014-08-11 0.8203351
#> 3  2014-08-18 0.8598316
#> 4  2014-08-25 0.8882682
#> 5  2014-09-01 0.9120857
#> 6  2014-09-08 0.9246023
#> 7  2014-09-15 0.9338797
#> 8  2014-09-22 0.9339813
#> 9  2014-09-29 0.9333246
#> 10 2014-10-06 0.9291131
#> 11 2014-10-13 0.9232523
#> 12 2014-10-20 0.9160439
#> 13 2014-10-27 0.9071665
#> 
#> $split
#> [1] "2014-09-22"
#> 
#> $fit
#> <list of incidence_fit objects>
#> 
#> attr(x, 'locations'): list of vectors with the locations of each incidence_fit object
#> 
#> 'before'
#> 'after'
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#>      before       after 
#>  0.02982209 -0.01016191 
#> 
#>   $r.conf (confidence interval):
#>              2.5 %       97.5 %
#> before  0.02608945  0.033554736
#> after  -0.01102526 -0.009298561
#> 
#>   $doubling (doubling time in days):
#>   before 
#> 23.24274 
#> 
#>   $doubling.conf (confidence interval):
#>           2.5 %  97.5 %
#> before 20.65721 26.5681
#> 
#>   $halving (halving time in days):
#>    after 
#> 68.21031 
#> 
#>   $halving.conf (confidence interval):
#>          2.5 %   97.5 %
#> after 62.86899 74.54349
#> 
#>   $pred: data.frame of incidence predictions (57 rows, 6 columns)
#> 
#> $plot

plot(i.7, fit=best.fit$fit)

These models are a very good approximation of the data, showing a doubling time of 23.2 days during the first phase and a halving time of 68.2 days during the second. To access these parameters, you can use the get_info() function.

The possible parameters are:

  • “r”, the daily growth rate
  • “doubling” the rate of doubling in days (if “r” is positive)
  • “halving” the rate of halving in days (if “r” is negative)
  • “pred” a data frame of incidence predictions

For “r”, “doubling”, and “halving”, you can also add “.conf” to get the confidence intervals. Here’s how you can get the doubling and halving times of the above epi curve:

get_info(best.fit$fit, "doubling")      # doubling time
#>   before 
#> 23.24274
get_info(best.fit$fit, "doubling.conf") # confidence interval
#>           2.5 %  97.5 %
#> before 20.65721 26.5681
get_info(best.fit$fit, "halving")       
#>    after 
#> 68.21031
get_info(best.fit$fit, "halving.conf")       
#>          2.5 %   97.5 %
#> after 62.86899 74.54349
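
The remaining parameters listed above are retrieved in the same way; for example, the daily growth rates and their confidence intervals (output omitted here):

get_info(best.fit$fit, "r")       # daily growth rate before and after the split
get_info(best.fit$fit, "r.conf")  # associated confidence intervals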

Note that fit will also take groups into account if incidence has been computed for several groups:

best.fit2 <- fit_optim_split(i.7.sex)
best.fit2
#> $df
#>         dates   mean.R2 groups
#> 1  2014-08-04 0.7546016      f
#> 2  2014-08-11 0.8096672      f
#> 3  2014-08-18 0.8513743      f
#> 4  2014-08-25 0.8864424      f
#> 5  2014-09-01 0.9165063      f
#> 6  2014-09-08 0.9270248      f
#> 7  2014-09-15 0.9345352      f
#> 8  2014-09-22 0.9350323      f
#> 9  2014-09-29 0.9339121      f
#> 10 2014-10-06 0.9288956      f
#> 11 2014-10-13 0.9226037      f
#> 12 2014-10-20 0.9122727      f
#> 13 2014-10-27 0.9027890      f
#> 14 2014-08-04 0.7566712      m
#> 15 2014-08-11 0.8164693      m
#> 16 2014-08-18 0.8567850      m
#> 17 2014-08-25 0.8820669      m
#> 18 2014-09-01 0.9006668      m
#> 19 2014-09-08 0.9166004      m
#> 20 2014-09-15 0.9271862      m
#> 21 2014-09-22 0.9263339      m
#> 22 2014-09-29 0.9260695      m
#> 23 2014-10-06 0.9216350      m
#> 24 2014-10-13 0.9144120      m
#> 25 2014-10-20 0.9077086      m
#> 26 2014-10-27 0.8966333      m
#> 
#> $plot

#> 
#> $split
#>            f            m 
#> "2014-09-22" "2014-09-15" 
#> 
#> $fit
#> <list of incidence_fit objects>
#> 
#> attr(x, 'locations'): list of vectors with the locations of each incidence_fit object
#> 
#> 'f', 'before'
#> 'm', 'before'
#> 'f', 'after'
#> 'm', 'after'
#> 
#> $model: regression of log-incidence over time
#> 
#> $info: list containing the following items:
#>   $r (daily growth rate):
#>    f_before    m_before     f_after     m_after 
#>  0.02570604  0.02883607 -0.01002297 -0.01038307 
#> 
#>   $r.conf (confidence interval):
#>                2.5 %       97.5 %
#> f_before  0.02289333  0.028518743
#> m_before  0.02502254  0.032649606
#> f_after  -0.01102735 -0.009018595
#> m_after  -0.01138910 -0.009377034
#> 
#>   $doubling (doubling time in days):
#> f_before m_before 
#> 26.96437 24.03750 
#> 
#>   $doubling.conf (confidence interval):
#>             2.5 %   97.5 %
#> f_before 24.30497 30.27725
#> m_before 21.22988 27.70091
#> 
#>   $halving (halving time in days):
#>  f_after  m_after 
#> 69.15586 66.75746 
#> 
#>   $halving.conf (confidence interval):
#>            2.5 %   97.5 %
#> f_after 62.85711 76.85756
#> m_after 60.86059 73.91966
#> 
#>   $pred: data.frame of incidence predictions (111 rows, 7 columns)
plot(i.7.sex, fit=best.fit2$fit)
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.

Using get_info() on this fit object will return all groups together:

get_info(best.fit2$fit, "doubling")      # doubling time
#> f_before m_before 
#> 26.96437 24.03750
get_info(best.fit2$fit, "doubling.conf") # confidence interval
#>             2.5 %   97.5 %
#> f_before 24.30497 30.27725
#> m_before 21.22988 27.70091
get_info(best.fit2$fit, "halving")       
#>  f_after  m_after 
#> 69.15586 66.75746
get_info(best.fit2$fit, "halving.conf")       
#>            2.5 %   97.5 %
#> f_after 62.85711 76.85756
#> m_after 60.86059 73.91966
incidence/inst/doc/incidence_fit_class.R0000644000176200001440000000756714626316442020037 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7, fig.height=5 ) ## ----fit_dates---------------------------------------------------------------- library(outbreaks) library(incidence) dat <- ebola_sim$linelist$date_of_onset i <- incidence(dat, interval = "week") i f <- fit(i[1:20]) f plot(i, fit = f) ## ----get_info----------------------------------------------------------------- get_info(f, "r") get_info(f, "r.conf") get_info(f, "doubling.conf") ## ----create_model------------------------------------------------------------- # ensure all dates have at least one incidence i2 <- i[1:20] i2 <- i2[apply(get_counts(i2), 1, min) > 0] df <- as.data.frame(i2, long = TRUE) df$dates.x <- get_dates(i2, position = "center", count_days = TRUE) head(df) lm1 <- stats::lm(log(counts) ~ dates.x, data = df) lm1 ## ----fit_model---------------------------------------------------------------- all.equal(f$model, lm1) ## ----make_info---------------------------------------------------------------- r <- stats::coef(lm1)["dates.x"] r.conf <- stats::confint(lm1, "dates.x", 0.95) new.data <- data.frame(dates.x = sort(unique(lm1$model$dates.x))) pred <- exp(stats::predict(lm1, newdata = new.data, interval = "confidence", level = 0.95)) pred <- cbind.data.frame(new.data, pred) info_list <- list( r = r, r.conf = r.conf, doubling = log(2) / r, doubling.conf = log(2) / r.conf, pred = pred ) info_list ## ----combine------------------------------------------------------------------ origin <- min(get_dates(i2)) info_list$pred$dates <- origin + info_list$pred$dates.x the_fit <- list( lm = lm1, info = info_list, origin = min(get_dates(i2)) ) class(the_fit) <- "incidence_fit" the_fit plot(i, fit = the_fit) ## ----nest--------------------------------------------------------------------- l <- list(a = list(b = 1, c = 2),d = list(e = list(f = 3, g = 4), h = 5)) str(l) l[[c("a", "b")]] l[[c("d", "e", "f")]] ## ----incidence_fit_list------------------------------------------------------- fl <- fit_optim_split(i) fl$fit plot(i, fit = fl$fit) ## ----incidence_fit_list_str--------------------------------------------------- str(fl$fit, max.level = 2) ## ----incidence_fit_methods---------------------------------------------------- methods(class = "incidence_fit_list") ## ----get_info_incidence_fit_list---------------------------------------------- get_info(fl$fit, "r") get_info(fl$fit, "r.conf") ## ----get_doubling------------------------------------------------------------- get_info(fl$fit, "doubling.conf") get_info(fl$fit, "doubling.conf", na.rm = FALSE) ## ----incidence_by_gender------------------------------------------------------ gen <- ebola_sim$linelist$gender ig <- incidence(dat, interval = "week", group = gen) plot(ig, border = "grey98") ## ----fit_gender--------------------------------------------------------------- fg <- fit_optim_split(ig) plot(ig, fit = fg$fit, border = "grey98", stack = FALSE) ## ----fit_gender_print--------------------------------------------------------- fg$fit str(fg$fit, max.level = 3) ## ----get_info_gender---------------------------------------------------------- get_info(fg$fit, "r.conf") ## ----get_fit------------------------------------------------------------------ str(get_fit(fg$fit), max.level = 2) ## ----incidence_fit_listify---------------------------------------------------- 
print(locs <- attributes(fg$fit)$locations) for (i in seq_along(locs)) { locs[[i]] <- c("fit", locs[[i]]) } print(locs) fg.ifl <- fg attributes(fg.ifl)$locations<- locs class(fg.ifl) <- "incidence_fit_list" ## ----new_fit_list_print------------------------------------------------------- fg.ifl ## ----list_stuff--------------------------------------------------------------- str(fg.ifl, max.level = 1) fg.ifl$split fg.ifl$df fg.ifl$plot incidence/inst/CITATION0000644000176200001440000000362514621110504014305 0ustar liggesuserscitHeader("To cite the incidence package in publications use:") bibentry(bibtype = "Article", title = "Epidemic curves made easy using the R package incidence [version 1; referees: awaiting peer review]", author = c(as.person("Zhian N. Kamvar"), as.person("Jun Cai"), as.person("Juliet R.C. Pulliam"), as.person("Jakob Schumacher"), as.person("Thibaut Jombart")), journal = "F1000Research", year = "2019", volume = "8", number = "139", url = "https://doi.org/10.12688/f1000research.18002.1", textVersion = paste("Kamvar ZN, Cai J, Pulliam JRC, Schumacher J, Jombart T.", "Epidemic curves made easy using the R package incidence [version 1; referees: awaiting peer review].", "F1000Research 2019, 8:139.", "URL https://doi.org/10.12688/f1000research.18002.1.") ) year = sub('.*(2[[:digit:]]{3})-.*', '\\1', meta$Date, perl = TRUE) vers = paste('R package version', meta$Version) bibentry( bibtype = 'Manual', title = paste('incidence:', meta$Title), author = c(as.person("Thibaut Jombart"), as.person("Zhian N. Kamvar"), as.person("Rich FitzJohn"), as.person("Jun Cai"), as.person("Sangeeta Bhatia"), as.person("Jakob Schumacher"), as.person("Juliet R.C. Pulliam")), year = year, note = vers, url = "https://doi.org/10.5281/zenodo.2584018", textVersion = paste('Thibaut Jombart, Zhian N. Kamvar, Rich FitzJohn, Jun Cai, ', 'Sangeeta Bhatia, Jakob Schumacher and Juliet R.C. Pulliam (', year, '). incidence: ', meta$Title, '. ', vers, '. URL https://doi.org/10.5281/zenodo.2584018', sep = ''), header = "To cite the specific version of incidence package in publications use:" ) incidence/inst/WORDLIST0000644000176200001440000000061514621104516014344 0ustar liggesusersanalyse behaviour colours conf csv customisation customisations Customise devtools epi epicurve epicurves EPIET EVD Fitzjohn ggplot github grey Höhle http iso ISOweek isoweeks Jombart Kamvar knitr modelled modelling Modelling POSIXct pre pred Programme randomised repidemicsconsortium rmarkdown Thibaut timespan toc VignetteEncoding VignetteEngine VignetteIndexEntry visualise www Www yyyy Zhian