Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Day of week effect #89

Merged
merged 18 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ Suggests:
primarycensored,
vdiffr,
knitr,
rmarkdown
rmarkdown,
pkgload
Config/testthat/edition: 3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Features
* Add day of week effect to simulated timeseries (#91)
* Estimate day-of-week effects as a random effect in the fitted model and correct for them in growth rate estimation (#89)

# RtGam v0.2.0

Expand Down
37 changes: 31 additions & 6 deletions R/RtGam.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# nolint start: line_length_linter
#' Fit a generalized additive model to incident cases
#'
#' Incident cases are modeled as a smooth function of time with a generalized
Expand All @@ -8,10 +9,10 @@
#'
#' Incident cases (\eqn{y}) are modeled as smoothly changing over time:
#'
#' \deqn{\text{log}\{E(y)\} = \alpha + f_{\text{global}}(t)}
#' \deqn{\text{log}\{E(y)\} = \alpha + f_{\text{global}}(t) + \omega(\text{wday}(t))}
#'
#' where incidence is negative-binomially distributed and \eqn{f(t)} is a smooth
#' function of time.
#' where incidence is negative-binomially distributed, \eqn{f(t)} is a smooth
#' function of time, and \eqn{\omega(\text{wday}(t))} is a random day-of-week effect.
#'
#' @param cases A vector of non-negative incident case counts occurring on an
#' associated `reference_date`. Missing values (NAs) are not allowed.
Expand All @@ -20,6 +21,13 @@
#' once.
#' @param group The grouping variable for the case/reference-date pair. Not yet
#' implemented and a value other than `NULL` will throw an error.
#' @param day_of_week A boolean or a vector of custom values to be applied to the
#' model as a random effect. If `TRUE`, then `RtGam` will use the parsed
#' `reference_date` values to infer the day of week. If a vector of the same
#' length as `reference_date`, then the user-supplied values are used as the
#' day-of-week effect, overriding the actual day of week. This approach can
#' be used to, for example, adjust for atypical reporting due to a holiday.
#' If `FALSE` no day of week effect is applied.
#' @param k An integer, the _total_ dimension of all the smoothing basis
#' functions. Defaults to `smooth_dim_heuristic(length(cases))`, which picks a
#' reasonable estimate based on the number of provided data points. This total
Expand Down Expand Up @@ -58,6 +66,12 @@
#' * backend: The user-provided `backend` argument
#' * formula: The formula object provided to the model
#' * diagnostics: The quantitative diagnostics of the model fit
#' @references
#' Mellor, Jonathon, et al. "An Application of Nowcasting Methods: Cases of
#' Norovirus during the Winter 2023/2024 in England." medRxiv (2024): 2024-07. \cr
#' Ward, Thomas, et al. "Growth, reproduction numbers and factors affecting the
#' spread of SARS-CoV-2 novel variants of concern in the UK from October 2020
#' to July 2021: a modelling analysis." BMJ open 11.11 (2021): e056636. \cr
#' @export
#' @examples
#' withr::with_seed(12345, {
Expand All @@ -70,9 +84,11 @@
#' )
#' fit <- RtGam(cases, reference_date)
#' fit
# nolint end: line_length_linter
RtGam <- function(cases,
reference_date,
group = NULL,
day_of_week = TRUE,
k = smooth_dim_heuristic(length(cases)),
m = penalty_dim_heuristic(length(unique(reference_date))),
backend = "gam",
Expand All @@ -82,17 +98,25 @@ RtGam <- function(cases,
cases,
reference_date,
group,
day_of_week,
k,
m,
backend
)
reference_date <- validate(cases, reference_date, group, k, m)
reference_date <- validate(cases, reference_date, group, day_of_week, k, m)

df <- dataset_creator(cases, reference_date, group, backend)
df <- dataset_creator(
cases = cases,
reference_date = reference_date,
group = group,
day_of_week = day_of_week,
backend = backend
)
formula <- formula_creator(
k = k,
m = m,
is_grouped = !rlang::is_null(group)
is_grouped = !rlang::is_null(group),
day_of_week = df[["day_of_week"]]
)

fit <- do.call(
Expand All @@ -109,6 +133,7 @@ RtGam <- function(cases,
fit = fit,
df = df,
group = group,
day_of_week = day_of_week,
k = k,
m = m,
backend = backend,
Expand Down
32 changes: 13 additions & 19 deletions R/checkers.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,21 @@ check_vector_length <- function(
}

check_vectors_equal_length <- function(
cases,
reference_date,
group,
...,
call = rlang::caller_env()) {
if (rlang::is_null(group)) {
args <- c("cases", "reference_date")
lengths <- c(length(cases), length(reference_date))
all_equal <- length(cases) == length(reference_date)
} else {
args <- c("cases", "reference_date", "group")
lengths <- c(length(cases), length(reference_date), length(group))
all_equal <- all(
length(cases) == length(reference_date),
length(cases) == length(group)
)
}
# Drop NULL elements
raw_args <- list(...)
args <- raw_args[which(sapply(raw_args, \(x) !rlang::is_null(x)))]

if (!all_equal) {
# Check equal length
equal_length <- sapply(args, \(arg) length(arg) == length(reference_date))

# Error if not
args <- names(equal_length)
if (!all(equal_length)) {
cli::cli_abort(
c(
"{.arg {args}} must be the same length",
"i" = "{.arg {args}} are of lengths {.val {lengths}}"
),
"{.arg {args}} is not the same length as {.arg reference_date}",
class = "RtGam_invalid_input",
call = call
)
Expand Down Expand Up @@ -109,13 +101,15 @@ check_required_inputs_provided <- function(
cases,
reference_date,
group,
day_of_week,
k,
m,
backend,
call = rlang::caller_env()) {
rlang::check_required(cases, "cases", call = call)
rlang::check_required(reference_date, "reference_date", call = call)
rlang::check_required(group, "group", call = call)
rlang::check_required(day_of_week, "day_of_week", call = call)
rlang::check_required(k, "k", call = call)
rlang::check_required(m, "m", call = call)

Expand Down
34 changes: 32 additions & 2 deletions R/dataset_creator.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
#' @return A dataframe for mgcv
#' @export
#' @keywords internal
dataset_creator <- function(cases, reference_date, group, backend) {
dataset_creator <- function(
cases,
reference_date,
group,
day_of_week,
backend) {
cases_int <- integerify_cases(cases)

timestep <- dates_to_timesteps(
Expand All @@ -21,7 +26,11 @@ dataset_creator <- function(cases, reference_date, group, backend) {
cases = cases_int,
timestep = timestep,
reference_date = reference_date,
group = group
group = group,
day_of_week = set_day_of_week_factor(
day_of_week,
reference_date
)
)

class(dat) <- c(glue::glue("RtGam_{backend}"), class(dat))
Expand Down Expand Up @@ -75,3 +84,24 @@ dates_to_timesteps <- function(reference_date,

(ref_date_int - min_int) / (max_int - min_int)
}

#' Map user input to model-expected format
#' Downstream the type of the return is used as a sentinel for
#' whether to implement a day of week effect. If a factor, then
#' the day of week effect is added to the model. Otherwise, the
#' day of week effect is excluded.
#' @noRd
set_day_of_week_factor <- function(day_of_week, reference_date) {
if (rlang::is_true(day_of_week)) {
as.factor(format(reference_date, "%A"))
} else if (rlang::is_false(day_of_week)) {
rep(FALSE, length(reference_date))
} else if (rlang::is_bare_vector(day_of_week)) {
as.factor(day_of_week)
} else {
# This case shouldn't be reachable by the user
cli::cli_abort(c(
"{.arg day_of_week} has an unexpected type. See {.code ?RtGam()}"
))
}
}
12 changes: 9 additions & 3 deletions R/formula.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#' @param is_grouped Whether to use a hierarchical model. Not yet supported.
#' @return A formula to be used by [`mgcv::gam()`]
#' @noRd
formula_creator <- function(k, m, is_grouped) {
formula_creator <- function(k, m, is_grouped, day_of_week) {
outcome <- "cases"
intercept <- "1"

Expand All @@ -42,9 +42,15 @@ formula_creator <- function(k, m, is_grouped) {
k = {smooth_basis_dim[['global_trend']]},
bs = 'tp')")
}
# nolint end

f <- glue::glue("{outcome} ~ {intercept} {plus_global_trend}")
if (is.factor(day_of_week)) {
plus_day_of_week <- "+ s(day_of_week, bs = 're')"
} else {
plus_day_of_week <- ""
}

f <- glue::glue("{outcome} ~ {intercept} {plus_global_trend} {plus_day_of_week}")
# nolint end
stats::as.formula(f)
}

Expand Down
Loading
Loading