From cca8c86950b2f5f762bda24e9a3877a4f3bc58a5 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 19 Jan 2024 21:54:14 +0000 Subject: [PATCH 1/9] add option to accumulate observations --- R/create.R | 1 + R/opts.R | 4 ++++ inst/stan/data/observation_model.stan | 1 + inst/stan/estimate_infections.stan | 4 ++-- inst/stan/functions/observation_model.stan | 27 +++++++++++++++++----- man/obs_opts.Rd | 1 + 6 files changed, 30 insertions(+), 8 deletions(-) diff --git a/R/create.R b/R/create.R index 1b73f26e9..547b6906d 100644 --- a/R/create.R +++ b/R/create.R @@ -397,6 +397,7 @@ create_obs_model <- function(obs = obs_opts(), dates) { week_effect = ifelse(obs$week_effect, obs$week_length, 1), obs_weight = obs$weight, obs_scale = as.numeric(length(obs$scale) != 0), + accumulate = obs$accumulate, likelihood = as.numeric(obs$likelihood), return_likelihood = as.numeric(obs$return_likelihood) ) diff --git a/R/opts.R b/R/opts.R index 9c02cbaa3..69ac83f06 100644 --- a/R/opts.R +++ b/R/opts.R @@ -471,11 +471,14 @@ obs_opts <- function(family = "negbin", week_effect = TRUE, week_length = 7, scale = list(), + na = "missing", likelihood = TRUE, return_likelihood = FALSE) { if (length(phi) != 2 || !is.numeric(phi)) { stop("phi be numeric and of length two") } + na <- arg_match(na, values = c("missing", "accumulate")) + obs <- list( family = arg_match(family, values = c("poisson", "negbin")), phi = phi, @@ -483,6 +486,7 @@ obs_opts <- function(family = "negbin", week_effect = week_effect, week_length = week_length, scale = scale, + accumulate = as.integer(na == "accumulate"), likelihood = likelihood, return_likelihood = return_likelihood ) diff --git a/inst/stan/data/observation_model.stan b/inst/stan/data/observation_model.stan index 0ce9ef3bb..671004ef4 100644 --- a/inst/stan/data/observation_model.stan +++ b/inst/stan/data/observation_model.stan @@ -9,5 +9,6 @@ real obs_weight; // weight given to observation in log density int likelihood; // Should the likelihood be included in the model int return_likelihood; // Should the likehood be returned by the model + int accumulate; // Should missing values be accumulated int trunc_id; // id of truncation int delay_id; // id of delay diff --git a/inst/stan/estimate_infections.stan b/inst/stan/estimate_infections.stan index c1ac8c63e..7eb128b2d 100644 --- a/inst/stan/estimate_infections.stan +++ b/inst/stan/estimate_infections.stan @@ -148,8 +148,8 @@ model { // observed reports from mean of reports (update likelihood) if (likelihood) { report_lp( - cases, obs_reports[cases_time], rep_phi, phi_mean, phi_sd, model_type, - obs_weight + cases, cases_time, obs_reports, rep_phi, phi_mean, phi_sd, model_type, + obs_weight, accumulate ); } } diff --git a/inst/stan/functions/observation_model.stan b/inst/stan/functions/observation_model.stan index ed3364ae6..157afe914 100644 --- a/inst/stan/functions/observation_model.stan +++ b/inst/stan/functions/observation_model.stan @@ -51,22 +51,37 @@ void truncation_lp(array[] real truncation_mean, array[] real truncation_sd, } } // update log density for reported cases -void report_lp(array[] int cases, vector reports, +void report_lp(array[] int cases, array[] int cases_time, vector reports, array[] real rep_phi, real phi_mean, real phi_sd, - int model_type, real weight) { + int model_type, real weight, int accumulate) { + int n = num_elements(cases); // number of observations + vector[n] obs_reports; // reports at observation time + if (accumulate) { + int t = num_elements(reports); + int current_obs = 1; + obs_reports = rep_vector(0, n); + for (i in 1:t) { + obs_reports[current_obs] += reports[i]; + if (i == cases_time[current_obs]) { + current_obs += 1; + } + } + } else { + obs_reports = reports[cases_time]; + } if (model_type) { real dispersion = 1 / pow(rep_phi[model_type], 2); rep_phi[model_type] ~ normal(phi_mean, phi_sd) T[0,]; if (weight == 1) { - cases ~ neg_binomial_2(reports, dispersion); + cases ~ neg_binomial_2(obs_reports, dispersion); } else { - target += neg_binomial_2_lpmf(cases | reports, dispersion) * weight; + target += neg_binomial_2_lpmf(cases | obs_reports, dispersion) * weight; } } else { if (weight == 1) { - cases ~ poisson(reports); + cases ~ poisson(obs_reports); } else { - target += poisson_lpmf(cases | reports) * weight; + target += poisson_lpmf(cases | obs_reports) * weight; } } } diff --git a/man/obs_opts.Rd b/man/obs_opts.Rd index f9c617dd5..5a3d27f72 100644 --- a/man/obs_opts.Rd +++ b/man/obs_opts.Rd @@ -11,6 +11,7 @@ obs_opts( week_effect = TRUE, week_length = 7, scale = list(), + na = "missing", likelihood = TRUE, return_likelihood = FALSE ) From 1707152726d16e99a36bf1c9062d29ff7cea8432 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 19 Jan 2024 21:54:43 +0000 Subject: [PATCH 2/9] accumulate in estimate_secondary model --- R/estimate_secondary.R | 1 + inst/stan/estimate_secondary.stan | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index b29e3d34c..fecbf8b4b 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -174,6 +174,7 @@ estimate_secondary <- function(reports, data <- list( t = nrow(reports), obs = reports$secondary, + obs_time = seq_along(reports$secondary), primary = reports$primary, burn_in = burn_in, seeding_time = 0 diff --git a/inst/stan/estimate_secondary.stan b/inst/stan/estimate_secondary.stan index e2209349b..6cd359a7c 100644 --- a/inst/stan/estimate_secondary.stan +++ b/inst/stan/estimate_secondary.stan @@ -9,6 +9,7 @@ functions { data { int t; // time of observations array[t] int obs; // observed secondary data + array[t] int obs_time; // observed secondary data vector[t] primary; // observed primary data int burn_in; // time period to not use for fitting #include data/secondary.stan @@ -83,8 +84,8 @@ model { } // observed secondary reports from mean of secondary reports (update likelihood) if (likelihood) { - report_lp(obs[(burn_in + 1):t], secondary[(burn_in + 1):t], - rep_phi, phi_mean, phi_sd, model_type, 1); + report_lp(obs[(burn_in + 1):t], obs_time, secondary[(burn_in + 1):t], + rep_phi, phi_mean, phi_sd, model_type, 1, accumulate); } } From 7668d00a9f1b850f4bae56df6e595be64a1dafae Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 19 Jan 2024 21:55:10 +0000 Subject: [PATCH 3/9] add test for weekly accumulation --- tests/testthat/test-estimate_infections.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-estimate_infections.R b/tests/testthat/test-estimate_infections.R index ecb35a2d6..07f682cf0 100644 --- a/tests/testthat/test-estimate_infections.R +++ b/tests/testthat/test-estimate_infections.R @@ -43,6 +43,14 @@ test_that("estimate_infections successfully returns estimates when passed NA val test_estimate_infections(reported_cases_na) }) +test_that("estimate_infections successfully returns estimates when accumulating to weekly", { + skip_on_cran() + reported_cases_weekly <- data.table::copy(reported_cases) + reported_cases_weekly[, confirm := frollsum(confirm, 7)] + reported_cases_weekly <- + reported_cases_weekly[seq(7, nrow(reported_cases_weekly), 7)] + test_estimate_infections(reported_cases_weekly, obs = obs_opts(na = "accumulate")) +}) test_that("estimate_infections successfully returns estimates using no delays", { skip_on_cran() From 5e7d20b7a9a3346db5d902910d42559921bafd9b Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 19 Jan 2024 21:55:24 +0000 Subject: [PATCH 4/9] check there's data to fit initial growth model --- R/create.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create.R b/R/create.R index 547b6906d..c04443ef6 100644 --- a/R/create.R +++ b/R/create.R @@ -482,7 +482,7 @@ create_stan_data <- function(reported_cases, seeding_time, is.na(data$prior_infections) || is.null(data$prior_infections), 0, data$prior_infections ) - if (data$seeding_time > 1) { + if (data$seeding_time > 1 & nrow(first_week) > 1) { safe_lm <- purrr::safely(stats::lm) data$prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]] data$prior_growth <- ifelse(is.null(data$prior_growth), 0, From 6d095c485c112547d680411a77c55a4ea4f6d98f Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Sat, 20 Jan 2024 09:16:07 +0000 Subject: [PATCH 5/9] ignore first observation when accumulating --- inst/stan/functions/observation_model.stan | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/inst/stan/functions/observation_model.stan b/inst/stan/functions/observation_model.stan index 157afe914..ec5822d3b 100644 --- a/inst/stan/functions/observation_model.stan +++ b/inst/stan/functions/observation_model.stan @@ -54,34 +54,41 @@ void truncation_lp(array[] real truncation_mean, array[] real truncation_sd, void report_lp(array[] int cases, array[] int cases_time, vector reports, array[] real rep_phi, real phi_mean, real phi_sd, int model_type, real weight, int accumulate) { - int n = num_elements(cases); // number of observations + int n = num_elements(cases) - accumulate; // number of observations vector[n] obs_reports; // reports at observation time + array[n] int obs_cases; // observed cases at observation time if (accumulate) { int t = num_elements(reports); - int current_obs = 1; + int current_obs = 0; obs_reports = rep_vector(0, n); for (i in 1:t) { - obs_reports[current_obs] += reports[i]; + if (current_obs > 0) { // first observation gets ignored when acucmulating + obs_reports[current_obs] += reports[i]; + } if (i == cases_time[current_obs]) { current_obs += 1; } } + obs_cases = cases[2:(n - 1)]; } else { obs_reports = reports[cases_time]; + obs_cases = cases; } if (model_type) { real dispersion = 1 / pow(rep_phi[model_type], 2); rep_phi[model_type] ~ normal(phi_mean, phi_sd) T[0,]; if (weight == 1) { - cases ~ neg_binomial_2(obs_reports, dispersion); + obs_cases ~ neg_binomial_2(obs_reports, dispersion); } else { - target += neg_binomial_2_lpmf(cases | obs_reports, dispersion) * weight; + target += neg_binomial_2_lpmf( + obs_cases | obs_reports, dispersion + ) * weight; } } else { if (weight == 1) { - cases ~ poisson(obs_reports); + obs_cases ~ poisson(obs_reports); } else { - target += poisson_lpmf(cases | obs_reports) * weight; + target += poisson_lpmf(obs_cases | obs_reports) * weight; } } } From c930384615a7a3e00c86cfc2e43490eca2281bef Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 22 Jan 2024 09:31:08 +0000 Subject: [PATCH 6/9] document "na" argument --- R/opts.R | 9 +++++++++ man/obs_opts.Rd | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/R/opts.R b/R/opts.R index 69ac83f06..e18e73d88 100644 --- a/R/opts.R +++ b/R/opts.R @@ -446,6 +446,15 @@ gp_opts <- function(basis_prop = 0.2, #' empty a mean (`mean`) and standard deviation (`sd`) needs to be supplied #' defining the normally distributed scaling factor. #' +#' @param na Character. Options are "missing" (the default) and "accumulate". +#' This determines how NA values in the data are interpreted. If set to +#' "missing", any NA values in the observation data set will be interpreted as +#' missing and skipped in the likelihood. If set to "accumulate", modelled +#' observations will be accumulated and added to the next non-NA data point. +#' This can be used to model incidence data that is reported at less than +#' daily intervals. If set to "accumulate", the first data point is not +#' included in the data point but used only to reset modelled observations to +#' zero. #' @param likelihood Logical, defaults to `TRUE`. Should the likelihood be #' included in the model. #' diff --git a/man/obs_opts.Rd b/man/obs_opts.Rd index 5a3d27f72..a62492a31 100644 --- a/man/obs_opts.Rd +++ b/man/obs_opts.Rd @@ -39,6 +39,16 @@ applied to map latent infections (convolved to date of report). If none empty a mean (\code{mean}) and standard deviation (\code{sd}) needs to be supplied defining the normally distributed scaling factor.} +\item{na}{Character. Options are "missing" (the default) and "accumulate". +This determines how NA values in the data are interpreted. If set to +"missing", any NA values in the observation data set will be interpreted as +missing and skipped in the likelihood. If set to "accumulate", modelled +observations will be accumulated and added to the next non-NA data point. +This can be used to model incidence data that is reported at less than +daily intervals. If set to "accumulate", the first data point is not +included in the data point but used only to reset modelled observations to +zero.} + \item{likelihood}{Logical, defaults to \code{TRUE}. Should the likelihood be included in the model.} From 0ddb2d40393ddd2a83e52523214eed1fbee98b29 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 22 Jan 2024 09:31:23 +0000 Subject: [PATCH 7/9] add news item --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d6c62f148..adf62b116 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,7 @@ ## Model changes * Updated the parameterisation of the dispersion term `phi` to be `phi = 1 / sqrt_phi ^ 2` rather than the previous parameterisation `phi = 1 / sqrt(sqrt_phi)` based on the suggested prior [here](https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations#story-when-the-generic-prior-fails-the-case-of-the-negative-binomial) and the performance benefits seen in the `epinowcast` package (see [here](https://github.com/epinowcast/epinowcast/blob/8eff560d1fd8305f5fb26c21324b2bfca1f002b4/inst/stan/epinowcast.stan#L314)). By @seabbs in # and reviewed by @sbfnk. +* Added an `na` argument to `obs_opts()` that allows the user to specify whether NA values in the data should be interpreted as missing or accumulated in the next non-NA data point. By @sbfnk. # EpiNow2 1.4.0 From c22a7ab592c54d08600575fc981df835889375af Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 22 Jan 2024 12:13:26 +0000 Subject: [PATCH 8/9] update obs_opts tests --- tests/testthat/test-create_obs_model.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-create_obs_model.R b/tests/testthat/test-create_obs_model.R index 4829bb0af..4211c7dbe 100644 --- a/tests/testthat/test-create_obs_model.R +++ b/tests/testthat/test-create_obs_model.R @@ -3,10 +3,10 @@ dates <- seq(as.Date("2020-03-15"), by = "days", length.out = 15) test_that("create_obs_model works with default settings", { obs <- create_obs_model(dates = dates) - expect_equal(length(obs), 11) + expect_equal(length(obs), 12) expect_equal(names(obs), c( "model_type", "phi_mean", "phi_sd", "week_effect", "obs_weight", - "obs_scale", "likelihood", "return_likelihood", + "obs_scale", "accumulate", "likelihood", "return_likelihood", "day_of_week", "obs_scale_mean", "obs_scale_sd" )) From 4d186fcbc01cc9b91c9d7d2f167d9214a1519fff Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 22 Jan 2024 12:14:17 +0000 Subject: [PATCH 9/9] make logical operator scalar --- R/create.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create.R b/R/create.R index c04443ef6..b4767547b 100644 --- a/R/create.R +++ b/R/create.R @@ -482,7 +482,7 @@ create_stan_data <- function(reported_cases, seeding_time, is.na(data$prior_infections) || is.null(data$prior_infections), 0, data$prior_infections ) - if (data$seeding_time > 1 & nrow(first_week) > 1) { + if (data$seeding_time > 1 && nrow(first_week) > 1) { safe_lm <- purrr::safely(stats::lm) data$prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]] data$prior_growth <- ifelse(is.null(data$prior_growth), 0,