From cca8c86950b2f5f762bda24e9a3877a4f3bc58a5 Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 21:54:14 +0000
Subject: [PATCH 1/9] add option to accumulate observations

---
 R/create.R                                 |  1 +
 R/opts.R                                   |  4 ++++
 inst/stan/data/observation_model.stan      |  1 +
 inst/stan/estimate_infections.stan         |  4 ++--
 inst/stan/functions/observation_model.stan | 27 +++++++++++++++++-----
 man/obs_opts.Rd                            |  1 +
 6 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/R/create.R b/R/create.R
index 1b73f26e9..547b6906d 100644
--- a/R/create.R
+++ b/R/create.R
@@ -397,6 +397,7 @@ create_obs_model <- function(obs = obs_opts(), dates) {
     week_effect = ifelse(obs$week_effect, obs$week_length, 1),
     obs_weight = obs$weight,
     obs_scale = as.numeric(length(obs$scale) != 0),
+    accumulate = obs$accumulate,
     likelihood = as.numeric(obs$likelihood),
     return_likelihood = as.numeric(obs$return_likelihood)
   )
diff --git a/R/opts.R b/R/opts.R
index 9c02cbaa3..69ac83f06 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -471,11 +471,14 @@ obs_opts <- function(family = "negbin",
                      week_effect = TRUE,
                      week_length = 7,
                      scale = list(),
+                     na = "missing",
                      likelihood = TRUE,
                      return_likelihood = FALSE) {
   if (length(phi) != 2 || !is.numeric(phi)) {
     stop("phi be numeric and of length two")
   }
+  na <- arg_match(na, values = c("missing", "accumulate"))
+
   obs <- list(
     family = arg_match(family, values = c("poisson", "negbin")),
     phi = phi,
@@ -483,6 +486,7 @@ obs_opts <- function(family = "negbin",
     week_effect = week_effect,
     week_length = week_length,
     scale = scale,
+    accumulate = as.integer(na == "accumulate"),
     likelihood = likelihood,
     return_likelihood = return_likelihood
   )
diff --git a/inst/stan/data/observation_model.stan b/inst/stan/data/observation_model.stan
index 0ce9ef3bb..671004ef4 100644
--- a/inst/stan/data/observation_model.stan
+++ b/inst/stan/data/observation_model.stan
@@ -9,5 +9,6 @@
   real obs_weight;                   // weight given to observation in log density
   int likelihood;                    // Should the likelihood be included in the model
   int return_likelihood; // Should the likehood be returned by the model
+  int accumulate; // Should missing values be accumulated
   int<lower = 0> trunc_id; // id of truncation
   int<lower = 0> delay_id; // id of delay
diff --git a/inst/stan/estimate_infections.stan b/inst/stan/estimate_infections.stan
index c1ac8c63e..7eb128b2d 100644
--- a/inst/stan/estimate_infections.stan
+++ b/inst/stan/estimate_infections.stan
@@ -148,8 +148,8 @@ model {
   // observed reports from mean of reports (update likelihood)
   if (likelihood) {
     report_lp(
-      cases, obs_reports[cases_time], rep_phi, phi_mean, phi_sd, model_type,
-      obs_weight
+      cases, cases_time, obs_reports, rep_phi, phi_mean, phi_sd, model_type,
+      obs_weight, accumulate
     );
   }
 }
diff --git a/inst/stan/functions/observation_model.stan b/inst/stan/functions/observation_model.stan
index ed3364ae6..157afe914 100644
--- a/inst/stan/functions/observation_model.stan
+++ b/inst/stan/functions/observation_model.stan
@@ -51,22 +51,37 @@ void truncation_lp(array[] real truncation_mean, array[] real truncation_sd,
   }
 }
 // update log density for reported cases
-void report_lp(array[] int cases, vector reports,
+void report_lp(array[] int cases, array[] int cases_time, vector reports,
                array[] real rep_phi, real phi_mean, real phi_sd,
-               int model_type, real weight) {
+               int model_type, real weight, int accumulate) {
+  int n = num_elements(cases); // number of observations
+  vector[n] obs_reports; // reports at observation time
+  if (accumulate) {
+    int t = num_elements(reports);
+    int current_obs = 1;
+    obs_reports = rep_vector(0, n);
+    for (i in 1:t) {
+      obs_reports[current_obs] += reports[i];
+      if (i == cases_time[current_obs]) {
+        current_obs += 1;
+      }
+    }
+  } else {
+    obs_reports = reports[cases_time];
+  }
   if (model_type) {
     real dispersion = 1 / pow(rep_phi[model_type], 2); 
     rep_phi[model_type] ~ normal(phi_mean, phi_sd) T[0,];
     if (weight == 1) {
-      cases ~ neg_binomial_2(reports, dispersion);
+      cases ~ neg_binomial_2(obs_reports, dispersion);
     } else {
-      target += neg_binomial_2_lpmf(cases | reports, dispersion) * weight;
+      target += neg_binomial_2_lpmf(cases | obs_reports, dispersion) * weight;
     }
   } else {
     if (weight == 1) {
-      cases ~ poisson(reports);
+      cases ~ poisson(obs_reports);
     } else {
-      target += poisson_lpmf(cases | reports) * weight;
+      target += poisson_lpmf(cases | obs_reports) * weight;
     }
   }
 }
diff --git a/man/obs_opts.Rd b/man/obs_opts.Rd
index f9c617dd5..5a3d27f72 100644
--- a/man/obs_opts.Rd
+++ b/man/obs_opts.Rd
@@ -11,6 +11,7 @@ obs_opts(
   week_effect = TRUE,
   week_length = 7,
   scale = list(),
+  na = "missing",
   likelihood = TRUE,
   return_likelihood = FALSE
 )

From 1707152726d16e99a36bf1c9062d29ff7cea8432 Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 21:54:43 +0000
Subject: [PATCH 2/9] accumulate in estimate_secondary model

---
 R/estimate_secondary.R            | 1 +
 inst/stan/estimate_secondary.stan | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index b29e3d34c..fecbf8b4b 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -174,6 +174,7 @@ estimate_secondary <- function(reports,
   data <- list(
     t = nrow(reports),
     obs = reports$secondary,
+    obs_time = seq_along(reports$secondary),
     primary = reports$primary,
     burn_in = burn_in,
     seeding_time = 0
diff --git a/inst/stan/estimate_secondary.stan b/inst/stan/estimate_secondary.stan
index e2209349b..6cd359a7c 100644
--- a/inst/stan/estimate_secondary.stan
+++ b/inst/stan/estimate_secondary.stan
@@ -9,6 +9,7 @@ functions {
 data {
   int t;                             // time of observations
   array[t] int<lower = 0> obs;             // observed secondary data
+  array[t] int obs_time;             // observed secondary data
   vector[t] primary;                 // observed primary data
   int burn_in;                       // time period to not use for fitting
 #include data/secondary.stan
@@ -83,8 +84,8 @@ model {
    }
   // observed secondary reports from mean of secondary reports (update likelihood)
   if (likelihood) {
-    report_lp(obs[(burn_in + 1):t], secondary[(burn_in + 1):t],
-              rep_phi, phi_mean, phi_sd, model_type, 1);
+    report_lp(obs[(burn_in + 1):t], obs_time, secondary[(burn_in + 1):t],
+              rep_phi, phi_mean, phi_sd, model_type, 1, accumulate);
   }
 }
 

From 7668d00a9f1b850f4bae56df6e595be64a1dafae Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 21:55:10 +0000
Subject: [PATCH 3/9] add test for weekly accumulation

---
 tests/testthat/test-estimate_infections.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/testthat/test-estimate_infections.R b/tests/testthat/test-estimate_infections.R
index ecb35a2d6..07f682cf0 100644
--- a/tests/testthat/test-estimate_infections.R
+++ b/tests/testthat/test-estimate_infections.R
@@ -43,6 +43,14 @@ test_that("estimate_infections successfully returns estimates when passed NA val
   test_estimate_infections(reported_cases_na)
 })
 
+test_that("estimate_infections successfully returns estimates when accumulating to weekly", {
+  skip_on_cran()
+  reported_cases_weekly <- data.table::copy(reported_cases)
+  reported_cases_weekly[, confirm := frollsum(confirm, 7)]
+  reported_cases_weekly <-
+    reported_cases_weekly[seq(7, nrow(reported_cases_weekly), 7)]
+  test_estimate_infections(reported_cases_weekly, obs = obs_opts(na = "accumulate"))
+})
 
 test_that("estimate_infections successfully returns estimates using no delays", {
   skip_on_cran()

From 5e7d20b7a9a3346db5d902910d42559921bafd9b Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 21:55:24 +0000
Subject: [PATCH 4/9] check there's data to fit initial growth model

---
 R/create.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/create.R b/R/create.R
index 547b6906d..c04443ef6 100644
--- a/R/create.R
+++ b/R/create.R
@@ -482,7 +482,7 @@ create_stan_data <- function(reported_cases, seeding_time,
     is.na(data$prior_infections) || is.null(data$prior_infections),
     0, data$prior_infections
   )
-  if (data$seeding_time > 1) {
+  if (data$seeding_time > 1 & nrow(first_week) > 1) {
     safe_lm <- purrr::safely(stats::lm)
     data$prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]]
     data$prior_growth <- ifelse(is.null(data$prior_growth), 0,

From 6d095c485c112547d680411a77c55a4ea4f6d98f Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Sat, 20 Jan 2024 09:16:07 +0000
Subject: [PATCH 5/9] ignore first observation when accumulating

---
 inst/stan/functions/observation_model.stan | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/inst/stan/functions/observation_model.stan b/inst/stan/functions/observation_model.stan
index 157afe914..ec5822d3b 100644
--- a/inst/stan/functions/observation_model.stan
+++ b/inst/stan/functions/observation_model.stan
@@ -54,34 +54,41 @@ void truncation_lp(array[] real truncation_mean, array[] real truncation_sd,
 void report_lp(array[] int cases, array[] int cases_time, vector reports,
                array[] real rep_phi, real phi_mean, real phi_sd,
                int model_type, real weight, int accumulate) {
-  int n = num_elements(cases); // number of observations
+  int n = num_elements(cases) - accumulate; // number of observations
   vector[n] obs_reports; // reports at observation time
+  array[n] int obs_cases; // observed cases at observation time
   if (accumulate) {
     int t = num_elements(reports);
-    int current_obs = 1;
+    int current_obs = 0;
     obs_reports = rep_vector(0, n);
     for (i in 1:t) {
-      obs_reports[current_obs] += reports[i];
+      if (current_obs > 0) { // first observation gets ignored when acucmulating
+        obs_reports[current_obs] += reports[i];
+      }
       if (i == cases_time[current_obs]) {
         current_obs += 1;
       }
     }
+    obs_cases = cases[2:(n - 1)];
   } else {
     obs_reports = reports[cases_time];
+    obs_cases = cases;
   }
   if (model_type) {
     real dispersion = 1 / pow(rep_phi[model_type], 2); 
     rep_phi[model_type] ~ normal(phi_mean, phi_sd) T[0,];
     if (weight == 1) {
-      cases ~ neg_binomial_2(obs_reports, dispersion);
+      obs_cases ~ neg_binomial_2(obs_reports, dispersion);
     } else {
-      target += neg_binomial_2_lpmf(cases | obs_reports, dispersion) * weight;
+      target += neg_binomial_2_lpmf(
+        obs_cases | obs_reports, dispersion
+      ) * weight;
     }
   } else {
     if (weight == 1) {
-      cases ~ poisson(obs_reports);
+      obs_cases ~ poisson(obs_reports);
     } else {
-      target += poisson_lpmf(cases | obs_reports) * weight;
+      target += poisson_lpmf(obs_cases | obs_reports) * weight;
     }
   }
 }

From c930384615a7a3e00c86cfc2e43490eca2281bef Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Mon, 22 Jan 2024 09:31:08 +0000
Subject: [PATCH 6/9] document "na" argument

---
 R/opts.R        |  9 +++++++++
 man/obs_opts.Rd | 10 ++++++++++
 2 files changed, 19 insertions(+)

diff --git a/R/opts.R b/R/opts.R
index 69ac83f06..e18e73d88 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -446,6 +446,15 @@ gp_opts <- function(basis_prop = 0.2,
 #' empty a mean (`mean`) and standard deviation (`sd`) needs to be supplied
 #' defining the normally distributed scaling factor.
 #'
+#' @param na Character. Options are "missing" (the default) and "accumulate".
+#'   This determines how NA values in the data are interpreted. If set to
+#'   "missing", any NA values in the observation data set will be interpreted as
+#'   missing and skipped in the likelihood. If set to "accumulate", modelled
+#'   observations will be accumulated and added to the next non-NA data point.
+#'   This can be used to model incidence data that is reported at less than
+#'   daily intervals. If set to "accumulate", the first data point is not
+#'   included in the data point but used only to reset modelled observations to
+#'   zero.
 #' @param likelihood Logical, defaults to `TRUE`. Should the likelihood be
 #' included in the model.
 #'
diff --git a/man/obs_opts.Rd b/man/obs_opts.Rd
index 5a3d27f72..a62492a31 100644
--- a/man/obs_opts.Rd
+++ b/man/obs_opts.Rd
@@ -39,6 +39,16 @@ applied to map latent infections (convolved to date of report). If none
 empty a mean (\code{mean}) and standard deviation (\code{sd}) needs to be supplied
 defining the normally distributed scaling factor.}
 
+\item{na}{Character. Options are "missing" (the default) and "accumulate".
+This determines how NA values in the data are interpreted. If set to
+"missing", any NA values in the observation data set will be interpreted as
+missing and skipped in the likelihood. If set to "accumulate", modelled
+observations will be accumulated and added to the next non-NA data point.
+This can be used to model incidence data that is reported at less than
+daily intervals. If set to "accumulate", the first data point is not
+included in the data point but used only to reset modelled observations to
+zero.}
+
 \item{likelihood}{Logical, defaults to \code{TRUE}. Should the likelihood be
 included in the model.}
 

From 0ddb2d40393ddd2a83e52523214eed1fbee98b29 Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Mon, 22 Jan 2024 09:31:23 +0000
Subject: [PATCH 7/9] add news item

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index d6c62f148..adf62b116 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -25,6 +25,7 @@
 ## Model changes
 
 * Updated the parameterisation of the dispersion term `phi` to be `phi = 1 / sqrt_phi ^ 2` rather than the previous parameterisation `phi = 1 / sqrt(sqrt_phi)` based on the suggested prior [here](https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations#story-when-the-generic-prior-fails-the-case-of-the-negative-binomial) and the performance benefits seen in the `epinowcast` package (see [here](https://github.com/epinowcast/epinowcast/blob/8eff560d1fd8305f5fb26c21324b2bfca1f002b4/inst/stan/epinowcast.stan#L314)). By @seabbs in # and reviewed by @sbfnk.
+* Added an `na` argument to `obs_opts()` that allows the user to specify whether NA values in the data should be interpreted as missing or accumulated in the next non-NA data point. By @sbfnk.
 
 # EpiNow2 1.4.0
 

From c22a7ab592c54d08600575fc981df835889375af Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Mon, 22 Jan 2024 12:13:26 +0000
Subject: [PATCH 8/9] update obs_opts tests

---
 tests/testthat/test-create_obs_model.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-create_obs_model.R b/tests/testthat/test-create_obs_model.R
index 4829bb0af..4211c7dbe 100644
--- a/tests/testthat/test-create_obs_model.R
+++ b/tests/testthat/test-create_obs_model.R
@@ -3,10 +3,10 @@ dates <- seq(as.Date("2020-03-15"), by = "days", length.out = 15)
 
 test_that("create_obs_model works with default settings", {
   obs <- create_obs_model(dates = dates)
-  expect_equal(length(obs), 11)
+  expect_equal(length(obs), 12)
   expect_equal(names(obs), c(
     "model_type", "phi_mean", "phi_sd", "week_effect", "obs_weight",
-    "obs_scale", "likelihood", "return_likelihood",
+    "obs_scale", "accumulate", "likelihood", "return_likelihood",
     "day_of_week", "obs_scale_mean",
     "obs_scale_sd"
   ))

From 4d186fcbc01cc9b91c9d7d2f167d9214a1519fff Mon Sep 17 00:00:00 2001
From: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
Date: Mon, 22 Jan 2024 12:14:17 +0000
Subject: [PATCH 9/9] make logical operator scalar

---
 R/create.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/create.R b/R/create.R
index c04443ef6..b4767547b 100644
--- a/R/create.R
+++ b/R/create.R
@@ -482,7 +482,7 @@ create_stan_data <- function(reported_cases, seeding_time,
     is.na(data$prior_infections) || is.null(data$prior_infections),
     0, data$prior_infections
   )
-  if (data$seeding_time > 1 & nrow(first_week) > 1) {
+  if (data$seeding_time > 1 && nrow(first_week) > 1) {
     safe_lm <- purrr::safely(stats::lm)
     data$prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]]
     data$prior_growth <- ifelse(is.null(data$prior_growth), 0,