From 07b3b4e4f83af58040310139b76dd2464261458d Mon Sep 17 00:00:00 2001
From: Glenda Yenni <glenda@weecology.org>
Date: Wed, 18 Sep 2024 19:06:48 -0600
Subject: [PATCH] Force species-level filling of missing newmoons when na_drop
 = FALSE, closes #294

---
 NEWS.md                             |  6 +++---
 R/process_data_utils.R              | 20 +++++++++++++-------
 R/summarize_individual_rodents.R    | 25 +++++++++++++++++++------
 R/summarize_rodents.R               | 22 +++++++++++++++-------
 man/summarize_plant_data.Rd         |  2 +-
 man/summarize_rodent_data.Rd        |  2 +-
 tests/testthat/test-99-regression.R |  8 ++++----
 7 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 31eead64..39bc031d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,7 @@
 # portalr
-* `load_rodent_data()` now returns an object with an S3 class, and provides a useful message on `print`
-* Users can now pass arguments to `download_observations()` from `load_rodent_data()` and other calling functions
-
+* `load_rodent_data()` now returns an object with an S3 class, and provides a useful message on `print`.
+* Users can now pass arguments to `download_observations()` from `load_rodent_data()` and other calling functions.
+* Fix bug in `na_drop = FALSE` that failed to complete missing rows to the species level when `time = "newmoon"`.
 
 Version numbers follow [Semantic Versioning](https://semver.org/).
 
diff --git a/R/process_data_utils.R b/R/process_data_utils.R
index f6684b80..c913b870 100644
--- a/R/process_data_utils.R
+++ b/R/process_data_utils.R
@@ -28,7 +28,7 @@ clean_data <- function(full_data, trapping_table, ...) {
 #'
 #' @param data any data.frame with a plot column.
 #' @param plots specify subset of plots; can be a vector of plots, or specific
-#'   sets: "all" plots or "Longterm" plots (plots that have had the same
+#'   sets: "all" plots or "longterm" plots (plots that have had the same
 #'   treatment for the entire time series)
 #' @return Data.table filtered to the desired subset of plots.
 #'
@@ -79,15 +79,15 @@ join_plots <- function(df, plots_table) {
 #' period codes denote the number of censuses that have occurred, but are
 #' not the same as the number of censuses that should have occurred. Sometimes
 #' censuses are missed (weather, transport issues,etc). You can't pick this
-#' up with the period code. Because censues may not always occur monthly due to
-#' the newmoon -  a new moon code was devised to give a standardized language
-#' of time for forcasting in particular. This function allows the user to decide
+#' up with the period code. Because censuses may not always occur monthly due to
+#' the new moon -  a new moon code was devised to give a standardized language
+#' of time for forecasting in particular. This function allows the user to decide
 #' if they want to use the rodent period code, the new moon code, the date of
 #' the rodent census, or have their data with all three time formats
 #'
 #' @param summary_table Data.table with summarized rodent data.
-#' @param newmoon_table Data_table linking newmoon codes with period codes.
-#' @param time Character. Denotes whether newmoon codes, period codes,
+#' @param newmoon_table Data_table linking new moon codes with period codes.
+#' @param time Character. Denotes whether new moon codes, period codes,
 #' and/or date are desired.
 #'
 #' @return Data.table of summarized rodent data with user-specified time format
@@ -104,8 +104,14 @@ add_time <- function(summary_table, newmoons_table, time = "period") {
   } else {
     newmoons_table$censusdate[is.na(newmoons_table$censusdate)] <-
       newmoons_table$newmoondate[is.na(newmoons_table$censusdate)]
+    vars_to_complete <- names(dplyr::select(summary_table,tidyselect::any_of(c("species","plot"))))
     join_summary_newmoon <- dplyr::left_join(newmoons_table, summary_table,
-                                             by = "period")
+                                             by = "period") %>%
+                            tidyr::complete(tidyr::nesting(!!!rlang::syms(c("newmoonnumber",
+                                                           "newmoondate",
+                                                           "censusdate"))),
+                                            !!!rlang::syms(vars_to_complete)) %>%
+                            tidyr::drop_na(tidyselect::any_of(c("species","plot")))
   }
   date_vars <- c("newmoondate", "newmoonnumber", "period", "censusdate")
   vars_to_keep <- switch(tolower(time),
diff --git a/R/summarize_individual_rodents.R b/R/summarize_individual_rodents.R
index eb1c515e..f03df728 100644
--- a/R/summarize_individual_rodents.R
+++ b/R/summarize_individual_rodents.R
@@ -12,10 +12,16 @@
 #' @export
 #'
 summarize_individual_rodents <- function(path = get_default_data_path(),
-                                         clean = TRUE, type = "Rodents",
-                                         length = "all", unknowns = FALSE, time = "period",
-                                         fillweight = FALSE, min_plots = 1, min_traps = 1,
-                                         download_if_missing = TRUE, quiet = FALSE)
+                                         clean = TRUE,
+                                         type = "Rodents",
+                                         length = "all",
+                                         unknowns = FALSE,
+                                         time = "period",
+                                         fillweight = FALSE,
+                                         min_plots = 1,
+                                         min_traps = 1,
+                                         download_if_missing = TRUE,
+                                         quiet = FALSE)
 {
 
   #### Get Data ----
@@ -38,8 +44,15 @@ summarize_individual_rodents <- function(path = get_default_data_path(),
                     "sex", "reprod", "age", "testes", "vagina","pregnant", "nipples","lactation",
                     "hfl", "wgt", "tag", "note2", "ltag", "note3"))
 
-  #### use new moon number as time index if time == "newmoon" ----
-  return(add_time(rodents, data_tables$newmoons_table, time))
+  rodents <- add_time(rodents, data_tables$newmoons_table, time)
+
+  if(time == "newmoon") {
+      rodents <- rodents %>%
+                    dplyr::select("newmoonnumber","month","day","year","treatment","plot","stake",
+                    "species","sex","reprod","age","testes","vagina","pregnant",
+                    "nipples","lactation","hfl","wgt","tag","note2","ltag","note3")
+            }
+ return(rodents)
 }
 
 #' @rdname summarize_individual_rodents
diff --git a/R/summarize_rodents.R b/R/summarize_rodents.R
index 8be49ad7..796f9fc0 100644
--- a/R/summarize_rodents.R
+++ b/R/summarize_rodents.R
@@ -33,7 +33,7 @@
 #'  2) that combo was skipped that month, or
 #'  3) that combo was trapped, but is unusable (a negative period code))
 #' @param zero_drop logical, drop 0s (representing sufficient sampling, but no
-#'   detections)
+#'   detection)
 #' @param min_traps minimum number of traps for a plot to be included
 #' @param min_plots minimum number of plots within a period for an
 #'   observation to be included
@@ -50,18 +50,26 @@
 #' @export
 #'
 summarize_rodent_data <- function(path = get_default_data_path(),
-                                  clean = TRUE, level = "Site",
-                                  type = "Rodents", length = "all", plots = length,
-                                  unknowns = FALSE, shape = "crosstab",
-                                  time = "period", output = "abundance",
+                                  clean = TRUE,
+                                  level = "Site",
+                                  type = "Rodents",
+                                  length = "all",
+                                  plots = length,
+                                  unknowns = FALSE,
+                                  shape = "crosstab",
+                                  time = "period",
+                                  output = "abundance",
                                   fillweight = (output != "abundance"),
                                   na_drop = TRUE,
                                   zero_drop = switch(tolower(level),
                                                      "plot" = FALSE,
                                                      "treatment" = TRUE,
                                                      "site" = TRUE),
-                                  min_traps = 1, min_plots = 24, effort = FALSE,
-                                  download_if_missing = TRUE, quiet = FALSE,
+                                  min_traps = 1,
+                                  min_plots = 24,
+                                  effort = FALSE,
+                                  download_if_missing = TRUE,
+                                  quiet = FALSE,
                                   include_unsampled = FALSE)
 {
   if (include_unsampled)
diff --git a/man/summarize_plant_data.Rd b/man/summarize_plant_data.Rd
index 72936fc0..32c2b2e7 100644
--- a/man/summarize_plant_data.Rd
+++ b/man/summarize_plant_data.Rd
@@ -87,7 +87,7 @@ represent one of a few slightly different meanings:
  3) that combo was trapped, but is unusable (a negative period code))}
 
 \item{zero_drop}{logical, drop 0s (representing sufficient sampling, but no
-detections)}
+detection)}
 
 \item{min_quads}{numeric [1:16], minimum number of quadrats (out of 16) for a plot to be included}
 
diff --git a/man/summarize_rodent_data.Rd b/man/summarize_rodent_data.Rd
index 789bd1a8..3c982608 100644
--- a/man/summarize_rodent_data.Rd
+++ b/man/summarize_rodent_data.Rd
@@ -102,7 +102,7 @@ represent one of a few slightly different meanings:
  3) that combo was trapped, but is unusable (a negative period code))}
 
 \item{zero_drop}{logical, drop 0s (representing sufficient sampling, but no
-detections)}
+detection)}
 
 \item{min_traps}{minimum number of traps for a plot to be included}
 
diff --git a/tests/testthat/test-99-regression.R b/tests/testthat/test-99-regression.R
index 1456ecec..257c62a5 100644
--- a/tests/testthat/test-99-regression.R
+++ b/tests/testthat/test-99-regression.R
@@ -28,7 +28,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha
                     shape = "flat", time = "newmoon", na_drop = FALSE) %>%
     dplyr::filter(newmoonnumber < 465)
   attributes(data) <- attributes(data)[sort(names(attributes(data)))]
-  expect_known_hash(data, "20d3d2287c")
+  expect_known_hash(data, "f5167f2c0e")
 
   sampled_newmoons <- abundance(portal_data_path, time = "all",
                                 na_drop = FALSE, min_plots = 1) %>%
@@ -37,7 +37,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha
     dplyr::filter(newmoonnumber %in% sampled_newmoons)
   attributes(data) <- attributes(data)[sort(names(attributes(data)))]
   expect_equal(dim(data), c(155880, 5))
-  expect_known_hash(data, "efbecf7764")
+  expect_known_hash(data, "d11b6b8ef3")
 })
 
 test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
@@ -49,7 +49,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
   expect_equal(dim(data), c(464, 22))
   expect_known_hash(is.na(data), "0294bfffde")
   data[is.na(data)] <- -999
-  expect_known_hash(data, "638d5588ce")
+  expect_known_hash(data, "62f714b7c9")
   abundances <- data %>% dplyr::select(-censusdate)
 
   data <- abundance(portal_data_path, time = "newmoon", min_plots = 1,
@@ -59,7 +59,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", {
   expect_equal(dim(data), c(464, 22))
   expect_known_hash(is.na(data), "b2d5abb360")
   data[is.na(data)] <- -999
-  expect_known_hash(data, "59b85b7415")
+  expect_known_hash(data, "6c800c6b50")
   expect_equal(data %>% dplyr::select(-newmoonnumber),
                abundances)
 })