From 07b3b4e4f83af58040310139b76dd2464261458d Mon Sep 17 00:00:00 2001 From: Glenda Yenni Date: Wed, 18 Sep 2024 19:06:48 -0600 Subject: [PATCH] Force species-level filling of missing newmoons when na_drop = FALSE, closes #294 --- NEWS.md | 6 +++--- R/process_data_utils.R | 20 +++++++++++++------- R/summarize_individual_rodents.R | 25 +++++++++++++++++++------ R/summarize_rodents.R | 22 +++++++++++++++------- man/summarize_plant_data.Rd | 2 +- man/summarize_rodent_data.Rd | 2 +- tests/testthat/test-99-regression.R | 8 ++++---- 7 files changed, 56 insertions(+), 29 deletions(-) diff --git a/NEWS.md b/NEWS.md index 31eead64..39bc031d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # portalr -* `load_rodent_data()` now returns an object with an S3 class, and provides a useful message on `print` -* Users can now pass arguments to `download_observations()` from `load_rodent_data()` and other calling functions - +* `load_rodent_data()` now returns an object with an S3 class, and provides a useful message on `print`. +* Users can now pass arguments to `download_observations()` from `load_rodent_data()` and other calling functions. +* Fix bug in `na_drop = FALSE` that failed to complete missing rows to the species level when `time = "newmoon"`. Version numbers follow [Semantic Versioning](https://semver.org/). diff --git a/R/process_data_utils.R b/R/process_data_utils.R index f6684b80..c913b870 100644 --- a/R/process_data_utils.R +++ b/R/process_data_utils.R @@ -28,7 +28,7 @@ clean_data <- function(full_data, trapping_table, ...) { #' #' @param data any data.frame with a plot column. #' @param plots specify subset of plots; can be a vector of plots, or specific -#' sets: "all" plots or "Longterm" plots (plots that have had the same +#' sets: "all" plots or "longterm" plots (plots that have had the same #' treatment for the entire time series) #' @return Data.table filtered to the desired subset of plots. #' @@ -79,15 +79,15 @@ join_plots <- function(df, plots_table) { #' period codes denote the number of censuses that have occurred, but are #' not the same as the number of censuses that should have occurred. Sometimes #' censuses are missed (weather, transport issues,etc). You can't pick this -#' up with the period code. Because censues may not always occur monthly due to -#' the newmoon - a new moon code was devised to give a standardized language -#' of time for forcasting in particular. This function allows the user to decide +#' up with the period code. Because censuses may not always occur monthly due to +#' the new moon - a new moon code was devised to give a standardized language +#' of time for forecasting in particular. This function allows the user to decide #' if they want to use the rodent period code, the new moon code, the date of #' the rodent census, or have their data with all three time formats #' #' @param summary_table Data.table with summarized rodent data. -#' @param newmoon_table Data_table linking newmoon codes with period codes. -#' @param time Character. Denotes whether newmoon codes, period codes, +#' @param newmoon_table Data_table linking new moon codes with period codes. +#' @param time Character. Denotes whether new moon codes, period codes, #' and/or date are desired. #' #' @return Data.table of summarized rodent data with user-specified time format @@ -104,8 +104,14 @@ add_time <- function(summary_table, newmoons_table, time = "period") { } else { newmoons_table$censusdate[is.na(newmoons_table$censusdate)] <- newmoons_table$newmoondate[is.na(newmoons_table$censusdate)] + vars_to_complete <- names(dplyr::select(summary_table,tidyselect::any_of(c("species","plot")))) join_summary_newmoon <- dplyr::left_join(newmoons_table, summary_table, - by = "period") + by = "period") %>% + tidyr::complete(tidyr::nesting(!!!rlang::syms(c("newmoonnumber", + "newmoondate", + "censusdate"))), + !!!rlang::syms(vars_to_complete)) %>% + tidyr::drop_na(tidyselect::any_of(c("species","plot"))) } date_vars <- c("newmoondate", "newmoonnumber", "period", "censusdate") vars_to_keep <- switch(tolower(time), diff --git a/R/summarize_individual_rodents.R b/R/summarize_individual_rodents.R index eb1c515e..f03df728 100644 --- a/R/summarize_individual_rodents.R +++ b/R/summarize_individual_rodents.R @@ -12,10 +12,16 @@ #' @export #' summarize_individual_rodents <- function(path = get_default_data_path(), - clean = TRUE, type = "Rodents", - length = "all", unknowns = FALSE, time = "period", - fillweight = FALSE, min_plots = 1, min_traps = 1, - download_if_missing = TRUE, quiet = FALSE) + clean = TRUE, + type = "Rodents", + length = "all", + unknowns = FALSE, + time = "period", + fillweight = FALSE, + min_plots = 1, + min_traps = 1, + download_if_missing = TRUE, + quiet = FALSE) { #### Get Data ---- @@ -38,8 +44,15 @@ summarize_individual_rodents <- function(path = get_default_data_path(), "sex", "reprod", "age", "testes", "vagina","pregnant", "nipples","lactation", "hfl", "wgt", "tag", "note2", "ltag", "note3")) - #### use new moon number as time index if time == "newmoon" ---- - return(add_time(rodents, data_tables$newmoons_table, time)) + rodents <- add_time(rodents, data_tables$newmoons_table, time) + + if(time == "newmoon") { + rodents <- rodents %>% + dplyr::select("newmoonnumber","month","day","year","treatment","plot","stake", + "species","sex","reprod","age","testes","vagina","pregnant", + "nipples","lactation","hfl","wgt","tag","note2","ltag","note3") + } + return(rodents) } #' @rdname summarize_individual_rodents diff --git a/R/summarize_rodents.R b/R/summarize_rodents.R index 8be49ad7..796f9fc0 100644 --- a/R/summarize_rodents.R +++ b/R/summarize_rodents.R @@ -33,7 +33,7 @@ #' 2) that combo was skipped that month, or #' 3) that combo was trapped, but is unusable (a negative period code)) #' @param zero_drop logical, drop 0s (representing sufficient sampling, but no -#' detections) +#' detection) #' @param min_traps minimum number of traps for a plot to be included #' @param min_plots minimum number of plots within a period for an #' observation to be included @@ -50,18 +50,26 @@ #' @export #' summarize_rodent_data <- function(path = get_default_data_path(), - clean = TRUE, level = "Site", - type = "Rodents", length = "all", plots = length, - unknowns = FALSE, shape = "crosstab", - time = "period", output = "abundance", + clean = TRUE, + level = "Site", + type = "Rodents", + length = "all", + plots = length, + unknowns = FALSE, + shape = "crosstab", + time = "period", + output = "abundance", fillweight = (output != "abundance"), na_drop = TRUE, zero_drop = switch(tolower(level), "plot" = FALSE, "treatment" = TRUE, "site" = TRUE), - min_traps = 1, min_plots = 24, effort = FALSE, - download_if_missing = TRUE, quiet = FALSE, + min_traps = 1, + min_plots = 24, + effort = FALSE, + download_if_missing = TRUE, + quiet = FALSE, include_unsampled = FALSE) { if (include_unsampled) diff --git a/man/summarize_plant_data.Rd b/man/summarize_plant_data.Rd index 72936fc0..32c2b2e7 100644 --- a/man/summarize_plant_data.Rd +++ b/man/summarize_plant_data.Rd @@ -87,7 +87,7 @@ represent one of a few slightly different meanings: 3) that combo was trapped, but is unusable (a negative period code))} \item{zero_drop}{logical, drop 0s (representing sufficient sampling, but no -detections)} +detection)} \item{min_quads}{numeric [1:16], minimum number of quadrats (out of 16) for a plot to be included} diff --git a/man/summarize_rodent_data.Rd b/man/summarize_rodent_data.Rd index 789bd1a8..3c982608 100644 --- a/man/summarize_rodent_data.Rd +++ b/man/summarize_rodent_data.Rd @@ -102,7 +102,7 @@ represent one of a few slightly different meanings: 3) that combo was trapped, but is unusable (a negative period code))} \item{zero_drop}{logical, drop 0s (representing sufficient sampling, but no -detections)} +detection)} \item{min_traps}{minimum number of traps for a plot to be included} diff --git a/tests/testthat/test-99-regression.R b/tests/testthat/test-99-regression.R index 1456ecec..257c62a5 100644 --- a/tests/testthat/test-99-regression.R +++ b/tests/testthat/test-99-regression.R @@ -28,7 +28,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha shape = "flat", time = "newmoon", na_drop = FALSE) %>% dplyr::filter(newmoonnumber < 465) attributes(data) <- attributes(data)[sort(names(attributes(data)))] - expect_known_hash(data, "20d3d2287c") + expect_known_hash(data, "f5167f2c0e") sampled_newmoons <- abundance(portal_data_path, time = "all", na_drop = FALSE, min_plots = 1) %>% @@ -37,7 +37,7 @@ test_that("data generated by level = plot, time = newmoon, type = granivore, sha dplyr::filter(newmoonnumber %in% sampled_newmoons) attributes(data) <- attributes(data)[sort(names(attributes(data)))] expect_equal(dim(data), c(155880, 5)) - expect_known_hash(data, "efbecf7764") + expect_known_hash(data, "d11b6b8ef3") }) test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", { @@ -49,7 +49,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", { expect_equal(dim(data), c(464, 22)) expect_known_hash(is.na(data), "0294bfffde") data[is.na(data)] <- -999 - expect_known_hash(data, "638d5588ce") + expect_known_hash(data, "62f714b7c9") abundances <- data %>% dplyr::select(-censusdate) data <- abundance(portal_data_path, time = "newmoon", min_plots = 1, @@ -59,7 +59,7 @@ test_that("data generated by na_drop = FALSE, zero_drop = FALSE is same", { expect_equal(dim(data), c(464, 22)) expect_known_hash(is.na(data), "b2d5abb360") data[is.na(data)] <- -999 - expect_known_hash(data, "59b85b7415") + expect_known_hash(data, "6c800c6b50") expect_equal(data %>% dplyr::select(-newmoonnumber), abundances) })