fix(FIMSOutput): Get all info out of sdreport

Use all the information in sdreport rather than relying on specific functions to pull out the values. Indexing vectors are created and then they are used based on the current order of variables. This could be made smarter in the future. There are a lot of notes in the .R file on things that need changed or augmented for the future. Committing now and giving Sam output.
NOAA-FIMS · Jul 17, 2024 · 4b8fb6d · 4b8fb6d
1 parent d49bd58
commit 4b8fb6d
Showing 1 changed file with 149 additions and 110 deletions.
diff --git a/R/FIMSOutput.R b/R/FIMSOutput.R
@@ -29,6 +29,14 @@ setClass(
   contains = class(dplyr::tibble())
 )
 
+# Getters
+
+setGeneric("estimates", function(x) standardGeneric("estimates"))
+setMethod("estimates", "FIMSOutput", function(x) x@estimates)
+
+setGeneric("fits", function(x) standardGeneric("fits"))
+setMethod("fits", "FIMSOutput", function(x) x@fits)
+
 # Constructors ----
 # All constructors in this file are documented in 1 roxygen file via @rdname.
 
@@ -41,7 +49,7 @@ setClass(
 #' @export
 #' @rdname FIMSOutput
 #' @param obj The output from `obj$report(obj$env$last.par.best)`.
-#' @param sdr sdr The returned object from running [TMB::sdreport()] on `obj`.
+#' @param sdr sdr The returned object from running [TMB::sdreport(obj)].
 #' @param call The call the users made to run FIMS. The default is `NULL`.
 #'   This is not available for FIMS yet.
 #' @param data The input data used to fit the model. This needs to be a
@@ -52,130 +60,161 @@ setClass(
 #' called `data` to store the input data frame. Additional slots are dependent
 #' on the child class. Use [showClass()] to see all available slots.
 create_fims_output <- function(obj, sdr, call = NULL, data) {
-  # report <- obj$report(obj$env$last.par.best) # causes failure
-  # SSB and Biomass
-  # will pull the information from obj and/or sdreport for estimates par is a
-  # placeholder The following only applies to SSB and Biomass and need to
-  # modify to include other parameters and quantities
-  estimate_biomass_tibble <- function(derived_quantity_name,
-                                      sdr = sdr,
-                                      data = data) {
-    dplyr::tibble(
-      label = derived_quantity_name,
-      age = NA,
-      time = 0:n_years(data),
-      initial = NA,
-      estimates = sdr$value[names(sdr$value) == derived_quantity_name],
-      uncertainty = sdr$sd[names(sdr$value) == derived_quantity_name],
-      likelihood = NA,
-      gradient = NA,
-      estimated = NA
-    )
-  }
-  # NAA
-  estimate_naa_tibble <- function(derived_quantity_name,
-                                  sdr = sdr,
-                                  data = data) {
-    dplyr::tibble(
-      label = derived_quantity_name,
-      age = rep(1:n_ages(data), times = (n_years(data) + 1)),
-      time = rep(0:n_years(data), each = n_ages(data)),
-      initial = NA,
-      estimates = sdr$value[names(sdr$value) == derived_quantity_name],
-      uncertainty = sdr$sd[names(sdr$value) == derived_quantity_name],
-      likelihood = NA,
-      gradient = NA,
-      estimated = NA
-    )
-  }
-
-  estimates_biomass <- do.call(
-    "rbind",
-    lapply(
-      c("SSB", "Biomass"),
-      estimate_biomass_tibble,
-      sdr = sdr,
-      data = data
-    )
-  )
+  # report <- obj$report(obj$env$last.par.best)
 
-  # recruitment (need to add uncertainty)
-  estimate_recruitment_tibble <- function(obj, data) {
-    dplyr::tibble(
-      label = "recruitment",
-      age = NA,
-      time = 1:(n_years(data) + 1),
-      initial = NA,
-      estimates = 0.5, # Should be report$recruitment[[1]],
-      uncertainty = NA,
-      likelihood = NA,
-      gradient = NA,
-      estimated = NA
-    )
-  }
-
-  # the fishing mortality needs the year vector
-  estimate_Fmort_tibble <- function(par_name, sdr = sdr) {
-    dplyr::tibble(
-      label = par_name,
-      age = NA,
-      time = NA,
-      initial = NA,
-      estimates = sdr$value[names(sdr$value) == par_name],
-      uncertainty = sdr$sd[names(sdr$value) == par_name],
-      likelihood = NA,
-      gradient = NA,
-      estimated = NA
-    )
-  }
-
-  Fmort_estimate <- estimate_Fmort_tibble("FMort", sdr)
+  # The code will break if
+  # * The age bins are not the same as the ages in the data, e.g., if age 3 is
+  #   not in the data or if there is a plus group that is created internally
+  # * The model reports values for age zero fish but age zero is not in the data
+  # Questions:
+  # * How should the initial year should be labeled, e.g., should it always be
+  #   zero or one minus the initial year?
+  # * Should we be reporting the covariance from sdr?
+  # * Should time by year because that is what it actually is at the moment?
+  # * What order is the index and fishing mortality information given in?
+  # Things to change in C++ code
+  # [ ] SSB should be SB
+  # [ ] Recruitment should be in sdr to get uncertainty
+  # Things to change in FIMSFrame
+  # [x] fleets(data) should give fleet names not the number of fleets
+  # [x] need n_fleets(data)
 
-  # natural mortality
-  estimate_M_tibble <- function(obj, data) {
-    dplyr::tibble(
-      label = "natM",
-      age = rep(0:n_ages(data), times = n_years(data)),
-      # TODO:
-      # I think that we want the actual range of years rather than starting at 1
-      time = rep(1:n_years(data), each = 1 + n_ages(data)),
-      initial = NA,
-      estimates = 0.2, # should be report$M[[1]],
-      uncertainty = NA,
-      likelihood = NA,
-      gradient = NA,
-      estimated = NA
-    )
-  }
-
-  M_estimate <- estimate_M_tibble(obj = obj, data = data)
-
-  estimates <- rbind(
-    M_estimate,
-    Fmort_estimate,
-    estimates_biomass,
-    estimate_recruitment_tibble(obj = obj, data = data),
-    estimate_naa_tibble("NAA", sdr, data = data)
-  )
-
-  # will pull the information from obj and/or sdreport for fits
-  # NA is a placeholder
-  fits <- dplyr::tibble()
   # Need to run the following timestamp code before running FIMS and after.
   timestamp <- as.POSIXlt(Sys.time(), tz = "UTC")
+  # The version should be taken from the FIMS wrapper to run the model rather
+  # than the version used to read in the results because they in theory
+  # could be different?
   version <- utils::packageVersion("FIMS")
   # match.call has to be used to create the call when the user run FIMS
   # call <- match.call()
 
   # Fill the empty data frames with data extracted from the data file
   out <- new(
     "FIMSOutput",
-    estimates = estimates,
-    fits = fits,
+    estimates = get_estimates(sdr, data),
+    fits = get_fits(),
     tmb = obj,
     call = call,
     timestamp = timestamp,
     version = version
   )
   return(out)
 }
+
+# Questions
+# * Is there a purrr or dplyr::split way to make the indexing more robust?
+#   E.g., if if the name is length 30 then, length 31 then, length 720 then as
+#   in can you use n_fleets, n_ages, n_years to determine the order of the
+#   index?
+get_estimates <- function(sdr, data) {
+  # Need to define tibble with zero rows so NA will be used to fill in
+  # missing sections when combining estimates and derived quantities later
+  estimates_outline <- dplyr::tibble(
+    label = character(),
+    fleet = character(),
+    age = numeric(),
+    time = numeric(),
+    initial = numeric(),
+    estimate = numeric(),
+    uncertainty = numeric(),
+    likelihood = numeric(),
+    gradient = numeric(),
+    estimated = logical()
+  )
+
+  # Rules of reporting in sdreport
+  # Ages are always reported by age then by year, e.g.,
+  # age 1, age 2, age 3 for year 1 then age 1, age 2, age 3 for year 2
+  year_int_line <- (start_year(data) - 1):end_year(data)
+  age_int_line <- 1:n_ages(data)
+  year_vector <- c(
+    # Numbers at age
+    rep(year_int_line, each = n_ages(data)),
+    # Biomass
+    rep(year_int_line, 1),
+    # Spawning biomass
+    rep(year_int_line, 1),
+    # Ln Recruitment Deviations
+    rep(year_int_line[-1], 1),
+    # Fishing mortality
+    rep(year_int_line[-1], times = n_fleets(data)),
+    # Index
+    rep(year_int_line[-1], times = n_fleets(data)),
+    # Catch numbers at age
+    rep(rep(year_int_line[-1], each = n_ages(data)), times = n_fleets(data))
+  )
+  age_vector <- c(
+    # Numbers at age
+    rep(age_int_line, times = length(year_int_line)),
+    # Biomass
+    rep(NA, length(year_int_line)),
+    # Spawning biomass
+    rep(NA, length(year_int_line)),
+    # Ln Recruitment Deviations
+    rep(NA, n_years(data)),
+    # Fishing mortality
+    rep(NA, each = n_years(data) * n_fleets(data)),
+    # Index
+    rep(NA, each = n_years(data) * n_fleets(data)),
+    # Catch numbers at age
+    rep(rep(age_int_line, times = n_years(data)), times = n_fleets(data))
+  )
+  fleet_vector <- c(
+    # Numbers at age
+    rep(NA, n_ages(data) * length(year_int_line)),
+    # Biomass
+    rep(NA, length(year_int_line)),
+    # Spawning biomass
+    rep(NA, length(year_int_line)),
+    # Ln Recruitment Deviations
+    rep(NA, n_years(data)),
+    # Fishing mortality
+    rep(fleets(data), each = n_years(data)),
+    # Index
+    rep(fleets(data), each = n_years(data)),
+    # Catch numbers at age
+    rep(fleets(data), each = n_ages(data) * n_years(data))
+  )
+  sdr_tibble <- tibble::tibble(
+    label = names(sdr[["value"]]),
+    fleet = fleet_vector,
+    age = age_vector,
+    time = year_vector,
+    estimate = sdr[["value"]],
+    uncertainty = sdr[["sd"]]
+  )
+
+  fixed_effects_tibble <- dplyr::tibble(
+    label = "",
+    estimate = sdr[["par.fixed"]],
+    uncertainty = sqrt(diag(sdr[["cov.fixed"]])),
+    gradient = sdr[["gradient.fixed"]][1, ]
+  )
+  estimates <- estimates_outline |>
+    dplyr::full_join(
+      sdr_tibble,
+      by = c("label", "fleet", "age", "time", "estimate", "uncertainty")
+    ) |>
+    dplyr::full_join(
+      fixed_effects_tibble,
+      by = c("label", "estimate", "uncertainty", "gradient")
+    ) |>
+    dplyr::mutate(
+      label = dplyr::case_when(
+        label == "Biomass" ~ "pop-b",
+        label == "ExpectedIndex" ~ "pop-index",
+        label == "FMort" ~ "pop-F_mort",
+        label == "LogRecDev" ~ "rec-ln_rec_dev",
+        grepl("NAA", label) ~ "pop-naa",
+        label == "SSB" ~ "pop-sb",
+        .default = label
+      )
+    )
+  return(estimates)
+}
+
+get_fits <- function(obj, sdr) {
+  fits <- dplyr::tibble(
+  )
+  return(fits)
+}