IA - exported functions (#154)

* export four user facing functions * better name to avoid cnflicts * update * update cookbook * update pkgdown * improve names of exported functions
RMI-PACTA · Sep 24, 2024 · b58c728 · b58c728
1 parent 9ba41a9
commit b58c728
Show file tree

Hide file tree

Showing 12 changed files with 355 additions and 157 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,11 +1,17 @@
 # Generated by roxygen2: do not edit by hand
 
+export(analyse)
+export(analyze)
+export(match_loanbooks)
 export(plot_sankey)
 export(plot_scatter)
 export(plot_scatter_alignment_exposure)
 export(prep_sankey)
 export(prep_scatter)
 export(prep_scatter_alignment_exposure)
+export(prepare_abcd)
+export(prioritise_and_diagnose)
+export(prioritize_and_diagnose)
 importFrom(dplyr,"%>%")
 importFrom(rlang,":=")
 importFrom(rlang,.data)

diff --git a/R/analyse.R b/R/analyse.R
@@ -0,0 +1,29 @@
+#' Analyse the loan book data sets used in the PACTA for Supervisors analysis
+#'
+#' @description
+#' `analyse()` runs the necessary steps to analyse the matched loan books,
+#' producing both the outputs of the standard PACTA analysis and the outputs of
+#' the net aggregated alignment metric, including tables and plots.
+#' Parameters for all steps are read from a `config.yml` file. The function is
+#' called for its side effects and writes the prepared and diagnosed data sets
+#' to the directory `output/analysis`, where `output` is the
+#' output directory  specified in the `config.yml`.
+#'
+#' `analyse()` and `analyze()` are synonyms.
+#'
+#' @param config either a path to a config.yml file or a list of parameters
+#'
+#' @return NULL
+#' @export
+#'
+#' @examples
+#' # TODO
+analyse <- function(config) {
+  run_pacta(config)
+  run_aggregate_alignment_metric(config)
+  plot_aggregate_loanbooks(config)
+}
+
+#' @rdname analyse
+#' @export
+analyze <- analyse
diff --git a/R/run_matching.R → R/match_loanbooks.R b/R/run_matching.R → R/match_loanbooks.R
@@ -1,4 +1,24 @@
-run_matching <- function(config) {
+#' Match raw input loan books with ABCD for PACTA for Supervisors analysis
+#'
+#' @description
+#' `match_loanbooks()` runs the necessary steps to match the raw input loan books with the
+#' asset based company data (ABCD) used in the PACTA for Supervisors analysis.
+#' Specifically, it prepares matched loan books based on name matching or direct
+#' identifiers, depending on the configuration. The output matched loan books
+#' need to be manually validated for further processing.
+#' Parameters the matching step are read from a `config.yml` file and follow the
+#' options available in `r2dii.match::match_name`. The function is called for its
+#' side effects and writes the prepared data sets to the directory `output/match`,
+#' where `output` is the output directory specified in the `config.yml`.
+#'
+#' @param config either a path to a config.yml file or a list of parameters
+#'
+#' @return NULL
+#' @export
+#'
+#' @examples
+#' # TODO
+match_loanbooks <- function(config) {
   config <- load_config(config)
 
   dir_raw <- get_raw_dir(config)

diff --git a/R/prepare_abcd.R b/R/prepare_abcd.R
@@ -1,128 +1,23 @@
+#' Prepare input data sets for PACTA for Supervisors analysis
+#'
+#' @description
+#' `prepare_abcd()` runs the necessary steps to prepare the input data sets for
+#' the PACTA for Supervisors analysis. Specifically it prepares the abcd_final
+#' data set by removing inactive companies if desired. And it allows preparing
+#' the ratios by which the exposures to counterparties are split along the sectors.
+#' Parameters for both steps are read from a `config.yml` file. The function is
+#' called for its side effects and writes the prepared data sets to the directory
+#' `output/prepare`, where `output` is the output directory specified in the
+#' `config.yml`.
+#'
+#' @param config either a path to a config.yml file or a list of parameters
+#'
+#' @return NULL
+#' @export
+#'
+#' @examples
+#' # TODO
 prepare_abcd <- function(config) {
-  config <- load_config(config)
-
-  abcd_dir <- get_abcd_dir(config)
-  path_abcd <- get_abcd_path(config)
-  sheet_abcd <- get_abcd_sheet(config)
-
-  remove_inactive_companies <- get_remove_inactive_companies(config)
-  start_year <- get_start_year(config)
-  time_frame <- get_time_frame(config)
-
-  # validate config values----
-  stop_if_not_length(path_abcd, 1L)
-  stop_if_not_inherits(path_abcd, "character")
-  stop_if_file_not_found(path_abcd, desc = "ABCD")
-
-  stop_if_not_length(sheet_abcd, 1L)
-  stop_if_not_inherits(sheet_abcd, "character")
-  stop_if_sheet_not_found(sheet_abcd, path_abcd)
-
-  if (!is.null(remove_inactive_companies)) {
-    stop_if_not_length(remove_inactive_companies, 1L)
-    stop_if_not_inherits(remove_inactive_companies, "logical")
-  }
-
-  stop_if_not_length(start_year, 1L)
-  stop_if_not_inherits(start_year, "integer")
-
-  stop_if_not_length(time_frame, 1L)
-  stop_if_not_inherits(time_frame, "integer")
-
-
-  # load data----
-  abcd <- read_abcd_raw(path_abcd, sheet_abcd)
-  stop_if_not_expected_columns(abcd, cols_abcd, desc = "ABCD")
-
-  # optional: remove inactive companies----
-
-  # (1) remove company-sector combinations where production in t5 = 0 when
-  # it was greater than 0 in t0.
-  # (2) remove company-sector combinations where production is 0 for the entire
-  # time frame from t0 to t5.
-  rm_inactive_companies <- function(data,
-                                    start_year,
-                                    time_frame) {
-    comp_sec_no_prod_t5 <- data %>%
-      dplyr::filter(
-        .data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
-      ) %>%
-      dplyr::summarise(
-        sum_production = sum(.data[["production"]], na.rm = TRUE),
-        .by = c("name_company", "sector", "year")
-      ) %>%
-      tidyr::pivot_wider(
-        names_from = "year",
-        names_prefix = "prod_",
-        values_from = "sum_production"
-      ) %>%
-      dplyr::filter(
-        .data[[paste0("prod_", start_year)]] > 0,
-        .data[[paste0("prod_", start_year + time_frame)]] == 0
-      ) %>%
-      dplyr::distinct(
-        .data[["name_company"]],
-        .data[["sector"]]
-      )
-
-    comp_sec_no_prod_t0_to_t5 <- data %>%
-      dplyr::filter(
-        .data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
-      ) %>%
-      dplyr::summarise(
-        sum_production = sum(.data[["production"]], na.rm = TRUE),
-        .by = c("name_company", "sector")
-      ) %>%
-      dplyr::filter(
-        .data[["sum_production"]] == 0
-      ) %>%
-      dplyr::distinct(
-        .data[["name_company"]],
-        .data[["sector"]]
-      )
-
-    data <- data %>%
-      dplyr::anti_join(
-        comp_sec_no_prod_t5,
-        by = c("name_company", "sector")
-      ) %>%
-      dplyr::anti_join(
-        comp_sec_no_prod_t0_to_t5,
-        by = c("name_company", "sector")
-      )
-
-    return(data)
-  }
-
-  if (remove_inactive_companies) {
-    abcd_keep <- abcd %>%
-      rm_inactive_companies(
-        start_year = start_year,
-        time_frame = time_frame
-      )
-
-    abcd_removed <- abcd %>%
-      dplyr::anti_join(
-        abcd_keep,
-        by = c("company_id", "sector")
-      )
-
-    # write removed inactive companies to file for inspection
-    abcd_removed %>%
-      readr::write_csv(
-        file.path(abcd_dir, "abcd_removed_inactive_companies.csv"),
-        na = ""
-      )
-
-    abcd <- abcd_keep
-
-    rm(abcd_keep)
-  }
-
-  # write final version of abcd to file for use PACTA analysis
-  abcd %>%
-    readr::write_csv(
-      file.path(abcd_dir, "abcd_final.csv"),
-      na = ""
-    )
+  remove_inactive_companies(config)
+  prepare_sector_split(config)
 }
diff --git a/R/prioritise_and_diagnose.R b/R/prioritise_and_diagnose.R
@@ -0,0 +1,29 @@
+#' Prioritise and diagnose the loan book data sets used in the PACTA for Supervisors analysis
+#'
+#' @description
+#' `prioritise_and_diagnose()` runs the necessary steps to prioritise the matched
+#' loan books and diagnose both the match success rate and the coverage of the
+#' real economy assets by the matched loan books.
+#' Parameters for all steps are read from a `config.yml` file. The function is
+#' called for its side effects and writes the prepared and diagnosed data sets
+#' to the directory `output/prioritise_and_diagnose`, where `output` is the
+#' output directory  specified in the `config.yml`.
+#'
+#' `prioritise_and_diagnose()` and `prioritize_and_diagnose()` are synonyms.
+#'
+#' @param config either a path to a config.yml file or a list of parameters
+#'
+#' @return NULL
+#' @export
+#'
+#' @examples
+#' # TODO
+prioritise_and_diagnose <- function(config) {
+  run_match_prioritize(config)
+  run_calculate_match_success_rate(config)
+  run_calculate_loanbook_coverage(config)
+}
+
+#' @rdname prioritise_and_diagnose
+#' @export
+prioritize_and_diagnose <- prioritise_and_diagnose
diff --git a/R/remove_inactive_companies.R b/R/remove_inactive_companies.R
@@ -0,0 +1,128 @@
+remove_inactive_companies <- function(config) {
+  config <- load_config(config)
+
+  abcd_dir <- get_abcd_dir(config)
+  path_abcd <- get_abcd_path(config)
+  sheet_abcd <- get_abcd_sheet(config)
+
+  remove_inactive_companies <- get_remove_inactive_companies(config)
+  start_year <- get_start_year(config)
+  time_frame <- get_time_frame(config)
+
+  # validate config values----
+  stop_if_not_length(path_abcd, 1L)
+  stop_if_not_inherits(path_abcd, "character")
+  stop_if_file_not_found(path_abcd, desc = "ABCD")
+
+  stop_if_not_length(sheet_abcd, 1L)
+  stop_if_not_inherits(sheet_abcd, "character")
+  stop_if_sheet_not_found(sheet_abcd, path_abcd)
+
+  if (!is.null(remove_inactive_companies)) {
+    stop_if_not_length(remove_inactive_companies, 1L)
+    stop_if_not_inherits(remove_inactive_companies, "logical")
+  }
+
+  stop_if_not_length(start_year, 1L)
+  stop_if_not_inherits(start_year, "integer")
+
+  stop_if_not_length(time_frame, 1L)
+  stop_if_not_inherits(time_frame, "integer")
+
+
+  # load data----
+  abcd <- read_abcd_raw(path_abcd, sheet_abcd)
+  stop_if_not_expected_columns(abcd, cols_abcd, desc = "ABCD")
+
+  # optional: remove inactive companies----
+
+  # (1) remove company-sector combinations where production in t5 = 0 when
+  # it was greater than 0 in t0.
+  # (2) remove company-sector combinations where production is 0 for the entire
+  # time frame from t0 to t5.
+  rm_inactive_companies <- function(data,
+                                    start_year,
+                                    time_frame) {
+    comp_sec_no_prod_t5 <- data %>%
+      dplyr::filter(
+        .data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
+      ) %>%
+      dplyr::summarise(
+        sum_production = sum(.data[["production"]], na.rm = TRUE),
+        .by = c("name_company", "sector", "year")
+      ) %>%
+      tidyr::pivot_wider(
+        names_from = "year",
+        names_prefix = "prod_",
+        values_from = "sum_production"
+      ) %>%
+      dplyr::filter(
+        .data[[paste0("prod_", start_year)]] > 0,
+        .data[[paste0("prod_", start_year + time_frame)]] == 0
+      ) %>%
+      dplyr::distinct(
+        .data[["name_company"]],
+        .data[["sector"]]
+      )
+
+    comp_sec_no_prod_t0_to_t5 <- data %>%
+      dplyr::filter(
+        .data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
+      ) %>%
+      dplyr::summarise(
+        sum_production = sum(.data[["production"]], na.rm = TRUE),
+        .by = c("name_company", "sector")
+      ) %>%
+      dplyr::filter(
+        .data[["sum_production"]] == 0
+      ) %>%
+      dplyr::distinct(
+        .data[["name_company"]],
+        .data[["sector"]]
+      )
+
+    data <- data %>%
+      dplyr::anti_join(
+        comp_sec_no_prod_t5,
+        by = c("name_company", "sector")
+      ) %>%
+      dplyr::anti_join(
+        comp_sec_no_prod_t0_to_t5,
+        by = c("name_company", "sector")
+      )
+
+    return(data)
+  }
+
+  if (remove_inactive_companies) {
+    abcd_keep <- abcd %>%
+      rm_inactive_companies(
+        start_year = start_year,
+        time_frame = time_frame
+      )
+
+    abcd_removed <- abcd %>%
+      dplyr::anti_join(
+        abcd_keep,
+        by = c("company_id", "sector")
+      )
+
+    # write removed inactive companies to file for inspection
+    abcd_removed %>%
+      readr::write_csv(
+        file.path(abcd_dir, "abcd_removed_inactive_companies.csv"),
+        na = ""
+      )
+
+    abcd <- abcd_keep
+
+    rm(abcd_keep)
+  }
+
+  # write final version of abcd to file for use PACTA analysis
+  abcd %>%
+    readr::write_csv(
+      file.path(abcd_dir, "abcd_final.csv"),
+      na = ""
+    )
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -6,6 +6,10 @@ reference:
 - title: Analysis
   desc:  Run the analysis.
 - contents:
+  - prepare_abcd
+  - match_loanbooks
+  - prioritise_and_diagnose
+  - analyse
   - run_pacta
 
 - title: Plotting