Skip to content

Commit

Permalink
IA - exported functions (#154)
Browse files Browse the repository at this point in the history
* export four user facing functions

* better name to avoid cnflicts

* update

* update cookbook

* update pkgdown

* improve names of exported functions
  • Loading branch information
jacobvjk authored Sep 24, 2024
1 parent 9ba41a9 commit b58c728
Show file tree
Hide file tree
Showing 12 changed files with 355 additions and 157 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# Generated by roxygen2: do not edit by hand

export(analyse)
export(analyze)
export(match_loanbooks)
export(plot_sankey)
export(plot_scatter)
export(plot_scatter_alignment_exposure)
export(prep_sankey)
export(prep_scatter)
export(prep_scatter_alignment_exposure)
export(prepare_abcd)
export(prioritise_and_diagnose)
export(prioritize_and_diagnose)
importFrom(dplyr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
Expand Down
29 changes: 29 additions & 0 deletions R/analyse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' Analyse the loan book data sets used in the PACTA for Supervisors analysis
#'
#' @description
#' `analyse()` runs the necessary steps to analyse the matched loan books,
#' producing both the outputs of the standard PACTA analysis and the outputs of
#' the net aggregated alignment metric, including tables and plots.
#' Parameters for all steps are read from a `config.yml` file. The function is
#' called for its side effects and writes the prepared and diagnosed data sets
#' to the directory `output/analysis`, where `output` is the
#' output directory specified in the `config.yml`.
#'
#' `analyse()` and `analyze()` are synonyms.
#'
#' @param config either a path to a config.yml file or a list of parameters
#'
#' @return NULL
#' @export
#'
#' @examples
#' # TODO
analyse <- function(config) {
run_pacta(config)
run_aggregate_alignment_metric(config)
plot_aggregate_loanbooks(config)
}

#' @rdname analyse
#' @export
analyze <- analyse
22 changes: 21 additions & 1 deletion R/run_matching.R → R/match_loanbooks.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
run_matching <- function(config) {
#' Match raw input loan books with ABCD for PACTA for Supervisors analysis
#'
#' @description
#' `match_loanbooks()` runs the necessary steps to match the raw input loan books with the
#' asset based company data (ABCD) used in the PACTA for Supervisors analysis.
#' Specifically, it prepares matched loan books based on name matching or direct
#' identifiers, depending on the configuration. The output matched loan books
#' need to be manually validated for further processing.
#' Parameters the matching step are read from a `config.yml` file and follow the
#' options available in `r2dii.match::match_name`. The function is called for its
#' side effects and writes the prepared data sets to the directory `output/match`,
#' where `output` is the output directory specified in the `config.yml`.
#'
#' @param config either a path to a config.yml file or a list of parameters
#'
#' @return NULL
#' @export
#'
#' @examples
#' # TODO
match_loanbooks <- function(config) {
config <- load_config(config)

dir_raw <- get_raw_dir(config)
Expand Down
147 changes: 21 additions & 126 deletions R/prepare_abcd.R
Original file line number Diff line number Diff line change
@@ -1,128 +1,23 @@
#' Prepare input data sets for PACTA for Supervisors analysis
#'
#' @description
#' `prepare_abcd()` runs the necessary steps to prepare the input data sets for
#' the PACTA for Supervisors analysis. Specifically it prepares the abcd_final
#' data set by removing inactive companies if desired. And it allows preparing
#' the ratios by which the exposures to counterparties are split along the sectors.
#' Parameters for both steps are read from a `config.yml` file. The function is
#' called for its side effects and writes the prepared data sets to the directory
#' `output/prepare`, where `output` is the output directory specified in the
#' `config.yml`.
#'
#' @param config either a path to a config.yml file or a list of parameters
#'
#' @return NULL
#' @export
#'
#' @examples
#' # TODO
prepare_abcd <- function(config) {
config <- load_config(config)

abcd_dir <- get_abcd_dir(config)
path_abcd <- get_abcd_path(config)
sheet_abcd <- get_abcd_sheet(config)

remove_inactive_companies <- get_remove_inactive_companies(config)
start_year <- get_start_year(config)
time_frame <- get_time_frame(config)

# validate config values----
stop_if_not_length(path_abcd, 1L)
stop_if_not_inherits(path_abcd, "character")
stop_if_file_not_found(path_abcd, desc = "ABCD")

stop_if_not_length(sheet_abcd, 1L)
stop_if_not_inherits(sheet_abcd, "character")
stop_if_sheet_not_found(sheet_abcd, path_abcd)

if (!is.null(remove_inactive_companies)) {
stop_if_not_length(remove_inactive_companies, 1L)
stop_if_not_inherits(remove_inactive_companies, "logical")
}

stop_if_not_length(start_year, 1L)
stop_if_not_inherits(start_year, "integer")

stop_if_not_length(time_frame, 1L)
stop_if_not_inherits(time_frame, "integer")


# load data----
abcd <- read_abcd_raw(path_abcd, sheet_abcd)
stop_if_not_expected_columns(abcd, cols_abcd, desc = "ABCD")

# optional: remove inactive companies----

# (1) remove company-sector combinations where production in t5 = 0 when
# it was greater than 0 in t0.
# (2) remove company-sector combinations where production is 0 for the entire
# time frame from t0 to t5.
rm_inactive_companies <- function(data,
start_year,
time_frame) {
comp_sec_no_prod_t5 <- data %>%
dplyr::filter(
.data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
) %>%
dplyr::summarise(
sum_production = sum(.data[["production"]], na.rm = TRUE),
.by = c("name_company", "sector", "year")
) %>%
tidyr::pivot_wider(
names_from = "year",
names_prefix = "prod_",
values_from = "sum_production"
) %>%
dplyr::filter(
.data[[paste0("prod_", start_year)]] > 0,
.data[[paste0("prod_", start_year + time_frame)]] == 0
) %>%
dplyr::distinct(
.data[["name_company"]],
.data[["sector"]]
)

comp_sec_no_prod_t0_to_t5 <- data %>%
dplyr::filter(
.data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
) %>%
dplyr::summarise(
sum_production = sum(.data[["production"]], na.rm = TRUE),
.by = c("name_company", "sector")
) %>%
dplyr::filter(
.data[["sum_production"]] == 0
) %>%
dplyr::distinct(
.data[["name_company"]],
.data[["sector"]]
)

data <- data %>%
dplyr::anti_join(
comp_sec_no_prod_t5,
by = c("name_company", "sector")
) %>%
dplyr::anti_join(
comp_sec_no_prod_t0_to_t5,
by = c("name_company", "sector")
)

return(data)
}

if (remove_inactive_companies) {
abcd_keep <- abcd %>%
rm_inactive_companies(
start_year = start_year,
time_frame = time_frame
)

abcd_removed <- abcd %>%
dplyr::anti_join(
abcd_keep,
by = c("company_id", "sector")
)

# write removed inactive companies to file for inspection
abcd_removed %>%
readr::write_csv(
file.path(abcd_dir, "abcd_removed_inactive_companies.csv"),
na = ""
)

abcd <- abcd_keep

rm(abcd_keep)
}

# write final version of abcd to file for use PACTA analysis
abcd %>%
readr::write_csv(
file.path(abcd_dir, "abcd_final.csv"),
na = ""
)
remove_inactive_companies(config)
prepare_sector_split(config)
}
29 changes: 29 additions & 0 deletions R/prioritise_and_diagnose.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' Prioritise and diagnose the loan book data sets used in the PACTA for Supervisors analysis
#'
#' @description
#' `prioritise_and_diagnose()` runs the necessary steps to prioritise the matched
#' loan books and diagnose both the match success rate and the coverage of the
#' real economy assets by the matched loan books.
#' Parameters for all steps are read from a `config.yml` file. The function is
#' called for its side effects and writes the prepared and diagnosed data sets
#' to the directory `output/prioritise_and_diagnose`, where `output` is the
#' output directory specified in the `config.yml`.
#'
#' `prioritise_and_diagnose()` and `prioritize_and_diagnose()` are synonyms.
#'
#' @param config either a path to a config.yml file or a list of parameters
#'
#' @return NULL
#' @export
#'
#' @examples
#' # TODO
prioritise_and_diagnose <- function(config) {
run_match_prioritize(config)
run_calculate_match_success_rate(config)
run_calculate_loanbook_coverage(config)
}

#' @rdname prioritise_and_diagnose
#' @export
prioritize_and_diagnose <- prioritise_and_diagnose
128 changes: 128 additions & 0 deletions R/remove_inactive_companies.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
remove_inactive_companies <- function(config) {
config <- load_config(config)

abcd_dir <- get_abcd_dir(config)
path_abcd <- get_abcd_path(config)
sheet_abcd <- get_abcd_sheet(config)

remove_inactive_companies <- get_remove_inactive_companies(config)
start_year <- get_start_year(config)
time_frame <- get_time_frame(config)

# validate config values----
stop_if_not_length(path_abcd, 1L)
stop_if_not_inherits(path_abcd, "character")
stop_if_file_not_found(path_abcd, desc = "ABCD")

stop_if_not_length(sheet_abcd, 1L)
stop_if_not_inherits(sheet_abcd, "character")
stop_if_sheet_not_found(sheet_abcd, path_abcd)

if (!is.null(remove_inactive_companies)) {
stop_if_not_length(remove_inactive_companies, 1L)
stop_if_not_inherits(remove_inactive_companies, "logical")
}

stop_if_not_length(start_year, 1L)
stop_if_not_inherits(start_year, "integer")

stop_if_not_length(time_frame, 1L)
stop_if_not_inherits(time_frame, "integer")


# load data----
abcd <- read_abcd_raw(path_abcd, sheet_abcd)
stop_if_not_expected_columns(abcd, cols_abcd, desc = "ABCD")

# optional: remove inactive companies----

# (1) remove company-sector combinations where production in t5 = 0 when
# it was greater than 0 in t0.
# (2) remove company-sector combinations where production is 0 for the entire
# time frame from t0 to t5.
rm_inactive_companies <- function(data,
start_year,
time_frame) {
comp_sec_no_prod_t5 <- data %>%
dplyr::filter(
.data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
) %>%
dplyr::summarise(
sum_production = sum(.data[["production"]], na.rm = TRUE),
.by = c("name_company", "sector", "year")
) %>%
tidyr::pivot_wider(
names_from = "year",
names_prefix = "prod_",
values_from = "sum_production"
) %>%
dplyr::filter(
.data[[paste0("prod_", start_year)]] > 0,
.data[[paste0("prod_", start_year + time_frame)]] == 0
) %>%
dplyr::distinct(
.data[["name_company"]],
.data[["sector"]]
)

comp_sec_no_prod_t0_to_t5 <- data %>%
dplyr::filter(
.data[["year"]] %in% c(.env[["start_year"]], .env[["start_year"]] + .env[["time_frame"]])
) %>%
dplyr::summarise(
sum_production = sum(.data[["production"]], na.rm = TRUE),
.by = c("name_company", "sector")
) %>%
dplyr::filter(
.data[["sum_production"]] == 0
) %>%
dplyr::distinct(
.data[["name_company"]],
.data[["sector"]]
)

data <- data %>%
dplyr::anti_join(
comp_sec_no_prod_t5,
by = c("name_company", "sector")
) %>%
dplyr::anti_join(
comp_sec_no_prod_t0_to_t5,
by = c("name_company", "sector")
)

return(data)
}

if (remove_inactive_companies) {
abcd_keep <- abcd %>%
rm_inactive_companies(
start_year = start_year,
time_frame = time_frame
)

abcd_removed <- abcd %>%
dplyr::anti_join(
abcd_keep,
by = c("company_id", "sector")
)

# write removed inactive companies to file for inspection
abcd_removed %>%
readr::write_csv(
file.path(abcd_dir, "abcd_removed_inactive_companies.csv"),
na = ""
)

abcd <- abcd_keep

rm(abcd_keep)
}

# write final version of abcd to file for use PACTA analysis
abcd %>%
readr::write_csv(
file.path(abcd_dir, "abcd_final.csv"),
na = ""
)
}
4 changes: 4 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ reference:
- title: Analysis
desc: Run the analysis.
- contents:
- prepare_abcd
- match_loanbooks
- prioritise_and_diagnose
- analyse
- run_pacta

- title: Plotting
Expand Down
Loading

0 comments on commit b58c728

Please sign in to comment.