diff --git a/NEWS.md b/NEWS.md index 226629b7..9990ed9e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,10 @@ # r2dii.analysis (development version) * `target_market_share()` now correctly outputs `technology_share` with - multiple loans to the same company (@georgeharris2deg #262). + multiple loans to the same company (@georgeharris2deg #262, @ab-bbva #265). + +* `target_market_share()` now errors if input `data` has an unexpected column + (@georgeharris2deg #267). # r2dii.analysis 0.1.4 diff --git a/R/target_market_share.R b/R/target_market_share.R index bb7ff169..f3b2b346 100644 --- a/R/target_market_share.R +++ b/R/target_market_share.R @@ -94,7 +94,7 @@ target_market_share <- function(data, } data <- ungroup(warn_grouped(data, "Ungrouping input data.")) - + check_unexpected_columns(data) data <- aggregate_by_loan_id(data) crucial_scenario <- c("scenario", "tmsr", "smsp") @@ -390,10 +390,15 @@ reweight_technology_share <- function(data, ...) { } aggregate_by_loan_id <- function(data) { - aggregate_columns <- c("id_loan", "loan_size_outstanding", "loan_size_credit_limit") data %>% - dplyr::group_by_at(setdiff(names(data), aggregate_columns)) %>% + group_by( + .data$level, + .data$loan_size_outstanding_currency, + .data$loan_size_credit_limit_currency, + .data$name_ald, + .data$sector_ald + ) %>% summarize( id_loan = first(.data$id_loan), loan_size_outstanding = sum(.data$loan_size_outstanding), @@ -401,3 +406,49 @@ aggregate_by_loan_id <- function(data) { ) %>% ungroup() } + +check_unexpected_columns <- function(data) { + + possible_matched_columns <- c( + "id_loan", + "id_direct_loantaker", + "name_direct_loantaker", + "id_intermediate_parent_1", + "name_intermediate_parent_1", + "id_ultimate_parent", + "name_ultimate_parent", + "loan_size_outstanding", + "loan_size_outstanding_currency", + "loan_size_credit_limit", + "loan_size_credit_limit_currency", + "sector_classification_system", + "sector_classification_input_type", + "sector_classification_direct_loantaker", + "fi_type", + "flag_project_finance_loan", + "name_project", + "lei_direct_loantaker", + "isin_direct_loantaker", + "id_2dii", + "level", + "sector", + "sector_ald", + "name", + "name_ald", + "score", + "source", + "borderline" + ) + +unexpected_names <- setdiff(names(data), possible_matched_columns) + + if (length(unexpected_names) != 0) { + abort( + glue("Loanbook has unexpected names: `{unexpected_names}`."), + class = "unexpected_names" + ) + } + + invisible(data) + +} diff --git a/tests/testthat/test-target_market_share.R b/tests/testthat/test-target_market_share.R index 99d5f02b..3b089964 100644 --- a/tests/testthat/test-target_market_share.R +++ b/tests/testthat/test-target_market_share.R @@ -676,3 +676,68 @@ test_that("for one company with multiple loans of different size, unweighted expect_equal(projected$production, fake_ald()$production) }) + +test_that("with bad column errors with informative message (#267)", { + + bad_matched <- fake_matched( + bad_column = "bad" + ) + + expect_error( + class = "unexpected_names", + target_market_share( + bad_matched, + fake_ald(), + fake_scenario() + ) + ) +}) + +test_that("`technology_share` outputs consistently when multiple + direct_loantakers match to a single company (#265)", { + + matched <- fake_matched( + id_loan = c("L1", "L2", "L3", "L4", "L5"), + name_ald = c(rep("company a", 4), "company b") + ) + + matched_split_dl <- matched %>% + mutate(name_direct_loantaker = c("company a1", "company a2", "company a3", "company a4", "company b")) + + ald <- fake_ald( + name_company = rep(c("company a","company b"), each = 2), + technology = rep(c("ice", "electric"), 2), + production = c(8, 2, 15, 5) + ) + + scenario <- fake_scenario( + technology = c("ice", "electric") + ) + + out <- target_market_share( + matched, + ald, + scenario, + region_isos_stable + ) %>% + filter( + metric == "projected", + year == 2025, + technology == "ice" + ) + + out_split_dl <- target_market_share( + matched_split_dl, + ald, + scenario, + region_isos_stable + ) %>% + filter( + metric == "projected", + year == 2025, + technology == "ice" + ) + + expect_equal(out$technology_share, out_split_dl$technology_share) + +})