diff --git a/R/target_sda.R b/R/target_sda.R index 3dc4a261..655774a8 100644 --- a/R/target_sda.R +++ b/R/target_sda.R @@ -106,6 +106,7 @@ target_sda <- function(data, ) check_crucial_names(data, crucial_portfolio) + check_unique_id(data, "id_loan") walk(crucial_portfolio, ~ check_no_value_is_missing(data, .x)) check_crucial_names(ald, crucial_ald) @@ -120,11 +121,13 @@ target_sda <- function(data, by_company ) - loanbook_with_weighted_emission_factors <- data %>% - calculate_weighted_emission_factor(ald, - !!!rlang::syms(loanbook_summary_groups), - use_credit_limit = use_credit_limit - ) + loanbook_with_weighted_emission_factors <- calculate_weighted_emission_factor( + data, + ald, + !!!rlang::syms(loanbook_summary_groups), + use_credit_limit = use_credit_limit, + by_company = by_company + ) if (identical(nrow(loanbook_with_weighted_emission_factors), 0L)) { rlang::warn("Found no match between loanbook and ald.") @@ -183,10 +186,14 @@ maybe_add_name_ald <- function(data, by_company = FALSE) { calculate_weighted_emission_factor <- function(data, ald, ..., - use_credit_limit = FALSE) { + use_credit_limit = FALSE, + by_company = FALSE) { data %>% inner_join(ald, by = ald_columns()) %>% - add_loan_weighted_emission_factor(use_credit_limit = use_credit_limit) %>% + add_loan_weighted_emission_factor( + use_credit_limit = use_credit_limit, + by_company = by_company + ) %>% group_by(...) %>% summarize( emission_factor_projected = sum(.data$weighted_loan_emission_factor) @@ -195,25 +202,30 @@ calculate_weighted_emission_factor <- function(data, rename(sector = .data$sector_ald) } -add_loan_weighted_emission_factor <- function(data, use_credit_limit) { - loan_size <- paste0( - "loan_size_", ifelse(use_credit_limit, "credit_limit", "outstanding") - ) +add_loan_weighted_emission_factor <- function(data, use_credit_limit, by_company = FALSE) { + if (by_company) { + data %>% + mutate(weighted_loan_emission_factor = .data$emission_factor) + } else { + loan_size <- paste0( + "loan_size_", ifelse(use_credit_limit, "credit_limit", "outstanding") + ) - distinct_loans_by_sector <- data %>% - ungroup() %>% - group_by(.data$sector_ald) %>% - distinct(.data$id_loan, .data[[loan_size]]) + distinct_loans_by_sector <- data %>% + ungroup() %>% + group_by(.data$sector_ald) %>% + distinct(.data$id_loan, .data[[loan_size]]) - total_size_by_sector <- distinct_loans_by_sector %>% - summarize(total_size = sum(.data[[loan_size]])) + total_size_by_sector <- distinct_loans_by_sector %>% + summarize(total_size = sum(.data[[loan_size]])) - data %>% - left_join(total_size_by_sector, by = "sector_ald") %>% - mutate( - loan_weight = .data[[loan_size]] / .data$total_size, - weighted_loan_emission_factor = .data$emission_factor * .data$loan_weight - ) + data %>% + left_join(total_size_by_sector, by = "sector_ald") %>% + mutate( + loan_weight = .data[[loan_size]] / .data$total_size, + weighted_loan_emission_factor = .data$emission_factor * .data$loan_weight + ) + } } calculate_market_average <- function(data) { diff --git a/R/utils.R b/R/utils.R index 1571d4dc..10fbe047 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,6 +9,17 @@ check_no_value_is_missing <- function(data, column) { invisible(data) } +check_unique_id <- function(data, column) { + if (sum(duplicated(data[[column]]))) { + abort( + class = "unique_ids", + sprintf("Column `%s` must not contain any duplicates.", column) + ) + } + + invisible(data) +} + warn_grouped <- function(data, message) { if (dplyr::is_grouped_df(data)) warn(message) diff --git a/tests/testthat/test-target_sda.R b/tests/testthat/test-target_sda.R index 67c1daab..8e325219 100644 --- a/tests/testthat/test-target_sda.R +++ b/tests/testthat/test-target_sda.R @@ -208,6 +208,7 @@ test_that("with known input outputs as expected", { test_that("with known input outputs as expected, at company level (#155)", { matched <- fake_matched( + id_loan = c(1, 2), name_ald = c("shaanxi auto", "company 2"), sector_ald = "cement" ) @@ -261,6 +262,36 @@ test_that("with no matching data warns", { ) }) +test_that("with duplicated id_loan weights emission_factor as expected (#160)", { + match_result <- fake_matched( + id_loan = c(1, 1), + name_ald = rep("large company", 2), + sector_ald = "cement" + ) + + ald <- fake_ald( + sector = "cement", + name_company = "large company", + emission_factor = 2, + year = c(2020, 2025) + ) + + scen <- fake_co2_scenario( + year = c(2020, 2025), + emission_factor = c(1, 0.5) + ) + + expect_error( + target_sda( + match_result, + ald, + scen + ) %>% + filter(year == min(year)), + class = "unique_ids" + ) +}) + test_that("with NAs in crucial columns errors with informative message (#146)", { expect_error_crucial_NAs_portfolio <- function(name) { data <- fake_matched(sector_ald = "cement")