Skip to content

Commit

Permalink
target_sda() fails if id_loan is duplicated (#164)
Browse files Browse the repository at this point in the history
* target_sda failes if id_loan is duplicated
* don't weight emission_factor if by_company = TRUE

Co-authored-by: Mauro Lepore <[email protected]>
  • Loading branch information
jdhoffa and maurolepore authored Aug 6, 2020
1 parent 5509ac2 commit 555b12e
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 23 deletions.
58 changes: 35 additions & 23 deletions R/target_sda.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ target_sda <- function(data,
)

check_crucial_names(data, crucial_portfolio)
check_unique_id(data, "id_loan")
walk(crucial_portfolio, ~ check_no_value_is_missing(data, .x))

check_crucial_names(ald, crucial_ald)
Expand All @@ -120,11 +121,13 @@ target_sda <- function(data,
by_company
)

loanbook_with_weighted_emission_factors <- data %>%
calculate_weighted_emission_factor(ald,
!!!rlang::syms(loanbook_summary_groups),
use_credit_limit = use_credit_limit
)
loanbook_with_weighted_emission_factors <- calculate_weighted_emission_factor(
data,
ald,
!!!rlang::syms(loanbook_summary_groups),
use_credit_limit = use_credit_limit,
by_company = by_company
)

if (identical(nrow(loanbook_with_weighted_emission_factors), 0L)) {
rlang::warn("Found no match between loanbook and ald.")
Expand Down Expand Up @@ -183,10 +186,14 @@ maybe_add_name_ald <- function(data, by_company = FALSE) {
calculate_weighted_emission_factor <- function(data,
ald,
...,
use_credit_limit = FALSE) {
use_credit_limit = FALSE,
by_company = FALSE) {
data %>%
inner_join(ald, by = ald_columns()) %>%
add_loan_weighted_emission_factor(use_credit_limit = use_credit_limit) %>%
add_loan_weighted_emission_factor(
use_credit_limit = use_credit_limit,
by_company = by_company
) %>%
group_by(...) %>%
summarize(
emission_factor_projected = sum(.data$weighted_loan_emission_factor)
Expand All @@ -195,25 +202,30 @@ calculate_weighted_emission_factor <- function(data,
rename(sector = .data$sector_ald)
}

add_loan_weighted_emission_factor <- function(data, use_credit_limit) {
loan_size <- paste0(
"loan_size_", ifelse(use_credit_limit, "credit_limit", "outstanding")
)
add_loan_weighted_emission_factor <- function(data, use_credit_limit, by_company = FALSE) {
if (by_company) {
data %>%
mutate(weighted_loan_emission_factor = .data$emission_factor)
} else {
loan_size <- paste0(
"loan_size_", ifelse(use_credit_limit, "credit_limit", "outstanding")
)

distinct_loans_by_sector <- data %>%
ungroup() %>%
group_by(.data$sector_ald) %>%
distinct(.data$id_loan, .data[[loan_size]])
distinct_loans_by_sector <- data %>%
ungroup() %>%
group_by(.data$sector_ald) %>%
distinct(.data$id_loan, .data[[loan_size]])

total_size_by_sector <- distinct_loans_by_sector %>%
summarize(total_size = sum(.data[[loan_size]]))
total_size_by_sector <- distinct_loans_by_sector %>%
summarize(total_size = sum(.data[[loan_size]]))

data %>%
left_join(total_size_by_sector, by = "sector_ald") %>%
mutate(
loan_weight = .data[[loan_size]] / .data$total_size,
weighted_loan_emission_factor = .data$emission_factor * .data$loan_weight
)
data %>%
left_join(total_size_by_sector, by = "sector_ald") %>%
mutate(
loan_weight = .data[[loan_size]] / .data$total_size,
weighted_loan_emission_factor = .data$emission_factor * .data$loan_weight
)
}
}

calculate_market_average <- function(data) {
Expand Down
11 changes: 11 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ check_no_value_is_missing <- function(data, column) {
invisible(data)
}

check_unique_id <- function(data, column) {
if (sum(duplicated(data[[column]]))) {
abort(
class = "unique_ids",
sprintf("Column `%s` must not contain any duplicates.", column)
)
}

invisible(data)
}

warn_grouped <- function(data, message) {
if (dplyr::is_grouped_df(data)) warn(message)

Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-target_sda.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ test_that("with known input outputs as expected", {

test_that("with known input outputs as expected, at company level (#155)", {
matched <- fake_matched(
id_loan = c(1, 2),
name_ald = c("shaanxi auto", "company 2"),
sector_ald = "cement"
)
Expand Down Expand Up @@ -261,6 +262,36 @@ test_that("with no matching data warns", {
)
})

test_that("with duplicated id_loan weights emission_factor as expected (#160)", {
match_result <- fake_matched(
id_loan = c(1, 1),
name_ald = rep("large company", 2),
sector_ald = "cement"
)

ald <- fake_ald(
sector = "cement",
name_company = "large company",
emission_factor = 2,
year = c(2020, 2025)
)

scen <- fake_co2_scenario(
year = c(2020, 2025),
emission_factor = c(1, 0.5)
)

expect_error(
target_sda(
match_result,
ald,
scen
) %>%
filter(year == min(year)),
class = "unique_ids"
)
})

test_that("with NAs in crucial columns errors with informative message (#146)", {
expect_error_crucial_NAs_portfolio <- function(name) {
data <- fake_matched(sector_ald = "cement")
Expand Down

0 comments on commit 555b12e

Please sign in to comment.