Skip to content

Commit

Permalink
Improve data masking (#138)
Browse files (browse the repository at this point in the history)
* robust data masking

* more robust data masking

* robust data masking in tests
  • Loading branch information
jacobvjk authored Sep 18, 2024
1 parent 847758e commit b436300
Show file tree
Hide file tree
Showing 22 changed files with 244 additions and 245 deletions.
10 changes: 5 additions & 5 deletions R/plot_aggregate_loanbooks.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ plot_aggregate_loanbooks <- function(config) {
### scatter plot alignment by exposure and sector comparison----
year_scatter_alignment_exposure <- 2027
region_scatter_alignment_exposure <- region_select
currency <- unique(company_aggregated_alignment_net$loan_size_outstanding_currency)
currency <- unique(company_aggregated_alignment_net[["loan_size_outstanding_currency"]])
if (length(by_group) <= 1) {
if (
nrow(loanbook_exposure_aggregated_alignment_net) > 0
Expand Down Expand Up @@ -533,7 +533,7 @@ plot_aggregate_loanbooks <- function(config) {
if (length(by_group) == 1) {
unique_by_group <- company_aggregated_alignment_bo_po %>%
dplyr::filter(
.data$sector == .env$sector_scatter,
.data[["sector"]] == .env[["sector_scatter"]],
!grepl("benchmark_corporate_economy_", !!rlang::sym(by_group))
) %>%
dplyr::pull(!!rlang::sym(by_group)) %>%
Expand Down Expand Up @@ -596,7 +596,7 @@ plot_aggregate_loanbooks <- function(config) {
if (length(by_group) == 1) {
unique_by_group <- company_aggregated_alignment_bo_po %>%
dplyr::filter(
.data$sector == .env$sector_scatter,
.data[["sector"]] == .env[["sector_scatter"]],
!grepl("benchmark_corporate_economy_", !!rlang::sym(by_group))
) %>%
dplyr::pull(!!rlang::sym(by_group)) %>%
Expand Down Expand Up @@ -663,7 +663,7 @@ plot_aggregate_loanbooks <- function(config) {
if (length(by_group) == 1) {
unique_by_group <- company_aggregated_alignment_bo_po %>%
dplyr::filter(
.data$sector == .env$sector_scatter,
.data[["sector"]] == .env[["sector_scatter"]],
!grepl("benchmark_corporate_economy_", !!rlang::sym(by_group))
) %>%
dplyr::pull(!!rlang::sym(by_group)) %>%
Expand Down Expand Up @@ -726,7 +726,7 @@ plot_aggregate_loanbooks <- function(config) {
if (length(by_group) == 1) {
unique_by_group <- company_aggregated_alignment_bo_po %>%
dplyr::filter(
.data$sector == .env$sector_scatter,
.data[["sector"]] == .env[["sector_scatter"]],
!grepl("benchmark_corporate_economy_", !!rlang::sym(by_group))
) %>%
dplyr::pull(!!rlang::sym(by_group)) %>%
Expand Down
20 changes: 10 additions & 10 deletions R/plot_sankey.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ plot_sankey <- function(data,
data_links <- data %>%
dplyr::mutate(
group_var = r2dii.plot::to_title(!!rlang::sym(group_var)),
middle_node = r2dii.plot::to_title(.data$middle_node)
middle_node = r2dii.plot::to_title(.data[["middle_node"]])
)
if ("middle_node2" %in% names(data_links)) {
data_links <- data_links %>%
dplyr::mutate(
middle_node2 = r2dii.plot::to_title(.data$middle_node2)
middle_node2 = r2dii.plot::to_title(.data[["middle_node2"]])
)
}
} else {
Expand All @@ -62,7 +62,7 @@ plot_sankey <- function(data,

links_1 <- data_links %>%
dplyr::select(
source = .env$group_var,
source = .env[["group_var"]],
target = "middle_node",
value = "loan_size_outstanding",
group = "is_aligned"
Expand All @@ -71,7 +71,7 @@ plot_sankey <- function(data,
if ("middle_node2" %in% names(data_links)) {
links_2 <- data_links %>%
dplyr::select(
.env$group_var,
.env[["group_var"]],
source = "middle_node",
target = "middle_node2",
value = "loan_size_outstanding",
Expand All @@ -80,7 +80,7 @@ plot_sankey <- function(data,

links_3 <- data_links %>%
dplyr::select(
.env$group_var,
.env[["group_var"]],
source = "middle_node2",
target = "is_aligned",
value = "loan_size_outstanding",
Expand All @@ -91,7 +91,7 @@ plot_sankey <- function(data,
} else {
links_2 <- data_links %>%
dplyr::select(
.env$group_var,
.env[["group_var"]],
source = "middle_node",
target = "is_aligned",
value = "loan_size_outstanding",
Expand All @@ -102,10 +102,10 @@ plot_sankey <- function(data,
}

links <- links %>%
dplyr::group_by(.data$source, .data$target, .data$group) %>%
dplyr::summarise(value = sum(.data$value, na.rm = TRUE)) %>%
dplyr::group_by(.data[["source"]], .data[["target"]], .data[["group"]]) %>%
dplyr::summarise(value = sum(.data[["value"]], na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::arrange(.data$source, .data$group) %>%
dplyr::arrange(.data[["source"]], .data[["group"]]) %>%
as.data.frame()

# TODO: colour the companies if fully aligned or not
Expand All @@ -114,7 +114,7 @@ plot_sankey <- function(data,
) %>%
dplyr::mutate(
group = dplyr::case_when(
.data$name %in% c("Aligned", "Not aligned", "Unknown") ~ .data$name,
.data[["name"]] %in% c("Aligned", "Not aligned", "Unknown") ~ .data[["name"]],
TRUE ~ "other"
)
)
Expand Down
16 changes: 8 additions & 8 deletions R/plot_scatter.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,18 @@ plot_scatter <- function(data,
if (!is.null(floor_outliers)) {
data <- data %>%
dplyr::mutate(
buildout = dplyr::if_else(.data$buildout <= .env$floor_outliers, .env$floor_outliers, .data$buildout),
phaseout = dplyr::if_else(.data$phaseout <= .env$floor_outliers, .env$floor_outliers, .data$phaseout),
net = dplyr::if_else(.data$net <= .env$floor_outliers, .env$floor_outliers, .data$net)
buildout = dplyr::if_else(.data[["buildout"]] <= .env[["floor_outliers"]], .env[["floor_outliers"]], .data[["buildout"]]),
phaseout = dplyr::if_else(.data[["phaseout"]] <= .env[["floor_outliers"]], .env[["floor_outliers"]], .data[["phaseout"]]),
net = dplyr::if_else(.data[["net"]] <= .env[["floor_outliers"]], .env[["floor_outliers"]], .data[["net"]])
)
subtitle <- glue::glue("{subtitle}\nThe outliers are displayed on the borders of the plot.", .trim = FALSE)
}
if (!is.null(cap_outliers)) {
data <- data %>%
dplyr::mutate(
buildout = dplyr::if_else(.data$buildout >= .env$cap_outliers, .env$cap_outliers, .data$buildout),
phaseout = dplyr::if_else(.data$phaseout >= .env$cap_outliers, .env$cap_outliers, .data$phaseout),
net = dplyr::if_else(.data$net >= .env$cap_outliers, .env$cap_outliers, .data$net)
buildout = dplyr::if_else(.data[["buildout"]] >= .env[["cap_outliers"]], .env[["cap_outliers"]], .data[["buildout"]]),
phaseout = dplyr::if_else(.data[["phaseout"]] >= .env[["cap_outliers"]], .env[["cap_outliers"]], .data[["phaseout"]]),
net = dplyr::if_else(.data[["net"]] >= .env[["cap_outliers"]], .env[["cap_outliers"]], .data[["net"]])
)
if (is.null(floor_outliers)) {
subtitle <- glue::glue("{subtitle}\nThe outliers are displayed on the borders of the plot.", .trim = FALSE)
Expand All @@ -113,7 +113,7 @@ plot_scatter <- function(data,
net = c(0, 0, 0)
)

p <- ggplot2::ggplot(data, ggplot2::aes(x = .data$buildout, y = .data$phaseout, colour = .data$net)) +
p <- ggplot2::ggplot(data, ggplot2::aes(x = .data[["buildout"]], y = .data[["phaseout"]], colour = .data[["net"]])) +
ggplot2::geom_hline(yintercept = 0, colour = "#c0c0c0") +
ggplot2::geom_vline(xintercept = 0, colour = "#c0c0c0") +
ggplot2::geom_line(data = data_net_0) +
Expand Down Expand Up @@ -162,7 +162,7 @@ plot_scatter <- function(data,
size = 3,
hjust = 0
) +
ggplot2::geom_point(ggplot2::aes(shape = .data$datapoint)) +
ggplot2::geom_point(ggplot2::aes(shape = .data[["datapoint"]])) +
ggplot2::scale_x_continuous(
name = "Deviation from scenario value\nfor low-carbon technologies build-out",
labels = scales::percent,
Expand Down
18 changes: 9 additions & 9 deletions R/plot_scatter_alignment_exposure.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ plot_scatter_alignment_exposure <- function(data,
data <- data %>%
dplyr::mutate(
exposure_weighted_net_alignment = dplyr::if_else(
.data$exposure_weighted_net_alignment <= .env$floor_outliers,
.env$floor_outliers,
.data$exposure_weighted_net_alignment
.data[["exposure_weighted_net_alignment"]] <= .env[["floor_outliers"]],
.env[["floor_outliers"]],
.data[["exposure_weighted_net_alignment"]]
)
)
}
Expand All @@ -51,9 +51,9 @@ plot_scatter_alignment_exposure <- function(data,
data <- data %>%
dplyr::mutate(
exposure_weighted_net_alignment = dplyr::if_else(
.data$exposure_weighted_net_alignment >= .env$cap_outliers,
.env$cap_outliers,
.data$exposure_weighted_net_alignment
.data[["exposure_weighted_net_alignment"]] >= .env[["cap_outliers"]],
.env[["cap_outliers"]],
.data[["exposure_weighted_net_alignment"]]
)
)
}
Expand All @@ -68,11 +68,11 @@ plot_scatter_alignment_exposure <- function(data,
}

plot <- data %>%
dplyr::mutate(sector = tools::toTitleCase(.data$sector)) %>%
dplyr::mutate(sector = tools::toTitleCase(.data[["sector"]])) %>%
ggplot2::ggplot(
ggplot2::aes(
x = .data$sum_loan_size_outstanding,
y = .data$exposure_weighted_net_alignment,
x = .data[["sum_loan_size_outstanding"]],
y = .data[["exposure_weighted_net_alignment"]],
color = !!rlang::sym(group_var)
)
) +
Expand Down
20 changes: 10 additions & 10 deletions R/plot_scatter_animated.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,25 +81,25 @@ plot_scatter_animated <- function(data,
if (!is.null(floor_outliers)) {
data <- data %>%
dplyr::mutate(
buildout = dplyr::if_else(.data$buildout <= .env$floor_outliers, .env$floor_outliers, .data$buildout),
phaseout = dplyr::if_else(.data$phaseout <= .env$floor_outliers, .env$floor_outliers, .data$phaseout),
buildout = dplyr::if_else(.data[["buildout"]] <= .env[["floor_outliers"]], .env[["floor_outliers"]], .data[["buildout"]]),
phaseout = dplyr::if_else(.data[["phaseout"]] <= .env[["floor_outliers"]], .env[["floor_outliers"]], .data[["phaseout"]]),
net = dplyr::if_else(
.data$buildout <= .env$floor_outliers | .data$phaseout <= .env$floor_outliers,
.data$buildout + .data$phaseout,
.data$net
.data[["buildout"]] <= .env[["floor_outliers"]] | .data[["phaseout"]] <= .env[["floor_outliers"]],
.data[["buildout"]] + .data[["phaseout"]],
.data[["net"]]
) # net is a sum of buildout and phaseout
)
subtitle <- glue::glue("{subtitle}\nThe outliers are displayed on the borders of the plot.", .trim = FALSE)
}
if (!is.null(cap_outliers)) {
data <- data %>%
dplyr::mutate(
buildout = dplyr::if_else(.data$buildout >= .env$cap_outliers, .env$cap_outliers, .data$buildout),
phaseout = dplyr::if_else(.data$phaseout >= .env$cap_outliers, .env$cap_outliers, .data$phaseout),
buildout = dplyr::if_else(.data[["buildout"]] >= .env[["cap_outliers"]], .env[["cap_outliers"]], .data[["buildout"]]),
phaseout = dplyr::if_else(.data[["phaseout"]] >= .env[["cap_outliers"]], .env[["cap_outliers"]], .data[["phaseout"]]),
net = dplyr::if_else(
.data$buildout >= .env$cap_outliers | .data$phaseout >= .env$cap_outliers,
.data$buildout + .data$phaseout,
.data$net
.data[["buildout"]] >= .env[["cap_outliers"]] | .data[["phaseout"]] >= .env[["cap_outliers"]],
.data[["buildout"]] + .data[["phaseout"]],
.data[["net"]]
) # net is a sum of buildout and phaseout
)
if (is.null(floor_outliers)) {
Expand Down
48 changes: 24 additions & 24 deletions R/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ plot_match_success_rate <- function(data,
stop_if_not_inherits(currency, "character")

data <- data %>%
dplyr::filter(.data$sector != "not in scope") %>%
dplyr::filter(.data$metric_type == .env$metric_type) %>%
dplyr::filter(.data$match_success_type == .env$match_success_type)
dplyr::filter(.data[["sector"]] != "not in scope") %>%
dplyr::filter(.data[["metric_type"]] == .env[["metric_type"]]) %>%
dplyr::filter(.data[["match_success_type"]] == .env[["match_success_type"]])

# plot design
fill_scale <- c(
Expand Down Expand Up @@ -139,30 +139,30 @@ generate_individual_outputs <- function(data,

data <- data %>%
dplyr::filter(
.data[[by_group]] == .env$by_group_value,
.data[["scenario_source"]] == .env$scenario_source,
.data[["region"]] == .env$region,
.data[["sector"]] %in% .env$sector
.data[[by_group]] == .env[["by_group_value"]],
.data[["scenario_source"]] == .env[["scenario_source"]],
.data[["region"]] == .env[["region"]],
.data[["sector"]] %in% .env[["sector"]]
)

matched_prioritized <- matched_prioritized %>%
dplyr::filter(
.data[[by_group]] == .env$by_group_value,
.data[["sector"]] %in% .env$sector
.data[[by_group]] == .env[["by_group_value"]],
.data[["sector"]] %in% .env[["sector"]]
)

if (target_type == "tms") {
# plot tech mix for given sector
data_techmix <- data %>%
dplyr::filter(
.data[["metric"]] %in% c("projected", "corporate_economy", .env$target_scenario),
dplyr::between(.data[["year"]], .env$start_year, .env$start_year + .env$time_horizon)
.data[["metric"]] %in% c("projected", "corporate_economy", .env[["target_scenario"]]),
dplyr::between(.data[["year"]], .env[["start_year"]], .env[["start_year"]] + .env[["time_horizon"]])
) %>%
dplyr::mutate(
label = dplyr::case_when(
.data[["metric"]] == "projected" ~ "Portfolio",
.data[["metric"]] == "corporate_economy" ~ "Corporate Economy",
.data[["metric"]] == .env$target_scenario ~ glue::glue("{r2dii.plot::to_title(toupper(.env$scenario))} Scenario")
.data[["metric"]] == .env[["target_scenario"]] ~ glue::glue("{r2dii.plot::to_title(toupper(.env$scenario))} Scenario")
)
) %>%
r2dii.plot::prep_techmix(
Expand Down Expand Up @@ -207,13 +207,13 @@ generate_individual_outputs <- function(data,

# plot trajectory charts for all available techs in given sector
technologies_in_sector <- r2dii.data::increasing_or_decreasing %>%
dplyr::filter(.data[["sector"]] == .env$sector) %>%
dplyr::filter(.data[["sector"]] == .env[["sector"]]) %>%
dplyr::pull(.data[["technology"]])

technologies_to_plot <- data %>%
dplyr::filter(
.data[["metric"]] == .env$target_scenario,
.data[["technology"]] %in% .env$technologies_in_sector
.data[["metric"]] == .env[["target_scenario"]],
.data[["technology"]] %in% .env[["technologies_in_sector"]]
) %>%
dplyr::distinct(.data[["technology"]]) %>%
dplyr::arrange(.data[["technology"]]) %>%
Expand All @@ -222,8 +222,8 @@ generate_individual_outputs <- function(data,
for (i in 1:length(technologies_to_plot)) {
data_trajectory <- data %>%
dplyr::filter(
.data[["technology"]] == .env$technologies_to_plot[i],
dplyr::between(.data[["year"]], .env$start_year, .env$start_year + .env$time_horizon)
.data[["technology"]] == .env[["technologies_to_plot"]][i],
dplyr::between(.data[["year"]], .env[["start_year"]], .env[["start_year"]] + .env[["time_horizon"]])
) %>%
r2dii.plot::prep_trajectory(
convert_label = r2dii.plot::recode_metric_trajectory,
Expand Down Expand Up @@ -271,15 +271,15 @@ generate_individual_outputs <- function(data,
dplyr::filter(
dplyr::between(
.data[["year"]],
.env$start_year,
.env$start_year + .env$time_horizon)
.env[["start_year"]],
.env[["start_year"]] + .env[["time_horizon"]])
) %>%
dplyr::filter(
.data[["emission_factor_metric"]] %in% c(
"projected",
"corporate_economy",
.env$target_scenario,
.env$adjusted_scenario
.env[["target_scenario"]],
.env[["adjusted_scenario"]]
)
) %>%
dplyr::mutate(
Expand All @@ -288,8 +288,8 @@ generate_individual_outputs <- function(data,
levels = c(
"projected",
"corporate_economy",
.env$target_scenario,
.env$adjusted_scenario
.env[["target_scenario"]],
.env[["adjusted_scenario"]]
)
)
) %>%
Expand Down Expand Up @@ -350,7 +350,7 @@ generate_individual_outputs <- function(data,
dplyr::select(
dplyr::all_of(
c(
.env$by_group, "name_abcd", "sector_abcd", "loan_size_outstanding",
.env[["by_group"]], "name_abcd", "sector_abcd", "loan_size_outstanding",
"loan_size_outstanding_currency", "loan_size_credit_limit",
"loan_size_credit_limit_currency"
)
Expand Down
Loading

0 comments on commit b436300

Please sign in to comment.