From 27dc52e38966a67f47b49f16f2f5d65fb60a105c Mon Sep 17 00:00:00 2001 From: Mauro Lepore Date: Fri, 27 Nov 2020 12:49:23 -0600 Subject: [PATCH 1/5] New option allows reserved loanbook columns Closes #326 Closes #327 -- this is a simpler/safer way to solve it. Relates to: * https://github.com/2DegreesInvesting/r2dii.analysis/pull/227 * https://github.com/2DegreesInvesting/r2dii.analysis/pull/230 This change is internal, and motivated by our need to break the dependency between r2dii.data and its reverse dependencies. The problem we are trying to solve is this: r2dii.data provides data that is used in some functions if its reverse dependencies -- like r2dii.match. Concretely, changes in the sector classification dataset (in r2dii.data) make tests to fail in reverse dependencies like r2dii.match that store a snapshot of expected output (e.g. of match_name()). This commit allows us to use generate such snaphsots with a frozen loanbook, and thus make r2dii packages less prone to meaningless errors when CRAN checks for reverse dependencies. --- R/match_name.R | 6 +++++- man/prioritize.Rd | 11 +++-------- tests/testthat/test-match_name.R | 9 +++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/R/match_name.R b/R/match_name.R index d67b0a50..0c2e5c16 100644 --- a/R/match_name.R +++ b/R/match_name.R @@ -94,7 +94,7 @@ match_name_impl <- function(loanbook, old_groups <- dplyr::groups(loanbook) loanbook <- ungroup(loanbook) - abort_reserved_column(loanbook) + if (!allow_reserved_columns()) abort_reserved_column(loanbook) loanbook_rowid <- tibble::rowid_to_column(loanbook) prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite) @@ -159,6 +159,10 @@ match_name_impl <- function(loanbook, matched } +allow_reserved_columns <- function() { + isTRUE(getOption("allow_reserved_columns")) +} + abort_reserved_column <- function(data) { reserved_chr <- c("alias", "rowid", "sector") is_reserved <- names(data) %in% reserved_chr diff --git a/man/prioritize.Rd b/man/prioritize.Rd index c99642dd..f0c03ff7 100644 --- a/man/prioritize.Rd +++ b/man/prioritize.Rd @@ -39,17 +39,12 @@ matched \%>\% Compare, edit, and save the data manually: \itemize{ -\item Open \emph{matched.csv} with any spreadsheet editor (Excel, Google -Sheets, etc.). -\item Compare the columns \code{name} and \code{name_ald} manually to determine if -the match is valid. Other information can be used in conjunction -with just the names to ensure the two entities match (sector, -internal information on the company structure, etc.) +\item Open \emph{matched.csv} with any spreadsheet editor (Excel, Google Sheets, etc.). +\item Compare the columns \code{name} and \code{name_ald} manually to determine if the match is valid. Other information can be used in conjunction with just the names to ensure the two entities match (sector, internal information on the company structure, etc.) \item Edit the data: \itemize{ \item If you are happy with the match, set the \code{score} value to \code{1}. -\item Otherwise set or leave the \code{score} value to anything other than -\code{1}. +\item Otherwise set or leave the \code{score} value to anything other than \code{1}. } \item Save the edited file as, say, \emph{valid_matches.csv}. } diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R index b7c6564f..3cbbfff1 100644 --- a/tests/testthat/test-match_name.R +++ b/tests/testthat/test-match_name.R @@ -587,3 +587,12 @@ test_that("matches any case of ald$name_company, but preserves original case", { # The original uppercase is preserved expect_equal(upp$name_ald, "ALPINE KNITS") }) + +test_that("with relevant options allows loanbook with reserved columns", { + restore <- options(allow_reserved_columns = TRUE) + on.exit(options(restore), add = TRUE, after = FALSE) + + # Must add both `sector` and `borderline` -- match_name errors with just one + lbk <- mutate(fake_lbk(), sector = "a", borderline = FALSE) + expect_no_error(suppressWarnings(match_name(lbk, fake_ald()))) +}) From 92fc4644a8037a92fdb1e195e8d162cb4dd5cd3c Mon Sep 17 00:00:00 2001 From: Mauro Lepore Date: Fri, 27 Nov 2020 13:23:29 -0600 Subject: [PATCH 2/5] Avoid error on R CMD check with R <= 3.4 It's best practice to use on.exit() with add = TRUE and after = FALSE; or instead to use withr::defer(). But add = TRUE was added after 3.4, and I want to avoid a dependency on withr. So I just go with on.exit() and add = TRUE, as this only really matters when you call on.exit() more than once. --- tests/testthat/test-match_name.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R index 3cbbfff1..3405f8af 100644 --- a/tests/testthat/test-match_name.R +++ b/tests/testthat/test-match_name.R @@ -590,7 +590,7 @@ test_that("matches any case of ald$name_company, but preserves original case", { test_that("with relevant options allows loanbook with reserved columns", { restore <- options(allow_reserved_columns = TRUE) - on.exit(options(restore), add = TRUE, after = FALSE) + on.exit(options(restore), add = TRUE) # Must add both `sector` and `borderline` -- match_name errors with just one lbk <- mutate(fake_lbk(), sector = "a", borderline = FALSE) From 671e6a30d77609ee96691952faa21568fdd7977f Mon Sep 17 00:00:00 2001 From: Mauro Lepore Date: Fri, 27 Nov 2020 13:30:41 -0600 Subject: [PATCH 3/5] Explain why suppressWarnings() --- tests/testthat/test-match_name.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R index 3405f8af..e6c29733 100644 --- a/tests/testthat/test-match_name.R +++ b/tests/testthat/test-match_name.R @@ -594,5 +594,8 @@ test_that("with relevant options allows loanbook with reserved columns", { # Must add both `sector` and `borderline` -- match_name errors with just one lbk <- mutate(fake_lbk(), sector = "a", borderline = FALSE) - expect_no_error(suppressWarnings(match_name(lbk, fake_ald()))) + expect_no_error( + # Don't warn if found no match + suppressWarnings(match_name(lbk, fake_ald())) + ) }) From f6c925ae871f6c34787ea1e5a323dfeb832b2a7f Mon Sep 17 00:00:00 2001 From: Mauro Lepore Date: Fri, 27 Nov 2020 14:28:50 -0600 Subject: [PATCH 4/5] Use naming convention from usethis --- R/match_name.R | 2 +- tests/testthat/test-match_name.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/match_name.R b/R/match_name.R index 0c2e5c16..6280770b 100644 --- a/R/match_name.R +++ b/R/match_name.R @@ -160,7 +160,7 @@ match_name_impl <- function(loanbook, } allow_reserved_columns <- function() { - isTRUE(getOption("allow_reserved_columns")) + isTRUE(getOption("r2dii.match.allow_reserved_columns")) } abort_reserved_column <- function(data) { diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R index e6c29733..0ae3c1f2 100644 --- a/tests/testthat/test-match_name.R +++ b/tests/testthat/test-match_name.R @@ -589,7 +589,7 @@ test_that("matches any case of ald$name_company, but preserves original case", { }) test_that("with relevant options allows loanbook with reserved columns", { - restore <- options(allow_reserved_columns = TRUE) + restore <- options(r2dii.match.allow_reserved_columns = TRUE) on.exit(options(restore), add = TRUE) # Must add both `sector` and `borderline` -- match_name errors with just one From dbd514b550852c66e39c79583b1e393fa328f4eb Mon Sep 17 00:00:00 2001 From: Mauro Lepore Date: Fri, 27 Nov 2020 14:30:45 -0600 Subject: [PATCH 5/5] Revert man/prioritize.Rd --- man/prioritize.Rd | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/man/prioritize.Rd b/man/prioritize.Rd index f0c03ff7..c99642dd 100644 --- a/man/prioritize.Rd +++ b/man/prioritize.Rd @@ -39,12 +39,17 @@ matched \%>\% Compare, edit, and save the data manually: \itemize{ -\item Open \emph{matched.csv} with any spreadsheet editor (Excel, Google Sheets, etc.). -\item Compare the columns \code{name} and \code{name_ald} manually to determine if the match is valid. Other information can be used in conjunction with just the names to ensure the two entities match (sector, internal information on the company structure, etc.) +\item Open \emph{matched.csv} with any spreadsheet editor (Excel, Google +Sheets, etc.). +\item Compare the columns \code{name} and \code{name_ald} manually to determine if +the match is valid. Other information can be used in conjunction +with just the names to ensure the two entities match (sector, +internal information on the company structure, etc.) \item Edit the data: \itemize{ \item If you are happy with the match, set the \code{score} value to \code{1}. -\item Otherwise set or leave the \code{score} value to anything other than \code{1}. +\item Otherwise set or leave the \code{score} value to anything other than +\code{1}. } \item Save the edited file as, say, \emph{valid_matches.csv}. }