From 0e3eb9dc32755da1bbd1cde109c1fcd3c98e1c73 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Aug 2023 15:14:02 -0700 Subject: [PATCH] file path detection in paste_linter (#2074) --- NEWS.md | 1 + R/methods.R | 3 +- R/paste_linter.R | 104 ++++++++++++++++++++++++++++- R/xp_utils.R | 14 ++++ man/paste_linter.Rd | 19 +++++- tests/testthat/test-paste_linter.R | 92 +++++++++++++++++++++++++ 6 files changed, 229 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 53074f117..d7b1494d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ + `yoda_test_linter()` * `sprintf_linter()` is pipe-aware, so that `x %>% sprintf(fmt = "%s")` no longer lints (#1943, @MichaelChirico). * `line_length_linter()` helpfully includes the line length in the lint message (#2057, @MichaelChirico). +* `paste_linter()` gains detection for file paths that are better constructed with `file.path()`, e.g. `paste0(dir, "/", file)` would be better as `file.path(dir, file)` (part of #884, @MichaelChirico). ### New linters diff --git a/R/methods.R b/R/methods.R index b65c9f8c9..58bc9543b 100644 --- a/R/methods.R +++ b/R/methods.R @@ -45,8 +45,7 @@ markdown <- function(x, info, ...) { as.character(x$line_number), ":", as.character(x$column_number), ":", "]", "(", - paste( - sep = "/", + file.path( "https://github.com", info$user, info$repo, diff --git a/R/paste_linter.R b/R/paste_linter.R index ed4f35424..a9346c452 100644 --- a/R/paste_linter.R +++ b/R/paste_linter.R @@ -21,6 +21,8 @@ #' `paste()` with `sep = ""` is not linted. #' @param allow_to_string Logical, default `FALSE`. If `TRUE`, usage of #' `paste()` and `paste0()` with `collapse = ", "` is not linted. +#' @param allow_file_path Logical, default `FALSE`. If `TRUE`, usage of +#' `paste()` and `paste0()` to construct file paths is not linted. #' #' @examples #' # will produce lints @@ -44,6 +46,11 @@ #' linters = paste_linter() #' ) #' +#' lint( +#' text = 'paste0(dir, "/", file)', +#' linters = paste_linter() +#' ) +#' #' # okay #' lint( #' text = 'paste0("a", "b")', @@ -75,9 +82,14 @@ #' linters = paste_linter() #' ) #' +#' lint( +#' text = 'paste0(year, "/", month, "/", day)', +#' linters = paste_linter(allow_file_path = TRUE) +#' ) +#' #' @seealso [linters] for a complete list of linters available in lintr. #' @export -paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE) { +paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE, allow_file_path = FALSE) { sep_xpath <- " //SYMBOL_FUNCTION_CALL[text() = 'paste'] /parent::expr @@ -111,6 +123,70 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE) { /parent::expr " + slash_str <- sprintf("STR_CONST[%s]", xp_text_in_table(c("'/'", '"/"'))) + str_not_start_with_slash <- + "STR_CONST[not(substring(text(), 2, 1) = '/')]" + str_not_end_with_slash <- + "STR_CONST[not(substring(text(), string-length(text()) - 1, 1) = '/')]" + non_str <- "SYMBOL or expr" + + # Type I: paste(..., sep = "/") + paste_file_path_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'paste'] + /parent::expr + /parent::expr[ + SYMBOL_SUB[text() = 'sep']/following-sibling::expr[1][{slash_str}] + and not(SYMBOL_SUB[text() = 'collapse']) + ] + ") + + # Type II: paste0(x, "/", y, "/", z) + paste0_file_path_xpath <- xp_strip_comments(glue(" + //SYMBOL_FUNCTION_CALL[text() = 'paste0'] + /parent::expr + /parent::expr[ + (: exclude paste0(x) :) + count(expr) > 2 + (: An expression matching _any_ of these conditions is _not_ a file path :) + and not( + (: Any numeric input :) + expr/NUM_CONST + (: A call using collapse= :) + or SYMBOL_SUB[text() = 'collapse'] + (: First input is '/', meaning file.path() would need to start with '' :) + or expr[2][{slash_str}] + (: Last input is '/', meaning file.path() would need to end with '' :) + or expr[last()][{slash_str}] + (: String starting or ending with multiple / :) + (: TODO(#2075): run this logic on the actual R string :) + or expr/STR_CONST[ + (: NB: this is (text, initial_index, n_characters) :) + substring(text(), 2, 2) = '//' + or substring(text(), string-length(text()) - 2, 2) = '//' + ] + (: Consecutive non-strings like paste0(x, y) :) + or expr[({non_str}) and following-sibling::expr[1][{non_str}]] + (: A string not ending with /, followed by non-string or string not starting with / :) + or expr[ + {str_not_end_with_slash} + and following-sibling::expr[1][ + {non_str} + or {str_not_start_with_slash} + ] + ] + (: A string not starting with /, preceded by a non-string :) + (: NB: consecutive strings is covered by the previous condition :) + or expr[ + {str_not_start_with_slash} + and preceding-sibling::expr[1][{non_str}] + ] + ) + ] + ")) + + empty_paste_note <- + 'Note that paste() converts empty inputs to "", whereas file.path() leaves it empty.' + Linter(function(source_expression) { if (!is_lint_level(source_expression, "expression")) { return(list()) @@ -170,6 +246,32 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE) { type = "warning" ) + if (!allow_file_path) { + paste_file_path_expr <- xml_find_all(xml, paste_file_path_xpath) + optional_lints <- c(optional_lints, xml_nodes_to_lints( + paste_file_path_expr, + source_expression = source_expression, + lint_message = paste( + 'Construct file paths with file.path(...) instead of paste(..., sep = "/").', + 'If you are using paste(sep = "/") to construct a date,', + "consider using format() or lubridate helpers instead.", + empty_paste_note + ), + type = "warning" + )) + + paste0_file_path_expr <- xml_find_all(xml, paste0_file_path_xpath) + optional_lints <- c(optional_lints, xml_nodes_to_lints( + paste0_file_path_expr, + source_expression = source_expression, + lint_message = paste( + 'Construct file paths with file.path(...) instead of paste0(x, "/", y, "/", z).', + empty_paste_note + ), + type = "warning" + )) + } + c(optional_lints, paste0_sep_lints, paste_strrep_lints) }) } diff --git a/R/xp_utils.R b/R/xp_utils.R index 29d767137..e5bec73e9 100644 --- a/R/xp_utils.R +++ b/R/xp_utils.R @@ -65,3 +65,17 @@ xp_find_location <- function(xml, xpath) { as.integer(xml_find_num(xml, xpath)) } } + +#' Strip XPath 2.0-style comments from an XPath +#' +#' xml2 uses XPath 1.0, which has no support for comments. But comments are +#' useful in a codebase with as many XPaths as we maintain, so we fudge our +#' way to XPath 2.0-ish support by writing this simple function to remove comments. +#' +#' @noRd +xpath_comment_re <- rex::rex( + "(:", + zero_or_more(not(":)")), + ":)" +) +xp_strip_comments <- function(xpath) rex::re_substitutes(xpath, xpath_comment_re, "", global = TRUE) diff --git a/man/paste_linter.Rd b/man/paste_linter.Rd index e83352f76..0c34ed5a4 100644 --- a/man/paste_linter.Rd +++ b/man/paste_linter.Rd @@ -4,7 +4,11 @@ \alias{paste_linter} \title{Raise lints for several common poor usages of \code{paste()}} \usage{ -paste_linter(allow_empty_sep = FALSE, allow_to_string = FALSE) +paste_linter( + allow_empty_sep = FALSE, + allow_to_string = FALSE, + allow_file_path = FALSE +) } \arguments{ \item{allow_empty_sep}{Logical, default \code{FALSE}. If \code{TRUE}, usage of @@ -12,6 +16,9 @@ paste_linter(allow_empty_sep = FALSE, allow_to_string = FALSE) \item{allow_to_string}{Logical, default \code{FALSE}. If \code{TRUE}, usage of \code{paste()} and \code{paste0()} with \code{collapse = ", "} is not linted.} + +\item{allow_file_path}{Logical, default \code{FALSE}. If \code{TRUE}, usage of +\code{paste()} and \code{paste0()} to construct file paths is not linted.} } \description{ The following issues are linted by default by this linter @@ -55,6 +62,11 @@ lint( linters = paste_linter() ) +lint( + text = 'paste0(dir, "/", file)', + linters = paste_linter() +) + # okay lint( text = 'paste0("a", "b")', @@ -86,6 +98,11 @@ lint( linters = paste_linter() ) +lint( + text = 'paste0(year, "/", month, "/", day)', + linters = paste_linter(allow_file_path = TRUE) +) + } \seealso{ \link{linters} for a complete list of linters available in lintr. diff --git a/tests/testthat/test-paste_linter.R b/tests/testthat/test-paste_linter.R index a65bbb898..73e677f18 100644 --- a/tests/testthat/test-paste_linter.R +++ b/tests/testthat/test-paste_linter.R @@ -113,3 +113,95 @@ test_that("paste_linter blocks simple disallowed usages", { expect_lint("paste0(rep('*', 20L), collapse='')", lint_msg, linter) expect_lint("paste(rep('#', width), collapse='')", lint_msg, linter) }) + +test_that("paste_linter skips allowed usages for file paths", { + linter <- paste_linter() + + expect_lint("paste('a', 'b', 'c')", NULL, linter) + expect_lint("paste('a', 'b', 'c', sep = ',')", NULL, linter) + expect_lint("paste('a', 'b', collapse = '/')", NULL, linter) + expect_lint("cat(paste('a', 'b'), sep = '/')", NULL, linter) + expect_lint("sep <- '/'; paste('a', sep)", NULL, linter) + expect_lint("paste(sep = ',', '/', 'a')", NULL, linter) + + # paste(..., sep='/', collapse=collapse) is not a trivial swap to file.path + expect_lint("paste(x, y, sep = '/', collapse = ':')", NULL, linter) + + expect_lint("file.path('a', 'b', 'c')", NULL, linter) + + # testing the sep starts with / is not enough + expect_lint("paste('a', 'b', sep = '//')", NULL, linter) +}) + +test_that("paste_linter blocks simple disallowed usages for file paths", { + linter <- paste_linter() + lint_msg <- rex::rex("Construct file paths with file.path(...) instead of") + + expect_lint("paste(sep = '/', 'a', 'b')", lint_msg, linter) + expect_lint("paste('a', 'b', sep = '/')", lint_msg, linter) +}) + +test_that("paste_linter ignores non-path cases with paste0", { + linter <- paste_linter() + + expect_lint("paste0(x, y)", NULL, linter) + expect_lint("paste0('abc', 'def')", NULL, linter) + expect_lint("paste0('/abc', 'def/')", NULL, linter) + expect_lint("paste0(x, 'def/')", NULL, linter) + expect_lint("paste0('/abc', y)", NULL, linter) + expect_lint("paste0(foo(x), y)", NULL, linter) + expect_lint("paste0(foo(x), 'def')", NULL, linter) + + # these might be a different lint (as.character instead, e.g.) but not here + expect_lint("paste0(x)", NULL, linter) + expect_lint("paste0('a')", NULL, linter) + expect_lint("paste0('a', 1)", NULL, linter) + + # paste0(..., collapse=collapse) not directly mapped to file.path + expect_lint("paste0(x, collapse = '/')", NULL, linter) +}) + +test_that("paste_linter detects paths built with '/' and paste0", { + linter <- paste_linter() + lint_msg <- rex::rex("Construct file paths with file.path(...) instead of") + + expect_lint("paste0(x, '/', y)", lint_msg, linter) + expect_lint("paste0(x, '/', y, '/', z)", lint_msg, linter) + expect_lint("paste0(x, '/abc/', 'def/', y)", lint_msg, linter) + expect_lint("paste0(foo(x), '/abc/', 'def/', bar(y))", lint_msg, linter) +}) + +test_that("paste_linter skips initial/terminal '/' and repeated '/' for paths", { + linter <- paste_linter() + + expect_lint("paste0('/', x)", NULL, linter) + expect_lint("paste0(x, '/')", NULL, linter) + expect_lint("paste0(x, '//hey/', y)", NULL, linter) + expect_lint("paste0(x, '/hey//', y)", NULL, linter) +}) + +test_that("paste_linter doesn't skip all initial/terminal '/' for paths", { + linter <- paste_linter() + lint_msg <- rex::rex("Construct file paths with file.path(...) instead of") + + expect_lint('paste0("/abc/", "def")', lint_msg, linter) + expect_lint('paste0("abc/", "def/")', lint_msg, linter) +}) + +test_that("multiple path lints are generated correctly", { + expect_lint( + trim_some("{ + paste(x, y, sep = '/') + paste0(x, '/', y) + }"), + list( + rex::rex('paste(..., sep = "/")'), + rex::rex('paste0(x, "/", y, "/", z)') + ), + paste_linter() + ) +}) + +test_that("allow_file_path argument works", { + expect_lint("paste(x, y, sep = '/')", NULL, paste_linter(allow_file_path = TRUE)) +})