diff --git a/NEWS.md b/NEWS.md index d7b1494d8b..fa051b8b57 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ + `yoda_test_linter()` * `sprintf_linter()` is pipe-aware, so that `x %>% sprintf(fmt = "%s")` no longer lints (#1943, @MichaelChirico). * `line_length_linter()` helpfully includes the line length in the lint message (#2057, @MichaelChirico). +* `sort_linter()` checks for code like `x == sort(x)` which is better served by using the function `is.unsorted()` (part of #884, @MichaelChirico). * `paste_linter()` gains detection for file paths that are better constructed with `file.path()`, e.g. `paste0(dir, "/", file)` would be better as `file.path(dir, file)` (part of #884, @MichaelChirico). ### New linters diff --git a/R/sort_linter.R b/R/sort_linter.R index 642ac3e142..3b7b3148bb 100644 --- a/R/sort_linter.R +++ b/R/sort_linter.R @@ -1,9 +1,24 @@ -#' Require usage of `sort()` over `.[order(.)]` +#' Check for common mistakes around sorting vectors +#' +#' This linter checks for some common mistakes when using [order()] or [sort()]. +#' +#' First, it requires usage of `sort()` over `.[order(.)]`. #' #' [sort()] is the dedicated option to sort a list or vector. It is more legible #' and around twice as fast as `.[order(.)]`, with the gap in performance #' growing with the vector size. #' +#' Second, it requires usage of [is.unsorted()] over equivalents using `sort()`. +#' +#' The base function `is.unsorted()` exists to test the sortedness of a vector. +#' Prefer it to inefficient and less-readable equivalents like +#' `x != sort(x)`. The same goes for checking `x == sort(x)` -- use +#' `!is.unsorted(x)` instead. +#' +#' Moreover, use of `x == sort(x)` can be risky because [sort()] drops missing +#' elements by default, meaning `==` might end up trying to compare vectors +#' of differing lengths. +#' #' @examples #' # will produce lints #' lint( @@ -16,6 +31,11 @@ #' linters = sort_linter() #' ) #' +#' lint( +#' text = "sort(x) == x", +#' linters = sort_linter() +#' ) +#' #' # okay #' lint( #' text = "x[sample(order(x))]", @@ -27,6 +47,11 @@ #' linters = sort_linter() #' ) #' +#' lint( +#' text = "sort(x, decreasing = TRUE) == x", +#' linters = sort_linter() +#' ) +#' #' # If you are sorting several objects based on the order of one of them, such #' # as: #' x <- sample(1:26) @@ -44,7 +69,7 @@ #' @seealso [linters] for a complete list of linters available in lintr. #' @export sort_linter <- function() { - xpath <- " + order_xpath <- " //OP-LEFT-BRACKET /following-sibling::expr[1][ expr[1][ @@ -57,6 +82,17 @@ sort_linter <- function() { ] " + sorted_xpath <- " + //SYMBOL_FUNCTION_CALL[text() = 'sort'] + /parent::expr + /parent::expr[not(SYMBOL_SUB)] + /parent::expr[ + (EQ or NE) + and expr/expr = expr + ] + " + + args_xpath <- ".//SYMBOL_SUB[text() = 'method' or text() = 'decreasing' or text() = 'na.last']" @@ -70,45 +106,51 @@ sort_linter <- function() { xml <- source_expression$xml_parsed_content - bad_expr <- xml_find_all(xml, xpath) + order_expr <- xml_find_all(xml, order_xpath) - var <- xml_text( - xml_find_first( - bad_expr, - ".//SYMBOL_FUNCTION_CALL[text() = 'order']/parent::expr[1]/following-sibling::expr[1]" - ) - ) + var <- xml_text(xml_find_first( + order_expr, + ".//SYMBOL_FUNCTION_CALL[text() = 'order']/parent::expr[1]/following-sibling::expr[1]" + )) - orig_call <- sprintf( - "%1$s[%2$s]", - var, - get_r_string(bad_expr) - ) + orig_call <- sprintf("%s[%s]", var, get_r_string(order_expr)) # Reconstruct new argument call for each expression separately - args <- vapply(bad_expr, function(e) { + args <- vapply(order_expr, function(e) { arg_names <- xml_text(xml_find_all(e, args_xpath)) - arg_values <- xml_text( - xml_find_all(e, arg_values_xpath) - ) + arg_values <- xml_text(xml_find_all(e, arg_values_xpath)) if (!"na.last" %in% arg_names) { arg_names <- c(arg_names, "na.last") arg_values <- c(arg_values, "TRUE") } - toString(paste(arg_names, "=", arg_values)) + paste(arg_names, "=", arg_values, collapse = ", ") }, character(1L)) - new_call <- sprintf( - "sort(%1$s, %2$s)", - var, - args - ) + new_call <- sprintf("sort(%s, %s)", var, args) - xml_nodes_to_lints( - bad_expr, + order_lints <- xml_nodes_to_lints( + order_expr, source_expression = source_expression, lint_message = paste0(new_call, " is better than ", orig_call, "."), type = "warning" ) + + sorted_expr <- xml_find_all(xml, sorted_xpath) + + sorted_op <- xml_text(xml_find_first(sorted_expr, "*[2]")) + lint_message <- ifelse( + sorted_op == "==", + "Use !is.unsorted(x) to test the sortedness of a vector.", + "Use is.unsorted(x) to test the unsortedness of a vector." + ) + + sorted_lints <- xml_nodes_to_lints( + sorted_expr, + source_expression = source_expression, + lint_message = lint_message, + type = "warning" + ) + + c(order_lints, sorted_lints) }) } diff --git a/man/sort_linter.Rd b/man/sort_linter.Rd index 1348b46e61..d37bb543cd 100644 --- a/man/sort_linter.Rd +++ b/man/sort_linter.Rd @@ -2,14 +2,30 @@ % Please edit documentation in R/sort_linter.R \name{sort_linter} \alias{sort_linter} -\title{Require usage of \code{sort()} over \code{.[order(.)]}} +\title{Check for common mistakes around sorting vectors} \usage{ sort_linter() } \description{ +This linter checks for some common mistakes when using \code{\link[=order]{order()}} or \code{\link[=sort]{sort()}}. +} +\details{ +First, it requires usage of \code{sort()} over \code{.[order(.)]}. + \code{\link[=sort]{sort()}} is the dedicated option to sort a list or vector. It is more legible and around twice as fast as \code{.[order(.)]}, with the gap in performance growing with the vector size. + +Second, it requires usage of \code{\link[=is.unsorted]{is.unsorted()}} over equivalents using \code{sort()}. + +The base function \code{is.unsorted()} exists to test the sortedness of a vector. +Prefer it to inefficient and less-readable equivalents like +\code{x != sort(x)}. The same goes for checking \code{x == sort(x)} -- use +\code{!is.unsorted(x)} instead. + +Moreover, use of \code{x == sort(x)} can be risky because \code{\link[=sort]{sort()}} drops missing +elements by default, meaning \code{==} might end up trying to compare vectors +of differing lengths. } \examples{ # will produce lints @@ -23,6 +39,11 @@ lint( linters = sort_linter() ) +lint( + text = "sort(x) == x", + linters = sort_linter() +) + # okay lint( text = "x[sample(order(x))]", @@ -34,6 +55,11 @@ lint( linters = sort_linter() ) +lint( + text = "sort(x, decreasing = TRUE) == x", + linters = sort_linter() +) + # If you are sorting several objects based on the order of one of them, such # as: x <- sample(1:26) diff --git a/tests/testthat/test-sort_linter.R b/tests/testthat/test-sort_linter.R index 878828a29a..e3abd5a001 100644 --- a/tests/testthat/test-sort_linter.R +++ b/tests/testthat/test-sort_linter.R @@ -79,3 +79,37 @@ test_that("sort_linter works with multiple lints in a single expression", { ) }) + +test_that("sort_linter skips usages calling sort arguments", { + linter <- sort_linter() + + # any arguments to sort --> not compatible + expect_lint("sort(x, decreasing = TRUE) == x", NULL, linter) + expect_lint("sort(x, na.last = TRUE) != x", NULL, linter) + expect_lint("sort(x, method_arg = TRUE) == x", NULL, linter) +}) + +test_that("sort_linter skips when inputs don't match", { + linter <- sort_linter() + + expect_lint("sort(x) == y", NULL, linter) + expect_lint("sort(x) == foo(x)", NULL, linter) + expect_lint("sort(foo(x)) == x", NULL, linter) +}) + +test_that("sort_linter blocks simple disallowed usages", { + linter <- sort_linter() + unsorted_msg <- rex::rex("Use is.unsorted(x) to test the unsortedness of a vector.") + sorted_msg <- rex::rex("Use !is.unsorted(x) to test the sortedness of a vector.") + + expect_lint("sort(x) == x", sorted_msg, linter) + + # argument order doesn't matter + expect_lint("x == sort(x)", sorted_msg, linter) + + # inverted version + expect_lint("sort(x) != x", unsorted_msg, linter) + + # expression matching + expect_lint("sort(foo(x)) == foo(x)", sorted_msg, linter) +})