From 444c139cb0dbb05a6640d731276769dc225388a4 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 22 Jul 2022 12:36:41 -0500 Subject: [PATCH] Respect requested variable order in distinct() (#6346) Fixes #6156 --- NEWS.md | 3 +++ R/distinct.R | 10 ++++++---- tests/testthat/_snaps/distinct.md | 2 +- tests/testthat/test-distinct.R | 13 +++++++++---- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4e3d935534..227416ef41 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # dplyr (development version) +* `distinct()` returns columns ordered the way you request, not the same + as the input data (#6156). + * The `.keep`, `.before`, and `.after` arguments to `mutate()` are no longer experimental. diff --git a/R/distinct.R b/R/distinct.R index 556fe20ef4..d760c34913 100644 --- a/R/distinct.R +++ b/R/distinct.R @@ -106,16 +106,18 @@ distinct_prepare <- function(.data, abort(bullets, call = error_call) } - # Always include grouping variables preserving input order - out_vars <- intersect(names(.data), c(distinct_vars, group_vars)) + # Only keep unique vars + distinct_vars <- unique(distinct_vars) + # Missing grouping variables are added to the front + new_vars <- c(setdiff(group_vars, distinct_vars), distinct_vars) if (.keep_all) { keep <- seq_along(.data) } else { - keep <- out_vars + keep <- new_vars } - list(data = .data, vars = out_vars, keep = keep) + list(data = .data, vars = new_vars, keep = keep) } #' @export diff --git a/tests/testthat/_snaps/distinct.md b/tests/testthat/_snaps/distinct.md index a86e3ca7fa..1630db74e6 100644 --- a/tests/testthat/_snaps/distinct.md +++ b/tests/testthat/_snaps/distinct.md @@ -1,4 +1,4 @@ -# distinct gives a warning when selecting an unknown column (#3140) +# distinct errors when selecting an unknown column (#3140) Code df <- tibble(g = c(1, 2), x = c(1, 2)) diff --git a/tests/testthat/test-distinct.R b/tests/testthat/test-distinct.R index 0a978a52b5..95f5839461 100644 --- a/tests/testthat/test-distinct.R +++ b/tests/testthat/test-distinct.R @@ -102,9 +102,15 @@ test_that("distinct handles 0 columns edge case (#2954)", { expect_equal(nrow(distinct(tibble())), 0L) }) -test_that("distinct preserves order of the input variables (#3195)",{ +test_that("distinct respects order of the specified variables (#3195, #6156)",{ d <- data.frame(x = 1:2, y = 3:4) - expect_equal(names(distinct(d, y, x)), c("x", "y")) + expect_named(distinct(d, y, x), c("y", "x")) +}) + +test_that("distinct adds grouping variables to front if missing",{ + d <- data.frame(x = 1:2, y = 3:4) + expect_named(distinct(group_by(d, y), x), c("y", "x")) + expect_named(distinct(group_by(d, y), x, y), c("x", "y")) }) test_that("distinct() understands both NA variants (#4516)", { @@ -169,8 +175,7 @@ test_that("distinct() preserves attributes on bare data frames (#6318)", { # Errors ------------------------------------------------------------------ - -test_that("distinct gives a warning when selecting an unknown column (#3140)", { +test_that("distinct errors when selecting an unknown column (#3140)", { expect_snapshot({ df <- tibble(g = c(1, 2), x = c(1, 2))