diff --git a/NAMESPACE b/NAMESPACE index 85dd9fe84..9fbdb6315 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -321,6 +321,10 @@ S3method(vec_proxy_equal,array) S3method(vec_proxy_equal,default) S3method(vec_proxy_equal,integer64) S3method(vec_proxy_equal,numeric_version) +S3method(vec_proxy_missing,AsIs) +S3method(vec_proxy_missing,POSIXlt) +S3method(vec_proxy_missing,array) +S3method(vec_proxy_missing,default) S3method(vec_proxy_order,AsIs) S3method(vec_proxy_order,array) S3method(vec_proxy_order,default) @@ -597,6 +601,7 @@ export(vec_order) export(vec_proxy) export(vec_proxy_compare) export(vec_proxy_equal) +export(vec_proxy_missing) export(vec_proxy_order) export(vec_ptype) export(vec_ptype2) diff --git a/R/equal.R b/R/equal.R index de4bc7d4f..25c05510f 100644 --- a/R/equal.R +++ b/R/equal.R @@ -3,8 +3,7 @@ #' Returns a proxy object (i.e. an atomic vector or data frame of atomic #' vectors). For [vctr]s, this determines the behaviour of `==` and #' `!=` (via [vec_equal()]); [unique()], [duplicated()] (via -#' [vec_unique()] and [vec_duplicate_detect()]); [is.na()] and [anyNA()] -#' (via [vec_detect_missing()]). +#' [vec_unique()] and [vec_duplicate_detect()]). #' #' The default method calls [vec_proxy()], as the default underlying #' vector data should be equal-able in most cases. If your class is diff --git a/R/missing.R b/R/missing.R index afda18d2c..870e8859c 100644 --- a/R/missing.R +++ b/R/missing.R @@ -48,6 +48,43 @@ #' df NULL +#' Missing proxy +#' +#' Returns a proxy object (i.e. an atomic vector or data frame of atomic +#' vectors). For [vctr]s, this determines the behaviour of +#' [is.na()] and [anyNA()] (via [vec_detect_missing()]). +#' +#' The default method calls [vec_proxy_equal()], as the default +#' equal-able proxy should be used to detect missingness in most cases. +#' +#' @section Data frames: +#' If the proxy for `x` is a data frame, the proxy function is automatically +#' recursively applied on all columns as well. 
After applying the proxy +#' recursively, if there are any data frame columns present in the proxy, then +#' they are unpacked. Finally, if the resulting data frame only has a single +#' column, then it is unwrapped and a vector is returned as the proxy. +#' +#' @param x A vector x. +#' @inheritParams rlang::args_dots_empty +#' +#' @return A 1d atomic vector or a data frame. +#' @keywords internal +#' +#' @section Dependencies: +#' - [vec_proxy_equal()] called by default +#' +#' @export +vec_proxy_missing <- function(x, ...) { + check_dots_empty0(...) + return(.Call(vctrs_proxy_missing, x)) + UseMethod("vec_proxy_missing") +} + +#' @export +vec_proxy_missing.default <- function(x, ...) { + stop_native_implementation("vec_proxy_missing.default") +} + #' @rdname missing #' @export vec_detect_missing <- function(x) { diff --git a/R/type-asis.R b/R/type-asis.R index 8ba2b967b..2fc236781 100644 --- a/R/type-asis.R +++ b/R/type-asis.R @@ -44,6 +44,12 @@ vec_proxy_equal.AsIs <- function(x, ...) { vec_proxy_equal(x) } +#' @export +vec_proxy_missing.AsIs <- function(x, ...) { + x <- asis_strip(x) + vec_proxy_missing(x) +} + #' @export vec_proxy_compare.AsIs <- function(x, ...) { x <- asis_strip(x) diff --git a/R/type-bare.R b/R/type-bare.R index 84cf6e470..9d9c05bb6 100644 --- a/R/type-bare.R +++ b/R/type-bare.R @@ -375,6 +375,17 @@ vec_proxy_equal.array <- function(x, ...) { vec_proxy_equal(x) } +# missing -------------------------------------------------------------- + +#' @export +vec_proxy_missing.array <- function(x, ...) { + # The conversion to data frame is only a stopgap, in the long + # term, we'll detect missing values in arrays natively instead + # of going through a data frame. 
+ x <- as.data.frame(x) + vec_proxy_missing(x) +} + # compare ------------------------------------------------------------ #' @export diff --git a/R/type-date-time.R b/R/type-date-time.R index 1369b67a5..798a11528 100644 --- a/R/type-date-time.R +++ b/R/type-date-time.R @@ -66,6 +66,11 @@ vec_proxy_equal.POSIXlt <- function(x, ...) { vec_proxy_equal(x, ...) } #' @export +vec_proxy_missing.POSIXlt <- function(x, ...) { + x <- vec_cast(x, new_datetime(tzone = tzone(x))) + vec_proxy_missing(x, ...) +} +#' @export vec_proxy_compare.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_compare(x) diff --git a/man/theory-faq-coercion.Rd b/man/theory-faq-coercion.Rd index 606acd748..62273f4a9 100644 --- a/man/theory-faq-coercion.Rd +++ b/man/theory-faq-coercion.Rd @@ -92,7 +92,7 @@ character vectors than to round numbers: \if{html}{\out{
}}\preformatted{# Two factors are compatible vec_ptype2(factor("a"), factor("b")) -#> factor() +#> factor(0) #> Levels: a b # Factors are compatible with a character @@ -141,11 +141,11 @@ the inputs are permuted. This is not always possible, for example factor levels are aggregated in order: \if{html}{\out{
}}\preformatted{vec_ptype2(factor(c("a", "c")), factor("b")) -#> factor() +#> factor(0) #> Levels: a c b vec_ptype2(factor("b"), factor(c("a", "c"))) -#> factor() +#> factor(0) #> Levels: b a c }\if{html}{\out{
}} diff --git a/man/vec_proxy_equal.Rd b/man/vec_proxy_equal.Rd index e8aea2473..64a053c8e 100644 --- a/man/vec_proxy_equal.Rd +++ b/man/vec_proxy_equal.Rd @@ -18,8 +18,7 @@ A 1d atomic vector or a data frame. Returns a proxy object (i.e. an atomic vector or data frame of atomic vectors). For \link{vctr}s, this determines the behaviour of \code{==} and \code{!=} (via \code{\link[=vec_equal]{vec_equal()}}); \code{\link[=unique]{unique()}}, \code{\link[=duplicated]{duplicated()}} (via -\code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}); \code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}} -(via \code{\link[=vec_detect_missing]{vec_detect_missing()}}). +\code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}). } \details{ The default method calls \code{\link[=vec_proxy]{vec_proxy()}}, as the default underlying diff --git a/man/vec_proxy_missing.Rd b/man/vec_proxy_missing.Rd new file mode 100644 index 000000000..8888748ad --- /dev/null +++ b/man/vec_proxy_missing.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/missing.R +\name{vec_proxy_missing} +\alias{vec_proxy_missing} +\title{Missing proxy} +\usage{ +vec_proxy_missing(x, ...) +} +\arguments{ +\item{x}{A vector x.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +A 1d atomic vector or a data frame. +} +\description{ +Returns a proxy object (i.e. an atomic vector or data frame of atomic +vectors). For \link{vctr}s, this determines the behaviour of +\code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}} (via \code{\link[=vec_detect_missing]{vec_detect_missing()}}). +} +\details{ +The default method calls \code{\link[=vec_proxy_equal]{vec_proxy_equal()}}, as the default +equal-able proxy should be used to detect missingness in most cases. 
+} +\section{Data frames}{ + +If the proxy for \code{x} is a data frame, the proxy function is automatically +recursively applied on all columns as well. After applying the proxy +recursively, if there are any data frame columns present in the proxy, then +they are unpacked. Finally, if the resulting data frame only has a single +column, then it is unwrapped and a vector is returned as the proxy. +} + +\section{Dependencies}{ + +\itemize{ +\item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} called by default +} +} + +\keyword{internal} diff --git a/src/complete.c b/src/complete.c index 1723f0c21..0d90bd8e1 100644 --- a/src/complete.c +++ b/src/complete.c @@ -49,7 +49,7 @@ static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out); // [[ include("complete.h") ]] SEXP vec_detect_complete(SEXP x) { - SEXP proxy = PROTECT(vec_proxy_equal(x)); + SEXP proxy = PROTECT(vec_proxy_missing(x)); R_len_t size = vec_size(proxy); diff --git a/src/decl/proxy-decl.h b/src/decl/proxy-decl.h index 39623c500..8f87f900e 100644 --- a/src/decl/proxy-decl.h +++ b/src/decl/proxy-decl.h @@ -1,12 +1,15 @@ r_obj* syms_vec_proxy; r_obj* syms_vec_proxy_equal; r_obj* syms_vec_proxy_equal_array; +r_obj* syms_vec_proxy_missing; +r_obj* syms_vec_proxy_missing_array; r_obj* syms_vec_proxy_compare; r_obj* syms_vec_proxy_compare_array; r_obj* syms_vec_proxy_order; r_obj* syms_vec_proxy_order_array; r_obj* fns_vec_proxy_equal_array; +r_obj* fns_vec_proxy_missing_array; r_obj* fns_vec_proxy_compare_array; r_obj* fns_vec_proxy_order_array; @@ -16,6 +19,8 @@ r_obj* vec_proxy_2(r_obj* x, enum vctrs_recurse recurse); static inline r_obj* vec_proxy_equal_impl(r_obj* x); static inline +r_obj* vec_proxy_missing_impl(r_obj* x); +static inline r_obj* vec_proxy_compare_impl(r_obj* x); static inline r_obj* vec_proxy_order_impl(r_obj* x); @@ -26,6 +31,12 @@ r_obj* vec_proxy_equal_method(r_obj* x); static inline r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method); +static inline +r_obj* 
vec_proxy_missing_method(r_obj* x); + +static inline +r_obj* vec_proxy_missing_invoke(r_obj* x, r_obj* method); + static inline r_obj* vec_proxy_compare_method(r_obj* x); diff --git a/src/init.c b/src/init.c index 3f92af630..3e10ea922 100644 --- a/src/init.c +++ b/src/init.c @@ -59,6 +59,7 @@ extern r_obj* ffi_vec_restore(r_obj*, r_obj*); extern r_obj* ffi_vec_restore_recurse(r_obj*, r_obj*); extern r_obj* ffi_vec_restore_default(r_obj*, r_obj*); extern SEXP vec_proxy_equal(SEXP); +extern SEXP vec_proxy_missing(SEXP); extern SEXP vec_proxy_compare(SEXP); extern SEXP vec_proxy_order(SEXP); extern r_obj* ffi_df_proxy(r_obj*, r_obj*); @@ -245,6 +246,7 @@ static const R_CallMethodDef CallEntries[] = { {"ffi_vec_proxy", (DL_FUNC) &vec_proxy, 1}, {"ffi_vec_proxy_recurse", (DL_FUNC) &vec_proxy_recurse, 1}, {"vctrs_proxy_equal", (DL_FUNC) &vec_proxy_equal, 1}, + {"vctrs_proxy_missing", (DL_FUNC) &vec_proxy_missing, 1}, {"vctrs_proxy_compare", (DL_FUNC) &vec_proxy_compare, 1}, {"vctrs_proxy_order", (DL_FUNC) &vec_proxy_order, 1}, {"ffi_df_proxy", (DL_FUNC) &ffi_df_proxy, 2}, diff --git a/src/missing.c b/src/missing.c index d4058f170..82a4f12bc 100644 --- a/src/missing.c +++ b/src/missing.c @@ -9,7 +9,7 @@ r_obj* ffi_vec_detect_missing(r_obj* x) { // [[ include("missing.h") ]] r_obj* vec_detect_missing(r_obj* x) { - r_obj* proxy = KEEP(vec_proxy_equal(x)); + r_obj* proxy = KEEP(vec_proxy_missing(x)); r_obj* out = proxy_detect_missing(proxy); FREE(1); return out; @@ -269,7 +269,7 @@ bool vec_any_missing(r_obj* x) { } r_ssize vec_first_missing(r_obj* x) { - r_obj* proxy = KEEP(vec_proxy_equal(x)); + r_obj* proxy = KEEP(vec_proxy_missing(x)); r_ssize out = proxy_first_missing(proxy); FREE(1); return out; diff --git a/src/proxy.c b/src/proxy.c index b17ea8365..9f78824e7 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -67,6 +67,20 @@ r_obj* vec_proxy_equal(r_obj* x) { return out; } +// [[ register() ]] +r_obj* vec_proxy_missing(r_obj* x) { + r_obj* out = 
KEEP(vec_proxy_missing_impl(x)); + + if (is_data_frame(out)) { + // Automatically proxy df-proxies recursively. + // Also flattens and unwraps them (#1537, #1664). + out = df_proxy(out, VCTRS_PROXY_KIND_missing); + } + + FREE(1); + return out; +} + // [[ register() ]] r_obj* vec_proxy_compare(r_obj* x) { r_obj* out = KEEP(vec_proxy_compare_impl(x)); @@ -111,6 +125,10 @@ r_obj* vec_proxy_equal_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_equal_method, vec_proxy_equal_invoke); } static inline +r_obj* vec_proxy_missing_impl(r_obj* x) { + VEC_PROXY_KIND_IMPL(vec_proxy_missing_method, vec_proxy_missing_invoke); +} +static inline r_obj* vec_proxy_compare_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_compare_method, vec_proxy_compare_invoke); } @@ -164,6 +182,10 @@ r_obj* vec_proxy_equal_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_equal", fns_vec_proxy_equal_array); } static inline +r_obj* vec_proxy_missing_method(r_obj* x) { + return vec_proxy_method_impl(x, "vec_proxy_missing", fns_vec_proxy_missing_array); +} +static inline r_obj* vec_proxy_compare_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_compare", fns_vec_proxy_compare_array); } @@ -194,6 +216,10 @@ r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_equal, vec_proxy); } static inline +r_obj* vec_proxy_missing_invoke(r_obj* x, r_obj* method) { + return vec_proxy_invoke_impl(x, method, syms_vec_proxy_missing, &vec_proxy_equal_impl); +} +static inline r_obj* vec_proxy_compare_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_compare, &vec_proxy_equal_impl); } @@ -222,6 +248,7 @@ r_obj* df_proxy(r_obj* x, enum vctrs_proxy_kind kind) { switch (kind) { case VCTRS_PROXY_KIND_equal: DF_PROXY(vec_proxy_equal); break; + case VCTRS_PROXY_KIND_missing: DF_PROXY(vec_proxy_missing); break; case VCTRS_PROXY_KIND_compare: DF_PROXY(vec_proxy_compare); break; case VCTRS_PROXY_KIND_order: 
DF_PROXY(vec_proxy_order); break; } @@ -264,6 +291,9 @@ void vctrs_init_data(r_obj* ns) { syms_vec_proxy_equal = r_sym("vec_proxy_equal"); syms_vec_proxy_equal_array = r_sym("vec_proxy_equal.array"); + syms_vec_proxy_missing = r_sym("vec_proxy_missing"); + syms_vec_proxy_missing_array = r_sym("vec_proxy_missing.array"); + syms_vec_proxy_compare = r_sym("vec_proxy_compare"); syms_vec_proxy_compare_array = r_sym("vec_proxy_compare.array"); @@ -271,6 +301,7 @@ void vctrs_init_data(r_obj* ns) { syms_vec_proxy_order_array = r_sym("vec_proxy_order.array"); fns_vec_proxy_equal_array = r_env_get(ns, syms_vec_proxy_equal_array); + fns_vec_proxy_missing_array = r_env_get(ns, syms_vec_proxy_missing_array); fns_vec_proxy_compare_array = r_env_get(ns, syms_vec_proxy_compare_array); fns_vec_proxy_order_array = r_env_get(ns, syms_vec_proxy_order_array); } @@ -278,11 +309,14 @@ void vctrs_init_data(r_obj* ns) { r_obj* syms_vec_proxy = NULL; r_obj* syms_vec_proxy_equal = NULL; r_obj* syms_vec_proxy_equal_array = NULL; +r_obj* syms_vec_proxy_missing = NULL; +r_obj* syms_vec_proxy_missing_array = NULL; r_obj* syms_vec_proxy_compare = NULL; r_obj* syms_vec_proxy_compare_array = NULL; r_obj* syms_vec_proxy_order = NULL; r_obj* syms_vec_proxy_order_array = NULL; r_obj* fns_vec_proxy_equal_array = NULL; +r_obj* fns_vec_proxy_missing_array = NULL; r_obj* fns_vec_proxy_compare_array = NULL; r_obj* fns_vec_proxy_order_array = NULL; diff --git a/src/proxy.h b/src/proxy.h index c33bafd9a..cf896822c 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -5,6 +5,7 @@ r_obj* vec_proxy(r_obj* x); r_obj* vec_proxy_equal(r_obj* x); +r_obj* vec_proxy_missing(r_obj* x); r_obj* vec_proxy_compare(r_obj* x); r_obj* vec_proxy_order(r_obj* x); r_obj* vec_proxy_recurse(r_obj* x); diff --git a/src/vctrs.h b/src/vctrs.h index bedce903d..745844341 100644 --- a/src/vctrs.h +++ b/src/vctrs.h @@ -68,12 +68,14 @@ bool vec_is_unspecified(SEXP x); enum vctrs_proxy_kind { VCTRS_PROXY_KIND_equal = 0, 
VCTRS_PROXY_KIND_compare, - VCTRS_PROXY_KIND_order + VCTRS_PROXY_KIND_order, + VCTRS_PROXY_KIND_missing /* NOTE(review): appended last so compare/order keep their integer values; ffi_df_proxy appears to take `kind` from R - confirm no R-side constants mirror this enum */ }; SEXP vec_proxy(SEXP x); SEXP vec_proxy_equal(SEXP x); +SEXP vec_proxy_missing(SEXP x); SEXP vec_proxy_compare(SEXP x); SEXP vec_proxy_order(SEXP x); SEXP vec_proxy_unwrap(SEXP x); diff --git a/tests/testthat/test-complete.R b/tests/testthat/test-complete.R index 0ca0ca4a6..a8103f99f 100644 --- a/tests/testthat/test-complete.R +++ b/tests/testthat/test-complete.R @@ -78,6 +78,20 @@ test_that("takes the equality proxy", { expect_identical(vec_detect_complete(df), expect) }) +test_that("takes the missing proxy if defined", { + local_methods( + vec_proxy_missing.vctrs_foobar = function(x, ...) ( + data_frame(a=ifelse(x$a == -99, NA, x$a), b=x$b) + ), + ) + + df <- foobar(data_frame(a = c(1, 2, -99), b = c(1, NA, 2))) + + expect <- c(TRUE, FALSE, FALSE) + + expect_identical(vec_detect_complete(df), expect) +}) + test_that("columns with a data frame proxy are incomplete if any columns of the proxy are incomplete (#1404)", { df <- data_frame( x = c(NA, 0, 1, 2, 3), diff --git a/tests/testthat/test-missing.R b/tests/testthat/test-missing.R index 4d3456bc8..171ebd659 100644 --- a/tests/testthat/test-missing.R +++ b/tests/testthat/test-missing.R @@ -143,3 +143,25 @@ test_that(">0 row, 0 col data frame always returns `TRUE` (#1585)", { any(vec_detect_missing(df)) ) }) + +# ------------------------------------------------------------------------------ +# vec_proxy_missing() + +test_that("vec_proxy_missing()/vec_any_missing() takes vec_proxy_equal() by default", { + local_methods( + vec_proxy_equal.vctrs_foobar = function(x, ...) 
(ifelse(x == -99, NA, x)), + ) + + expect_identical(vec_detect_missing(foobar(c(1, 2, -99, 3))), c(FALSE, FALSE, TRUE, FALSE)) + expect_identical(vec_any_missing(foobar(c(1, 2, -99, 3))), TRUE) + expect_identical(vec_any_missing(foobar(c(1, 2, 3))), FALSE) +}) + +test_that("vec_detect_missing() calls vec_proxy_missing(), if implemented", { + local_methods( + vec_proxy_missing.vctrs_foobar = function(x, ...) (ifelse(x == -99, NA, x)), + ) + expect_identical(vec_detect_missing(foobar(c(1, 2, -99, 3))), c(FALSE, FALSE, TRUE, FALSE)) + expect_identical(vec_any_missing(foobar(c(1, 2, -99, 3))), TRUE) + expect_identical(vec_any_missing(foobar(c(1, 2, 3))), FALSE) +}) diff --git a/tests/testthat/test-vctrs.R b/tests/testthat/test-vctrs.R index 56aaf99f8..790549f6d 100644 --- a/tests/testthat/test-vctrs.R +++ b/tests/testthat/test-vctrs.R @@ -4,6 +4,7 @@ test_that("generics are extensible", { expect_error(vec_restore(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_equal(NA, NA), class = "rlib_error_dots_nonempty") + expect_error(vec_proxy_missing(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_compare(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype2(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_abbr(NA, NA), class = "rlib_error_dots_nonempty")