diff --git a/NAMESPACE b/NAMESPACE
index 85dd9fe84..9fbdb6315 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -321,6 +321,10 @@ S3method(vec_proxy_equal,array)
S3method(vec_proxy_equal,default)
S3method(vec_proxy_equal,integer64)
S3method(vec_proxy_equal,numeric_version)
+S3method(vec_proxy_missing,AsIs)
+S3method(vec_proxy_missing,POSIXlt)
+S3method(vec_proxy_missing,array)
+S3method(vec_proxy_missing,default)
S3method(vec_proxy_order,AsIs)
S3method(vec_proxy_order,array)
S3method(vec_proxy_order,default)
@@ -597,6 +601,7 @@ export(vec_order)
export(vec_proxy)
export(vec_proxy_compare)
export(vec_proxy_equal)
+export(vec_proxy_missing)
export(vec_proxy_order)
export(vec_ptype)
export(vec_ptype2)
diff --git a/R/equal.R b/R/equal.R
index de4bc7d4f..25c05510f 100644
--- a/R/equal.R
+++ b/R/equal.R
@@ -3,8 +3,7 @@
#' Returns a proxy object (i.e. an atomic vector or data frame of atomic
#' vectors). For [vctr]s, this determines the behaviour of `==` and
#' `!=` (via [vec_equal()]); [unique()], [duplicated()] (via
-#' [vec_unique()] and [vec_duplicate_detect()]); [is.na()] and [anyNA()]
-#' (via [vec_detect_missing()]).
+#' [vec_unique()] and [vec_duplicate_detect()]).
#'
#' The default method calls [vec_proxy()], as the default underlying
#' vector data should be equal-able in most cases. If your class is
diff --git a/R/missing.R b/R/missing.R
index afda18d2c..870e8859c 100644
--- a/R/missing.R
+++ b/R/missing.R
@@ -48,6 +48,43 @@
#' df
NULL
+#' Missing proxy
+#'
+#' Returns a proxy object (i.e. an atomic vector or data frame of atomic
+#' vectors). For [vctr]s, this determines the behaviour of
+#' [is.na()] and [anyNA()] (via [vec_detect_missing()]).
+#'
+#' The default method calls [vec_proxy_equal()], as the default
+#' equal-able proxy should be used to detect missingness in most cases.
+#'
+#' @section Data frames:
+#' If the proxy for `x` is a data frame, the proxy function is automatically
+#' recursively applied on all columns as well. After applying the proxy
+#' recursively, if there are any data frame columns present in the proxy, then
+#' they are unpacked. Finally, if the resulting data frame only has a single
+#' column, then it is unwrapped and a vector is returned as the proxy.
+#'
+#' @param x A vector x.
+#' @inheritParams rlang::args_dots_empty
+#'
+#' @return A 1d atomic vector or a data frame.
+#' @keywords internal
+#'
+#' @section Dependencies:
+#' - [vec_proxy_equal()] called by default
+#'
+#' @export
+vec_proxy_missing <- function(x, ...) {
+ check_dots_empty0(...)
+ return(.Call(vctrs_proxy_missing, x)) # the proxy is computed by the registered C routine
+ UseMethod("vec_proxy_missing") # unreachable after return(); presumably marks this as an S3 generic for tooling -- TODO confirm
+}
+
+#' @export
+vec_proxy_missing.default <- function(x, ...) {
+ stop_native_implementation("vec_proxy_missing.default") # the generic returns from .Call() before dispatching, so R dispatch should not land here
+}
+
#' @rdname missing
#' @export
vec_detect_missing <- function(x) {
diff --git a/R/type-asis.R b/R/type-asis.R
index 8ba2b967b..2fc236781 100644
--- a/R/type-asis.R
+++ b/R/type-asis.R
@@ -44,6 +44,12 @@ vec_proxy_equal.AsIs <- function(x, ...) {
vec_proxy_equal(x)
}
+#' @export
+vec_proxy_missing.AsIs <- function(x, ...) {
+  # Proxy the result of `asis_strip()` rather than the AsIs object itself.
+  vec_proxy_missing(asis_strip(x))
+}
+
#' @export
vec_proxy_compare.AsIs <- function(x, ...) {
x <- asis_strip(x)
diff --git a/R/type-bare.R b/R/type-bare.R
index 84cf6e470..9d9c05bb6 100644
--- a/R/type-bare.R
+++ b/R/type-bare.R
@@ -375,6 +375,17 @@ vec_proxy_equal.array <- function(x, ...) {
vec_proxy_equal(x)
}
+# missing --------------------------------------------------------------
+
+#' @export
+vec_proxy_missing.array <- function(x, ...) {
+ # Proxy the array through its data frame form (`as.data.frame()`) and
+ # take the missing proxy of that data frame. The conversion is only a
+ # stopgap until arrays are handled natively.
+ x <- as.data.frame(x)
+ vec_proxy_missing(x)
+}
+
# compare ------------------------------------------------------------
#' @export
diff --git a/R/type-date-time.R b/R/type-date-time.R
index 1369b67a5..798a11528 100644
--- a/R/type-date-time.R
+++ b/R/type-date-time.R
@@ -66,6 +66,11 @@ vec_proxy_equal.POSIXlt <- function(x, ...) {
vec_proxy_equal(x, ...)
}
#' @export
+vec_proxy_missing.POSIXlt <- function(x, ...) {
+  datetime_ptype <- new_datetime(tzone = tzone(x))
+  vec_proxy_missing(vec_cast(x, datetime_ptype), ...)
+}
+#' @export
vec_proxy_compare.POSIXlt <- function(x, ...) {
x <- vec_cast(x, new_datetime(tzone = tzone(x)))
vec_proxy_compare(x)
diff --git a/man/theory-faq-coercion.Rd b/man/theory-faq-coercion.Rd
index 606acd748..62273f4a9 100644
--- a/man/theory-faq-coercion.Rd
+++ b/man/theory-faq-coercion.Rd
@@ -92,7 +92,7 @@ character vectors than to round numbers:
\if{html}{\out{
}}\preformatted{# Two factors are compatible
vec_ptype2(factor("a"), factor("b"))
-#> factor()
+#> factor(0)
#> Levels: a b
# Factors are compatible with a character
@@ -141,11 +141,11 @@ the inputs are permuted. This is not always possible, for example factor
levels are aggregated in order:
\if{html}{\out{
}}\preformatted{vec_ptype2(factor(c("a", "c")), factor("b"))
-#> factor()
+#> factor(0)
#> Levels: a c b
vec_ptype2(factor("b"), factor(c("a", "c")))
-#> factor()
+#> factor(0)
#> Levels: b a c
}\if{html}{\out{
}}
diff --git a/man/vec_proxy_equal.Rd b/man/vec_proxy_equal.Rd
index e8aea2473..64a053c8e 100644
--- a/man/vec_proxy_equal.Rd
+++ b/man/vec_proxy_equal.Rd
@@ -18,8 +18,7 @@ A 1d atomic vector or a data frame.
Returns a proxy object (i.e. an atomic vector or data frame of atomic
vectors). For \link{vctr}s, this determines the behaviour of \code{==} and
\code{!=} (via \code{\link[=vec_equal]{vec_equal()}}); \code{\link[=unique]{unique()}}, \code{\link[=duplicated]{duplicated()}} (via
-\code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}); \code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}}
-(via \code{\link[=vec_detect_missing]{vec_detect_missing()}}).
+\code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}).
}
\details{
The default method calls \code{\link[=vec_proxy]{vec_proxy()}}, as the default underlying
diff --git a/man/vec_proxy_missing.Rd b/man/vec_proxy_missing.Rd
new file mode 100644
index 000000000..8888748ad
--- /dev/null
+++ b/man/vec_proxy_missing.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/missing.R
+\name{vec_proxy_missing}
+\alias{vec_proxy_missing}
+\title{Missing proxy}
+\usage{
+vec_proxy_missing(x, ...)
+}
+\arguments{
+\item{x}{A vector x.}
+
+\item{...}{These dots are for future extensions and must be empty.}
+}
+\value{
+A 1d atomic vector or a data frame.
+}
+\description{
+Returns a proxy object (i.e. an atomic vector or data frame of atomic
+vectors). For \link{vctr}s, this determines the behaviour of
+\code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}} (via \code{\link[=vec_detect_missing]{vec_detect_missing()}}).
+}
+\details{
+The default method calls \code{\link[=vec_proxy_equal]{vec_proxy_equal()}}, as the default
+equal-able proxy should be used to detect missingness in most cases.
+}
+\section{Data frames}{
+
+If the proxy for \code{x} is a data frame, the proxy function is automatically
+recursively applied on all columns as well. After applying the proxy
+recursively, if there are any data frame columns present in the proxy, then
+they are unpacked. Finally, if the resulting data frame only has a single
+column, then it is unwrapped and a vector is returned as the proxy.
+}
+
+\section{Dependencies}{
+
+\itemize{
+\item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} called by default
+}
+}
+
+\keyword{internal}
diff --git a/src/complete.c b/src/complete.c
index 1723f0c21..0d90bd8e1 100644
--- a/src/complete.c
+++ b/src/complete.c
@@ -49,7 +49,7 @@ static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out);
// [[ include("complete.h") ]]
SEXP vec_detect_complete(SEXP x) {
- SEXP proxy = PROTECT(vec_proxy_equal(x));
+ SEXP proxy = PROTECT(vec_proxy_missing(x));
R_len_t size = vec_size(proxy);
diff --git a/src/decl/proxy-decl.h b/src/decl/proxy-decl.h
index 39623c500..8f87f900e 100644
--- a/src/decl/proxy-decl.h
+++ b/src/decl/proxy-decl.h
@@ -1,12 +1,15 @@
r_obj* syms_vec_proxy;
r_obj* syms_vec_proxy_equal;
r_obj* syms_vec_proxy_equal_array;
+r_obj* syms_vec_proxy_missing;
+r_obj* syms_vec_proxy_missing_array;
r_obj* syms_vec_proxy_compare;
r_obj* syms_vec_proxy_compare_array;
r_obj* syms_vec_proxy_order;
r_obj* syms_vec_proxy_order_array;
r_obj* fns_vec_proxy_equal_array;
+r_obj* fns_vec_proxy_missing_array;
r_obj* fns_vec_proxy_compare_array;
r_obj* fns_vec_proxy_order_array;
@@ -16,6 +19,8 @@ r_obj* vec_proxy_2(r_obj* x, enum vctrs_recurse recurse);
static inline
r_obj* vec_proxy_equal_impl(r_obj* x);
static inline
+r_obj* vec_proxy_missing_impl(r_obj* x);
+static inline
r_obj* vec_proxy_compare_impl(r_obj* x);
static inline
r_obj* vec_proxy_order_impl(r_obj* x);
@@ -26,6 +31,12 @@ r_obj* vec_proxy_equal_method(r_obj* x);
static inline
r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method);
+static inline
+r_obj* vec_proxy_missing_method(r_obj* x);
+
+static inline
+r_obj* vec_proxy_missing_invoke(r_obj* x, r_obj* method);
+
static inline
r_obj* vec_proxy_compare_method(r_obj* x);
diff --git a/src/init.c b/src/init.c
index 3f92af630..3e10ea922 100644
--- a/src/init.c
+++ b/src/init.c
@@ -59,6 +59,7 @@ extern r_obj* ffi_vec_restore(r_obj*, r_obj*);
extern r_obj* ffi_vec_restore_recurse(r_obj*, r_obj*);
extern r_obj* ffi_vec_restore_default(r_obj*, r_obj*);
extern SEXP vec_proxy_equal(SEXP);
+extern SEXP vec_proxy_missing(SEXP);
extern SEXP vec_proxy_compare(SEXP);
extern SEXP vec_proxy_order(SEXP);
extern r_obj* ffi_df_proxy(r_obj*, r_obj*);
@@ -245,6 +246,7 @@ static const R_CallMethodDef CallEntries[] = {
{"ffi_vec_proxy", (DL_FUNC) &vec_proxy, 1},
{"ffi_vec_proxy_recurse", (DL_FUNC) &vec_proxy_recurse, 1},
{"vctrs_proxy_equal", (DL_FUNC) &vec_proxy_equal, 1},
+ {"vctrs_proxy_missing", (DL_FUNC) &vec_proxy_missing, 1},
{"vctrs_proxy_compare", (DL_FUNC) &vec_proxy_compare, 1},
{"vctrs_proxy_order", (DL_FUNC) &vec_proxy_order, 1},
{"ffi_df_proxy", (DL_FUNC) &ffi_df_proxy, 2},
diff --git a/src/missing.c b/src/missing.c
index d4058f170..82a4f12bc 100644
--- a/src/missing.c
+++ b/src/missing.c
@@ -9,7 +9,7 @@ r_obj* ffi_vec_detect_missing(r_obj* x) {
// [[ include("missing.h") ]]
r_obj* vec_detect_missing(r_obj* x) {
- r_obj* proxy = KEEP(vec_proxy_equal(x));
+ r_obj* proxy = KEEP(vec_proxy_missing(x));
r_obj* out = proxy_detect_missing(proxy);
FREE(1);
return out;
@@ -269,7 +269,7 @@ bool vec_any_missing(r_obj* x) {
}
r_ssize vec_first_missing(r_obj* x) {
- r_obj* proxy = KEEP(vec_proxy_equal(x));
+ r_obj* proxy = KEEP(vec_proxy_missing(x));
r_ssize out = proxy_first_missing(proxy);
FREE(1);
return out;
diff --git a/src/proxy.c b/src/proxy.c
index b17ea8365..9f78824e7 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -67,6 +67,20 @@ r_obj* vec_proxy_equal(r_obj* x) {
return out;
}
+// [[ register() ]]
+r_obj* vec_proxy_missing(r_obj* x) {
+ r_obj* out = KEEP(vec_proxy_missing_impl(x)); // missing proxy of `x` itself, before any data frame handling
+
+ if (is_data_frame(out)) {
+ // Automatically proxy df-proxies recursively.
+ // Also flattens and unwraps them (#1537, #1664).
+ out = df_proxy(out, VCTRS_PROXY_KIND_missing); // this kind selects DF_PROXY(vec_proxy_missing) inside df_proxy()
+ }
+
+ FREE(1); // balances the single KEEP() above
+ return out;
+}
+
// [[ register() ]]
r_obj* vec_proxy_compare(r_obj* x) {
r_obj* out = KEEP(vec_proxy_compare_impl(x));
@@ -111,6 +125,10 @@ r_obj* vec_proxy_equal_impl(r_obj* x) {
VEC_PROXY_KIND_IMPL(vec_proxy_equal_method, vec_proxy_equal_invoke);
}
static inline
+r_obj* vec_proxy_missing_impl(r_obj* x) {
+ VEC_PROXY_KIND_IMPL(vec_proxy_missing_method, vec_proxy_missing_invoke); // same macro as the equal/compare variants, given the missing-proxy lookup and invoke helpers
+}
+static inline
r_obj* vec_proxy_compare_impl(r_obj* x) {
VEC_PROXY_KIND_IMPL(vec_proxy_compare_method, vec_proxy_compare_invoke);
}
@@ -164,6 +182,10 @@ r_obj* vec_proxy_equal_method(r_obj* x) {
return vec_proxy_method_impl(x, "vec_proxy_equal", fns_vec_proxy_equal_array);
}
static inline
+r_obj* vec_proxy_missing_method(r_obj* x) {
+ return vec_proxy_method_impl(x, "vec_proxy_missing", fns_vec_proxy_missing_array); // presumably resolves the S3 method for `x`; the array method is passed in pre-fetched -- TODO confirm
+}
+static inline
r_obj* vec_proxy_compare_method(r_obj* x) {
return vec_proxy_method_impl(x, "vec_proxy_compare", fns_vec_proxy_compare_array);
}
@@ -194,6 +216,10 @@ r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method) {
return vec_proxy_invoke_impl(x, method, syms_vec_proxy_equal, vec_proxy);
}
static inline
+r_obj* vec_proxy_missing_invoke(r_obj* x, r_obj* method) {
+ return vec_proxy_invoke_impl(x, method, syms_vec_proxy_missing, &vec_proxy_equal_impl); // NOTE(review): last argument is presumably the fallback used when no method exists, i.e. the equality proxy -- confirm
+}
+static inline
r_obj* vec_proxy_compare_invoke(r_obj* x, r_obj* method) {
return vec_proxy_invoke_impl(x, method, syms_vec_proxy_compare, &vec_proxy_equal_impl);
}
@@ -222,6 +248,7 @@ r_obj* df_proxy(r_obj* x, enum vctrs_proxy_kind kind) {
switch (kind) {
case VCTRS_PROXY_KIND_equal: DF_PROXY(vec_proxy_equal); break;
+ case VCTRS_PROXY_KIND_missing: DF_PROXY(vec_proxy_missing); break;
case VCTRS_PROXY_KIND_compare: DF_PROXY(vec_proxy_compare); break;
case VCTRS_PROXY_KIND_order: DF_PROXY(vec_proxy_order); break;
}
@@ -264,6 +291,9 @@ void vctrs_init_data(r_obj* ns) {
syms_vec_proxy_equal = r_sym("vec_proxy_equal");
syms_vec_proxy_equal_array = r_sym("vec_proxy_equal.array");
+ syms_vec_proxy_missing = r_sym("vec_proxy_missing");
+ syms_vec_proxy_missing_array = r_sym("vec_proxy_missing.array");
+
syms_vec_proxy_compare = r_sym("vec_proxy_compare");
syms_vec_proxy_compare_array = r_sym("vec_proxy_compare.array");
@@ -271,6 +301,7 @@ void vctrs_init_data(r_obj* ns) {
syms_vec_proxy_order_array = r_sym("vec_proxy_order.array");
fns_vec_proxy_equal_array = r_env_get(ns, syms_vec_proxy_equal_array);
+ fns_vec_proxy_missing_array = r_env_get(ns, syms_vec_proxy_missing_array);
fns_vec_proxy_compare_array = r_env_get(ns, syms_vec_proxy_compare_array);
fns_vec_proxy_order_array = r_env_get(ns, syms_vec_proxy_order_array);
}
@@ -278,11 +309,14 @@ void vctrs_init_data(r_obj* ns) {
r_obj* syms_vec_proxy = NULL;
r_obj* syms_vec_proxy_equal = NULL;
r_obj* syms_vec_proxy_equal_array = NULL;
+r_obj* syms_vec_proxy_missing = NULL;
+r_obj* syms_vec_proxy_missing_array = NULL;
r_obj* syms_vec_proxy_compare = NULL;
r_obj* syms_vec_proxy_compare_array = NULL;
r_obj* syms_vec_proxy_order = NULL;
r_obj* syms_vec_proxy_order_array = NULL;
r_obj* fns_vec_proxy_equal_array = NULL;
+r_obj* fns_vec_proxy_missing_array = NULL;
r_obj* fns_vec_proxy_compare_array = NULL;
r_obj* fns_vec_proxy_order_array = NULL;
diff --git a/src/proxy.h b/src/proxy.h
index c33bafd9a..cf896822c 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -5,6 +5,7 @@
r_obj* vec_proxy(r_obj* x);
r_obj* vec_proxy_equal(r_obj* x);
+r_obj* vec_proxy_missing(r_obj* x);
r_obj* vec_proxy_compare(r_obj* x);
r_obj* vec_proxy_order(r_obj* x);
r_obj* vec_proxy_recurse(r_obj* x);
diff --git a/src/vctrs.h b/src/vctrs.h
index bedce903d..745844341 100644
--- a/src/vctrs.h
+++ b/src/vctrs.h
@@ -68,12 +68,14 @@ bool vec_is_unspecified(SEXP x);
enum vctrs_proxy_kind {
VCTRS_PROXY_KIND_equal = 0,
+ VCTRS_PROXY_KIND_missing, // NOTE(review): inserted mid-enum, so the implicit values of compare/order shift by one -- confirm nothing relies on them
VCTRS_PROXY_KIND_compare,
VCTRS_PROXY_KIND_order
};
SEXP vec_proxy(SEXP x);
SEXP vec_proxy_equal(SEXP x);
+SEXP vec_proxy_missing(SEXP x);
SEXP vec_proxy_compare(SEXP x);
SEXP vec_proxy_order(SEXP x);
SEXP vec_proxy_unwrap(SEXP x);
diff --git a/tests/testthat/test-complete.R b/tests/testthat/test-complete.R
index 0ca0ca4a6..a8103f99f 100644
--- a/tests/testthat/test-complete.R
+++ b/tests/testthat/test-complete.R
@@ -78,6 +78,20 @@ test_that("takes the equality proxy", {
expect_identical(vec_detect_complete(df), expect)
})
+test_that("takes the missing proxy if defined", {
+  local_methods(
+    vec_proxy_missing.vctrs_foobar = function(x, ...) {
+      data_frame(a = ifelse(x$a == -99, NA, x$a), b = x$b)
+    },
+  )
+
+  # Row 2 is incomplete through `b`; row 3 only through the proxied `a`.
+  df <- foobar(data_frame(a = c(1, 2, -99), b = c(1, NA, 2)))
+  expected <- c(TRUE, FALSE, FALSE)
+
+  expect_identical(vec_detect_complete(df), expected)
+})
+
test_that("columns with a data frame proxy are incomplete if any columns of the proxy are incomplete (#1404)", {
df <- data_frame(
x = c(NA, 0, 1, 2, 3),
diff --git a/tests/testthat/test-missing.R b/tests/testthat/test-missing.R
index 4d3456bc8..171ebd659 100644
--- a/tests/testthat/test-missing.R
+++ b/tests/testthat/test-missing.R
@@ -143,3 +143,25 @@ test_that(">0 row, 0 col data frame always returns `TRUE` (#1585)", {
any(vec_detect_missing(df))
)
})
+
+# ------------------------------------------------------------------------------
+# vec_proxy_missing()
+
+test_that("vec_proxy_missing()/vec_any_missing() takes vec_proxy_equal() by default", {
+  na_if_sentinel <- function(x, ...) ifelse(x == -99, NA, x)
+  local_methods(vec_proxy_equal.vctrs_foobar = na_if_sentinel)
+  with_sentinel <- foobar(c(1, 2, -99, 3))
+
+  expect_identical(vec_detect_missing(with_sentinel), c(FALSE, FALSE, TRUE, FALSE))
+  expect_identical(vec_any_missing(with_sentinel), TRUE)
+  expect_identical(vec_any_missing(foobar(c(1, 2, 3))), FALSE)
+})
+
+test_that("vec_detect_missing() calls vec_proxy_missing(), if implemented", {
+  na_if_sentinel <- function(x, ...) ifelse(x == -99, NA, x)
+  local_methods(vec_proxy_missing.vctrs_foobar = na_if_sentinel)
+  with_sentinel <- foobar(c(1, 2, -99, 3))
+  expect_identical(vec_detect_missing(with_sentinel), c(FALSE, FALSE, TRUE, FALSE))
+  expect_identical(vec_any_missing(with_sentinel), TRUE)
+  expect_identical(vec_any_missing(foobar(c(1, 2, 3))), FALSE)
+})
diff --git a/tests/testthat/test-vctrs.R b/tests/testthat/test-vctrs.R
index 56aaf99f8..790549f6d 100644
--- a/tests/testthat/test-vctrs.R
+++ b/tests/testthat/test-vctrs.R
@@ -4,6 +4,7 @@ test_that("generics are extensible", {
expect_error(vec_restore(NA, NA, NA), class = "rlib_error_dots_nonempty")
expect_error(vec_proxy(NA, NA), class = "rlib_error_dots_nonempty")
expect_error(vec_proxy_equal(NA, NA), class = "rlib_error_dots_nonempty")
+ expect_error(vec_proxy_missing(NA, NA), class = "rlib_error_dots_nonempty")
expect_error(vec_proxy_compare(NA, NA), class = "rlib_error_dots_nonempty")
expect_error(vec_ptype2(NA, NA, NA), class = "rlib_error_dots_nonempty")
expect_error(vec_ptype_abbr(NA, NA), class = "rlib_error_dots_nonempty")