Skip to content

Commit

Permalink
Merge pull request #1204 from tidymodels/stricter-sparsevctrs-tests
Browse files Browse the repository at this point in the history
Stricter sparsevctrs tests
  • Loading branch information
EmilHvitfeldt authored Sep 18, 2024
2 parents 6bf39b5 + 4cf99f0 commit ee072ce
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 41 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

* `fit_xy()` can now take dgCMatrix input for `x` argument (#1121).

* `fit()` and `fit_xy()` can now take sparse tibbles as data values (#1165).
* `fit_xy()` can now take sparse tibbles as data values (#1165).

* `predict()` can now take dgCMatrix and sparse tibble input for `new_data` argument, and error informatively when model doesn't support it (#1167).

Expand Down
7 changes: 7 additions & 0 deletions R/convert_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
)
}

if (is_sparse_tibble(data)) {
cli::cli_abort(
"Sparse data cannot be used with formula interface. Please use
{.fn fit_xy} instead."
)
}

if (remove_intercept) {
data <- data[, colnames(data) != "(Intercept)", drop = FALSE]
}
Expand Down
4 changes: 4 additions & 0 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,10 @@ prepare_data <- function(object, new_data) {
if (allow_sparse(object) && inherits(new_data, "dgCMatrix")) {
return(new_data)
}
if (allow_sparse(object) && is_sparse_tibble(new_data)) {
new_data <- sparsevctrs::coerce_to_sparse_matrix(new_data)
return(new_data)
}

fit_interface <- object$spec$method$fit$interface
switch(
Expand Down
36 changes: 30 additions & 6 deletions tests/testthat/_snaps/sparsevctrs.md
Original file line number Diff line number Diff line change
@@ -1,51 +1,75 @@
# sparse tibble can be passed to `fit()
# sparse tibble can be passed to `fit() - supported

Code
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
Condition
Error in `.convert_form_to_xy_fit()`:
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.

# sparse tibble can be passed to `fit() - unsupported

Code
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
Condition
Warning:
`data` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.

# sparse matrix can be passed to `fit()
# sparse matrix can be passed to `fit() - supported

Code
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
Condition
Error in `.convert_form_to_xy_fit()`:
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.

# sparse matrix can be passed to `fit() - unsupported

Code
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
Condition
Warning:
`data` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.

# sparse tibble can be passed to `fit_xy()
# sparse tibble can be passed to `fit_xy() - unsupported

Code
lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1])
Condition
Warning:
`x` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.

# sparse matrices can be passed to `fit_xy()
# sparse matrices can be passed to `fit_xy() - unsupported

Code
lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1])
Condition
Error in `fit_xy()`:
! `x` is a sparse matrix, but `linear_reg()` with engine "lm" doesn't accept that.

# sparse tibble can be passed to `predict()
# sparse tibble can be passed to `predict() - unsupported

Code
preds <- predict(lm_fit, sparse_mtcars)
Condition
Warning:
`x` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.

# sparse matrices can be passed to `predict()
# sparse matrices can be passed to `predict() - unsupported

Code
predict(lm_fit, sparse_mtcars)
Condition
Error in `predict()`:
! `x` is a sparse matrix, but `linear_reg()` with engine "lm" doesn't accept that.

# sparse data work with xgboost engine

Code
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
Condition
Error in `.convert_form_to_xy_fit()`:
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.

# to_sparse_data_frame() is used correctly

Code
Expand Down
14 changes: 12 additions & 2 deletions tests/testthat/helper-objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ is_tf_ok <- function() {
# ------------------------------------------------------------------------------
# For sparse tibble testing

sparse_hotel_rates <- function() {
sparse_hotel_rates <- function(tibble = FALSE) {
# 99.2 sparsity
hotel_rates <- modeldata::hotel_rates

Expand All @@ -49,5 +49,15 @@ sparse_hotel_rates <- function() {
)

res <- as.matrix(res)
Matrix::Matrix(res, sparse = TRUE)
res <- Matrix::Matrix(res, sparse = TRUE)

if (tibble) {
res <- sparsevctrs::coerce_to_sparse_tibble(res)

# materialize outcome
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
res$avg_price_per_room <- res$avg_price_per_room[]
}

res
}
115 changes: 83 additions & 32 deletions tests/testthat/test-sparsevctrs.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
test_that("sparse tibble can be passed to `fit()", {
test_that("sparse tibble can be passed to `fit() - supported", {
skip_if_not_installed("xgboost")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

hotel_data <- sparse_hotel_rates()
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
hotel_data <- sparse_hotel_rates(tibble = TRUE)

spec <- boost_tree() %>%
set_mode("regression") %>%
set_engine("xgboost")

expect_no_error(
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)

expect_snapshot(
error = TRUE,
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
)
})

test_that("sparse tibble can be passed to `fit() - unsupported", {
hotel_data <- sparse_hotel_rates(tibble = TRUE)

spec <- linear_reg() %>%
set_mode("regression") %>%
Expand All @@ -21,19 +28,28 @@ test_that("sparse tibble can be passed to `fit()", {
)
})

test_that("sparse matrix can be passed to `fit()", {
test_that("sparse matrix can be passed to `fit() - supported", {
skip_if_not_installed("xgboost")

# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

hotel_data <- sparse_hotel_rates()

spec <- boost_tree() %>%
set_mode("regression") %>%
set_engine("xgboost")

expect_no_error(
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
expect_snapshot(
error = TRUE,
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
)

})

test_that("sparse matrix can be passed to `fit() - unsupported", {
hotel_data <- sparse_hotel_rates()

spec <- linear_reg() %>%
set_mode("regression") %>%
set_engine("lm")
Expand All @@ -43,19 +59,25 @@ test_that("sparse matrix can be passed to `fit()", {
)
})

test_that("sparse tibble can be passed to `fit_xy()", {
test_that("sparse tibble can be passed to `fit_xy() - supported", {
skip_if_not_installed("xgboost")

hotel_data <- sparse_hotel_rates()
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

hotel_data <- sparse_hotel_rates(tibble = TRUE)

spec <- boost_tree() %>%
set_mode("regression") %>%
set_engine("xgboost")

expect_no_error(
lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
)
})

test_that("sparse tibble can be passed to `fit_xy() - unsupported", {
hotel_data <- sparse_hotel_rates(tibble = TRUE)

spec <- linear_reg() %>%
set_mode("regression") %>%
Expand All @@ -66,8 +88,11 @@ test_that("sparse tibble can be passed to `fit_xy()", {
)
})

test_that("sparse matrices can be passed to `fit_xy()", {
test_that("sparse matrices can be passed to `fit_xy() - supported", {
skip_if_not_installed("xgboost")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

hotel_data <- sparse_hotel_rates()

Expand All @@ -76,8 +101,12 @@ test_that("sparse matrices can be passed to `fit_xy()", {
set_engine("xgboost")

expect_no_error(
lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
)
})

test_that("sparse matrices can be passed to `fit_xy() - unsupported", {
hotel_data <- sparse_hotel_rates()

spec <- linear_reg() %>%
set_mode("regression") %>%
Expand All @@ -89,11 +118,13 @@ test_that("sparse matrices can be passed to `fit_xy()", {
)
})

test_that("sparse tibble can be passed to `predict()", {
test_that("sparse tibble can be passed to `predict() - supported", {
skip_if_not_installed("ranger")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

hotel_data <- sparse_hotel_rates()
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
hotel_data <- sparse_hotel_rates(tibble = TRUE)

spec <- rand_forest(trees = 10) %>%
set_mode("regression") %>%
Expand All @@ -104,6 +135,10 @@ test_that("sparse tibble can be passed to `predict()", {
expect_no_error(
predict(tree_fit, hotel_data)
)
})

test_that("sparse tibble can be passed to `predict() - unsupported", {
hotel_data <- sparse_hotel_rates(tibble = TRUE)

spec <- linear_reg() %>%
set_mode("regression") %>%
Expand All @@ -120,8 +155,11 @@ test_that("sparse tibble can be passed to `predict()", {
)
})

test_that("sparse matrices can be passed to `predict()", {
test_that("sparse matrices can be passed to `predict() - supported", {
skip_if_not_installed("ranger")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

hotel_data <- sparse_hotel_rates()

Expand All @@ -134,6 +172,10 @@ test_that("sparse matrices can be passed to `predict()", {
expect_no_error(
predict(tree_fit, hotel_data)
)
})

test_that("sparse matrices can be passed to `predict() - unsupported", {
hotel_data <- sparse_hotel_rates()

spec <- linear_reg() %>%
set_mode("regression") %>%
Expand All @@ -151,6 +193,9 @@ test_that("sparse matrices can be passed to `predict()", {

test_that("sparse data work with xgboost engine", {
skip_if_not_installed("xgboost")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

spec <- boost_tree() %>%
set_mode("regression") %>%
Expand All @@ -159,35 +204,38 @@ test_that("sparse data work with xgboost engine", {
hotel_data <- sparse_hotel_rates()

expect_no_error(
tree_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
)

expect_no_error(
predict(tree_fit, hotel_data)
predict(xgb_fit, hotel_data)
)

hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
hotel_data <- sparse_hotel_rates(tibble = TRUE)


expect_no_error(
tree_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
expect_snapshot(
error = TRUE,
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
)

expect_no_error(
predict(tree_fit, hotel_data)
predict(xgb_fit, hotel_data)
)

expect_no_error(
tree_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
)

expect_no_error(
predict(tree_fit, hotel_data)
predict(xgb_fit, hotel_data)
)
})

test_that("to_sparse_data_frame() is used correctly", {
skip_if_not_installed("xgboost")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

local_mocked_bindings(
to_sparse_data_frame = function(x, object) {
Expand Down Expand Up @@ -228,6 +276,9 @@ test_that("to_sparse_data_frame() is used correctly", {

test_that("maybe_sparse_matrix() is used correctly", {
skip_if_not_installed("xgboost")
# Make materialization of sparse vectors throw an error
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
withr::local_options("sparsevctrs.verbose_materialize" = 3)

local_mocked_bindings(
maybe_sparse_matrix = function(x) {
Expand Down

0 comments on commit ee072ce

Please sign in to comment.