Skip to content

Commit ee072ce

Browse files
Merge pull request #1204 from tidymodels/stricter-sparsevctrs-tests
Stricter sparsevctrs tests
2 parents 6bf39b5 + 4cf99f0 commit ee072ce

File tree

6 files changed

+137
-41
lines changed

6 files changed

+137
-41
lines changed

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
* `fit_xy()` can now take dgCMatrix input for `x` argument (#1121).
44

5-
* `fit()` and `fit_xy()` can now take sparse tibbles as data values (#1165).
5+
* `fit_xy()` can now take sparse tibbles as data values (#1165).
66

77
* `predict()` can now take dgCMatrix and sparse tibble input for `new_data` argument, and error informatively when model doesn't support it (#1167).
88

R/convert_data.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@
4848
)
4949
}
5050

51+
if (is_sparse_tibble(data)) {
52+
cli::cli_abort(
53+
"Sparse data cannot be used with formula interface. Please use
54+
{.fn fit_xy} instead."
55+
)
56+
}
57+
5158
if (remove_intercept) {
5259
data <- data[, colnames(data) != "(Intercept)", drop = FALSE]
5360
}

R/predict.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,10 @@ prepare_data <- function(object, new_data) {
470470
if (allow_sparse(object) && inherits(new_data, "dgCMatrix")) {
471471
return(new_data)
472472
}
473+
if (allow_sparse(object) && is_sparse_tibble(new_data)) {
474+
new_data <- sparsevctrs::coerce_to_sparse_matrix(new_data)
475+
return(new_data)
476+
}
473477

474478
fit_interface <- object$spec$method$fit$interface
475479
switch(

tests/testthat/_snaps/sparsevctrs.md

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,75 @@
1-
# sparse tibble can be passed to `fit()
1+
# sparse tibble can be passed to `fit() - supported
2+
3+
Code
4+
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
5+
Condition
6+
Error in `.convert_form_to_xy_fit()`:
7+
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.
8+
9+
# sparse tibble can be passed to `fit() - unsupported
210

311
Code
412
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
513
Condition
614
Warning:
715
`data` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.
816

9-
# sparse matrix can be passed to `fit()
17+
# sparse matrix can be passed to `fit() - supported
18+
19+
Code
20+
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
21+
Condition
22+
Error in `.convert_form_to_xy_fit()`:
23+
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.
24+
25+
# sparse matrix can be passed to `fit() - unsupported
1026

1127
Code
1228
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
1329
Condition
1430
Warning:
1531
`data` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.
1632

17-
# sparse tibble can be passed to `fit_xy()
33+
# sparse tibble can be passed to `fit_xy() - unsupported
1834

1935
Code
2036
lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1])
2137
Condition
2238
Warning:
2339
`x` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.
2440

25-
# sparse matrices can be passed to `fit_xy()
41+
# sparse matrices can be passed to `fit_xy() - unsupported
2642

2743
Code
2844
lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1])
2945
Condition
3046
Error in `fit_xy()`:
3147
! `x` is a sparse matrix, but `linear_reg()` with engine "lm" doesn't accept that.
3248

33-
# sparse tibble can be passed to `predict()
49+
# sparse tibble can be passed to `predict() - unsupported
3450

3551
Code
3652
preds <- predict(lm_fit, sparse_mtcars)
3753
Condition
3854
Warning:
3955
`x` is a sparse tibble, but `linear_reg()` with engine "lm" doesn't accept that. Converting to non-sparse.
4056

41-
# sparse matrices can be passed to `predict()
57+
# sparse matrices can be passed to `predict() - unsupported
4258

4359
Code
4460
predict(lm_fit, sparse_mtcars)
4561
Condition
4662
Error in `predict()`:
4763
! `x` is a sparse matrix, but `linear_reg()` with engine "lm" doesn't accept that.
4864

65+
# sparse data work with xgboost engine
66+
67+
Code
68+
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
69+
Condition
70+
Error in `.convert_form_to_xy_fit()`:
71+
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.
72+
4973
# to_sparse_data_frame() is used correctly
5074

5175
Code

tests/testthat/helper-objects.R

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ is_tf_ok <- function() {
2828
# ------------------------------------------------------------------------------
2929
# For sparse tibble testing
3030

31-
sparse_hotel_rates <- function() {
31+
sparse_hotel_rates <- function(tibble = FALSE) {
3232
# 99.2 sparsity
3333
hotel_rates <- modeldata::hotel_rates
3434

@@ -49,5 +49,15 @@ sparse_hotel_rates <- function() {
4949
)
5050

5151
res <- as.matrix(res)
52-
Matrix::Matrix(res, sparse = TRUE)
52+
res <- Matrix::Matrix(res, sparse = TRUE)
53+
54+
if (tibble) {
55+
res <- sparsevctrs::coerce_to_sparse_tibble(res)
56+
57+
# materialize outcome
58+
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
59+
res$avg_price_per_room <- res$avg_price_per_room[]
60+
}
61+
62+
res
5363
}

tests/testthat/test-sparsevctrs.R

Lines changed: 83 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
1-
test_that("sparse tibble can be passed to `fit()", {
1+
test_that("sparse tibble can be passed to `fit() - supported", {
22
skip_if_not_installed("xgboost")
3+
# Make materialization of sparse vectors throw an error
4+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
5+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
36

4-
hotel_data <- sparse_hotel_rates()
5-
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
7+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
68

79
spec <- boost_tree() %>%
810
set_mode("regression") %>%
911
set_engine("xgboost")
10-
11-
expect_no_error(
12-
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
12+
13+
expect_snapshot(
14+
error = TRUE,
15+
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
1316
)
17+
})
18+
19+
test_that("sparse tibble can be passed to `fit() - unsupported", {
20+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
1421

1522
spec <- linear_reg() %>%
1623
set_mode("regression") %>%
@@ -21,19 +28,28 @@ test_that("sparse tibble can be passed to `fit()", {
2128
)
2229
})
2330

24-
test_that("sparse matrix can be passed to `fit()", {
31+
test_that("sparse matrix can be passed to `fit() - supported", {
2532
skip_if_not_installed("xgboost")
26-
33+
# Make materialization of sparse vectors throw an error
34+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
35+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
36+
2737
hotel_data <- sparse_hotel_rates()
28-
38+
2939
spec <- boost_tree() %>%
3040
set_mode("regression") %>%
3141
set_engine("xgboost")
3242

33-
expect_no_error(
34-
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
43+
expect_snapshot(
44+
error = TRUE,
45+
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
3546
)
3647

48+
})
49+
50+
test_that("sparse matrix can be passed to `fit() - unsupported", {
51+
hotel_data <- sparse_hotel_rates()
52+
3753
spec <- linear_reg() %>%
3854
set_mode("regression") %>%
3955
set_engine("lm")
@@ -43,19 +59,25 @@ test_that("sparse matrix can be passed to `fit()", {
4359
)
4460
})
4561

46-
test_that("sparse tibble can be passed to `fit_xy()", {
62+
test_that("sparse tibble can be passed to `fit_xy() - supported", {
4763
skip_if_not_installed("xgboost")
48-
49-
hotel_data <- sparse_hotel_rates()
50-
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
64+
# Make materialization of sparse vectors throw an error
65+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
66+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
67+
68+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
5169

5270
spec <- boost_tree() %>%
5371
set_mode("regression") %>%
5472
set_engine("xgboost")
5573

5674
expect_no_error(
57-
lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
75+
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
5876
)
77+
})
78+
79+
test_that("sparse tibble can be passed to `fit_xy() - unsupported", {
80+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
5981

6082
spec <- linear_reg() %>%
6183
set_mode("regression") %>%
@@ -66,8 +88,11 @@ test_that("sparse tibble can be passed to `fit_xy()", {
6688
)
6789
})
6890

69-
test_that("sparse matrices can be passed to `fit_xy()", {
91+
test_that("sparse matrices can be passed to `fit_xy() - supported", {
7092
skip_if_not_installed("xgboost")
93+
# Make materialization of sparse vectors throw an error
94+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
95+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
7196

7297
hotel_data <- sparse_hotel_rates()
7398

@@ -76,8 +101,12 @@ test_that("sparse matrices can be passed to `fit_xy()", {
76101
set_engine("xgboost")
77102

78103
expect_no_error(
79-
lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
104+
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
80105
)
106+
})
107+
108+
test_that("sparse matrices can be passed to `fit_xy() - unsupported", {
109+
hotel_data <- sparse_hotel_rates()
81110

82111
spec <- linear_reg() %>%
83112
set_mode("regression") %>%
@@ -89,11 +118,13 @@ test_that("sparse matrices can be passed to `fit_xy()", {
89118
)
90119
})
91120

92-
test_that("sparse tibble can be passed to `predict()", {
121+
test_that("sparse tibble can be passed to `predict() - supported", {
93122
skip_if_not_installed("ranger")
123+
# Make materialization of sparse vectors throw an error
124+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
125+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
94126

95-
hotel_data <- sparse_hotel_rates()
96-
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
127+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
97128

98129
spec <- rand_forest(trees = 10) %>%
99130
set_mode("regression") %>%
@@ -104,6 +135,10 @@ test_that("sparse tibble can be passed to `predict()", {
104135
expect_no_error(
105136
predict(tree_fit, hotel_data)
106137
)
138+
})
139+
140+
test_that("sparse tibble can be passed to `predict() - unsupported", {
141+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
107142

108143
spec <- linear_reg() %>%
109144
set_mode("regression") %>%
@@ -120,8 +155,11 @@ test_that("sparse tibble can be passed to `predict()", {
120155
)
121156
})
122157

123-
test_that("sparse matrices can be passed to `predict()", {
158+
test_that("sparse matrices can be passed to `predict() - supported", {
124159
skip_if_not_installed("ranger")
160+
# Make materialization of sparse vectors throw an error
161+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
162+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
125163

126164
hotel_data <- sparse_hotel_rates()
127165

@@ -134,6 +172,10 @@ test_that("sparse matrices can be passed to `predict()", {
134172
expect_no_error(
135173
predict(tree_fit, hotel_data)
136174
)
175+
})
176+
177+
test_that("sparse matrices can be passed to `predict() - unsupported", {
178+
hotel_data <- sparse_hotel_rates()
137179

138180
spec <- linear_reg() %>%
139181
set_mode("regression") %>%
@@ -151,6 +193,9 @@ test_that("sparse matrices can be passed to `predict()", {
151193

152194
test_that("sparse data work with xgboost engine", {
153195
skip_if_not_installed("xgboost")
196+
# Make materialization of sparse vectors throw an error
197+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
198+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
154199

155200
spec <- boost_tree() %>%
156201
set_mode("regression") %>%
@@ -159,35 +204,38 @@ test_that("sparse data work with xgboost engine", {
159204
hotel_data <- sparse_hotel_rates()
160205

161206
expect_no_error(
162-
tree_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
207+
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
163208
)
164-
209+
165210
expect_no_error(
166-
predict(tree_fit, hotel_data)
211+
predict(xgb_fit, hotel_data)
167212
)
168213

169-
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
214+
hotel_data <- sparse_hotel_rates(tibble = TRUE)
170215

171-
172-
expect_no_error(
173-
tree_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
216+
expect_snapshot(
217+
error = TRUE,
218+
xgb_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
174219
)
175220

176221
expect_no_error(
177-
predict(tree_fit, hotel_data)
222+
predict(xgb_fit, hotel_data)
178223
)
179224

180225
expect_no_error(
181-
tree_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
226+
xgb_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
182227
)
183228

184229
expect_no_error(
185-
predict(tree_fit, hotel_data)
230+
predict(xgb_fit, hotel_data)
186231
)
187232
})
188233

189234
test_that("to_sparse_data_frame() is used correctly", {
190235
skip_if_not_installed("xgboost")
236+
# Make materialization of sparse vectors throw an error
237+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
238+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
191239

192240
local_mocked_bindings(
193241
to_sparse_data_frame = function(x, object) {
@@ -228,6 +276,9 @@ test_that("to_sparse_data_frame() is used correctly", {
228276

229277
test_that("maybe_sparse_matrix() is used correctly", {
230278
skip_if_not_installed("xgboost")
279+
# Make materialization of sparse vectors throw an error
280+
# https://r-lib.github.io/sparsevctrs/dev/reference/sparsevctrs_options.html
281+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
231282

232283
local_mocked_bindings(
233284
maybe_sparse_matrix = function(x) {

0 commit comments

Comments
 (0)