Skip to content

Commit 3198429

Browse files
committed
Test filter.epi_archive, fix&tweak some behaviors
1 parent 1f2bd85 commit 3198429

File tree

2 files changed

+113
-6
lines changed

2 files changed

+113
-6
lines changed

R/methods-epi_archive.R

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,7 +1042,7 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
10421042
in_tbl <- tibble::as_tibble(as.list(.data$DT), .name_repair = "minimal")
10431043
if (.format_aware) {
10441044
out_tbl <- in_tbl %>%
1045-
filter(..., .by = .by)
1045+
filter(..., .by = {{.by}})
10461046
} else {
10471047
measurement_colnames <- setdiff(names(.data$DT), key_colnames(.data))
10481048
forbidden_colnames <- c("version", measurement_colnames)
@@ -1067,13 +1067,13 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
10671067
"Using `version` in `filter.epi_archive` may produce unexpected results.",
10681068
">" = "See if `epix_as_of` or `epix_slide` would work instead.",
10691069
">" = "If not, see `?filter.epi_archive` details for how to proceed."
1070-
)), assign.env = e)
1070+
), class = "epiprocess__filter_archive__used_version"), assign.env = e)
10711071
for (measurement_colname in measurement_colnames) {
10721072
delayedAssign(measurement_colname, cli::cli_abort(c(
10731073
"Using `{format_varname(measurement_colname)}`
10741074
in `filter.epi_archive` may produce unexpected results.",
10751075
">" = "See `?filter.epi_archive` details for how to proceed."
1076-
)), assign.env = e)
1076+
), class = "epiprocess__filter_archive__used_measurement"), assign.env = e)
10771077
}
10781078
break
10791079
}
@@ -1082,16 +1082,23 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
10821082
TRUE
10831083
},
10841084
...,
1085-
.by = .by
1085+
.by = {{.by}}
10861086
)
10871087
}
10881088
# We could try to re-infer the geo_type, e.g., when filtering from
10891089
# national+state to just state. However, we risk inference failures such as
10901090
# "hrr" -> "hhs" from filtering to hrr 10, or "custom" -> USA-related when
10911091
# working with non-USA data:
10921092
out_geo_type <- .data$geo_type
1093-
# We might be going from daily to weekly; re-infer:
1094-
out_time_type <- guess_time_type(out_tbl$time_value)
1093+
if (.data$time_type == "day") {
1094+
# We might be going from daily to weekly; re-infer:
1095+
out_time_type <- guess_time_type(out_tbl$time_value)
1096+
} else {
1097+
# We might be filtering weekly to a single time_value; avoid re-inferring to
1098+
# stay "week". Or in other cases, just skip inferring, as re-inferring is
1099+
# expected to match the input time_type:
1100+
out_time_type <- .data$time_type
1101+
}
10951102
# Even if they narrow down to just a single value of an other_keys column,
10961103
# it's probably still better (& simpler) to treat it as an other_keys column
10971104
# since it still exists in the result:

tests/testthat/test-methods-epi_archive.R

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,103 @@ test_that("group_vars works as expected", {
128128
"geo_value"
129129
)
130130
})
131+
132+
test_that("filter.epi_archive works as expected", {
133+
134+
ea2 <- ea2_data %>%
135+
as_epi_archive()
136+
137+
# Some basic output value checks:
138+
139+
expect_equal(
140+
ea2 %>% filter(geo_value == "tn"),
141+
new_epi_archive(
142+
ea2$DT[FALSE],
143+
ea2$geo_type, ea2$time_type, ea2$other_keys,
144+
ea2$clobberable_versions_start, ea2$versions_end
145+
)
146+
)
147+
148+
expect_equal(
149+
ea2 %>% filter(geo_value == "ca", time_value == as.Date("2020-06-02")),
150+
new_epi_archive(
151+
data.table::data.table(geo_value = "ca", time_value = as.Date("2020-06-02"),
152+
version = as.Date("2020-06-02") + 0:2, cases = 0:2),
153+
ea2$geo_type, ea2$time_type, ea2$other_keys,
154+
ea2$clobberable_versions_start, ea2$versions_end
155+
)
156+
)
157+
158+
# Output geo_type and time_type behavior:
159+
160+
hrr_day_ea <- tibble(
161+
geo_value = c(rep(1, 14), 100),
162+
time_value = as.Date("2020-01-01") - 1 + c(1:14, 14),
163+
version = time_value + 3,
164+
value = 1:15
165+
) %>%
166+
as_epi_archive()
167+
168+
expect_equal(hrr_day_ea$geo_type, "hrr")
169+
expect_equal(hrr_day_ea$time_type, "day")
170+
171+
hrr_week_ea <- hrr_day_ea %>%
172+
filter(geo_value == 1, as.POSIXlt(time_value)$wday == 6L)
173+
174+
expect_equal(hrr_week_ea$geo_type, "hrr")
175+
expect_equal(hrr_week_ea$time_type, "week")
176+
177+
hrr_one_week_ea <- hrr_week_ea %>%
178+
filter(time_value == time_value[[1]])
179+
180+
expect_equal(hrr_one_week_ea$time_type, "week")
181+
182+
intcustom_day_ea <- hrr_day_ea
183+
intcustom_day_ea$geo_type <- "custom"
184+
185+
intcustom_week_ea <- intcustom_day_ea %>%
186+
filter(geo_value == 1, as.POSIXlt(time_value)$wday == 6L)
187+
188+
expect_equal(intcustom_week_ea$geo_type, "custom")
189+
expect_equal(intcustom_week_ea$time_type, "week")
190+
191+
# Error-raising:
192+
expect_error(
193+
ea2 %>% filter(version == as.Date("2020-06-02")),
194+
class = "epiprocess__filter_archive__used_version"
195+
)
196+
expect_error(
197+
ea2 %>% filter(version <= as.Date("2020-06-02")),
198+
class = "epiprocess__filter_archive__used_version"
199+
)
200+
expect_snapshot(
201+
ea2 %>% filter(version <= as.Date("2020-06-02")),
202+
error = TRUE, cnd_class = TRUE
203+
)
204+
expect_error(
205+
ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
206+
class = "epiprocess__filter_archive__used_measurement"
207+
)
208+
expect_snapshot(
209+
ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
210+
error = TRUE, cnd_class = TRUE
211+
)
212+
expect_error(
213+
ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
214+
class = "epiprocess__filter_archive__used_measurement"
215+
)
216+
expect_error(
217+
ea2 %>% filter(cases >= median(cases), .by = geo_value),
218+
class = "epiprocess__filter_archive__used_measurement"
219+
)
220+
221+
# Escape hatch:
222+
expect_equal(
223+
ea2 %>%
224+
filter(version <= time_value + as.difftime(1, units = "days"),
225+
.format_aware = TRUE) %>%
226+
.$DT,
227+
ea2$DT[version <= time_value + as.difftime(1, units = "days"), ]
228+
)
229+
230+
})

0 commit comments

Comments
 (0)