cmu-delphi
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 1 deletion b/‎DESCRIPTION‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎NEWS.md‎
Lines changed: 6 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎R/methods-epi_df.R‎
Lines changed: 6 additions & 1 deletion b/‎R/methods-epi_df.R‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎R/slide.R‎
Lines changed: 67 additions & 54 deletions b/‎R/slide.R‎
Lines changed: 67 additions & 54 deletions
diff --git a/‎R/utils.R‎
Lines changed: 16 additions & 9 deletions b/‎R/utils.R‎
Lines changed: 16 additions & 9 deletions
diff --git a/‎man/complete.epi_df.Rd‎
Lines changed: 6 additions & 1 deletion b/‎man/complete.epi_df.Rd‎
Lines changed: 6 additions & 1 deletion
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.8.1
+Version: 0.8.2
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", email = "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")),
@@ -32,6 +32,7 @@ Imports:
     dplyr (>= 1.0.0),
     genlasso,
     ggplot2,
+    glue,
     lifecycle (>= 1.0.1),
     lubridate,
     magrittr,
 
@@ -9,6 +9,12 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 - Added `complete.epi_df`, which fills in missing values in an `epi_df` with
   `NA`s. Uses `tidyr::complete` underneath and preserves `epi_df` metadata.
 
+## Bug fixes
+
+- Fix `epi_slide_opt` (and related functions) to correctly handle `before=Inf`.
+- Disallow `after=Inf` in slide functions, since it doesn't seem like a likely
+  use case and complicates code.
+
 # epiprocess 0.8
 
 ## Breaking changes
 
@@ -286,7 +286,12 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
 #' ) %>%
 #'   as_epi_df(as_of = start_date + 3)
 #' daily_edf %>%
-#'   complete(geo_value, time_value = full_seq(time_value, period = 1), fill = list(value = 0), explicit = FALSE)
+#'   complete(
+#'     geo_value,
+#'     time_value = full_seq(time_value, period = 1),
+#'     fill = list(value = 0),
+#'     explicit = FALSE
+#'   )
 #' # Complete works for weekly data and can take a fill value
 #' # No grouping
 #' weekly_edf <- tibble::tribble(
 
@@ -112,6 +112,8 @@ epi_slide <- function(x, f, ..., before = NULL, after = NULL, ref_time_values =
   }
   ref_time_values <- sort(ref_time_values)
 
+  # Handle defaults for before/after
+  time_type <- attr(x, "metadata")$time_type
   if (is.null(before) && !is.null(after)) {
     if (inherits(after, "difftime")) {
       before <- as.difftime(0, units = units(after))
@@ -123,11 +125,15 @@ epi_slide <- function(x, f, ..., before = NULL, after = NULL, ref_time_values =
     if (inherits(before, "difftime")) {
       after <- as.difftime(0, units = units(before))
     } else {
-      after <- 0
+      if (before == Inf && time_type %in% c("day", "week")) {
+        after <- as.difftime(0, units = glue::glue("{time_type}s"))
+      } else {
+        after <- 0
+      }
     }
   }
-  validate_slide_window_arg(before, attr(x, "metadata")$time_type)
-  validate_slide_window_arg(after, attr(x, "metadata")$time_type)
+  validate_slide_window_arg(before, time_type)
+  validate_slide_window_arg(after, time_type, allow_inf = FALSE)
 
   # Arrange by increasing time_value
   x <- arrange(x, .data$time_value)
@@ -462,6 +468,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
   }
   ref_time_values <- sort(ref_time_values)
 
+  # Handle defaults for before/after
+  time_type <- attr(x, "metadata")$time_type
   if (is.null(before) && !is.null(after)) {
     if (inherits(after, "difftime")) {
       before <- as.difftime(0, units = units(after))
@@ -473,22 +481,22 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
     if (inherits(before, "difftime")) {
       after <- as.difftime(0, units = units(before))
     } else {
-      after <- 0
+      if (before == Inf && time_type %in% c("day", "week")) {
+        after <- as.difftime(0, units = glue::glue("{time_type}s"))
+      } else {
+        after <- 0
+      }
     }
   }
-  validate_slide_window_arg(before, attr(x, "metadata")$time_type)
-  validate_slide_window_arg(after, attr(x, "metadata")$time_type)
+  validate_slide_window_arg(before, time_type)
+  validate_slide_window_arg(after, time_type, allow_inf = FALSE)
 
   # Make a complete date sequence between min(x$time_value) and max(x$time_value).
-  date_seq_list <- full_date_seq(x, before, after, attr(x, "metadata")$time_type)
+  date_seq_list <- full_date_seq(x, before, after, time_type)
   all_dates <- date_seq_list$all_dates
   pad_early_dates <- date_seq_list$pad_early_dates
   pad_late_dates <- date_seq_list$pad_late_dates
 
-  # `frollmean` is 1-indexed, so create a new window width based on our
-  # `before` and `after` params.
-  window_size <- before + after + 1L
-
   # The position of a given column can be differ between input `x` and
   # `.data_group` since the grouping step by default drops grouping columns.
   # To avoid rerunning `eval_select` for every `.data_group`, convert
@@ -501,7 +509,6 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
   result_col_names <- paste0("slide_value_", col_names_chr)
   slide_one_grp <- function(.data_group, .group_key, ...) {
     missing_times <- all_dates[!(all_dates %in% .data_group$time_value)]
-
     # `frollmean` requires a full window to compute a result. Add NA values
     # to beginning and end of the group so that we get results for the
     # first `before` and last `after` elements.
@@ -511,55 +518,61 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
     ) %>%
       arrange(.data$time_value)
 
-    # If a group contains duplicate time values, `frollmean` will still only
-    # use the last `k` obs. It isn't looking at dates, it just goes in row
-    # order. So if the computation is aggregating across multiple obs for the
-    # same date, `epi_slide_opt` and derivates will produce incorrect
-    # results; `epi_slide` should be used instead.
-    if (anyDuplicated(.data_group$time_value) != 0L) {
-      cli_abort(
-        c(
-          "group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this
-            group will result in incorrect results",
-          "i" = "Please change the grouping structure of the input data so that
-            each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
-            %>% epi_slide_opt(f = frollmean)`)",
-          "i" = "Use `epi_slide` to aggregate across groups"
-        ),
-        class = "epiprocess__epi_slide_opt__duplicate_time_values",
-        epiprocess__data_group = .data_group,
-        epiprocess__group_key = .group_key
-      )
-    }
-    if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
-      cli_abort(
-        c(
-          "group contains an unexpected number of rows",
-          "i" = c("Input data may contain `time_values` closer together than the
-             expected `time_step` size")
-        ),
-        class = "epiprocess__epi_slide_opt__unexpected_row_number",
-        epiprocess__data_group = .data_group,
-        epiprocess__group_key = .group_key
-      )
-    }
-
     if (f_from_package == "data.table") {
-      roll_output <- f(
-        x = .data_group[, col_names_chr], n = window_size, align = "right", ...
-      )
+      # If a group contains duplicate time values, `frollmean` will still only
+      # use the last `k` obs. It isn't looking at dates, it just goes in row
+      # order. So if the computation is aggregating across multiple obs for the
+      # same date, `epi_slide_opt` and derivatives will produce incorrect results;
+      # `epi_slide` should be used instead.
+      if (anyDuplicated(.data_group$time_value) != 0L) {
+        cli_abort(
+          c(
+            "group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this
+              group will result in incorrect results",
+            "i" = "Please change the grouping structure of the input data so that
+              each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
+              %>% epi_slide_opt(f = frollmean)`)",
+            "i" = "Use `epi_slide` to aggregate across groups"
+          ),
+          class = "epiprocess__epi_slide_opt__duplicate_time_values",
+          epiprocess__data_group = .data_group,
+          epiprocess__group_key = .group_key
+        )
+      }
+
+      if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
+        cli_abort(
+          c(
+            "group contains an unexpected number of rows",
+            "i" = c("Input data may contain `time_values` closer together than the
+              expected `time_step` size")
+          ),
+          class = "epiprocess__epi_slide_opt__unexpected_row_number",
+          epiprocess__data_group = .data_group,
+          epiprocess__group_key = .group_key
+        )
+      }
 
+      # `frollmean` is 1-indexed, so create a new window width based on our
+      # `before` and `after` params. Right-aligned `frollmean` results'
+      # `ref_time_value`s will be `after` timesteps ahead of where they should
+      # be; shift results to the left by `after` timesteps.
+      if (before != Inf) {
+        window_size <- before + after + 1L
+        roll_output <- f(x = .data_group[, col_names_chr], n = window_size, ...)
+      } else {
+        window_size <- list(seq_along(.data_group$time_value))
+        roll_output <- f(x = .data_group[, col_names_chr], n = window_size, adaptive = TRUE, ...)
+      }
       if (after >= 1) {
-        # Right-aligned `frollmean` results' `ref_time_value`s will be `after`
-        # timesteps ahead of where they should be. Shift results to the left by
-        # `after` timesteps.
         .data_group[, result_col_names] <- purrr::map(roll_output, function(.x) {
           c(.x[(after + 1L):length(.x)], rep(NA, after))
         })
       } else {
         .data_group[, result_col_names] <- roll_output
       }
-    } else if (f_from_package == "slider") {
+    }
+    if (f_from_package == "slider") {
       for (i in seq_along(col_names_chr)) {
         .data_group[, result_col_names[i]] <- f(
           x = .data_group[[col_names_chr[i]]],
@@ -746,7 +759,7 @@ full_date_seq <- function(x, before, after, time_type) {
   if (time_type %in% c("yearmonth", "integer")) {
     all_dates <- seq(min(x$time_value), max(x$time_value), by = 1L)
 
-    if (before != 0) {
+    if (before != 0 && before != Inf) {
       pad_early_dates <- all_dates[1L] - before:1
     }
     if (after != 0) {
@@ -759,7 +772,7 @@ full_date_seq <- function(x, before, after, time_type) {
     )
 
     all_dates <- seq(min(x$time_value), max(x$time_value), by = by)
-    if (before != 0) {
+    if (before != 0 && before != Inf) {
       # The behavior is analogous to the branch with tsibble types above. For
       # more detail, note that the function `seq.Date(from, ..., length.out =
       # n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first
 
@@ -803,40 +803,62 @@ guess_period.POSIXt <- function(time_values, time_values_arg = rlang::caller_arg
   as.numeric(NextMethod(), units = "secs")
 }
 
-
-validate_slide_window_arg <- function(arg, time_type, arg_name = rlang::caller_arg(arg)) {
+# Validate a slide window argument (e.g. `before` or `after`) for `time_type`.
+#
+# Aborts with `cli_abort()` when `arg` is NULL or non-scalar, when `time_type`
+# is "custom", or when `arg` does not have a form accepted for `time_type`:
+#   - "day": a non-negative integer or a difftime with units in days
+#   - "week": a difftime with units in weeks
+#   - "yearmonth" / "integer": a non-negative integer that is not a difftime
+# Any other `time_type` is rejected unless `arg` is `Inf`. `Inf` itself is
+# accepted only when `allow_inf` is TRUE. `arg_name` is the label used for
+# `arg` in error messages.
+validate_slide_window_arg <- function(arg, time_type, allow_inf = TRUE, arg_name = rlang::caller_arg(arg)) {
   if (is.null(arg)) {
-    cli_abort("`{arg_name}` is a required argument.")
+    cli_abort("`{arg_name}` is a required argument for slide functions.")
   }
 
   if (!checkmate::test_scalar(arg)) {
-    cli_abort("Expected `{arg_name}` to be a scalar value.")
+    cli_abort("Slide function expected `{arg_name}` to be a scalar value.")
   }
 
   if (time_type == "custom") {
     cli_abort("Unsure how to interpret slide units with a custom time type. Consider converting your time
     column to a Date, yearmonth, or integer type.")
   }
 
+  # `msg` stays empty while `arg` is valid; otherwise it lists the accepted
+  # forms. `inf_option` is NULL (and so dropped by `c()`) when `Inf` is not
+  # allowed, so the message never suggests a value that would be rejected.
+  inf_option <- if (allow_inf) "Inf"
+  msg <- ""
   if (!identical(arg, Inf)) {
     if (time_type == "day") {
       if (!test_int(arg, lower = 0L) && !(inherits(arg, "difftime") && units(arg) == "days")) {
-        cli_abort("Expected `{arg_name}` to be a difftime with units in days or a non-negative integer.")
+        msg <- glue::glue_collapse(c("difftime with units in days", "non-negative integer", inf_option), " or ")
       }
     } else if (time_type == "week") {
       if (!(inherits(arg, "difftime") && units(arg) == "weeks")) {
-        cli_abort("Expected `{arg_name}` to be a difftime with units in weeks.")
+        msg <- glue::glue_collapse(c("difftime with units in weeks", inf_option), " or ")
       }
     } else if (time_type == "yearmonth") {
       if (!test_int(arg, lower = 0L) || inherits(arg, "difftime")) {
-        cli_abort("Expected `{arg_name}` to be a non-negative integer.")
+        msg <- glue::glue_collapse(c("non-negative integer", inf_option), " or ")
       }
     } else if (time_type == "integer") {
       if (!test_int(arg, lower = 0L) || inherits(arg, "difftime")) {
-        cli_abort("Expected `{arg_name}` to be a non-negative integer.")
+        msg <- glue::glue_collapse(c("non-negative integer", inf_option), " or ")
       }
     } else {
-      cli_abort("Expected `{arg_name}` to be Inf, an appropriate a difftime, or a non-negative integer.")
+      msg <- glue::glue_collapse(c("difftime", "non-negative integer", inf_option), " or ")
+    }
+  } else {
+    if (!allow_inf) {
+      # No leading articles here: the final template already supplies "a".
+      msg <- glue::glue_collapse(c("difftime", "non-negative integer"), " or ")
     }
   }
+  if (msg != "") {
+    cli_abort("Slide function expected `{arg_name}` to be a {msg}.")
+  }
 }
Original file line number	Diff line number	Diff line change
`@@ -803,40 +803,47 @@ guess_period.POSIXt <- function(time_values, time_values_arg = rlang::caller_arg`
`803`	`803`	`as.numeric(NextMethod(), units = "secs")`
`804`	`804`	`}`
`805`	`805`
`806`		`-`
`807`		`-validate_slide_window_arg <- function(arg, time_type, arg_name = rlang::caller_arg(arg)) {`
	`806`	`+validate_slide_window_arg <- function(arg, time_type, allow_inf = TRUE, arg_name = rlang::caller_arg(arg)) {`
`808`	`807`	`if (is.null(arg)) {`
`809`		- cli_abort("`{arg_name}` is a required argument.")
	`808`	+ cli_abort("`{arg_name}` is a required argument for slide functions.")
`810`	`809`	`}`
`811`	`810`
`812`	`811`	`if (!checkmate::test_scalar(arg)) {`
`813`		- cli_abort("Expected `{arg_name}` to be a scalar value.")
	`812`	+ cli_abort("Slide function expected `{arg_name}` to be a scalar value.")
`814`	`813`	`}`
`815`	`814`
`816`	`815`	`if (time_type == "custom") {`
`817`	`816`	`cli_abort("Unsure how to interpret slide units with a custom time type. Consider converting your time`
`818`	`817`	`column to a Date, yearmonth, or integer type.")`
`819`	`818`	`}`
`820`	`819`
	`820`	`+ msg <- ""`
`821`	`821`	`if (!identical(arg, Inf)) {`
`822`	`822`	`if (time_type == "day") {`
`823`	`823`	`if (!test_int(arg, lower = 0L) && !(inherits(arg, "difftime") && units(arg) == "days")) {`
`824`		- cli_abort("Expected `{arg_name}` to be a difftime with units in days or a non-negative integer.")
	`824`	`+ msg <- glue::glue_collapse(c("difftime with units in days", "non-negative integer", "Inf"), " or ")`
`825`	`825`	`}`
`826`	`826`	`} else if (time_type == "week") {`
`827`	`827`	`if (!(inherits(arg, "difftime") && units(arg) == "weeks")) {`
`828`		- cli_abort("Expected `{arg_name}` to be a difftime with units in weeks.")
	`828`	`+ msg <- glue::glue_collapse(c("difftime with units in weeks", "Inf"), " or ")`
`829`	`829`	`}`
`830`	`830`	`} else if (time_type == "yearmonth") {`
`831`	`831`	`if (!test_int(arg, lower = 0L) \|\| inherits(arg, "difftime")) {`
`832`		- cli_abort("Expected `{arg_name}` to be a non-negative integer.")
	`832`	`+ msg <- glue::glue_collapse(c("non-negative integer", "Inf"), " or ")`
`833`	`833`	`}`
`834`	`834`	`} else if (time_type == "integer") {`
`835`	`835`	`if (!test_int(arg, lower = 0L) \|\| inherits(arg, "difftime")) {`
`836`		- cli_abort("Expected `{arg_name}` to be a non-negative integer.")
	`836`	`+ msg <- glue::glue_collapse(c("non-negative integer", "Inf"), " or ")`
`837`	`837`	`}`
`838`	`838`	`} else {`
`839`		- cli_abort("Expected `{arg_name}` to be Inf, an appropriate a difftime, or a non-negative integer.")
	`839`	`+ msg <- glue::glue_collapse(c("difftime", "non-negative integer", "Inf"), " or ")`
	`840`	`+ }`
	`841`	`+ } else {`
	`842`	`+ if (!allow_inf) {`
	`843`	`+ msg <- glue::glue_collapse(c("a difftime", "a non-negative integer"), " or ")`
`840`	`844`	`}`
`841`	`845`	`}`
	`846`	`+ if (msg != "") {`
	`847`	+ cli_abort("Slide function expected `{arg_name}` to be a {msg}.")
	`848`	`+ }`
`842`	`849`	`}`