From 0c21801b591258a2483bf2a745d5efdb30fe7d59 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Feb 2025 11:31:57 -0800
Subject: [PATCH 001/107] refactor: hoist some epi_slide_opt pre-processing to
 helpers

For re-use in an epix_epi_slide_opt
---
 R/slide.R                             | 292 +++++++++++++++-----------
 man/across_ish_names_info.Rd          |  46 ++++
 man/upstream_slide_f_info.Rd          |  23 ++
 man/upstream_slide_f_possibilities.Rd |  16 ++
 4 files changed, 259 insertions(+), 118 deletions(-)
 create mode 100644 man/across_ish_names_info.Rd
 create mode 100644 man/upstream_slide_f_info.Rd
 create mode 100644 man/upstream_slide_f_possibilities.Rd

diff --git a/R/slide.R b/R/slide.R
index abc7c3b77..b0de0f8d2 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -549,6 +549,164 @@ get_before_after_from_window <- function(window_size, align, time_type) {
   list(before = before, after = after)
 }
 
+
+#' Information about upstream (`{data.table}`/`{slider}`) slide functions
+#'
+#' Underlies [`upstream_slide_f_info`].
+#'
+#' @keywords internal
+upstream_slide_f_possibilities <- tibble::tribble(
+  ~f, ~package, ~namer,
+  frollmean, "data.table", ~ if (is.logical(.x)) "prop" else "av",
+  frollsum, "data.table", ~ if (is.logical(.x)) "count" else "sum",
+  frollapply, "data.table", ~"slide",
+  slide_sum, "slider", ~ if (is.logical(.x)) "count" else "sum",
+  slide_prod, "slider", ~"prod",
+  slide_mean, "slider", ~ if (is.logical(.x)) "prop" else "av",
+  slide_min, "slider", ~"min",
+  slide_max, "slider", ~"max",
+  slide_all, "slider", ~"all",
+  slide_any, "slider", ~"any",
+)
+
+#' Validate & get information about an upstream slide function
+#'
+#' @param .f function such as `data.table::frollmean` or `slider::slide_mean`;
+#'   must appear in [`upstream_slide_f_possibilities`]
+#' @return named list with two elements: `from_package`, a string containing the
+#'   upstream package name ("data.table" or "slider"), and `namer`, a function
+#'   that takes a column to call `.f` on and outputs a basic name or
+#'   abbreviation for what operation `.f` represents on that kind of column
+#'   (e.g., "sum", "av", "count").
+#'
+#' @keywords internal
+upstream_slide_f_info <- function(.f) {
+  # Check that slide function `.f` is one of those short-listed from
+  # `data.table` and `slider` (or a function that has the exact same definition,
+  # e.g. if the function has been reexported or defined locally). Extract some
+  # metadata. `namer` will be mapped over columns (.x will be a column, not the
+  # entire edf).
+  f_info_row <- upstream_slide_f_possibilities %>%
+    filter(map_lgl(.data$f, ~ identical(.f, .x)))
+  if (nrow(f_info_row) == 0L) {
+    # `f` is from somewhere else and not supported
+    cli_abort(
+      c(
+        "problem with {rlang::expr_label(rlang::caller_arg(f))}",
+        "i" = "`f` must be one of `data.table`'s rolling functions (`frollmean`,
+              `frollsum`, `frollapply`. See `?data.table::roll`) or one of
+              `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
+              etc. See `?slider::\`summary-slide\`` for more options)."
+      ),
+      class = "epiprocess__epi_slide_opt__unsupported_slide_function",
+      epiprocess__f = .f
+    )
+  }
+  if (nrow(f_info_row) > 1L) {
+    cli_abort('epiprocess internal error: looking up `.f` in table of possible
+               functions yielded multiple matches. Please report it using "New
+               issue" at https://github.com/cmu-delphi/epiprocess/issues, using
+               reprex::reprex to provide a minimal reproducible example.')
+  }
+  f_from_package <- f_info_row$package
+  list(
+    from_package = f_from_package,
+    namer = unwrap(f_info_row$namer)
+  )
+}
+
+#' Calculate input and output column names for an `{epiprocess}` [`dplyr::across`]-like operations
+#'
+#' @param .x data.frame to perform input column tidyselection on
+#' @param time_type as in [`new_epi_df`]
+#' @param col_names_quo enquosed input column tidyselect expression
+#' @param .f_namer function taking an input column object and outputting a name
+#'   for a corresponding output column; see [`upstream_slide_f_info`]
+#' @param .window_size as in [`epi_slide_opt`]
+#' @param .align as in [`epi_slide_opt`]
+#' @param .prefix as in [`epi_slide_opt`]
+#' @param .suffix as in [`epi_slide_opt`]
+#' @param .new_col_names as in [`epi_slide_opt`]
+#' @return named list with two elements: `input_col_names`, chr, subset of
+#'   `names(.x)`; and `output_colnames`, chr, same length as `input_col_names`
+#'
+#' @keywords internal
+across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .window_size, .align, .prefix, .suffix, .new_col_names) {
+  # The position of a given column can be differ between input `.x` and
+  # `.data_group` since the grouping step by default drops grouping columns.
+  # To avoid rerunning `eval_select` for every `.data_group`, convert
+  # positions of user-provided `col_names` into string column names. We avoid
+  # using `names(pos)` directly for robustness and in case we later want to
+  # allow users to rename fields via tidyselection.
+  pos <- eval_select(col_names_quo, data = .x, allow_rename = FALSE)
+  input_col_names <- names(.x)[pos]
+
+  # Handle output naming
+  if ((!is.null(.prefix) || !is.null(.suffix)) && !is.null(.new_col_names)) {
+    cli_abort(
+      "Can't use both .prefix/.suffix and .new_col_names at the same time.",
+      class = "epiprocess__epi_slide_opt_incompatible_naming_args"
+    )
+  }
+  assert_string(.prefix, null.ok = TRUE)
+  assert_string(.suffix, null.ok = TRUE)
+  assert_character(.new_col_names, len = length(input_col_names), null.ok = TRUE)
+  if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
+    .suffix <- "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
+    # ^ does not account for any arguments specified to underlying functions via
+    # `...` such as `na.rm =`, nor does it distinguish between functions from
+    # different packages accomplishing the same type of computation. Those are
+    # probably only set one way per task, so this probably produces cleaner
+    # names without clashes (though maybe some confusion if switching between
+    # code with different settings).
+  }
+  if (!is.null(.prefix) || !is.null(.suffix)) {
+    .prefix <- .prefix %||% ""
+    .suffix <- .suffix %||% ""
+    if (identical(.window_size, Inf)) {
+      n <- "running_"
+      time_unit_abbr <- ""
+      align_abbr <- ""
+    } else {
+      n <- time_delta_to_n_steps(.window_size, time_type)
+      time_unit_abbr <- time_type_unit_abbr(time_type)
+      align_abbr <- c(right = "", center = "c", left = "l")[[.align]]
+    }
+    glue_env <- rlang::env(
+      .n = n,
+      .time_unit_abbr = time_unit_abbr,
+      .align_abbr = align_abbr,
+      .f_abbr = purrr::map_chr(.x[, c(input_col_names)], .f_namer), # compat between DT and tbl selection
+      quo_get_env(col_names_quo)
+    )
+    .new_col_names <- unclass(
+      glue(.prefix, .envir = glue_env) +
+        input_col_names +
+        glue(.suffix, .envir = glue_env)
+    )
+  } else {
+    # `.new_col_names` was provided by user; we don't need to do anything.
+  }
+  if (any(.new_col_names %in% names(.x))) {
+    cli_abort(c(
+      "Naming conflict between new columns and existing columns",
+      "x" = "Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
+    ), class = "epiprocess__epi_slide_opt_old_new_name_conflict")
+  }
+  if (anyDuplicated(.new_col_names)) {
+    cli_abort(c(
+      "New column names contain duplicates",
+      "x" = "Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
+    ), class = "epiprocess__epi_slide_opt_new_name_duplicated")
+  }
+  output_col_names <- .new_col_names
+
+  return(list(
+    input_col_names = input_col_names,
+    output_col_names = output_col_names
+  ))
+}
+
 #' Optimized slide functions for common cases
 #'
 #' @description `epi_slide_opt` allows sliding an n-timestep [data.table::froll]
@@ -748,59 +906,12 @@ epi_slide_opt <- function(
   # Check for duplicated time values within groups
   assert(check_ukey_unique(ungroup(.x), c(group_vars(.x), "time_value")))
 
-  # The position of a given column can be differ between input `.x` and
-  # `.data_group` since the grouping step by default drops grouping columns.
-  # To avoid rerunning `eval_select` for every `.data_group`, convert
-  # positions of user-provided `col_names` into string column names. We avoid
-  # using `names(pos)` directly for robustness and in case we later want to
-  # allow users to rename fields via tidyselection.
+  # Validate/process .col_names, .f:
   col_names_quo <- enquo(.col_names)
-  pos <- eval_select(col_names_quo, data = .x, allow_rename = FALSE)
-  col_names_chr <- names(.x)[pos]
-
-  # Check that slide function `.f` is one of those short-listed from
-  # `data.table` and `slider` (or a function that has the exact same definition,
-  # e.g. if the function has been reexported or defined locally). Extract some
-  # metadata. `namer` will be mapped over columns (.x will be a column, not the
-  # entire edf).
-  f_possibilities <-
-    tibble::tribble(
-      ~f, ~package, ~namer,
-      frollmean, "data.table", ~ if (is.logical(.x)) "prop" else "av",
-      frollsum, "data.table", ~ if (is.logical(.x)) "count" else "sum",
-      frollapply, "data.table", ~"slide",
-      slide_sum, "slider", ~ if (is.logical(.x)) "count" else "sum",
-      slide_prod, "slider", ~"prod",
-      slide_mean, "slider", ~ if (is.logical(.x)) "prop" else "av",
-      slide_min, "slider", ~"min",
-      slide_max, "slider", ~"max",
-      slide_all, "slider", ~"all",
-      slide_any, "slider", ~"any",
-    )
-  f_info <- f_possibilities %>%
-    filter(map_lgl(.data$f, ~ identical(.f, .x)))
-  if (nrow(f_info) == 0L) {
-    # `f` is from somewhere else and not supported
-    cli_abort(
-      c(
-        "problem with {rlang::expr_label(rlang::caller_arg(f))}",
-        "i" = "`f` must be one of `data.table`'s rolling functions (`frollmean`,
-              `frollsum`, `frollapply`. See `?data.table::roll`) or one of
-              `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
-              etc. See `?slider::\`summary-slide\`` for more options)."
-      ),
-      class = "epiprocess__epi_slide_opt__unsupported_slide_function",
-      epiprocess__f = .f
-    )
-  }
-  if (nrow(f_info) > 1L) {
-    cli_abort('epiprocess internal error: looking up `.f` in table of possible
-               functions yielded multiple matches. Please report it using "New
-               issue" at https://github.com/cmu-delphi/epiprocess/issues, using
-               reprex::reprex to provide a minimal reproducible example.')
-  }
-  f_from_package <- f_info$package
+  f_info <- upstream_slide_f_info(.f)
+  f_from_package <- f_info$from_package
 
+  # Validate/process .ref_time_values:
   user_provided_rtvs <- !is.null(.ref_time_values)
   if (!user_provided_rtvs) {
     .ref_time_values <- unique(.x$time_value)
@@ -830,65 +941,10 @@ epi_slide_opt <- function(
   validate_slide_window_arg(.window_size, time_type)
   window_args <- get_before_after_from_window(.window_size, .align, time_type)
 
-  # Handle output naming
-  if ((!is.null(.prefix) || !is.null(.suffix)) && !is.null(.new_col_names)) {
-    cli_abort(
-      "Can't use both .prefix/.suffix and .new_col_names at the same time.",
-      class = "epiprocess__epi_slide_opt_incompatible_naming_args"
-    )
-  }
-  assert_string(.prefix, null.ok = TRUE)
-  assert_string(.suffix, null.ok = TRUE)
-  assert_character(.new_col_names, len = length(col_names_chr), null.ok = TRUE)
-  if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
-    .suffix <- "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
-    # ^ does not account for any arguments specified to underlying functions via
-    # `...` such as `na.rm =`, nor does it distinguish between functions from
-    # different packages accomplishing the same type of computation. Those are
-    # probably only set one way per task, so this probably produces cleaner
-    # names without clashes (though maybe some confusion if switching between
-    # code with different settings).
-  }
-  if (!is.null(.prefix) || !is.null(.suffix)) {
-    .prefix <- .prefix %||% ""
-    .suffix <- .suffix %||% ""
-    if (identical(.window_size, Inf)) {
-      n <- "running_"
-      time_unit_abbr <- ""
-      align_abbr <- ""
-    } else {
-      n <- time_delta_to_n_steps(.window_size, time_type)
-      time_unit_abbr <- time_type_unit_abbr(time_type)
-      align_abbr <- c(right = "", center = "c", left = "l")[[.align]]
-    }
-    glue_env <- rlang::env(
-      .n = n,
-      .time_unit_abbr = time_unit_abbr,
-      .align_abbr = align_abbr,
-      .f_abbr = purrr::map_chr(.x[col_names_chr], unwrap(f_info$namer)),
-      quo_get_env(col_names_quo)
-    )
-    .new_col_names <- unclass(
-      glue(.prefix, .envir = glue_env) +
-        col_names_chr +
-        glue(.suffix, .envir = glue_env)
-    )
-  } else {
-    # `.new_col_names` was provided by user; we don't need to do anything.
-  }
-  if (any(.new_col_names %in% names(.x))) {
-    cli_abort(c(
-      "Naming conflict between new columns and existing columns",
-      "x" = "Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
-    ), class = "epiprocess__epi_slide_opt_old_new_name_conflict")
-  }
-  if (anyDuplicated(.new_col_names)) {
-    cli_abort(c(
-      "New column names contain duplicates",
-      "x" = "Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
-    ), class = "epiprocess__epi_slide_opt_new_name_duplicated")
-  }
-  result_col_names <- .new_col_names
+  # Handle output naming:
+  names_info <- across_ish_names_info(.x, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
+  input_col_names <- names_info$input_col_names
+  output_col_names <- names_info$output_col_names
 
   # Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
   date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
@@ -933,23 +989,23 @@ epi_slide_opt <- function(
       # be; shift results to the left by `after` timesteps.
       if (window_args$before != Inf) {
         window_size <- window_args$before + window_args$after + 1L
-        roll_output <- .f(x = .data_group[, col_names_chr], n = window_size, ...)
+        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, ...)
       } else {
         window_size <- list(seq_along(.data_group$time_value))
-        roll_output <- .f(x = .data_group[, col_names_chr], n = window_size, adaptive = TRUE, ...)
+        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, adaptive = TRUE, ...)
       }
       if (window_args$after >= 1) {
-        .data_group[, result_col_names] <- purrr::map(roll_output, function(.x) {
+        .data_group[, output_col_names] <- purrr::map(roll_output, function(.x) {
           c(.x[(window_args$after + 1L):length(.x)], rep(NA, window_args$after))
         })
       } else {
-        .data_group[, result_col_names] <- roll_output
+        .data_group[, output_col_names] <- roll_output
       }
     }
     if (f_from_package == "slider") {
-      for (i in seq_along(col_names_chr)) {
-        .data_group[, result_col_names[i]] <- .f(
-          x = .data_group[[col_names_chr[i]]],
+      for (i in seq_along(input_col_names)) {
+        .data_group[, output_col_names[i]] <- .f(
+          x = .data_group[[input_col_names[i]]],
           before = as.numeric(window_args$before),
           after = as.numeric(window_args$after),
           ...
@@ -968,7 +1024,7 @@ epi_slide_opt <- function(
     group_by(!!!.x_orig_groups)
 
   if (.all_rows) {
-    result[!vec_in(result$time_value, ref_time_values), result_col_names] <- NA
+    result[!vec_in(result$time_value, ref_time_values), output_col_names] <- NA
   } else if (user_provided_rtvs) {
     result <- result[vec_in(result$time_value, ref_time_values), ]
   }
diff --git a/man/across_ish_names_info.Rd b/man/across_ish_names_info.Rd
new file mode 100644
index 000000000..70eb40c4f
--- /dev/null
+++ b/man/across_ish_names_info.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/slide.R
+\name{across_ish_names_info}
+\alias{across_ish_names_info}
+\title{Calculate input and output column names for an \code{{epiprocess}} \code{\link[dplyr:across]{dplyr::across}}-like operations}
+\usage{
+across_ish_names_info(
+  .x,
+  time_type,
+  col_names_quo,
+  .f_namer,
+  .window_size,
+  .align,
+  .prefix,
+  .suffix,
+  .new_col_names
+)
+}
+\arguments{
+\item{.x}{data.frame to perform input column tidyselection on}
+
+\item{time_type}{as in \code{\link{new_epi_df}}}
+
+\item{col_names_quo}{enquosed input column tidyselect expression}
+
+\item{.f_namer}{function taking an input column object and outputting a name
+for a corresponding output column; see \code{\link{upstream_slide_f_info}}}
+
+\item{.window_size}{as in \code{\link{epi_slide_opt}}}
+
+\item{.align}{as in \code{\link{epi_slide_opt}}}
+
+\item{.prefix}{as in \code{\link{epi_slide_opt}}}
+
+\item{.suffix}{as in \code{\link{epi_slide_opt}}}
+
+\item{.new_col_names}{as in \code{\link{epi_slide_opt}}}
+}
+\value{
+named list with two elements: \code{input_col_names}, chr, subset of
+\code{names(.x)}; and \code{output_colnames}, chr, same length as \code{input_col_names}
+}
+\description{
+Calculate input and output column names for an \code{{epiprocess}} \code{\link[dplyr:across]{dplyr::across}}-like operations
+}
+\keyword{internal}
diff --git a/man/upstream_slide_f_info.Rd b/man/upstream_slide_f_info.Rd
new file mode 100644
index 000000000..b0e928c4d
--- /dev/null
+++ b/man/upstream_slide_f_info.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/slide.R
+\name{upstream_slide_f_info}
+\alias{upstream_slide_f_info}
+\title{Validate & get information about an upstream slide function}
+\usage{
+upstream_slide_f_info(.f)
+}
+\arguments{
+\item{.f}{function such as \code{data.table::frollmean} or \code{slider::slide_mean};
+must appear in \code{\link{upstream_slide_f_possibilities}}}
+}
+\value{
+named list with two elements: \code{from_package}, a string containing the
+upstream package name ("data.table" or "slider"), and \code{namer}, a function
+that takes a column to call \code{.f} on and outputs a basic name or
+abbreviation for what operation \code{.f} represents on that kind of column
+(e.g., "sum", "av", "count").
+}
+\description{
+Validate & get information about an upstream slide function
+}
+\keyword{internal}
diff --git a/man/upstream_slide_f_possibilities.Rd b/man/upstream_slide_f_possibilities.Rd
new file mode 100644
index 000000000..9ba949ef1
--- /dev/null
+++ b/man/upstream_slide_f_possibilities.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/slide.R
+\docType{data}
+\name{upstream_slide_f_possibilities}
+\alias{upstream_slide_f_possibilities}
+\title{Information about upstream (\code{{data.table}}/\code{{slider}}) slide functions}
+\format{
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 10 rows and 3 columns.
+}
+\usage{
+upstream_slide_f_possibilities
+}
+\description{
+Underlies \code{\link{upstream_slide_f_info}}.
+}
+\keyword{internal}

From 8471a284764f76574e8b61d086ad53b8539181f6 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Feb 2025 13:03:31 -0800
Subject: [PATCH 002/107] feat: WIP epix_epi_slide_opt

---
 DESCRIPTION            |   2 +
 NAMESPACE              |  11 ++
 R/epiprocess-package.R |   8 ++
 R/epix_epi_slide_opt.R | 143 +++++++++++++++++++++++++
 R/patch.R              | 236 +++++++++++++++++++++++++++++++++++++++++
 man/tbl_diff2.Rd       |  44 ++++++++
 man/tbl_patch.Rd       |  26 +++++
 7 files changed, 470 insertions(+)
 create mode 100644 R/epix_epi_slide_opt.R
 create mode 100644 R/patch.R
 create mode 100644 man/tbl_diff2.Rd
 create mode 100644 man/tbl_patch.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index abb0cf86c..013d32600 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -99,6 +99,7 @@ Collate:
     'epi_df.R'
     'epi_df_forbidden_methods.R'
     'epiprocess-package.R'
+    'epix_epi_slide_opt.R'
     'group_by_epi_df_methods.R'
     'methods-epi_archive.R'
     'grouped_epi_archive.R'
@@ -106,6 +107,7 @@ Collate:
     'key_colnames.R'
     'methods-epi_df.R'
     'outliers.R'
+    'patch.R'
     'reexports.R'
     'revision_analysis.R'
     'slide.R'
diff --git a/NAMESPACE b/NAMESPACE
index 6f3ef6a11..0fce99981 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -27,6 +27,8 @@ S3method(dplyr_col_modify,col_modify_recorder_df)
 S3method(dplyr_col_modify,epi_df)
 S3method(dplyr_reconstruct,epi_df)
 S3method(dplyr_row_slice,epi_df)
+S3method(epix_epi_slide_opt,epi_archive)
+S3method(epix_epi_slide_opt,grouped_epi_archive)
 S3method(epix_slide,epi_archive)
 S3method(epix_slide,grouped_epi_archive)
 S3method(epix_truncate_versions_after,epi_archive)
@@ -73,6 +75,7 @@ export(epi_slide_mean)
 export(epi_slide_opt)
 export(epi_slide_sum)
 export(epix_as_of)
+export(epix_epi_slide_opt)
 export(epix_fill_through_version)
 export(epix_merge)
 export(epix_slide)
@@ -144,6 +147,7 @@ importFrom(data.table,address)
 importFrom(data.table,as.data.table)
 importFrom(data.table,between)
 importFrom(data.table,copy)
+importFrom(data.table,fifelse)
 importFrom(data.table,frollapply)
 importFrom(data.table,frollmean)
 importFrom(data.table,frollsum)
@@ -201,6 +205,7 @@ importFrom(rlang,"%||%")
 importFrom(rlang,.data)
 importFrom(rlang,.env)
 importFrom(rlang,arg_match)
+importFrom(rlang,arg_match0)
 importFrom(rlang,caller_arg)
 importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty)
@@ -236,10 +241,12 @@ importFrom(slider,slide_sum)
 importFrom(stats,cor)
 importFrom(stats,median)
 importFrom(tibble,as_tibble)
+importFrom(tibble,is_tibble)
 importFrom(tibble,new_tibble)
 importFrom(tibble,validate_tibble)
 importFrom(tidyr,complete)
 importFrom(tidyr,full_seq)
+importFrom(tidyr,nest)
 importFrom(tidyr,unnest)
 importFrom(tidyselect,any_of)
 importFrom(tidyselect,eval_select)
@@ -252,12 +259,16 @@ importFrom(vctrs,"vec_slice<-")
 importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_duplicate_any)
+importFrom(vctrs,vec_duplicate_id)
 importFrom(vctrs,vec_equal)
 importFrom(vctrs,vec_in)
+importFrom(vctrs,vec_match)
 importFrom(vctrs,vec_order)
 importFrom(vctrs,vec_rbind)
 importFrom(vctrs,vec_recycle_common)
 importFrom(vctrs,vec_rep)
+importFrom(vctrs,vec_rep_each)
+importFrom(vctrs,vec_seq_along)
 importFrom(vctrs,vec_size)
 importFrom(vctrs,vec_slice)
 importFrom(vctrs,vec_sort)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 61eccf993..9f01904b9 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -17,6 +17,7 @@
 #' @importFrom cli pluralize
 #' @importFrom cli qty
 #' @importFrom data.table as.data.table
+#' @importFrom data.table fifelse
 #' @importFrom data.table key
 #' @importFrom data.table setkeyv
 #' @importFrom dplyr arrange
@@ -26,16 +27,23 @@
 #' @importFrom lifecycle deprecated
 #' @importFrom purrr list_rbind
 #' @importFrom rlang %||%
+#' @importFrom rlang arg_match0
 #' @importFrom rlang is_bare_integerish
+#' @importFrom tibble is_tibble
+#' @importFrom tidyr nest
 #' @importFrom tools toTitleCase
 #' @importFrom vctrs vec_cast
 #' @importFrom vctrs vec_data
+#' @importFrom vctrs vec_duplicate_id
 #' @importFrom vctrs vec_equal
 #' @importFrom vctrs vec_in
+#' @importFrom vctrs vec_match
 #' @importFrom vctrs vec_order
 #' @importFrom vctrs vec_rbind
 #' @importFrom vctrs vec_recycle_common
 #' @importFrom vctrs vec_rep
+#' @importFrom vctrs vec_rep_each
+#' @importFrom vctrs vec_seq_along
 #' @importFrom vctrs vec_slice
 #' @importFrom vctrs vec_slice<-
 #' @importFrom vctrs vec_sort
diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
new file mode 100644
index 000000000..24ffba919
--- /dev/null
+++ b/R/epix_epi_slide_opt.R
@@ -0,0 +1,143 @@
+epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_package, before, after, time_type, out_colnames) {
+  if (before == Inf) {
+    cli_abort("epiprocess internal error: epix_epi_slide_opt_one_epikey() called with before == Inf")
+  }
+  unit_step <- epiprocess:::unit_time_delta(time_type)
+  prev_inp_snapshot <- NULL
+  prev_out_snapshot <- NULL
+  result <- map(seq_len(nrow(updates)), function(update_i) {
+    version <- updates$version[[update_i]]
+    inp_update <- updates$subtbl[[update_i]] # TODO decide whether DT
+    ## setDF(inp_update)
+    ## inp_update <- new_tibble(inp_update, nrow = nrow(inp_update))
+    inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
+    inp_update_min_t <- min(inp_update$time_value) # TODO check efficiency
+    inp_update_max_t <- max(inp_update$time_value)
+    # If the input had updates in the range t1..t2, this could produce changes
+    # in slide outputs in the range t1-after..t2+before, and to compute those
+    # slide values, we need to look at the input snapshot from
+    # t1-after-before..t2+before+after.
+    slide_min_t <- inp_update_min_t - (before + after) * unit_step
+    slide_max_t <- inp_update_max_t + (before + after) * unit_step
+    slide_n <- time_delta_to_n_steps(slide_max_t - slide_min_t, time_type) + 1L
+    slide_time_values <- slide_min_t + 0L:(slide_n - 1L) * unit_step
+    slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
+    slide <- inp_snapshot[slide_inp_backrefs, ] # TODO vs. DT key index vs ....
+    slide$time_value <- slide_time_values
+    # TODO ensure before & after as integers?
+    # TODO parameterize naming, slide function, options, ...
+    if (f_from_package == "data.table") {
+      for (col_i in seq_along(in_colnames)) {
+        # FIXME other arg forwarding
+        slide[[out_colnames[[col_i]]]] <- f(slide[[in_colnames[[col_i]]]], before + after + 1L)
+      }
+    } else if (f_from_package == "slider") {
+      for (col_i in seq_along(in_colnames)) {
+        # with adaptive tails that incorporate fewer inputs:
+        # FIXME other arg forwarding
+        out_col <- f(slide[[in_colnames[[col_i]]]], before = before, after = after)
+        # XXX is this actually required or being done at the right time? we are
+        # already chopping off a good amount that might include this?
+        #
+        # FIXME can this generate an error on very short series?
+        vec_slice(out_col, seq_len(before)) <- NA
+        vec_slice(out_col, slide_n - after + seq_len(after)) <- NA
+        slide[[out_colnames[[col_i]]]] <- out_col
+      }
+    } else {
+      cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}")
+    }
+    out_update <- slide[
+      # Get back to t1-after..t2+before; times outside this range were included
+      # only so those inside would have enough context for their slide
+      # computations, but these "context" rows may contain invalid slide
+      # computation outputs:
+      vec_rep_each(c(FALSE, TRUE, FALSE), c(before, slide_n - before - after, after)) &
+        # Only include time_values that appeared in the input snapshot:
+        !is.na(slide_inp_backrefs),
+    ]
+    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
+    out_snapshot <- tbl_patch(prev_out_snapshot, out_diff)
+    prev_inp_snapshot <<- inp_snapshot
+    prev_out_snapshot <<- out_snapshot # TODO avoid need to patch twice?
+    out_diff$version <- version
+    out_diff
+  })
+  result
+}
+
+#' @export
+epix_epi_slide_opt <-
+  function(.x, .col_names, .f, ...,
+           .window_size = NULL, .align = c("right", "center", "left"),
+           .prefix = NULL, .suffix = NULL, .new_col_names = NULL # ,
+           ## .ref_time_values = NULL, .all_rows = FALSE
+  ) {
+    UseMethod("epix_epi_slide_opt")
+  }
+
+#' @method epix_epi_slide_opt grouped_epi_archive
+#' @export
+epix_epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
+  assert_set_equal(
+    group_vars(.x),
+    key_colnames(.x, exclude = c("time_value", "version"))
+  )
+  orig_group_vars <- group_vars(.x)
+  orig_drop <- .x$private$drop
+  .x %>%
+    ungroup() %>%
+    epix_epi_slide_opt(...) %>%
+    group_by(pick(all_of(orig_group_vars)), .drop = orig_drop)
+}
+
+#' @method epix_epi_slide_opt epi_archive
+#' @export
+epix_epi_slide_opt.epi_archive <-
+  function(.x, .col_names, .f, ...,
+           .window_size = NULL, .align = c("right", "center", "left"),
+           .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
+           ## , .ref_time_values = NULL, .all_rows = FALSE
+           .progress = FALSE) {
+    # Extract metadata:
+    time_type <- .x$time_type
+    epikey_names <- key_colnames(.x, exclude = c("time_value", "version"))
+    # Validation & pre-processing:
+    .align <- arg_match(.align)
+    f_info <- upstream_slide_f_info(.f)
+    col_names_quo <- enquo(.col_names)
+    names_info <- across_ish_names_info(.x$DT, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
+    window_args <- get_before_after_from_window(.window_size, .align, time_type)
+    assert(
+      checkmate::check_logical(.progress, any.missing = FALSE, len = 1L, names = "unnamed"),
+      checkmate::check_string(.progress)
+    )
+    if (isTRUE(.progress)) {
+      .progress <- "Time series processed:"
+    }
+    use_progress <- !isFALSE(.progress)
+    # Perform the slide:
+    updates_grouped <- .x$DT %>%
+      as.data.frame() %>%
+      as_tibble(.name_repair = "minimal") %>%
+      # 0 rows input -> 0 rows output, so we can just say drop = TRUE:
+      grouped_df(epikey_names, TRUE)
+    if (use_progress) progress_bar_id <- cli::cli_progress_bar(.progress, total = n_groups(updates_grouped))
+    result <- updates_grouped %>%
+      group_modify(function(group_values, group_key) {
+        group_updates <- group_values %>%
+          nest(.by = version, .key = "subtbl") %>%
+          arrange(version)
+        res <- epix_epi_slide_opt_one_epikey(group_updates, names_info$input_col_names, .f, f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
+          list_rbind()
+        if (use_progress) cli::cli_progress_update(id = progress_bar_id)
+        res
+      }) %>%
+      ungroup() %>%
+      new_epi_archive(
+        .x$geo_type, .x$time_type, .x$other_keys,
+        .x$clobberable_versions_start, .x$versions_end
+      )
+    if (use_progress) cli::cli_progress_done(id = progress_bar_id)
+    result
+  }
diff --git a/R/patch.R b/R/patch.R
new file mode 100644
index 000000000..b582488a6
--- /dev/null
+++ b/R/patch.R
@@ -0,0 +1,236 @@
+# TODO use these in apply_compactify
+approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, recurse = approx_equal, inds1 = NULL, inds2 = NULL) {
+  vecs <- list(vec1, vec2)
+  if (!is.null(inds1)) {
+    # XXX could have logical or integerish inds; just leave it to later checks
+    # to hopefully abort for now
+  } else {
+    vecs <- vec_recycle_common(!!!vecs)
+  }
+  vecs <- vec_cast_common(!!!vecs, .to = .ptype)
+  approx_equal0(vecs[[1]], vecs[[2]], abs_tol, na_equal, rec = approx_equal, inds1, inds2)
+}
+
+approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, recurse = approx_equal0, inds1 = NULL, inds2 = NULL) {
+  if (is_bare_numeric(vec1) && abs_tol != 0) {
+    if (!is.null(inds1)) {
+      vec1 <- vec1[inds1]
+      vec2 <- vec2[inds2]
+    }
+    # perf: since we're working with bare numerics and logicals: we can use
+    # fifelse, and stop it from propagating attributes when there's no special
+    # class to guide the meaning
+    res <- fifelse(
+      !is.na(vec1) & !is.na(vec2),
+      abs(vec1 - vec2) <= abs_tol,
+      if (na_equal) is.na(vec1) & is.na(vec2) else FALSE
+    )
+    attributes(res) <- NULL
+    return(res)
+  } else if (is.data.frame(vec1) && abs_tol != 0) {
+    # (we only need to manually recurse if we potentially have columns that would
+    # be specially processed by the above)
+    if (ncol(vec1) == 0) {
+      rep(TRUE, nrow(vec1))
+    } else {
+      Reduce(`&`, lapply(seq_len(ncol(vec1)), function(col_i) {
+        recurse(vec1[[col_i]], vec2[[col_i]], abs_tol, na_equal, recurse, inds1, inds2)
+      }))
+    }
+  } else {
+    # No special handling for any other types/situations. Makes sense for
+    # unclassed atomic things; bare lists and certain vctrs classes might want
+    # recursion / specialization, though.
+    if (!is.null(inds1)) {
+      vec1 <- vec_slice(vec1, inds1)
+      vec2 <- vec_slice(vec2, inds2)
+    }
+    res <- vec_equal(vec1, vec2, na_equal = na_equal)
+    return(res)
+  }
+}
+
+#' Calculate compact patch to move from one snapshot/update to another
+#'
+#' @param earlier_snapshot tibble or `NULL`; `NULL` represents that there was no
+#'   data before `later_tbl`.
+#' @param later_tbl tibble; must have the same column names as
+#'   `earlier_snapshot` if it is a tibble.
+#' @param ukey_names character; column names that together, form a unique key
+#'   for `earlier_snapshot` and for `later_tbl`. This is unchecked; see
+#'   [`check_ukey_unique`] if you don't already have this guaranteed.
+#' @param later_format "snapshot" or "update"; default is "snapshot". If
+#'   "snapshot", `later_tbl` will be interpreted as a full snapshot of the data
+#'   set including all ukeys, and any ukeys that are in `earlier_snapshot` but
+#'   not in `later_tbl` are interpreted as deletions, which are currently
+#'   (imprecisely) represented in the output patch as revisions of all
+#'   non-`ukey_names` columns to NA values (using `{vctrs}`). If "update", then
+#'   it's assumed that any deletions have already been represented this way in
+#'   `later_tbl` and any ukeys not in `later_tbl` are simply unchanged; we are
+#'   just ensuring that the update is fully compact for the given
+#'   `compactify_abs_tol`.
+#' @param compactify_abs_tol compactification tolerance; see `apply_compactify`
+#' @return a tibble in compact "update" (diff) format
+tbl_diff2 <- function(earlier_snapshot, later_tbl,
+                      ukey_names,
+                      later_format = c("snapshot", "update"),
+                      compactify_abs_tol = 0) {
+  # Most input validation + handle NULL earlier_snapshot. This is a small function so
+  # use faster validation variants:
+  if (!is_tibble(later_tbl)) {
+    cli_abort("`later_tbl` must be a tibble")
+  }
+  if (is.null(earlier_snapshot)) {
+    return(later_tbl)
+  }
+  if (!is_tibble(earlier_snapshot)) {
+    cli_abort("`earlier_snapshot` must be a tibble or `NULL`")
+  }
+  later_format <- arg_match0(later_format, c("snapshot", "update"))
+  if (!(is.vector(compactify_abs_tol, mode = "numeric") && length(compactify_abs_tol) == 1L && compactify_abs_tol >= 0)) {
+    # Give a specific message:
+    assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)
+    # Fallback e.g. for invalid classes not caught by assert_numeric:
+    cli_abort("`compactify_abs_tol` must be a length-1 double/integer >= 0")
+  }
+
+  # Extract metadata:
+  earlier_n <- nrow(earlier_snapshot)
+  later_n <- nrow(later_tbl)
+  tbl_names <- names(earlier_snapshot)
+  val_names <- tbl_names[!tbl_names %in% ukey_names]
+
+  # More input validation:
+  if (!identical(tbl_names, names(later_tbl))) {
+    # XXX is this check actually necessary?
+    cli_abort(c("`earlier_snapshot` and `later_tbl` should have identical column
+                 names and ordering.",
+      "*" = "`earlier_snapshot` colnames: {format_chr_deparse(tbl_names)}",
+      "*" = "`later_tbl` colnames: {format_chr_deparse(names(later_tbl))}"
+    ))
+  }
+
+  combined_tbl <- vec_rbind(earlier_snapshot, later_tbl)
+  combined_n <- nrow(combined_tbl)
+
+  # We'll also need epikeytimes and value columns separately:
+  combined_ukeys <- combined_tbl[ukey_names]
+  combined_vals <- combined_tbl[val_names]
+
+  # We have five types of rows in combined_tbl:
+  # 1. From earlier_snapshot, no matching ukey in later_tbl (deletion; turn vals to
+  #    NAs to match epi_archive format)
+  # 2. From earlier_snapshot, with matching ukey in later_tbl (context; exclude from
+  #    result)
+  # 3. From later_tbl, with matching ukey in earlier_snapshot, with value "close" (change
+  #    that we'll compactify away)
+  # 4. From later_tbl, with matching ukey in earlier_snapshot, value not "close" (change
+  #    that we'll record)
+  # 5. From later_tbl, with no matching ukey in later_tbl (addition)
+
+  # For "snapshot" later_format, we need to filter to 1., 4., and 5., and alter
+  # values for 1.  For "update" later_format, we need to filter to 4. and 5.
+
+  # (For compactify_abs_tol = 0, we could potentially streamline things by dropping
+  # ukey+val duplicates (cases 2. and 3.).)
+
+  # Row indices of first occurrence of each ukey; will be the same as
+  # seq_len(combined_n) except for when that ukey has been re-reported in
+  # `later_tbl`, in which case (3. or 4.) it will point back to the row index of
+  # the same ukey in `earlier_snapshot`:
+  combined_ukey_firsts <- vec_duplicate_id(combined_ukeys)
+
+  # Which rows from combined are cases 3. or 4.?
+  combined_ukey_is_repeat <- combined_ukey_firsts != seq_len(combined_n)
+  # For each row in 3. or 4., row numbers of the ukey appearance in earlier:
+  ukey_repeat_first_i <- combined_ukey_firsts[combined_ukey_is_repeat]
+
+  # Which rows from combined are in case 3.?
+  combined_compactify_away <- rep(FALSE, combined_n)
+  combined_compactify_away[combined_ukey_is_repeat] <-
+    approx_equal0(combined_vals,
+      combined_vals,
+      # TODO move inds closer to vals to not be as confusing?
+      abs_tol = compactify_abs_tol,
+      na_equal = TRUE,
+      inds1 = combined_ukey_is_repeat,
+      inds2 = ukey_repeat_first_i
+    )
+
+  # Which rows from combined are in cases 3., 4., or 5.?
+  combined_from_later <- vec_rep_each(c(FALSE, TRUE), c(earlier_n, later_n))
+
+  if (later_format == "update") {
+    # Cases 4. and 5.:
+    combined_tbl <- combined_tbl[combined_from_later & !combined_compactify_away, ]
+  } else { # later_format == "snapshot"
+    # Which rows from combined are in case 1.?
+    combined_is_deletion <- vec_rep_each(c(TRUE, FALSE), c(earlier_n, later_n))
+    combined_is_deletion[ukey_repeat_first_i] <- FALSE
+    # Which rows from combined are in cases 1., 4., or 5.?
+    combined_include <- combined_is_deletion | combined_from_later & !combined_compactify_away
+    combined_tbl <- combined_tbl[combined_include, ]
+    # Represent deletion in 1. with NA-ing of all value columns. (In some
+    # previous approaches to epi_diff2, this seemed to be faster than using
+    # vec_rbind(case_1_ukeys, cases_45_tbl) or bind_rows to fill with NAs, and more
+    # general than data.table's rbind(case_1_ukeys, cases_45_tbl, fill = TRUE).)
+    combined_tbl[combined_is_deletion[combined_include], val_names] <- NA
+  }
+
+  combined_tbl
+}
+
+epi_diff2 <- function(earlier_snapshot, later_edf,
+                      later_format = c("snapshot", "update"),
+                      compactify_abs_tol = 0) {
+  ukey_names <- key_colnames(later_edf)
+  dplyr_reconstruct(tbl_diff2(as_tibble(earlier_snapshot), as_tibble(later_edf), ukey_names, later_format, compactify_abs_tol), later_edf)
+}
+
+# XXX vs. tbl_patch_apply?
+
+
+
+#' Apply an update (e.g., from `tbl_diff2`) to a snapshot
+#'
+#' @param snapshot tibble or `NULL`; entire data set as of some version, or
+#'   `NULL` to treat `update` as the initial version of the data set.
+#' @param update tibble; ukeys + initial values for added rows, ukeys + new
+#'   values for changed rows. Deletions must be imprecisely represented as
+#'   changing all values to NAs.
+#' @param ukey_names character; names of columns that should form a unique key
+#'   for `snapshot` and for `update`. Uniqueness is unchecked; if you don't have
+#'   this guaranteed, see [`check_ukey_unique()`].
+#' @return tibble; snapshot of the data set with the update applied.
+tbl_patch <- function(snapshot, update, ukey_names) {
+  # Most input validation. This is a small function so use faster validation
+  # variants:
+  if (!is_tibble(update)) {
+    # XXX debating about whether to have a specialized class for updates/diffs.
+    # Seems nice for type-based reasoning and might remove some args from
+    # interfaces, but would require constructor/converter functions for that
+    # type.
+    cli_abort("`update` must be a tibble")
+  }
+  if (is.null(snapshot)) {
+    return(update)
+  }
+  if (!is_tibble(snapshot)) {
+    cli_abort("`snapshot` must be a tibble")
+  }
+
+  result_tbl <- vec_rbind(update, snapshot)
+
+  dup_ids <- vec_duplicate_id(result_tbl[ukey_names])
+  not_overwritten <- dup_ids == vec_seq_along(result_tbl)
+  result_tbl <- result_tbl[not_overwritten, ]
+
+  ## result_tbl <- arrange_canonical(result_tbl)
+
+  result_tbl
+}
+
+epi_patch <- function(snapshot, update) {
+  ukey_names <- key_colnames(update)
+  dplyr_reconstruct(tbl_patch(as_tibble(snapshot), as_tibble(update), ukey_names), update)
+}
diff --git a/man/tbl_diff2.Rd b/man/tbl_diff2.Rd
new file mode 100644
index 000000000..d9b652258
--- /dev/null
+++ b/man/tbl_diff2.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/patch.R
+\name{tbl_diff2}
+\alias{tbl_diff2}
+\title{Calculate compact patch to move from one snapshot/update to another}
+\usage{
+tbl_diff2(
+  earlier_snapshot,
+  later_tbl,
+  ukey_names,
+  later_format = c("snapshot", "update"),
+  compactify_abs_tol = 0
+)
+}
+\arguments{
+\item{earlier_snapshot}{tibble or \code{NULL}; \code{NULL} represents that there was no
+data before \code{later_tbl}.}
+
+\item{later_tbl}{tibble; must have the same column names as
+\code{earlier_snapshot} if it is a tibble.}
+
+\item{ukey_names}{character; column names that together, form a unique key
+for \code{earlier_snapshot} and for \code{later_tbl}. This is unchecked; see
+\code{\link{check_ukey_unique}} if you don't already have this guaranteed.}
+
+\item{later_format}{"snapshot" or "update"; default is "snapshot". If
+"snapshot", \code{later_tbl} will be interpreted as a full snapshot of the data
+set including all ukeys, and any ukeys that are in \code{earlier_snapshot} but
+not in \code{later_tbl} are interpreted as deletions, which are currently
+(imprecisely) represented in the output patch as revisions of all
+non-\code{ukey_names} columns to NA values (using \code{{vctrs}}). If "update", then
+it's assumed that any deletions have already been represented this way in
+\code{later_tbl} and any ukeys not in \code{later_tbl} are simply unchanged; we are
+just ensuring that the update is fully compact for the given
+\code{compactify_abs_tol}.}
+
+\item{compactify_abs_tol}{compactification tolerance; see \code{apply_compactify}}
+}
+\value{
+a tibble in compact "update" (diff) format
+}
+\description{
+Calculate compact patch to move from one snapshot/update to another
+}
diff --git a/man/tbl_patch.Rd b/man/tbl_patch.Rd
new file mode 100644
index 000000000..2b4e8288b
--- /dev/null
+++ b/man/tbl_patch.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/patch.R
+\name{tbl_patch}
+\alias{tbl_patch}
+\title{Apply an update (e.g., from \code{tbl_diff2}) to a snapshot}
+\usage{
+tbl_patch(snapshot, update, ukey_names)
+}
+\arguments{
+\item{snapshot}{tibble or \code{NULL}; entire data set as of some version, or
+\code{NULL} to treat \code{update} as the initial version of the data set.}
+
+\item{update}{tibble; ukeys + initial values for added rows, ukeys + new
+values for changed rows. Deletions must be imprecisely represented as
+changing all values to NAs.}
+
+\item{ukey_names}{character; names of columns that should form a unique key
+for \code{snapshot} and for \code{update}. Uniqueness is unchecked; if you don't have
+this guaranteed, see \code{\link[=check_ukey_unique]{check_ukey_unique()}}.}
+}
+\value{
+tibble; snapshot of the data set with the update applied.
+}
+\description{
+Apply an update (e.g., from \code{tbl_diff2}) to a snapshot
+}

From 4cc0961b78e1b20718f07053d5c9d4091fca08e5 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 28 Feb 2025 12:47:46 -0800
Subject: [PATCH 003/107] epix_epi_slide_opt: don't re-order input columns in
 output

---
 NAMESPACE              | 1 +
 R/epiprocess-package.R | 1 +
 R/epix_epi_slide_opt.R | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 0fce99981..168b7507d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -156,6 +156,7 @@ importFrom(data.table,key)
 importFrom(data.table,rbindlist)
 importFrom(data.table,set)
 importFrom(data.table,setDF)
+importFrom(data.table,setcolorder)
 importFrom(data.table,setkeyv)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 9f01904b9..8a380c092 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -19,6 +19,7 @@
 #' @importFrom data.table as.data.table
 #' @importFrom data.table fifelse
 #' @importFrom data.table key
+#' @importFrom data.table setcolorder
 #' @importFrom data.table setkeyv
 #' @importFrom dplyr arrange
 #' @importFrom dplyr grouped_df
diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 24ffba919..bb201cf18 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -56,7 +56,7 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_packag
         # Only include time_values that appeared in the input snapshot:
         !is.na(slide_inp_backrefs),
     ]
-    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
+    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update") # TODO avoid redundant diff2 work? though depends on compactify parms...
     out_snapshot <- tbl_patch(prev_out_snapshot, out_diff)
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- out_snapshot # TODO avoid need to patch twice?
@@ -139,5 +139,7 @@ epix_epi_slide_opt.epi_archive <-
         .x$clobberable_versions_start, .x$versions_end
       )
     if (use_progress) cli::cli_progress_done(id = progress_bar_id)
+    # Keep ordering of old columns, place new columns at end:
+    setcolorder(result$DT, names(.x$DT))
     result
   }

From 3be711e9cc8b3c46484645dc697c6c13e94c9a66 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 3 Mar 2025 10:09:01 -0800
Subject: [PATCH 004/107] feat: support before = Inf in epix_epi_slide_opt

---
 R/epix_epi_slide_opt.R | 69 +++++++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 25 deletions(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index bb201cf18..d42f1c3c8 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -1,7 +1,5 @@
+# TODO just make this an epi_slide_opt impl?
 epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_package, before, after, time_type, out_colnames) {
-  if (before == Inf) {
-    cli_abort("epiprocess internal error: epix_epi_slide_opt_one_epikey() called with before == Inf")
-  }
   unit_step <- epiprocess:::unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
@@ -11,25 +9,41 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_packag
     ## setDF(inp_update)
     ## inp_update <- new_tibble(inp_update, nrow = nrow(inp_update))
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
-    inp_update_min_t <- min(inp_update$time_value) # TODO check efficiency
-    inp_update_max_t <- max(inp_update$time_value)
-    # If the input had updates in the range t1..t2, this could produce changes
-    # in slide outputs in the range t1-after..t2+before, and to compute those
-    # slide values, we need to look at the input snapshot from
-    # t1-after-before..t2+before+after.
-    slide_min_t <- inp_update_min_t - (before + after) * unit_step
-    slide_max_t <- inp_update_max_t + (before + after) * unit_step
-    slide_n <- time_delta_to_n_steps(slide_max_t - slide_min_t, time_type) + 1L
-    slide_time_values <- slide_min_t + 0L:(slide_n - 1L) * unit_step
+    if (before == Inf) {
+      if (after != 0) {
+        cli_abort('.window_size = Inf is only supported with .align = "right"')
+      }
+      # We need to use the entire input snapshot range, filling in time gaps. We
+      # shouldn't pad the ends.
+      slide_min_t <- min(inp_snapshot$time_value) # TODO check efficiency
+      slide_max_t <- max(inp_snapshot$time_value)
+    } else {
+      # If the input had updates in the range t1..t2, this could produce changes
+      # in slide outputs in the range t1-after..t2+before, and to compute those
+      # slide values, we need to look at the input snapshot from
+      # t1-after-before..t2+before+after.
+      inp_update_min_t <- min(inp_update$time_value) # TODO check efficiency
+      inp_update_max_t <- max(inp_update$time_value)
+      slide_min_t <- inp_update_min_t - (before + after) * unit_step
+      slide_max_t <- inp_update_max_t + (before + after) * unit_step
+    }
+    slide_nrow <- time_delta_to_n_steps(slide_max_t - slide_min_t, time_type) + 1L
+    slide_time_values <- slide_min_t + 0L:(slide_nrow - 1L) * unit_step
     slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
     slide <- inp_snapshot[slide_inp_backrefs, ] # TODO vs. DT key index vs ....
     slide$time_value <- slide_time_values
     # TODO ensure before & after as integers?
     # TODO parameterize naming, slide function, options, ...
     if (f_from_package == "data.table") {
+      if (before == Inf) {
+        n_arg <- seq_len(slide_nrow)
+        adaptive_arg <- TRUE
+      } else {
+        n_arg <- before + after + 1L
+        adaptive_arg <- FALSE
+      }
       for (col_i in seq_along(in_colnames)) {
-        # FIXME other arg forwarding
-        slide[[out_colnames[[col_i]]]] <- f(slide[[in_colnames[[col_i]]]], before + after + 1L)
+        slide[[out_colnames[[col_i]]]] <- f(slide[[in_colnames[[col_i]]]], n_arg, adaptive = adaptive_arg)
       }
     } else if (f_from_package == "slider") {
       for (col_i in seq_along(in_colnames)) {
@@ -41,21 +55,26 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_packag
         #
         # FIXME can this generate an error on very short series?
         vec_slice(out_col, seq_len(before)) <- NA
-        vec_slice(out_col, slide_n - after + seq_len(after)) <- NA
+        vec_slice(out_col, slide_nrow - after + seq_len(after)) <- NA
         slide[[out_colnames[[col_i]]]] <- out_col
       }
     } else {
       cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}")
     }
-    out_update <- slide[
-      # Get back to t1-after..t2+before; times outside this range were included
-      # only so those inside would have enough context for their slide
-      # computations, but these "context" rows may contain invalid slide
-      # computation outputs:
-      vec_rep_each(c(FALSE, TRUE, FALSE), c(before, slide_n - before - after, after)) &
-        # Only include time_values that appeared in the input snapshot:
-        !is.na(slide_inp_backrefs),
-    ]
+    rows_should_keep <-
+      if (before == Inf) {
+        # Re-introduce time gaps:
+        !is.na(slide_inp_backrefs)
+      } else {
+        # Get back to t1-after..t2+before; times outside this range were included
+        # only so those inside would have enough context for their slide
+        # computations, but these "context" rows may contain invalid slide
+        # computation outputs:
+        vec_rep_each(c(FALSE, TRUE, FALSE), c(before, slide_nrow - before - after, after)) &
+          # Only include time_values that appeared in the input snapshot:
+          !is.na(slide_inp_backrefs)
+      }
+    out_update <- slide[rows_should_keep, ]
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update") # TODO avoid redundant diff2 work? though depends on compactify parms...
     out_snapshot <- tbl_patch(prev_out_snapshot, out_diff)
     prev_inp_snapshot <<- inp_snapshot

From 78c78852d1e21ffd79b4caffab204e0150b4d5c3 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 3 Mar 2025 10:36:36 -0800
Subject: [PATCH 005/107] feat(epi_slide_opt): improve feedback when .f is
 forgotten

---
 R/epiprocess-package.R | 1 +
 R/slide.R              | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 8a380c092..93af0b737 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -6,6 +6,7 @@
 #' @importFrom checkmate anyInfinite anyMissing assert assert_character
 #' @importFrom checkmate assert_class assert_data_frame assert_int assert_list
 #' @importFrom checkmate assert_false
+#' @importFrom checkmate assert_function
 #' @importFrom checkmate assert_logical assert_numeric assert_scalar checkInt
 #' @importFrom checkmate assert_string
 #' @importFrom checkmate assert_subset
diff --git a/R/slide.R b/R/slide.R
index b0de0f8d2..0da569deb 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -581,6 +581,8 @@ upstream_slide_f_possibilities <- tibble::tribble(
 #'
 #' @keywords internal
 upstream_slide_f_info <- function(.f) {
+  assert_function(.f)
+
   # Check that slide function `.f` is one of those short-listed from
   # `data.table` and `slider` (or a function that has the exact same definition,
   # e.g. if the function has been reexported or defined locally). Extract some
@@ -776,7 +778,6 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .windo
 #' @importFrom purrr map map_lgl
 #' @importFrom data.table frollmean frollsum frollapply
 #' @importFrom lubridate as.period
-#' @importFrom checkmate assert_function
 #' @importFrom slider slide_sum slide_prod slide_mean slide_min slide_max slide_all slide_any
 #' @export
 #' @seealso [`epi_slide`] for the more general slide function
@@ -1164,7 +1165,6 @@ epi_slide_sum <- function(
 #' `before` and `after` args are assumed to have been validated by the calling
 #' function (using `validate_slide_window_arg`).
 #'
-#' @importFrom checkmate assert_function
 #' @keywords internal
 full_date_seq <- function(x, before, after, time_type) {
   if (!time_type %in% c("day", "week", "yearmonth", "integer")) {

From f5c235f61196e746af253ed4549092e599745a41 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 3 Mar 2025 10:47:03 -0800
Subject: [PATCH 006/107] fix(epix_epi_slide_opt): support ... forwarding

---
 R/epix_epi_slide_opt.R | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index d42f1c3c8..502a757e4 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -1,5 +1,5 @@
 # TODO just make this an epi_slide_opt impl?
-epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_package, before, after, time_type, out_colnames) {
+epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
   unit_step <- epiprocess:::unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
@@ -43,13 +43,13 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_packag
         adaptive_arg <- FALSE
       }
       for (col_i in seq_along(in_colnames)) {
-        slide[[out_colnames[[col_i]]]] <- f(slide[[in_colnames[[col_i]]]], n_arg, adaptive = adaptive_arg)
+        slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], n_arg, adaptive = adaptive_arg)
       }
     } else if (f_from_package == "slider") {
       for (col_i in seq_along(in_colnames)) {
         # with adaptive tails that incorporate fewer inputs:
         # FIXME other arg forwarding
-        out_col <- f(slide[[in_colnames[[col_i]]]], before = before, after = after)
+        out_col <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
         # XXX is this actually required or being done at the right time? we are
         # already chopping off a good amount that might include this?
         #
@@ -109,7 +109,6 @@ epix_epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
     epix_epi_slide_opt(...) %>%
     group_by(pick(all_of(orig_group_vars)), .drop = orig_drop)
 }
-
 #' @method epix_epi_slide_opt epi_archive
 #' @export
 epix_epi_slide_opt.epi_archive <-
@@ -123,9 +122,17 @@ epix_epi_slide_opt.epi_archive <-
     epikey_names <- key_colnames(.x, exclude = c("time_value", "version"))
     # Validation & pre-processing:
     .align <- arg_match(.align)
-    f_info <- upstream_slide_f_info(.f)
+    .f_info <- upstream_slide_f_info(.f)
+    .f_dots_baked <-
+      if (rlang::dots_n(...) == 0L) {
+        # Leaving `.f` unchanged slightly improves computation speed and trims
+        # debug stack traces:
+        .f
+      } else {
+        purrr::partial(.f, ...)
+      }
     col_names_quo <- enquo(.col_names)
-    names_info <- across_ish_names_info(.x$DT, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
+    names_info <- across_ish_names_info(.x$DT, time_type, col_names_quo, .f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
     assert(
       checkmate::check_logical(.progress, any.missing = FALSE, len = 1L, names = "unnamed"),
@@ -147,7 +154,7 @@ epix_epi_slide_opt.epi_archive <-
         group_updates <- group_values %>%
           nest(.by = version, .key = "subtbl") %>%
           arrange(version)
-        res <- epix_epi_slide_opt_one_epikey(group_updates, names_info$input_col_names, .f, f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
+        res <- epix_epi_slide_opt_one_epikey(group_updates, names_info$input_col_names, .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
           list_rbind()
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
         res

From 15e8c7da721b418793eb3192c2ecec64099beb89 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 3 Mar 2025 12:27:14 -0800
Subject: [PATCH 007/107] Clean up some comments&code + note bug

---
 R/epix_epi_slide_opt.R | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 502a757e4..22b83eb1a 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -30,10 +30,11 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
     slide_nrow <- time_delta_to_n_steps(slide_max_t - slide_min_t, time_type) + 1L
     slide_time_values <- slide_min_t + 0L:(slide_nrow - 1L) * unit_step
     slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
+    # Get additional values needed from inp_snapshot + perform any NA
+    # tail-padding needed to make slider results a fixed window size rather than
+    # adaptive at tails + perform any NA gap-filling needed:
     slide <- inp_snapshot[slide_inp_backrefs, ] # TODO vs. DT key index vs ....
     slide$time_value <- slide_time_values
-    # TODO ensure before & after as integers?
-    # TODO parameterize naming, slide function, options, ...
     if (f_from_package == "data.table") {
       if (before == Inf) {
         n_arg <- seq_len(slide_nrow)
@@ -43,20 +44,12 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
         adaptive_arg <- FALSE
       }
       for (col_i in seq_along(in_colnames)) {
+        # FIXME wrong with .align = "left"
         slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], n_arg, adaptive = adaptive_arg)
       }
     } else if (f_from_package == "slider") {
       for (col_i in seq_along(in_colnames)) {
-        # with adaptive tails that incorporate fewer inputs:
-        # FIXME other arg forwarding
-        out_col <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
-        # XXX is this actually required or being done at the right time? we are
-        # already chopping off a good amount that might include this?
-        #
-        # FIXME can this generate an error on very short series?
-        vec_slice(out_col, seq_len(before)) <- NA
-        vec_slice(out_col, slide_nrow - after + seq_len(after)) <- NA
-        slide[[out_colnames[[col_i]]]] <- out_col
+        slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
       }
     } else {
       cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}")

From 2cc751da97c0608dcdfc5511b9b0fb2c04bba6f1 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 3 Mar 2025 12:56:18 -0800
Subject: [PATCH 008/107] fix(epix_epi_slide_opt): on data.table `.f`s with
 `.align != "right"`

---
 R/epix_epi_slide_opt.R | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 22b83eb1a..6256efb4e 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -36,16 +36,25 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
     slide <- inp_snapshot[slide_inp_backrefs, ] # TODO vs. DT key index vs ....
     slide$time_value <- slide_time_values
     if (f_from_package == "data.table") {
-      if (before == Inf) {
-        n_arg <- seq_len(slide_nrow)
-        adaptive_arg <- TRUE
-      } else {
-        n_arg <- before + after + 1L
-        adaptive_arg <- FALSE
-      }
+      # if (before == Inf) {
+      #   n_arg <- seq_len(slide_nrow)
+      #   adaptive_arg <- TRUE
+      # } else {
+      #   n_arg <- before + after + 1L
+      #   adaptive_arg <- FALSE
+      # }
       for (col_i in seq_along(in_colnames)) {
-        # FIXME wrong with .align = "left"
-        slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], n_arg, adaptive = adaptive_arg)
+        if (before == Inf) {
+          slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], seq_len(slide_nrow), adaptive = TRUE)
+        } else {
+          out_col <- f_dots_baked(slide[[in_colnames[[col_i]]]], before + after + 1L)
+          if (after != 0L) {
+            # data.table always puts NAs at tails, even with na.rm = TRUE; chop
+            # off extra NAs from beginning and place missing NAs at end:
+            out_col <- c(out_col[seq(after + 1L, slide_nrow)], rep(NA, after))
+          }
+          slide[[out_colnames[[col_i]]]] <- out_col
+        }
       }
     } else if (f_from_package == "slider") {
       for (col_i in seq_along(in_colnames)) {

From 9cc8468689cc59e025f3549aa5ee4dcf2b5ac7a9 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 3 Mar 2025 15:01:03 -0800
Subject: [PATCH 009/107] Clear out some more comments

Changing min() to [[1L]] or min & max to range don't seem to make a notable
performance difference, and require extra assumptions / uglier code.
---
 R/epix_epi_slide_opt.R | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 6256efb4e..9e09af8f6 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -5,9 +5,7 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
   prev_out_snapshot <- NULL
   result <- map(seq_len(nrow(updates)), function(update_i) {
     version <- updates$version[[update_i]]
-    inp_update <- updates$subtbl[[update_i]] # TODO decide whether DT
-    ## setDF(inp_update)
-    ## inp_update <- new_tibble(inp_update, nrow = nrow(inp_update))
+    inp_update <- updates$subtbl[[update_i]]
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
     if (before == Inf) {
       if (after != 0) {
@@ -15,34 +13,27 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
       }
       # We need to use the entire input snapshot range, filling in time gaps. We
       # shouldn't pad the ends.
-      slide_min_t <- min(inp_snapshot$time_value) # TODO check efficiency
-      slide_max_t <- max(inp_snapshot$time_value)
+      slide_t_min <- min(inp_snapshot$time_value)
+      slide_t_max <- max(inp_snapshot$time_value)
     } else {
       # If the input had updates in the range t1..t2, this could produce changes
       # in slide outputs in the range t1-after..t2+before, and to compute those
       # slide values, we need to look at the input snapshot from
       # t1-after-before..t2+before+after.
-      inp_update_min_t <- min(inp_update$time_value) # TODO check efficiency
-      inp_update_max_t <- max(inp_update$time_value)
-      slide_min_t <- inp_update_min_t - (before + after) * unit_step
-      slide_max_t <- inp_update_max_t + (before + after) * unit_step
+      inp_update_t_min <- min(inp_update$time_value)
+      inp_update_t_max <- max(inp_update$time_value)
+      slide_t_min <- inp_update_t_min - (before + after) * unit_step
+      slide_t_max <- inp_update_t_max + (before + after) * unit_step
     }
-    slide_nrow <- time_delta_to_n_steps(slide_max_t - slide_min_t, time_type) + 1L
-    slide_time_values <- slide_min_t + 0L:(slide_nrow - 1L) * unit_step
+    slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
+    slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
     slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
     # Get additional values needed from inp_snapshot + perform any NA
     # tail-padding needed to make slider results a fixed window size rather than
     # adaptive at tails + perform any NA gap-filling needed:
-    slide <- inp_snapshot[slide_inp_backrefs, ] # TODO vs. DT key index vs ....
+    slide <- inp_snapshot[slide_inp_backrefs, ]
     slide$time_value <- slide_time_values
     if (f_from_package == "data.table") {
-      # if (before == Inf) {
-      #   n_arg <- seq_len(slide_nrow)
-      #   adaptive_arg <- TRUE
-      # } else {
-      #   n_arg <- before + after + 1L
-      #   adaptive_arg <- FALSE
-      # }
       for (col_i in seq_along(in_colnames)) {
         if (before == Inf) {
           slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], seq_len(slide_nrow), adaptive = TRUE)

From 762f04afac3c1e70a681cc2cec7b7301194c5ea4 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 4 Mar 2025 11:07:55 -0800
Subject: [PATCH 010/107] Fix missing ukey_names arg

---
 R/epix_epi_slide_opt.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 9e09af8f6..74cbcd8cc 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -69,7 +69,7 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
       }
     out_update <- slide[rows_should_keep, ]
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update") # TODO avoid redundant diff2 work? though depends on compactify parms...
-    out_snapshot <- tbl_patch(prev_out_snapshot, out_diff)
+    out_snapshot <- tbl_patch(prev_out_snapshot, out_diff, "time_value")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- out_snapshot # TODO avoid need to patch twice?
     out_diff$version <- version

From bd0a7087fe9e7d3abc00e6069abe0ba9fee53f2e Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 4 Mar 2025 11:19:37 -0800
Subject: [PATCH 011/107] Check for missing & improper ukey_names args

---
 R/patch.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/patch.R b/R/patch.R
index b582488a6..ba04246d5 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -86,6 +86,9 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   if (!is_tibble(earlier_snapshot)) {
     cli_abort("`earlier_snapshot` must be a tibble or `NULL`")
   }
+  if (!is.character(ukey_names) || !all(ukey_names %in% names(earlier_snapshot))) {
+    cli_abort("`ukey_names` must be a subset of column names")
+  }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
   if (!(is.vector(compactify_abs_tol, mode = "numeric") && length(compactify_abs_tol) == 1L && compactify_abs_tol >= 0)) {
     # Give a specific message:
@@ -218,6 +221,9 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   if (!is_tibble(snapshot)) {
     cli_abort("`snapshot` must be a tibble")
   }
+  if (!is.character(ukey_names) || !all(ukey_names %in% names(snapshot))) {
+    cli_abort("`ukey_names` must be a subset of column names")
+  }
 
   result_tbl <- vec_rbind(update, snapshot)
 

From 860831398c5f3cd897ab49e9c30dd5e006fb76af Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 4 Mar 2025 11:57:43 -0800
Subject: [PATCH 012/107] Remove some commented ideas that aren't quick wins

---
 R/epix_epi_slide_opt.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 74cbcd8cc..8fb9d5905 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -1,4 +1,3 @@
-# TODO just make this an epi_slide_opt impl?
 epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
   unit_step <- epiprocess:::unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
@@ -68,16 +67,18 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
           !is.na(slide_inp_backrefs)
       }
     out_update <- slide[rows_should_keep, ]
-    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update") # TODO avoid redundant diff2 work? though depends on compactify parms...
+    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     out_snapshot <- tbl_patch(prev_out_snapshot, out_diff, "time_value")
     prev_inp_snapshot <<- inp_snapshot
-    prev_out_snapshot <<- out_snapshot # TODO avoid need to patch twice?
+    prev_out_snapshot <<- out_snapshot
     out_diff$version <- version
     out_diff
   })
   result
 }
 
+# TODO just make this an epi_slide_opt impl?
+
 #' @export
 epix_epi_slide_opt <-
   function(.x, .col_names, .f, ...,

From 111bac53be0547cce9f71d121cf6c836b76bfb27 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 4 Mar 2025 18:18:47 -0800
Subject: [PATCH 013/107] WIP cleaning up approx_equal

---
 NAMESPACE              |  3 +++
 R/epiprocess-package.R |  3 +++
 R/patch.R              | 42 +++++++++++++++++++++++++-----------------
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 168b7507d..2d4760771 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -258,6 +258,7 @@ importFrom(utils,capture.output)
 importFrom(utils,tail)
 importFrom(vctrs,"vec_slice<-")
 importFrom(vctrs,vec_cast)
+importFrom(vctrs,vec_cast_common)
 importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_duplicate_any)
 importFrom(vctrs,vec_duplicate_id)
@@ -266,10 +267,12 @@ importFrom(vctrs,vec_in)
 importFrom(vctrs,vec_match)
 importFrom(vctrs,vec_order)
 importFrom(vctrs,vec_rbind)
+importFrom(vctrs,vec_recycle)
 importFrom(vctrs,vec_recycle_common)
 importFrom(vctrs,vec_rep)
 importFrom(vctrs,vec_rep_each)
 importFrom(vctrs,vec_seq_along)
 importFrom(vctrs,vec_size)
+importFrom(vctrs,vec_size_common)
 importFrom(vctrs,vec_slice)
 importFrom(vctrs,vec_sort)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 93af0b737..a0a6f0424 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -35,6 +35,7 @@
 #' @importFrom tidyr nest
 #' @importFrom tools toTitleCase
 #' @importFrom vctrs vec_cast
+#' @importFrom vctrs vec_cast_common
 #' @importFrom vctrs vec_data
 #' @importFrom vctrs vec_duplicate_id
 #' @importFrom vctrs vec_equal
@@ -42,10 +43,12 @@
 #' @importFrom vctrs vec_match
 #' @importFrom vctrs vec_order
 #' @importFrom vctrs vec_rbind
+#' @importFrom vctrs vec_recycle
 #' @importFrom vctrs vec_recycle_common
 #' @importFrom vctrs vec_rep
 #' @importFrom vctrs vec_rep_each
 #' @importFrom vctrs vec_seq_along
+#' @importFrom vctrs vec_size_common
 #' @importFrom vctrs vec_slice
 #' @importFrom vctrs vec_slice<-
 #' @importFrom vctrs vec_sort
diff --git a/R/patch.R b/R/patch.R
index ba04246d5..245be4741 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -1,30 +1,38 @@
 # TODO use these in apply_compactify
-approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, recurse = approx_equal, inds1 = NULL, inds2 = NULL) {
-  vecs <- list(vec1, vec2)
-  if (!is.null(inds1)) {
-    # XXX could have logical or integerish inds; just leave it to later checks
-    # to hopefully abort for now
+approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
+  # Recycle inds if provided; vecs if not:
+  common_size <- vec_size_common(
+    if (is.null(inds1)) vec1 else inds1,
+    if (is.null(inds2)) vec2 else inds2
+  )
+  if (is.null(inds1)) {
+    vec1 <- vec_recycle(vec1, common_size)
   } else {
-    vecs <- vec_recycle_common(!!!vecs)
+    inds1 <- vec_recycle(inds1, common_size)
   }
-  vecs <- vec_cast_common(!!!vecs, .to = .ptype)
-  approx_equal0(vecs[[1]], vecs[[2]], abs_tol, na_equal, rec = approx_equal, inds1, inds2)
+  if (is.null(inds2)) {
+    vec2 <- vec_recycle(vec2, common_size)
+  } else {
+    inds2 <- vec_recycle(inds2, common_size)
+  }
+  vecs <- vec_cast_common(vec1, vec2, .to = .ptype)
+  approx_equal0(vecs[[1]], vecs[[2]], abs_tol, na_equal, inds1, inds2)
 }
 
-approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, recurse = approx_equal0, inds1 = NULL, inds2 = NULL) {
+approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = NULL) {
   if (is_bare_numeric(vec1) && abs_tol != 0) {
-    if (!is.null(inds1)) {
-      vec1 <- vec1[inds1]
-      vec2 <- vec2[inds2]
-    }
-    # perf: since we're working with bare numerics and logicals: we can use
-    # fifelse, and stop it from propagating attributes when there's no special
-    # class to guide the meaning
+    # perf: since we're working with bare numerics and logicals: we can use `[`
+    # and `fifelse`. Matching vec_equal, we ignore names and other attributes.
+
+    # FIXME matrices can make their way in here though...
+    if (!is.null(inds1)) vec1 <- vec1[inds1]
+    if (!is.null(inds2)) vec2 <- vec2[inds2]
     res <- fifelse(
       !is.na(vec1) & !is.na(vec2),
       abs(vec1 - vec2) <= abs_tol,
       if (na_equal) is.na(vec1) & is.na(vec2) else FALSE
     )
+    # `fifelse` inherits any unrecognized attributes; drop them instead:
     attributes(res) <- NULL
     return(res)
   } else if (is.data.frame(vec1) && abs_tol != 0) {
@@ -34,7 +42,7 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, recurse = approx_equal0
       rep(TRUE, nrow(vec1))
     } else {
       Reduce(`&`, lapply(seq_len(ncol(vec1)), function(col_i) {
-        recurse(vec1[[col_i]], vec2[[col_i]], abs_tol, na_equal, recurse, inds1, inds2)
+        approx_equal0(vec1[[col_i]], vec2[[col_i]], abs_tol, na_equal, inds1, inds2)
       }))
     }
   } else {

From 1d8a5b9de6a46237e73b146cdd0f3aedd01367b0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 09:44:07 -0800
Subject: [PATCH 014/107] fix(apply_compactify): avoid arrange on data.table,
 `i` parsing issues

---
 R/archive.R | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/R/archive.R b/R/archive.R
index 922371f1c..6652ccea1 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -424,10 +424,23 @@ apply_compactify <- function(updates_df, ukey_names, abs_tol = 0) {
   }
   assert_numeric(abs_tol, len = 1, lower = 0)
 
-  if (!is.data.table(updates_df) || !identical(key(updates_df), ukey_names)) {
+  if (is.data.table(updates_df)) {
+    if (!identical(key(updates_df), ukey_names)) {
+      cli_abort(c("`ukey_names` should match `key(updates_df)`",
+        "i" = "`ukey_names` was {format_chr_deparse(ukey_names)}",
+        "i" = "`key(updates_df)` was {format_chr_deparse(key(updates_df))}"
+      ))
+    }
+  } else {
     updates_df <- updates_df %>% arrange(pick(all_of(ukey_names)))
   }
-  updates_df[!update_is_locf(updates_df, ukey_names, abs_tol), ]
+
+  # In case updates_df is a data.table, store keep flags in a local: "When the
+  # first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table
+  # looks for var in calling scope". In case it's not a data.table, make sure to
+  # use df[i,] not just df[i].
+  to_keep <- !update_is_locf(updates_df, ukey_names, abs_tol)
+  updates_df[to_keep, ]
 }
 
 #' get the entries that `compactify` would remove

From a224336dc4598becb4e184ab29d96aee1ebf8e10 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 10:10:09 -0800
Subject: [PATCH 015/107] perf: speed up compactification with `approx_equal`

---
 NAMESPACE                     |  2 --
 R/archive.R                   | 66 ++++++++++++-----------------------
 R/patch.R                     |  1 -
 man/is_locf.Rd                | 32 -----------------
 tests/testthat/test-archive.R |  4 +--
 5 files changed, 25 insertions(+), 80 deletions(-)
 delete mode 100644 man/is_locf.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 2d4760771..14a001dfa 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -179,7 +179,6 @@ importFrom(dplyr,if_all)
 importFrom(dplyr,if_any)
 importFrom(dplyr,if_else)
 importFrom(dplyr,is_grouped_df)
-importFrom(dplyr,lag)
 importFrom(dplyr,mutate)
 importFrom(dplyr,pick)
 importFrom(dplyr,pull)
@@ -219,7 +218,6 @@ importFrom(rlang,expr_label)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
 importFrom(rlang,is_bare_integerish)
-importFrom(rlang,is_bare_numeric)
 importFrom(rlang,is_environment)
 importFrom(rlang,is_formula)
 importFrom(rlang,is_function)
diff --git a/R/archive.R b/R/archive.R
index 6652ccea1..a9200a7ff 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -477,50 +477,30 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
   ekt_names <- ukey_names[ukey_names != "version"]
   val_names <- all_names[!all_names %in% ukey_names]
 
-  Reduce(`&`, lapply(updates_col_refs[ekt_names], is_locf, abs_tol, TRUE)) &
-    Reduce(`&`, lapply(updates_col_refs[val_names], is_locf, abs_tol, FALSE))
-}
-
-#' Checks to see if a value in a vector is LOCF
-#' @description LOCF meaning last observation carried forward (to later
-#'   versions). Lags the vector by 1, then compares with itself. If `is_key` is
-#'   `TRUE`, only values that are exactly the same between the lagged and
-#'   original are considered LOCF. If `is_key` is `FALSE` and `vec` is a vector
-#'   of numbers ([`base::is.numeric`]), then approximate equality will be used,
-#'   checking whether the absolute difference between each pair of entries is
-#'   `<= abs_tol`; if `vec` is something else, then exact equality is used
-#'   instead.
-#'
-#' @details
-#'
-#' We include epikey-time columns in LOCF comparisons as part of an optimization
-#' to avoid slower grouped operations while still ensuring that the first
-#' observation for each time series will not be marked as LOCF. We test these
-#' key columns for exact equality to prevent chopping off consecutive
-#' time_values during flat periods when `abs_tol` is high.
-#'
-#' We use exact equality for non-`is.numeric` double/integer columns such as
-#' dates, datetimes, difftimes, `tsibble::yearmonth`s, etc., as these may be
-#' used as part of re-indexing or grouping procedures, and we don't want to
-#' change the number of groups for those operations when we remove LOCF data
-#' during compactification.
-#'
-#' @importFrom dplyr lag if_else
-#' @importFrom rlang is_bare_numeric
-#' @importFrom vctrs vec_equal
-#' @keywords internal
-is_locf <- function(vec, abs_tol, is_key) { # nolint: object_usage_linter
-  lag_vec <- lag(vec)
-  if (is.vector(vec, mode = "numeric") && !is_key) {
-    # (integer or double vector, no class (& no dims); maybe names, which we'll
-    # ignore like `vec_equal`); not a key column
-    unname(if_else(
-      !is.na(vec) & !is.na(lag_vec),
-      abs(vec - lag_vec) <= abs_tol,
-      is.na(vec) & is.na(lag_vec)
-    ))
+  n_updates <- nrow(arranged_updates_df)
+  if (n_updates == 0L) {
+    logical(0L)
+  } else if (n_updates == 1L) {
+    FALSE # sole observation is not LOCF
   } else {
-    vec_equal(vec, lag_vec, na_equal = TRUE)
+    ekts_tbl <- new_tibble(updates_col_refs[ekt_names])
+    vals_tbl <- new_tibble(updates_col_refs[val_names])
+    # n_updates >= 2L so we can use `:` naturally (this is the reason for
+    # separating out n_updates == 1L from this case):
+    inds1 <- 2L:n_updates
+    inds2 <- 1L:(n_updates - 1L)
+    c(
+      FALSE, # first observation is not LOCF
+      approx_equal0(ekts_tbl,
+        inds1 = inds1, ekts_tbl, inds2 = inds2,
+        # check ekt cols without tolerance:
+        abs_tol = 0, na_equal = TRUE
+      ) &
+        approx_equal0(vals_tbl,
+          inds1 = inds1, vals_tbl, inds2 = inds2,
+          abs_tol = abs_tol, na_equal = TRUE
+        )
+    )
   }
 }
 
diff --git a/R/patch.R b/R/patch.R
index 245be4741..2f0a2f00f 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -1,4 +1,3 @@
-# TODO use these in apply_compactify
 approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
   # Recycle inds if provided; vecs if not:
   common_size <- vec_size_common(
diff --git a/man/is_locf.Rd b/man/is_locf.Rd
deleted file mode 100644
index f8f8eefcb..000000000
--- a/man/is_locf.Rd
+++ /dev/null
@@ -1,32 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/archive.R
-\name{is_locf}
-\alias{is_locf}
-\title{Checks to see if a value in a vector is LOCF}
-\usage{
-is_locf(vec, abs_tol, is_key)
-}
-\description{
-LOCF meaning last observation carried forward (to later
-versions). Lags the vector by 1, then compares with itself. If \code{is_key} is
-\code{TRUE}, only values that are exactly the same between the lagged and
-original are considered LOCF. If \code{is_key} is \code{FALSE} and \code{vec} is a vector
-of numbers (\code{\link[base:numeric]{base::is.numeric}}), then approximate equality will be used,
-checking whether the absolute difference between each pair of entries is
-\verb{<= abs_tol}; if \code{vec} is something else, then exact equality is used
-instead.
-}
-\details{
-We include epikey-time columns in LOCF comparisons as part of an optimization
-to avoid slower grouped operations while still ensuring that the first
-observation for each time series will not be marked as LOCF. We test these
-key columns for exact equality to prevent chopping off consecutive
-time_values during flat periods when \code{abs_tol} is high.
-
-We use exact equality for non-\code{is.numeric} double/integer columns such as
-dates, datetimes, difftimes, \code{tsibble::yearmonth}s, etc., as these may be
-used as part of re-indexing or grouping procedures, and we don't want to
-change the number of groups for those operations when we remove LOCF data
-during compactification.
-}
-\keyword{internal}
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 0e84b03ba..12d9ed413 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -217,8 +217,8 @@ test_that("`epi_archive` rejects dataframes where time_value and version columns
   expect_error(as_epi_archive(tbl3), class = "epiprocess__time_value_version_mismatch")
 })
 
-test_that("is_locf works as expected", {
+test_that("is_locf replacement works as expected", {
   vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
   is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
-  expect_equal(is_locf(vec, .Machine$double.eps^0.5, FALSE), as.logical(is_repeated))
+  expect_equal(c(FALSE, approx_equal(head(vec, -1L), tail(vec, -1L), .Machine$double.eps^0.5, na_equal = TRUE)), as.logical(is_repeated))
 })

From ab010c8b975a3180f3f104088dd34bedd89b65ae Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 10:52:00 -0800
Subject: [PATCH 016/107] fix(approx_equal): missing import

---
 NAMESPACE              | 1 +
 R/epiprocess-package.R | 1 +
 2 files changed, 2 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index 14a001dfa..aa936b4ae 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -218,6 +218,7 @@ importFrom(rlang,expr_label)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
 importFrom(rlang,is_bare_integerish)
+importFrom(rlang,is_bare_numeric)
 importFrom(rlang,is_environment)
 importFrom(rlang,is_formula)
 importFrom(rlang,is_function)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index a0a6f0424..ab4d2ba26 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -31,6 +31,7 @@
 #' @importFrom rlang %||%
 #' @importFrom rlang arg_match0
 #' @importFrom rlang is_bare_integerish
+#' @importFrom rlang is_bare_numeric
 #' @importFrom tibble is_tibble
 #' @importFrom tidyr nest
 #' @importFrom tools toTitleCase

From 72056a3a17b2577960996d8b9351ab9f0ce3b589 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 10:52:09 -0800
Subject: [PATCH 017/107] docs(approx_equal): roxygen2 + comment on
 inconsistencies/bugs

---
 R/patch.R           | 20 +++++++++++++++++++-
 man/approx_equal.Rd | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 man/approx_equal.Rd

diff --git a/R/patch.R b/R/patch.R
index 2f0a2f00f..69de6d709 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -1,3 +1,18 @@
+#' Test two vctrs vectors for equality with some tolerance in some cases
+#'
+#' @param vec1,vec2 vctrs vectors (includes data frames)
+#' @param abs_tol tolerance; will be used for bare numeric `vec1`, `vec2`, or
+#'   any such columns within `vec1`, `vec2` if they are data frames
+#' @param na_equal should `NA`s be considered equal to each other? (In
+#'   epiprocess, we usually want this to be `TRUE`, but that doesn't match the
+#'   [`vctrs::vec_equal()`] default, so this is mandatory.)
+#' @param .ptype as in [`vctrs::vec_equal()`]
+#' @param inds1,inds2 optional (row) indices into vec1 and vec2; output should
+#'   be consistent with `vec_slice`-ing to these indices beforehand, but can
+#'   give faster computation if `vec1` and `vec2` are data frames.
+#'
+#' @return logical vector; no nonmissing entries if `na_equal = TRUE`. Behavior
+#'   may differ from `vec_equal` with non-`NA` `NaN`s involved.
 approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
   # Recycle inds if provided; vecs if not:
   common_size <- vec_size_common(
@@ -30,6 +45,9 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
       !is.na(vec1) & !is.na(vec2),
       abs(vec1 - vec2) <= abs_tol,
       if (na_equal) is.na(vec1) & is.na(vec2) else FALSE
+      # XXX ^ inconsistent with vec_equal treatment: NA vs. NaN comparison
+      # behavior with na_equal = TRUE is different; plus output with na_equal =
+      # FALSE on two NAs is different
     )
     # `fifelse` inherits any unrecognized attributes; drop them instead:
     attributes(res) <- NULL
@@ -45,7 +63,7 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
       }))
     }
   } else {
-    # No special handling for any other types/situations. Makes sense for
+    # XXX No special handling for any other types/situations. Makes sense for
     # unclassed atomic things; bare lists and certain vctrs classes might want
     # recursion / specialization, though.
     if (!is.null(inds1)) {
diff --git a/man/approx_equal.Rd b/man/approx_equal.Rd
new file mode 100644
index 000000000..88fd8b63a
--- /dev/null
+++ b/man/approx_equal.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/patch.R
+\name{approx_equal}
+\alias{approx_equal}
+\title{Test two vctrs vectors for equality with some tolerance in some cases}
+\usage{
+approx_equal(
+  vec1,
+  vec2,
+  abs_tol,
+  na_equal,
+  .ptype = NULL,
+  inds1 = NULL,
+  inds2 = NULL
+)
+}
+\arguments{
+\item{vec1, vec2}{vctrs vectors (includes data frames)}
+
+\item{abs_tol}{tolerance; will be used for bare numeric \code{vec1}, \code{vec2}, or
+any such columns within \code{vec1}, \code{vec2} if they are data frames}
+
+\item{na_equal}{should \code{NA}s be considered equal to each other? (In
+epiprocess, we usually want this to be \code{TRUE}, but that doesn't match the
+\code{\link[vctrs:vec_equal]{vctrs::vec_equal()}} default, so this is mandatory.)}
+
+\item{.ptype}{as in \code{\link[vctrs:vec_equal]{vctrs::vec_equal()}}}
+
+\item{inds1, inds2}{optional (row) indices into vec1 and vec2; output should
+be consistent with \code{vec_slice}-ing to these indices beforehand, but can
+give faster computation if \code{vec1} and \code{vec2} are data frames.}
+}
+\value{
+logical vector; no nonmissing entries if \code{na_equal = TRUE}. Behavior
+may differ from \code{vec_equal} with non-\code{NA} \code{NaN}s involved.
+}
+\description{
+Test two vctrs vectors for equality with some tolerance in some cases
+}

From e9ea2caf3b6bf871e10ab1fe2c1d5614059c3709 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 11:30:56 -0800
Subject: [PATCH 018/107] fix(approx_equal): consistency with
 vec_slice(na_equal=FALSE)

---
 R/patch.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 69de6d709..d67fba1cc 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -44,10 +44,9 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
     res <- fifelse(
       !is.na(vec1) & !is.na(vec2),
       abs(vec1 - vec2) <= abs_tol,
-      if (na_equal) is.na(vec1) & is.na(vec2) else FALSE
+      if (na_equal) is.na(vec1) & is.na(vec2) else NA
       # XXX ^ inconsistent with vec_equal treatment: NA vs. NaN comparison
-      # behavior with na_equal = TRUE is different; plus output with na_equal =
-      # FALSE on two NAs is different
+      # behavior with na_equal = TRUE is different
     )
     # `fifelse` inherits any unrecognized attributes; drop them instead:
     attributes(res) <- NULL

From b13e82547558784292f4d0f94f6189af214cb053 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 11:32:41 -0800
Subject: [PATCH 019/107] fix(approx_equal): on bare numeric matrices

---
 R/patch.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index d67fba1cc..074d3d427 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -37,10 +37,8 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
   if (is_bare_numeric(vec1) && abs_tol != 0) {
     # perf: since we're working with bare numerics and logicals: we can use `[`
     # and `fifelse`. Matching vec_equal, we ignore names and other attributes.
-
-    # FIXME matrices can make their way in here though...
-    if (!is.null(inds1)) vec1 <- vec1[inds1]
-    if (!is.null(inds2)) vec2 <- vec2[inds2]
+    if (!is.null(inds1)) vec1 <- vec_slice(vec1, inds1)
+    if (!is.null(inds2)) vec2 <- vec_slice(vec2, inds2)
     res <- fifelse(
       !is.na(vec1) & !is.na(vec2),
       abs(vec1 - vec2) <= abs_tol,
@@ -48,6 +46,10 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
       # XXX ^ inconsistent with vec_equal treatment: NA vs. NaN comparison
       # behavior with na_equal = TRUE is different
     )
+    if (!is.null(dim(vec1))) {
+      dim(res) <- dim(vec1)
+      res <- rowSums(res) == ncol(res)
+    }
     # `fifelse` inherits any unrecognized attributes; drop them instead:
     attributes(res) <- NULL
     return(res)

From 0c9f46227351ce05ab1352831ddea58c18a23c79 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 11:57:37 -0800
Subject: [PATCH 020/107] feat: approx_equal on lists

---
 NAMESPACE              |  2 ++
 R/epiprocess-package.R |  2 ++
 R/patch.R              | 22 +++++++++++++++++++---
 man/approx_equal.Rd    |  3 ++-
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index aa936b4ae..e1b111c0a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -218,6 +218,7 @@ importFrom(rlang,expr_label)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
 importFrom(rlang,is_bare_integerish)
+importFrom(rlang,is_bare_list)
 importFrom(rlang,is_bare_numeric)
 importFrom(rlang,is_environment)
 importFrom(rlang,is_formula)
@@ -265,6 +266,7 @@ importFrom(vctrs,vec_equal)
 importFrom(vctrs,vec_in)
 importFrom(vctrs,vec_match)
 importFrom(vctrs,vec_order)
+importFrom(vctrs,vec_ptype)
 importFrom(vctrs,vec_rbind)
 importFrom(vctrs,vec_recycle)
 importFrom(vctrs,vec_recycle_common)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index ab4d2ba26..89e785397 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -31,6 +31,7 @@
 #' @importFrom rlang %||%
 #' @importFrom rlang arg_match0
 #' @importFrom rlang is_bare_integerish
+#' @importFrom rlang is_bare_list
 #' @importFrom rlang is_bare_numeric
 #' @importFrom tibble is_tibble
 #' @importFrom tidyr nest
@@ -43,6 +44,7 @@
 #' @importFrom vctrs vec_in
 #' @importFrom vctrs vec_match
 #' @importFrom vctrs vec_order
+#' @importFrom vctrs vec_ptype
 #' @importFrom vctrs vec_rbind
 #' @importFrom vctrs vec_recycle
 #' @importFrom vctrs vec_recycle_common
diff --git a/R/patch.R b/R/patch.R
index 074d3d427..3bcddb383 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -12,7 +12,8 @@
 #'   give faster computation if `vec1` and `vec2` are data frames.
 #'
 #' @return logical vector; no nonmissing entries if `na_equal = TRUE`. Behavior
-#'   may differ from `vec_equal` with non-`NA` `NaN`s involved.
+#'   may differ from `vec_equal` with non-`NA` `NaN`s involved, or for bare
+#'   lists that contain named vectors.
 approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
   # Recycle inds if provided; vecs if not:
   common_size <- vec_size_common(
@@ -63,10 +64,25 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
         approx_equal0(vec1[[col_i]], vec2[[col_i]], abs_tol, na_equal, inds1, inds2)
       }))
     }
+  } else if (is_bare_list(vec1)) {
+    vapply(seq_along(vec1), function(i) {
+      entry1 <- vec1[[i]]
+      entry2 <- vec2[[i]]
+      vec_size(entry1) == vec_size(entry2) &&
+        # This is inconsistent with vec_equal on named vectors; to be
+        # consistently inconsistent, we avoid dispatching to vec_equal for bare
+        # lists even with abs_tol = 0:
+        identical(vec_ptype(entry1), vec_ptype(entry2)) &&
+        all(approx_equal0(entry1, entry2, abs_tol, na_equal))
+    }, logical(1L))
   } else {
     # XXX No special handling for any other types/situations. Makes sense for
-    # unclassed atomic things; bare lists and certain vctrs classes might want
-    # recursion / specialization, though.
+    # unclassed atomic things; custom classes (e.g., distributions) might want
+    # recursion / specialization, though. approx_equal0 should probably be an S3
+    # method. Also, abs_tol == 0 --> vec_equal logic should maybe be either be
+    # hoisted to approx_equal or we should manually recurse on data frames even
+    # with abs_tol = 0 when that's faster (might depend on presence of inds*),
+    # after some inconsistencies are ironed out.
     if (!is.null(inds1)) {
       vec1 <- vec_slice(vec1, inds1)
       vec2 <- vec_slice(vec2, inds2)
diff --git a/man/approx_equal.Rd b/man/approx_equal.Rd
index 88fd8b63a..d58f24bb9 100644
--- a/man/approx_equal.Rd
+++ b/man/approx_equal.Rd
@@ -32,7 +32,8 @@ give faster computation if \code{vec1} and \code{vec2} are data frames.}
 }
 \value{
 logical vector; no nonmissing entries if \code{na_equal = TRUE}. Behavior
-may differ from \code{vec_equal} with non-\code{NA} \code{NaN}s involved.
+may differ from \code{vec_equal} with non-\code{NA} \code{NaN}s involved, or for bare
+lists that contain named vectors.
 }
 \description{
 Test two vctrs vectors for equality with some tolerance in some cases

From 39816f58c93fab845a0ca0c65afd26cc5b22ccde Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 12:53:00 -0800
Subject: [PATCH 021/107] docs(approx_equal): iterate on @return + doc
 approx_equal0

---
 R/patch.R            | 11 ++++++++---
 man/approx_equal.Rd  |  8 +++++---
 man/approx_equal0.Rd | 12 ++++++++++++
 3 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 man/approx_equal0.Rd

diff --git a/R/patch.R b/R/patch.R
index 3bcddb383..37af285f9 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -11,9 +11,11 @@
 #'   be consistent with `vec_slice`-ing to these indices beforehand, but can
 #'   give faster computation if `vec1` and `vec2` are data frames.
 #'
-#' @return logical vector; no nonmissing entries if `na_equal = TRUE`. Behavior
-#'   may differ from `vec_equal` with non-`NA` `NaN`s involved, or for bare
-#'   lists that contain named vectors.
+#' @return logical vector, with length matching the result of recycling `vec1`
+#'   (at `inds1` if provided) and `vec2` (at `inds2` if provided); entries
+#'   should all be `TRUE` or `FALSE` if `na_equal = TRUE`. Behavior may differ
+#'   from `vec_equal` with non-`NA` `NaN`s involved, or for bare lists that
+#'   contain named vectors.
 approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
   # Recycle inds if provided; vecs if not:
   common_size <- vec_size_common(
@@ -34,6 +36,9 @@ approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = N
   approx_equal0(vecs[[1]], vecs[[2]], abs_tol, na_equal, inds1, inds2)
 }
 
+#' Helper for [`approx_equal`] for vecs guaranteed to have the same ptype and size
+#'
+#' @keywords internal
 approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = NULL) {
   if (is_bare_numeric(vec1) && abs_tol != 0) {
     # perf: since we're working with bare numerics and logicals: we can use `[`
diff --git a/man/approx_equal.Rd b/man/approx_equal.Rd
index d58f24bb9..4eac410f0 100644
--- a/man/approx_equal.Rd
+++ b/man/approx_equal.Rd
@@ -31,9 +31,11 @@ be consistent with \code{vec_slice}-ing to these indices beforehand, but can
 give faster computation if \code{vec1} and \code{vec2} are data frames.}
 }
 \value{
-logical vector; no nonmissing entries if \code{na_equal = TRUE}. Behavior
-may differ from \code{vec_equal} with non-\code{NA} \code{NaN}s involved, or for bare
-lists that contain named vectors.
+logical vector, with length matching the result of recycling \code{vec1}
+(at \code{inds1} if provided) and \code{vec2} (at \code{inds2} if provided); entries
+should all be \code{TRUE} or \code{FALSE} if \code{na_equal = TRUE}. Behavior may differ
+from \code{vec_equal} with non-\code{NA} \code{NaN}s involved, or for bare lists that
+contain named vectors.
 }
 \description{
 Test two vctrs vectors for equality with some tolerance in some cases
diff --git a/man/approx_equal0.Rd b/man/approx_equal0.Rd
new file mode 100644
index 000000000..f7a69f1fe
--- /dev/null
+++ b/man/approx_equal0.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/patch.R
+\name{approx_equal0}
+\alias{approx_equal0}
+\title{Helper for \code{\link{approx_equal}} for vecs guaranteed to have the same ptype and size}
+\usage{
+approx_equal0(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = NULL)
+}
+\description{
+Helper for \code{\link{approx_equal}} for vecs guaranteed to have the same ptype and size
+}
+\keyword{internal}

From ba40405bd8b127a53bb53664c2032a2d8f6e5a17 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 14:10:19 -0800
Subject: [PATCH 022/107] WIP docs(epix_epi_slide_opt_one_epikey): initial

---
 R/epix_epi_slide_opt.R               | 48 ++++++++++++++++++
 man/epix_epi_slide_opt_one_epikey.Rd | 74 ++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 man/epix_epi_slide_opt_one_epikey.Rd

diff --git a/R/epix_epi_slide_opt.R b/R/epix_epi_slide_opt.R
index 8fb9d5905..5e446cdca 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epix_epi_slide_opt.R
@@ -1,4 +1,51 @@
+#' Core operation of `epix_epi_slide_opt` for a single epikey's history
+#'
+#' @param updates tibble with two columns: `version` and `subtbl`; `subtbl` is a
+#'   list of tibbles, each with a `time_value` column and measurement columns.
+#'   The epikey should not appear.
+#' @param in_colnames chr; names of columns to which to apply `f_dots_baked`
+#' @param f_dots_baked supported sliding function from `{data.table}` or
+#'   `{slider}`, potentially with some arguments baked in with
+#'   [`purrr::partial`]
+#' @param f_from_package string; name of package from which `f_dots_baked`
+#'   (pre-`partial`) originates
+#' @param before integerish >=0 or Inf; number of time steps before each
+#'   ref_time_value to include in the sliding window computation; Inf to include
+#'   all times beginning with the min `time_value`
+#' @param after integerish >=0; number of time steps after each ref_time_value
+#'   to include in the sliding window computation
+#' @param time_type as in `new_epi_archive`
+#' @param out_colnames chr, same length as `in_colnames`; column names to use
+#'   for results
+#' @return list of tibbles with same names as `subtbl`s plus: `c(out_colnames,
+#'   "version")`; (compactified) diff data to put into an `epi_archive`
+#'
+#' @examples
+#'
+#' library(dplyr)
+#' updates <- bind_rows(
+#'   tibble(
+#'     version = 40, time_value = 1:10, value = 1:10
+#'   ),
+#'   tibble(
+#'     version = 12, time_value = 2:3, value = 3:2
+#'   ),
+#'   tibble(
+#'     version = 13, time_value = 6, value = 7,
+#'   ),
+#'   tibble(
+#'     version = 13, time_value = 7, value = NA,
+#'   )
+#' ) %>%
+#'   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x)) %>%
+#'   tidyr::nest(.by = version, .key = "subtbl")
+#'
+#' updates %>%
+#'   epix_epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
+#'
+#' @keywords internal
 epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
+  # TODO check for col name clobbering
   unit_step <- epiprocess:::unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
@@ -148,6 +195,7 @@ epix_epi_slide_opt.epi_archive <-
         group_updates <- group_values %>%
           nest(.by = version, .key = "subtbl") %>%
           arrange(version)
+        # TODO move nesting inside the helper?
         res <- epix_epi_slide_opt_one_epikey(group_updates, names_info$input_col_names, .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
           list_rbind()
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
diff --git a/man/epix_epi_slide_opt_one_epikey.Rd b/man/epix_epi_slide_opt_one_epikey.Rd
new file mode 100644
index 000000000..e8321616e
--- /dev/null
+++ b/man/epix_epi_slide_opt_one_epikey.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epix_epi_slide_opt.R
+\name{epix_epi_slide_opt_one_epikey}
+\alias{epix_epi_slide_opt_one_epikey}
+\title{Core operation of \code{epix_epi_slide_opt} for a single epikey's history}
+\usage{
+epix_epi_slide_opt_one_epikey(
+  updates,
+  in_colnames,
+  f_dots_baked,
+  f_from_package,
+  before,
+  after,
+  time_type,
+  out_colnames
+)
+}
+\arguments{
+\item{updates}{tibble with two columns: \code{version} and \code{subtbl}; \code{subtbl} is a
+list of tibbles, each with a \code{time_value} column and measurement columns.
+The epikey should not appear.}
+
+\item{in_colnames}{chr; names of columns to which to apply \code{f_dots_baked}}
+
+\item{f_dots_baked}{supported sliding function from \code{{data.table}} or
+\code{{slider}}, potentially with some arguments baked in with
+\code{\link[purrr:partial]{purrr::partial}}}
+
+\item{f_from_package}{string; name of package from which \code{f_dots_baked}
+(pre-\code{partial}) originates}
+
+\item{before}{integerish >=0 or Inf; number of time steps before each
+ref_time_value to include in the sliding window computation; Inf to include
+all times beginning with the min \code{time_value}}
+
+\item{after}{integerish >=0; number of time steps after each ref_time_value
+to include in the sliding window computation}
+
+\item{time_type}{as in \code{new_epi_archive}}
+
+\item{out_colnames}{chr, same length as \code{in_colnames}; column names to use
+for results}
+}
+\value{
+list of tibbles with same names as \code{subtbl}s plus: \code{c(out_colnames, "version")}; (compactified) diff data to put into an \code{epi_archive}
+}
+\description{
+Core operation of \code{epix_epi_slide_opt} for a single epikey's history
+}
+\examples{
+
+library(dplyr)
+updates <- bind_rows(
+  tibble(
+    version = 40, time_value = 1:10, value = 1:10
+  ),
+  tibble(
+    version = 12, time_value = 2:3, value = 3:2
+  ),
+  tibble(
+    version = 13, time_value = 6, value = 7,
+  ),
+  tibble(
+    version = 13, time_value = 7, value = NA,
+  )
+) \%>\%
+  mutate(across(c(version, time_value), ~as.Date("2020-01-01") - 1 + .x)) \%>\%
+  tidyr::nest(.by = version, .key = "subtbl")
+
+updates \%>\%
+  epix_epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
+
+}
+\keyword{internal}

From 764c624dc48e39d54a8d05951bc9e2cf768c63e2 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 6 Mar 2025 17:22:27 -0800
Subject: [PATCH 023/107] refactor: move epi_slide_opt & helpers to its own
 file

---
 DESCRIPTION                           |   1 +
 R/epi_slide_opt_edf.R                 | 664 +++++++++++++++++++++++++
 R/slide.R                             | 666 --------------------------
 man/across_ish_names_info.Rd          |   2 +-
 man/epi_slide_opt.Rd                  |   2 +-
 man/epix_epi_slide_opt_one_epikey.Rd  |   2 +-
 man/full_date_seq.Rd                  |   2 +-
 man/upstream_slide_f_info.Rd          |   2 +-
 man/upstream_slide_f_possibilities.Rd |   2 +-
 9 files changed, 671 insertions(+), 672 deletions(-)
 create mode 100644 R/epi_slide_opt_edf.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 013d32600..1732bbc97 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -98,6 +98,7 @@ Collate:
     'correlation.R'
     'epi_df.R'
     'epi_df_forbidden_methods.R'
+    'epi_slide_opt_edf.R'
     'epiprocess-package.R'
     'epix_epi_slide_opt.R'
     'group_by_epi_df_methods.R'
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
new file mode 100644
index 000000000..44eaa1470
--- /dev/null
+++ b/R/epi_slide_opt_edf.R
@@ -0,0 +1,664 @@
+#' Information about upstream (`{data.table}`/`{slider}`) slide functions
+#'
+#' Underlies [`upstream_slide_f_info`].
+#'
+#' @keywords internal
+upstream_slide_f_possibilities <- tibble::tribble(
+  ~f, ~package, ~namer,
+  frollmean, "data.table", ~ if (is.logical(.x)) "prop" else "av",
+  frollsum, "data.table", ~ if (is.logical(.x)) "count" else "sum",
+  frollapply, "data.table", ~"slide",
+  slide_sum, "slider", ~ if (is.logical(.x)) "count" else "sum",
+  slide_prod, "slider", ~"prod",
+  slide_mean, "slider", ~ if (is.logical(.x)) "prop" else "av",
+  slide_min, "slider", ~"min",
+  slide_max, "slider", ~"max",
+  slide_all, "slider", ~"all",
+  slide_any, "slider", ~"any",
+)
+
+#' Validate & get information about an upstream slide function
+#'
+#' @param .f function such as `data.table::frollmean` or `slider::slide_mean`;
+#'   must appear in [`upstream_slide_f_possibilities`]
+#' @return named list with two elements: `from_package`, a string containing the
+#'   upstream package name ("data.table" or "slider"), and `namer`, a function
+#'   that takes a column to call `.f` on and outputs a basic name or
+#'   abbreviation for what operation `.f` represents on that kind of column
+#'   (e.g., "sum", "av", "count").
+#'
+#' @keywords internal
+upstream_slide_f_info <- function(.f) {
+  assert_function(.f)
+
+  # Check that slide function `.f` is one of those short-listed from
+  # `data.table` and `slider` (or a function that has the exact same definition,
+  # e.g. if the function has been reexported or defined locally). Extract some
+  # metadata. `namer` will be mapped over columns (.x will be a column, not the
+  # entire edf).
+  f_info_row <- upstream_slide_f_possibilities %>%
+    filter(map_lgl(.data$f, ~ identical(.f, .x)))
+  if (nrow(f_info_row) == 0L) {
+    # `f` is from somewhere else and not supported
+    cli_abort(
+      c(
+        "problem with {rlang::expr_label(rlang::caller_arg(f))}",
+        "i" = "`f` must be one of `data.table`'s rolling functions (`frollmean`,
+              `frollsum`, `frollapply`. See `?data.table::roll`) or one of
+              `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
+              etc. See `?slider::\`summary-slide\`` for more options)."
+      ),
+      class = "epiprocess__epi_slide_opt__unsupported_slide_function",
+      epiprocess__f = .f
+    )
+  }
+  if (nrow(f_info_row) > 1L) {
+    cli_abort('epiprocess internal error: looking up `.f` in table of possible
+               functions yielded multiple matches. Please report it using "New
+               issue" at https://github.com/cmu-delphi/epiprocess/issues, using
+               reprex::reprex to provide a minimal reproducible example.')
+  }
+  f_from_package <- f_info_row$package
+  list(
+    from_package = f_from_package,
+    namer = unwrap(f_info_row$namer)
+  )
+}
+
+#' Calculate input and output column names for an `{epiprocess}` [`dplyr::across`]-like operations
+#'
+#' @param .x data.frame to perform input column tidyselection on
+#' @param time_type as in [`new_epi_df`]
+#' @param col_names_quo enquosed input column tidyselect expression
+#' @param .f_namer function taking an input column object and outputting a name
+#'   for a corresponding output column; see [`upstream_slide_f_info`]
+#' @param .window_size as in [`epi_slide_opt`]
+#' @param .align as in [`epi_slide_opt`]
+#' @param .prefix as in [`epi_slide_opt`]
+#' @param .suffix as in [`epi_slide_opt`]
+#' @param .new_col_names as in [`epi_slide_opt`]
+#' @return named list with two elements: `input_col_names`, chr, subset of
+#'   `names(.x)`; and `output_colnames`, chr, same length as `input_col_names`
+#'
+#' @keywords internal
+across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .window_size, .align, .prefix, .suffix, .new_col_names) {
+  # The position of a given column can be differ between input `.x` and
+  # `.data_group` since the grouping step by default drops grouping columns.
+  # To avoid rerunning `eval_select` for every `.data_group`, convert
+  # positions of user-provided `col_names` into string column names. We avoid
+  # using `names(pos)` directly for robustness and in case we later want to
+  # allow users to rename fields via tidyselection.
+  pos <- eval_select(col_names_quo, data = .x, allow_rename = FALSE)
+  input_col_names <- names(.x)[pos]
+
+  # Handle output naming
+  if ((!is.null(.prefix) || !is.null(.suffix)) && !is.null(.new_col_names)) {
+    cli_abort(
+      "Can't use both .prefix/.suffix and .new_col_names at the same time.",
+      class = "epiprocess__epi_slide_opt_incompatible_naming_args"
+    )
+  }
+  assert_string(.prefix, null.ok = TRUE)
+  assert_string(.suffix, null.ok = TRUE)
+  assert_character(.new_col_names, len = length(input_col_names), null.ok = TRUE)
+  if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
+    .suffix <- "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
+    # ^ does not account for any arguments specified to underlying functions via
+    # `...` such as `na.rm =`, nor does it distinguish between functions from
+    # different packages accomplishing the same type of computation. Those are
+    # probably only set one way per task, so this probably produces cleaner
+    # names without clashes (though maybe some confusion if switching between
+    # code with different settings).
+  }
+  if (!is.null(.prefix) || !is.null(.suffix)) {
+    .prefix <- .prefix %||% ""
+    .suffix <- .suffix %||% ""
+    if (identical(.window_size, Inf)) {
+      n <- "running_"
+      time_unit_abbr <- ""
+      align_abbr <- ""
+    } else {
+      n <- time_delta_to_n_steps(.window_size, time_type)
+      time_unit_abbr <- time_type_unit_abbr(time_type)
+      align_abbr <- c(right = "", center = "c", left = "l")[[.align]]
+    }
+    glue_env <- rlang::env(
+      .n = n,
+      .time_unit_abbr = time_unit_abbr,
+      .align_abbr = align_abbr,
+      .f_abbr = purrr::map_chr(.x[, c(input_col_names)], .f_namer), # compat between DT and tbl selection
+      quo_get_env(col_names_quo)
+    )
+    .new_col_names <- unclass(
+      glue(.prefix, .envir = glue_env) +
+        input_col_names +
+        glue(.suffix, .envir = glue_env)
+    )
+  } else {
+    # `.new_col_names` was provided by user; we don't need to do anything.
+  }
+  if (any(.new_col_names %in% names(.x))) {
+    cli_abort(c(
+      "Naming conflict between new columns and existing columns",
+      "x" = "Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
+    ), class = "epiprocess__epi_slide_opt_old_new_name_conflict")
+  }
+  if (anyDuplicated(.new_col_names)) {
+    cli_abort(c(
+      "New column names contain duplicates",
+      "x" = "Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
+    ), class = "epiprocess__epi_slide_opt_new_name_duplicated")
+  }
+  output_col_names <- .new_col_names
+
+  return(list(
+    input_col_names = input_col_names,
+    output_col_names = output_col_names
+  ))
+}
+
+#' Optimized slide functions for common cases
+#'
+#' @description `epi_slide_opt` allows sliding an n-timestep [data.table::froll]
+#' or [slider::summary-slide] function over variables in an `epi_df` object.
+#' These functions tend to be much faster than `epi_slide()`. See
+#' `vignette("epi_df")` for more examples.
+#'
+#' @template basic-slide-params
+#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
+#'   name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
+#'   [other tidy-select expression][tidyselect::language], or a vector of
+#'   characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
+#'   they were positions in the data frame, so expressions like `x:y` can be
+#'   used to select a range of variables.
+#'
+#'   The tidy-selection renaming interface is not supported, and cannot be used
+#'   to provide output column names; if you want to customize the output column
+#'   names, use [`dplyr::rename`] after the slide.
+#' @param .f Function; together with `...` specifies the computation to slide.
+#'  `.f` must be one of `data.table`'s rolling functions
+#'  (`frollmean`, `frollsum`, `frollapply`. See [data.table::roll]) or one
+#'  of `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
+#'  etc. See [slider::summary-slide]).
+#'
+#'  The optimized `data.table` and `slider` functions can't be directly passed
+#'  as the computation function in `epi_slide` without careful handling to make
+#'  sure each computation group is made up of the `.window_size` dates rather
+#'  than `.window_size` points. `epi_slide_opt` (and wrapper functions
+#'  `epi_slide_mean` and `epi_slide_sum`) take care of window completion
+#'  automatically to prevent associated errors.
+#' @param ... Additional arguments to pass to the slide computation `.f`, for
+#'   example, `algo` or `na.rm` in data.table functions. You don't need to
+#'   specify `.x`, `.window_size`, or `.align` (or `before`/`after` for slider
+#'   functions).
+#' @param .prefix Optional [`glue::glue`] format string; name the slide result
+#'   column(s) by attaching this prefix to the corresponding input column(s).
+#'   Some shorthand is supported for basing the output names on `.window_size`
+#'   or other arguments; see "Prefix and suffix shorthand" below.
+#' @param .suffix Optional [`glue::glue`] format string; like `.prefix`. The
+#'   default naming behavior is equivalent to `.suffix =
+#'   "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"`. Can be used in combination
+#'   with `.prefix`.
+#' @param .new_col_names Optional character vector with length matching the
+#'   number of input columns from `.col_names`; name the slide result column(s)
+#'   with these names. Cannot be used in combination with `.prefix` and/or
+#'   `.suffix`.
+#'
+#' @section Prefix and suffix shorthand:
+#'
+#' [`glue::glue`] format strings specially interpret content within curly
+#' braces. E.g., `glue::glue("ABC{2 + 2}")` evaluates to `"ABC4"`. For `.prefix`
+#' and `.suffix`, we provide `glue` with some additional variable bindings:
+#'
+#' - `{.n}` will be the number of time steps in the computation
+#'    corresponding to the `.window_size`.
+#' - `{.time_unit_abbr}` will be a lower-case letter corresponding to the
+#'     `time_type` of `.x`
+#' - `{.align_abbr}` will be `""` if `.align` is the default of `"right"`;
+#'    otherwise, it will be the first letter of `.align`
+#' - `{.f_abbr}` will be a character vector containing a short abbreviation
+#'    for `.f` factoring in the input column type(s) for `.col_names`
+#'
+#' @importFrom dplyr mutate %>% arrange tibble select all_of
+#' @importFrom rlang enquo expr_label caller_arg quo_get_env
+#' @importFrom tidyselect eval_select
+#' @importFrom glue glue
+#' @importFrom purrr map map_lgl
+#' @importFrom data.table frollmean frollsum frollapply
+#' @importFrom lubridate as.period
+#' @importFrom slider slide_sum slide_prod slide_mean slide_min slide_max slide_all slide_any
+#' @export
+#' @seealso [`epi_slide`] for the more general slide function
+#' @examples
+#' library(dplyr)
+#'
+#' # Add a column (`cases_7dsum`) containing a 7-day trailing sum on `cases`:
+#' cases_deaths_subset %>%
+#'   select(geo_value, time_value, cases) %>%
+#'   epi_slide_sum(cases, .window_size = 7)
+#'
+#' # Add a column (`cases_rate_7dav`) containing a 7-day trailing average on `case_rate`:
+#' covid_case_death_rates_extended %>%
+#'   epi_slide_mean(case_rate, .window_size = 7)
+#'
+#' # Use a less common specialized slide function:
+#' cases_deaths_subset %>%
+#'   epi_slide_opt(cases, slider::slide_min, .window_size = 7)
+#'
+#' # Specify output column names and/or a naming scheme:
+#' cases_deaths_subset %>%
+#'   select(geo_value, time_value, cases) %>%
+#'   group_by(geo_value) %>%
+#'   epi_slide_sum(cases, .window_size = 7, .new_col_names = "case_sum") %>%
+#'   ungroup()
+#' cases_deaths_subset %>%
+#'   select(geo_value, time_value, cases) %>%
+#'   group_by(geo_value) %>%
+#'   epi_slide_sum(cases, .window_size = 7, .prefix = "sum_") %>%
+#'   ungroup()
+#'
+#' # Additional settings can be sent to the {data.table} and {slider} functions
+#' # via `...`. This example passes some arguments to `frollmean` settings for
+#' # speed, accuracy, and to allow partially-missing windows:
+#' covid_case_death_rates_extended %>%
+#'   epi_slide_mean(
+#'     case_rate,
+#'     .window_size = 7,
+#'     na.rm = TRUE, algo = "exact", hasNA = TRUE
+#'   )
+#'
+#' # If the more specialized possibilities for `.f` don't cover your needs, you
+#' # can use `epi_slide_opt` with `.f = data.table::frollapply` to apply a
+#' # custom function at the cost of more computation time. See also `epi_slide`
+#' # if you need something even more general.
+#' cases_deaths_subset %>%
+#'   select(geo_value, time_value, case_rate_7d_av, death_rate_7d_av) %>%
+#'   epi_slide_opt(c(case_rate_7d_av, death_rate_7d_av),
+#'     data.table::frollapply,
+#'     FUN = median, .window_size = 28,
+#'     .suffix = "_{.n}{.time_unit_abbr}_median"
+#'   ) %>%
+#'   print(n = 40)
+epi_slide_opt <- function(
+    .x, .col_names, .f, ...,
+    .window_size = NULL, .align = c("right", "center", "left"),
+    .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
+    .ref_time_values = NULL, .all_rows = FALSE) {
+  assert_class(.x, "epi_df")
+
+  # Deprecated argument handling
+  provided_args <- rlang::call_args_names(rlang::call_match())
+  if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
+    cli::cli_abort(
+      "epi_slide_opt: you are using one of the following old argument names: `x`, `col_names`, `f`, `ref_time_values`,
+      or `all_rows`. Please use the new dot-prefixed names: `.x`, `.col_names`, `.f`,
+      `.ref_time_values`, `.all_rows`."
+    )
+  }
+  if ("as_list_col" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_opt: the argument `as_list_col` is deprecated. If FALSE, you can just remove it.
+      If TRUE, have your given computation wrap its result using `list(result)` instead."
+    )
+  }
+  if ("before" %in% provided_args || "after" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_opt: `before` and `after` are deprecated for `epi_slide`. Use `.window_size` and `.align` instead.
+      See the slide documentation for more details."
+    )
+  }
+  if ("new_col_name" %in% provided_args || ".new_col_name" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
+      the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =`.",
+      class = "epiprocess__epi_slide_opt__new_name_not_supported"
+    )
+  }
+  if ("names_sep" %in% provided_args || ".names_sep" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_opt: the argument `names_sep` is not supported for `epi_slide_opt`. If you want to customize
+      the output column names, use `dplyr::rename` after the slide.",
+      class = "epiprocess__epi_slide_opt__name_sep_not_supported"
+    )
+  }
+
+  assert_class(.x, "epi_df")
+  .x_orig_groups <- groups(.x)
+  if (inherits(.x, "grouped_df")) {
+    expected_group_keys <- .x %>%
+      key_colnames(exclude = "time_value") %>%
+      sort()
+    if (!identical(.x %>% group_vars() %>% sort(), expected_group_keys)) {
+      cli_abort(
+        "`.x` must be either grouped by {expected_group_keys} or ungrouped; if the latter,
+         we'll temporarily group by {expected_group_keys} for this operation. You may need
+        to aggregate your data first; see sum_groups_epi_df().",
+        class = "epiprocess__epi_slide_opt__invalid_grouping"
+      )
+    }
+  } else {
+    .x <- group_epi_df(.x, exclude = "time_value")
+  }
+  if (nrow(.x) == 0L) {
+    cli_abort(
+      c(
+        "input data `.x` unexpectedly has 0 rows",
+        "i" = "If this computation is occuring within an `epix_slide` call,
+          check that `epix_slide` `.versions` argument was set appropriately
+          so that you don't get any completely-empty snapshots"
+      ),
+      class = "epiprocess__epi_slide_opt__0_row_input",
+      epiprocess__x = .x
+    )
+  }
+
+  # Check for duplicated time values within groups
+  assert(check_ukey_unique(ungroup(.x), c(group_vars(.x), "time_value")))
+
+  # Validate/process .col_names, .f:
+  col_names_quo <- enquo(.col_names)
+  f_info <- upstream_slide_f_info(.f)
+  f_from_package <- f_info$from_package
+
+  # Validate/process .ref_time_values:
+  user_provided_rtvs <- !is.null(.ref_time_values)
+  if (!user_provided_rtvs) {
+    .ref_time_values <- unique(.x$time_value)
+  } else {
+    assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
+    if (!test_subset(.ref_time_values, unique(.x$time_value))) {
+      cli_abort(
+        "`ref_time_values` must be a unique subset of the time values in `x`.",
+        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
+      )
+    }
+    if (anyDuplicated(.ref_time_values) != 0L) {
+      cli_abort(
+        "`ref_time_values` must not contain any duplicates; use `unique` if appropriate.",
+        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
+      )
+    }
+  }
+  ref_time_values <- sort(.ref_time_values)
+
+  # Handle window arguments
+  .align <- rlang::arg_match(.align)
+  time_type <- attr(.x, "metadata")$time_type
+  if (is.null(.window_size)) {
+    cli_abort("epi_slide_opt: `.window_size` must be specified.")
+  }
+  validate_slide_window_arg(.window_size, time_type)
+  window_args <- get_before_after_from_window(.window_size, .align, time_type)
+
+  # Handle output naming:
+  names_info <- across_ish_names_info(.x, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
+  input_col_names <- names_info$input_col_names
+  output_col_names <- names_info$output_col_names
+
+  # Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
+  date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
+  all_dates <- date_seq_list$all_dates
+  pad_early_dates <- date_seq_list$pad_early_dates
+  pad_late_dates <- date_seq_list$pad_late_dates
+
+  slide_one_grp <- function(.data_group, .group_key, ...) {
+    missing_times <- all_dates[!vec_in(all_dates, .data_group$time_value)]
+    # `frollmean` requires a full window to compute a result. Add NA values
+    # to beginning and end of the group so that we get results for the
+    # first `before` and last `after` elements.
+    .data_group <- vec_rbind(
+      .data_group, # (tibble; epi_slide_opt uses .keep = FALSE)
+      new_tibble(vec_recycle_common(
+        time_value = c(missing_times, pad_early_dates, pad_late_dates),
+        .real = FALSE
+      ))
+    ) %>%
+      `[`(vec_order(.$time_value), )
+
+    if (f_from_package == "data.table") {
+      # Grouping should ensure that we don't have duplicate time values.
+      # Completion above should ensure we have at least .window_size rows. Check
+      # that we don't have more than .window_size rows (or fewer somehow):
+      if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
+        cli_abort(
+          c(
+            "group contains an unexpected number of rows",
+            "i" = c("Input data may contain `time_values` closer together than the
+              expected `time_step` size")
+          ),
+          class = "epiprocess__epi_slide_opt__unexpected_row_number",
+          epiprocess__data_group = .data_group,
+          epiprocess__group_key = .group_key
+        )
+      }
+
+      # `frollmean` is 1-indexed, so create a new window width based on our
+      # `before` and `after` params. Right-aligned `frollmean` results'
+      # `ref_time_value`s will be `after` timesteps ahead of where they should
+      # be; shift results to the left by `after` timesteps.
+      if (window_args$before != Inf) {
+        window_size <- window_args$before + window_args$after + 1L
+        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, ...)
+      } else {
+        window_size <- list(seq_along(.data_group$time_value))
+        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, adaptive = TRUE, ...)
+      }
+      if (window_args$after >= 1) {
+        .data_group[, output_col_names] <- purrr::map(roll_output, function(.x) {
+          c(.x[(window_args$after + 1L):length(.x)], rep(NA, window_args$after))
+        })
+      } else {
+        .data_group[, output_col_names] <- roll_output
+      }
+    }
+    if (f_from_package == "slider") {
+      for (i in seq_along(input_col_names)) {
+        .data_group[, output_col_names[i]] <- .f(
+          x = .data_group[[input_col_names[i]]],
+          before = as.numeric(window_args$before),
+          after = as.numeric(window_args$after),
+          ...
+        )
+      }
+    }
+
+    .data_group
+  }
+
+  result <- .x %>%
+    `[[<-`(".real", value = TRUE) %>%
+    group_modify(slide_one_grp, ..., .keep = FALSE) %>%
+    `[`(.$.real, names(.) != ".real") %>%
+    arrange_col_canonical() %>%
+    group_by(!!!.x_orig_groups)
+
+  if (.all_rows) {
+    result[!vec_in(result$time_value, ref_time_values), output_col_names] <- NA
+  } else if (user_provided_rtvs) {
+    result <- result[vec_in(result$time_value, ref_time_values), ]
+  }
+
+  if (!is_epi_df(result)) {
+    # `.all_rows` handling strips epi_df format and metadata.
+    # Restore them.
+    result <- reclass(result, attributes(.x)$metadata)
+  }
+
+  return(result)
+}
+
+#' @rdname epi_slide_opt
+#' @description `epi_slide_mean` is a wrapper around `epi_slide_opt` with `.f =
+#' data.table::frollmean`.
+#'
+#' @export
+epi_slide_mean <- function(
+    .x, .col_names, ...,
+    .window_size = NULL, .align = c("right", "center", "left"),
+    .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
+    .ref_time_values = NULL, .all_rows = FALSE) {
+  # Deprecated argument handling
+  provided_args <- rlang::call_args_names(rlang::call_match())
+  if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
+    cli::cli_abort(
+      "epi_slide_mean: you are using one of the following old argument names: `x`, `col_names`, `f`, `ref_time_values`,
+      or `all_rows`. Please use the new dot-prefixed names: `.x`, `.col_names`, `.f`,
+      `.ref_time_values`, `.all_rows`."
+    )
+  }
+  if ("as_list_col" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_mean: the argument `as_list_col` is deprecated. If FALSE, you can just remove it.
+      If TRUE, have your given computation wrap its result using `list(result)` instead."
+    )
+  }
+  if ("before" %in% provided_args || "after" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_mean: `before` and `after` are deprecated for `epi_slide`. Use `.window_size` and `.align` instead.
+      See the slide documentation for more details."
+    )
+  }
+  if ("new_col_name" %in% provided_args || ".new_col_name" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
+      the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =`.",
+      class = "epiprocess__epi_slide_opt__new_name_not_supported"
+    )
+  }
+  if ("names_sep" %in% provided_args || ".names_sep" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_mean: the argument `names_sep` is not supported. If you want to customize
+      the output column names, use `dplyr::rename` after the slide."
+    )
+  }
+
+  epi_slide_opt(
+    .x = .x,
+    .col_names = {{ .col_names }},
+    .f = data.table::frollmean,
+    ...,
+    .window_size = .window_size,
+    .align = .align,
+    .prefix = .prefix,
+    .suffix = .suffix,
+    .new_col_names = .new_col_names,
+    .ref_time_values = .ref_time_values,
+    .all_rows = .all_rows
+  )
+}
+
+#' @rdname epi_slide_opt
+#' @description `epi_slide_sum` is a wrapper around `epi_slide_opt` with `.f =
+#' data.table::frollsum`.
+#'
+#' @export
+epi_slide_sum <- function(
+    .x, .col_names, ...,
+    .window_size = NULL, .align = c("right", "center", "left"),
+    .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
+    .ref_time_values = NULL, .all_rows = FALSE) {
+  # Deprecated argument handling
+  provided_args <- rlang::call_args_names(rlang::call_match())
+  if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
+    cli::cli_abort(
+      "epi_slide_sum: you are using one of the following old argument names: `x`, `col_names`, `f`, `ref_time_values`,
+      or `all_rows`. Please use the new dot-prefixed names: `.x`, `.col_names`, `.f`,
+      `.ref_time_values`, `.all_rows`."
+    )
+  }
+  if ("as_list_col" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_sum: the argument `as_list_col` is deprecated. If FALSE, you can just remove it.
+      If TRUE, have your given computation wrap its result using `list(result)` instead."
+    )
+  }
+  if ("before" %in% provided_args || "after" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_sum: `before` and `after` are deprecated for `epi_slide`. Use `.window_size` and `.align` instead.
+      See the slide documentation for more details."
+    )
+  }
+  if ("new_col_name" %in% provided_args || ".new_col_name" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
+      the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =`.",
+      class = "epiprocess__epi_slide_opt__new_name_not_supported"
+    )
+  }
+  if ("names_sep" %in% provided_args || ".names_sep" %in% provided_args) {
+    cli::cli_abort(
+      "epi_slide_sum: the argument `names_sep` is not supported. If you want to customize
+      the output column names, use `dplyr::rename` after the slide."
+    )
+  }
+  epi_slide_opt(
+    .x = .x,
+    .col_names = {{ .col_names }},
+    .f = data.table::frollsum,
+    ...,
+    .window_size = .window_size,
+    .align = .align,
+    .prefix = .prefix,
+    .suffix = .suffix,
+    .new_col_names = .new_col_names,
+    .ref_time_values = .ref_time_values,
+    .all_rows = .all_rows
+  )
+}
+
+#' Make a complete date sequence between min(x$time_value) and max
+#' (x$time_value). Produce lists of dates before min(x$time_value) and after
+#' max(x$time_value) for padding initial and final windows to size `n`.
+#'
+#' `before` and `after` args are assumed to have been validated by the calling
+#' function (using `validate_slide_window_arg`).
+#'
+#' @keywords internal
+full_date_seq <- function(x, before, after, time_type) {
+  if (!time_type %in% c("day", "week", "yearmonth", "integer")) {
+    cli_abort(
+      "time_type must be one of 'day', 'week', or 'integer'."
+    )
+  }
+
+  pad_early_dates <- c()
+  pad_late_dates <- c()
+
+  # `tsibble` time types have their own behavior, where adding 1 corresponds to
+  # incrementing by a quantum (smallest resolvable unit) of the date class. For
+  # example, one step = 1 quarter for `yearquarter`.
+  if (time_type %in% c("yearmonth", "integer")) {
+    all_dates <- seq(min(x$time_value), max(x$time_value), by = 1L)
+
+    if (before != 0 && before != Inf) {
+      pad_early_dates <- all_dates[1L] - before:1
+    }
+    if (after != 0) {
+      pad_late_dates <- all_dates[length(all_dates)] + 1:after
+    }
+  } else {
+    by <- switch(time_type,
+      day = "days",
+      week = "weeks",
+    )
+
+    all_dates <- seq(min(x$time_value), max(x$time_value), by = by)
+    if (before != 0 && before != Inf) {
+      # The behavior is analogous to the branch with tsibble types above. For
+      # more detail, note that the function `seq.Date(from, ..., length.out =
+      # n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first
+      # element. Adding "-1" to the `by` arg makes `seq.Date` go backwards in
+      # time.
+      pad_early_dates <- sort(seq(all_dates[1L], by = paste("-1", by), length.out = before + 1)[-1])
+    }
+    if (after != 0) {
+      pad_late_dates <- seq(all_dates[length(all_dates)], by = by, length.out = after + 1)[-1]
+    }
+  }
+
+  list(
+    all_dates = all_dates,
+    pad_early_dates = pad_early_dates,
+    pad_late_dates = pad_late_dates
+  )
+}
diff --git a/R/slide.R b/R/slide.R
index 0da569deb..c05fbe98f 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -548,669 +548,3 @@ get_before_after_from_window <- function(window_size, align, time_type) {
   }
   list(before = before, after = after)
 }
-
-
-#' Information about upstream (`{data.table}`/`{slider}`) slide functions
-#'
-#' Underlies [`upstream_slide_f_info`].
-#'
-#' @keywords internal
-upstream_slide_f_possibilities <- tibble::tribble(
-  ~f, ~package, ~namer,
-  frollmean, "data.table", ~ if (is.logical(.x)) "prop" else "av",
-  frollsum, "data.table", ~ if (is.logical(.x)) "count" else "sum",
-  frollapply, "data.table", ~"slide",
-  slide_sum, "slider", ~ if (is.logical(.x)) "count" else "sum",
-  slide_prod, "slider", ~"prod",
-  slide_mean, "slider", ~ if (is.logical(.x)) "prop" else "av",
-  slide_min, "slider", ~"min",
-  slide_max, "slider", ~"max",
-  slide_all, "slider", ~"all",
-  slide_any, "slider", ~"any",
-)
-
-#' Validate & get information about an upstream slide function
-#'
-#' @param .f function such as `data.table::frollmean` or `slider::slide_mean`;
-#'   must appear in [`upstream_slide_f_possibilities`]
-#' @return named list with two elements: `from_package`, a string containing the
-#'   upstream package name ("data.table" or "slider"), and `namer`, a function
-#'   that takes a column to call `.f` on and outputs a basic name or
-#'   abbreviation for what operation `.f` represents on that kind of column
-#'   (e.g., "sum", "av", "count").
-#'
-#' @keywords internal
-upstream_slide_f_info <- function(.f) {
-  assert_function(.f)
-
-  # Check that slide function `.f` is one of those short-listed from
-  # `data.table` and `slider` (or a function that has the exact same definition,
-  # e.g. if the function has been reexported or defined locally). Extract some
-  # metadata. `namer` will be mapped over columns (.x will be a column, not the
-  # entire edf).
-  f_info_row <- upstream_slide_f_possibilities %>%
-    filter(map_lgl(.data$f, ~ identical(.f, .x)))
-  if (nrow(f_info_row) == 0L) {
-    # `f` is from somewhere else and not supported
-    cli_abort(
-      c(
-        "problem with {rlang::expr_label(rlang::caller_arg(f))}",
-        "i" = "`f` must be one of `data.table`'s rolling functions (`frollmean`,
-              `frollsum`, `frollapply`. See `?data.table::roll`) or one of
-              `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
-              etc. See `?slider::\`summary-slide\`` for more options)."
-      ),
-      class = "epiprocess__epi_slide_opt__unsupported_slide_function",
-      epiprocess__f = .f
-    )
-  }
-  if (nrow(f_info_row) > 1L) {
-    cli_abort('epiprocess internal error: looking up `.f` in table of possible
-               functions yielded multiple matches. Please report it using "New
-               issue" at https://github.com/cmu-delphi/epiprocess/issues, using
-               reprex::reprex to provide a minimal reproducible example.')
-  }
-  f_from_package <- f_info_row$package
-  list(
-    from_package = f_from_package,
-    namer = unwrap(f_info_row$namer)
-  )
-}
-
-#' Calculate input and output column names for an `{epiprocess}` [`dplyr::across`]-like operations
-#'
-#' @param .x data.frame to perform input column tidyselection on
-#' @param time_type as in [`new_epi_df`]
-#' @param col_names_quo enquosed input column tidyselect expression
-#' @param .f_namer function taking an input column object and outputting a name
-#'   for a corresponding output column; see [`upstream_slide_f_info`]
-#' @param .window_size as in [`epi_slide_opt`]
-#' @param .align as in [`epi_slide_opt`]
-#' @param .prefix as in [`epi_slide_opt`]
-#' @param .suffix as in [`epi_slide_opt`]
-#' @param .new_col_names as in [`epi_slide_opt`]
-#' @return named list with two elements: `input_col_names`, chr, subset of
-#'   `names(.x)`; and `output_colnames`, chr, same length as `input_col_names`
-#'
-#' @keywords internal
-across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .window_size, .align, .prefix, .suffix, .new_col_names) {
-  # The position of a given column can be differ between input `.x` and
-  # `.data_group` since the grouping step by default drops grouping columns.
-  # To avoid rerunning `eval_select` for every `.data_group`, convert
-  # positions of user-provided `col_names` into string column names. We avoid
-  # using `names(pos)` directly for robustness and in case we later want to
-  # allow users to rename fields via tidyselection.
-  pos <- eval_select(col_names_quo, data = .x, allow_rename = FALSE)
-  input_col_names <- names(.x)[pos]
-
-  # Handle output naming
-  if ((!is.null(.prefix) || !is.null(.suffix)) && !is.null(.new_col_names)) {
-    cli_abort(
-      "Can't use both .prefix/.suffix and .new_col_names at the same time.",
-      class = "epiprocess__epi_slide_opt_incompatible_naming_args"
-    )
-  }
-  assert_string(.prefix, null.ok = TRUE)
-  assert_string(.suffix, null.ok = TRUE)
-  assert_character(.new_col_names, len = length(input_col_names), null.ok = TRUE)
-  if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
-    .suffix <- "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
-    # ^ does not account for any arguments specified to underlying functions via
-    # `...` such as `na.rm =`, nor does it distinguish between functions from
-    # different packages accomplishing the same type of computation. Those are
-    # probably only set one way per task, so this probably produces cleaner
-    # names without clashes (though maybe some confusion if switching between
-    # code with different settings).
-  }
-  if (!is.null(.prefix) || !is.null(.suffix)) {
-    .prefix <- .prefix %||% ""
-    .suffix <- .suffix %||% ""
-    if (identical(.window_size, Inf)) {
-      n <- "running_"
-      time_unit_abbr <- ""
-      align_abbr <- ""
-    } else {
-      n <- time_delta_to_n_steps(.window_size, time_type)
-      time_unit_abbr <- time_type_unit_abbr(time_type)
-      align_abbr <- c(right = "", center = "c", left = "l")[[.align]]
-    }
-    glue_env <- rlang::env(
-      .n = n,
-      .time_unit_abbr = time_unit_abbr,
-      .align_abbr = align_abbr,
-      .f_abbr = purrr::map_chr(.x[, c(input_col_names)], .f_namer), # compat between DT and tbl selection
-      quo_get_env(col_names_quo)
-    )
-    .new_col_names <- unclass(
-      glue(.prefix, .envir = glue_env) +
-        input_col_names +
-        glue(.suffix, .envir = glue_env)
-    )
-  } else {
-    # `.new_col_names` was provided by user; we don't need to do anything.
-  }
-  if (any(.new_col_names %in% names(.x))) {
-    cli_abort(c(
-      "Naming conflict between new columns and existing columns",
-      "x" = "Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
-    ), class = "epiprocess__epi_slide_opt_old_new_name_conflict")
-  }
-  if (anyDuplicated(.new_col_names)) {
-    cli_abort(c(
-      "New column names contain duplicates",
-      "x" = "Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
-    ), class = "epiprocess__epi_slide_opt_new_name_duplicated")
-  }
-  output_col_names <- .new_col_names
-
-  return(list(
-    input_col_names = input_col_names,
-    output_col_names = output_col_names
-  ))
-}
-
-#' Optimized slide functions for common cases
-#'
-#' @description `epi_slide_opt` allows sliding an n-timestep [data.table::froll]
-#' or [slider::summary-slide] function over variables in an `epi_df` object.
-#' These functions tend to be much faster than `epi_slide()`. See
-#' `vignette("epi_df")` for more examples.
-#'
-#' @template basic-slide-params
-#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
-#'   name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
-#'   [other tidy-select expression][tidyselect::language], or a vector of
-#'   characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
-#'   they were positions in the data frame, so expressions like `x:y` can be
-#'   used to select a range of variables.
-#'
-#'   The tidy-selection renaming interface is not supported, and cannot be used
-#'   to provide output column names; if you want to customize the output column
-#'   names, use [`dplyr::rename`] after the slide.
-#' @param .f Function; together with `...` specifies the computation to slide.
-#'  `.f` must be one of `data.table`'s rolling functions
-#'  (`frollmean`, `frollsum`, `frollapply`. See [data.table::roll]) or one
-#'  of `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
-#'  etc. See [slider::summary-slide]).
-#'
-#'  The optimized `data.table` and `slider` functions can't be directly passed
-#'  as the computation function in `epi_slide` without careful handling to make
-#'  sure each computation group is made up of the `.window_size` dates rather
-#'  than `.window_size` points. `epi_slide_opt` (and wrapper functions
-#'  `epi_slide_mean` and `epi_slide_sum`) take care of window completion
-#'  automatically to prevent associated errors.
-#' @param ... Additional arguments to pass to the slide computation `.f`, for
-#'   example, `algo` or `na.rm` in data.table functions. You don't need to
-#'   specify `.x`, `.window_size`, or `.align` (or `before`/`after` for slider
-#'   functions).
-#' @param .prefix Optional [`glue::glue`] format string; name the slide result
-#'   column(s) by attaching this prefix to the corresponding input column(s).
-#'   Some shorthand is supported for basing the output names on `.window_size`
-#'   or other arguments; see "Prefix and suffix shorthand" below.
-#' @param .suffix Optional [`glue::glue`] format string; like `.prefix`. The
-#'   default naming behavior is equivalent to `.suffix =
-#'   "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"`. Can be used in combination
-#'   with `.prefix`.
-#' @param .new_col_names Optional character vector with length matching the
-#'   number of input columns from `.col_names`; name the slide result column(s)
-#'   with these names. Cannot be used in combination with `.prefix` and/or
-#'   `.suffix`.
-#'
-#' @section Prefix and suffix shorthand:
-#'
-#' [`glue::glue`] format strings specially interpret content within curly
-#' braces. E.g., `glue::glue("ABC{2 + 2}")` evaluates to `"ABC4"`. For `.prefix`
-#' and `.suffix`, we provide `glue` with some additional variable bindings:
-#'
-#' - `{.n}` will be the number of time steps in the computation
-#'    corresponding to the `.window_size`.
-#' - `{.time_unit_abbr}` will be a lower-case letter corresponding to the
-#'     `time_type` of `.x`
-#' - `{.align_abbr}` will be `""` if `.align` is the default of `"right"`;
-#'    otherwise, it will be the first letter of `.align`
-#' - `{.f_abbr}` will be a character vector containing a short abbreviation
-#'    for `.f` factoring in the input column type(s) for `.col_names`
-#'
-#' @importFrom dplyr mutate %>% arrange tibble select all_of
-#' @importFrom rlang enquo expr_label caller_arg quo_get_env
-#' @importFrom tidyselect eval_select
-#' @importFrom glue glue
-#' @importFrom purrr map map_lgl
-#' @importFrom data.table frollmean frollsum frollapply
-#' @importFrom lubridate as.period
-#' @importFrom slider slide_sum slide_prod slide_mean slide_min slide_max slide_all slide_any
-#' @export
-#' @seealso [`epi_slide`] for the more general slide function
-#' @examples
-#' library(dplyr)
-#'
-#' # Add a column (`cases_7dsum`) containing a 7-day trailing sum on `cases`:
-#' cases_deaths_subset %>%
-#'   select(geo_value, time_value, cases) %>%
-#'   epi_slide_sum(cases, .window_size = 7)
-#'
-#' # Add a column (`cases_rate_7dav`) containing a 7-day trailing average on `case_rate`:
-#' covid_case_death_rates_extended %>%
-#'   epi_slide_mean(case_rate, .window_size = 7)
-#'
-#' # Use a less common specialized slide function:
-#' cases_deaths_subset %>%
-#'   epi_slide_opt(cases, slider::slide_min, .window_size = 7)
-#'
-#' # Specify output column names and/or a naming scheme:
-#' cases_deaths_subset %>%
-#'   select(geo_value, time_value, cases) %>%
-#'   group_by(geo_value) %>%
-#'   epi_slide_sum(cases, .window_size = 7, .new_col_names = "case_sum") %>%
-#'   ungroup()
-#' cases_deaths_subset %>%
-#'   select(geo_value, time_value, cases) %>%
-#'   group_by(geo_value) %>%
-#'   epi_slide_sum(cases, .window_size = 7, .prefix = "sum_") %>%
-#'   ungroup()
-#'
-#' # Additional settings can be sent to the {data.table} and {slider} functions
-#' # via `...`. This example passes some arguments to `frollmean` settings for
-#' # speed, accuracy, and to allow partially-missing windows:
-#' covid_case_death_rates_extended %>%
-#'   epi_slide_mean(
-#'     case_rate,
-#'     .window_size = 7,
-#'     na.rm = TRUE, algo = "exact", hasNA = TRUE
-#'   )
-#'
-#' # If the more specialized possibilities for `.f` don't cover your needs, you
-#' # can use `epi_slide_opt` with `.f = data.table::frollapply` to apply a
-#' # custom function at the cost of more computation time. See also `epi_slide`
-#' # if you need something even more general.
-#' cases_deaths_subset %>%
-#'   select(geo_value, time_value, case_rate_7d_av, death_rate_7d_av) %>%
-#'   epi_slide_opt(c(case_rate_7d_av, death_rate_7d_av),
-#'     data.table::frollapply,
-#'     FUN = median, .window_size = 28,
-#'     .suffix = "_{.n}{.time_unit_abbr}_median"
-#'   ) %>%
-#'   print(n = 40)
-epi_slide_opt <- function(
-    .x, .col_names, .f, ...,
-    .window_size = NULL, .align = c("right", "center", "left"),
-    .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
-    .ref_time_values = NULL, .all_rows = FALSE) {
-  assert_class(.x, "epi_df")
-
-  # Deprecated argument handling
-  provided_args <- rlang::call_args_names(rlang::call_match())
-  if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
-    cli::cli_abort(
-      "epi_slide_opt: you are using one of the following old argument names: `x`, `col_names`, `f`, `ref_time_values`,
-      or `all_rows`. Please use the new dot-prefixed names: `.x`, `.col_names`, `.f`,
-      `.ref_time_values`, `.all_rows`."
-    )
-  }
-  if ("as_list_col" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_opt: the argument `as_list_col` is deprecated. If FALSE, you can just remove it.
-      If TRUE, have your given computation wrap its result using `list(result)` instead."
-    )
-  }
-  if ("before" %in% provided_args || "after" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_opt: `before` and `after` are deprecated for `epi_slide`. Use `.window_size` and `.align` instead.
-      See the slide documentation for more details."
-    )
-  }
-  if ("new_col_name" %in% provided_args || ".new_col_name" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
-      the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =`.",
-      class = "epiprocess__epi_slide_opt__new_name_not_supported"
-    )
-  }
-  if ("names_sep" %in% provided_args || ".names_sep" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_opt: the argument `names_sep` is not supported for `epi_slide_opt`. If you want to customize
-      the output column names, use `dplyr::rename` after the slide.",
-      class = "epiprocess__epi_slide_opt__name_sep_not_supported"
-    )
-  }
-
-  assert_class(.x, "epi_df")
-  .x_orig_groups <- groups(.x)
-  if (inherits(.x, "grouped_df")) {
-    expected_group_keys <- .x %>%
-      key_colnames(exclude = "time_value") %>%
-      sort()
-    if (!identical(.x %>% group_vars() %>% sort(), expected_group_keys)) {
-      cli_abort(
-        "`.x` must be either grouped by {expected_group_keys} or ungrouped; if the latter,
-         we'll temporarily group by {expected_group_keys} for this operation. You may need
-        to aggregate your data first; see sum_groups_epi_df().",
-        class = "epiprocess__epi_slide_opt__invalid_grouping"
-      )
-    }
-  } else {
-    .x <- group_epi_df(.x, exclude = "time_value")
-  }
-  if (nrow(.x) == 0L) {
-    cli_abort(
-      c(
-        "input data `.x` unexpectedly has 0 rows",
-        "i" = "If this computation is occuring within an `epix_slide` call,
-          check that `epix_slide` `.versions` argument was set appropriately
-          so that you don't get any completely-empty snapshots"
-      ),
-      class = "epiprocess__epi_slide_opt__0_row_input",
-      epiprocess__x = .x
-    )
-  }
-
-  # Check for duplicated time values within groups
-  assert(check_ukey_unique(ungroup(.x), c(group_vars(.x), "time_value")))
-
-  # Validate/process .col_names, .f:
-  col_names_quo <- enquo(.col_names)
-  f_info <- upstream_slide_f_info(.f)
-  f_from_package <- f_info$from_package
-
-  # Validate/process .ref_time_values:
-  user_provided_rtvs <- !is.null(.ref_time_values)
-  if (!user_provided_rtvs) {
-    .ref_time_values <- unique(.x$time_value)
-  } else {
-    assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
-    if (!test_subset(.ref_time_values, unique(.x$time_value))) {
-      cli_abort(
-        "`ref_time_values` must be a unique subset of the time values in `x`.",
-        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
-      )
-    }
-    if (anyDuplicated(.ref_time_values) != 0L) {
-      cli_abort(
-        "`ref_time_values` must not contain any duplicates; use `unique` if appropriate.",
-        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
-      )
-    }
-  }
-  ref_time_values <- sort(.ref_time_values)
-
-  # Handle window arguments
-  .align <- rlang::arg_match(.align)
-  time_type <- attr(.x, "metadata")$time_type
-  if (is.null(.window_size)) {
-    cli_abort("epi_slide_opt: `.window_size` must be specified.")
-  }
-  validate_slide_window_arg(.window_size, time_type)
-  window_args <- get_before_after_from_window(.window_size, .align, time_type)
-
-  # Handle output naming:
-  names_info <- across_ish_names_info(.x, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
-  input_col_names <- names_info$input_col_names
-  output_col_names <- names_info$output_col_names
-
-  # Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
-  date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
-  all_dates <- date_seq_list$all_dates
-  pad_early_dates <- date_seq_list$pad_early_dates
-  pad_late_dates <- date_seq_list$pad_late_dates
-
-  slide_one_grp <- function(.data_group, .group_key, ...) {
-    missing_times <- all_dates[!vec_in(all_dates, .data_group$time_value)]
-    # `frollmean` requires a full window to compute a result. Add NA values
-    # to beginning and end of the group so that we get results for the
-    # first `before` and last `after` elements.
-    .data_group <- vec_rbind(
-      .data_group, # (tibble; epi_slide_opt uses .keep = FALSE)
-      new_tibble(vec_recycle_common(
-        time_value = c(missing_times, pad_early_dates, pad_late_dates),
-        .real = FALSE
-      ))
-    ) %>%
-      `[`(vec_order(.$time_value), )
-
-    if (f_from_package == "data.table") {
-      # Grouping should ensure that we don't have duplicate time values.
-      # Completion above should ensure we have at least .window_size rows. Check
-      # that we don't have more than .window_size rows (or fewer somehow):
-      if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
-        cli_abort(
-          c(
-            "group contains an unexpected number of rows",
-            "i" = c("Input data may contain `time_values` closer together than the
-              expected `time_step` size")
-          ),
-          class = "epiprocess__epi_slide_opt__unexpected_row_number",
-          epiprocess__data_group = .data_group,
-          epiprocess__group_key = .group_key
-        )
-      }
-
-      # `frollmean` is 1-indexed, so create a new window width based on our
-      # `before` and `after` params. Right-aligned `frollmean` results'
-      # `ref_time_value`s will be `after` timesteps ahead of where they should
-      # be; shift results to the left by `after` timesteps.
-      if (window_args$before != Inf) {
-        window_size <- window_args$before + window_args$after + 1L
-        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, ...)
-      } else {
-        window_size <- list(seq_along(.data_group$time_value))
-        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, adaptive = TRUE, ...)
-      }
-      if (window_args$after >= 1) {
-        .data_group[, output_col_names] <- purrr::map(roll_output, function(.x) {
-          c(.x[(window_args$after + 1L):length(.x)], rep(NA, window_args$after))
-        })
-      } else {
-        .data_group[, output_col_names] <- roll_output
-      }
-    }
-    if (f_from_package == "slider") {
-      for (i in seq_along(input_col_names)) {
-        .data_group[, output_col_names[i]] <- .f(
-          x = .data_group[[input_col_names[i]]],
-          before = as.numeric(window_args$before),
-          after = as.numeric(window_args$after),
-          ...
-        )
-      }
-    }
-
-    .data_group
-  }
-
-  result <- .x %>%
-    `[[<-`(".real", value = TRUE) %>%
-    group_modify(slide_one_grp, ..., .keep = FALSE) %>%
-    `[`(.$.real, names(.) != ".real") %>%
-    arrange_col_canonical() %>%
-    group_by(!!!.x_orig_groups)
-
-  if (.all_rows) {
-    result[!vec_in(result$time_value, ref_time_values), output_col_names] <- NA
-  } else if (user_provided_rtvs) {
-    result <- result[vec_in(result$time_value, ref_time_values), ]
-  }
-
-  if (!is_epi_df(result)) {
-    # `.all_rows` handling strips epi_df format and metadata.
-    # Restore them.
-    result <- reclass(result, attributes(.x)$metadata)
-  }
-
-  return(result)
-}
-
-#' @rdname epi_slide_opt
-#' @description `epi_slide_mean` is a wrapper around `epi_slide_opt` with `.f =
-#' data.table::frollmean`.
-#'
-#' @export
-epi_slide_mean <- function(
-    .x, .col_names, ...,
-    .window_size = NULL, .align = c("right", "center", "left"),
-    .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
-    .ref_time_values = NULL, .all_rows = FALSE) {
-  # Deprecated argument handling
-  provided_args <- rlang::call_args_names(rlang::call_match())
-  if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
-    cli::cli_abort(
-      "epi_slide_mean: you are using one of the following old argument names: `x`, `col_names`, `f`, `ref_time_values`,
-      or `all_rows`. Please use the new dot-prefixed names: `.x`, `.col_names`, `.f`,
-      `.ref_time_values`, `.all_rows`."
-    )
-  }
-  if ("as_list_col" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_mean: the argument `as_list_col` is deprecated. If FALSE, you can just remove it.
-      If TRUE, have your given computation wrap its result using `list(result)` instead."
-    )
-  }
-  if ("before" %in% provided_args || "after" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_mean: `before` and `after` are deprecated for `epi_slide`. Use `.window_size` and `.align` instead.
-      See the slide documentation for more details."
-    )
-  }
-  if ("new_col_name" %in% provided_args || ".new_col_name" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
-      the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =`.",
-      class = "epiprocess__epi_slide_opt__new_name_not_supported"
-    )
-  }
-  if ("names_sep" %in% provided_args || ".names_sep" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_mean: the argument `names_sep` is not supported. If you want to customize
-      the output column names, use `dplyr::rename` after the slide."
-    )
-  }
-
-  epi_slide_opt(
-    .x = .x,
-    .col_names = {{ .col_names }},
-    .f = data.table::frollmean,
-    ...,
-    .window_size = .window_size,
-    .align = .align,
-    .prefix = .prefix,
-    .suffix = .suffix,
-    .new_col_names = .new_col_names,
-    .ref_time_values = .ref_time_values,
-    .all_rows = .all_rows
-  )
-}
-
-#' @rdname epi_slide_opt
-#' @description `epi_slide_sum` is a wrapper around `epi_slide_opt` with `.f =
-#' data.table::frollsum`.
-#'
-#' @export
-epi_slide_sum <- function(
-    .x, .col_names, ...,
-    .window_size = NULL, .align = c("right", "center", "left"),
-    .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
-    .ref_time_values = NULL, .all_rows = FALSE) {
-  # Deprecated argument handling
-  provided_args <- rlang::call_args_names(rlang::call_match())
-  if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
-    cli::cli_abort(
-      "epi_slide_sum: you are using one of the following old argument names: `x`, `col_names`, `f`, `ref_time_values`,
-      or `all_rows`. Please use the new dot-prefixed names: `.x`, `.col_names`, `.f`,
-      `.ref_time_values`, `.all_rows`."
-    )
-  }
-  if ("as_list_col" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_sum: the argument `as_list_col` is deprecated. If FALSE, you can just remove it.
-      If TRUE, have your given computation wrap its result using `list(result)` instead."
-    )
-  }
-  if ("before" %in% provided_args || "after" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_sum: `before` and `after` are deprecated for `epi_slide`. Use `.window_size` and `.align` instead.
-      See the slide documentation for more details."
-    )
-  }
-  if ("new_col_name" %in% provided_args || ".new_col_name" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
-      the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =`.",
-      class = "epiprocess__epi_slide_opt__new_name_not_supported"
-    )
-  }
-  if ("names_sep" %in% provided_args || ".names_sep" %in% provided_args) {
-    cli::cli_abort(
-      "epi_slide_sum: the argument `names_sep` is not supported. If you want to customize
-      the output column names, use `dplyr::rename` after the slide."
-    )
-  }
-  epi_slide_opt(
-    .x = .x,
-    .col_names = {{ .col_names }},
-    .f = data.table::frollsum,
-    ...,
-    .window_size = .window_size,
-    .align = .align,
-    .prefix = .prefix,
-    .suffix = .suffix,
-    .new_col_names = .new_col_names,
-    .ref_time_values = .ref_time_values,
-    .all_rows = .all_rows
-  )
-}
-
-#' Make a complete date sequence between min(x$time_value) and max
-#' (x$time_value). Produce lists of dates before min(x$time_value) and after
-#' max(x$time_value) for padding initial and final windows to size `n`.
-#'
-#' `before` and `after` args are assumed to have been validated by the calling
-#' function (using `validate_slide_window_arg`).
-#'
-#' @keywords internal
-full_date_seq <- function(x, before, after, time_type) {
-  if (!time_type %in% c("day", "week", "yearmonth", "integer")) {
-    cli_abort(
-      "time_type must be one of 'day', 'week', or 'integer'."
-    )
-  }
-
-  pad_early_dates <- c()
-  pad_late_dates <- c()
-
-  # `tsibble` time types have their own behavior, where adding 1 corresponds to
-  # incrementing by a quantum (smallest resolvable unit) of the date class. For
-  # example, one step = 1 quarter for `yearquarter`.
-  if (time_type %in% c("yearmonth", "integer")) {
-    all_dates <- seq(min(x$time_value), max(x$time_value), by = 1L)
-
-    if (before != 0 && before != Inf) {
-      pad_early_dates <- all_dates[1L] - before:1
-    }
-    if (after != 0) {
-      pad_late_dates <- all_dates[length(all_dates)] + 1:after
-    }
-  } else {
-    by <- switch(time_type,
-      day = "days",
-      week = "weeks",
-    )
-
-    all_dates <- seq(min(x$time_value), max(x$time_value), by = by)
-    if (before != 0 && before != Inf) {
-      # The behavior is analogous to the branch with tsibble types above. For
-      # more detail, note that the function `seq.Date(from, ..., length.out =
-      # n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first
-      # element. Adding "-1" to the `by` arg makes `seq.Date` go backwards in
-      # time.
-      pad_early_dates <- sort(seq(all_dates[1L], by = paste("-1", by), length.out = before + 1)[-1])
-    }
-    if (after != 0) {
-      pad_late_dates <- seq(all_dates[length(all_dates)], by = by, length.out = after + 1)[-1]
-    }
-  }
-
-  list(
-    all_dates = all_dates,
-    pad_early_dates = pad_early_dates,
-    pad_late_dates = pad_late_dates
-  )
-}
diff --git a/man/across_ish_names_info.Rd b/man/across_ish_names_info.Rd
index 70eb40c4f..c993c2bf5 100644
--- a/man/across_ish_names_info.Rd
+++ b/man/across_ish_names_info.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/slide.R
+% Please edit documentation in R/epi_slide_opt_edf.R
 \name{across_ish_names_info}
 \alias{across_ish_names_info}
 \title{Calculate input and output column names for an \code{{epiprocess}} \code{\link[dplyr:across]{dplyr::across}}-like operations}
diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index 4b75e9ffb..946e7c981 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/slide.R
+% Please edit documentation in R/epi_slide_opt_edf.R
 \name{epi_slide_opt}
 \alias{epi_slide_opt}
 \alias{epi_slide_mean}
diff --git a/man/epix_epi_slide_opt_one_epikey.Rd b/man/epix_epi_slide_opt_one_epikey.Rd
index e8321616e..9b3650f42 100644
--- a/man/epix_epi_slide_opt_one_epikey.Rd
+++ b/man/epix_epi_slide_opt_one_epikey.Rd
@@ -64,7 +64,7 @@ updates <- bind_rows(
     version = 13, time_value = 7, value = NA,
   )
 ) \%>\%
-  mutate(across(c(version, time_value), ~as.Date("2020-01-01") - 1 + .x)) \%>\%
+  mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x)) \%>\%
   tidyr::nest(.by = version, .key = "subtbl")
 
 updates \%>\%
diff --git a/man/full_date_seq.Rd b/man/full_date_seq.Rd
index eb36b2c17..1a29c5e92 100644
--- a/man/full_date_seq.Rd
+++ b/man/full_date_seq.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/slide.R
+% Please edit documentation in R/epi_slide_opt_edf.R
 \name{full_date_seq}
 \alias{full_date_seq}
 \title{Make a complete date sequence between min(x$time_value) and max
diff --git a/man/upstream_slide_f_info.Rd b/man/upstream_slide_f_info.Rd
index b0e928c4d..787790ca6 100644
--- a/man/upstream_slide_f_info.Rd
+++ b/man/upstream_slide_f_info.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/slide.R
+% Please edit documentation in R/epi_slide_opt_edf.R
 \name{upstream_slide_f_info}
 \alias{upstream_slide_f_info}
 \title{Validate & get information about an upstream slide function}
diff --git a/man/upstream_slide_f_possibilities.Rd b/man/upstream_slide_f_possibilities.Rd
index 9ba949ef1..0fae2cc4f 100644
--- a/man/upstream_slide_f_possibilities.Rd
+++ b/man/upstream_slide_f_possibilities.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/slide.R
+% Please edit documentation in R/epi_slide_opt_edf.R
 \docType{data}
 \name{upstream_slide_f_possibilities}
 \alias{upstream_slide_f_possibilities}

From e9b96f12f02ee718b918029e26afff4f64aed5f8 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 6 Mar 2025 17:27:44 -0800
Subject: [PATCH 024/107] Actually turn epi_slide_opt into S3 method

---
 DESCRIPTION                                   |  2 +-
 NAMESPACE                                     |  6 +--
 ...pi_slide_opt.R => epi_slide_opt_archive.R} | 39 ++++++++-----------
 R/epi_slide_opt_edf.R                         | 36 ++++++++++++++---
 man/epi_slide_opt.Rd                          | 25 ++++++++++--
 ...Rd => epi_slide_opt_archive_one_epikey.Rd} | 14 +++----
 6 files changed, 80 insertions(+), 42 deletions(-)
 rename R/{epix_epi_slide_opt.R => epi_slide_opt_archive.R} (87%)
 rename man/{epix_epi_slide_opt_one_epikey.Rd => epi_slide_opt_archive_one_epikey.Rd} (80%)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1732bbc97..a746b53eb 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -98,9 +98,9 @@ Collate:
     'correlation.R'
     'epi_df.R'
     'epi_df_forbidden_methods.R'
+    'epi_slide_opt_archive.R'
     'epi_slide_opt_edf.R'
     'epiprocess-package.R'
-    'epix_epi_slide_opt.R'
     'group_by_epi_df_methods.R'
     'methods-epi_archive.R'
     'grouped_epi_archive.R'
diff --git a/NAMESPACE b/NAMESPACE
index e1b111c0a..49b5a3acf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -27,8 +27,9 @@ S3method(dplyr_col_modify,col_modify_recorder_df)
 S3method(dplyr_col_modify,epi_df)
 S3method(dplyr_reconstruct,epi_df)
 S3method(dplyr_row_slice,epi_df)
-S3method(epix_epi_slide_opt,epi_archive)
-S3method(epix_epi_slide_opt,grouped_epi_archive)
+S3method(epi_slide_opt,epi_archive)
+S3method(epi_slide_opt,epi_df)
+S3method(epi_slide_opt,grouped_epi_archive)
 S3method(epix_slide,epi_archive)
 S3method(epix_slide,grouped_epi_archive)
 S3method(epix_truncate_versions_after,epi_archive)
@@ -75,7 +76,6 @@ export(epi_slide_mean)
 export(epi_slide_opt)
 export(epi_slide_sum)
 export(epix_as_of)
-export(epix_epi_slide_opt)
 export(epix_fill_through_version)
 export(epix_merge)
 export(epix_slide)
diff --git a/R/epix_epi_slide_opt.R b/R/epi_slide_opt_archive.R
similarity index 87%
rename from R/epix_epi_slide_opt.R
rename to R/epi_slide_opt_archive.R
index 5e446cdca..bf90313c2 100644
--- a/R/epix_epi_slide_opt.R
+++ b/R/epi_slide_opt_archive.R
@@ -1,4 +1,4 @@
-#' Core operation of `epix_epi_slide_opt` for a single epikey's history
+#' Core operation of `epi_slide_opt.epi_archive` for a single epikey's history
 #'
 #' @param updates tibble with two columns: `version` and `subtbl`; `subtbl` is a
 #'   list of tibbles, each with a `time_value` column and measurement columns.
@@ -41,10 +41,10 @@
 #'   tidyr::nest(.by = version, .key = "subtbl")
 #'
 #' updates %>%
-#'   epix_epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
+#'   epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
 #'
 #' @keywords internal
-epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
+epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
   # TODO check for col name clobbering
   unit_step <- epiprocess:::unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
@@ -124,21 +124,9 @@ epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f_dots_baked, f_
   result
 }
 
-# TODO just make this an epi_slide_opt impl?
-
+#' @method epi_slide_opt grouped_epi_archive
 #' @export
-epix_epi_slide_opt <-
-  function(.x, .col_names, .f, ...,
-           .window_size = NULL, .align = c("right", "center", "left"),
-           .prefix = NULL, .suffix = NULL, .new_col_names = NULL # ,
-           ## .ref_time_values = NULL, .all_rows = FALSE
-  ) {
-    UseMethod("epix_epi_slide_opt")
-  }
-
-#' @method epix_epi_slide_opt grouped_epi_archive
-#' @export
-epix_epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
+epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
   assert_set_equal(
     group_vars(.x),
     key_colnames(.x, exclude = c("time_value", "version"))
@@ -147,16 +135,17 @@ epix_epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
   orig_drop <- .x$private$drop
   .x %>%
     ungroup() %>%
-    epix_epi_slide_opt(...) %>%
+    epi_slide_opt(...) %>%
     group_by(pick(all_of(orig_group_vars)), .drop = orig_drop)
 }
-#' @method epix_epi_slide_opt epi_archive
+
+#' @method epi_slide_opt epi_archive
 #' @export
-epix_epi_slide_opt.epi_archive <-
+epi_slide_opt.epi_archive <-
   function(.x, .col_names, .f, ...,
            .window_size = NULL, .align = c("right", "center", "left"),
            .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
-           ## , .ref_time_values = NULL, .all_rows = FALSE
+           .ref_time_values = NULL, .all_rows = FALSE,
            .progress = FALSE) {
     # Extract metadata:
     time_type <- .x$time_type
@@ -175,6 +164,12 @@ epix_epi_slide_opt.epi_archive <-
     col_names_quo <- enquo(.col_names)
     names_info <- across_ish_names_info(.x$DT, time_type, col_names_quo, .f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
+    if (!is.null(.ref_time_values)) {
+      cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument")
+    }
+    if (!identical(.all_rows, FALSE)) {
+      cli_abort("epi_slide.epi_archive does not support the `.all_rows` argument")
+    }
     assert(
       checkmate::check_logical(.progress, any.missing = FALSE, len = 1L, names = "unnamed"),
       checkmate::check_string(.progress)
@@ -196,7 +191,7 @@ epix_epi_slide_opt.epi_archive <-
           nest(.by = version, .key = "subtbl") %>%
           arrange(version)
         # TODO move nesting inside the helper?
-        res <- epix_epi_slide_opt_one_epikey(group_updates, names_info$input_col_names, .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
+        res <- epi_slide_opt_archive_one_epikey(group_updates, names_info$input_col_names, .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
           list_rbind()
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
         res
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 44eaa1470..9dd7190c8 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -159,10 +159,23 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .windo
 
 #' Optimized slide functions for common cases
 #'
-#' @description `epi_slide_opt` allows sliding an n-timestep [data.table::froll]
-#' or [slider::summary-slide] function over variables in an `epi_df` object.
-#' These functions tend to be much faster than `epi_slide()`. See
-#' `vignette("epi_df")` for more examples.
+#' @description
+#'
+#' `epi_slide_opt` calculates n-time-step rolling means&sums,
+#' cumulative/"running" means&sums, or other operations supported by
+#' [`data.table::froll`] or [`slider::summary-slide`] functions.
+#'
+#' * On `epi_df`s, it will take care of looping over `geo_value`s, temporarily
+#'   filling in time gaps with `NA`s and other work needed to ensure there are
+#'   exactly n consecutive time steps per computation, and has some other
+#'   convenience features. See `vignette("epi_df")` for more examples.
+#'
+#' * On `epi_archive`s, it will calculate the version history for these slide
+#'   computations and combine it with the version history for the rest of the
+#'   columns.
+#'
+#' This function tends to be much faster than using `epi_slide()` and
+#' `epix_slide()` directly.
 #'
 #' @template basic-slide-params
 #' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
@@ -279,13 +292,26 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .windo
 #'     .suffix = "_{.n}{.time_unit_abbr}_median"
 #'   ) %>%
 #'   print(n = 40)
+#'
+#' # You can calculate entire version histories for the derived signals by
+#' # calling `epi_slide_opt()` on an `epi_archive`:
+#' case_death_rate_archive %>%
+#'   epi_slide_mean(case_rate, .window_size = 14)
+#'
+#' @export
 epi_slide_opt <- function(
     .x, .col_names, .f, ...,
     .window_size = NULL, .align = c("right", "center", "left"),
     .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
     .ref_time_values = NULL, .all_rows = FALSE) {
-  assert_class(.x, "epi_df")
+  UseMethod("epi_slide_opt")
+}
 
+#' @export
+epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
+                                 .window_size = NULL, .align = c("right", "center", "left"),
+                                 .prefix = NULL, .suffix = NULL, .new_col_names = NULL,
+                                 .ref_time_values = NULL, .all_rows = FALSE) {
   # Deprecated argument handling
   provided_args <- rlang::call_args_names(rlang::call_match())
   if (any(purrr::map_lgl(provided_args, ~ .x %in% c("x", "col_names", "f", "ref_time_values", "all_rows")))) {
diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index 946e7c981..1d5999578 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -134,10 +134,21 @@ added. It will be ungrouped if \code{.x} was ungrouped, and have the same groups
 as \code{.x} if \code{.x} was grouped.
 }
 \description{
-\code{epi_slide_opt} allows sliding an n-timestep \link[data.table:froll]{data.table::froll}
-or \link[slider:summary-slide]{slider::summary-slide} function over variables in an \code{epi_df} object.
-These functions tend to be much faster than \code{epi_slide()}. See
-\code{vignette("epi_df")} for more examples.
+\code{epi_slide_opt} calculates n-time-step rolling means&sums,
+cumulative/"running" means&sums, or other operations supported by
+\code{\link[data.table:froll]{data.table::froll}} or \code{\link[slider:summary-slide]{slider::summary-slide}} functions.
+\itemize{
+\item On \code{epi_df}s, it will take care of looping over \code{geo_value}s, temporarily
+filling in time gaps with \code{NA}s and other work needed to ensure there are
+exactly n consecutive time steps per computation, and has some other
+convenience features. See \code{vignette("epi_df")} for more examples.
+\item On \code{epi_archive}s, it will calculate the version history for these slide
+computations and combine it with the version history for the rest of the
+columns.
+}
+
+This function tends to be much faster than using \code{epi_slide()} and
+\code{epix_slide()} directly.
 
 \code{epi_slide_mean} is a wrapper around \code{epi_slide_opt} with \code{.f = data.table::frollmean}.
 
@@ -211,6 +222,12 @@ cases_deaths_subset \%>\%
     .suffix = "_{.n}{.time_unit_abbr}_median"
   ) \%>\%
   print(n = 40)
+
+# You can calculate entire version histories for the derived signals by
+# calling `epi_slide_opt()` on an `epi_archive`:
+case_death_rate_archive \%>\%
+  epi_slide_mean(case_rate, .window_size = 14)
+
 }
 \seealso{
 \code{\link{epi_slide}} for the more general slide function
diff --git a/man/epix_epi_slide_opt_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
similarity index 80%
rename from man/epix_epi_slide_opt_one_epikey.Rd
rename to man/epi_slide_opt_archive_one_epikey.Rd
index 9b3650f42..95f97909f 100644
--- a/man/epix_epi_slide_opt_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/epix_epi_slide_opt.R
-\name{epix_epi_slide_opt_one_epikey}
-\alias{epix_epi_slide_opt_one_epikey}
-\title{Core operation of \code{epix_epi_slide_opt} for a single epikey's history}
+% Please edit documentation in R/epi_slide_opt_archive.R
+\name{epi_slide_opt_archive_one_epikey}
+\alias{epi_slide_opt_archive_one_epikey}
+\title{Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history}
 \usage{
-epix_epi_slide_opt_one_epikey(
+epi_slide_opt_archive_one_epikey(
   updates,
   in_colnames,
   f_dots_baked,
@@ -45,7 +45,7 @@ for results}
 list of tibbles with same names as \code{subtbl}s plus: \code{c(out_colnames, "version")}; (compactified) diff data to put into an \code{epi_archive}
 }
 \description{
-Core operation of \code{epix_epi_slide_opt} for a single epikey's history
+Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
 }
 \examples{
 
@@ -68,7 +68,7 @@ updates <- bind_rows(
   tidyr::nest(.by = version, .key = "subtbl")
 
 updates \%>\%
-  epix_epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
+  epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
 
 }
 \keyword{internal}

From ddbd05d571a598ac685147d66b2d0976fc4748b6 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 5 Mar 2025 17:56:34 -0800
Subject: [PATCH 025/107] Clean up unnecessary comments and unused helper
 functions, +@keywords internal

---
 R/patch.R        | 35 +++++++++++------------------------
 man/tbl_diff2.Rd |  1 +
 man/tbl_patch.Rd |  1 +
 3 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 37af285f9..32b7dc96a 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -118,6 +118,8 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
 #'   `compactify_abs_tol`.
 #' @param compactify_abs_tol compactification tolerance; see `apply_compactify`
 #' @return a tibble in compact "update" (diff) format
+#'
+#' @keywords internal
 tbl_diff2 <- function(earlier_snapshot, later_tbl,
                       ukey_names,
                       later_format = c("snapshot", "update"),
@@ -152,7 +154,6 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
 
   # More input validation:
   if (!identical(tbl_names, names(later_tbl))) {
-    # XXX is this check actually necessary?
     cli_abort(c("`earlier_snapshot` and `later_tbl` should have identical column
                  names and ordering.",
       "*" = "`earlier_snapshot` colnames: {format_chr_deparse(tbl_names)}",
@@ -200,7 +201,6 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   combined_compactify_away[combined_ukey_is_repeat] <-
     approx_equal0(combined_vals,
       combined_vals,
-      # TODO move inds closer to vals to not be as confusing?
       abs_tol = compactify_abs_tol,
       na_equal = TRUE,
       inds1 = combined_ukey_is_repeat,
@@ -230,17 +230,6 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   combined_tbl
 }
 
-epi_diff2 <- function(earlier_snapshot, later_edf,
-                      later_format = c("snapshot", "update"),
-                      compactify_abs_tol = 0) {
-  ukey_names <- key_colnames(later_edf)
-  dplyr_reconstruct(tbl_diff2(as_tibble(earlier_snapshot), as_tibble(later_edf), ukey_names, later_format, compactify_abs_tol), later_edf)
-}
-
-# XXX vs. tbl_patch_apply?
-
-
-
 #' Apply an update (e.g., from `tbl_diff2`) to a snapshot
 #'
 #' @param snapshot tibble or `NULL`; entire data set as of some version, or
@@ -252,14 +241,12 @@ epi_diff2 <- function(earlier_snapshot, later_edf,
 #'   for `snapshot` and for `update`. Uniqueness is unchecked; if you don't have
 #'   this guaranteed, see [`check_ukey_unique()`].
 #' @return tibble; snapshot of the data set with the update applied.
+#'
+#' @keywords internal
 tbl_patch <- function(snapshot, update, ukey_names) {
   # Most input validation. This is a small function so use faster validation
   # variants:
   if (!is_tibble(update)) {
-    # XXX debating about whether to have a specialized class for updates/diffs.
-    # Seems nice for type-based reasoning and might remove some args from
-    # interfaces, but would require constructor/converter functions for that
-    # type.
     cli_abort("`update` must be a tibble")
   }
   if (is.null(snapshot)) {
@@ -271,6 +258,13 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   if (!is.character(ukey_names) || !all(ukey_names %in% names(snapshot))) {
     cli_abort("`ukey_names` must be a subset of column names")
   }
+  if (!identical(names(snapshot), names(update))) {
+    cli_abort(c("`snapshot` and `update` should have identical column
+                 names and ordering.",
+      "*" = "`snapshot` colnames: {format_chr_deparse(tbl_names)}",
+      "*" = "`update` colnames: {format_chr_deparse(names(update))}"
+    ))
+  }
 
   result_tbl <- vec_rbind(update, snapshot)
 
@@ -278,12 +272,5 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   not_overwritten <- dup_ids == vec_seq_along(result_tbl)
   result_tbl <- result_tbl[not_overwritten, ]
 
-  ## result_tbl <- arrange_canonical(result_tbl)
-
   result_tbl
 }
-
-epi_patch <- function(snapshot, update) {
-  ukey_names <- key_colnames(update)
-  dplyr_reconstruct(tbl_patch(as_tibble(snapshot), as_tibble(update), ukey_names), update)
-}
diff --git a/man/tbl_diff2.Rd b/man/tbl_diff2.Rd
index d9b652258..f4e393b95 100644
--- a/man/tbl_diff2.Rd
+++ b/man/tbl_diff2.Rd
@@ -42,3 +42,4 @@ a tibble in compact "update" (diff) format
 \description{
 Calculate compact patch to move from one snapshot/update to another
 }
+\keyword{internal}
diff --git a/man/tbl_patch.Rd b/man/tbl_patch.Rd
index 2b4e8288b..6d9eee270 100644
--- a/man/tbl_patch.Rd
+++ b/man/tbl_patch.Rd
@@ -24,3 +24,4 @@ tibble; snapshot of the data set with the update applied.
 \description{
 Apply an update (e.g., from \code{tbl_diff2}) to a snapshot
 }
+\keyword{internal}

From 10b0de10a8d1c0d162f06453a5acb4253afe2bcd Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 6 Mar 2025 10:08:32 -0800
Subject: [PATCH 026/107] approx_equal: make "abs_tol=" mandatory, +validation,
 +docs

---
 NAMESPACE                     |  6 +++
 R/archive.R                   |  6 +--
 R/epiprocess-package.R        |  5 ++
 R/patch.R                     | 98 +++++++++++++++++++++++++++++------
 man/approx_equal.Rd           | 69 +++++++++++++++++++-----
 man/approx_equal0.Rd          |  2 +-
 tests/testthat/test-archive.R |  2 +-
 7 files changed, 154 insertions(+), 34 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 49b5a3acf..66532c7cb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -58,6 +58,7 @@ S3method(ungroup,epi_df)
 S3method(ungroup,grouped_epi_archive)
 S3method(unnest,epi_df)
 export("%>%")
+export(approx_equal)
 export(arrange)
 export(arrange_canonical)
 export(as_epi_archive)
@@ -126,8 +127,12 @@ importFrom(checkmate,assert_subset)
 importFrom(checkmate,assert_tibble)
 importFrom(checkmate,checkInt)
 importFrom(checkmate,check_atomic)
+importFrom(checkmate,check_character)
 importFrom(checkmate,check_data_frame)
+importFrom(checkmate,check_logical)
 importFrom(checkmate,check_names)
+importFrom(checkmate,check_null)
+importFrom(checkmate,check_numeric)
 importFrom(checkmate,expect_class)
 importFrom(checkmate,test_int)
 importFrom(checkmate,test_set_equal)
@@ -257,6 +262,7 @@ importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
 importFrom(vctrs,"vec_slice<-")
+importFrom(vctrs,obj_is_vector)
 importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_cast_common)
 importFrom(vctrs,vec_data)
diff --git a/R/archive.R b/R/archive.R
index a9200a7ff..d27f7126b 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -493,12 +493,12 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
       FALSE, # first observation is not LOCF
       approx_equal0(ekts_tbl,
         inds1 = inds1, ekts_tbl, inds2 = inds2,
-        # check ekt cols without tolerance:
-        abs_tol = 0, na_equal = TRUE
+        # check ekt (key) cols with 0 tolerance:
+        na_equal = TRUE, abs_tol = 0
       ) &
         approx_equal0(vals_tbl,
           inds1 = inds1, vals_tbl, inds2 = inds2,
-          abs_tol = abs_tol, na_equal = TRUE
+          na_equal = TRUE, abs_tol = abs_tol
         )
     )
   }
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 89e785397..f73f67a4d 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -12,7 +12,11 @@
 #' @importFrom checkmate assert_subset
 #' @importFrom checkmate assert_tibble
 #' @importFrom checkmate check_atomic check_data_frame expect_class test_int
+#' @importFrom checkmate check_character
+#' @importFrom checkmate check_logical
 #' @importFrom checkmate check_names
+#' @importFrom checkmate check_null
+#' @importFrom checkmate check_numeric
 #' @importFrom checkmate test_subset test_set_equal vname
 #' @importFrom cli cli_abort cli_warn
 #' @importFrom cli pluralize
@@ -36,6 +40,7 @@
 #' @importFrom tibble is_tibble
 #' @importFrom tidyr nest
 #' @importFrom tools toTitleCase
+#' @importFrom vctrs obj_is_vector
 #' @importFrom vctrs vec_cast
 #' @importFrom vctrs vec_cast_common
 #' @importFrom vctrs vec_data
diff --git a/R/patch.R b/R/patch.R
index 32b7dc96a..39f69417f 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -1,22 +1,88 @@
 #' Test two vctrs vectors for equality with some tolerance in some cases
 #'
-#' @param vec1,vec2 vctrs vectors (includes data frames)
-#' @param abs_tol tolerance; will be used for bare numeric `vec1`, `vec2`, or
-#'   any such columns within `vec1`, `vec2` if they are data frames
+#' Similar to [`vctrs::vec_equal`]. Behavior may differ from `vec_equal` with
+#' non-`NA` `NaN`s involved, or for bare lists that contain named vectors, and
+#' the precise behavior in these cases may change and should not be relied upon.
+#'
+#' @param vec1,vec2 vctrs vectors (includes data frames). Take care when using
+#'   on named vectors or "keyed" data frames; [`vec_names()`] are largely
+#'   ignored, and key columns are treated as normal value columns (when they
+#'   should probably generate an error if they are not lined up correctly, or be
+#'   tested for exact rather than approximate equality).
 #' @param na_equal should `NA`s be considered equal to each other? (In
 #'   epiprocess, we usually want this to be `TRUE`, but that doesn't match the
 #'   [`vctrs::vec_equal()`] default, so this is mandatory.)
-#' @param .ptype as in [`vctrs::vec_equal()`]
-#' @param inds1,inds2 optional (row) indices into vec1 and vec2; output should
-#'   be consistent with `vec_slice`-ing to these indices beforehand, but can
-#'   give faster computation if `vec1` and `vec2` are data frames.
+#' @param abs_tol absolute tolerance; will be used for bare numeric `vec1`,
+#'   `vec2`, or any such columns within `vec1`, `vec2` if they are data frames.
+#' @param .ptype as in [`vctrs::vec_equal()`].
+#' @param inds1,inds2 optional (row) indices into vec1 and vec2 compatible with
+#'   [`vctrs::vec_slice()`]; output should be consistent with `vec_slice`-ing to
+#'   these indices beforehand, but can give faster computation if `vec1` and
+#'   `vec2` are data frames.
 #'
 #' @return logical vector, with length matching the result of recycling `vec1`
 #'   (at `inds1` if provided) and `vec2` (at `inds2` if provided); entries
-#'   should all be `TRUE` or `FALSE` if `na_equal = TRUE`. Behavior may differ
-#'   from `vec_equal` with non-`NA` `NaN`s involved, or for bare lists that
-#'   contain named vectors.
-approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
+#'   should all be `TRUE` or `FALSE` if `na_equal = TRUE`.
+#'
+#' @examples
+#'
+#' # On numeric vectors:
+#' approx_equal(
+#'   c(1, 2, 3, NA),
+#'   c(1, 2 + 1e-10, NA, NA),
+#'   na_equal = TRUE,
+#'   abs_tol = 1e-8
+#' )
+#'
+#' # On tibbles:
+#' tbl1 <- tibble(
+#'   a = 1:5,
+#'   b = list(1:5, 1:4, 1:3, 1:2, 1:1) %>% lapply(as.numeric),
+#'   c = tibble(
+#'     c1 = 1:5
+#'   ),
+#'   d = matrix(1:10, 5, 2)
+#' )
+#' tbl2 <- tbl1
+#' tbl2$a[[2]] <- tbl1$a[[2]] + 1e-10
+#' tbl2$b[[3]][[1]] <- tbl1$b[[3]][[1]] + 1e-10
+#' tbl2$c$c1[[4]] <- tbl1$c$c1[[4]] + 1e-10
+#' tbl2$d[[5, 2]] <- tbl1$d[[5, 2]] + 1e-10
+#' vctrs::vec_equal(tbl1, tbl2, na_equal = TRUE)
+#' approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
+#' approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
+#'
+#'
+#'
+#'
+#'
+#' # Type comparison within lists is stricter, matching vctrs:
+#' vctrs::vec_equal(list(1:2), list(as.numeric(1:2)))
+#' approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
+#'
+#' @export
+approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol, inds1 = NULL, inds2 = NULL) {
+  if (!obj_is_vector(vec1)) cli_abort("`vec1` must be recognized by vctrs as a vector")
+  if (!obj_is_vector(vec2)) cli_abort("`vec2` must be recognized by vctrs as a vector")
+  # Leave vec size checking to vctrs recycling ops.
+  assert_logical(na_equal, any.missing = FALSE, len = 1L)
+  # Leave .ptype checks to cast operation.
+  check_dots_empty()
+  assert_numeric(abs_tol, lower = 0, len = 1L)
+  assert(
+    check_null(inds1),
+    check_numeric(inds1),
+    check_logical(inds1),
+    check_character(inds1)
+  )
+  assert(
+    check_null(inds2),
+    check_numeric(inds2),
+    check_logical(inds2),
+    check_character(inds2)
+  )
+  # Leave heavier index validation to the vctrs recycling & indexing ops.
+
   # Recycle inds if provided; vecs if not:
   common_size <- vec_size_common(
     if (is.null(inds1)) vec1 else inds1,
@@ -33,13 +99,13 @@ approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = N
     inds2 <- vec_recycle(inds2, common_size)
   }
   vecs <- vec_cast_common(vec1, vec2, .to = .ptype)
-  approx_equal0(vecs[[1]], vecs[[2]], abs_tol, na_equal, inds1, inds2)
+  approx_equal0(vecs[[1]], vecs[[2]], na_equal, abs_tol, inds1, inds2)
 }
 
 #' Helper for [`approx_equal`] for vecs guaranteed to have the same ptype and size
 #'
 #' @keywords internal
-approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = NULL) {
+approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL) {
   if (is_bare_numeric(vec1) && abs_tol != 0) {
     # perf: since we're working with bare numerics and logicals: we can use `[`
     # and `fifelse`. Matching vec_equal, we ignore names and other attributes.
@@ -66,7 +132,7 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
       rep(TRUE, nrow(vec1))
     } else {
       Reduce(`&`, lapply(seq_len(ncol(vec1)), function(col_i) {
-        approx_equal0(vec1[[col_i]], vec2[[col_i]], abs_tol, na_equal, inds1, inds2)
+        approx_equal0(vec1[[col_i]], vec2[[col_i]], na_equal, abs_tol, inds1, inds2)
       }))
     }
   } else if (is_bare_list(vec1)) {
@@ -78,7 +144,7 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
         # consistently inconsistent, we avoid dispatching to vec_equal for bare
         # lists even with abs_tol = 0:
         identical(vec_ptype(entry1), vec_ptype(entry2)) &&
-        all(approx_equal0(entry1, entry2, abs_tol, na_equal))
+        all(approx_equal0(entry1, entry2, na_equal, abs_tol))
     }, logical(1L))
   } else {
     # XXX No special handling for any other types/situations. Makes sense for
@@ -201,8 +267,8 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   combined_compactify_away[combined_ukey_is_repeat] <-
     approx_equal0(combined_vals,
       combined_vals,
-      abs_tol = compactify_abs_tol,
       na_equal = TRUE,
+      abs_tol = compactify_abs_tol,
       inds1 = combined_ukey_is_repeat,
       inds2 = ukey_repeat_first_i
     )
diff --git a/man/approx_equal.Rd b/man/approx_equal.Rd
index 4eac410f0..237b33be4 100644
--- a/man/approx_equal.Rd
+++ b/man/approx_equal.Rd
@@ -7,36 +7,79 @@
 approx_equal(
   vec1,
   vec2,
-  abs_tol,
   na_equal,
   .ptype = NULL,
+  ...,
+  abs_tol,
   inds1 = NULL,
   inds2 = NULL
 )
 }
 \arguments{
-\item{vec1, vec2}{vctrs vectors (includes data frames)}
-
-\item{abs_tol}{tolerance; will be used for bare numeric \code{vec1}, \code{vec2}, or
-any such columns within \code{vec1}, \code{vec2} if they are data frames}
+\item{vec1, vec2}{vctrs vectors (includes data frames). Take care when using
+on named vectors or "keyed" data frames; \code{\link[=vec_names]{vec_names()}} are largely
+ignored, and key columns are treated as normal value columns (when they
+should probably generate an error if they are not lined up correctly, or be
+tested for exact rather than approximate equality).}
 
 \item{na_equal}{should \code{NA}s be considered equal to each other? (In
 epiprocess, we usually want this to be \code{TRUE}, but that doesn't match the
 \code{\link[vctrs:vec_equal]{vctrs::vec_equal()}} default, so this is mandatory.)}
 
-\item{.ptype}{as in \code{\link[vctrs:vec_equal]{vctrs::vec_equal()}}}
+\item{.ptype}{as in \code{\link[vctrs:vec_equal]{vctrs::vec_equal()}}.}
+
+\item{abs_tol}{absolute tolerance; will be used for bare numeric \code{vec1},
+\code{vec2}, or any such columns within \code{vec1}, \code{vec2} if they are data frames.}
 
-\item{inds1, inds2}{optional (row) indices into vec1 and vec2; output should
-be consistent with \code{vec_slice}-ing to these indices beforehand, but can
-give faster computation if \code{vec1} and \code{vec2} are data frames.}
+\item{inds1, inds2}{optional (row) indices into vec1 and vec2 compatible with
+\code{\link[vctrs:vec_slice]{vctrs::vec_slice()}}; output should be consistent with \code{vec_slice}-ing to
+these indices beforehand, but can give faster computation if \code{vec1} and
+\code{vec2} are data frames.}
 }
 \value{
 logical vector, with length matching the result of recycling \code{vec1}
 (at \code{inds1} if provided) and \code{vec2} (at \code{inds2} if provided); entries
-should all be \code{TRUE} or \code{FALSE} if \code{na_equal = TRUE}. Behavior may differ
-from \code{vec_equal} with non-\code{NA} \code{NaN}s involved, or for bare lists that
-contain named vectors.
+should all be \code{TRUE} or \code{FALSE} if \code{na_equal = TRUE}.
 }
 \description{
-Test two vctrs vectors for equality with some tolerance in some cases
+Similar to \code{\link[vctrs:vec_equal]{vctrs::vec_equal}}. Behavior may differ from \code{vec_equal} with
+non-\code{NA} \code{NaN}s involved, or for bare lists that contain named vectors, and
+the precise behavior in these cases may change and should not be relied upon.
+}
+\examples{
+
+# On numeric vectors:
+approx_equal(
+  c(1, 2, 3, NA),
+  c(1, 2 + 1e-10, NA, NA),
+  na_equal = TRUE,
+  abs_tol = 1e-8
+)
+
+# On tibbles:
+tbl1 <- tibble(
+  a = 1:5,
+  b = list(1:5, 1:4, 1:3, 1:2, 1:1) \%>\% lapply(as.numeric),
+  c = tibble(
+    c1 = 1:5
+  ),
+  d = matrix(1:10, 5, 2)
+)
+tbl2 <- tbl1
+tbl2$a[[2]] <- tbl1$a[[2]] + 1e-10
+tbl2$b[[3]][[1]] <- tbl1$b[[3]][[1]] + 1e-10
+tbl2$c$c1[[4]] <- tbl1$c$c1[[4]] + 1e-10
+tbl2$d[[5, 2]] <- tbl1$d[[5, 2]] + 1e-10
+vctrs::vec_equal(tbl1, tbl2, na_equal = TRUE)
+approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
+approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
+
+
+
+
+
+# Type comparison within lists is stricter, matching vctrs:
+vctrs::vec_equal(list(1:2), list(as.numeric(1:2)))
+approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
+
 }
diff --git a/man/approx_equal0.Rd b/man/approx_equal0.Rd
index f7a69f1fe..9726f5013 100644
--- a/man/approx_equal0.Rd
+++ b/man/approx_equal0.Rd
@@ -4,7 +4,7 @@
 \alias{approx_equal0}
 \title{Helper for \code{\link{approx_equal}} for vecs guaranteed to have the same ptype and size}
 \usage{
-approx_equal0(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = NULL)
+approx_equal0(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL)
 }
 \description{
 Helper for \code{\link{approx_equal}} for vecs guaranteed to have the same ptype and size
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 12d9ed413..4d1be4c52 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -220,5 +220,5 @@ test_that("`epi_archive` rejects dataframes where time_value and version columns
 test_that("is_locf replacement works as expected", {
   vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
   is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
-  expect_equal(c(FALSE, approx_equal(head(vec, -1L), tail(vec, -1L), .Machine$double.eps^0.5, na_equal = TRUE)), as.logical(is_repeated))
+  expect_equal(c(FALSE, approx_equal(head(vec, -1L), tail(vec, -1L), na_equal = TRUE, abs_tol = .Machine$double.eps^0.5)), as.logical(is_repeated))
 })

From 2b7d786d9874b64f9e77959970bab71c1a2ed57a Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 6 Mar 2025 10:28:46 -0800
Subject: [PATCH 027/107] Expand epi_slide_opt_archive_one_epikey example

---
 R/epi_slide_opt_archive.R | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index bf90313c2..1a6faad26 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -24,24 +24,20 @@
 #'
 #' library(dplyr)
 #' updates <- bind_rows(
-#'   tibble(
-#'     version = 40, time_value = 1:10, value = 1:10
-#'   ),
-#'   tibble(
-#'     version = 12, time_value = 2:3, value = 3:2
-#'   ),
-#'   tibble(
-#'     version = 13, time_value = 6, value = 7,
-#'   ),
-#'   tibble(
-#'     version = 13, time_value = 7, value = NA,
-#'   )
+#'   tibble(version = 10, time_value = 1:20, value = 1:20),
+#'   tibble(version = 12, time_value = 4:5, value = 5:4),
+#'   tibble(version = 13, time_value = 8, value = 9),
+#'   tibble(version = 14, time_value = 11, value = NA),
+#'   tibble(version = 15, time_value = -10, value = -10),
+#'   tibble(version = 16, time_value = 50, value = 50)
 #' ) %>%
 #'   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x)) %>%
 #'   tidyr::nest(.by = version, .key = "subtbl")
 #'
+#' f <- purrr::partial(data.table::frollmean, algo = "exact")
+#'
 #' updates %>%
-#'   epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
+#'   epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
 #'
 #' @keywords internal
 epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {

From a7bdd0d3f9fca77f07f28b169115bd36f7a4f51a Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 6 Mar 2025 11:22:16 -0800
Subject: [PATCH 028/107] WIP epi_slide_opt.epi_archive tests

---
 tests/testthat/test-epi_slide_opt_archive.R | 74 +++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 tests/testthat/test-epi_slide_opt_archive.R

diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
new file mode 100644
index 000000000..0f9f0b82b
--- /dev/null
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -0,0 +1,74 @@
+library(dplyr)
+
+test_that("epi_slide_opt_archive_one_epikey works as expected", {
+  start_date <- as.Date("2020-01-01")
+
+  updates <- bind_rows(
+    tibble(version = 10, time_value = 0:20, value = 0:20),
+    tibble(version = 12, time_value = 4:5, value = 5:4),
+    tibble(version = 13, time_value = 8, value = 9),
+    tibble(version = 14, time_value = 11, value = NA),
+    tibble(version = 15, time_value = -10, value = -10),
+    tibble(version = 16, time_value = 50, value = 50)
+  ) %>%
+    mutate(across(c(version, time_value), ~ start_date - 1 + .x)) %>%
+    tidyr::nest(.by = version, .key = "subtbl")
+
+  expected <- list(
+    vctrs::vec_cbind(
+      tibble(version = 10),
+      updates$subtbl[[1L]] %>%
+        mutate(time_value = as.numeric(time_value - start_date) + 1) %>%
+        mutate(slide_value = frollmean(value, 3, algo = "exact"))
+    ),
+    tibble(
+      version = 12,
+      time_value = c(4, 5, 7), # time 6 unchanged, compactified away
+      # time 7 `value` unchanged, but here because `slide_value` changed:
+      value = c(5, 4, 7),
+      slide_value = c(
+        mean(c(2, 3, 5)),
+        # time 5 `slide_value` unchanged, but here because `value` changed:
+        mean(c(3, 5, 4)),
+        mean(c(4, 6, 7))
+      )
+    ),
+    tibble(
+      version = 13, time_value = 8:10, value = c(9, 9, 10),
+      slide_value = frollmean(c(6, 7, 9, 9, 10),  3,  algo = "exact")[-(1:2)]
+    ),
+    tibble(
+      version = 14, time_value = 11:13, value = c(NA, 12, 13), slide_value = rep(NA_real_, 3L)
+    ),
+    tibble(
+      version = 15, time_value = -10, value = -10, slide_value = NA_real_
+    ),
+    tibble(
+      version = 16, time_value = 50, value = 50, slide_value = NA_real_
+    )
+  ) %>%
+    lapply(function(x) {
+      x %>%
+        mutate(across(c(version, time_value), ~ start_date - 1 + .x))
+    })
+
+  f <- purrr::partial(data.table::frollmean, algo = "exact")
+
+  result <- updates %>%
+    epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
+
+  expect_equal(
+    result %>% lapply(function(x) {
+      x %>%
+        arrange(time_value) %>%
+        select(version, time_value, everything())
+    })
+   ,
+    expected
+  )
+
+  # TODO check about version nesting ordering
+
+})
+
+# TODO tests on example data sets

From eb5b020f46e46874164ea1affbf6a8e1cfbc280b Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 7 Mar 2025 09:59:43 -0800
Subject: [PATCH 029/107] More WIP on tests

---
 tests/testthat/test-epi_slide_opt_archive.R | 78 ++++++++++++++++++---
 1 file changed, 69 insertions(+), 9 deletions(-)

diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index 0f9f0b82b..3cea0891b 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -55,20 +55,80 @@ test_that("epi_slide_opt_archive_one_epikey works as expected", {
   f <- purrr::partial(data.table::frollmean, algo = "exact")
 
   result <- updates %>%
-    epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
-
-  expect_equal(
-    result %>% lapply(function(x) {
+    epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value") %>%
+    lapply(function(x) {
       x %>%
         arrange(time_value) %>%
         select(version, time_value, everything())
     })
-   ,
-    expected
-  )
 
-  # TODO check about version nesting ordering
+  expect_equal(result, expected)
+})
 
+
+test_that("epi_slide_opt.epi_archive is not confused by unique(DT$version) unsorted", {
+  start_date <- as.Date("2020-01-01")
+  tibble(
+    geo_value = 1,
+    time_value = start_date - 1 + 1:4,
+    version = start_date - 1 + c(5, 5, 4, 4),
+    value = c(1, 2, 3, 4)
+  ) %>%
+    as_epi_archive() %>%
+    epi_slide_opt(value, frollmean, .window_size = 2L) %>%
+    expect_equal(
+      tibble(
+        geo_value = 1,
+        time_value = start_date - 1 + c(1, 2, 3, 3, 4),
+        version = start_date - 1 + c(5, 5, 4, 5, 4),
+        value = c(1, 2, 3, 3, 4),
+        value_2dav = c(NA, 1.5, NA, 2.5, 3.5)
+      ) %>%
+        as_epi_archive()
+    )
 })
 
-# TODO tests on example data sets
+test_that("epi_slide_opt.epi_archive is not confused by unique(DT$time_value) unsorted", {
+
+  start_date <- as.Date("2020-01-01")
+  tibble(
+    geo_value = c(1, 1, 2, 2),
+    time_value = start_date - 1 + c(2, 3, 1, 2),
+    version = start_date - 1 + c(1, 2, 2, 2),
+    value = c(1, 2, 3, 4)
+  ) %>%
+    as_epi_archive() %>%
+    epi_slide_opt(value, frollmean, .window_size = 2L) %>%
+    expect_equal(
+      tibble(
+        geo_value = c(1, 1, 2, 2),
+        time_value = start_date - 1 + c(2, 3, 1, 2),
+        version = start_date - 1 + c(1, 2, 2, 2),
+        value = c(1, 2, 3, 4),
+        value_2dav = c(NA, 1.5, NA, 3.5)
+      ) %>%
+        as_epi_archive()
+    )
+
+})
+
+test_that("epi_slide_opt.epi_archive is equivalent to epix_slide reconversion on example data", {
+
+  case_death_rate_archive %>%
+    epi_slide_opt(case_rate, frollmean, .window_size = 7
+                  # , algo = "exact"
+                  ) %>%
+    .$DT %>%
+    as.data.frame() %>%
+    as_tibble() %>%
+    filter(!approx_equal(case_rate_7dav, case_rate_7d_av, 1e-6, TRUE)) %>%
+    dplyr::transmute(version, geo_value, time_value, case_rate_7dav, case_rate_7d_av,
+                     abs_diff = abs(case_rate_7dav - case_rate_7d_av)) %>%
+    {}
+
+    # TODO finish tests on example data sets
+
+  })
+
+
+# TODO grouped behavior checks

From c674e973eb11615e7a928e2678e4c3ad2962f1de Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 7 Mar 2025 13:46:00 -0800
Subject: [PATCH 030/107] Mark renaming TODO on approx_equal

---
 R/patch.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/patch.R b/R/patch.R
index 39f69417f..bf42e9250 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -340,3 +340,5 @@ tbl_patch <- function(snapshot, update, ukey_names) {
 
   result_tbl
 }
+
+# TODO rename approx_equal to vec_approx_equal

From 3072b3343ab721a0fbf059ed6e7bc26752245f08 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 03:14:12 -0700
Subject: [PATCH 031/107] fix!: as_epi_archive(tibble) key setting; + distrust
 key if data.table

---
 NAMESPACE              |  2 ++
 NEWS.md                |  9 +++++++
 R/archive.R            | 57 +++++++++++++++++++++---------------------
 R/epiprocess-package.R |  2 ++
 4 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 66532c7cb..7ec1bf1e9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -125,6 +125,7 @@ importFrom(checkmate,assert_scalar)
 importFrom(checkmate,assert_string)
 importFrom(checkmate,assert_subset)
 importFrom(checkmate,assert_tibble)
+importFrom(checkmate,assert_true)
 importFrom(checkmate,checkInt)
 importFrom(checkmate,check_atomic)
 importFrom(checkmate,check_character)
@@ -161,6 +162,7 @@ importFrom(data.table,key)
 importFrom(data.table,rbindlist)
 importFrom(data.table,set)
 importFrom(data.table,setDF)
+importFrom(data.table,setDT)
 importFrom(data.table,setcolorder)
 importFrom(data.table,setkeyv)
 importFrom(dplyr,"%>%")
diff --git a/NEWS.md b/NEWS.md
index 39274ff4a..52330c483 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,15 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 ## New features
 
 - `is_epi_archive` function has been reintroduced.
+## Breaking changes
+
+- `new_epi_archive()`'s `x` argument has been replaced with a `data_table`
+  argument, which must be a `data.table` with the key already set appropriately.
+
+## Bug fixes
+
+- `as_epi_archive()` no longer has issues setting its `DT`'s `key` on some
+  versions of `{data.table}` when `x` is a tibble.
 
 # epiprocess 0.11
 
diff --git a/R/archive.R b/R/archive.R
index d27f7126b..a5bc02a27 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -278,41 +278,22 @@ next_after.Date <- function(x) x + 1L
 #' @order 3
 #' @export
 new_epi_archive <- function(
-    x,
+    data_table,
     geo_type,
     time_type,
     other_keys,
     clobberable_versions_start,
     versions_end) {
-  assert_data_frame(x)
+  assert_class(data_table, "data.table")
   assert_string(geo_type)
   assert_string(time_type)
   assert_character(other_keys, any.missing = FALSE)
   if (any(c("geo_value", "time_value", "version") %in% other_keys)) {
     cli_abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".")
   }
-  validate_version_bound(clobberable_versions_start, x, na_ok = TRUE)
-  validate_version_bound(versions_end, x, na_ok = FALSE)
-
-  key_vars <- c("geo_value", "time_value", other_keys, "version")
-  if (!all(key_vars %in% names(x))) {
-    # Give a more tailored error message than as.data.table would:
-    cli_abort(c(
-      "`x` is missing the following expected columns:
-       {format_varnames(setdiff(key_vars, names(x)))}.",
-      ">" = "You might need to `dplyr::rename()` beforehand
-                       or use `as_epi_archive()`'s renaming feature.",
-      ">" = if (!all(other_keys %in% names(x))) {
-        "Check also for typos in `other_keys`."
-      }
-    ))
-  }
-
-  # Create the data table; if x was an un-keyed data.table itself,
-  # then the call to as.data.table() will fail to set keys, so we
-  # need to check this, then do it manually if needed
-  data_table <- as.data.table(x, key = key_vars)
-  if (!identical(key_vars, key(data_table))) setkeyv(data_table, cols = key_vars)
+  assert_true(identical(key(data_table), c("geo_value", "time_value", other_keys, "version")))
+  validate_version_bound(clobberable_versions_start, data_table, na_ok = TRUE)
+  validate_version_bound(versions_end, data_table, na_ok = FALSE)
 
   structure(
     list(
@@ -529,11 +510,32 @@ as_epi_archive <- function(
     .versions_end = max_version_with_row_in(x), ...,
     versions_end = .versions_end) {
   assert_data_frame(x)
+  # Convert first to data.frame to guard against data.table#6859 and potentially
+  # other things epiprocess#618:
+  x_already_copied <- identical(class(x), c("data.table", "data.frame"))
+  x <- as.data.frame(x)
   x <- rename(x, ...)
-  x <- guess_column_name(x, "time_value", time_column_names())
   x <- guess_column_name(x, "geo_value", geo_column_names())
+  if (!all(other_keys %in% names(x))) {
+    # Give a more tailored error message than as.data.table would:
+    cli_abort(c(
+      "`x` is missing the following expected columns:
+       {format_varnames(setdiff(other_keys, names(x)))}.",
+      ">" = "You might need to `dplyr::rename()` beforehand
+             or using `as_epi_archive()`'s renaming feature."
+    ))
+  }
+  x <- guess_column_name(x, "time_value", time_column_names())
   x <- guess_column_name(x, "version", version_column_names())
 
+  # Convert to data.table:
+  key_vars <- c("geo_value", "time_value", other_keys, "version")
+  if (x_already_copied) {
+    setDT(x, key = key_vars)
+  } else {
+    x <- as.data.table(x, key = key_vars)
+  }
+
   if (lifecycle::is_present(geo_type)) {
     cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
   }
@@ -554,11 +556,10 @@ as_epi_archive <- function(
     cli_abort('`compactify` must be `TRUE`, `FALSE`, or `"message"`')
   }
 
-  data_table <- result$DT
-  key_vars <- key(data_table)
+  data_table <- result$DT # probably just `x`, but take no chances
 
   nrow_before_compactify <- nrow(data_table)
-  # Runs compactify on data frame
+  # Runs compactify on data_table
   if (identical(compactify, TRUE) || identical(compactify, "message")) {
     compactified <- apply_compactify(data_table, key_vars, compactify_abs_tol)
   } else {
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index f73f67a4d..195bc0258 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -11,6 +11,7 @@
 #' @importFrom checkmate assert_string
 #' @importFrom checkmate assert_subset
 #' @importFrom checkmate assert_tibble
+#' @importFrom checkmate assert_true
 #' @importFrom checkmate check_atomic check_data_frame expect_class test_int
 #' @importFrom checkmate check_character
 #' @importFrom checkmate check_logical
@@ -25,6 +26,7 @@
 #' @importFrom data.table fifelse
 #' @importFrom data.table key
 #' @importFrom data.table setcolorder
+#' @importFrom data.table setDT
 #' @importFrom data.table setkeyv
 #' @importFrom dplyr arrange
 #' @importFrom dplyr grouped_df

From 279c842f67e3d2d9ccdf61554176c69b068bce53 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 03:28:52 -0700
Subject: [PATCH 032/107] Make epi_archive key order geo !!!other time version

---
 NEWS.md                       |  2 ++
 R/archive.R                   |  6 +++---
 tests/testthat/test-archive.R | 12 ++++++------
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 52330c483..1218da12f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -11,6 +11,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 
 - `new_epi_archive()`'s `x` argument has been replaced with a `data_table`
   argument, which must be a `data.table` with the key already set appropriately.
+  The `key()` of its `DT` will also now place `other_keys` before rather than after
+  `"time_value"`.
 
 ## Bug fixes
 
diff --git a/R/archive.R b/R/archive.R
index a5bc02a27..2e6ae5497 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -291,7 +291,7 @@ new_epi_archive <- function(
   if (any(c("geo_value", "time_value", "version") %in% other_keys)) {
     cli_abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".")
   }
-  assert_true(identical(key(data_table), c("geo_value", "time_value", other_keys, "version")))
+  assert_true(identical(key(data_table), c("geo_value", other_keys, "time_value", "version")))
   validate_version_bound(clobberable_versions_start, data_table, na_ok = TRUE)
   validate_version_bound(versions_end, data_table, na_ok = FALSE)
 
@@ -319,7 +319,7 @@ new_epi_archive <- function(
 validate_epi_archive <- function(x) {
   assert_class(x, "epi_archive")
 
-  ukey_vars1 <- c("geo_value", "time_value", x$other_keys, "version")
+  ukey_vars1 <- c("geo_value", x$other_keys, "time_value", "version")
   ukey_vars2 <- key(x$DT)
   if (!identical(ukey_vars1, ukey_vars2)) {
     cli_abort(c("`data.table::key(x$DT)` not as expected",
@@ -529,7 +529,7 @@ as_epi_archive <- function(
   x <- guess_column_name(x, "version", version_column_names())
 
   # Convert to data.table:
-  key_vars <- c("geo_value", "time_value", other_keys, "version")
+  key_vars <- c("geo_value", other_keys, "time_value", "version")
   if (x_already_copied) {
     setDT(x, key = key_vars)
   } else {
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 4d1be4c52..051503e05 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -92,7 +92,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
   expect_equal(key(ea1$DT), c("geo_value", "time_value", "version"))
 
   ea2 <- as_epi_archive(df, other_keys = "value", compactify = FALSE)
-  expect_equal(key(ea2$DT), c("geo_value", "time_value", "value", "version"))
+  expect_equal(key(ea2$DT), c("geo_value", "value", "time_value", "version"))
 
   # Tibble
   tib <- tibble::tibble(df, code = "x")
@@ -101,7 +101,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
   expect_equal(key(ea3$DT), c("geo_value", "time_value", "version"))
 
   ea4 <- as_epi_archive(tib, other_keys = "code", compactify = FALSE)
-  expect_equal(key(ea4$DT), c("geo_value", "time_value", "code", "version"))
+  expect_equal(key(ea4$DT), c("geo_value", "code", "time_value", "version"))
 
   # Keyed data.table
   kdt <- data.table::data.table(
@@ -119,7 +119,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
 
   ea6 <- as_epi_archive(kdt, other_keys = "value", compactify = FALSE)
   # Mismatched keys, but the one from as_epi_archive overrides
-  expect_equal(key(ea6$DT), c("geo_value", "time_value", "value", "version"))
+  expect_equal(key(ea6$DT), c("geo_value", "value", "time_value", "version"))
 
   # Unkeyed data.table
   udt <- data.table::data.table(
@@ -134,7 +134,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
   expect_equal(key(ea7$DT), c("geo_value", "time_value", "version"))
 
   ea8 <- as_epi_archive(udt, other_keys = "code", compactify = FALSE)
-  expect_equal(key(ea8$DT), c("geo_value", "time_value", "code", "version"))
+  expect_equal(key(ea8$DT), c("geo_value", "code", "time_value", "version"))
 
   # epi_df
   edf1 <- cases_deaths_subset %>%
@@ -145,7 +145,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
   expect_equal(key(ea9$DT), c("geo_value", "time_value", "version"))
 
   ea10 <- as_epi_archive(edf1, other_keys = "code", compactify = FALSE)
-  expect_equal(key(ea10$DT), c("geo_value", "time_value", "code", "version"))
+  expect_equal(key(ea10$DT), c("geo_value", "code", "time_value", "version"))
 
   # Keyed epi_df
   edf2 <- data.frame(
@@ -164,7 +164,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
   expect_equal(key(ea11$DT), c("geo_value", "time_value", "version"))
 
   ea12 <- as_epi_archive(edf2, other_keys = "misc", compactify = FALSE)
-  expect_equal(key(ea12$DT), c("geo_value", "time_value", "misc", "version"))
+  expect_equal(key(ea12$DT), c("geo_value", "misc", "time_value", "version"))
 })
 
 test_that("`epi_archive` rejects nonunique keys", {

From c7704af0e2bab2f24b6ea49aa9e3aa76fc50b6c0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 03:34:01 -0700
Subject: [PATCH 033/107] docs(new_epi_archive): roxygen2 for new param
 requirements

---
 R/archive.R                             |  6 +++--
 man/epi_archive.Rd                      | 36 +++++++++++++++++++++++--
 man/epi_slide_opt_archive_one_epikey.Rd | 22 +++++++--------
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/R/archive.R b/R/archive.R
index 2e6ae5497..3935709ac 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -186,8 +186,8 @@ next_after.Date <- function(x) x + 1L
 #'  archive. Unexpected behavior may result from modifying the metadata
 #'  directly.
 #'
-#' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
-#'   `time_value`, `version`, and then any additional number of columns.
+#' @param data_table a data.table with [`data.table::key()`] equal to
+#'   `c("geo_value", other_keys, "time_value", "version")`.
 #' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
 #' location column and set to "custom" if not recognized.
 #' @param time_type DEPRECATED Has no effect. Time value type inferred from the time
@@ -488,6 +488,8 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
 #' `as_epi_archive` converts a data frame, data table, or tibble into an
 #' `epi_archive` object.
 #'
+#' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
+#'   `time_value`, `version`, and then any additional number of columns.
 #' @param ... used for specifying column names, as in [`dplyr::rename`]. For
 #'   example `version = release_date`
 #' @param .versions_end location based versions_end, used to avoid prefix
diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index b92cd5057..fd6e8b39e 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -9,6 +9,17 @@
 \title{\code{as_epi_archive} converts a data frame, data table, or tibble into an
 \code{epi_archive} object.}
 \usage{
+new_epi_archive(
+  data_table,
+  geo_type,
+  time_type,
+  other_keys,
+  clobberable_versions_start,
+  versions_end
+)
+
+validate_epi_archive(x)
+
 as_epi_archive(
   x,
   geo_type = deprecated(),
@@ -36,8 +47,8 @@ new_epi_archive(
 validate_epi_archive(x)
 }
 \arguments{
-\item{x}{A data.frame, data.table, or tibble, with columns \code{geo_value},
-\code{time_value}, \code{version}, and then any additional number of columns.}
+\item{data_table}{a data.table with \code{\link[data.table:setkey]{data.table::key()}} equal to
+\code{c("geo_value", other_keys, "time_value", "version")}.}
 
 \item{geo_type}{DEPRECATED Has no effect. Geo value type is inferred from the
 location column and set to "custom" if not recognized.}
@@ -94,6 +105,27 @@ beyond \code{max(x$version)}, but they all contained empty updates. (The default
 value of \code{clobberable_versions_start} does not fully trust these empty
 updates, and assumes that any version \verb{>= max(x$version)} could be
 clobbered.) If \code{nrow(x) == 0}, then this argument is mandatory.}
+
+\item{x}{A data.frame, data.table, or tibble, with columns \code{geo_value},
+\code{time_value}, \code{version}, and then any additional number of columns.}
+
+\item{compactify}{Optional; \code{TRUE}, \code{FALSE}, or \code{"message"}. \code{TRUE} will
+remove some redundant rows, \code{FALSE} will not. \code{"message"} is like \code{TRUE}
+but will emit a message if anything was changed. Default is \code{TRUE}. See
+more information below under "Compactification:".}
+
+\item{compactify_abs_tol}{Optional; double. A tolerance level used to detect
+approximate equality for compactification. The default is 0, which
+corresponds to exact equality. Consider using this if your value columns
+undergo tiny nonmeaningful revisions and the archive object with the
+default setting is too large.}
+
+\item{.versions_end}{location based versions_end, used to avoid prefix
+\code{version = issue} from being assigned to \code{versions_end} instead of being
+used to rename columns.}
+
+\item{...}{used for specifying column names, as in \code{\link[dplyr:rename]{dplyr::rename}}. For
+example \code{version = release_date}}
 }
 \value{
 \itemize{
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index 95f97909f..cec4b1d02 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -51,24 +51,20 @@ Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
 
 library(dplyr)
 updates <- bind_rows(
-  tibble(
-    version = 40, time_value = 1:10, value = 1:10
-  ),
-  tibble(
-    version = 12, time_value = 2:3, value = 3:2
-  ),
-  tibble(
-    version = 13, time_value = 6, value = 7,
-  ),
-  tibble(
-    version = 13, time_value = 7, value = NA,
-  )
+  tibble(version = 10, time_value = 1:20, value = 1:20),
+  tibble(version = 12, time_value = 4:5, value = 5:4),
+  tibble(version = 13, time_value = 8, value = 9),
+  tibble(version = 14, time_value = 11, value = NA),
+  tibble(version = 15, time_value = -10, value = -10),
+  tibble(version = 16, time_value = 50, value = 50)
 ) \%>\%
   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x)) \%>\%
   tidyr::nest(.by = version, .key = "subtbl")
 
+f <- purrr::partial(data.table::frollmean, algo = "exact")
+
 updates \%>\%
-  epi_slide_opt_one_epikey("value", data.table::frollmean, "data.table", 1L, 0L, "day", "slide_value")
+  epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
 
 }
 \keyword{internal}

From 3d4f167e94677a16af7be0924b2c4f881f06d62d Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 03:35:54 -0700
Subject: [PATCH 034/107] fix(epi_slide_opt.epi_archive): as.data.table(tibble)
 key setting

---
 R/epi_slide_opt_archive.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 1a6faad26..43387a2d0 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -193,6 +193,8 @@ epi_slide_opt.epi_archive <-
         res
       }) %>%
       ungroup() %>%
+      as.data.frame() %>% # data.table#6859
+      as.data.table(key = key(.x$DT)) %>%
       new_epi_archive(
         .x$geo_type, .x$time_type, .x$other_keys,
         .x$clobberable_versions_start, .x$versions_end

From 4801c7b45899c558e73e97fc175a2af34bf3158a Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 03:36:23 -0700
Subject: [PATCH 035/107] tests(epi_slide_opt): on example data sets

---
 tests/testthat/test-epi_slide_opt_archive.R | 92 ++++++++++++++++-----
 1 file changed, 71 insertions(+), 21 deletions(-)

diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index 3cea0891b..d300e0bcc 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -35,7 +35,7 @@ test_that("epi_slide_opt_archive_one_epikey works as expected", {
     ),
     tibble(
       version = 13, time_value = 8:10, value = c(9, 9, 10),
-      slide_value = frollmean(c(6, 7, 9, 9, 10),  3,  algo = "exact")[-(1:2)]
+      slide_value = frollmean(c(6, 7, 9, 9, 10), 3, algo = "exact")[-(1:2)]
     ),
     tibble(
       version = 14, time_value = 11:13, value = c(NA, 12, 13), slide_value = rep(NA_real_, 3L)
@@ -89,7 +89,6 @@ test_that("epi_slide_opt.epi_archive is not confused by unique(DT$version) unsor
 })
 
 test_that("epi_slide_opt.epi_archive is not confused by unique(DT$time_value) unsorted", {
-
   start_date <- as.Date("2020-01-01")
   tibble(
     geo_value = c(1, 1, 2, 2),
@@ -109,26 +108,77 @@ test_that("epi_slide_opt.epi_archive is not confused by unique(DT$time_value) un
       ) %>%
         as_epi_archive()
     )
-
 })
 
-test_that("epi_slide_opt.epi_archive is equivalent to epix_slide reconversion on example data", {
-
-  case_death_rate_archive %>%
-    epi_slide_opt(case_rate, frollmean, .window_size = 7
-                  # , algo = "exact"
-                  ) %>%
-    .$DT %>%
-    as.data.frame() %>%
-    as_tibble() %>%
-    filter(!approx_equal(case_rate_7dav, case_rate_7d_av, 1e-6, TRUE)) %>%
-    dplyr::transmute(version, geo_value, time_value, case_rate_7dav, case_rate_7d_av,
-                     abs_diff = abs(case_rate_7dav - case_rate_7d_av)) %>%
-    {}
-
-    # TODO finish tests on example data sets
-
-  })
-
+test_that("epi_slide_opt.epi_archive gives expected results on example data", {
+  # vs. built-in case_rate_7d_av column.
+  #
+  # If we were to compare the keyset vs.
+  # the original, it changes, as the original contains some tiny deviations in
+  # values that don't seem achievable with available sliding functions. E.g., in
+  # the recomputed result, geo "ak" version "2020-11-01" changes time 2020-03-13
+  # from 0 to 0.138 and time 2020-03-14 from a slightly different value of 0.138
+  # to 0, while nearby times remained stable; in the original, this resulted in
+  # a tiny update to the 7d_av for 2020-03-14 but not following times somehow,
+  # while in the recomputation there are also minute updates to 2020-03-15 and
+  # 2020-03-16; 2020-03-17 onward have other case_rate changes factoring in.
+  # Compactifying and comparing with tolerances would help account for some of
+  # these differences, but only through writing this was it realized that both
+  # archives would need the recompactification with tolerance; it's not just
+  # epi_slide_opt.epi_archive's very rigid compactification that's the cause.
+  # (Side note: allowing configurable compactification tolerance in
+  # epi_slide_opt.epi_archive wasn't included due to either feeling strange
+  # applying the compactification tolerance to all columns rather than just
+  # computed columns, and a slowdown when using one approach to compactify just
+  # the new columns + also awkward not matching what's possible with just
+  # construction functions.)
+  #
+  # --> just compare essentially an epix_merge of the original & the recomputation:
+  case_death_rate_archive_time <- system.time(
+    case_death_rate_archive_result <- case_death_rate_archive %>%
+      epi_slide_opt(case_rate, frollmean, algo = "exact", .window_size = 7)
+  )
+  expect_equal(
+    case_death_rate_archive_result$DT$case_rate_7dav,
+    case_death_rate_archive_result$DT$case_rate_7d_av
+  )
+
+  # vs. computing via epix_slide:
+
+  mini_case_death_rate_archive <- case_death_rate_archive %>%
+    {
+      as_tibble(as.data.frame(.$DT))
+    } %>%
+    filter(geo_value %in% head(unique(geo_value), 4L)) %>%
+    as_epi_archive()
+
+  mini_case_death_rate_archive_time_opt <- system.time(
+    mini_case_death_rate_archive_result <- mini_case_death_rate_archive %>%
+      epi_slide_opt(case_rate, frollmean, .window_size = 7)
+  )
+
+  mini_case_death_rate_archive_time_gen <- system.time(
+    mini_case_death_rate_archive_expected <- mini_case_death_rate_archive %>%
+      epix_slide(~ .x %>% epi_slide_opt(case_rate, frollmean, .window_size = 7)) %>%
+      select(names(mini_case_death_rate_archive$DT), everything()) %>%
+      as_epi_archive()
+  )
+
+  expect_equal(mini_case_death_rate_archive_result, mini_case_death_rate_archive_expected)
+
+  archive_cases_dv_subset_time_opt <- system.time(
+    archive_cases_dv_subset_result <- archive_cases_dv_subset %>%
+      epi_slide_opt(percent_cli, frollmean, .window_size = 7)
+  )
+
+  archive_cases_dv_subset_time_gen <- system.time(
+    archive_cases_dv_subset_expected <- archive_cases_dv_subset %>%
+      epix_slide(~ .x %>% epi_slide_opt(percent_cli, frollmean, .window_size = 7)) %>%
+      select(geo_value, time_value, version, everything()) %>%
+      as_epi_archive()
+  )
+
+  expect_equal(archive_cases_dv_subset_result, archive_cases_dv_subset_expected)
+})
 
 # TODO grouped behavior checks

From 8e7d5070c59630a24f8e7de51954f86fe8a37d3a Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 12:59:48 -0700
Subject: [PATCH 036/107] Fix & test epi_slide_opt.grouped_epi_archive behavior

---
 NAMESPACE                                   |  1 +
 R/epi_slide_opt_archive.R                   |  2 +-
 R/epiprocess-package.R                      |  1 +
 tests/testthat/test-epi_slide_opt_archive.R | 36 +++++++++++++++++++--
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 7ec1bf1e9..0963a9a6f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -122,6 +122,7 @@ importFrom(checkmate,assert_logical)
 importFrom(checkmate,assert_number)
 importFrom(checkmate,assert_numeric)
 importFrom(checkmate,assert_scalar)
+importFrom(checkmate,assert_set_equal)
 importFrom(checkmate,assert_string)
 importFrom(checkmate,assert_subset)
 importFrom(checkmate,assert_tibble)
diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 43387a2d0..3d55bccca 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -125,7 +125,7 @@ epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked,
 epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
   assert_set_equal(
     group_vars(.x),
-    key_colnames(.x, exclude = c("time_value", "version"))
+    key_colnames(.x$private$ungrouped, exclude = c("time_value", "version"))
   )
   orig_group_vars <- group_vars(.x)
   orig_drop <- .x$private$drop
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 195bc0258..c5c76da8b 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -8,6 +8,7 @@
 #' @importFrom checkmate assert_false
 #' @importFrom checkmate assert_function
 #' @importFrom checkmate assert_logical assert_numeric assert_scalar checkInt
+#' @importFrom checkmate assert_set_equal
 #' @importFrom checkmate assert_string
 #' @importFrom checkmate assert_subset
 #' @importFrom checkmate assert_tibble
diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index d300e0bcc..c41072339 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -110,7 +110,7 @@ test_that("epi_slide_opt.epi_archive is not confused by unique(DT$time_value) un
     )
 })
 
-test_that("epi_slide_opt.epi_archive gives expected results on example data", {
+test_that("epi_slide_opt.epi_archive gives expected results on example data; also grouped behavior", {
   # vs. built-in case_rate_7d_av column.
   #
   # If we were to compare the keyset vs.
@@ -166,6 +166,38 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data", {
 
   expect_equal(mini_case_death_rate_archive_result, mini_case_death_rate_archive_expected)
 
+  mini_case_death_rate_archive_result2 <- mini_case_death_rate_archive %>%
+    group_by(geo_value) %>%
+    epi_slide_opt(case_rate, frollmean, .window_size = 7)
+
+  expect_equal(
+    mini_case_death_rate_archive_result2,
+    mini_case_death_rate_archive_result %>%
+      group_by(geo_value)
+  )
+
+  mini_case_death_rate_archive_b <- mini_case_death_rate_archive %>%
+    {
+      as_tibble(as.data.frame(.$DT))
+    } %>%
+    mutate(age_group = "overall") %>%
+    as_epi_archive(other_keys = "age_group")
+
+  expect_equal(
+    mini_case_death_rate_archive_b %>%
+      group_by(geo_value, age_group) %>%
+      epi_slide_opt(case_rate, frollmean, .window_size = 7),
+    mini_case_death_rate_archive_b %>%
+      epi_slide_opt(case_rate, frollmean, .window_size = 7) %>%
+      group_by(geo_value, age_group)
+  )
+
+  expect_error(
+    mini_case_death_rate_archive_b %>%
+      group_by(age_group) %>%
+      epi_slide_opt(case_rate, frollmean, .window_size = 7)
+  )
+
   archive_cases_dv_subset_time_opt <- system.time(
     archive_cases_dv_subset_result <- archive_cases_dv_subset %>%
       epi_slide_opt(percent_cli, frollmean, .window_size = 7)
@@ -180,5 +212,3 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data", {
 
   expect_equal(archive_cases_dv_subset_result, archive_cases_dv_subset_expected)
 })
-
-# TODO grouped behavior checks

From 6fa2c3b718255b09dffd7ba27cad8ea697e93efe Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 13:02:35 -0700
Subject: [PATCH 037/107] Rename approx_equal -> vec_approx_equal

---
 NAMESPACE                                    |  2 +-
 R/archive.R                                  |  4 +--
 R/patch.R                                    | 28 +++++++++-----------
 man/approx_equal0.Rd                         | 12 ---------
 man/{approx_equal.Rd => vec_approx_equal.Rd} | 14 +++++-----
 man/vec_approx_equal0.Rd                     | 12 +++++++++
 tests/testthat/test-archive.R                |  2 +-
 7 files changed, 36 insertions(+), 38 deletions(-)
 delete mode 100644 man/approx_equal0.Rd
 rename man/{approx_equal.Rd => vec_approx_equal.Rd} (90%)
 create mode 100644 man/vec_approx_equal0.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 0963a9a6f..6fce796ca 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -58,7 +58,6 @@ S3method(ungroup,epi_df)
 S3method(ungroup,grouped_epi_archive)
 S3method(unnest,epi_df)
 export("%>%")
-export(approx_equal)
 export(arrange)
 export(arrange_canonical)
 export(as_epi_archive)
@@ -106,6 +105,7 @@ export(time_column_names)
 export(ungroup)
 export(unnest)
 export(validate_epi_archive)
+export(vec_approx_equal)
 export(version_column_names)
 import(epidatasets)
 importFrom(checkmate,anyInfinite)
diff --git a/R/archive.R b/R/archive.R
index 3935709ac..5ce1938b9 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -472,12 +472,12 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
     inds2 <- 1L:(n_updates - 1L)
     c(
       FALSE, # first observation is not LOCF
-      approx_equal0(ekts_tbl,
+      vec_approx_equal0(ekts_tbl,
         inds1 = inds1, ekts_tbl, inds2 = inds2,
         # check ekt (key) cols with 0 tolerance:
         na_equal = TRUE, abs_tol = 0
       ) &
-        approx_equal0(vals_tbl,
+        vec_approx_equal0(vals_tbl,
           inds1 = inds1, vals_tbl, inds2 = inds2,
           na_equal = TRUE, abs_tol = abs_tol
         )
diff --git a/R/patch.R b/R/patch.R
index bf42e9250..035c0ad58 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -27,7 +27,7 @@
 #' @examples
 #'
 #' # On numeric vectors:
-#' approx_equal(
+#' vec_approx_equal(
 #'   c(1, 2, 3, NA),
 #'   c(1, 2 + 1e-10, NA, NA),
 #'   na_equal = TRUE,
@@ -49,8 +49,8 @@
 #' tbl2$c$c1[[4]] <- tbl1$c$c1[[4]] + 1e-10
 #' tbl2$d[[5, 2]] <- tbl1$d[[5, 2]] + 1e-10
 #' vctrs::vec_equal(tbl1, tbl2, na_equal = TRUE)
-#' approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
-#' approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
+#' vec_approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
+#' vec_approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
 #'
 #'
 #'
@@ -58,10 +58,10 @@
 #'
 #' # Type comparison within lists is stricter, matching vctrs:
 #' vctrs::vec_equal(list(1:2), list(as.numeric(1:2)))
-#' approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
+#' vec_approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
 #'
 #' @export
-approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol, inds1 = NULL, inds2 = NULL) {
+vec_approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol, inds1 = NULL, inds2 = NULL) {
   if (!obj_is_vector(vec1)) cli_abort("`vec1` must be recognized by vctrs as a vector")
   if (!obj_is_vector(vec2)) cli_abort("`vec2` must be recognized by vctrs as a vector")
   # Leave vec size checking to vctrs recycling ops.
@@ -99,13 +99,13 @@ approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol, inds
     inds2 <- vec_recycle(inds2, common_size)
   }
   vecs <- vec_cast_common(vec1, vec2, .to = .ptype)
-  approx_equal0(vecs[[1]], vecs[[2]], na_equal, abs_tol, inds1, inds2)
+  vec_approx_equal0(vecs[[1]], vecs[[2]], na_equal, abs_tol, inds1, inds2)
 }
 
-#' Helper for [`approx_equal`] for vecs guaranteed to have the same ptype and size
+#' Helper for [`vec_approx_equal`] for vecs guaranteed to have the same ptype and size
 #'
 #' @keywords internal
-approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL) {
+vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL) {
   if (is_bare_numeric(vec1) && abs_tol != 0) {
     # perf: since we're working with bare numerics and logicals: we can use `[`
     # and `fifelse`. Matching vec_equal, we ignore names and other attributes.
@@ -132,7 +132,7 @@ approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = N
       rep(TRUE, nrow(vec1))
     } else {
       Reduce(`&`, lapply(seq_len(ncol(vec1)), function(col_i) {
-        approx_equal0(vec1[[col_i]], vec2[[col_i]], na_equal, abs_tol, inds1, inds2)
+        vec_approx_equal0(vec1[[col_i]], vec2[[col_i]], na_equal, abs_tol, inds1, inds2)
       }))
     }
   } else if (is_bare_list(vec1)) {
@@ -144,14 +144,14 @@ approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = N
         # consistently inconsistent, we avoid dispatching to vec_equal for bare
         # lists even with abs_tol = 0:
         identical(vec_ptype(entry1), vec_ptype(entry2)) &&
-        all(approx_equal0(entry1, entry2, na_equal, abs_tol))
+        all(vec_approx_equal0(entry1, entry2, na_equal, abs_tol))
     }, logical(1L))
   } else {
     # XXX No special handling for any other types/situations. Makes sense for
     # unclassed atomic things; custom classes (e.g., distributions) might want
-    # recursion / specialization, though. approx_equal0 should probably be an S3
+    # recursion / specialization, though. vec_approx_equal0 should probably be an S3
     # method. Also, abs_tol == 0 --> vec_equal logic should maybe be either be
-    # hoisted to approx_equal or we should manually recurse on data frames even
+    # hoisted to vec_approx_equal or we should manually recurse on data frames even
     # with abs_tol = 0 when that's faster (might depend on presence of inds*),
     # after some inconsistencies are ironed out.
     if (!is.null(inds1)) {
@@ -265,7 +265,7 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # Which rows from combined are in case 3.?
   combined_compactify_away <- rep(FALSE, combined_n)
   combined_compactify_away[combined_ukey_is_repeat] <-
-    approx_equal0(combined_vals,
+    vec_approx_equal0(combined_vals,
       combined_vals,
       na_equal = TRUE,
       abs_tol = compactify_abs_tol,
@@ -340,5 +340,3 @@ tbl_patch <- function(snapshot, update, ukey_names) {
 
   result_tbl
 }
-
-# TODO rename approx_equal to vec_approx_equal
diff --git a/man/approx_equal0.Rd b/man/approx_equal0.Rd
deleted file mode 100644
index 9726f5013..000000000
--- a/man/approx_equal0.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/patch.R
-\name{approx_equal0}
-\alias{approx_equal0}
-\title{Helper for \code{\link{approx_equal}} for vecs guaranteed to have the same ptype and size}
-\usage{
-approx_equal0(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL)
-}
-\description{
-Helper for \code{\link{approx_equal}} for vecs guaranteed to have the same ptype and size
-}
-\keyword{internal}
diff --git a/man/approx_equal.Rd b/man/vec_approx_equal.Rd
similarity index 90%
rename from man/approx_equal.Rd
rename to man/vec_approx_equal.Rd
index 237b33be4..80e037923 100644
--- a/man/approx_equal.Rd
+++ b/man/vec_approx_equal.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/patch.R
-\name{approx_equal}
-\alias{approx_equal}
+\name{vec_approx_equal}
+\alias{vec_approx_equal}
 \title{Test two vctrs vectors for equality with some tolerance in some cases}
 \usage{
-approx_equal(
+vec_approx_equal(
   vec1,
   vec2,
   na_equal,
@@ -49,7 +49,7 @@ the precise behavior in these cases may change and should not be relied upon.
 \examples{
 
 # On numeric vectors:
-approx_equal(
+vec_approx_equal(
   c(1, 2, 3, NA),
   c(1, 2 + 1e-10, NA, NA),
   na_equal = TRUE,
@@ -71,8 +71,8 @@ tbl2$b[[3]][[1]] <- tbl1$b[[3]][[1]] + 1e-10
 tbl2$c$c1[[4]] <- tbl1$c$c1[[4]] + 1e-10
 tbl2$d[[5, 2]] <- tbl1$d[[5, 2]] + 1e-10
 vctrs::vec_equal(tbl1, tbl2, na_equal = TRUE)
-approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
-approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
+vec_approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
+vec_approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
 
 
 
@@ -80,6 +80,6 @@ approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
 
 # Type comparison within lists is stricter, matching vctrs:
 vctrs::vec_equal(list(1:2), list(as.numeric(1:2)))
-approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
+vec_approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
 
 }
diff --git a/man/vec_approx_equal0.Rd b/man/vec_approx_equal0.Rd
new file mode 100644
index 000000000..01a1141ba
--- /dev/null
+++ b/man/vec_approx_equal0.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/patch.R
+\name{vec_approx_equal0}
+\alias{vec_approx_equal0}
+\title{Helper for \code{\link{vec_approx_equal}} for vecs guaranteed to have the same ptype and size}
+\usage{
+vec_approx_equal0(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL)
+}
+\description{
+Helper for \code{\link{vec_approx_equal}} for vecs guaranteed to have the same ptype and size
+}
+\keyword{internal}
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 051503e05..510d2adfc 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -220,5 +220,5 @@ test_that("`epi_archive` rejects dataframes where time_value and version columns
 test_that("is_locf replacement works as expected", {
   vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
   is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
-  expect_equal(c(FALSE, approx_equal(head(vec, -1L), tail(vec, -1L), na_equal = TRUE, abs_tol = .Machine$double.eps^0.5)), as.logical(is_repeated))
+  expect_equal(c(FALSE, vec_approx_equal(head(vec, -1L), tail(vec, -1L), na_equal = TRUE, abs_tol = .Machine$double.eps^0.5)), as.logical(is_repeated))
 })

From 1b893314f7fe6da1eff728fa62f9e1909cad6cd5 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 13:30:46 -0700
Subject: [PATCH 038/107] Fix missing n_groups import + epiprocess::: CHECK
 lint

---
 NAMESPACE                 | 1 +
 R/epi_slide_opt_archive.R | 2 +-
 R/epiprocess-package.R    | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 6fce796ca..114f77c1d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -188,6 +188,7 @@ importFrom(dplyr,if_any)
 importFrom(dplyr,if_else)
 importFrom(dplyr,is_grouped_df)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n_groups)
 importFrom(dplyr,pick)
 importFrom(dplyr,pull)
 importFrom(dplyr,relocate)
diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 3d55bccca..714ec0227 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -42,7 +42,7 @@
 #' @keywords internal
 epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
   # TODO check for col name clobbering
-  unit_step <- epiprocess:::unit_time_delta(time_type)
+  unit_step <- unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
   result <- map(seq_len(nrow(updates)), function(update_i) {
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index c5c76da8b..7c7f54394 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -32,6 +32,7 @@
 #' @importFrom dplyr arrange
 #' @importFrom dplyr grouped_df
 #' @importFrom dplyr is_grouped_df
+#' @importFrom dplyr n_groups
 #' @importFrom dplyr select
 #' @importFrom lifecycle deprecated
 #' @importFrom purrr list_rbind

From 0418b03cdaa53307e32c92e194b0f7431c195bd9 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 14:06:33 -0700
Subject: [PATCH 039/107] Address additional lints, CHECK issues

---
 R/archive.R                                 |  2 +-
 R/epi_slide_opt_archive.R                   | 23 ++++++++++++++++-----
 R/epi_slide_opt_edf.R                       | 12 +++++++----
 R/patch.R                                   | 10 ++++++---
 man/apply_compactify.Rd                     |  2 +-
 man/vec_approx_equal.Rd                     |  3 +++
 tests/testthat/test-archive.R               | 10 ++++++++-
 tests/testthat/test-epi_slide_opt_archive.R |  8 +++++--
 8 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/R/archive.R b/R/archive.R
index 5ce1938b9..35a9e775c 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -386,7 +386,7 @@ validate_epi_archive <- function(x) {
 #'   would be `key(DT)`.
 #' @param abs_tol numeric, >=0; absolute tolerance to use on numeric measurement
 #'   columns when determining whether something can be compactified away; see
-#'   [`is_locf`]
+#'   [`vec_approx_equal`]
 #'
 #' @importFrom data.table is.data.table key
 #' @importFrom dplyr arrange filter
diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 714ec0227..eaf5bbb88 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -40,7 +40,11 @@
 #'   epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
 #'
 #' @keywords internal
-epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked, f_from_package, before, after, time_type, out_colnames) {
+epi_slide_opt_archive_one_epikey <- function(
+    updates,
+    in_colnames,
+    f_dots_baked, f_from_package, before, after, time_type,
+    out_colnames) {
   # TODO check for col name clobbering
   unit_step <- unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
@@ -61,7 +65,7 @@ epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked,
       # If the input had updates in the range t1..t2, this could produce changes
       # in slide outputs in the range t1-after..t2+before, and to compute those
       # slide values, we need to look at the input snapshot from
-      # t1-after-before..t2+before+after.
+      # t1-after-before..t2+before+after. nolint: commented_code_linter
       inp_update_t_min <- min(inp_update$time_value)
       inp_update_t_max <- max(inp_update$time_value)
       slide_t_min <- inp_update_t_min - (before + after) * unit_step
@@ -78,7 +82,8 @@ epi_slide_opt_archive_one_epikey <- function(updates, in_colnames, f_dots_baked,
     if (f_from_package == "data.table") {
       for (col_i in seq_along(in_colnames)) {
         if (before == Inf) {
-          slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], seq_len(slide_nrow), adaptive = TRUE)
+          slide[[out_colnames[[col_i]]]] <-
+            f_dots_baked(slide[[in_colnames[[col_i]]]], seq_len(slide_nrow), adaptive = TRUE)
         } else {
           out_col <- f_dots_baked(slide[[in_colnames[[col_i]]]], before + after + 1L)
           if (after != 0L) {
@@ -158,7 +163,10 @@ epi_slide_opt.epi_archive <-
         purrr::partial(.f, ...)
       }
     col_names_quo <- enquo(.col_names)
-    names_info <- across_ish_names_info(.x$DT, time_type, col_names_quo, .f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
+    names_info <- across_ish_names_info(
+      .x$DT, time_type, col_names_quo, .f_info$namer,
+      .window_size, .align, .prefix, .suffix, .new_col_names
+    )
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
     if (!is.null(.ref_time_values)) {
       cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument")
@@ -187,7 +195,12 @@ epi_slide_opt.epi_archive <-
           nest(.by = version, .key = "subtbl") %>%
           arrange(version)
         # TODO move nesting inside the helper?
-        res <- epi_slide_opt_archive_one_epikey(group_updates, names_info$input_col_names, .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
+        res <- epi_slide_opt_archive_one_epikey(
+          group_updates,
+          names_info$input_col_names,
+          .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type,
+          names_info$output_col_names
+        ) %>%
           list_rbind()
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
         res
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 9dd7190c8..b6be49a1a 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -81,7 +81,8 @@ upstream_slide_f_info <- function(.f) {
 #'   `names(.x)`; and `output_colnames`, chr, same length as `input_col_names`
 #'
 #' @keywords internal
-across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .window_size, .align, .prefix, .suffix, .new_col_names) {
+across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
+                                  .window_size, .align, .prefix, .suffix, .new_col_names) {
   # The position of a given column can be differ between input `.x` and
   # `.data_group` since the grouping step by default drops grouping columns.
   # To avoid rerunning `eval_select` for every `.data_group`, convert
@@ -151,10 +152,10 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer, .windo
   }
   output_col_names <- .new_col_names
 
-  return(list(
+  list(
     input_col_names = input_col_names,
     output_col_names = output_col_names
-  ))
+  )
 }
 
 #' Optimized slide functions for common cases
@@ -417,7 +418,10 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
   window_args <- get_before_after_from_window(.window_size, .align, time_type)
 
   # Handle output naming:
-  names_info <- across_ish_names_info(.x, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
+  names_info <- across_ish_names_info(
+    .x, time_type, col_names_quo, f_info$namer,
+    .window_size, .align, .prefix, .suffix, .new_col_names
+  )
   input_col_names <- names_info$input_col_names
   output_col_names <- names_info$output_col_names
 
diff --git a/R/patch.R b/R/patch.R
index 035c0ad58..f8384ad22 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -12,9 +12,11 @@
 #' @param na_equal should `NA`s be considered equal to each other? (In
 #'   epiprocess, we usually want this to be `TRUE`, but that doesn't match the
 #'   [`vctrs::vec_equal()`] default, so this is mandatory.)
+#' @param .ptype as in [`vctrs::vec_equal()`].
+#' @param ... should be empty (it's here to force later arguments to be passed
+#'   by name)
 #' @param abs_tol absolute tolerance; will be used for bare numeric `vec1`,
 #'   `vec2`, or any such columns within `vec1`, `vec2` if they are data frames.
-#' @param .ptype as in [`vctrs::vec_equal()`].
 #' @param inds1,inds2 optional (row) indices into vec1 and vec2 compatible with
 #'   [`vctrs::vec_slice()`]; output should be consistent with `vec_slice`-ing to
 #'   these indices beforehand, but can give faster computation if `vec1` and
@@ -205,7 +207,9 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
     cli_abort("`ukey_names` must be a subset of column names")
   }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
-  if (!(is.vector(compactify_abs_tol, mode = "numeric") && length(compactify_abs_tol) == 1L && compactify_abs_tol >= 0)) {
+  if (!(is.vector(compactify_abs_tol, mode = "numeric") &&
+    length(compactify_abs_tol) == 1L && # nolint:indentation_linter
+    compactify_abs_tol >= 0)) {
     # Give a specific message:
     assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)
     # Fallback e.g. for invalid classes not caught by assert_numeric:
@@ -279,7 +283,7 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   if (later_format == "update") {
     # Cases 4. and 5.:
     combined_tbl <- combined_tbl[combined_from_later & !combined_compactify_away, ]
-  } else { # later_format == "snapshot"
+  } else { # later_format is "snapshot"
     # Which rows from combined are in case 1.?
     combined_is_deletion <- vec_rep_each(c(TRUE, FALSE), c(earlier_n, later_n))
     combined_is_deletion[ukey_repeat_first_i] <- FALSE
diff --git a/man/apply_compactify.Rd b/man/apply_compactify.Rd
index e96108789..6bd6ce770 100644
--- a/man/apply_compactify.Rd
+++ b/man/apply_compactify.Rd
@@ -16,7 +16,7 @@ would be \code{key(DT)}.}
 
 \item{abs_tol}{numeric, >=0; absolute tolerance to use on numeric measurement
 columns when determining whether something can be compactified away; see
-\code{\link{is_locf}}}
+\code{\link{vec_approx_equal}}}
 }
 \description{
 Works by shifting all rows except the version, then comparing values to see
diff --git a/man/vec_approx_equal.Rd b/man/vec_approx_equal.Rd
index 80e037923..f3612fa19 100644
--- a/man/vec_approx_equal.Rd
+++ b/man/vec_approx_equal.Rd
@@ -28,6 +28,9 @@ epiprocess, we usually want this to be \code{TRUE}, but that doesn't match the
 
 \item{.ptype}{as in \code{\link[vctrs:vec_equal]{vctrs::vec_equal()}}.}
 
+\item{...}{should be empty (it's here to force later arguments to be passed
+by name)}
+
 \item{abs_tol}{absolute tolerance; will be used for bare numeric \code{vec1},
 \code{vec2}, or any such columns within \code{vec1}, \code{vec2} if they are data frames.}
 
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 510d2adfc..336fe4baa 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -220,5 +220,13 @@ test_that("`epi_archive` rejects dataframes where time_value and version columns
 test_that("is_locf replacement works as expected", {
   vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
   is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
-  expect_equal(c(FALSE, vec_approx_equal(head(vec, -1L), tail(vec, -1L), na_equal = TRUE, abs_tol = .Machine$double.eps^0.5)), as.logical(is_repeated))
+  expect_equal(
+    c(
+      FALSE,
+      vec_approx_equal(head(vec, -1L), tail(vec, -1L),
+        na_equal = TRUE, abs_tol = .Machine$double.eps^0.5
+      )
+    ),
+    as.logical(is_repeated)
+  )
 })
diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index c41072339..aa2a94905 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -159,7 +159,9 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data; als
 
   mini_case_death_rate_archive_time_gen <- system.time(
     mini_case_death_rate_archive_expected <- mini_case_death_rate_archive %>%
-      epix_slide(~ .x %>% epi_slide_opt(case_rate, frollmean, .window_size = 7)) %>%
+      epix_slide(
+        ~ .x %>% epi_slide_opt(case_rate, frollmean, .window_size = 7)
+      ) %>%
       select(names(mini_case_death_rate_archive$DT), everything()) %>%
       as_epi_archive()
   )
@@ -205,7 +207,9 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data; als
 
   archive_cases_dv_subset_time_gen <- system.time(
     archive_cases_dv_subset_expected <- archive_cases_dv_subset %>%
-      epix_slide(~ .x %>% epi_slide_opt(percent_cli, frollmean, .window_size = 7)) %>%
+      epix_slide(
+        ~ .x %>% epi_slide_opt(percent_cli, frollmean, .window_size = 7)
+      ) %>%
       select(geo_value, time_value, version, everything()) %>%
       as_epi_archive()
   )

From f2ecb3174c1cec551343d5c0d4e743f02ccdb63e Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 14:24:28 -0700
Subject: [PATCH 040/107] Address missing ::: in tests

---
 tests/testthat/test-epi_slide_opt_archive.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index aa2a94905..088452da5 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -55,7 +55,7 @@ test_that("epi_slide_opt_archive_one_epikey works as expected", {
   f <- purrr::partial(data.table::frollmean, algo = "exact")
 
   result <- updates %>%
-    epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value") %>%
+    epiprocess:::epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value") %>%
     lapply(function(x) {
       x %>%
         arrange(time_value) %>%

From a737dbe175563e5174197fcf910986f8253ba74f Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 10 Mar 2025 16:03:07 -0700
Subject: [PATCH 041/107] Fix another missing `:::`

---
 R/epi_slide_opt_archive.R               | 2 +-
 man/epi_slide_opt_archive_one_epikey.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index eaf5bbb88..1250d6fb2 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -37,7 +37,7 @@
 #' f <- purrr::partial(data.table::frollmean, algo = "exact")
 #'
 #' updates %>%
-#'   epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
+#'   epiprocess:::epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
 #'
 #' @keywords internal
 epi_slide_opt_archive_one_epikey <- function(
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index cec4b1d02..f1a992a77 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -64,7 +64,7 @@ updates <- bind_rows(
 f <- purrr::partial(data.table::frollmean, algo = "exact")
 
 updates \%>\%
-  epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
+  epiprocess:::epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
 
 }
 \keyword{internal}

From d1c1e1d428d5b74ed2e8848d9a728f3a16daa277 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 11 Mar 2025 09:34:19 -0700
Subject: [PATCH 042/107] Fix + add NEWS.md entries

We actually coincidentally caught the tibble-to-DT key-setting issue with code
intended for addressing a separate issue with `data.table`s passed in.
---
 NEWS.md            | 14 +++++++-------
 R/archive.R        | 20 ++++++++++++--------
 man/epi_archive.Rd | 18 +++++++++++-------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 1218da12f..89435c9df 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,15 +9,15 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 - `is_epi_archive` function has been reintroduced.
 ## Breaking changes
 
-- `new_epi_archive()`'s `x` argument has been replaced with a `data_table`
-  argument, which must be a `data.table` with the key already set appropriately.
-  The `key()` of its `DT` will also now place `other_keys` before rather than after
-  `"time_value"`.
+- The low-level `new_epi_archive()` function's `x` argument has been replaced
+  with a `data_table` argument, which now has extra requirements; see
+  `?new_epi_archive`. Users should still be using `as_epi_archive()` unless they
+  have a need for something lower-level.
 
-## Bug fixes
+## New features
 
-- `as_epi_archive()` no longer has issues setting its `DT`'s `key` on some
-  versions of `{data.table}` when `x` is a tibble.
+- `epi_slide_{mean,sum,opt}` now work on `epi_archive`s, preparing version
+  histories for 7-day-averages of signals, etc.
 
 # epiprocess 0.11
 
diff --git a/R/archive.R b/R/archive.R
index 35a9e775c..80adbc379 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -186,17 +186,21 @@ next_after.Date <- function(x) x + 1L
 #'  archive. Unexpected behavior may result from modifying the metadata
 #'  directly.
 #'
-#' @param data_table a data.table with [`data.table::key()`] equal to
-#'   `c("geo_value", other_keys, "time_value", "version")`.
+#' @param data_table a `data.table` with [`data.table::key()`] equal to
+#'   `c("geo_value", other_keys, "time_value", "version")`. For `data.table`
+#'   users: this sets up an alias of `data_table`; if you plan to keep on
+#'   working with `data_table` or working directly with the archive's `$DT`
+#'   using mutating operations, you should `copy()` if appropriate. We will not
+#'   mutate the `DT` with any exported {epiprocess} functions, though.
 #' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
-#' location column and set to "custom" if not recognized.
-#' @param time_type DEPRECATED Has no effect. Time value type inferred from the time
-#' column and set to "custom" if not recognized. Unpredictable behavior may result
-#' if the time type is not recognized.
+#'   location column and set to "custom" if not recognized.
+#' @param time_type DEPRECATED Has no effect. Time value type inferred from the
+#'   time column and set to "custom" if not recognized. Unpredictable behavior
+#'   may result if the time type is not recognized.
 #' @param other_keys Character vector specifying the names of variables in `x`
 #'   that should be considered key variables (in the language of `data.table`)
-#'   apart from "geo_value", "time_value", and "version". Typical examples
-#'   are "age" or more granular geographies.
+#'   apart from "geo_value", "time_value", and "version". Typical examples are
+#'   "age" or more granular geographies.
 #' @param compactify Optional; `TRUE`, `FALSE`, or `"message"`. `TRUE` will
 #'   remove some redundant rows, `FALSE` will not. `"message"` is like `TRUE`
 #'   but will emit a message if anything was changed. Default is `TRUE`. See
diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index fd6e8b39e..1f0bf6bdf 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -47,20 +47,24 @@ new_epi_archive(
 validate_epi_archive(x)
 }
 \arguments{
-\item{data_table}{a data.table with \code{\link[data.table:setkey]{data.table::key()}} equal to
-\code{c("geo_value", other_keys, "time_value", "version")}.}
+\item{data_table}{a \code{data.table} with \code{\link[data.table:setkey]{data.table::key()}} equal to
+\code{c("geo_value", other_keys, "time_value", "version")}. For \code{data.table}
+users: this sets up an alias of \code{data_table}; if you plan to keep on
+working with \code{data_table} or working directly with the archive's \verb{$DT}
+using mutating operations, you should \code{copy()} if appropriate. We will not
+mutate the \code{DT} with any exported {epiprocess} functions, though.}
 
 \item{geo_type}{DEPRECATED Has no effect. Geo value type is inferred from the
 location column and set to "custom" if not recognized.}
 
-\item{time_type}{DEPRECATED Has no effect. Time value type inferred from the time
-column and set to "custom" if not recognized. Unpredictable behavior may result
-if the time type is not recognized.}
+\item{time_type}{DEPRECATED Has no effect. Time value type inferred from the
+time column and set to "custom" if not recognized. Unpredictable behavior
+may result if the time type is not recognized.}
 
 \item{other_keys}{Character vector specifying the names of variables in \code{x}
 that should be considered key variables (in the language of \code{data.table})
-apart from "geo_value", "time_value", and "version". Typical examples
-are "age" or more granular geographies.}
+apart from "geo_value", "time_value", and "version". Typical examples are
+"age" or more granular geographies.}
 
 \item{compactify}{Optional; \code{TRUE}, \code{FALSE}, or \code{"message"}. \code{TRUE} will
 remove some redundant rows, \code{FALSE} will not. \code{"message"} is like \code{TRUE}

From f4447b08be4b7de544b9d5da0bd011ee82f2ba28 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 11 Mar 2025 09:54:35 -0700
Subject: [PATCH 043/107] Fix CHECK doc line length lint

---
 R/epi_slide_opt_archive.R               | 4 +++-
 man/epi_slide_opt_archive_one_epikey.Rd | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 1250d6fb2..070dc431c 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -37,7 +37,9 @@
 #' f <- purrr::partial(data.table::frollmean, algo = "exact")
 #'
 #' updates %>%
-#'   epiprocess:::epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
+#'   epiprocess:::epi_slide_opt_archive_one_epikey(
+#'     "value", f, "data.table", 2L, 0L, "day", "slide_value"
+#'   )
 #'
 #' @keywords internal
 epi_slide_opt_archive_one_epikey <- function(
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index f1a992a77..78eec4fdf 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -64,7 +64,9 @@ updates <- bind_rows(
 f <- purrr::partial(data.table::frollmean, algo = "exact")
 
 updates \%>\%
-  epiprocess:::epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value")
+  epiprocess:::epi_slide_opt_archive_one_epikey(
+    "value", f, "data.table", 2L, 0L, "day", "slide_value"
+  )
 
 }
 \keyword{internal}

From 2c08e01e8f5ca033bcef2270b3859fd6437a7772 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 11 Mar 2025 09:56:00 -0700
Subject: [PATCH 044/107] Fix missing library(dplyr) in example

---
 R/patch.R               | 2 ++
 man/vec_approx_equal.Rd | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/R/patch.R b/R/patch.R
index f8384ad22..0cd42f3cd 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -28,6 +28,8 @@
 #'
 #' @examples
 #'
+#' library(dplyr)
+#'
 #' # On numeric vectors:
 #' vec_approx_equal(
 #'   c(1, 2, 3, NA),
diff --git a/man/vec_approx_equal.Rd b/man/vec_approx_equal.Rd
index f3612fa19..ff403a1c6 100644
--- a/man/vec_approx_equal.Rd
+++ b/man/vec_approx_equal.Rd
@@ -51,6 +51,8 @@ the precise behavior in these cases may change and should not be relied upon.
 }
 \examples{
 
+library(dplyr)
+
 # On numeric vectors:
 vec_approx_equal(
   c(1, 2, 3, NA),

From 2aa030166120e51f9c9758ff085aba8ad77ecad1 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 11 Mar 2025 10:04:25 -0700
Subject: [PATCH 045/107] Fix {epiprocess} -> `{epiprocess}` in roxygen

---
 R/archive.R        | 2 +-
 man/epi_archive.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/archive.R b/R/archive.R
index 80adbc379..5afabb13b 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -191,7 +191,7 @@ next_after.Date <- function(x) x + 1L
 #'   users: this sets up an alias of `data_table`; if you plan to keep on
 #'   working with `data_table` or working directly with the archive's `$DT`
 #'   using mutating operations, you should `copy()` if appropriate. We will not
-#'   mutate the `DT` with any exported {epiprocess} functions, though.
+#'   mutate the `DT` with any exported `{epiprocess}` functions, though.
 #' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
 #'   location column and set to "custom" if not recognized.
 #' @param time_type DEPRECATED Has no effect. Time value type inferred from the
diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index 1f0bf6bdf..ab9da219f 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -52,7 +52,7 @@ validate_epi_archive(x)
 users: this sets up an alias of \code{data_table}; if you plan to keep on
 working with \code{data_table} or working directly with the archive's \verb{$DT}
 using mutating operations, you should \code{copy()} if appropriate. We will not
-mutate the \code{DT} with any exported {epiprocess} functions, though.}
+mutate the \code{DT} with any exported \code{{epiprocess}} functions, though.}
 
 \item{geo_type}{DEPRECATED Has no effect. Geo value type is inferred from the
 location column and set to "custom" if not recognized.}

From 56cacaafde9f785e6cdeb72780dc28d22bb991d0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 11 Mar 2025 12:27:00 -0700
Subject: [PATCH 046/107] docs: add vec_approx_equal to pkgdown reference index

---
 _pkgdown.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 3742bd416..8d3a09d5c 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -93,6 +93,10 @@ reference:
       - epidatasets::covid_incidence_outliers
       - epidatasets::covid_case_death_rates_extended
 
+  - title: Other utilities
+  - contents:
+      - vec_approx_equal
+
   - title: internal
   - contents:
       - starts_with("internal")

From 77f7bc0a6dfda47ae1181a317f95694ff2f2c734 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 17 Mar 2025 17:18:00 -0700
Subject: [PATCH 047/107] docs(vec_approx_equal): mention
 vctrs::vec_proxy_equal

---
 R/patch.R | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 0cd42f3cd..af60eb1a1 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -153,11 +153,13 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
   } else {
     # XXX No special handling for any other types/situations. Makes sense for
     # unclassed atomic things; custom classes (e.g., distributions) might want
-    # recursion / specialization, though. vec_approx_equal0 should probably be an S3
-    # method. Also, abs_tol == 0 --> vec_equal logic should maybe be either be
-    # hoisted to vec_approx_equal or we should manually recurse on data frames even
-    # with abs_tol = 0 when that's faster (might depend on presence of inds*),
-    # after some inconsistencies are ironed out.
+    # recursion / specialization, though. vec_approx_equal0 should probably be
+    # an S3 method; see also `vctrs::vec_proxy_equal` though it's probably not
+    # sufficient (e.g., for keyed data frames such as `epi_df`s that should have
+    # strict & nonstrict columns). Also, abs_tol == 0 --> vec_equal logic should
+    # maybe be either be hoisted to vec_approx_equal or we should manually
+    # recurse on data frames even with abs_tol = 0 when that's faster (might
+    # depend on presence of inds*), after some inconsistencies are ironed out.
     if (!is.null(inds1)) {
       vec1 <- vec_slice(vec1, inds1)
       vec2 <- vec_slice(vec2, inds2)

From 1cb15554f10f1f3ca707d81a80feef26922491e3 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 25 Mar 2025 16:24:37 -0700
Subject: [PATCH 048/107] Fix & test some vec_approx_equal vs. vec_equal
 behaviors

---
 NAMESPACE                              |  1 +
 R/epiprocess-package.R                 |  1 +
 R/patch.R                              | 38 ++++++++++-------
 tests/testthat/test-archive.R          | 14 -------
 tests/testthat/test-vec_approx_equal.R | 57 ++++++++++++++++++++++++++
 5 files changed, 82 insertions(+), 29 deletions(-)
 create mode 100644 tests/testthat/test-vec_approx_equal.R

diff --git a/NAMESPACE b/NAMESPACE
index 114f77c1d..62c3e7fc8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -283,6 +283,7 @@ importFrom(vctrs,vec_recycle_common)
 importFrom(vctrs,vec_rep)
 importFrom(vctrs,vec_rep_each)
 importFrom(vctrs,vec_seq_along)
+importFrom(vctrs,vec_set_names)
 importFrom(vctrs,vec_size)
 importFrom(vctrs,vec_size_common)
 importFrom(vctrs,vec_slice)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 7c7f54394..03b1838a6 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -60,6 +60,7 @@
 #' @importFrom vctrs vec_rep
 #' @importFrom vctrs vec_rep_each
 #' @importFrom vctrs vec_seq_along
+#' @importFrom vctrs vec_set_names
 #' @importFrom vctrs vec_size_common
 #' @importFrom vctrs vec_slice
 #' @importFrom vctrs vec_slice<-
diff --git a/R/patch.R b/R/patch.R
index af60eb1a1..fe9d1b6db 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -115,12 +115,18 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
     # and `fifelse`. Matching vec_equal, we ignore names and other attributes.
     if (!is.null(inds1)) vec1 <- vec_slice(vec1, inds1)
     if (!is.null(inds2)) vec2 <- vec_slice(vec2, inds2)
+    na_or_nan1 <- is.na(vec1)
+    na_or_nan2 <- is.na(vec2)
     res <- fifelse(
-      !is.na(vec1) & !is.na(vec2),
+      !na_or_nan1 & !na_or_nan2,
       abs(vec1 - vec2) <= abs_tol,
-      if (na_equal) is.na(vec1) & is.na(vec2) else NA
-      # XXX ^ inconsistent with vec_equal treatment: NA vs. NaN comparison
-      # behavior with na_equal = TRUE is different
+      if (na_equal) {
+        na_or_nan1 & na_or_nan2 & (is.nan(vec1) == is.nan(vec2))
+      } else {
+        # Like `==` and `vec_equal`, we consider NaN == {NA, NaN, anything else}
+        # to be NA.
+        NA
+      }
     )
     if (!is.null(dim(vec1))) {
       dim(res) <- dim(vec1)
@@ -139,27 +145,29 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
         vec_approx_equal0(vec1[[col_i]], vec2[[col_i]], na_equal, abs_tol, inds1, inds2)
       }))
     }
-  } else if (is_bare_list(vec1)) {
+  } else if (is_bare_list(vec1) && abs_tol != 0) {
     vapply(seq_along(vec1), function(i) {
       entry1 <- vec1[[i]]
       entry2 <- vec2[[i]]
       vec_size(entry1) == vec_size(entry2) &&
-        # This is inconsistent with vec_equal on named vectors; to be
-        # consistently inconsistent, we avoid dispatching to vec_equal for bare
-        # lists even with abs_tol = 0:
-        identical(vec_ptype(entry1), vec_ptype(entry2)) &&
+        # Trying to follow vec_equal: strict on ptypes aside from vec_namedness:
+        identical(
+          vec_set_names(vec_ptype(entry1), NULL),
+          vec_set_names(vec_ptype(entry2), NULL)
+        ) &&
         all(vec_approx_equal0(entry1, entry2, na_equal, abs_tol))
     }, logical(1L))
   } else {
     # XXX No special handling for any other types/situations. Makes sense for
     # unclassed atomic things; custom classes (e.g., distributions) might want
     # recursion / specialization, though. vec_approx_equal0 should probably be
-    # an S3 method; see also `vctrs::vec_proxy_equal` though it's probably not
-    # sufficient (e.g., for keyed data frames such as `epi_df`s that should have
-    # strict & nonstrict columns). Also, abs_tol == 0 --> vec_equal logic should
-    # maybe be either be hoisted to vec_approx_equal or we should manually
-    # recurse on data frames even with abs_tol = 0 when that's faster (might
-    # depend on presence of inds*), after some inconsistencies are ironed out.
+    # an S3 method; see also `vctrs::vec_proxy_{equal,compare}` though they
+    # might not be sufficient (e.g., for keyed data frames such as `epi_df`s
+    # that should have strict & nonstrict columns). Also, abs_tol == 0 -->
+    # vec_equal logic should maybe be either be hoisted to vec_approx_equal or
+    # we should manually recurse on data frames even with abs_tol = 0 when
+    # that's faster (might depend on presence of inds*), after some
+    # inconsistencies are ironed out.
     if (!is.null(inds1)) {
       vec1 <- vec_slice(vec1, inds1)
       vec2 <- vec_slice(vec2, inds2)
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 336fe4baa..4383272d2 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -216,17 +216,3 @@ test_that("`epi_archive` rejects dataframes where time_value and version columns
     )
   expect_error(as_epi_archive(tbl3), class = "epiprocess__time_value_version_mismatch")
 })
-
-test_that("is_locf replacement works as expected", {
-  vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
-  is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
-  expect_equal(
-    c(
-      FALSE,
-      vec_approx_equal(head(vec, -1L), tail(vec, -1L),
-        na_equal = TRUE, abs_tol = .Machine$double.eps^0.5
-      )
-    ),
-    as.logical(is_repeated)
-  )
-})
diff --git a/tests/testthat/test-vec_approx_equal.R b/tests/testthat/test-vec_approx_equal.R
new file mode 100644
index 000000000..709f82bbf
--- /dev/null
+++ b/tests/testthat/test-vec_approx_equal.R
@@ -0,0 +1,57 @@
+test_that("is_locf replacement works as expected", {
+  vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
+  is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
+  expect_equal(
+    c(
+      FALSE,
+      vec_approx_equal(
+        head(vec, -1L), tail(vec, -1L),
+        na_equal = TRUE, abs_tol = .Machine$double.eps^0.5
+      )
+    ),
+    as.logical(is_repeated)
+  )
+})
+
+test_that("vec_approx_equal is compatible with vec_equal on some edge cases", {
+  # Match (`==` and) `vec_equal` on NaN behavior:
+  tbl <- tibble::tribble(
+    ~x,   ~y,
+    NaN,   5,
+    NaN,  NA,
+    NA,  NaN,
+    NaN, NaN,
+  )
+  expect_identical(
+    vec_approx_equal(tbl$x, tbl$y, na_equal = FALSE, abs_tol = 1e-8),
+    vctrs::vec_equal(tbl$x, tbl$y, na_equal = FALSE)
+  )
+  expect_identical(
+    vec_approx_equal(tbl$x, tbl$y, na_equal = TRUE, abs_tol = 1e-8),
+    vctrs::vec_equal(tbl$x, tbl$y, na_equal = TRUE)
+  )
+
+  # Match `vec_equal` behavior on namedness, including within elements:
+  unnamed_list <- list(5)
+  named_list <- list(a = 5)
+  expect_identical(
+    vec_approx_equal(unnamed_list, named_list, na_equal = TRUE, abs_tol = 1e-8),
+    vec_equal(unnamed_list, named_list, na_equal = TRUE)
+  )
+  expect_identical(
+    vec_approx_equal(list(unnamed_list), list(named_list), na_equal = TRUE, abs_tol = 1e-8),
+    vec_equal(list(unnamed_list), list(named_list), na_equal = TRUE)
+  )
+
+  # Match `vec_equal` behavior on (p)types, including within elements:
+  dbl <- 5.0
+  int <- 5L
+  expect_identical(
+    vec_approx_equal(dbl, int, na_equal = TRUE, abs_tol = 1e-8),
+    vec_equal(dbl, int, na_equal = TRUE)
+  )
+  expect_identical(
+    vec_approx_equal(list(dbl), list(int), na_equal = TRUE, abs_tol = 1e-8),
+    vec_equal(list(dbl), list(int), na_equal = TRUE)
+  )
+})

From 5c8abd3b8b7c204206a6b5a418bf74004224779c Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 25 Mar 2025 16:33:02 -0700
Subject: [PATCH 049/107] fix(vec_approx_equal): don't assume inds1 and inds2
 non-NULL together

---
 R/patch.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/patch.R b/R/patch.R
index fe9d1b6db..843bc1c10 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -170,6 +170,8 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
     # inconsistencies are ironed out.
     if (!is.null(inds1)) {
       vec1 <- vec_slice(vec1, inds1)
+    }
+    if (!is.null(inds2)) {
       vec2 <- vec_slice(vec2, inds2)
     }
     res <- vec_equal(vec1, vec2, na_equal = na_equal)

From 71104999edb8887eed5f98182d9d625e8c82bd45 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 25 Mar 2025 17:40:03 -0700
Subject: [PATCH 050/107] perf(vec_approx_equal): avoid vec_cast_common, ptype2
 work

---
 R/patch.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 843bc1c10..189ae9615 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -102,7 +102,12 @@ vec_approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol,
   } else {
     inds2 <- vec_recycle(inds2, common_size)
   }
-  vecs <- vec_cast_common(vec1, vec2, .to = .ptype)
+  if (!identical(vec_ptype(vec1), vec_ptype(vec2)) || !is.null(.ptype)) {
+    # perf: this is slow, so try to avoid it if it's not needed
+    vecs <- vec_cast_common(vec1, vec2, .to = .ptype)
+  } else {
+    vecs <- list(vec1, vec2)
+  }
   vec_approx_equal0(vecs[[1]], vecs[[2]], na_equal, abs_tol, inds1, inds2)
 }
 
@@ -174,7 +179,9 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
     if (!is.null(inds2)) {
       vec2 <- vec_slice(vec2, inds2)
     }
-    res <- vec_equal(vec1, vec2, na_equal = na_equal)
+    # perf: vec1 and vec2 have already been cast to a common ptype; we can't
+    # disable casts, but can say to cast (again...) to that ptype
+    res <- vec_equal(vec1, vec2, na_equal = na_equal, vec_ptype(vec1))
     return(res)
   }
 }

From 90c9da7094e85b49f11235978c2e6f8a9a2a75aa Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 25 Mar 2025 17:42:04 -0700
Subject: [PATCH 051/107] docs(vec_approx_equal): convert comment to issue

---
 R/patch.R | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 189ae9615..7167bc8c0 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -163,16 +163,9 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
         all(vec_approx_equal0(entry1, entry2, na_equal, abs_tol))
     }, logical(1L))
   } else {
-    # XXX No special handling for any other types/situations. Makes sense for
-    # unclassed atomic things; custom classes (e.g., distributions) might want
-    # recursion / specialization, though. vec_approx_equal0 should probably be
-    # an S3 method; see also `vctrs::vec_proxy_{equal,compare}` though they
-    # might not be sufficient (e.g., for keyed data frames such as `epi_df`s
-    # that should have strict & nonstrict columns). Also, abs_tol == 0 -->
-    # vec_equal logic should maybe be either be hoisted to vec_approx_equal or
-    # we should manually recurse on data frames even with abs_tol = 0 when
-    # that's faster (might depend on presence of inds*), after some
-    # inconsistencies are ironed out.
+    # No special handling for any other types/situations. We may want to allow
+    # S3 extension of this method or of a new appropriate vec_proxy_* variant.
+    # See Issue #640.
     if (!is.null(inds1)) {
       vec1 <- vec_slice(vec1, inds1)
     }

From 2fcb00c94508aeb0dad1029e0dce5e4b80e34aa0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 25 Mar 2025 18:01:08 -0700
Subject: [PATCH 052/107] docs(vec_approx_equal): clean up some
 outdated/misleading comments

---
 R/patch.R               | 7 ++++---
 man/vec_approx_equal.Rd | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 7167bc8c0..28176366a 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -20,7 +20,8 @@
 #' @param inds1,inds2 optional (row) indices into vec1 and vec2 compatible with
 #'   [`vctrs::vec_slice()`]; output should be consistent with `vec_slice`-ing to
 #'   these indices beforehand, but can give faster computation if `vec1` and
-#'   `vec2` are data frames.
+#'   `vec2` are data frames. Currently, any speedup is only by making sure that
+#'   `vec_slice` is used rather than `[` for data frames.
 #'
 #' @return logical vector, with length matching the result of recycling `vec1`
 #'   (at `inds1` if provided) and `vec2` (at `inds2` if provided); entries
@@ -116,12 +117,12 @@ vec_approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol,
 #' @keywords internal
 vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL) {
   if (is_bare_numeric(vec1) && abs_tol != 0) {
-    # perf: since we're working with bare numerics and logicals: we can use `[`
-    # and `fifelse`. Matching vec_equal, we ignore names and other attributes.
+    # Matching vec_equal, we ignore names and other attributes.
     if (!is.null(inds1)) vec1 <- vec_slice(vec1, inds1)
     if (!is.null(inds2)) vec2 <- vec_slice(vec2, inds2)
     na_or_nan1 <- is.na(vec1)
     na_or_nan2 <- is.na(vec2)
+    # Since above are bare logical vectors, we can use `fifelse`
     res <- fifelse(
       !na_or_nan1 & !na_or_nan2,
       abs(vec1 - vec2) <= abs_tol,
diff --git a/man/vec_approx_equal.Rd b/man/vec_approx_equal.Rd
index ff403a1c6..0b80d9910 100644
--- a/man/vec_approx_equal.Rd
+++ b/man/vec_approx_equal.Rd
@@ -37,7 +37,8 @@ by name)}
 \item{inds1, inds2}{optional (row) indices into vec1 and vec2 compatible with
 \code{\link[vctrs:vec_slice]{vctrs::vec_slice()}}; output should be consistent with \code{vec_slice}-ing to
 these indices beforehand, but can give faster computation if \code{vec1} and
-\code{vec2} are data frames.}
+\code{vec2} are data frames. Currently, any speedup is only by making sure that
+\code{vec_slice} is used rather than \code{[} for data frames.}
 }
 \value{
 logical vector, with length matching the result of recycling \code{vec1}

From 1b10cbe2113820e13dc45937b440aba402a95c70 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 11:33:57 -0700
Subject: [PATCH 053/107] Update is_locf replacement test for new NA vs. NaN
 results

---
 tests/testthat/test-vec_approx_equal.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-vec_approx_equal.R b/tests/testthat/test-vec_approx_equal.R
index 709f82bbf..18235224c 100644
--- a/tests/testthat/test-vec_approx_equal.R
+++ b/tests/testthat/test-vec_approx_equal.R
@@ -1,6 +1,6 @@
 test_that("is_locf replacement works as expected", {
   vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
-  is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
+  is_repeated <- c(0, 1, 0, 1, 0, 1, 0, 1)
   expect_equal(
     c(
       FALSE,

From 118d0edca71a31ba50d0b55d3829cc712b5140d9 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 11:34:16 -0700
Subject: [PATCH 054/107] docs(vec_approx_equal): update wrt compatibility
 fixes

---
 R/patch.R               | 4 +---
 man/vec_approx_equal.Rd | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 28176366a..51ae2486d 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -1,8 +1,6 @@
 #' Test two vctrs vectors for equality with some tolerance in some cases
 #'
-#' Similar to [`vctrs::vec_equal`]. Behavior may differ from `vec_equal` with
-#' non-`NA` `NaN`s involved, or for bare lists that contain named vectors, and
-#' the precise behavior in these cases may change and should not be relied upon.
+#' Generalizes [`vctrs::vec_equal`].
 #'
 #' @param vec1,vec2 vctrs vectors (includes data frames). Take care when using
 #'   on named vectors or "keyed" data frames; [`vec_names()`] are largely
diff --git a/man/vec_approx_equal.Rd b/man/vec_approx_equal.Rd
index 0b80d9910..4bab9f77f 100644
--- a/man/vec_approx_equal.Rd
+++ b/man/vec_approx_equal.Rd
@@ -46,9 +46,7 @@ logical vector, with length matching the result of recycling \code{vec1}
 should all be \code{TRUE} or \code{FALSE} if \code{na_equal = TRUE}.
 }
 \description{
-Similar to \code{\link[vctrs:vec_equal]{vctrs::vec_equal}}. Behavior may differ from \code{vec_equal} with
-non-\code{NA} \code{NaN}s involved, or for bare lists that contain named vectors, and
-the precise behavior in these cases may change and should not be relied upon.
+Generalizes \code{\link[vctrs:vec_equal]{vctrs::vec_equal}}.
 }
 \examples{
 

From 33414598c13412babf077d5f622722c3d432ec20 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 13:48:01 -0700
Subject: [PATCH 055/107] refactor(epi_slide_opt.epi_archive): address TODOs

- Column name clobbering is already checked for / avoided in main function code.
- Move input nesting and output rbinding inside of
  epi_slide_opt_archive_one_epikey.
---
 R/epi_slide_opt_archive.R                   | 48 ++++++++++-----------
 man/epi_slide_opt_archive_one_epikey.Rd     | 21 +++++----
 tests/testthat/test-epi_slide_opt_archive.R | 27 +++++-------
 3 files changed, 47 insertions(+), 49 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 070dc431c..f9cc00f02 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -1,8 +1,9 @@
 #' Core operation of `epi_slide_opt.epi_archive` for a single epikey's history
 #'
-#' @param updates tibble with two columns: `version` and `subtbl`; `subtbl` is a
-#'   list of tibbles, each with a `time_value` column and measurement columns.
-#'   The epikey should not appear.
+#' @param grp_updates tibble with a `version` column and measurement columns for
+#'   a single epikey, without the epikey labeling columns (e.g., from
+#'   `group_modify`). Interpretation is analogous to an `epi_archive` `DT`, but
+#'   a specific row order is not required.
 #' @param in_colnames chr; names of columns to which to apply `f_dots_baked`
 #' @param f_dots_baked supported sliding function from `{data.table}` or
 #'   `{slider}`, potentially with some arguments baked in with
@@ -17,13 +18,15 @@
 #' @param time_type as in `new_epi_archive`
 #' @param out_colnames chr, same length as `in_colnames`; column names to use
 #'   for results
-#' @return list of tibbles with same names as `subtbl`s plus: `c(out_colnames,
-#'   "version")`; (compactified) diff data to put into an `epi_archive`
+#' @return tibble with a `version` column, pre-existing measurement columns, and
+#'   new measurement columns; (compactified) diff data to put into an
+#'   `epi_archive`. May not match column ordering; may not ensure any row
+#'   ordering.
 #'
 #' @examples
 #'
 #' library(dplyr)
-#' updates <- bind_rows(
+#' grp_updates <- bind_rows(
 #'   tibble(version = 10, time_value = 1:20, value = 1:20),
 #'   tibble(version = 12, time_value = 4:5, value = 5:4),
 #'   tibble(version = 13, time_value = 8, value = 9),
@@ -31,29 +34,30 @@
 #'   tibble(version = 15, time_value = -10, value = -10),
 #'   tibble(version = 16, time_value = 50, value = 50)
 #' ) %>%
-#'   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x)) %>%
-#'   tidyr::nest(.by = version, .key = "subtbl")
+#'   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x))
 #'
 #' f <- purrr::partial(data.table::frollmean, algo = "exact")
 #'
-#' updates %>%
+#' grp_updates %>%
 #'   epiprocess:::epi_slide_opt_archive_one_epikey(
 #'     "value", f, "data.table", 2L, 0L, "day", "slide_value"
 #'   )
 #'
 #' @keywords internal
 epi_slide_opt_archive_one_epikey <- function(
-    updates,
+    grp_updates,
     in_colnames,
     f_dots_baked, f_from_package, before, after, time_type,
     out_colnames) {
-  # TODO check for col name clobbering
+  grp_updates_by_version <- grp_updates %>%
+    nest(.by = version, .key = "subtbl") %>%
+    arrange(version)
   unit_step <- unit_time_delta(time_type)
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
-  result <- map(seq_len(nrow(updates)), function(update_i) {
-    version <- updates$version[[update_i]]
-    inp_update <- updates$subtbl[[update_i]]
+  result <- map(seq_len(nrow(grp_updates_by_version)), function(version_i) {
+    version <- grp_updates_by_version$version[[version_i]]
+    inp_update <- grp_updates_by_version$subtbl[[version_i]]
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
     if (before == Inf) {
       if (after != 0) {
@@ -124,6 +128,7 @@ epi_slide_opt_archive_one_epikey <- function(
     out_diff$version <- version
     out_diff
   })
+  result <- list_rbind(result)
   result
 }
 
@@ -188,26 +193,21 @@ epi_slide_opt.epi_archive <-
     updates_grouped <- .x$DT %>%
       as.data.frame() %>%
       as_tibble(.name_repair = "minimal") %>%
-      # 0 rows input -> 0 rows output, so we can just say drop = TRUE:
-      grouped_df(epikey_names, TRUE)
+      # 0 rows input -> 0 rows output for any drop = FALSE groups with 0 rows, so
+      # we can just say drop = TRUE:
+      grouped_df(epikey_names, drop = TRUE)
     if (use_progress) progress_bar_id <- cli::cli_progress_bar(.progress, total = n_groups(updates_grouped))
     result <- updates_grouped %>%
       group_modify(function(group_values, group_key) {
-        group_updates <- group_values %>%
-          nest(.by = version, .key = "subtbl") %>%
-          arrange(version)
-        # TODO move nesting inside the helper?
         res <- epi_slide_opt_archive_one_epikey(
-          group_updates,
+          group_values,
           names_info$input_col_names,
           .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type,
           names_info$output_col_names
-        ) %>%
-          list_rbind()
+        )
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
         res
       }) %>%
-      ungroup() %>%
       as.data.frame() %>% # data.table#6859
       as.data.table(key = key(.x$DT)) %>%
       new_epi_archive(
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index 78eec4fdf..79d635c88 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -5,7 +5,7 @@
 \title{Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history}
 \usage{
 epi_slide_opt_archive_one_epikey(
-  updates,
+  grp_updates,
   in_colnames,
   f_dots_baked,
   f_from_package,
@@ -16,9 +16,10 @@ epi_slide_opt_archive_one_epikey(
 )
 }
 \arguments{
-\item{updates}{tibble with two columns: \code{version} and \code{subtbl}; \code{subtbl} is a
-list of tibbles, each with a \code{time_value} column and measurement columns.
-The epikey should not appear.}
+\item{grp_updates}{tibble with a \code{version} column and measurement columns for
+a single epikey, without the epikey labeling columns (e.g., from
+\code{group_modify}). Interpretation is analogous to an \code{epi_archive} \code{DT}, but
+a specific row order is not required.}
 
 \item{in_colnames}{chr; names of columns to which to apply \code{f_dots_baked}}
 
@@ -42,7 +43,10 @@ to include in the sliding window computation}
 for results}
 }
 \value{
-list of tibbles with same names as \code{subtbl}s plus: \code{c(out_colnames, "version")}; (compactified) diff data to put into an \code{epi_archive}
+tibble with a \code{version} column, pre-existing measurement columns, and
+new measurement columns; (compactified) diff data to put into an
+\code{epi_archive}. May not match column ordering; may not ensure any row
+ordering.
 }
 \description{
 Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
@@ -50,7 +54,7 @@ Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
 \examples{
 
 library(dplyr)
-updates <- bind_rows(
+grp_updates <- bind_rows(
   tibble(version = 10, time_value = 1:20, value = 1:20),
   tibble(version = 12, time_value = 4:5, value = 5:4),
   tibble(version = 13, time_value = 8, value = 9),
@@ -58,12 +62,11 @@ updates <- bind_rows(
   tibble(version = 15, time_value = -10, value = -10),
   tibble(version = 16, time_value = 50, value = 50)
 ) \%>\%
-  mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x)) \%>\%
-  tidyr::nest(.by = version, .key = "subtbl")
+  mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x))
 
 f <- purrr::partial(data.table::frollmean, algo = "exact")
 
-updates \%>\%
+grp_updates \%>\%
   epiprocess:::epi_slide_opt_archive_one_epikey(
     "value", f, "data.table", 2L, 0L, "day", "slide_value"
   )
diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index 088452da5..768107a4d 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -3,7 +3,7 @@ library(dplyr)
 test_that("epi_slide_opt_archive_one_epikey works as expected", {
   start_date <- as.Date("2020-01-01")
 
-  updates <- bind_rows(
+  grp_updates <- bind_rows(
     tibble(version = 10, time_value = 0:20, value = 0:20),
     tibble(version = 12, time_value = 4:5, value = 5:4),
     tibble(version = 13, time_value = 8, value = 9),
@@ -11,16 +11,13 @@ test_that("epi_slide_opt_archive_one_epikey works as expected", {
     tibble(version = 15, time_value = -10, value = -10),
     tibble(version = 16, time_value = 50, value = 50)
   ) %>%
-    mutate(across(c(version, time_value), ~ start_date - 1 + .x)) %>%
-    tidyr::nest(.by = version, .key = "subtbl")
+    mutate(across(c(version, time_value), ~ start_date - 1 + .x))
 
   expected <- list(
-    vctrs::vec_cbind(
-      tibble(version = 10),
-      updates$subtbl[[1L]] %>%
-        mutate(time_value = as.numeric(time_value - start_date) + 1) %>%
-        mutate(slide_value = frollmean(value, 3, algo = "exact"))
-    ),
+    grp_updates %>%
+      slice_min(version) %>%
+      mutate(across(c(version, time_value), ~ as.numeric(.x - start_date) + 1)) %>%
+      mutate(slide_value = frollmean(value, 3, algo = "exact")),
     tibble(
       version = 12,
       time_value = c(4, 5, 7), # time 6 unchanged, compactified away
@@ -50,17 +47,15 @@ test_that("epi_slide_opt_archive_one_epikey works as expected", {
     lapply(function(x) {
       x %>%
         mutate(across(c(version, time_value), ~ start_date - 1 + .x))
-    })
+    }) %>%
+    list_rbind()
 
   f <- purrr::partial(data.table::frollmean, algo = "exact")
 
-  result <- updates %>%
+  result <- grp_updates %>%
     epiprocess:::epi_slide_opt_archive_one_epikey("value", f, "data.table", 2L, 0L, "day", "slide_value") %>%
-    lapply(function(x) {
-      x %>%
-        arrange(time_value) %>%
-        select(version, time_value, everything())
-    })
+    arrange(version, time_value) %>%
+    select(version, time_value, everything())
 
   expect_equal(result, expected)
 })

From 06b4286e5ce44124338bb953437d43efe967ddf9 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:16:54 -0700
Subject: [PATCH 056/107] Update R/epi_slide_opt_edf.R

Co-authored-by: nmdefries <42820733+nmdefries@users.noreply.github.com>
---
 R/epi_slide_opt_edf.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index b6be49a1a..24de63f2c 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -42,8 +42,8 @@ upstream_slide_f_info <- function(.f) {
     # `f` is from somewhere else and not supported
     cli_abort(
       c(
-        "problem with {rlang::expr_label(rlang::caller_arg(f))}",
-        "i" = "`f` must be one of `data.table`'s rolling functions (`frollmean`,
+        "problem with {rlang::expr_label(rlang::caller_arg(.f))}",
+        "i" = "`.f` must be one of `data.table`'s rolling functions (`frollmean`,
               `frollsum`, `frollapply`. See `?data.table::roll`) or one of
               `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
               etc. See `?slider::\`summary-slide\`` for more options)."

From 82cfbddb2965cfa89adf2fa5dedc55b23196b1e5 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:17:14 -0700
Subject: [PATCH 057/107] Update R/epi_slide_opt_edf.R

Co-authored-by: nmdefries <42820733+nmdefries@users.noreply.github.com>
---
 R/epi_slide_opt_edf.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 24de63f2c..0444b857e 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -83,7 +83,7 @@ upstream_slide_f_info <- function(.f) {
 #' @keywords internal
 across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
                                   .window_size, .align, .prefix, .suffix, .new_col_names) {
-  # The position of a given column can be differ between input `.x` and
+  # The position of a given column can differ between input `.x` and
   # `.data_group` since the grouping step by default drops grouping columns.
   # To avoid rerunning `eval_select` for every `.data_group`, convert
   # positions of user-provided `col_names` into string column names. We avoid

From c2216a21e3dd1cb09fc03adc3bbb71c873bc8661 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:17:26 -0700
Subject: [PATCH 058/107] Update R/epi_slide_opt_edf.R

Co-authored-by: nmdefries <42820733+nmdefries@users.noreply.github.com>
---
 R/epi_slide_opt_edf.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 0444b857e..5d28774f8 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -168,7 +168,7 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
 #'
 #' * On `epi_df`s, it will take care of looping over `geo_value`s, temporarily
 #'   filling in time gaps with `NA`s and other work needed to ensure there are
-#'   exactly n consecutive time steps per computation, and has some other
+#'   exactly `n` consecutive time steps per computation, and has some other
 #'   convenience features. See `vignette("epi_df")` for more examples.
 #'
 #' * On `epi_archive`s, it will calculate the version history for these slide

From 3296b2d5f40434f87b0536dd1d3afbbc37efb032 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:17:45 -0700
Subject: [PATCH 059/107] Update R/epi_slide_opt_edf.R

Co-authored-by: nmdefries <42820733+nmdefries@users.noreply.github.com>
---
 R/epi_slide_opt_edf.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 5d28774f8..508d7e6b4 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -163,7 +163,7 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
 #' @description
 #'
 #' `epi_slide_opt` calculates n-time-step rolling means&sums,
-#' cumulative/"running" means&sums, or other operations supported by
+#' cumulative/"running" means&sums, and other operations supported by
 #' [`data.table::froll`] or [`slider::summary-slide`] functions.
 #'
 #' * On `epi_df`s, it will take care of looping over `geo_value`s, temporarily

From f14c26da978fd55810ae628c0a2e42544dc92a52 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Wed, 26 Mar 2025 21:19:21 +0000
Subject: [PATCH 060/107] docs: document (GHA)

---
 man/epi_slide_opt.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index 1d5999578..bd213828f 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -140,7 +140,7 @@ cumulative/"running" means&sums, or other operations supported by
 \itemize{
 \item On \code{epi_df}s, it will take care of looping over \code{geo_value}s, temporarily
 filling in time gaps with \code{NA}s and other work needed to ensure there are
-exactly n consecutive time steps per computation, and has some other
+exactly \code{n} consecutive time steps per computation, and has some other
 convenience features. See \code{vignette("epi_df")} for more examples.
 \item On \code{epi_archive}s, it will calculate the version history for these slide
 computations and combine it with the version history for the rest of the

From 87a01d004cd3acd6e747d1ca5d124df8e8962016 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Wed, 26 Mar 2025 21:19:47 +0000
Subject: [PATCH 061/107] docs: document (GHA)

---
 man/epi_slide_opt.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index bd213828f..8c7d5ad49 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -135,7 +135,7 @@ as \code{.x} if \code{.x} was grouped.
 }
 \description{
 \code{epi_slide_opt} calculates n-time-step rolling means&sums,
-cumulative/"running" means&sums, or other operations supported by
+cumulative/"running" means&sums, and other operations supported by
 \code{\link[data.table:froll]{data.table::froll}} or \code{\link[slider:summary-slide]{slider::summary-slide}} functions.
 \itemize{
 \item On \code{epi_df}s, it will take care of looping over \code{geo_value}s, temporarily

From 4bc5b234d438d62f5b61faf2d3336b2c83c077dc Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:35:00 -0700
Subject: [PATCH 062/107] refactor(epi_slide_opt_archive): add classes to
 errors

---
 R/epi_slide_opt_archive.R | 12 ++++++++----
 R/patch.R                 | 25 ++++++++++++++++---------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index f9cc00f02..eba73e196 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -61,7 +61,8 @@ epi_slide_opt_archive_one_epikey <- function(
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
     if (before == Inf) {
       if (after != 0) {
-        cli_abort('.window_size = Inf is only supported with .align = "right"')
+        cli_abort('.window_size = Inf is only supported with .align = "right"',
+                  class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align")
       }
       # We need to use the entire input snapshot range, filling in time gaps. We
       # shouldn't pad the ends.
@@ -105,7 +106,8 @@ epi_slide_opt_archive_one_epikey <- function(
         slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
       }
     } else {
-      cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}")
+      cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}",
+                class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid")
     }
     rows_should_keep <-
       if (before == Inf) {
@@ -176,10 +178,12 @@ epi_slide_opt.epi_archive <-
     )
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
     if (!is.null(.ref_time_values)) {
-      cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument")
+      cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument",
+                class = "epiprocess__epi_slide_opt_archive__ref_time_values_unsupported")
     }
     if (!identical(.all_rows, FALSE)) {
-      cli_abort("epi_slide.epi_archive does not support the `.all_rows` argument")
+      cli_abort("epi_slide.epi_archive does not support the `.all_rows` argument",
+                class = "epiprocess__epi_slide_opt_archive__all_rows_unsupported")
     }
     assert(
       checkmate::check_logical(.progress, any.missing = FALSE, len = 1L, names = "unnamed"),
diff --git a/R/patch.R b/R/patch.R
index 51ae2486d..ed09541c9 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -208,16 +208,19 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # Most input validation + handle NULL earlier_snapshot. This is a small function so
   # use faster validation variants:
   if (!is_tibble(later_tbl)) {
-    cli_abort("`later_tbl` must be a tibble")
+    cli_abort("`later_tbl` must be a tibble",
+              class = "epiprocess__tbl_diff2__later_tbl_invalid")
   }
   if (is.null(earlier_snapshot)) {
     return(later_tbl)
   }
   if (!is_tibble(earlier_snapshot)) {
-    cli_abort("`earlier_snapshot` must be a tibble or `NULL`")
+    cli_abort("`earlier_snapshot` must be a tibble or `NULL`",
+              class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid")
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(earlier_snapshot))) {
-    cli_abort("`ukey_names` must be a subset of column names")
+    cli_abort("`ukey_names` must be a subset of column names",
+              class = "epiprocess__tbl_diff2__ukey_names_class_invalid")
   }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
   if (!(is.vector(compactify_abs_tol, mode = "numeric") &&
@@ -226,7 +229,8 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
     # Give a specific message:
     assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)
     # Fallback e.g. for invalid classes not caught by assert_numeric:
-    cli_abort("`compactify_abs_tol` must be a length-1 double/integer >= 0")
+    cli_abort("`compactify_abs_tol` must be a length-1 double/integer >= 0",
+              class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid")
   }
 
   # Extract metadata:
@@ -241,7 +245,7 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
                  names and ordering.",
       "*" = "`earlier_snapshot` colnames: {format_chr_deparse(tbl_names)}",
       "*" = "`later_tbl` colnames: {format_chr_deparse(names(later_tbl))}"
-    ))
+    ), class = "epiprocess__tbl_diff2__tbl_names_differ")
   }
 
   combined_tbl <- vec_rbind(earlier_snapshot, later_tbl)
@@ -330,23 +334,26 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   # Most input validation. This is a small function so use faster validation
   # variants:
   if (!is_tibble(update)) {
-    cli_abort("`update` must be a tibble")
+    cli_abort("`update` must be a tibble",
+              class = "epiprocess__tbl_patch__update_class_invalid")
   }
   if (is.null(snapshot)) {
     return(update)
   }
   if (!is_tibble(snapshot)) {
-    cli_abort("`snapshot` must be a tibble")
+    cli_abort("`snapshot` must be a tibble",
+              class = "epiprocess__tbl_patch__snapshot_class_invalid")
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(snapshot))) {
-    cli_abort("`ukey_names` must be a subset of column names")
+    cli_abort("`ukey_names` must be a subset of column names",
+              class = "epiprocess__tbl_patch__ukey_names_invalid")
   }
   if (!identical(names(snapshot), names(update))) {
     cli_abort(c("`snapshot` and `update` should have identical column
                  names and ordering.",
       "*" = "`snapshot` colnames: {format_chr_deparse(tbl_names)}",
       "*" = "`update` colnames: {format_chr_deparse(names(update))}"
-    ))
+      ), class = "epiprocess__tbl_patch__tbl_names_invalid")
   }
 
   result_tbl <- vec_rbind(update, snapshot)

From 711419a9f4c3f9499e7c90117bd2f258e89599cc Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:35:24 -0700
Subject: [PATCH 063/107] Skip malfunctioning test being updated separately

---
 tests/testthat/test-compactify.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-compactify.R b/tests/testthat/test-compactify.R
index 229af8453..e314b299f 100644
--- a/tests/testthat/test-compactify.R
+++ b/tests/testthat/test-compactify.R
@@ -124,7 +124,7 @@ quantile_pred_once <- function(estimates_vec, levels_vec) {
   hardhat::quantile_pred(t(as.matrix(estimates_vec)), levels_vec)
 }
 test_that("compactify works on distributions", {
-  skip("Until #611 is merged or hardhat/epipredict is patched")
+  skip("See #631.")
   forecasts <- tibble(
     ahead = 2L,
     geo_value = "ak",

From 818f25a7dbb165eb112bb7b29160db4209e11cd2 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Wed, 26 Mar 2025 21:39:21 +0000
Subject: [PATCH 064/107] style: styler (GHA)

---
 R/epi_slide_opt_archive.R | 12 ++++++++----
 R/patch.R                 | 23 +++++++++++++++--------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index eba73e196..f98b80fbb 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -62,7 +62,8 @@ epi_slide_opt_archive_one_epikey <- function(
     if (before == Inf) {
       if (after != 0) {
         cli_abort('.window_size = Inf is only supported with .align = "right"',
-                  class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align")
+          class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align"
+        )
       }
       # We need to use the entire input snapshot range, filling in time gaps. We
       # shouldn't pad the ends.
@@ -107,7 +108,8 @@ epi_slide_opt_archive_one_epikey <- function(
       }
     } else {
       cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}",
-                class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid")
+        class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
+      )
     }
     rows_should_keep <-
       if (before == Inf) {
@@ -179,11 +181,13 @@ epi_slide_opt.epi_archive <-
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
     if (!is.null(.ref_time_values)) {
       cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument",
-                class = "epiprocess__epi_slide_opt_archive__ref_time_values_unsupported")
+        class = "epiprocess__epi_slide_opt_archive__ref_time_values_unsupported"
+      )
     }
     if (!identical(.all_rows, FALSE)) {
       cli_abort("epi_slide.epi_archive does not support the `.all_rows` argument",
-                class = "epiprocess__epi_slide_opt_archive__all_rows_unsupported")
+        class = "epiprocess__epi_slide_opt_archive__all_rows_unsupported"
+      )
     }
     assert(
       checkmate::check_logical(.progress, any.missing = FALSE, len = 1L, names = "unnamed"),
diff --git a/R/patch.R b/R/patch.R
index ed09541c9..dad535683 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -209,18 +209,21 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # use faster validation variants:
   if (!is_tibble(later_tbl)) {
     cli_abort("`later_tbl` must be a tibble",
-              class = "epiprocess__tbl_diff2__later_tbl_invalid")
+      class = "epiprocess__tbl_diff2__later_tbl_invalid"
+    )
   }
   if (is.null(earlier_snapshot)) {
     return(later_tbl)
   }
   if (!is_tibble(earlier_snapshot)) {
     cli_abort("`earlier_snapshot` must be a tibble or `NULL`",
-              class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid")
+      class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid"
+    )
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(earlier_snapshot))) {
     cli_abort("`ukey_names` must be a subset of column names",
-              class = "epiprocess__tbl_diff2__ukey_names_class_invalid")
+      class = "epiprocess__tbl_diff2__ukey_names_class_invalid"
+    )
   }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
   if (!(is.vector(compactify_abs_tol, mode = "numeric") &&
@@ -230,7 +233,8 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
     assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)
     # Fallback e.g. for invalid classes not caught by assert_numeric:
     cli_abort("`compactify_abs_tol` must be a length-1 double/integer >= 0",
-              class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid")
+      class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid"
+    )
   }
 
   # Extract metadata:
@@ -335,25 +339,28 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   # variants:
   if (!is_tibble(update)) {
     cli_abort("`update` must be a tibble",
-              class = "epiprocess__tbl_patch__update_class_invalid")
+      class = "epiprocess__tbl_patch__update_class_invalid"
+    )
   }
   if (is.null(snapshot)) {
     return(update)
   }
   if (!is_tibble(snapshot)) {
     cli_abort("`snapshot` must be a tibble",
-              class = "epiprocess__tbl_patch__snapshot_class_invalid")
+      class = "epiprocess__tbl_patch__snapshot_class_invalid"
+    )
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(snapshot))) {
     cli_abort("`ukey_names` must be a subset of column names",
-              class = "epiprocess__tbl_patch__ukey_names_invalid")
+      class = "epiprocess__tbl_patch__ukey_names_invalid"
+    )
   }
   if (!identical(names(snapshot), names(update))) {
     cli_abort(c("`snapshot` and `update` should have identical column
                  names and ordering.",
       "*" = "`snapshot` colnames: {format_chr_deparse(tbl_names)}",
       "*" = "`update` colnames: {format_chr_deparse(names(update))}"
-      ), class = "epiprocess__tbl_patch__tbl_names_invalid")
+    ), class = "epiprocess__tbl_patch__tbl_names_invalid")
   }
 
   result_tbl <- vec_rbind(update, snapshot)

From 916318a3b1e17a3cdac85c21a32effd58de211e0 Mon Sep 17 00:00:00 2001
From: Nat DeFries <42820733+nmdefries@users.noreply.github.com>
Date: Wed, 26 Mar 2025 14:47:43 -0700
Subject: [PATCH 065/107] Use data.table column looping in
 epi_slide_opt.epi_archive

---
 R/epi_slide_opt_archive.R | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index f98b80fbb..a5d373d89 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -88,19 +88,19 @@ epi_slide_opt_archive_one_epikey <- function(
     slide <- inp_snapshot[slide_inp_backrefs, ]
     slide$time_value <- slide_time_values
     if (f_from_package == "data.table") {
-      for (col_i in seq_along(in_colnames)) {
-        if (before == Inf) {
-          slide[[out_colnames[[col_i]]]] <-
-            f_dots_baked(slide[[in_colnames[[col_i]]]], seq_len(slide_nrow), adaptive = TRUE)
-        } else {
-          out_col <- f_dots_baked(slide[[in_colnames[[col_i]]]], before + after + 1L)
-          if (after != 0L) {
-            # data.table always puts NAs at tails, even with na.rm = TRUE; chop
-            # off extra NAs from beginning and place missing NAs at end:
-            out_col <- c(out_col[seq(after + 1L, slide_nrow)], rep(NA, after))
-          }
-          slide[[out_colnames[[col_i]]]] <- out_col
+      if (before == Inf) {
+        slide[, out_colnames] <-
+          f_dots_baked(slide[, in_colnames], seq_len(slide_nrow), adaptive = TRUE)
+      } else {
+        out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
+        if (after != 0L) {
+          # data.table always puts NAs at tails, even with na.rm = TRUE; chop
+          # off extra NAs from beginning and place missing NAs at end:
+          out_cols <- purrr::map(out_cols, function(.x) {
+            c(.x[seq(after + 1L, slide_nrow)], rep(NA, after))
+          })
         }
+        slide[, out_colnames] <- out_cols
       }
     } else if (f_from_package == "slider") {
       for (col_i in seq_along(in_colnames)) {

From 6575f3d485b0cda0226a3e18cf0ea4c8d7e30e29 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 14:56:32 -0700
Subject: [PATCH 066/107] Avoid `map` overhead in loop

---
 R/epi_slide_opt_archive.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index a5d373d89..818c273c1 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -96,8 +96,8 @@ epi_slide_opt_archive_one_epikey <- function(
         if (after != 0L) {
           # data.table always puts NAs at tails, even with na.rm = TRUE; chop
           # off extra NAs from beginning and place missing NAs at end:
-          out_cols <- purrr::map(out_cols, function(.x) {
-            c(.x[seq(after + 1L, slide_nrow)], rep(NA, after))
+          out_cols <- lapply(out_cols, function(out_col) {
+            c(out_col[seq(after + 1L, slide_nrow)], rep(NA, after))
           })
         }
         slide[, out_colnames] <- out_cols

From 9b30f46841c69544db7a454cf3061853d6c32a3e Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 15:23:43 -0700
Subject: [PATCH 067/107] Work toward proper `fill =` + `.align` support in
 `epi_slide_opt()`

This approach to getting the `fill` value makes things make more sense
internally, but we still may need to manually deal with the impact of our own
padding and directly intercept or disallow `fill =`.

Suggested-by: nmdefries <nmdefries@users.noreply.github.com>
---
 R/epi_slide_opt_archive.R                   |  6 +++---
 R/epi_slide_opt_edf.R                       |  6 ++++--
 tests/testthat/test-epi_slide.R             | 23 +++++++++++++++++++++
 tests/testthat/test-epi_slide_opt_archive.R | 16 ++++++++++++++
 4 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 818c273c1..0cbea1277 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -94,10 +94,10 @@ epi_slide_opt_archive_one_epikey <- function(
       } else {
         out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
         if (after != 0L) {
-          # data.table always puts NAs at tails, even with na.rm = TRUE; chop
-          # off extra NAs from beginning and place missing NAs at end:
+          # data.table always puts `fill` arg (default NA) at the tails, even
+          # with na.rm = TRUE; chop off extra from beginning and place at end:
           out_cols <- lapply(out_cols, function(out_col) {
-            c(out_col[seq(after + 1L, slide_nrow)], rep(NA, after))
+            c(out_col[(after + 1L):length(out_col)], out_col[seq_len(after)])
           })
         }
         slide[, out_colnames] <- out_cols
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 508d7e6b4..fc75d1eaf 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -474,8 +474,10 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
         roll_output <- .f(x = .data_group[, input_col_names], n = window_size, adaptive = TRUE, ...)
       }
       if (window_args$after >= 1) {
-        .data_group[, output_col_names] <- purrr::map(roll_output, function(.x) {
-          c(.x[(window_args$after + 1L):length(.x)], rep(NA, window_args$after))
+        .data_group[, output_col_names] <- lapply(roll_output, function(out_col) {
+          # data.table always puts `fill` arg (default NA) at the tails, even
+          # with na.rm = TRUE; chop off extra from beginning and place at end:
+          c(out_col[(window_args$after + 1L):length(out_col)], out_col[seq_len(window_args$after)])
         })
       } else {
         .data_group[, output_col_names] <- roll_output
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index 0aa4aca7f..975e17121 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -937,3 +937,26 @@ test_that("epi_slide* output grouping matches input grouping", {
     character(0)
   )
 })
+
+test_that('`epi_slide_opt .align != "right"` respects `fill` arg', {
+  test_date <- as.Date("2020-01-01")
+  toy_edf <- tibble(
+    geo_value = 1,
+    time_value = test_date - 1 + 1:5,
+    value = c(1:3, NA, 5)
+  ) %>%
+    as_epi_df(as_of = test_date + 10)
+
+  result <- toy_edf %>%
+    epi_slide_opt(value, frollmean, .window_size = 3, .align = "left", fill = -1000)
+
+  expected <- tibble(
+    geo_value = 1,
+    time_value = as.Date("2020-01-01") - 1 + 1:5,
+    value = c(1:3, NA, 5),
+    value_3dlav = c(2, NA, NA, -1000, -1000)
+  ) %>%
+    as_epi_df(as_of = test_date + 10)
+
+  expect_equal(result, expected)
+})
diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index 768107a4d..d472a20c4 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -210,4 +210,20 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data; als
   )
 
   expect_equal(archive_cases_dv_subset_result, archive_cases_dv_subset_expected)
+
+  archive_cases_dv_subset_time_opt2 <- system.time(
+    archive_cases_dv_subset_result2 <- archive_cases_dv_subset %>%
+      epi_slide_opt(percent_cli, frollmean, .window_size = 7, .align = "left", fill = -1000)
+  )
+
+  archive_cases_dv_subset_time_gen2 <- system.time(
+    archive_cases_dv_subset_expected2 <- archive_cases_dv_subset %>%
+      epix_slide(
+        ~ .x %>% epi_slide_opt(percent_cli, frollmean, .window_size = 7, .align = "left", fill = -1000)
+      ) %>%
+      select(geo_value, time_value, version, everything()) %>%
+      as_epi_archive()
+  )
+
+  expect_equal(archive_cases_dv_subset_result2, archive_cases_dv_subset_expected2)
 })

From 21a0b38f4256657b1946aede0d809d2f8e14cb68 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 18:15:41 -0700
Subject: [PATCH 068/107] Don't try to support `epi_slide_opt(fill =)`

---
 R/epi_slide_opt_archive.R                   |  2 +-
 R/epi_slide_opt_edf.R                       | 12 ++++++++++--
 tests/testthat/test-epi_slide.R             | 19 ++++++-------------
 tests/testthat/test-epi_slide_opt_archive.R | 18 ++++--------------
 4 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 0cbea1277..1629bee9f 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -164,7 +164,7 @@ epi_slide_opt.epi_archive <-
     epikey_names <- key_colnames(.x, exclude = c("time_value", "version"))
     # Validation & pre-processing:
     .align <- arg_match(.align)
-    .f_info <- upstream_slide_f_info(.f)
+    .f_info <- upstream_slide_f_info(.f, ...)
     .f_dots_baked <-
       if (rlang::dots_n(...) == 0L) {
         # Leaving `.f` unchanged slightly improves computation speed and trims
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index fc75d1eaf..51d88e785 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -21,6 +21,8 @@ upstream_slide_f_possibilities <- tibble::tribble(
 #'
 #' @param .f function such as `data.table::frollmean` or `slider::slide_mean`;
 #'   must appear in [`upstream_slide_f_possibilities`]
+#' @param ... additional configuration args to `.f` (besides the data and window
+#'   size&alignment); used to validate `.f` is used in a supported way
 #' @return named list with two elements: `from_package`, a string containing the
 #'   upstream package name ("data.table" or "slider"), and `namer`, a function
 #'   that takes a column to call `.f` on and outputs a basic name or
@@ -28,7 +30,7 @@ upstream_slide_f_possibilities <- tibble::tribble(
 #'   (e.g., "sum", "av", "count").
 #'
 #' @keywords internal
-upstream_slide_f_info <- function(.f) {
+upstream_slide_f_info <- function(.f, ...) {
   assert_function(.f)
 
   # Check that slide function `.f` is one of those short-listed from
@@ -59,6 +61,12 @@ upstream_slide_f_info <- function(.f) {
                reprex::reprex to provide a minimal reproducible example.')
   }
   f_from_package <- f_info_row$package
+  if (f_from_package == "data.table" && "fill" %in% names(rlang::call_match(dots_expand = FALSE)[["..."]])) {
+    cli_abort("`epi_slide_opt` does not support `data.table::froll*()` with a
+               custom `fill =` arg",
+      class = "epiprocess__epi_slide_opt__fill_unsupported"
+    )
+  }
   list(
     from_package = f_from_package,
     namer = unwrap(f_info_row$namer)
@@ -384,7 +392,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
 
   # Validate/process .col_names, .f:
   col_names_quo <- enquo(.col_names)
-  f_info <- upstream_slide_f_info(.f)
+  f_info <- upstream_slide_f_info(.f, ...)
   f_from_package <- f_info$from_package
 
   # Validate/process .ref_time_values:
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index 975e17121..6fab55d44 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -938,7 +938,7 @@ test_that("epi_slide* output grouping matches input grouping", {
   )
 })
 
-test_that('`epi_slide_opt .align != "right"` respects `fill` arg', {
+test_that('`epi_slide_opt .align != "right"` errors on `fill` arg', {
   test_date <- as.Date("2020-01-01")
   toy_edf <- tibble(
     geo_value = 1,
@@ -947,16 +947,9 @@ test_that('`epi_slide_opt .align != "right"` respects `fill` arg', {
   ) %>%
     as_epi_df(as_of = test_date + 10)
 
-  result <- toy_edf %>%
-    epi_slide_opt(value, frollmean, .window_size = 3, .align = "left", fill = -1000)
-
-  expected <- tibble(
-    geo_value = 1,
-    time_value = as.Date("2020-01-01") - 1 + 1:5,
-    value = c(1:3, NA, 5),
-    value_3dlav = c(2, NA, NA, -1000, -1000)
-  ) %>%
-    as_epi_df(as_of = test_date + 10)
-
-  expect_equal(result, expected)
+  expect_error(
+    toy_edf %>%
+      epi_slide_opt(value, frollmean, .window_size = 3, .align = "left", fill = -1000),
+    class = "epiprocess__epi_slide_opt__fill_unsupported"
+  )
 })
diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index d472a20c4..6e6fd2479 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -211,19 +211,9 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data; als
 
   expect_equal(archive_cases_dv_subset_result, archive_cases_dv_subset_expected)
 
-  archive_cases_dv_subset_time_opt2 <- system.time(
-    archive_cases_dv_subset_result2 <- archive_cases_dv_subset %>%
-      epi_slide_opt(percent_cli, frollmean, .window_size = 7, .align = "left", fill = -1000)
-  )
-
-  archive_cases_dv_subset_time_gen2 <- system.time(
-    archive_cases_dv_subset_expected2 <- archive_cases_dv_subset %>%
-      epix_slide(
-        ~ .x %>% epi_slide_opt(percent_cli, frollmean, .window_size = 7, .align = "left", fill = -1000)
-      ) %>%
-      select(geo_value, time_value, version, everything()) %>%
-      as_epi_archive()
+  expect_error(
+    archive_cases_dv_subset %>%
+      epi_slide_opt(percent_cli, frollmean, .window_size = 7, .align = "left", fill = -1000),
+    class = "epiprocess__epi_slide_opt__fill_unsupported"
   )
-
-  expect_equal(archive_cases_dv_subset_result2, archive_cases_dv_subset_expected2)
 })

From c885bbac5a7737d5763f35b4d315e8a572269132 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Thu, 27 Mar 2025 01:18:18 +0000
Subject: [PATCH 069/107] docs: document (GHA)

---
 man/upstream_slide_f_info.Rd | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/man/upstream_slide_f_info.Rd b/man/upstream_slide_f_info.Rd
index 787790ca6..6b8ba7683 100644
--- a/man/upstream_slide_f_info.Rd
+++ b/man/upstream_slide_f_info.Rd
@@ -4,11 +4,14 @@
 \alias{upstream_slide_f_info}
 \title{Validate & get information about an upstream slide function}
 \usage{
-upstream_slide_f_info(.f)
+upstream_slide_f_info(.f, ...)
 }
 \arguments{
 \item{.f}{function such as \code{data.table::frollmean} or \code{slider::slide_mean};
 must appear in \code{\link{upstream_slide_f_possibilities}}}
+
+\item{...}{additional configuration args to \code{.f} (besides the data and window
+size&alignment); used to validate \code{.f} is used in a supported way}
 }
 \value{
 named list with two elements: \code{from_package}, a string containing the

From f0e495895bf4bac55f42e3eaf00c50cfd0a87a89 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 18:26:29 -0700
Subject: [PATCH 070/107] docs(epi_slide_opt): update comments re. after>=1
 padding relocation

---
 R/epi_slide_opt_archive.R | 7 ++++---
 R/epi_slide_opt_edf.R     | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 1629bee9f..48d43cceb 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -94,10 +94,11 @@ epi_slide_opt_archive_one_epikey <- function(
       } else {
         out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
         if (after != 0L) {
-          # data.table always puts `fill` arg (default NA) at the tails, even
-          # with na.rm = TRUE; chop off extra from beginning and place at end:
+          # Shift an appropriate amount of NA padding from the start to the end.
+          # (This padding will later be cut off when we filter down to the
+          # original time_values.)
           out_cols <- lapply(out_cols, function(out_col) {
-            c(out_col[(after + 1L):length(out_col)], out_col[seq_len(after)])
+            c(out_col[(after + 1L):length(out_col)], rep(NA, after))
           })
         }
         slide[, out_colnames] <- out_cols
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 51d88e785..cb80aac0c 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -483,9 +483,10 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
       }
       if (window_args$after >= 1) {
         .data_group[, output_col_names] <- lapply(roll_output, function(out_col) {
-          # data.table always puts `fill` arg (default NA) at the tails, even
-          # with na.rm = TRUE; chop off extra from beginning and place at end:
-          c(out_col[(window_args$after + 1L):length(out_col)], out_col[seq_len(window_args$after)])
+          # Shift an appropriate amount of NA padding from the start to the end.
+          # (This padding will later be cut off when we filter down to the
+          # original time_values.)
+          c(out_col[(window_args$after + 1L):length(out_col)], rep(NA, window_args$after))
         })
       } else {
         .data_group[, output_col_names] <- roll_output

From 60563deb0f4c6ca0da9f3eb7a134110bfd516d39 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 26 Mar 2025 18:38:08 -0700
Subject: [PATCH 071/107] Address styler & linter indentation conflict

---
 R/patch.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index dad535683..929075de3 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -245,8 +245,9 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
 
   # More input validation:
   if (!identical(tbl_names, names(later_tbl))) {
-    cli_abort(c("`earlier_snapshot` and `later_tbl` should have identical column
-                 names and ordering.",
+    cli_abort(c(
+      "`earlier_snapshot` and `later_tbl` should have identical column
+       names and ordering.",
       "*" = "`earlier_snapshot` colnames: {format_chr_deparse(tbl_names)}",
       "*" = "`later_tbl` colnames: {format_chr_deparse(names(later_tbl))}"
     ), class = "epiprocess__tbl_diff2__tbl_names_differ")
@@ -356,8 +357,9 @@ tbl_patch <- function(snapshot, update, ukey_names) {
     )
   }
   if (!identical(names(snapshot), names(update))) {
-    cli_abort(c("`snapshot` and `update` should have identical column
-                 names and ordering.",
+    cli_abort(c(
+      "`snapshot` and `update` should have identical column
+       names and ordering.",
       "*" = "`snapshot` colnames: {format_chr_deparse(tbl_names)}",
       "*" = "`update` colnames: {format_chr_deparse(names(update))}"
     ), class = "epiprocess__tbl_patch__tbl_names_invalid")

From 09756df09d9cb0d0f21606b9644bf2927bb99086 Mon Sep 17 00:00:00 2001
From: dsweber2 <dsweber2@users.noreply.github.com>
Date: Wed, 2 Apr 2025 17:12:01 +0000
Subject: [PATCH 072/107] docs: document (GHA)

---
 man/epi_archive.Rd | 46 ++++++++--------------------------------------
 1 file changed, 8 insertions(+), 38 deletions(-)

diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index ab9da219f..5aa4ce785 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -9,17 +9,6 @@
 \title{\code{as_epi_archive} converts a data frame, data table, or tibble into an
 \code{epi_archive} object.}
 \usage{
-new_epi_archive(
-  data_table,
-  geo_type,
-  time_type,
-  other_keys,
-  clobberable_versions_start,
-  versions_end
-)
-
-validate_epi_archive(x)
-
 as_epi_archive(
   x,
   geo_type = deprecated(),
@@ -36,7 +25,7 @@ as_epi_archive(
 is_epi_archive(x)
 
 new_epi_archive(
-  x,
+  data_table,
   geo_type,
   time_type,
   other_keys,
@@ -47,12 +36,7 @@ new_epi_archive(
 validate_epi_archive(x)
 }
 \arguments{
-\item{data_table}{a \code{data.table} with \code{\link[data.table:setkey]{data.table::key()}} equal to
-\code{c("geo_value", other_keys, "time_value", "version")}. For \code{data.table}
-users: this sets up an alias of \code{data_table}; if you plan to keep on
-working with \code{data_table} or working directly with the archive's \verb{$DT}
-using mutating operations, you should \code{copy()} if appropriate. We will not
-mutate the \code{DT} with any exported \code{{epiprocess}} functions, though.}
+\item{x}{An object.}
 
 \item{geo_type}{DEPRECATED Has no effect. Geo value type is inferred from the
 location column and set to "custom" if not recognized.}
@@ -110,26 +94,12 @@ value of \code{clobberable_versions_start} does not fully trust these empty
 updates, and assumes that any version \verb{>= max(x$version)} could be
 clobbered.) If \code{nrow(x) == 0}, then this argument is mandatory.}
 
-\item{x}{A data.frame, data.table, or tibble, with columns \code{geo_value},
-\code{time_value}, \code{version}, and then any additional number of columns.}
-
-\item{compactify}{Optional; \code{TRUE}, \code{FALSE}, or \code{"message"}. \code{TRUE} will
-remove some redundant rows, \code{FALSE} will not. \code{"message"} is like \code{TRUE}
-but will emit a message if anything was changed. Default is \code{TRUE}. See
-more information below under "Compactification:".}
-
-\item{compactify_abs_tol}{Optional; double. A tolerance level used to detect
-approximate equality for compactification. The default is 0, which
-corresponds to exact equality. Consider using this if your value columns
-undergo tiny nonmeaningful revisions and the archive object with the
-default setting is too large.}
-
-\item{.versions_end}{location based versions_end, used to avoid prefix
-\code{version = issue} from being assigned to \code{versions_end} instead of being
-used to rename columns.}
-
-\item{...}{used for specifying column names, as in \code{\link[dplyr:rename]{dplyr::rename}}. For
-example \code{version = release_date}}
+\item{data_table}{a \code{data.table} with \code{\link[data.table:setkey]{data.table::key()}} equal to
+\code{c("geo_value", other_keys, "time_value", "version")}. For \code{data.table}
+users: this sets up an alias of \code{data_table}; if you plan to keep on
+working with \code{data_table} or working directly with the archive's \verb{$DT}
+using mutating operations, you should \code{copy()} if appropriate. We will not
+mutate the \code{DT} with any exported \code{{epiprocess}} functions, though.}
 }
 \value{
 \itemize{

From 3ce86a00827e8eb031393213a10e13b4ba30b2a5 Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Thu, 3 Apr 2025 17:20:40 -0500
Subject: [PATCH 073/107] comments and some nit rewrites

---
 R/archive.R                                 | 12 +++++++--
 R/epi_slide_opt_archive.R                   | 14 +++++-----
 R/epi_slide_opt_edf.R                       |  3 ++-
 R/patch.R                                   | 30 ++++++++++++---------
 tests/testthat/test-epi_slide_opt_archive.R |  9 ++++---
 5 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/R/archive.R b/R/archive.R
index 5afabb13b..543cb4516 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -438,7 +438,8 @@ removed_by_compactify <- function(updates_df, ukey_names, abs_tol) {
   updates_df[update_is_locf(updates_df, ukey_names, abs_tol), ]
 }
 
-#' Internal helper; lgl; which updates are LOCF
+#' Internal helper; lgl; which updates are LOCF and should thus be dropped when
+#' compactifying
 #'
 #' (Not validated:) Must be called inside certain dplyr data masking verbs (e.g.,
 #' `filter` or `mutate`) being run on an `epi_archive`'s `DT` or a data frame
@@ -470,12 +471,18 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
   } else {
     ekts_tbl <- new_tibble(updates_col_refs[ekt_names])
     vals_tbl <- new_tibble(updates_col_refs[val_names])
+    # grab the data and a shifted version of the data, and compute the
+    # entry-wise difference to see if the value has changed
     # n_updates >= 2L so we can use `:` naturally (this is the reason for
     # separating out n_updates == 1L from this case):
     inds1 <- 2L:n_updates
     inds2 <- 1L:(n_updates - 1L)
     c(
       FALSE, # first observation is not LOCF
+      # for the rest, check that both the keys are exactly the same, and the
+      # values are within abs_tol
+      # the key comparison effectively implements a group_by, so that when the
+      # key changes we're guaranteed the value is correct
       vec_approx_equal0(ekts_tbl,
         inds1 = inds1, ekts_tbl, inds2 = inds2,
         # check ekt (key) cols with 0 tolerance:
@@ -493,7 +500,8 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
 #' `epi_archive` object.
 #'
 #' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
-#'   `time_value`, `version`, and then any additional number of columns.
+#'   `time_value`, `version`, and then any additional number of columns, either
+#'   keys or values.
 #' @param ... used for specifying column names, as in [`dplyr::rename`]. For
 #'   example `version = release_date`
 #' @param .versions_end location based versions_end, used to avoid prefix
diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 48d43cceb..c28c56b90 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -27,12 +27,12 @@
 #'
 #' library(dplyr)
 #' grp_updates <- bind_rows(
-#'   tibble(version = 10, time_value = 1:20, value = 1:20),
-#'   tibble(version = 12, time_value = 4:5, value = 5:4),
-#'   tibble(version = 13, time_value = 8, value = 9),
-#'   tibble(version = 14, time_value = 11, value = NA),
-#'   tibble(version = 15, time_value = -10, value = -10),
-#'   tibble(version = 16, time_value = 50, value = 50)
+#'   tibble(version = 20, time_value = 1:20, value = 1:20),
+#'   tibble(version = 22, time_value = 4:5, value = 5:4),
+#'   tibble(version = 23, time_value = 8, value = 9),
+#'   tibble(version = 24, time_value = 11, value = NA),
+#'   tibble(version = 25, time_value = -10, value = -10),
+#'   tibble(version = 26, time_value = 50, value = 50)
 #' ) %>%
 #'   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x))
 #'
@@ -108,7 +108,7 @@ epi_slide_opt_archive_one_epikey <- function(
         slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
       }
     } else {
-      cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}",
+      cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}, which is unsupported",
         class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
       )
     }
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index cb80aac0c..1e2401e2d 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -73,7 +73,8 @@ upstream_slide_f_info <- function(.f, ...) {
   )
 }
 
-#' Calculate input and output column names for an `{epiprocess}` [`dplyr::across`]-like operations
+#' Calculate input and output column names for an `{epiprocess}`
+#' [`dplyr::across`]-like operations
 #'
 #' @param .x data.frame to perform input column tidyselection on
 #' @param time_type as in [`new_epi_df`]
diff --git a/R/patch.R b/R/patch.R
index 929075de3..97d185569 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -121,17 +121,22 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
     na_or_nan1 <- is.na(vec1)
     na_or_nan2 <- is.na(vec2)
     # Since above are bare logical vectors, we can use `fifelse`
-    res <- fifelse(
-      !na_or_nan1 & !na_or_nan2,
-      abs(vec1 - vec2) <= abs_tol,
-      if (na_equal) {
+    if (na_equal) {
+      res <- fifelse(
+        !na_or_nan1 & !na_or_nan2,
+        abs(vec1 - vec2) <= abs_tol,
         na_or_nan1 & na_or_nan2 & (is.nan(vec1) == is.nan(vec2))
-      } else {
-        # Like `==` and `vec_equal`, we consider NaN == {NA, NaN, anything else}
-        # to be NA.
+      )
+    } else {
+      # Like `==` and `vec_equal`, we consider NaN == {NA, NaN, anything else}
+      # to be NA.
+      res <- fifelse(
+        !na_or_nan1 & !na_or_nan2,
+        abs(vec1 - vec2) <= abs_tol,
         NA
-      }
-    )
+      )
+    }
+
     if (!is.null(dim(vec1))) {
       dim(res) <- dim(vec1)
       res <- rowSums(res) == ncol(res)
@@ -278,9 +283,9 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # ukey+val duplicates (cases 2. and 3.).)
 
   # Row indices of first occurrence of each ukey; will be the same as
-  # seq_len(combined_n) except for when that ukey has been re-reported in
-  # `later_tbl`, in which case (3. or 4.) it will point back to the row index of
-  # the same ukey in `earlier_snapshot`:
+  # seq_len(combined_n) (cases 1., 2., or 5.) except for when that ukey has been
+  # re-reported in `later_tbl`, in which case (3. or 4.) it will point back to
+  # the row index of the same ukey in `earlier_snapshot`:
   combined_ukey_firsts <- vec_duplicate_id(combined_ukeys)
 
   # Which rows from combined are cases 3. or 4.?
@@ -368,6 +373,7 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   result_tbl <- vec_rbind(update, snapshot)
 
   dup_ids <- vec_duplicate_id(result_tbl[ukey_names])
+  # check that the index hasn't be reset to something lower that it duplicates
   not_overwritten <- dup_ids == vec_seq_along(result_tbl)
   result_tbl <- result_tbl[not_overwritten, ]
 
diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index 6e6fd2479..999234217 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -173,13 +173,14 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data; als
       group_by(geo_value)
   )
 
-  mini_case_death_rate_archive_b <- mini_case_death_rate_archive %>%
-    {
-      as_tibble(as.data.frame(.$DT))
-    } %>%
+  mini_case_death_rate_archive_b <-
+    mini_case_death_rate_archive$DT %>%
+    as.data.frame() %>%
+    as_tibble() %>%
     mutate(age_group = "overall") %>%
     as_epi_archive(other_keys = "age_group")
 
+  # grouping shouldn't change the outcome
   expect_equal(
     mini_case_death_rate_archive_b %>%
       group_by(geo_value, age_group) %>%

From fcbf695b48d2826d43e461b1c65bc44632985a8e Mon Sep 17 00:00:00 2001
From: dsweber2 <dsweber2@users.noreply.github.com>
Date: Thu, 3 Apr 2025 22:23:28 +0000
Subject: [PATCH 074/107] docs: document (GHA)

---
 man/across_ish_names_info.Rd            |  6 ++++--
 man/epi_slide_opt_archive_one_epikey.Rd | 12 ++++++------
 man/update_is_locf.Rd                   |  3 ++-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/man/across_ish_names_info.Rd b/man/across_ish_names_info.Rd
index c993c2bf5..36b9ed040 100644
--- a/man/across_ish_names_info.Rd
+++ b/man/across_ish_names_info.Rd
@@ -2,7 +2,8 @@
 % Please edit documentation in R/epi_slide_opt_edf.R
 \name{across_ish_names_info}
 \alias{across_ish_names_info}
-\title{Calculate input and output column names for an \code{{epiprocess}} \code{\link[dplyr:across]{dplyr::across}}-like operations}
+\title{Calculate input and output column names for an \code{{epiprocess}}
+\code{\link[dplyr:across]{dplyr::across}}-like operations}
 \usage{
 across_ish_names_info(
   .x,
@@ -41,6 +42,7 @@ named list with two elements: \code{input_col_names}, chr, subset of
 \code{names(.x)}; and \code{output_colnames}, chr, same length as \code{input_col_names}
 }
 \description{
-Calculate input and output column names for an \code{{epiprocess}} \code{\link[dplyr:across]{dplyr::across}}-like operations
+Calculate input and output column names for an \code{{epiprocess}}
+\code{\link[dplyr:across]{dplyr::across}}-like operations
 }
 \keyword{internal}
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index 79d635c88..ca92c5cf5 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -55,12 +55,12 @@ Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
 
 library(dplyr)
 grp_updates <- bind_rows(
-  tibble(version = 10, time_value = 1:20, value = 1:20),
-  tibble(version = 12, time_value = 4:5, value = 5:4),
-  tibble(version = 13, time_value = 8, value = 9),
-  tibble(version = 14, time_value = 11, value = NA),
-  tibble(version = 15, time_value = -10, value = -10),
-  tibble(version = 16, time_value = 50, value = 50)
+  tibble(version = 20, time_value = 1:20, value = 1:20),
+  tibble(version = 22, time_value = 4:5, value = 5:4),
+  tibble(version = 23, time_value = 8, value = 9),
+  tibble(version = 24, time_value = 11, value = NA),
+  tibble(version = 25, time_value = -10, value = -10),
+  tibble(version = 26, time_value = 50, value = 50)
 ) \%>\%
   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x))
 
diff --git a/man/update_is_locf.Rd b/man/update_is_locf.Rd
index 722f3d5c4..aaa70bad2 100644
--- a/man/update_is_locf.Rd
+++ b/man/update_is_locf.Rd
@@ -2,7 +2,8 @@
 % Please edit documentation in R/archive.R
 \name{update_is_locf}
 \alias{update_is_locf}
-\title{Internal helper; lgl; which updates are LOCF}
+\title{Internal helper; lgl; which updates are LOCF and should thus be dropped when
+compactifying}
 \usage{
 update_is_locf(arranged_updates_df, ukey_names, abs_tol)
 }

From a7ff734ce3e484e73822ed26e32eeff8d3ae616a Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 4 Apr 2025 12:04:24 -0700
Subject: [PATCH 075/107] Update R/epi_slide_opt_archive.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 R/epi_slide_opt_archive.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 48d43cceb..a731bd072 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -127,9 +127,8 @@ epi_slide_opt_archive_one_epikey <- function(
       }
     out_update <- slide[rows_should_keep, ]
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
-    out_snapshot <- tbl_patch(prev_out_snapshot, out_diff, "time_value")
     prev_inp_snapshot <<- inp_snapshot
-    prev_out_snapshot <<- out_snapshot
+    prev_out_snapshot <<-  tbl_patch(prev_out_snapshot, out_diff, "time_value")
     out_diff$version <- version
     out_diff
   })

From 5d2782dcad8621a7527fa93184ebe5979b9474f0 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Fri, 4 Apr 2025 19:06:16 +0000
Subject: [PATCH 076/107] style: styler (GHA)

---
 R/epi_slide_opt_archive.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index a731bd072..ca2e6a03f 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -128,7 +128,7 @@ epi_slide_opt_archive_one_epikey <- function(
     out_update <- slide[rows_should_keep, ]
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
-    prev_out_snapshot <<-  tbl_patch(prev_out_snapshot, out_diff, "time_value")
+    prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
     out_diff$version <- version
     out_diff
   })

From cdd9fee46f1b572df09ed4cb45c9d9fa42119176 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 4 Apr 2025 12:41:43 -0700
Subject: [PATCH 077/107] Update tests/testthat/test-epi_slide_opt_archive.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 tests/testthat/test-epi_slide_opt_archive.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-epi_slide_opt_archive.R b/tests/testthat/test-epi_slide_opt_archive.R
index 6e6fd2479..66b15b1bd 100644
--- a/tests/testthat/test-epi_slide_opt_archive.R
+++ b/tests/testthat/test-epi_slide_opt_archive.R
@@ -173,10 +173,10 @@ test_that("epi_slide_opt.epi_archive gives expected results on example data; als
       group_by(geo_value)
   )
 
-  mini_case_death_rate_archive_b <- mini_case_death_rate_archive %>%
-    {
-      as_tibble(as.data.frame(.$DT))
-    } %>%
+  mini_case_death_rate_archive_b <-
+    mini_case_death_rate_archive$DT %>%
+    as.data.frame() %>%
+    as_tibble() %>%
     mutate(age_group = "overall") %>%
     as_epi_archive(other_keys = "age_group")
 

From 722bae8d1dce1e7853711dda7f7f77d265e46454 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 4 Apr 2025 16:58:45 -0700
Subject: [PATCH 078/107] Update R/epi_slide_opt_archive.R

---
 R/epi_slide_opt_archive.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index c28c56b90..d503483d0 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -27,12 +27,12 @@
 #'
 #' library(dplyr)
 #' grp_updates <- bind_rows(
-#'   tibble(version = 20, time_value = 1:20, value = 1:20),
-#'   tibble(version = 22, time_value = 4:5, value = 5:4),
-#'   tibble(version = 23, time_value = 8, value = 9),
-#'   tibble(version = 24, time_value = 11, value = NA),
-#'   tibble(version = 25, time_value = -10, value = -10),
-#'   tibble(version = 26, time_value = 50, value = 50)
+#'   tibble(version = 30, time_value = 1:20, value = 1:20),
+#'   tibble(version = 32, time_value = 4:5, value = 5:4),
+#'   tibble(version = 33, time_value = 8, value = 9),
+#'   tibble(version = 34, time_value = 11, value = NA),
+#'   tibble(version = 35, time_value = -10, value = -10),
+#'   tibble(version = 56, time_value = 50, value = 50)
 #' ) %>%
 #'   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x))
 #'

From 0f2ee96819985824ed8af8ffe1573dcf4ccf9564 Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 4 Apr 2025 16:58:51 -0700
Subject: [PATCH 079/107] Update R/patch.R

---
 R/patch.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 97d185569..69a7dd0ae 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -283,8 +283,8 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # ukey+val duplicates (cases 2. and 3.).)
 
   # Row indices of first occurrence of each ukey; will be the same as
-  # seq_len(combined_n) (cases 1., 2., or 5.) except for when that ukey has been
-  # re-reported in `later_tbl`, in which case (3. or 4.) it will point back to
+  # seq_len(combined_n) for each ukey's first appearance (cases 1., 2., or 5.);
+  # for re-reported ukeys in `later_tbl` (cases 3. or 4.), it will point back to
   # the row index of the same ukey in `earlier_snapshot`:
   combined_ukey_firsts <- vec_duplicate_id(combined_ukeys)
 

From d20541562fbea5345ea3443d3521e6bdc781540d Mon Sep 17 00:00:00 2001
From: brookslogan <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 4 Apr 2025 16:58:58 -0700
Subject: [PATCH 080/107] Update R/patch.R

---
 R/patch.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/R/patch.R b/R/patch.R
index 69a7dd0ae..262914638 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -373,7 +373,11 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   result_tbl <- vec_rbind(update, snapshot)
 
   dup_ids <- vec_duplicate_id(result_tbl[ukey_names])
-  # check that the index hasn't be reset to something lower that it duplicates
+  # Find the "first" appearance of each ukey; since `update` is ordered before `snapshot`,
+  # this means favoring the rows from `update` over those in `snapshot`.
+  # This is like `!duplicated()` but faster, and like `vec_unique_loc()` but guaranteeing
+  # that we get the first appearance since `vec_duplicate_id()` guarantees that
+  # it points to the first appearance.
   not_overwritten <- dup_ids == vec_seq_along(result_tbl)
   result_tbl <- result_tbl[not_overwritten, ]
 

From ae70f7847289e65c886a9a3f588c58f0009bacb3 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Sat, 5 Apr 2025 00:00:45 +0000
Subject: [PATCH 081/107] docs: document (GHA)

---
 man/epi_slide_opt_archive_one_epikey.Rd | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index ca92c5cf5..073e684c5 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -55,12 +55,12 @@ Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
 
 library(dplyr)
 grp_updates <- bind_rows(
-  tibble(version = 20, time_value = 1:20, value = 1:20),
-  tibble(version = 22, time_value = 4:5, value = 5:4),
-  tibble(version = 23, time_value = 8, value = 9),
-  tibble(version = 24, time_value = 11, value = NA),
-  tibble(version = 25, time_value = -10, value = -10),
-  tibble(version = 26, time_value = 50, value = 50)
+  tibble(version = 30, time_value = 1:20, value = 1:20),
+  tibble(version = 32, time_value = 4:5, value = 5:4),
+  tibble(version = 33, time_value = 8, value = 9),
+  tibble(version = 34, time_value = 11, value = NA),
+  tibble(version = 35, time_value = -10, value = -10),
+  tibble(version = 56, time_value = 50, value = 50)
 ) \%>\%
   mutate(across(c(version, time_value), ~ as.Date("2020-01-01") - 1 + .x))
 

From 71116cbe4c0cf0befecbc6b9fe82572ccd3d1a54 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Sun, 6 Apr 2025 15:32:19 -0700
Subject: [PATCH 082/107] Simplify `tbl_diff2` with `tbl_fast_anti_join`

---
 R/epi_slide_opt_archive.R |   2 +-
 R/patch.R                 | 139 +++++++++++++-------------------------
 2 files changed, 48 insertions(+), 93 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index efd703186..e6cc9cdc3 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -126,7 +126,7 @@ epi_slide_opt_archive_one_epikey <- function(
           !is.na(slide_inp_backrefs)
       }
     out_update <- slide[rows_should_keep, ]
-    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
+    out_diff <- tbl_diff2_alt2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
     out_diff$version <- version
diff --git a/R/patch.R b/R/patch.R
index 262914638..4b7c22d0e 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -183,6 +183,37 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
   }
 }
 
+#' Variation on [`dplyr::anti_join`] for speed + tolerance setting
+#'
+#' @param x tibble; `x[ukey_names]` must not have any duplicate rows
+#' @param y tibble; `y[ukey_names]` must not have any duplicate rows
+#' @param ukey_names chr; names of columns that form a unique key, for `x` and
+#'   for `y`
+#' @param val_names chr; names of columns which should be treated as
+#'   value/measurement columns, and compared with a tolerance
+#' @param abs_tol scalar non-negative numeric; absolute tolerance with which to
+#'   compare value columns; see [`vec_approx_equal`]
+#' @return rows from `x` that either (a) don't have a (0-tolerance) matching
+#'   ukey in `y`, or (b) have a matching ukey in `y`, but don't have
+#'   approximately equal value column values
+tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
+  x <- x[c(ukey_names, val_names)]
+  y <- y[c(ukey_names, val_names)]
+  xy <- vec_rbind(x, y)
+  if (abs_tol == 0) {
+    x_exclude <- vec_duplicate_detect(xy)
+    x_exclude <- vec_slice(x_exclude, seq_len(nrow(x)))
+  } else {
+    xy_dup_ids <- vec_duplicate_id(xy[ukey_names])
+    xy_dup_inds2 <- which(xy_dup_ids != seq_along(xy_dup_ids))
+    xy_dup_inds1 <- xy_dup_ids[xy_dup_inds2]
+    x_exclude <- rep(FALSE, nrow(x))
+    xy_vals <- xy[val_names]
+    x_exclude[xy_dup_inds1] <- vec_approx_equal(xy_vals, inds1 = xy_dup_inds2, xy_vals, inds2 = xy_dup_inds1, na_equal = TRUE, abs_tol = abs_tol)
+  }
+  vec_slice(x, !x_exclude)
+}
+
 #' Calculate compact patch to move from one snapshot/update to another
 #'
 #' @param earlier_snapshot tibble or `NULL`; `NULL` represents that there was no
@@ -214,117 +245,41 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # use faster validation variants:
   if (!is_tibble(later_tbl)) {
     cli_abort("`later_tbl` must be a tibble",
-      class = "epiprocess__tbl_diff2__later_tbl_invalid"
-    )
+              class = "epiprocess__tbl_diff2__later_tbl_invalid"
+              )
   }
   if (is.null(earlier_snapshot)) {
     return(later_tbl)
   }
   if (!is_tibble(earlier_snapshot)) {
     cli_abort("`earlier_snapshot` must be a tibble or `NULL`",
-      class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid"
-    )
+              class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid"
+              )
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(earlier_snapshot))) {
     cli_abort("`ukey_names` must be a subset of column names",
-      class = "epiprocess__tbl_diff2__ukey_names_class_invalid"
-    )
+              class = "epiprocess__tbl_diff2__ukey_names_class_invalid"
+              )
   }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
   if (!(is.vector(compactify_abs_tol, mode = "numeric") &&
-    length(compactify_abs_tol) == 1L && # nolint:indentation_linter
-    compactify_abs_tol >= 0)) {
+          length(compactify_abs_tol) == 1L && # nolint:indentation_linter
+          compactify_abs_tol >= 0)) {
     # Give a specific message:
     assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)
     # Fallback e.g. for invalid classes not caught by assert_numeric:
     cli_abort("`compactify_abs_tol` must be a length-1 double/integer >= 0",
-      class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid"
-    )
+              class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid")
   }
 
-  # Extract metadata:
-  earlier_n <- nrow(earlier_snapshot)
-  later_n <- nrow(later_tbl)
-  tbl_names <- names(earlier_snapshot)
-  val_names <- tbl_names[!tbl_names %in% ukey_names]
-
-  # More input validation:
-  if (!identical(tbl_names, names(later_tbl))) {
-    cli_abort(c(
-      "`earlier_snapshot` and `later_tbl` should have identical column
-       names and ordering.",
-      "*" = "`earlier_snapshot` colnames: {format_chr_deparse(tbl_names)}",
-      "*" = "`later_tbl` colnames: {format_chr_deparse(names(later_tbl))}"
-    ), class = "epiprocess__tbl_diff2__tbl_names_differ")
+  all_names <- names(later_tbl)
+  val_names <- all_names[! (all_names %in% ukey_names)]
+  updates <- tbl_fast_anti_join(later_tbl, earlier_snapshot, ukey_names, val_names, compactify_abs_tol)
+  if (later_format == "snapshot") {
+    deletions <- tbl_fast_anti_join(earlier_snapshot[ukey_names], later_tbl[ukey_names], ukey_names, character(), 0)
+    updates <- vec_rbind(updates, deletions) # fills vals with NAs
   }
-
-  combined_tbl <- vec_rbind(earlier_snapshot, later_tbl)
-  combined_n <- nrow(combined_tbl)
-
-  # We'll also need epikeytimes and value columns separately:
-  combined_ukeys <- combined_tbl[ukey_names]
-  combined_vals <- combined_tbl[val_names]
-
-  # We have five types of rows in combined_tbl:
-  # 1. From earlier_snapshot, no matching ukey in later_tbl (deletion; turn vals to
-  #    NAs to match epi_archive format)
-  # 2. From earlier_snapshot, with matching ukey in later_tbl (context; exclude from
-  #    result)
-  # 3. From later_tbl, with matching ukey in earlier_snapshot, with value "close" (change
-  #    that we'll compactify away)
-  # 4. From later_tbl, with matching ukey in earlier_snapshot, value not "close" (change
-  #    that we'll record)
-  # 5. From later_tbl, with no matching ukey in later_tbl (addition)
-
-  # For "snapshot" later_format, we need to filter to 1., 4., and 5., and alter
-  # values for 1.  For "update" later_format, we need to filter to 4. and 5.
-
-  # (For compactify_abs_tol = 0, we could potentially streamline things by dropping
-  # ukey+val duplicates (cases 2. and 3.).)
-
-  # Row indices of first occurrence of each ukey; will be the same as
-  # seq_len(combined_n) for each ukey's first appearance (cases 1., 2., or 5.);
-  # for re-reported ukeys in `later_tbl` (cases 3. or 4.), it will point back to
-  # the row index of the same ukey in `earlier_snapshot`:
-  combined_ukey_firsts <- vec_duplicate_id(combined_ukeys)
-
-  # Which rows from combined are cases 3. or 4.?
-  combined_ukey_is_repeat <- combined_ukey_firsts != seq_len(combined_n)
-  # For each row in 3. or 4., row numbers of the ukey appearance in earlier:
-  ukey_repeat_first_i <- combined_ukey_firsts[combined_ukey_is_repeat]
-
-  # Which rows from combined are in case 3.?
-  combined_compactify_away <- rep(FALSE, combined_n)
-  combined_compactify_away[combined_ukey_is_repeat] <-
-    vec_approx_equal0(combined_vals,
-      combined_vals,
-      na_equal = TRUE,
-      abs_tol = compactify_abs_tol,
-      inds1 = combined_ukey_is_repeat,
-      inds2 = ukey_repeat_first_i
-    )
-
-  # Which rows from combined are in cases 3., 4., or 5.?
-  combined_from_later <- vec_rep_each(c(FALSE, TRUE), c(earlier_n, later_n))
-
-  if (later_format == "update") {
-    # Cases 4. and 5.:
-    combined_tbl <- combined_tbl[combined_from_later & !combined_compactify_away, ]
-  } else { # later_format is "snapshot"
-    # Which rows from combined are in case 1.?
-    combined_is_deletion <- vec_rep_each(c(TRUE, FALSE), c(earlier_n, later_n))
-    combined_is_deletion[ukey_repeat_first_i] <- FALSE
-    # Which rows from combined are in cases 1., 4., or 5.?
-    combined_include <- combined_is_deletion | combined_from_later & !combined_compactify_away
-    combined_tbl <- combined_tbl[combined_include, ]
-    # Represent deletion in 1. with NA-ing of all value columns. (In some
-    # previous approaches to epi_diff2, this seemed to be faster than using
-    # vec_rbind(case_1_ukeys, cases_45_tbl) or bind_rows to fill with NAs, and more
-    # general than data.table's rbind(case_1_ukeys, cases_45_tbl, fill = TRUE).)
-    combined_tbl[combined_is_deletion[combined_include], val_names] <- NA
-  }
-
-  combined_tbl
+  updates
 }
 
 #' Apply an update (e.g., from `tbl_diff2`) to a snapshot

From 1c5f3c9fa6f42dcf24eb496d9d5d5868d1ab4f07 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Sun, 6 Apr 2025 16:23:10 -0700
Subject: [PATCH 083/107] fix(tbl_fast_anti_join): include non-ukey, non-val
 cols in result

---
 R/patch.R | 60 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 4b7c22d0e..00f2145f8 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -196,7 +196,10 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
 #' @return rows from `x` that either (a) don't have a (0-tolerance) matching
 #'   ukey in `y`, or (b) have a matching ukey in `y`, but don't have
 #'   approximately equal value column values
+#'
+#' @keywords internal
 tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
+  x_orig <- x
   x <- x[c(ukey_names, val_names)]
   y <- y[c(ukey_names, val_names)]
   xy <- vec_rbind(x, y)
@@ -211,7 +214,7 @@ tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
     xy_vals <- xy[val_names]
     x_exclude[xy_dup_inds1] <- vec_approx_equal(xy_vals, inds1 = xy_dup_inds2, xy_vals, inds2 = xy_dup_inds1, na_equal = TRUE, abs_tol = abs_tol)
   }
-  vec_slice(x, !x_exclude)
+  vec_slice(x_orig, !x_exclude)
 }
 
 #' Calculate compact patch to move from one snapshot/update to another
@@ -244,41 +247,53 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   # Most input validation + handle NULL earlier_snapshot. This is a small function so
   # use faster validation variants:
   if (!is_tibble(later_tbl)) {
-    cli_abort("`later_tbl` must be a tibble",
-              class = "epiprocess__tbl_diff2__later_tbl_invalid"
-              )
+    cli_abort(
+      "`later_tbl` must be a tibble",
+      class = "epiprocess__tbl_diff2__later_tbl_invalid"
+    )
   }
   if (is.null(earlier_snapshot)) {
     return(later_tbl)
   }
   if (!is_tibble(earlier_snapshot)) {
-    cli_abort("`earlier_snapshot` must be a tibble or `NULL`",
-              class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid"
-              )
+    cli_abort(
+      "`earlier_snapshot` must be a tibble or `NULL`",
+      class = "epiprocess__tbl_diff2__earlier_tbl_class_invalid"
+    )
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(earlier_snapshot))) {
-    cli_abort("`ukey_names` must be a subset of column names",
-              class = "epiprocess__tbl_diff2__ukey_names_class_invalid"
-              )
+    cli_abort(
+      "`ukey_names` must be a subset of column names",
+      class = "epiprocess__tbl_diff2__ukey_names_class_invalid"
+    )
   }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
   if (!(is.vector(compactify_abs_tol, mode = "numeric") &&
-          length(compactify_abs_tol) == 1L && # nolint:indentation_linter
-          compactify_abs_tol >= 0)) {
+    length(compactify_abs_tol) == 1L && # nolint:indentation_linter
+    compactify_abs_tol >= 0)) {
     # Give a specific message:
     assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)
     # Fallback e.g. for invalid classes not caught by assert_numeric:
-    cli_abort("`compactify_abs_tol` must be a length-1 double/integer >= 0",
-              class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid")
+    cli_abort(
+      "`compactify_abs_tol` must be a length-1 double/integer >= 0",
+      class = "epiprocess__tbl_diff2__compactify_abs_tol_invalid"
+    )
   }
 
   all_names <- names(later_tbl)
-  val_names <- all_names[! (all_names %in% ukey_names)]
+  val_names <- all_names[!(all_names %in% ukey_names)]
   updates <- tbl_fast_anti_join(later_tbl, earlier_snapshot, ukey_names, val_names, compactify_abs_tol)
   if (later_format == "snapshot") {
+    # Interpret `later_tbl` as a full snapshot, rather than a diff / sparse
+    # update. That means that any ukeys in `earlier_snapshot` that don't appear
+    # in `later_tbl` were deleted in the later snapshot.
     deletions <- tbl_fast_anti_join(earlier_snapshot[ukey_names], later_tbl[ukey_names], ukey_names, character(), 0)
-    updates <- vec_rbind(updates, deletions) # fills vals with NAs
+    updates <- vec_rbind(updates, deletions) # fills val cols with NAs
   }
+  # If `later_format == "update"`, we don't need to do anything special about
+  # the above ukeys. The full snapshot for the later version would include the
+  # corresponding rows unchanged, and the diff for these unchanged rows would be
+  # empty.
   updates
 }
 
@@ -299,7 +314,8 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   # Most input validation. This is a small function so use faster validation
   # variants:
   if (!is_tibble(update)) {
-    cli_abort("`update` must be a tibble",
+    cli_abort(
+      "`update` must be a tibble",
       class = "epiprocess__tbl_patch__update_class_invalid"
     )
   }
@@ -307,12 +323,14 @@ tbl_patch <- function(snapshot, update, ukey_names) {
     return(update)
   }
   if (!is_tibble(snapshot)) {
-    cli_abort("`snapshot` must be a tibble",
+    cli_abort(
+      "`snapshot` must be a tibble",
       class = "epiprocess__tbl_patch__snapshot_class_invalid"
     )
   }
   if (!is.character(ukey_names) || !all(ukey_names %in% names(snapshot))) {
-    cli_abort("`ukey_names` must be a subset of column names",
+    cli_abort(
+      "`ukey_names` must be a subset of column names",
       class = "epiprocess__tbl_patch__ukey_names_invalid"
     )
   }
@@ -333,8 +351,8 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   # This is like `!duplicated()` but faster, and like `vec_unique_loc()` but guaranteeing
   # that we get the first appearance since `vec_duplicate_id()` guarantees that
   # it points to the first appearance.
-  not_overwritten <- dup_ids == vec_seq_along(result_tbl)
-  result_tbl <- result_tbl[not_overwritten, ]
+  is_only_or_favored_appearance <- dup_ids == vec_seq_along(result_tbl)
+  result_tbl <- result_tbl[is_only_or_favored_appearance, ]
 
   result_tbl
 }

From 5fc1dc594e1c9f41d0bfe6cd0675086f3b503a6f Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Sun, 6 Apr 2025 16:40:05 -0700
Subject: [PATCH 084/107] Comment, clean, style, fix @keywords in archive opt
 slide & helpers

---
 R/epi_slide_opt_archive.R |  3 ++-
 R/patch.R                 | 39 +++++++++++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index e6cc9cdc3..066fc3178 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -108,7 +108,8 @@ epi_slide_opt_archive_one_epikey <- function(
         slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
       }
     } else {
-      cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}, which is unsupported",
+      cli_abort(
+        "epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}, which is unsupported",
         class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
       )
     }
diff --git a/R/patch.R b/R/patch.R
index 00f2145f8..2191c42b0 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -199,20 +199,39 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
 #'
 #' @keywords internal
 tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
-  x_orig <- x
-  x <- x[c(ukey_names, val_names)]
-  y <- y[c(ukey_names, val_names)]
-  xy <- vec_rbind(x, y)
+  x_keyvals <- x[c(ukey_names, val_names)]
+  y_keyvals <- y[c(ukey_names, val_names)]
+  xy_keyvals <- vec_rbind(x, y)
   if (abs_tol == 0) {
-    x_exclude <- vec_duplicate_detect(xy)
+    # perf: 0 tolerance is just like a normal `anti_join` by both ukey_names and
+    # val_names together. We can do that more quickly than `anti_join` with
+    # `vctrs` by checking for keyvals of `x` that are not duplicated in `y`.
+    # (`vec_duplicate_detect` will mark those, unlike `duplicated`.)
+    x_exclude <- vec_duplicate_detect(xy_keyvals)
     x_exclude <- vec_slice(x_exclude, seq_len(nrow(x)))
   } else {
-    xy_dup_ids <- vec_duplicate_id(xy[ukey_names])
-    xy_dup_inds2 <- which(xy_dup_ids != seq_along(xy_dup_ids))
-    xy_dup_inds1 <- xy_dup_ids[xy_dup_inds2]
+    xy_ukeys <- xy_keyvals[ukey_names]
+    # Locate ukeys in `y` that match ukeys in `x` and where in `x` they map back
+    # to. It's faster to do this with `vec_duplicate_id` on `xy_ukeys` than to
+    # perform a `inner_join`.
+    xy_ukey_dup_ids <- vec_duplicate_id(xy_ukeys)
+    xy_ukey_dup_inds2 <- which(xy_ukey_dup_ids != seq_along(xy_ukey_dup_ids))
+    # ^ these should point to rows from y that had a ukey match in x
+    xy_ukey_dup_inds1 <- xy_ukey_dup_ids[xy_ukey_dup_inds2]
+    # ^ these should point to the respectively corresponding rows from x
+
+    # Anything in `x` without a ukey match in `y` should be kept; start off with
+    # `FALSE` for everything and just fill in `TRUE`/`FALSE` results for the
+    # ukeys with matches in `y`:
     x_exclude <- rep(FALSE, nrow(x))
     xy_vals <- xy[val_names]
-    x_exclude[xy_dup_inds1] <- vec_approx_equal(xy_vals, inds1 = xy_dup_inds2, xy_vals, inds2 = xy_dup_inds1, na_equal = TRUE, abs_tol = abs_tol)
+    x_exclude[xy_ukey_dup_inds1] <- vec_approx_equal(
+      xy_vals,
+      inds1 = xy_ukey_dup_inds2,
+      xy_vals,
+      inds2 = xy_ukey_dup_inds1,
+      na_equal = TRUE, abs_tol = abs_tol
+    )
   }
   vec_slice(x_orig, !x_exclude)
 }
@@ -269,7 +288,7 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
   }
   later_format <- arg_match0(later_format, c("snapshot", "update"))
   if (!(is.vector(compactify_abs_tol, mode = "numeric") &&
-    length(compactify_abs_tol) == 1L && # nolint:indentation_linter
+    length(compactify_abs_tol) == 1L && # nolint: indentation_linter
     compactify_abs_tol >= 0)) {
     # Give a specific message:
     assert_numeric(compactify_abs_tol, lower = 0, any.missing = FALSE, len = 1L)

From 436fbbac82787bc12b9d461d95dc2ac5df934ece Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Sun, 6 Apr 2025 23:43:54 +0000
Subject: [PATCH 085/107] docs: document (GHA)

---
 man/tbl_fast_anti_join.Rd | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 man/tbl_fast_anti_join.Rd

diff --git a/man/tbl_fast_anti_join.Rd b/man/tbl_fast_anti_join.Rd
new file mode 100644
index 000000000..91104b2a1
--- /dev/null
+++ b/man/tbl_fast_anti_join.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/patch.R
+\name{tbl_fast_anti_join}
+\alias{tbl_fast_anti_join}
+\title{Variation on \code{\link[dplyr:filter-joins]{dplyr::anti_join}} for speed + tolerance setting}
+\usage{
+tbl_fast_anti_join(x, y, ukey_names, val_names, abs_tol = 0)
+}
+\arguments{
+\item{x}{tibble; \code{x[ukey_names]} must not have any duplicate rows}
+
+\item{y}{tibble; \code{y[ukey_names]} must not have any duplicate rows}
+
+\item{ukey_names}{chr; names of columns that form a unique key, for \code{x} and
+for \code{y}}
+
+\item{val_names}{chr; names of columns which should be treated as
+value/measurement columns, and compared with a tolerance}
+
+\item{abs_tol}{scalar non-negative numeric; absolute tolerance with which to
+compare value columns; see \code{\link{vec_approx_equal}}}
+}
+\value{
+rows from \code{x} that either (a) don't have a (0-tolerance) matching
+ukey in \code{y}, or (b) have a matching ukey in \code{y}, but don't have
+approximately equal value column values
+}
+\description{
+Variation on \code{\link[dplyr:filter-joins]{dplyr::anti_join}} for speed + tolerance setting
+}
+\keyword{internal}

From 54223639a3a40e8aab672244fb8b42ec5b44b191 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 7 Apr 2025 10:50:02 -0700
Subject: [PATCH 086/107] Fix incomplete rename refactor

---
 R/epi_slide_opt_archive.R | 2 +-
 R/patch.R                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 066fc3178..8da76d413 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -127,7 +127,7 @@ epi_slide_opt_archive_one_epikey <- function(
           !is.na(slide_inp_backrefs)
       }
     out_update <- slide[rows_should_keep, ]
-    out_diff <- tbl_diff2_alt2(prev_out_snapshot, out_update, "time_value", "update")
+    out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
     out_diff$version <- version
diff --git a/R/patch.R b/R/patch.R
index 2191c42b0..c9cbc3f91 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -233,7 +233,7 @@ tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
       na_equal = TRUE, abs_tol = abs_tol
     )
   }
-  vec_slice(x_orig, !x_exclude)
+  vec_slice(x, !x_exclude)
 }
 
 #' Calculate compact patch to move from one snapshot/update to another

From f48fbf04d28fe867521a87173aeb8556e23d3b22 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 7 Apr 2025 11:14:16 -0700
Subject: [PATCH 087/107] perf(epi_slide_opt.epi_archive): more `[` ->
 `vec_slice` changes

14% or 17% time reduction for 7daving on a couple of test archives.
---
 R/epi_slide_opt_archive.R | 4 ++--
 R/patch.R                 | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 8da76d413..fd4b9f6bb 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -85,7 +85,7 @@ epi_slide_opt_archive_one_epikey <- function(
     # Get additional values needed from inp_snapshot + perform any NA
     # tail-padding needed to make slider results a fixed window size rather than
     # adaptive at tails + perform any NA gap-filling needed:
-    slide <- inp_snapshot[slide_inp_backrefs, ]
+    slide <- vec_slice(inp_snapshot, slide_inp_backrefs)
     slide$time_value <- slide_time_values
     if (f_from_package == "data.table") {
       if (before == Inf) {
@@ -126,7 +126,7 @@ epi_slide_opt_archive_one_epikey <- function(
           # Only include time_values that appeared in the input snapshot:
           !is.na(slide_inp_backrefs)
       }
-    out_update <- slide[rows_should_keep, ]
+    out_update <- vec_slice(slide, rows_should_keep)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
diff --git a/R/patch.R b/R/patch.R
index c9cbc3f91..307ce0a2b 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -217,7 +217,7 @@ tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
     xy_ukey_dup_ids <- vec_duplicate_id(xy_ukeys)
     xy_ukey_dup_inds2 <- which(xy_ukey_dup_ids != seq_along(xy_ukey_dup_ids))
     # ^ these should point to rows from y that had a ukey match in x
-    xy_ukey_dup_inds1 <- xy_ukey_dup_ids[xy_ukey_dup_inds2]
+    xy_ukey_dup_inds1 <- vec_slice(xy_ukey_dup_ids, xy_ukey_dup_inds2)
     # ^ these should point to the respectively corresponding rows from x
 
     # Anything in `x` without a ukey match in `y` should be kept; start off with
@@ -371,7 +371,7 @@ tbl_patch <- function(snapshot, update, ukey_names) {
   # that we get the first appearance since `vec_duplicate_id()` guarantees that
   # it points to the first appearance.
   is_only_or_favored_appearance <- dup_ids == vec_seq_along(result_tbl)
-  result_tbl <- result_tbl[is_only_or_favored_appearance, ]
+  result_tbl <- vec_slice(result_tbl, is_only_or_favored_appearance)
 
   result_tbl
 }

From ad0ec0f56f342951e4051f55148f402decfdc591 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 7 Apr 2025 12:53:15 -0700
Subject: [PATCH 088/107] docs(patch.R): fix comment grammar

---
 R/patch.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 307ce0a2b..0a5efa014 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -211,9 +211,9 @@ tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
     x_exclude <- vec_slice(x_exclude, seq_len(nrow(x)))
   } else {
     xy_ukeys <- xy_keyvals[ukey_names]
-    # Locate ukeys in `y` that match ukeys in `x` and where in `x` they map back
-    # to. It's faster to do this with `vec_duplicate_id` on `xy_ukeys` than to
-    # perform a `inner_join`.
+    # Locate ukeys in `y` that match ukeys in `x`, and where in `x` they map
+    # back to. It's faster to do this with `vec_duplicate_id` on `xy_ukeys` than
+    # to perform an `inner_join`.
     xy_ukey_dup_ids <- vec_duplicate_id(xy_ukeys)
     xy_ukey_dup_inds2 <- which(xy_ukey_dup_ids != seq_along(xy_ukey_dup_ids))
     # ^ these should point to rows from y that had a ukey match in x

From 172c3c10c4b1853a79726fb6d2db38ae178901d6 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Mon, 7 Apr 2025 15:34:33 -0700
Subject: [PATCH 089/107] fix: add missing importFrom

---
 NAMESPACE              | 1 +
 R/epiprocess-package.R | 1 +
 2 files changed, 2 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index 62c3e7fc8..920422a00 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -271,6 +271,7 @@ importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_cast_common)
 importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_duplicate_any)
+importFrom(vctrs,vec_duplicate_detect)
 importFrom(vctrs,vec_duplicate_id)
 importFrom(vctrs,vec_equal)
 importFrom(vctrs,vec_in)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 03b1838a6..97221a9fc 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -48,6 +48,7 @@
 #' @importFrom vctrs vec_cast
 #' @importFrom vctrs vec_cast_common
 #' @importFrom vctrs vec_data
+#' @importFrom vctrs vec_duplicate_detect
 #' @importFrom vctrs vec_duplicate_id
 #' @importFrom vctrs vec_equal
 #' @importFrom vctrs vec_in

From 7a5708f11b7ddc847697724547eec8a56652578b Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 8 Apr 2025 09:10:24 -0700
Subject: [PATCH 090/107] Refactor and speed up branch of `vec_approx_equal`
 NA/NaN testing

---
 R/patch.R | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/R/patch.R b/R/patch.R
index 0a5efa014..44077d2bb 100644
--- a/R/patch.R
+++ b/R/patch.R
@@ -118,10 +118,10 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
     # Matching vec_equal, we ignore names and other attributes.
     if (!is.null(inds1)) vec1 <- vec_slice(vec1, inds1)
     if (!is.null(inds2)) vec2 <- vec_slice(vec2, inds2)
-    na_or_nan1 <- is.na(vec1)
-    na_or_nan2 <- is.na(vec2)
-    # Since above are bare logical vectors, we can use `fifelse`
     if (na_equal) {
+      na_or_nan1 <- is.na(vec1)
+      na_or_nan2 <- is.na(vec2)
+      # Since above are bare logical vectors, we can use `fifelse`
       res <- fifelse(
         !na_or_nan1 & !na_or_nan2,
         abs(vec1 - vec2) <= abs_tol,
@@ -129,12 +129,8 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
       )
     } else {
       # Like `==` and `vec_equal`, we consider NaN == {NA, NaN, anything else}
-      # to be NA.
-      res <- fifelse(
-        !na_or_nan1 & !na_or_nan2,
-        abs(vec1 - vec2) <= abs_tol,
-        NA
-      )
+      # to be NA.  That logic is actually baked into the basic formula:
+      res <- abs(vec1 - vec2) <= abs_tol
     }
 
     if (!is.null(dim(vec1))) {

From fdee9de47837af70dd78a24aeba704a79b6fc42b Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Tue, 8 Apr 2025 14:40:48 -0700
Subject: [PATCH 091/107] Test unifying epi_slide_opt inner comps between edf
 and archive

---
 R/epi_slide_opt_archive.R |  71 +----------------
 R/epi_slide_opt_edf.R     | 162 ++++++++++++++++++--------------------
 2 files changed, 80 insertions(+), 153 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index fd4b9f6bb..8ff1710c7 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -52,81 +52,14 @@ epi_slide_opt_archive_one_epikey <- function(
   grp_updates_by_version <- grp_updates %>%
     nest(.by = version, .key = "subtbl") %>%
     arrange(version)
-  unit_step <- unit_time_delta(time_type)
+  unit_step <- unit_time_delta(time_type, format = "fast")
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
   result <- map(seq_len(nrow(grp_updates_by_version)), function(version_i) {
     version <- grp_updates_by_version$version[[version_i]]
     inp_update <- grp_updates_by_version$subtbl[[version_i]]
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
-    if (before == Inf) {
-      if (after != 0) {
-        cli_abort('.window_size = Inf is only supported with .align = "right"',
-          class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align"
-        )
-      }
-      # We need to use the entire input snapshot range, filling in time gaps. We
-      # shouldn't pad the ends.
-      slide_t_min <- min(inp_snapshot$time_value)
-      slide_t_max <- max(inp_snapshot$time_value)
-    } else {
-      # If the input had updates in the range t1..t2, this could produce changes
-      # in slide outputs in the range t1-after..t2+before, and to compute those
-      # slide values, we need to look at the input snapshot from
-      # t1-after-before..t2+before+after. nolint: commented_code_linter
-      inp_update_t_min <- min(inp_update$time_value)
-      inp_update_t_max <- max(inp_update$time_value)
-      slide_t_min <- inp_update_t_min - (before + after) * unit_step
-      slide_t_max <- inp_update_t_max + (before + after) * unit_step
-    }
-    slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
-    slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
-    slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
-    # Get additional values needed from inp_snapshot + perform any NA
-    # tail-padding needed to make slider results a fixed window size rather than
-    # adaptive at tails + perform any NA gap-filling needed:
-    slide <- vec_slice(inp_snapshot, slide_inp_backrefs)
-    slide$time_value <- slide_time_values
-    if (f_from_package == "data.table") {
-      if (before == Inf) {
-        slide[, out_colnames] <-
-          f_dots_baked(slide[, in_colnames], seq_len(slide_nrow), adaptive = TRUE)
-      } else {
-        out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
-        if (after != 0L) {
-          # Shift an appropriate amount of NA padding from the start to the end.
-          # (This padding will later be cut off when we filter down to the
-          # original time_values.)
-          out_cols <- lapply(out_cols, function(out_col) {
-            c(out_col[(after + 1L):length(out_col)], rep(NA, after))
-          })
-        }
-        slide[, out_colnames] <- out_cols
-      }
-    } else if (f_from_package == "slider") {
-      for (col_i in seq_along(in_colnames)) {
-        slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
-      }
-    } else {
-      cli_abort(
-        "epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}, which is unsupported",
-        class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
-      )
-    }
-    rows_should_keep <-
-      if (before == Inf) {
-        # Re-introduce time gaps:
-        !is.na(slide_inp_backrefs)
-      } else {
-        # Get back to t1-after..t2+before; times outside this range were included
-        # only so those inside would have enough context for their slide
-        # computations, but these "context" rows may contain invalid slide
-        # computation outputs:
-        vec_rep_each(c(FALSE, TRUE, FALSE), c(before, slide_nrow - before - after, after)) &
-          # Only include time_values that appeared in the input snapshot:
-          !is.na(slide_inp_backrefs)
-      }
-    out_update <- vec_slice(slide, rows_should_keep)
+    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, inp_update$time_value, in_colnames, out_colnames)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 1e2401e2d..ec0e017b4 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -167,6 +167,67 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
   )
 }
 
+epi_slide_opt_one_epikey <- function(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, ref_time_values, in_colnames, out_colnames) {
+  # TODO try converting time values to reals, do work on reals, convert back at very end?
+  if (before == Inf) {
+    if (after != 0L) {
+      cli_abort('.window_size = Inf is only supported with .align = "right"',
+        class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align"
+      )
+    }
+    # We need to use the entire input snapshot range, filling in time gaps. We
+    # shouldn't pad the ends.
+    slide_t_min <- min(inp_snapshot$time_value)
+    slide_t_max <- max(inp_snapshot$time_value)
+  } else {
+    # If the input had updates in the range t1..t2, this could produce changes
+    # in slide outputs in the range t1-after..t2+before, and to compute those
+    # slide values, we need to look at the input snapshot from
+    # t1-after-before..t2+before+after. nolint: commented_code_linter
+    inp_update_t_min <- min(ref_time_values)
+    inp_update_t_max <- max(ref_time_values)
+    slide_t_min <- inp_update_t_min - (before + after) * unit_step
+    slide_t_max <- inp_update_t_max + (before + after) * unit_step
+  }
+  slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
+  slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
+  slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
+  # Get additional values needed from inp_snapshot + perform any NA
+  # tail-padding needed to make slider results a fixed window size rather than
+  # adaptive at tails + perform any NA gap-filling needed:
+  slide <- vec_slice(inp_snapshot, slide_inp_backrefs)
+  slide$time_value <- slide_time_values
+  if (f_from_package == "data.table") {
+    if (before == Inf) {
+      slide[, out_colnames] <-
+        f_dots_baked(slide[, in_colnames], seq_len(slide_nrow), adaptive = TRUE)
+    } else {
+      out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
+      if (after != 0L) {
+        # Shift an appropriate amount of NA padding from the start to the end.
+        # (This padding will later be cut off when we filter down to the
+        # original time_values.)
+        out_cols <- lapply(out_cols, function(out_col) {
+          c(out_col[(after + 1L):length(out_col)], rep(NA, after))
+        })
+      }
+      slide[, out_colnames] <- out_cols
+    }
+  } else if (f_from_package == "slider") {
+    for (col_i in seq_along(in_colnames)) {
+      slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
+    }
+  } else {
+    cli_abort(
+      "epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}, which is unsupported",
+      class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
+    )
+  }
+  rows_should_keep <- vec_match(ref_time_values, slide_time_values)
+  out_update <- vec_slice(slide, rows_should_keep)
+  out_update
+}
+
 #' Optimized slide functions for common cases
 #'
 #' @description
@@ -425,6 +486,9 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
   }
   validate_slide_window_arg(.window_size, time_type)
   window_args <- get_before_after_from_window(.window_size, .align, time_type)
+  before <- time_delta_to_n_steps(window_args$before, time_type)
+  after <- time_delta_to_n_steps(window_args$after, time_type)
+  unit_step <- unit_time_delta(time_type, format = "fast")
 
   # Handle output naming:
   names_info <- across_ish_names_info(
@@ -434,97 +498,27 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
   input_col_names <- names_info$input_col_names
   output_col_names <- names_info$output_col_names
 
-  # Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
-  date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
-  all_dates <- date_seq_list$all_dates
-  pad_early_dates <- date_seq_list$pad_early_dates
-  pad_late_dates <- date_seq_list$pad_late_dates
-
-  slide_one_grp <- function(.data_group, .group_key, ...) {
-    missing_times <- all_dates[!vec_in(all_dates, .data_group$time_value)]
-    # `frollmean` requires a full window to compute a result. Add NA values
-    # to beginning and end of the group so that we get results for the
-    # first `before` and last `after` elements.
-    .data_group <- vec_rbind(
-      .data_group, # (tibble; epi_slide_opt uses .keep = FALSE)
-      new_tibble(vec_recycle_common(
-        time_value = c(missing_times, pad_early_dates, pad_late_dates),
-        .real = FALSE
-      ))
-    ) %>%
-      `[`(vec_order(.$time_value), )
-
-    if (f_from_package == "data.table") {
-      # Grouping should ensure that we don't have duplicate time values.
-      # Completion above should ensure we have at least .window_size rows. Check
-      # that we don't have more than .window_size rows (or fewer somehow):
-      if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
-        cli_abort(
-          c(
-            "group contains an unexpected number of rows",
-            "i" = c("Input data may contain `time_values` closer together than the
-              expected `time_step` size")
-          ),
-          class = "epiprocess__epi_slide_opt__unexpected_row_number",
-          epiprocess__data_group = .data_group,
-          epiprocess__group_key = .group_key
-        )
-      }
-
-      # `frollmean` is 1-indexed, so create a new window width based on our
-      # `before` and `after` params. Right-aligned `frollmean` results'
-      # `ref_time_value`s will be `after` timesteps ahead of where they should
-      # be; shift results to the left by `after` timesteps.
-      if (window_args$before != Inf) {
-        window_size <- window_args$before + window_args$after + 1L
-        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, ...)
-      } else {
-        window_size <- list(seq_along(.data_group$time_value))
-        roll_output <- .f(x = .data_group[, input_col_names], n = window_size, adaptive = TRUE, ...)
-      }
-      if (window_args$after >= 1) {
-        .data_group[, output_col_names] <- lapply(roll_output, function(out_col) {
-          # Shift an appropriate amount of NA padding from the start to the end.
-          # (This padding will later be cut off when we filter down to the
-          # original time_values.)
-          c(out_col[(window_args$after + 1L):length(out_col)], rep(NA, window_args$after))
-        })
-      } else {
-        .data_group[, output_col_names] <- roll_output
-      }
-    }
-    if (f_from_package == "slider") {
-      for (i in seq_along(input_col_names)) {
-        .data_group[, output_col_names[i]] <- .f(
-          x = .data_group[[input_col_names[i]]],
-          before = as.numeric(window_args$before),
-          after = as.numeric(window_args$after),
-          ...
-        )
-      }
+  f_dots_baked <-
+    if (rlang::dots_n(...) == 0L) {
+      # Leaving `.f` unchanged slightly improves computation speed and trims
+      # debug stack traces:
+      .f
+    } else {
+      purrr::partial(.f, ...)
     }
 
-    .data_group
-  }
-
   result <- .x %>%
-    `[[<-`(".real", value = TRUE) %>%
-    group_modify(slide_one_grp, ..., .keep = FALSE) %>%
-    `[`(.$.real, names(.) != ".real") %>%
-    arrange_col_canonical() %>%
-    group_by(!!!.x_orig_groups)
+    group_modify(function(grp_data, grp_key) {
+      epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, vctrs::vec_set_intersect(ref_time_values, grp_data$time_value), names_info$input_col_names, names_info$output_col_names)
+    }) %>%
+    arrange_col_canonical()
 
   if (.all_rows) {
-    result[!vec_in(result$time_value, ref_time_values), output_col_names] <- NA
-  } else if (user_provided_rtvs) {
-    result <- result[vec_in(result$time_value, ref_time_values), ]
+    ekt_names <- key_colnames(.x)
+    result <- left_join(ungroup(.x), result[c(ekt_names, output_col_names)], by = ekt_names)
   }
 
-  if (!is_epi_df(result)) {
-    # `.all_rows` handling strips epi_df format and metadata.
-    # Restore them.
-    result <- reclass(result, attributes(.x)$metadata)
-  }
+  result <- group_by(result, !!!.x_orig_groups)
 
   return(result)
 }

From 68065c6dd0a8ee8749dfc3280424040b4d52895f Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 10:51:08 -0700
Subject: [PATCH 092/107] Refactor unified epi_slide_opt_one_epikey for clarity

---
 R/epi_slide_opt_archive.R | 11 ++++++++++-
 R/epi_slide_opt_edf.R     | 38 +++++++++++++++++++-------------------
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 8ff1710c7..543881f90 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -59,7 +59,16 @@ epi_slide_opt_archive_one_epikey <- function(
     version <- grp_updates_by_version$version[[version_i]]
     inp_update <- grp_updates_by_version$subtbl[[version_i]]
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
-    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, inp_update$time_value, in_colnames, out_colnames)
+    inp_update_min_t <- min(inp_update$time_value)
+    inp_update_max_t <- max(inp_update$time_value)
+    # Time inp_update_max_t + before should have an output update, since it
+    # depends on inp_update_max_t + before - before = inp_update_max_t, which
+    # has an input update. Similarly, we could have updates beginning with
+    # inp_update_min_t - after, or anything in between these two bounds.
+    out_update_min_t <- time_minus_n_steps(inp_update_min_t, after, time_type)
+    out_update_max_t <- time_plus_n_steps(inp_update_max_t, before, time_type)
+    out_update_ts <- vec_slice(inp_snapshot$time_value, between(inp_snapshot$time_value, out_update_min_t, out_update_max_t))
+    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, out_update_ts, in_colnames, out_colnames)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index ec0e017b4..8b2945f0c 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -167,7 +167,10 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
   )
 }
 
-epi_slide_opt_one_epikey <- function(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, ref_time_values, in_colnames, out_colnames) {
+epi_slide_opt_one_epikey <- function(inp_tbl,
+                                     f_dots_baked, f_from_package, before, after, unit_step, time_type,
+                                     out_time_values,
+                                     in_colnames, out_colnames) {
   # TODO try converting time values to reals, do work on reals, convert back at very end?
   if (before == Inf) {
     if (after != 0L) {
@@ -175,27 +178,23 @@ epi_slide_opt_one_epikey <- function(inp_snapshot, f_dots_baked, f_from_package,
         class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align"
       )
     }
-    # We need to use the entire input snapshot range, filling in time gaps. We
-    # shouldn't pad the ends.
-    slide_t_min <- min(inp_snapshot$time_value)
-    slide_t_max <- max(inp_snapshot$time_value)
+    # We need to use the entire input range, filling in time gaps. We shouldn't
+    # pad the ends.
+    slide_t_min <- min(inp_tbl$time_value) # FIXME match existing behavior or complete change
+    slide_t_max <- max(inp_tbl$time_value)
   } else {
-    # If the input had updates in the range t1..t2, this could produce changes
-    # in slide outputs in the range t1-after..t2+before, and to compute those
-    # slide values, we need to look at the input snapshot from
-    # t1-after-before..t2+before+after. nolint: commented_code_linter
-    inp_update_t_min <- min(ref_time_values)
-    inp_update_t_max <- max(ref_time_values)
-    slide_t_min <- inp_update_t_min - (before + after) * unit_step
-    slide_t_max <- inp_update_t_max + (before + after) * unit_step
+    slide_t_min <- min(out_time_values) - before
+    slide_t_max <- max(out_time_values) + after
   }
   slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
   slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
-  slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
-  # Get additional values needed from inp_snapshot + perform any NA
+  slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
+  # Get additional values needed from inp_tbl + perform any NA
   # tail-padding needed to make slider results a fixed window size rather than
   # adaptive at tails + perform any NA gap-filling needed:
-  slide <- vec_slice(inp_snapshot, slide_inp_backrefs)
+  slide <- vec_slice(inp_tbl, slide_inp_backrefs)
+  # TODO refactor to use a join if not using backrefs later anymore? or perf:
+  # try removing time_value column before slice?
   slide$time_value <- slide_time_values
   if (f_from_package == "data.table") {
     if (before == Inf) {
@@ -223,9 +222,10 @@ epi_slide_opt_one_epikey <- function(inp_snapshot, f_dots_baked, f_from_package,
       class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
     )
   }
-  rows_should_keep <- vec_match(ref_time_values, slide_time_values)
-  out_update <- vec_slice(slide, rows_should_keep)
-  out_update
+  # TODO remove NAs from the match result to make `out_time_values` easier to use?  Or rename to `out_time_values`?
+  rows_should_keep <- vec_match(out_time_values, slide_time_values)
+  out_tbl <- vec_slice(slide, rows_should_keep)
+  out_tbl
 }
 
 #' Optimized slide functions for common cases

From f0645782f4ba1e5cf3a019c8f8cb808155c84d09 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 11:13:47 -0700
Subject: [PATCH 093/107] Clean up slide range logic

---
 R/epi_slide_opt_edf.R | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 8b2945f0c..77b501455 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -180,8 +180,8 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
     }
     # We need to use the entire input range, filling in time gaps. We shouldn't
     # pad the ends.
-    slide_t_min <- min(inp_tbl$time_value) # FIXME match existing behavior or complete change
-    slide_t_max <- max(inp_tbl$time_value)
+    slide_t_min <- min(inp_tbl$time_value) # FIXME match existing behavior, or complete changeover
+    slide_t_max <- max(out_time_values)
   } else {
     slide_t_min <- min(out_time_values) - before
     slide_t_max <- max(out_time_values) + after
@@ -189,9 +189,9 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
   slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
   slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
   slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
-  # Get additional values needed from inp_tbl + perform any NA
-  # tail-padding needed to make slider results a fixed window size rather than
-  # adaptive at tails + perform any NA gap-filling needed:
+  # Get values needed from inp_tbl + perform any NA tail-padding needed to make
+  # slider results a fixed window size rather than adaptive at tails, and
+  # perform any NA gap-filling needed:
   slide <- vec_slice(inp_tbl, slide_inp_backrefs)
   # TODO refactor to use a join if not using backrefs later anymore? or perf:
   # try removing time_value column before slice?
@@ -222,7 +222,6 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
       class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
     )
   }
-  # TODO remove NAs from the match result to make `out_time_values` easier to use?  Or rename to `out_time_values`?
   rows_should_keep <- vec_match(out_time_values, slide_time_values)
   out_tbl <- vec_slice(slide, rows_should_keep)
   out_tbl

From 5295f1b225f485ce1680cd9e0b5a4c8cb16006a2 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 12:24:27 -0700
Subject: [PATCH 094/107] perf(unit_time_delta): faster arg matching

---
 R/time-utils.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/time-utils.R b/R/time-utils.R
index 73fbc8a56..5ab8aaabf 100644
--- a/R/time-utils.R
+++ b/R/time-utils.R
@@ -110,6 +110,8 @@ validate_slide_window_arg <- function(arg, time_type, lower = 1, allow_inf = TRU
   }
 }
 
+time_delta_formats <- c("friendly", "fast")
+
 #' Object that, added to time_values of time_type, advances by one time step/interval
 #'
 #' @param time_type string; `epi_df`'s or `epi_archive`'s `time_type`
@@ -129,7 +131,7 @@ validate_slide_window_arg <- function(arg, time_type, lower = 1, allow_inf = TRU
 #'
 #' @keywords internal
 unit_time_delta <- function(time_type, format = c("friendly", "fast")) {
-  format <- rlang::arg_match(format)
+  format <- rlang::arg_match0(format, time_delta_formats)
   switch(format,
     friendly = switch(time_type,
       day = as.difftime(1, units = "days"),

From c568774c9bb04c76adaf678f17c07401b4dda1f0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 16:13:27 -0700
Subject: [PATCH 095/107] refactor: handle time +/- Inf in helper function

---
 R/epi_slide_opt_archive.R         |  8 ++++---
 R/epi_slide_opt_edf.R             | 18 ++++-----------
 R/time-utils.R                    | 38 ++++++++++++++++++++++++++++---
 man/time_minus_time_in_n_steps.Rd |  4 ++--
 man/time_plus_n_steps.Rd          |  9 ++++++--
 man/time_plus_slide_window_arg.Rd | 33 +++++++++++++++++++++++++++
 6 files changed, 86 insertions(+), 24 deletions(-)
 create mode 100644 man/time_plus_slide_window_arg.Rd

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 543881f90..e40e87079 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -64,9 +64,11 @@ epi_slide_opt_archive_one_epikey <- function(
     # Time inp_update_max_t + before should have an output update, since it
     # depends on inp_update_max_t + before - before = inp_update_max_t, which
     # has an input update. Similarly, we could have updates beginning with
-    # inp_update_min_t - after, or anything in between these two bounds.
-    out_update_min_t <- time_minus_n_steps(inp_update_min_t, after, time_type)
-    out_update_max_t <- time_plus_n_steps(inp_update_max_t, before, time_type)
+    # inp_update_min_t - after, or anything in between these two bounds. If
+    # before == Inf, we need to update outputs all the way to the end of the
+    # input *snapshot*.
+    out_update_min_t <- time_minus_slide_window_arg(inp_update_min_t, after, time_type)
+    out_update_max_t <- time_plus_slide_window_arg(inp_update_max_t, before, time_type, max(inp_snapshot$time_value))
     out_update_ts <- vec_slice(inp_snapshot$time_value, between(inp_snapshot$time_value, out_update_min_t, out_update_max_t))
     out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, out_update_ts, in_colnames, out_colnames)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 77b501455..f0585543d 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -172,20 +172,10 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
                                      out_time_values,
                                      in_colnames, out_colnames) {
   # TODO try converting time values to reals, do work on reals, convert back at very end?
-  if (before == Inf) {
-    if (after != 0L) {
-      cli_abort('.window_size = Inf is only supported with .align = "right"',
-        class = "epiprocess__epi_slide_opt_archive__inf_window_invalid_align"
-      )
-    }
-    # We need to use the entire input range, filling in time gaps. We shouldn't
-    # pad the ends.
-    slide_t_min <- min(inp_tbl$time_value) # FIXME match existing behavior, or complete changeover
-    slide_t_max <- max(out_time_values)
-  } else {
-    slide_t_min <- min(out_time_values) - before
-    slide_t_max <- max(out_time_values) + after
-  }
+  #
+  # FIXME min time_value for this epikey vs. entire edf; match existing behavior, or complete changeover
+  slide_t_min <- time_minus_slide_window_arg(min(out_time_values), before, time_type, min(inp_tbl$time_value))
+  slide_t_max <- time_plus_slide_window_arg(max(out_time_values), after, time_type)
   slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
   slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
   slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
diff --git a/R/time-utils.R b/R/time-utils.R
index 5ab8aaabf..e2eba6807 100644
--- a/R/time-utils.R
+++ b/R/time-utils.R
@@ -339,7 +339,7 @@ difftime_approx_ceiling_time_delta <- function(difftime, time_type) {
   )
 }
 
-#' Difference between two time value vectors in terms of number of time "steps"
+#' Difference between two finite `time_value` vectors in terms of number of time "steps"
 #'
 #' @param x a time_value (vector) of time type `time_type`
 #' @param y a time_value (vector) of time type `time_type`
@@ -352,15 +352,18 @@ time_minus_time_in_n_steps <- function(x, y, time_type) {
   time_delta_to_n_steps(x - y, time_type)
 }
 
-#' Advance/retreat time_values by specified number of time "steps"
+#' Advance/retreat time_value(s) by bare-integerish number(s) of time "steps"
 #'
 #' Here, a "step" is based on the `time_type`, not just the class of `x`.
 #'
 #' @param x a time_value (vector) of time type `time_type`
-#' @param y integerish (vector)
+#' @param y bare integerish (vector)
 #' @param time_type as in [`validate_slide_window_arg()`]
 #' @return a time_value (vector) of time type `time_type`
 #'
+#' @seealso [`time_plus_slide_window_arg`] if you're working with a `y` that is
+#'   a slide window arg, which is scalar but otherwise more general (class-wise,
+#'   Inf-wise) than an integerish vector.
 #' @keywords internal
 time_plus_n_steps <- function(x, y, time_type) {
   x + y * unit_time_delta(time_type, "fast")
@@ -370,3 +373,32 @@ time_plus_n_steps <- function(x, y, time_type) {
 time_minus_n_steps <- function(x, y, time_type) {
   x - y * unit_time_delta(time_type, "fast")
 }
+
+#' Advance/retreat time_value(s) by specified amount (slide window arg)
+#'
+#' @param x a time_value (vector) of time type `time_type`
+#' @param y a (scalar) slide window arg; should pass [`validate_slide_window_arg()`]
+#' @param time_type as in [`validate_slide_window_arg()`]
+#' @param max_time_value when `y == Inf`, what should be the result of adding `y`?
+#' @param min_time_value when `y == Inf`, what should be the result of subtracting `y`?
+#' @return a time_value (vector) of time type `time_type`
+#'
+#' @keywords internal
+#' @seealso [`time_plus_n_steps`], if you're working with an integerish vector
+#'   number of time steps `y` (output from other `*n_steps` functions) instead.
+time_plus_slide_window_arg <- function(x, y, time_type, max_time_value) {
+  if (y == Inf) {
+    rep(max_time_value, vec_size(x))
+  } else {
+    time_plus_n_steps(x, y, time_type)
+  }
+}
+
+#' @rdname time_plus_slide_window_arg
+time_minus_slide_window_arg <- function(x, y, time_type, min_time_value) {
+  if (y == Inf) {
+    rep(min_time_value, vec_size(x))
+  } else {
+    time_minus_n_steps(x, y, time_type)
+  }
+}
diff --git a/man/time_minus_time_in_n_steps.Rd b/man/time_minus_time_in_n_steps.Rd
index aab030dea..926b79034 100644
--- a/man/time_minus_time_in_n_steps.Rd
+++ b/man/time_minus_time_in_n_steps.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/time-utils.R
 \name{time_minus_time_in_n_steps}
 \alias{time_minus_time_in_n_steps}
-\title{Difference between two time value vectors in terms of number of time "steps"}
+\title{Difference between two finite \code{time_value} vectors in terms of number of time "steps"}
 \usage{
 time_minus_time_in_n_steps(x, y, time_type)
 }
@@ -18,6 +18,6 @@ integerish vector such that \code{x + n_steps_to_time_delta_fast(result)}
 should equal \code{y}.
 }
 \description{
-Difference between two time value vectors in terms of number of time "steps"
+Difference between two finite \code{time_value} vectors in terms of number of time "steps"
 }
 \keyword{internal}
diff --git a/man/time_plus_n_steps.Rd b/man/time_plus_n_steps.Rd
index f7071c132..26edf9053 100644
--- a/man/time_plus_n_steps.Rd
+++ b/man/time_plus_n_steps.Rd
@@ -3,7 +3,7 @@
 \name{time_plus_n_steps}
 \alias{time_plus_n_steps}
 \alias{time_minus_n_steps}
-\title{Advance/retreat time_values by specified number of time "steps"}
+\title{Advance/retreat time_value(s) by bare-integerish number(s) of time "steps"}
 \usage{
 time_plus_n_steps(x, y, time_type)
 
@@ -12,7 +12,7 @@ time_minus_n_steps(x, y, time_type)
 \arguments{
 \item{x}{a time_value (vector) of time type \code{time_type}}
 
-\item{y}{integerish (vector)}
+\item{y}{bare integerish (vector)}
 
 \item{time_type}{as in \code{\link[=validate_slide_window_arg]{validate_slide_window_arg()}}}
 }
@@ -22,4 +22,9 @@ a time_value (vector) of time type \code{time_type}
 \description{
 Here, a "step" is based on the \code{time_type}, not just the class of \code{x}.
 }
+\seealso{
+\code{\link{time_plus_slide_window_arg}} if you're working with a \code{y} that is
+a slide window arg, which is scalar but otherwise more general (class-wise,
+Inf-wise) than an integerish vector.
+}
 \keyword{internal}
diff --git a/man/time_plus_slide_window_arg.Rd b/man/time_plus_slide_window_arg.Rd
new file mode 100644
index 000000000..1aa117f47
--- /dev/null
+++ b/man/time_plus_slide_window_arg.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/time-utils.R
+\name{time_plus_slide_window_arg}
+\alias{time_plus_slide_window_arg}
+\alias{time_minus_slide_window_arg}
+\title{Advance/retreat time_value(s) by specified amount (slide window arg)}
+\usage{
+time_plus_slide_window_arg(x, y, time_type, max_time_value)
+
+time_minus_slide_window_arg(x, y, time_type, min_time_value)
+}
+\arguments{
+\item{x}{a time_value (vector) of time type \code{time_type}}
+
+\item{y}{a (scalar) slide window arg; should pass \code{\link[=validate_slide_window_arg]{validate_slide_window_arg()}}}
+
+\item{time_type}{as in \code{\link[=validate_slide_window_arg]{validate_slide_window_arg()}}}
+
+\item{max_time_value}{when \code{y == Inf}, what should be the result of adding \code{y}?}
+
+\item{min_time_value}{when \code{y == Inf}, what should be the result of subtracting \code{y}?}
+}
+\value{
+a time_value (vector) of time type \code{time_type}
+}
+\description{
+Advance/retreat time_value(s) by specified amount (slide window arg)
+}
+\seealso{
+\code{\link{time_plus_n_steps}}, if you're working with an integerish vector
+number of time steps \code{y} (output from other \verb{*n_steps} functions) instead.
+}
+\keyword{internal}

From 80966e21f29910cd7a2f010755b39151844312d1 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 18:56:09 -0700
Subject: [PATCH 096/107] Attempt rewriting combined opt slide logic to match
 old archive perf chars

Something still seems different; some Date arithmetic stuck out, and attempts to
avoid helped somewhat, but this still seems slower for archives than pre-unified
approach.
---
 NAMESPACE                 |  1 +
 R/epi_slide_opt_archive.R |  3 +--
 R/epi_slide_opt_edf.R     | 38 ++++++++++++++++++++++++++++++++------
 R/epiprocess-package.R    |  1 +
 4 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 920422a00..c596f67a6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -284,6 +284,7 @@ importFrom(vctrs,vec_recycle_common)
 importFrom(vctrs,vec_rep)
 importFrom(vctrs,vec_rep_each)
 importFrom(vctrs,vec_seq_along)
+importFrom(vctrs,vec_set_intersect)
 importFrom(vctrs,vec_set_names)
 importFrom(vctrs,vec_size)
 importFrom(vctrs,vec_size_common)
diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index e40e87079..2351db8d9 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -69,8 +69,7 @@ epi_slide_opt_archive_one_epikey <- function(
     # input *snapshot*.
     out_update_min_t <- time_minus_slide_window_arg(inp_update_min_t, after, time_type)
     out_update_max_t <- time_plus_slide_window_arg(inp_update_max_t, before, time_type, max(inp_snapshot$time_value))
-    out_update_ts <- vec_slice(inp_snapshot$time_value, between(inp_snapshot$time_value, out_update_min_t, out_update_max_t))
-    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, out_update_ts, in_colnames, out_colnames)
+    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index f0585543d..673227550 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -169,13 +169,26 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
 
 epi_slide_opt_one_epikey <- function(inp_tbl,
                                      f_dots_baked, f_from_package, before, after, unit_step, time_type,
-                                     out_time_values,
+                                     out_filter_time_range, out_filter_time_set,
                                      in_colnames, out_colnames) {
   # TODO try converting time values to reals, do work on reals, convert back at very end?
   #
-  # FIXME min time_value for this epikey vs. entire edf; match existing behavior, or complete changeover
-  slide_t_min <- time_minus_slide_window_arg(min(out_time_values), before, time_type, min(inp_tbl$time_value))
-  slide_t_max <- time_plus_slide_window_arg(max(out_time_values), after, time_type)
+  # TODO loosen restrictions here.  each filter optional?
+  if (!is.null(out_filter_time_range) && is.null(out_filter_time_set)) {
+    out_filter_time_style <- "range"
+    out_t_min <- out_filter_time_range[[1L]]
+    out_t_max <- out_filter_time_range[[2L]]
+  } else if (is.null(out_filter_time_range) && !is.null(out_filter_time_set)) {
+    # FIXME min time_value for this epikey vs. entire edf; match existing behavior, or complete changeover
+    out_filter_time_style <- "set"
+    out_time_values <- vec_set_intersect(inp_tbl$time_value, out_filter_time_set)
+    out_t_min <- min(out_time_values)
+    out_t_max <- max(out_time_values)
+  } else {
+    cli_abort("Exactly one of `out_filter_time_range` and `out_filter_time_set` must be non-`NULL`.")
+  }
+  slide_t_min <- time_minus_slide_window_arg(out_t_min, before, time_type, min(inp_tbl$time_value))
+  slide_t_max <- time_plus_slide_window_arg(out_t_max, after, time_type)
   slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
   slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
   slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
@@ -212,7 +225,20 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
       class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
     )
   }
-  rows_should_keep <- vec_match(out_time_values, slide_time_values)
+  rows_should_keep1 <- !is.na(slide_inp_backrefs)
+  rows_should_keep2 <- switch(
+    out_filter_time_style,
+    range = vec_rep_each(
+      c(FALSE, TRUE, FALSE),
+      time_minus_time_in_n_steps(
+        vctrs::vec_c(out_t_min,   out_t_max, slide_t_max),
+        vctrs::vec_c(slide_t_min, out_t_min, out_t_max),
+        time_type
+      ) + c(0L, 1L, 0L)
+    ),
+    set = vec_in(slide_time_values, out_time_values)
+  )
+  rows_should_keep <- rows_should_keep1 & rows_should_keep2
   out_tbl <- vec_slice(slide, rows_should_keep)
   out_tbl
 }
@@ -498,7 +524,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
 
   result <- .x %>%
     group_modify(function(grp_data, grp_key) {
-      epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, vctrs::vec_set_intersect(ref_time_values, grp_data$time_value), names_info$input_col_names, names_info$output_col_names)
+      epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
     }) %>%
     arrange_col_canonical()
 
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index 97221a9fc..c5a975a5f 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -61,6 +61,7 @@
 #' @importFrom vctrs vec_rep
 #' @importFrom vctrs vec_rep_each
 #' @importFrom vctrs vec_seq_along
+#' @importFrom vctrs vec_set_intersect
 #' @importFrom vctrs vec_set_names
 #' @importFrom vctrs vec_size_common
 #' @importFrom vctrs vec_slice

From db2f274556603758ded088f2cd90f913fcbaf313 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 08:41:40 -0700
Subject: [PATCH 097/107] perf: try `vec_c` -> `c` on 3 time_values

As `vec_c` is slower on 3 scalar `Date`s, and it's probably pretty uncommon to
use `time_value`s with `c` incompatible with `vec_c` (though possible... maybe
tibbles with year, week, wday, like from MMWRweek; though these would probably
break inside archive DT).
---
 R/epi_slide_opt_edf.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 673227550..74ff62753 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -231,8 +231,8 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
     range = vec_rep_each(
       c(FALSE, TRUE, FALSE),
       time_minus_time_in_n_steps(
-        vctrs::vec_c(out_t_min,   out_t_max, slide_t_max),
-        vctrs::vec_c(slide_t_min, out_t_min, out_t_max),
+        c(out_t_min,   out_t_max, slide_t_max),
+        c(slide_t_min, out_t_min, out_t_max),
         time_type
       ) + c(0L, 1L, 0L)
     ),

From 4ec96bee3a49dd7ad3a65e75e95a773c0981b548 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 08:45:17 -0700
Subject: [PATCH 098/107] perf: avoid `c.Date` when possible

Revert earlier change to try to reduce time inside `time_minus_time_in_n_steps`,
as `c.Date` is more costly.  This also works back towards generality from the
previous `vec_c` -> `c` change.
---
 R/epi_slide_opt_edf.R | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 74ff62753..3247c7116 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -230,11 +230,9 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
     out_filter_time_style,
     range = vec_rep_each(
       c(FALSE, TRUE, FALSE),
-      time_minus_time_in_n_steps(
-        c(out_t_min,   out_t_max, slide_t_max),
-        c(slide_t_min, out_t_min, out_t_max),
-        time_type
-      ) + c(0L, 1L, 0L)
+      c(time_minus_time_in_n_steps(out_t_min, slide_t_min, time_type),
+        time_minus_time_in_n_steps(out_t_max, out_t_min, time_type) + 1L,
+        time_minus_time_in_n_steps(slide_t_max, out_t_max, time_type))
     ),
     set = vec_in(slide_time_values, out_time_values)
   )

From cc1a5634a07498a30b4ac166cb4ecf4777f21972 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 14:04:45 -0700
Subject: [PATCH 099/107] perf: old, faster slide edge-trimming args are
 actually usable

---
 R/epi_slide_opt_edf.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 3247c7116..850c7ffe4 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -230,9 +230,7 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
     out_filter_time_style,
     range = vec_rep_each(
       c(FALSE, TRUE, FALSE),
-      c(time_minus_time_in_n_steps(out_t_min, slide_t_min, time_type),
-        time_minus_time_in_n_steps(out_t_max, out_t_min, time_type) + 1L,
-        time_minus_time_in_n_steps(slide_t_max, out_t_max, time_type))
+      c(before, slide_nrow - before - after, after),
     ),
     set = vec_in(slide_time_values, out_time_values)
   )

From 4792aeadbc25b6753a65a5696213e44cdebdaaa9 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 14:23:07 -0700
Subject: [PATCH 100/107] fix(time_plus_slide_window_arg): on non-integerish,
 non-Inf `y`

---
 R/time-utils.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/time-utils.R b/R/time-utils.R
index e2eba6807..26ca803de 100644
--- a/R/time-utils.R
+++ b/R/time-utils.R
@@ -390,7 +390,7 @@ time_plus_slide_window_arg <- function(x, y, time_type, max_time_value) {
   if (y == Inf) {
     rep(max_time_value, vec_size(x))
   } else {
-    time_plus_n_steps(x, y, time_type)
+    x + y
   }
 }
 
@@ -399,6 +399,6 @@ time_minus_slide_window_arg <- function(x, y, time_type, min_time_value) {
   if (y == Inf) {
     rep(min_time_value, vec_size(x))
   } else {
-    time_minus_n_steps(x, y, time_type)
+    x - y
   }
 }

From 62375f8f49b82fcbe4457c65ab9f92ff63551d3d Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 15:25:28 -0700
Subject: [PATCH 101/107] fix: output filter w/ before=Inf, wrong before/after
 types

Also change a map -> lapply so we can get srcref to internal error immediately,
add a missed .window_size-missingness check, delete some helper functions that
were sort of helpful but also tacked on significant run time when used in a
natural way.
---
 R/epi_slide_opt_archive.R               | 25 ++++++++++----
 R/epi_slide_opt_edf.R                   | 46 +++++++++++++++++--------
 R/time-utils.R                          | 29 ----------------
 man/epi_slide_opt_archive_one_epikey.Rd | 14 ++++----
 man/time_plus_slide_window_arg.Rd       | 33 ------------------
 5 files changed, 57 insertions(+), 90 deletions(-)
 delete mode 100644 man/time_plus_slide_window_arg.Rd

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 2351db8d9..6fe0c1da4 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -47,7 +47,8 @@
 epi_slide_opt_archive_one_epikey <- function(
     grp_updates,
     in_colnames,
-    f_dots_baked, f_from_package, before, after, time_type,
+    f_dots_baked, f_from_package,
+    before_steps, after_steps, time_type,
     out_colnames) {
   grp_updates_by_version <- grp_updates %>%
     nest(.by = version, .key = "subtbl") %>%
@@ -55,7 +56,7 @@ epi_slide_opt_archive_one_epikey <- function(
   unit_step <- unit_time_delta(time_type, format = "fast")
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
-  result <- map(seq_len(nrow(grp_updates_by_version)), function(version_i) {
+  result <- lapply(seq_len(nrow(grp_updates_by_version)), function(version_i) {
     version <- grp_updates_by_version$version[[version_i]]
     inp_update <- grp_updates_by_version$subtbl[[version_i]]
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
@@ -67,9 +68,13 @@ epi_slide_opt_archive_one_epikey <- function(
     # inp_update_min_t - after, or anything in between these two bounds. If
     # before == Inf, we need to update outputs all the way to the end of the
     # input *snapshot*.
-    out_update_min_t <- time_minus_slide_window_arg(inp_update_min_t, after, time_type)
-    out_update_max_t <- time_plus_slide_window_arg(inp_update_max_t, before, time_type, max(inp_snapshot$time_value))
-    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
+    out_update_min_t <- inp_update_min_t - after_steps * unit_step
+    if (before_steps == Inf) {
+      out_update_max_t <- max(inp_snapshot$time_value)
+    } else {
+      out_update_max_t <- inp_update_max_t + before_steps * unit_step
+    }
+    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before_steps, after_steps, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
@@ -118,11 +123,19 @@ epi_slide_opt.epi_archive <-
         purrr::partial(.f, ...)
       }
     col_names_quo <- enquo(.col_names)
+    if (is.null(.window_size)) {
+      cli_abort(
+        "epi_slide_opt: `.window_size` must be specified.",
+        class = "epiprocess__epi_slide_opt__window_size_missing"
+      )
+    }
     names_info <- across_ish_names_info(
       .x$DT, time_type, col_names_quo, .f_info$namer,
       .window_size, .align, .prefix, .suffix, .new_col_names
     )
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
+    before_steps <- time_delta_to_n_steps(window_args$before, time_type)
+    after_steps <- time_delta_to_n_steps(window_args$after, time_type)
     if (!is.null(.ref_time_values)) {
       cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument",
         class = "epiprocess__epi_slide_opt_archive__ref_time_values_unsupported"
@@ -154,7 +167,7 @@ epi_slide_opt.epi_archive <-
         res <- epi_slide_opt_archive_one_epikey(
           group_values,
           names_info$input_col_names,
-          .f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type,
+          .f_dots_baked, .f_info$from_package, before_steps, after_steps, time_type,
           names_info$output_col_names
         )
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 850c7ffe4..b5c99d9b4 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -168,9 +168,12 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
 }
 
 epi_slide_opt_one_epikey <- function(inp_tbl,
-                                     f_dots_baked, f_from_package, before, after, unit_step, time_type,
+                                     f_dots_baked, f_from_package,
+                                     before_steps, after_steps, unit_step, time_type,
                                      out_filter_time_range, out_filter_time_set,
                                      in_colnames, out_colnames) {
+  # TODO rename function, reorder args, roxygen2
+  #
   # TODO try converting time values to reals, do work on reals, convert back at very end?
   #
   # TODO loosen restrictions here.  each filter optional?
@@ -187,8 +190,14 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
   } else {
     cli_abort("Exactly one of `out_filter_time_range` and `out_filter_time_set` must be non-`NULL`.")
   }
-  slide_t_min <- time_minus_slide_window_arg(out_t_min, before, time_type, min(inp_tbl$time_value))
-  slide_t_max <- time_plus_slide_window_arg(out_t_max, after, time_type)
+  if (before_steps == Inf) {
+    slide_t_min <- min(inp_tbl$time_value)
+    slide_start_padding_n <- time_minus_time_in_n_steps(out_t_min, slide_t_min, time_type)
+  } else {
+    slide_t_min <- out_t_min - before_steps * unit_step
+    slide_start_padding_n <- before_steps # perf: avoid time_minus_time_in_n_steps
+  }
+  slide_t_max <- out_t_max + after_steps * unit_step
   slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
   slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
   slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
@@ -200,24 +209,24 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
   # try removing time_value column before slice?
   slide$time_value <- slide_time_values
   if (f_from_package == "data.table") {
-    if (before == Inf) {
+    if (before_steps == Inf) {
       slide[, out_colnames] <-
         f_dots_baked(slide[, in_colnames], seq_len(slide_nrow), adaptive = TRUE)
     } else {
-      out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
-      if (after != 0L) {
+      out_cols <- f_dots_baked(slide[, in_colnames], before_steps + after_steps + 1L)
+      if (after_steps != 0L) {
         # Shift an appropriate amount of NA padding from the start to the end.
         # (This padding will later be cut off when we filter down to the
         # original time_values.)
         out_cols <- lapply(out_cols, function(out_col) {
-          c(out_col[(after + 1L):length(out_col)], rep(NA, after))
+          c(out_col[(after_steps + 1L):length(out_col)], rep(NA, after_steps))
         })
       }
       slide[, out_colnames] <- out_cols
     }
   } else if (f_from_package == "slider") {
     for (col_i in seq_along(in_colnames)) {
-      slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
+      slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before_steps, after = after_steps)
     }
   } else {
     cli_abort(
@@ -225,12 +234,16 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
       class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
     )
   }
+  # We should filter down the slide time values to ones in the input time values
+  # when preparing the output:
   rows_should_keep1 <- !is.na(slide_inp_backrefs)
-  rows_should_keep2 <- switch(
-    out_filter_time_style,
+  # We also need to apply the out_filter.
+  #
+  # TODO comments + test vs. just using inequality
+  rows_should_keep2 <- switch(out_filter_time_style,
     range = vec_rep_each(
       c(FALSE, TRUE, FALSE),
-      c(before, slide_nrow - before - after, after),
+      c(slide_start_padding_n, slide_nrow - slide_start_padding_n - after_steps, after_steps),
     ),
     set = vec_in(slide_time_values, out_time_values)
   )
@@ -493,12 +506,15 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
   .align <- rlang::arg_match(.align)
   time_type <- attr(.x, "metadata")$time_type
   if (is.null(.window_size)) {
-    cli_abort("epi_slide_opt: `.window_size` must be specified.")
+    cli_abort(
+      "epi_slide_opt: `.window_size` must be specified.",
+      class = "epiprocess__epi_slide_opt__window_size_missing"
+    )
   }
   validate_slide_window_arg(.window_size, time_type)
   window_args <- get_before_after_from_window(.window_size, .align, time_type)
-  before <- time_delta_to_n_steps(window_args$before, time_type)
-  after <- time_delta_to_n_steps(window_args$after, time_type)
+  before_steps <- time_delta_to_n_steps(window_args$before, time_type)
+  after_steps <- time_delta_to_n_steps(window_args$after, time_type)
   unit_step <- unit_time_delta(time_type, format = "fast")
 
   # Handle output naming:
@@ -520,7 +536,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
 
   result <- .x %>%
     group_modify(function(grp_data, grp_key) {
-      epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
+      epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before_steps, after_steps, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
     }) %>%
     arrange_col_canonical()
 
diff --git a/R/time-utils.R b/R/time-utils.R
index 26ca803de..a5740f7f5 100644
--- a/R/time-utils.R
+++ b/R/time-utils.R
@@ -373,32 +373,3 @@ time_plus_n_steps <- function(x, y, time_type) {
 time_minus_n_steps <- function(x, y, time_type) {
   x - y * unit_time_delta(time_type, "fast")
 }
-
-#' Advance/retreat time_value(s) by specified amount (slide window arg)
-#'
-#' @param x a time_value (vector) of time type `time_type`
-#' @param y a (scalar) slide window arg; should pass [`validate_slide_window_arg()`]
-#' @param time_type as in [`validate_slide_window_arg()`]
-#' @param max_time_value when `y == Inf`, what should be the result of adding `y`?
-#' @param min_time_value when `y == Inf`, what should be the result of subtracting `y`?
-#' @return a time_value (vector) of time type `time_type`
-#'
-#' @keywords internal
-#' @seealso [`time_plus_n_steps`], if you're working with an integerish vector
-#'   number of time steps `y` (output from other `*n_steps` functions) instead.
-time_plus_slide_window_arg <- function(x, y, time_type, max_time_value) {
-  if (y == Inf) {
-    rep(max_time_value, vec_size(x))
-  } else {
-    x + y
-  }
-}
-
-#' @rdname time_plus_slide_window_arg
-time_minus_slide_window_arg <- function(x, y, time_type, min_time_value) {
-  if (y == Inf) {
-    rep(min_time_value, vec_size(x))
-  } else {
-    x - y
-  }
-}
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index 073e684c5..6d140ba23 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -9,8 +9,8 @@ epi_slide_opt_archive_one_epikey(
   in_colnames,
   f_dots_baked,
   f_from_package,
-  before,
-  after,
+  before_steps,
+  after_steps,
   time_type,
   out_colnames
 )
@@ -30,17 +30,17 @@ a specific row order is not required.}
 \item{f_from_package}{string; name of package from which \code{f_dots_baked}
 (pre-\code{partial}) originates}
 
+\item{time_type}{as in \code{new_epi_archive}}
+
+\item{out_colnames}{chr, same length as \code{in_colnames}; column names to use
+for results}
+
 \item{before}{integerish >=0 or Inf; number of time steps before each
 ref_time_value to include in the sliding window computation; Inf to include
 all times beginning with the min \code{time_value}}
 
 \item{after}{integerish >=0; number of time steps after each ref_time_value
 to include in the sliding window computation}
-
-\item{time_type}{as in \code{new_epi_archive}}
-
-\item{out_colnames}{chr, same length as \code{in_colnames}; column names to use
-for results}
 }
 \value{
 tibble with a \code{version} column, pre-existing measurement columns, and
diff --git a/man/time_plus_slide_window_arg.Rd b/man/time_plus_slide_window_arg.Rd
deleted file mode 100644
index 1aa117f47..000000000
--- a/man/time_plus_slide_window_arg.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/time-utils.R
-\name{time_plus_slide_window_arg}
-\alias{time_plus_slide_window_arg}
-\alias{time_minus_slide_window_arg}
-\title{Advance/retreat time_value(s) by specified amount (slide window arg)}
-\usage{
-time_plus_slide_window_arg(x, y, time_type, max_time_value)
-
-time_minus_slide_window_arg(x, y, time_type, min_time_value)
-}
-\arguments{
-\item{x}{a time_value (vector) of time type \code{time_type}}
-
-\item{y}{a (scalar) slide window arg; should pass \code{\link[=validate_slide_window_arg]{validate_slide_window_arg()}}}
-
-\item{time_type}{as in \code{\link[=validate_slide_window_arg]{validate_slide_window_arg()}}}
-
-\item{max_time_value}{when \code{y == Inf}, what should be the result of adding \code{y}?}
-
-\item{min_time_value}{when \code{y == Inf}, what should be the result of subtracting \code{y}?}
-}
-\value{
-a time_value (vector) of time type \code{time_type}
-}
-\description{
-Advance/retreat time_value(s) by specified amount (slide window arg)
-}
-\seealso{
-\code{\link{time_plus_n_steps}}, if you're working with an integerish vector
-number of time steps \code{y} (output from other \verb{*n_steps} functions) instead.
-}
-\keyword{internal}

From 68de47be2dc59d3a2d8244bb9350aecc8b9de27c Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 17:11:50 -0700
Subject: [PATCH 102/107] refactor(epi_slide_opt): helper function&variable
 naming, arg ordering

---
 R/epi_slide_opt_archive.R               | 16 +++++-----
 R/epi_slide_opt_edf.R                   | 39 ++++++++++++-------------
 man/epi_slide_opt_archive_one_epikey.Rd |  4 +--
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 6fe0c1da4..0d21aa58f 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -48,7 +48,7 @@ epi_slide_opt_archive_one_epikey <- function(
     grp_updates,
     in_colnames,
     f_dots_baked, f_from_package,
-    before_steps, after_steps, time_type,
+    before_n_steps, after_n_steps, time_type,
     out_colnames) {
   grp_updates_by_version <- grp_updates %>%
     nest(.by = version, .key = "subtbl") %>%
@@ -68,13 +68,13 @@ epi_slide_opt_archive_one_epikey <- function(
     # inp_update_min_t - after, or anything in between these two bounds. If
     # before == Inf, we need to update outputs all the way to the end of the
     # input *snapshot*.
-    out_update_min_t <- inp_update_min_t - after_steps * unit_step
-    if (before_steps == Inf) {
+    out_update_min_t <- inp_update_min_t - after_n_steps * unit_step
+    if (before_n_steps == Inf) {
       out_update_max_t <- max(inp_snapshot$time_value)
     } else {
-      out_update_max_t <- inp_update_max_t + before_steps * unit_step
+      out_update_max_t <- inp_update_max_t + before_n_steps * unit_step
     }
-    out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before_steps, after_steps, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
+    out_update <- epi_slide_opt_edf_one_epikey(inp_snapshot, in_colnames, f_dots_baked, f_from_package, before_n_steps, after_n_steps, unit_step, time_type, out_colnames, c(out_update_min_t, out_update_max_t), NULL)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
@@ -134,8 +134,8 @@ epi_slide_opt.epi_archive <-
       .window_size, .align, .prefix, .suffix, .new_col_names
     )
     window_args <- get_before_after_from_window(.window_size, .align, time_type)
-    before_steps <- time_delta_to_n_steps(window_args$before, time_type)
-    after_steps <- time_delta_to_n_steps(window_args$after, time_type)
+    before_n_steps <- time_delta_to_n_steps(window_args$before, time_type)
+    after_n_steps <- time_delta_to_n_steps(window_args$after, time_type)
     if (!is.null(.ref_time_values)) {
       cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument",
         class = "epiprocess__epi_slide_opt_archive__ref_time_values_unsupported"
@@ -167,7 +167,7 @@ epi_slide_opt.epi_archive <-
         res <- epi_slide_opt_archive_one_epikey(
           group_values,
           names_info$input_col_names,
-          .f_dots_baked, .f_info$from_package, before_steps, after_steps, time_type,
+          .f_dots_baked, .f_info$from_package, before_n_steps, after_n_steps, time_type,
           names_info$output_col_names
         )
         if (use_progress) cli::cli_progress_update(id = progress_bar_id)
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index b5c99d9b4..98f659237 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -167,13 +167,12 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
   )
 }
 
-epi_slide_opt_one_epikey <- function(inp_tbl,
-                                     f_dots_baked, f_from_package,
-                                     before_steps, after_steps, unit_step, time_type,
-                                     out_filter_time_range, out_filter_time_set,
-                                     in_colnames, out_colnames) {
-  # TODO rename function, reorder args, roxygen2
-  #
+epi_slide_opt_edf_one_epikey <- function(inp_tbl,
+                                         in_colnames,
+                                         f_dots_baked, f_from_package,
+                                         before_n_steps, after_n_steps, unit_step, time_type,
+                                         out_colnames,
+                                         out_filter_time_range, out_filter_time_set) {
   # TODO try converting time values to reals, do work on reals, convert back at very end?
   #
   # TODO loosen restrictions here.  each filter optional?
@@ -190,14 +189,14 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
   } else {
     cli_abort("Exactly one of `out_filter_time_range` and `out_filter_time_set` must be non-`NULL`.")
   }
-  if (before_steps == Inf) {
+  if (before_n_steps == Inf) {
     slide_t_min <- min(inp_tbl$time_value)
     slide_start_padding_n <- time_minus_time_in_n_steps(out_t_min, slide_t_min, time_type)
   } else {
-    slide_t_min <- out_t_min - before_steps * unit_step
-    slide_start_padding_n <- before_steps # perf: avoid time_minus_time_in_n_steps
+    slide_t_min <- out_t_min - before_n_steps * unit_step
+    slide_start_padding_n <- before_n_steps # perf: avoid time_minus_time_in_n_steps
   }
-  slide_t_max <- out_t_max + after_steps * unit_step
+  slide_t_max <- out_t_max + after_n_steps * unit_step
   slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
   slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
   slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
@@ -209,24 +208,24 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
   # try removing time_value column before slice?
   slide$time_value <- slide_time_values
   if (f_from_package == "data.table") {
-    if (before_steps == Inf) {
+    if (before_n_steps == Inf) {
       slide[, out_colnames] <-
         f_dots_baked(slide[, in_colnames], seq_len(slide_nrow), adaptive = TRUE)
     } else {
-      out_cols <- f_dots_baked(slide[, in_colnames], before_steps + after_steps + 1L)
-      if (after_steps != 0L) {
+      out_cols <- f_dots_baked(slide[, in_colnames], before_n_steps + after_n_steps + 1L)
+      if (after_n_steps != 0L) {
         # Shift an appropriate amount of NA padding from the start to the end.
         # (This padding will later be cut off when we filter down to the
         # original time_values.)
         out_cols <- lapply(out_cols, function(out_col) {
-          c(out_col[(after_steps + 1L):length(out_col)], rep(NA, after_steps))
+          c(out_col[(after_n_steps + 1L):length(out_col)], rep(NA, after_n_steps))
         })
       }
       slide[, out_colnames] <- out_cols
     }
   } else if (f_from_package == "slider") {
     for (col_i in seq_along(in_colnames)) {
-      slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before_steps, after = after_steps)
+      slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before_n_steps, after = after_n_steps)
     }
   } else {
     cli_abort(
@@ -243,7 +242,7 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
   rows_should_keep2 <- switch(out_filter_time_style,
     range = vec_rep_each(
       c(FALSE, TRUE, FALSE),
-      c(slide_start_padding_n, slide_nrow - slide_start_padding_n - after_steps, after_steps),
+      c(slide_start_padding_n, slide_nrow - slide_start_padding_n - after_n_steps, after_n_steps),
     ),
     set = vec_in(slide_time_values, out_time_values)
   )
@@ -513,8 +512,8 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
   }
   validate_slide_window_arg(.window_size, time_type)
   window_args <- get_before_after_from_window(.window_size, .align, time_type)
-  before_steps <- time_delta_to_n_steps(window_args$before, time_type)
-  after_steps <- time_delta_to_n_steps(window_args$after, time_type)
+  before_n_steps <- time_delta_to_n_steps(window_args$before, time_type)
+  after_n_steps <- time_delta_to_n_steps(window_args$after, time_type)
   unit_step <- unit_time_delta(time_type, format = "fast")
 
   # Handle output naming:
@@ -536,7 +535,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
 
   result <- .x %>%
     group_modify(function(grp_data, grp_key) {
-      epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before_steps, after_steps, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
+      epi_slide_opt_edf_one_epikey(grp_data, names_info$input_col_names, f_dots_baked, f_from_package, before_n_steps, after_n_steps, unit_step, time_type, names_info$output_col_names, NULL, ref_time_values)
     }) %>%
     arrange_col_canonical()
 
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index 6d140ba23..795b46ce4 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -9,8 +9,8 @@ epi_slide_opt_archive_one_epikey(
   in_colnames,
   f_dots_baked,
   f_from_package,
-  before_steps,
-  after_steps,
+  before_n_steps,
+  after_n_steps,
   time_type,
   out_colnames
 )

From 4763e367af30f03a35e93805ae357d0a577b0781 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 17:43:18 -0700
Subject: [PATCH 103/107] More internal renames and documentation

---
 R/epi_slide_opt_archive.R               | 31 +++-----
 R/epi_slide_opt_edf.R                   | 65 ++++++++++++++++
 man/epi_slide_opt_archive_one_epikey.Rd | 22 +++---
 man/epi_slide_opt_edf_one_epikey.Rd     | 98 +++++++++++++++++++++++++
 4 files changed, 183 insertions(+), 33 deletions(-)
 create mode 100644 man/epi_slide_opt_edf_one_epikey.Rd

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 0d21aa58f..3869639c7 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -1,23 +1,10 @@
 #' Core operation of `epi_slide_opt.epi_archive` for a single epikey's history
 #'
-#' @param grp_updates tibble with a `version` column and measurement columns for
+#' @param inp_updates tibble with a `version` column and measurement columns for
 #'   a single epikey, without the epikey labeling columns (e.g., from
 #'   `group_modify`). Interpretation is analogous to an `epi_archive` `DT`, but
 #'   a specific row order is not required.
-#' @param in_colnames chr; names of columns to which to apply `f_dots_baked`
-#' @param f_dots_baked supported sliding function from `{data.table}` or
-#'   `{slider}`, potentially with some arguments baked in with
-#'   [`purrr::partial`]
-#' @param f_from_package string; name of package from which `f_dots_baked`
-#'   (pre-`partial`) originates
-#' @param before integerish >=0 or Inf; number of time steps before each
-#'   ref_time_value to include in the sliding window computation; Inf to include
-#'   all times beginning with the min `time_value`
-#' @param after integerish >=0; number of time steps after each ref_time_value
-#'   to include in the sliding window computation
-#' @param time_type as in `new_epi_archive`
-#' @param out_colnames chr, same length as `in_colnames`; column names to use
-#'   for results
+#' @inheritParams epi_slide_opt_edf_one_epikey
 #' @return tibble with a `version` column, pre-existing measurement columns, and
 #'   new measurement columns; (compactified) diff data to put into an
 #'   `epi_archive`. May not match column ordering; may not ensure any row
@@ -26,7 +13,7 @@
 #' @examples
 #'
 #' library(dplyr)
-#' grp_updates <- bind_rows(
+#' inp_updates <- bind_rows(
 #'   tibble(version = 30, time_value = 1:20, value = 1:20),
 #'   tibble(version = 32, time_value = 4:5, value = 5:4),
 #'   tibble(version = 33, time_value = 8, value = 9),
@@ -38,27 +25,27 @@
 #'
 #' f <- purrr::partial(data.table::frollmean, algo = "exact")
 #'
-#' grp_updates %>%
+#' inp_updates %>%
 #'   epiprocess:::epi_slide_opt_archive_one_epikey(
 #'     "value", f, "data.table", 2L, 0L, "day", "slide_value"
 #'   )
 #'
 #' @keywords internal
 epi_slide_opt_archive_one_epikey <- function(
-    grp_updates,
+    inp_updates,
     in_colnames,
     f_dots_baked, f_from_package,
     before_n_steps, after_n_steps, time_type,
     out_colnames) {
-  grp_updates_by_version <- grp_updates %>%
+  inp_updates_by_version <- inp_updates %>%
     nest(.by = version, .key = "subtbl") %>%
     arrange(version)
   unit_step <- unit_time_delta(time_type, format = "fast")
   prev_inp_snapshot <- NULL
   prev_out_snapshot <- NULL
-  result <- lapply(seq_len(nrow(grp_updates_by_version)), function(version_i) {
-    version <- grp_updates_by_version$version[[version_i]]
-    inp_update <- grp_updates_by_version$subtbl[[version_i]]
+  result <- lapply(seq_len(nrow(inp_updates_by_version)), function(version_i) {
+    version <- inp_updates_by_version$version[[version_i]]
+    inp_update <- inp_updates_by_version$subtbl[[version_i]]
     inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
     inp_update_min_t <- min(inp_update$time_value)
     inp_update_max_t <- max(inp_update$time_value)
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 98f659237..49096697b 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -167,6 +167,71 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
   )
 }
 
+#' Run a specialized slide computation on a single `epi_df` epikey, with temporary completion
+#'
+#' @param inp_tbl tibble; should have a `time_value` column and columns named in
+#'   `in_colnames`; should not contain any columns named in `out_colnames`
+#' @param in_colnames chr; names of columns to which to apply `f_dots_baked`
+#' @param f_dots_baked supported sliding function from `{data.table}` or
+#'   `{slider}`, potentially with some arguments baked in with
+#'   [`purrr::partial`]
+#' @param f_from_package string; name of package from which `f_dots_baked`
+#'   (pre-`partial`) originates
+#' @param before_n_steps integerish `>=0` or `Inf`; number of time steps before
+#'   each `ref_time_value` to include in the sliding window computation; `Inf`
+#'   to include all times beginning with the min `time_value`
+#' @param after_n_steps integerish `>=0`; number of time steps after each
+#'   `ref_time_value` to include in the sliding window computation
+#' @param time_type as in `new_epi_archive`
+#' @param out_colnames chr, same length as `in_colnames`; column names to use
+#'   for results
+#' @param out_filter_time_range,out_filter_time_set `time_value` filter;
+#'   `time_values` in the output should match the result of applying this filter
+#'   to `inp_tbl$time_value`. Exactly one of the two must be provided
+#'   (non-`NULL`) and the other must be `NULL`. `out_filter_time_range`, if
+#'   provided, should be a length-2 vector/list containing the minimum and
+#'   maximum `time_value` to allow in the output. `out_filter_time_set`, if
+#'   provided, should be a vector of `time_values` to intersect with the input
+#'   `time_value`s.
+#' @return tibble; like `inp_tbl` with addition of `out_colnames` holding the
+#'   slide computation results, with times filtered down as specified
+#'
+#' @examples
+#'
+#' library(dplyr)
+#' tbl <- tibble(
+#'   time_value = c(11:12, 15:18) + 0,
+#'   value = c(c(1, 2), c(4, 8, 16, 32))
+#' )
+#'
+#' tbl %>%
+#'   epi_slide_opt_edf_one_epikey(
+#'     "value",
+#'     frollmean, "data.table",
+#'     1L, 0L, 1L, "integer",
+#'     "slide_value",
+#'     c(11L, 16L), NULL
+#'   )
+#'
+#' tbl %>%
+#'   epi_slide_opt_edf_one_epikey(
+#'     "value",
+#'     frollmean, "data.table",
+#'     0L, 1L, 1L, "integer",
+#'     "slide_value",
+#'     NULL, c(11, 15, 16, 17, 18)
+#'   )
+#'
+#' tbl %>%
+#'   epi_slide_opt_edf_one_epikey(
+#'     "value",
+#'     frollmean, "data.table",
+#'     Inf, 0L, 1L, "integer",
+#'     "slide_value",
+#'     NULL, c(12, 17)
+#'   )
+#'
+#' @keywords internal
 epi_slide_opt_edf_one_epikey <- function(inp_tbl,
                                          in_colnames,
                                          f_dots_baked, f_from_package,
diff --git a/man/epi_slide_opt_archive_one_epikey.Rd b/man/epi_slide_opt_archive_one_epikey.Rd
index 795b46ce4..847d0d162 100644
--- a/man/epi_slide_opt_archive_one_epikey.Rd
+++ b/man/epi_slide_opt_archive_one_epikey.Rd
@@ -5,7 +5,7 @@
 \title{Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history}
 \usage{
 epi_slide_opt_archive_one_epikey(
-  grp_updates,
+  inp_updates,
   in_colnames,
   f_dots_baked,
   f_from_package,
@@ -16,7 +16,7 @@ epi_slide_opt_archive_one_epikey(
 )
 }
 \arguments{
-\item{grp_updates}{tibble with a \code{version} column and measurement columns for
+\item{inp_updates}{tibble with a \code{version} column and measurement columns for
 a single epikey, without the epikey labeling columns (e.g., from
 \code{group_modify}). Interpretation is analogous to an \code{epi_archive} \code{DT}, but
 a specific row order is not required.}
@@ -30,17 +30,17 @@ a specific row order is not required.}
 \item{f_from_package}{string; name of package from which \code{f_dots_baked}
 (pre-\code{partial}) originates}
 
+\item{before_n_steps}{integerish \verb{>=0} or \code{Inf}; number of time steps before
+each \code{ref_time_value} to include in the sliding window computation; \code{Inf}
+to include all times beginning with the min \code{time_value}}
+
+\item{after_n_steps}{integerish \verb{>=0}; number of time steps after each
+\code{ref_time_value} to include in the sliding window computation}
+
 \item{time_type}{as in \code{new_epi_archive}}
 
 \item{out_colnames}{chr, same length as \code{in_colnames}; column names to use
 for results}
-
-\item{before}{integerish >=0 or Inf; number of time steps before each
-ref_time_value to include in the sliding window computation; Inf to include
-all times beginning with the min \code{time_value}}
-
-\item{after}{integerish >=0; number of time steps after each ref_time_value
-to include in the sliding window computation}
 }
 \value{
 tibble with a \code{version} column, pre-existing measurement columns, and
@@ -54,7 +54,7 @@ Core operation of \code{epi_slide_opt.epi_archive} for a single epikey's history
 \examples{
 
 library(dplyr)
-grp_updates <- bind_rows(
+inp_updates <- bind_rows(
   tibble(version = 30, time_value = 1:20, value = 1:20),
   tibble(version = 32, time_value = 4:5, value = 5:4),
   tibble(version = 33, time_value = 8, value = 9),
@@ -66,7 +66,7 @@ grp_updates <- bind_rows(
 
 f <- purrr::partial(data.table::frollmean, algo = "exact")
 
-grp_updates \%>\%
+inp_updates \%>\%
   epiprocess:::epi_slide_opt_archive_one_epikey(
     "value", f, "data.table", 2L, 0L, "day", "slide_value"
   )
diff --git a/man/epi_slide_opt_edf_one_epikey.Rd b/man/epi_slide_opt_edf_one_epikey.Rd
new file mode 100644
index 000000000..b3cfad5b3
--- /dev/null
+++ b/man/epi_slide_opt_edf_one_epikey.Rd
@@ -0,0 +1,98 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epi_slide_opt_edf.R
+\name{epi_slide_opt_edf_one_epikey}
+\alias{epi_slide_opt_edf_one_epikey}
+\title{Run a specialized slide computation on a single \code{epi_df} epikey, with temporary completion}
+\usage{
+epi_slide_opt_edf_one_epikey(
+  inp_tbl,
+  in_colnames,
+  f_dots_baked,
+  f_from_package,
+  before_n_steps,
+  after_n_steps,
+  unit_step,
+  time_type,
+  out_colnames,
+  out_filter_time_range,
+  out_filter_time_set
+)
+}
+\arguments{
+\item{inp_tbl}{tibble; should have a \code{time_value} column and columns named in
+\code{in_colnames}; should not contain any columns named in \code{out_colnames}}
+
+\item{in_colnames}{chr; names of columns to which to apply \code{f_dots_baked}}
+
+\item{f_dots_baked}{supported sliding function from \code{{data.table}} or
+\code{{slider}}, potentially with some arguments baked in with
+\code{\link[purrr:partial]{purrr::partial}}}
+
+\item{f_from_package}{string; name of package from which \code{f_dots_baked}
+(pre-\code{partial}) originates}
+
+\item{before_n_steps}{integerish \verb{>=0} or \code{Inf}; number of time steps before
+each \code{ref_time_value} to include in the sliding window computation; \code{Inf}
+to include all times beginning with the min \code{time_value}}
+
+\item{after_n_steps}{integerish \verb{>=0}; number of time steps after each
+\code{ref_time_value} to include in the sliding window computation}
+
+\item{time_type}{as in \code{new_epi_archive}}
+
+\item{out_colnames}{chr, same length as \code{in_colnames}; column names to use
+for results}
+
+\item{out_filter_time_range, out_filter_time_set}{\code{time_value} filter;
+\code{time_values} in the output should match the result of applying this filter
+to \code{inp_tbl$time_value}. Exactly one of the two must be provided
+(non-\code{NULL}) and the other must be \code{NULL}. \code{out_filter_time_range}, if
+provided, should be a length-2 vector/list containing the minimum and
+maximum \code{time_value} to allow in the output. \code{out_filter_time_set}, if
+provided, should be a vector of \code{time_values} to intersect with the input
+\code{time_value}s.}
+}
+\value{
+tibble; like \code{inp_tbl} with addition of \code{out_colnames} holding the
+slide computation results, with times filtered down as specified
+}
+\description{
+Run a specialized slide computation on a single \code{epi_df} epikey, with temporary completion
+}
+\examples{
+
+library(dplyr)
+tbl <- tibble(
+  time_value = c(11:12, 15:18) + 0,
+  value = c(c(1, 2), c(4, 8, 16, 32))
+)
+
+tbl \%>\%
+  epi_slide_opt_edf_one_epikey(
+    "value",
+    frollmean, "data.table",
+    1L, 0L, 1L, "integer",
+    "slide_value",
+    c(11L, 16L), NULL
+  )
+
+tbl \%>\%
+  epi_slide_opt_edf_one_epikey(
+    "value",
+    frollmean, "data.table",
+    0L, 1L, 1L, "integer",
+    "slide_value",
+    NULL, c(11, 15, 16, 17, 18)
+  )
+
+tbl \%>\%
+  epi_slide_opt_edf_one_epikey(
+    "value",
+    frollmean, "data.table",
+    Inf, 0L, 1L, "integer",
+    "slide_value",
+    NULL, c(12, 17)
+  )
+
+}
+\keyword{internal}

From 184f98a325a07a1df4b1bd76477647ddf7cf1912 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 10 Apr 2025 20:21:34 -0700
Subject: [PATCH 104/107] perf: c -> list when specifying slide out date min,
 max

---
 R/epi_slide_opt_archive.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index 3869639c7..d0a16749a 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -61,7 +61,7 @@ epi_slide_opt_archive_one_epikey <- function(
     } else {
       out_update_max_t <- inp_update_max_t + before_n_steps * unit_step
     }
-    out_update <- epi_slide_opt_edf_one_epikey(inp_snapshot, in_colnames, f_dots_baked, f_from_package, before_n_steps, after_n_steps, unit_step, time_type, out_colnames, c(out_update_min_t, out_update_max_t), NULL)
+    out_update <- epi_slide_opt_edf_one_epikey(inp_snapshot, in_colnames, f_dots_baked, f_from_package, before_n_steps, after_n_steps, unit_step, time_type, out_colnames, list(out_update_min_t, out_update_max_t), NULL)
     out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
     prev_inp_snapshot <<- inp_snapshot
     prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")

From a82d48d03969f514a682ae175f3b6c80fd147eff Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Fri, 11 Apr 2025 16:30:28 -0700
Subject: [PATCH 105/107] fix(epi_slide_opt): on `out_filter_time_set`
 narrowing to 0

---
 R/epi_slide_opt_edf.R | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index 49096697b..de3ed45a5 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -249,8 +249,13 @@ epi_slide_opt_edf_one_epikey <- function(inp_tbl,
     # FIXME min time_value for this epikey vs. entire edf; match existing behavior, or complete changeover
     out_filter_time_style <- "set"
     out_time_values <- vec_set_intersect(inp_tbl$time_value, out_filter_time_set)
-    out_t_min <- min(out_time_values)
-    out_t_max <- max(out_time_values)
+    if (vec_size(out_time_values) == 0L) {
+      out_t_min <- inp_tbl$time_value[[1L]]
+      out_t_max <- inp_tbl$time_value[[1L]] - 1L * unit_step
+    } else {
+      out_t_min <- min(out_time_values)
+      out_t_max <- max(out_time_values)
+    }
   } else {
     cli_abort("Exactly one of `out_filter_time_range` and `out_filter_time_set` must be non-`NULL`.")
   }

From 4aadc9504361bebdd59a09524ba80904c1f44a33 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Sat, 12 Apr 2025 16:26:16 -0700
Subject: [PATCH 106/107] docs(slide.R): correct error message regarding
 data.frame rownames

The error message says that slide computations can't be rownamed data frames.
This is both bad wording (having rownames != having non-automatic rownames) and
we're not actually enforcing any restriction of the sort anyway. We drop/ignore
the rownames though (via dplyr/tibble). In `epix_slide`, the allowance of
non-automatic rownames was deliberate; in `epi_slide`, it also seems likely the
way to go; one might convert to `data.frame` and have some filter-like operation
return non-automatic rownames.
---
 R/slide.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/slide.R b/R/slide.R
index c05fbe98f..e4ea80792 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -410,7 +410,7 @@ epi_slide_one_group <- function(
   # Returned values must be data.frame or vector.
   if ("other" %in% return_types) {
     cli_abort(
-      "epi_slide: slide computations must always return either data frames without rownames
+      "epi_slide: slide computations must always return either data frames
       or unnamed vectors (as determined by the vctrs package).",
       class = "epiprocess__invalid_slide_comp_value"
     )

From d9621015b88773ff0f1ac1d02685bd4ecd93bb0b Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Sun, 13 Apr 2025 01:11:01 -0700
Subject: [PATCH 107/107] fix: `purrr::partial` missing future arg placement

Use `... =` to specify this.  Prevents some confusing error messages on
```
archive_cases_dv_subset %>%
  epi_slide_opt(percent_cli, frollmean, 0, .window_size = 7)
```
but does not solve all issues that can happen with unnamed `...`.
---
 R/epi_slide_opt_archive.R | 2 +-
 R/epi_slide_opt_edf.R     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/epi_slide_opt_archive.R b/R/epi_slide_opt_archive.R
index d0a16749a..ab930af61 100644
--- a/R/epi_slide_opt_archive.R
+++ b/R/epi_slide_opt_archive.R
@@ -107,7 +107,7 @@ epi_slide_opt.epi_archive <-
         # debug stack traces:
         .f
       } else {
-        purrr::partial(.f, ...)
+        purrr::partial(.f, ... = , ...) # `... =` stands in for future args
       }
     col_names_quo <- enquo(.col_names)
     if (is.null(.window_size)) {
diff --git a/R/epi_slide_opt_edf.R b/R/epi_slide_opt_edf.R
index de3ed45a5..a97fef7a7 100644
--- a/R/epi_slide_opt_edf.R
+++ b/R/epi_slide_opt_edf.R
@@ -62,6 +62,7 @@ upstream_slide_f_info <- function(.f, ...) {
   }
   f_from_package <- f_info_row$package
   if (f_from_package == "data.table" && "fill" %in% names(rlang::call_match(dots_expand = FALSE)[["..."]])) {
+    # XXX this doesn't detect with `fill` is passed positionally through dots...
     cli_abort("`epi_slide_opt` does not support `data.table::froll*()` with a
                custom `fill =` arg",
       class = "epiprocess__epi_slide_opt__fill_unsupported"
@@ -600,7 +601,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
       # debug stack traces:
       .f
     } else {
-      purrr::partial(.f, ...)
+      purrr::partial(.f, ... = , ...) # `... =` stands in for future args
     }
 
   result <- .x %>%