@@ -261,58 +261,130 @@ epi_slide <- function(
261261 # Check for duplicated time values within groups
262262 assert(check_ukey_unique(ungroup(.x ), c(group_vars(.x ), " time_value" )))
263263
264- # Begin handling completion. This will create a complete time index between
265- # the smallest and largest time values in the data. This is used to ensure
266- # that the slide function is called with a complete window of data. Each slide
267- # group will filter this down to between its min and max time values. We also
268- # mark which dates were in the data and which were added by our completion.
269- date_seq_list <- full_date_seq(.x , window_args $ before , window_args $ after , time_type )
270- .x $ .real <- TRUE
264+ # # Begin handling completion. This will create a complete time index between
265+ # # the smallest and largest time values in the data. This is used to ensure
266+ # # that the slide function is called with a complete window of data. Each slide
267+ # # group will filter this down to between its min and max time values. We also
268+ # # mark which dates were in the data and which were added by our completion.
269+ # date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
270+ # .x$.real <- TRUE
271271
272- # Create a wrapper that calculates and passes `.ref_time_value` to the
273- # computation. `i` is contained in the `slide_comp_wrapper_factory`
274- # environment such that when called within `slide_one_grp` `i` advances
275- # through the list of reference time values within a group and then resets
276- # back to 1 when switching groups.
277- slide_comp_wrapper_factory <- function (kept_ref_time_values ) {
278- i <- 1L
279- slide_comp_wrapper <- function (.x , .group_key , ... ) {
280- .ref_time_value <- kept_ref_time_values [[i ]]
281- i <<- i + 1L
282- .slide_comp(.x , .group_key , .ref_time_value , ... )
272+ # # Create a wrapper that calculates and passes `.ref_time_value` to the
273+ # # computation. `i` is contained in the `slide_comp_wrapper_factory`
274+ # # environment such that when called within `slide_one_grp` `i` advances
275+ # # through the list of reference time values within a group and then resets
276+ # # back to 1 when switching groups.
277+ # slide_comp_wrapper_factory <- function(kept_ref_time_values) {
278+ # i <- 1L
279+ # slide_comp_wrapper <- function(.x, .group_key, ...) {
280+ # .ref_time_value <- kept_ref_time_values[[i]]
281+ # i <<- i + 1L
282+ # .slide_comp(.x, .group_key, .ref_time_value, ...)
283+ # }
284+ # slide_comp_wrapper
285+ # }
286+
287+ # # - If .x is not grouped, then the trivial group is applied:
288+ # # https://dplyr.tidyverse.org/reference/group_map.html
289+ # # - We create a lambda that forwards the necessary slide arguments to
290+ # # `epi_slide_one_group`.
291+ # # - `...` from top of `epi_slide` are forwarded to `.f` here through
292+ # # group_modify and through the lambda.
293+ # result <- group_map(
294+ # .x,
295+ # .f = function(.data_group, .group_key, ...) {
296+ # epi_slide_one_group(
297+ # .data_group, .group_key, ...,
298+ # .slide_comp_factory = slide_comp_wrapper_factory,
299+ # .before = window_args$before,
300+ # .after = window_args$after,
301+ # .ref_time_values = .ref_time_values,
302+ # .all_rows = .all_rows,
303+ # .new_col_name = .new_col_name,
304+ # .used_data_masking = used_data_masking,
305+ # .time_type = time_type,
306+ # .date_seq_list = date_seq_list
307+ # )
308+ # },
309+ # ...,
310+ # .keep = TRUE
311+ # ) %>%
312+ # list_rbind() %>%
313+ # `[`(.$.real, names(.) != ".real") %>%
314+ # arrange_col_canonical() %>%
315+ # group_by(!!!.x_orig_groups)
316+ before_n_steps <- time_delta_to_n_steps(window_args $ before , time_type )
317+ after_n_steps <- time_delta_to_n_steps(window_args $ after , time_type )
318+ unit_step <- unit_time_delta(time_type , format = " fast" )
319+ simple_hop <- time_slide_to_simple_hop(.slide_comp = .slide_comp , ... , .before_n_steps = before_n_steps , .after_n_steps = after_n_steps )
320+ result <- .x %> %
321+ group_modify(function (grp_data , grp_key ) {
322+ out_time_values <- ref_time_values_to_out_time_values(grp_data , .ref_time_values )
323+ res <- grp_data
324+ slide_values <- slide_window(grp_data , grp_key , simple_hop , before_n_steps , after_n_steps , unit_step , time_type , out_time_values )
325+ # FIXME check, de-dupe, simplify, refactor, ...
326+ if (.all_rows ) {
327+ new_slide_values <- vec_cast(rep(NA , nrow(res )), slide_values )
328+ vec_slice(new_slide_values , vec_match(out_time_values , res $ time_value )) <- slide_values
329+ slide_values <- new_slide_values
330+ } else {
331+ res <- vec_slice(res , vec_match(out_time_values , res $ time_value ))
332+ }
333+
334+ if (is.null(.new_col_name )) {
335+ if (inherits(slide_values , " data.frame" )) {
336+ # Sometimes slide_values can parrot back columns already in `res`; allow
337+ # this, but balk if a column has the same name as one in `res` but a
338+ # different value:
339+ comp_nms <- names(slide_values )
340+ overlaps_existing_names <- comp_nms %in% names(res )
341+ for (comp_i in which(overlaps_existing_names )) {
342+ if (! identical(slide_values [[comp_i ]], res [[comp_nms [[comp_i ]]]])) {
343+ lines <- c(
344+ cli :: format_error(c(
345+ " New column and old column clash" ,
346+ " x" = " slide computation output included a
347+ {format_varname(comp_nms[[comp_i]])} column, but `.x` already had a
348+ {format_varname(comp_nms[[comp_i]])} column with differing values" ,
349+ " Here are examples of differing values, where the grouping variables were
350+ {format_tibble_row(.group_key)}:"
351+ )),
352+ capture.output(print(waldo :: compare(
353+ res [[comp_nms [[comp_i ]]]], slide_values [[comp_i ]],
354+ x_arg = rlang :: expr_deparse(dplyr :: expr(`$`(!! " existing" , !! sym(comp_nms [[comp_i ]])))), # nolint: object_usage_linter
355+ y_arg = rlang :: expr_deparse(dplyr :: expr(`$`(!! " comp_value" , !! sym(comp_nms [[comp_i ]])))) # nolint: object_usage_linter
356+ ))),
357+ cli :: format_message(c(
358+ " >" = " You likely want to rename or remove this column from your slide
359+ computation's output, or debug why it has a different value."
360+ ))
361+ )
362+ rlang :: abort(paste(collapse = " \n " , lines ),
363+ class = " epiprocess__epi_slide_output_vs_existing_column_conflict"
364+ )
365+ }
366+ }
367+ # Unpack into separate columns (without name prefix). If there are
368+ # columns duplicating existing columns, de-dupe and order them as if they
369+ # didn't exist in slide_values.
370+ res <- dplyr :: bind_cols(res , slide_values [! overlaps_existing_names ])
371+ } else {
372+ # Apply default name (to vector or packed data.frame-type column):
373+ if (" slide_value" %in% names(res )) {
374+ cli_abort(c(" Cannot guess a good column name for your output" ,
375+ " x" = " `slide_value` already exists in `.x`" ,
376+ " >" = " Please provide a `.new_col_name`."
377+ ))
378+ }
379+ res [[" slide_value" ]] <- slide_values
283380 }
284- slide_comp_wrapper
381+ } else {
382+ # Vector or packed data.frame-type column (note: overlaps with existing
383+ # column names should already be forbidden by earlier validation):
384+ res [[.new_col_name ]] <- slide_values
285385 }
286-
287- # - If .x is not grouped, then the trivial group is applied:
288- # https://dplyr.tidyverse.org/reference/group_map.html
289- # - We create a lambda that forwards the necessary slide arguments to
290- # `epi_slide_one_group`.
291- # - `...` from top of `epi_slide` are forwarded to `.f` here through
292- # group_modify and through the lambda.
293- result <- group_map(
294- .x ,
295- .f = function (.data_group , .group_key , ... ) {
296- epi_slide_one_group(
297- .data_group , .group_key , ... ,
298- .slide_comp_factory = slide_comp_wrapper_factory ,
299- .before = window_args $ before ,
300- .after = window_args $ after ,
301- .ref_time_values = .ref_time_values ,
302- .all_rows = .all_rows ,
303- .new_col_name = .new_col_name ,
304- .used_data_masking = used_data_masking ,
305- .time_type = time_type ,
306- .date_seq_list = date_seq_list
307- )
308- },
309- ... ,
310- .keep = TRUE
311- ) %> %
312- list_rbind() %> %
313- `[`(. $ .real , names(. ) != " .real" ) %> %
314- arrange_col_canonical() %> %
315- group_by(!!! .x_orig_groups )
386+ res
387+ })
316388
317389 # If every group in epi_slide_one_group takes the
318390 # length(available_ref_time_values) == 0 branch then we end up here.
0 commit comments