From 713dde68d503fed9bd1ee38e1a20735416da8db5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 3 Mar 2021 14:57:25 -0500 Subject: [PATCH 01/23] reduce memory usage by subsetting columns --- facebook/delphiFacebook/NAMESPACE | 1 + .../delphiFacebook/R/contingency_aggregate.R | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE index 9ef255f84..07b9c5a32 100644 --- a/facebook/delphiFacebook/NAMESPACE +++ b/facebook/delphiFacebook/NAMESPACE @@ -63,6 +63,7 @@ importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,anti_join) +importFrom(dplyr,any_of) importFrom(dplyr,arrange) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 00e9b065e..8188fbcc7 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -24,11 +24,23 @@ #' @return none #' #' @import data.table -#' @importFrom dplyr full_join %>% +#' @importFrom dplyr full_join %>% select all_of any_of #' @importFrom purrr reduce #' #' @export produce_aggregates <- function(df, aggregations, cw_list, params) { + output <- post_process_aggs(df, aggregations, cw_list) + df <- output[[1]] + aggregations <- output[[2]] + + ## Keep only columns used in indicators, plus supporting columns. + df <- select(df, + all_of(unique(aggregations$metric)), + all_of(unique(aggregations$var_weight)), + any_of( unique( unlist(aggregations$group_by) ) ), + zip5, + start_dt) + msg_plain(paste0("Producing aggregates...")) ## For the date range lookups we do on df, use a data.table key. This puts the ## table in sorted order so data.table can use a binary search to find @@ -40,10 +52,6 @@ produce_aggregates <- function(df, aggregations, cw_list, params) { # Keep only obs in desired date range. df <- df[start_dt >= params$start_time & start_dt <= params$end_time] - output <- post_process_aggs(df, aggregations, cw_list) - df <- output[[1]] - aggregations <- output[[2]] - agg_groups <- unique(aggregations[c("group_by", "geo_level")]) # For each unique combination of groupby_vars and geo level, run aggregation process once From 5a3124934694ff73b25775af8884fea1a5bdfb3b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 3 Mar 2021 17:48:35 -0500 Subject: [PATCH 02/23] switch to all_of to trigger error for missing columns --- facebook/delphiFacebook/R/contingency_aggregate.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 8188fbcc7..b5b975ee3 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -24,7 +24,7 @@ #' @return none #' #' @import data.table -#' @importFrom dplyr full_join %>% select all_of any_of +#' @importFrom dplyr full_join %>% select all_of #' @importFrom purrr reduce #' #' @export @@ -34,10 +34,11 @@ produce_aggregates <- function(df, aggregations, cw_list, params) { aggregations <- output[[2]] ## Keep only columns used in indicators, plus supporting columns. 
+ group_vars <- unique( unlist(aggregations$group_by) ) df <- select(df, all_of(unique(aggregations$metric)), all_of(unique(aggregations$var_weight)), - any_of( unique( unlist(aggregations$group_by) ) ), + all_of( group_vars[group_vars != "geo_id"] ), zip5, start_dt) From 486c7fad73f89cd6da95d2cb475d88e9a62fa320 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 11 Mar 2021 10:24:42 -0500 Subject: [PATCH 03/23] fix hesitant_and_* variables to be defined as among hesitant people --- facebook/delphiFacebook/NAMESPACE | 1 - .../delphiFacebook/R/contingency_variables.R | 79 +++++++++++++------ 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE index 07b9c5a32..9ef255f84 100644 --- a/facebook/delphiFacebook/NAMESPACE +++ b/facebook/delphiFacebook/NAMESPACE @@ -63,7 +63,6 @@ importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,anti_join) -importFrom(dplyr,any_of) importFrom(dplyr,arrange) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) diff --git a/facebook/delphiFacebook/R/contingency_variables.R b/facebook/delphiFacebook/R/contingency_variables.R index 47aabc7ff..c95a554a6 100644 --- a/facebook/delphiFacebook/R/contingency_variables.R +++ b/facebook/delphiFacebook/R/contingency_variables.R @@ -345,14 +345,14 @@ create_derivative_columns <- function(df) { } else { df$b_65_or_older <- NA_real_ } - - if ("mc_accept_cov_vaccine" %in% names(df)) { - df$b_hesitant_cov_vaccine <- as.numeric( - df$mc_accept_cov_vaccine == "prob not vaccinate" | df$mc_accept_cov_vaccine == "def not vaccinate" - ) - } else { - df$b_hesitant_cov_vaccine <- NA_real_ - } + + if ("mc_accept_cov_vaccine" %in% names(df)) { + df$b_hesitant_cov_vaccine <- as.numeric( + df$mc_accept_cov_vaccine == "prob not vaccinate" | df$mc_accept_cov_vaccine == "def not vaccinate" + ) + } else { + df$b_hesitant_cov_vaccine <- NA_real_ + } if ("mc_concerned_sideeffects" %in% names(df)) { df$b_concerned_sideeffects <- as.numeric( @@ -362,31 +362,67 @@ create_derivative_columns <- function(df) { df$b_concerned_sideeffects <- NA_real_ } - df$b_hesitant_sideeffects <- as.numeric( - df$b_hesitant_cov_vaccine & df$b_concerned_sideeffects + df$b_hesitant_sideeffects <- case_when( + is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, + is.na(df$b_concerned_sideeffects) == TRUE ~ NA, + df$wave < 7 ~ NA, + df$b_hesitant_cov_vaccine == 0 ~ NA, + df$b_hesitant_cov_vaccine == 1 & df$b_concerned_sideeffects == 1 ~ TRUE, + df$b_hesitant_cov_vaccine == 1 & df$b_concerned_sideeffects == 0 ~ FALSE, + TRUE ~ NA ) - df$b_hesitant_sideeffects[df$wave < 7] <- NA_real_ if ( "b_vaccine_likely_friends" %in% names(df) & "b_vaccine_likely_local_health" %in% names(df) & "b_vaccine_likely_who" %in% names(df) & "b_vaccine_likely_govt_health" %in% names(df) & "b_vaccine_likely_politicians" %in% names(df) ) { - df$b_hesitant_trust_fam <- as.numeric( - df$b_hesitant_cov_vaccine & df$b_vaccine_likely_friends + df$b_hesitant_trust_fam <- case_when( + is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, + is.na(df$b_vaccine_likely_friends) == TRUE ~ NA, + df$wave < 7 ~ NA, + df$b_hesitant_cov_vaccine == 0 ~ NA, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_friends == 1 ~ TRUE, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_friends == 0 ~ FALSE, + TRUE ~ NA ) - df$b_hesitant_trust_healthcare <- as.numeric( - df$b_hesitant_cov_vaccine & df$b_vaccine_likely_local_health + 
df$b_hesitant_trust_healthcare <- case_when( + is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, + is.na(df$b_vaccine_likely_local_health) == TRUE ~ NA, + df$wave < 7 ~ NA, + df$b_hesitant_cov_vaccine == 0 ~ NA, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_local_health == 1 ~ TRUE, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_local_health == 0 ~ FALSE, + TRUE ~ NA ) - df$b_hesitant_trust_who <- as.numeric( - df$b_hesitant_cov_vaccine & df$b_vaccine_likely_who + df$b_hesitant_trust_who <- case_when( + is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, + is.na(df$b_vaccine_likely_who) == TRUE ~ NA, + df$wave < 7 ~ NA, + df$b_hesitant_cov_vaccine == 0 ~ NA, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_who == 1 ~ TRUE, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_who == 0 ~ FALSE, + TRUE ~ NA ) - df$b_hesitant_trust_govt <- as.numeric( - df$b_hesitant_cov_vaccine & df$b_vaccine_likely_govt_health + df$b_hesitant_trust_govt <- case_when( + is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, + is.na(df$b_vaccine_likely_govt_health) == TRUE ~ NA, + df$wave < 7 ~ NA, + df$b_hesitant_cov_vaccine == 0 ~ NA, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_govt_health == 1 ~ TRUE, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_govt_health == 0 ~ FALSE, + TRUE ~ NA ) - df$b_hesitant_trust_politicians <- as.numeric( - df$b_hesitant_cov_vaccine & df$b_vaccine_likely_politicians + df$b_hesitant_trust_politicians <- case_when( + is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, + is.na(df$b_vaccine_likely_politicians) == TRUE ~ NA, + df$wave < 7 ~ NA, + df$b_hesitant_cov_vaccine == 0 ~ NA, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_politicians == 1 ~ TRUE, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_politicians == 0 ~ FALSE, + TRUE ~ NA ) + } else { df$b_hesitant_trust_fam <- NA_real_ df$b_hesitant_trust_healthcare <- NA_real_ @@ -429,7 +465,6 @@ remap_response <- function(df, col_var, map_old_new, default=NULL, response_type df[[col_var]] <- recode(df[[col_var]], !!!map_old_new, .default=default) } else if (response_type == "ms") { split_col <- split_options(df[[col_var]]) - map_fn <- ifelse( is.null(getOption("mc.cores")) , mapply, mcmapply) df[[col_var]] <- map_fn(split_col, FUN=function(row) { if ( length(row) == 1 && all(is.na(row)) ) { From c20a4a5a936f9eb1d60f4f2544d1f346a733a67f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 12 Mar 2021 10:41:48 -0500 Subject: [PATCH 04/23] fix failing tests --- .../delphiFacebook/R/contingency_aggregate.R | 64 +++++++++++++------ .../delphiFacebook/R/contingency_variables.R | 1 + 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index b5b975ee3..0af4ddbff 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -29,6 +29,17 @@ #' #' @export produce_aggregates <- function(df, aggregations, cw_list, params) { + msg_plain(paste0("Producing aggregates...")) + ## For the date range lookups we do on df, use a data.table key. This puts the + ## table in sorted order so data.table can use a binary search to find + ## matching dates, rather than a linear scan, and is important for very large + ## input files. + df <- as.data.table(df) + setkeyv(df, "start_dt") + + # Keep only obs in desired date range. 
+ df <- df[start_dt >= params$start_time & start_dt <= params$end_time] + output <- post_process_aggs(df, aggregations, cw_list) df <- output[[1]] aggregations <- output[[2]] @@ -42,17 +53,6 @@ produce_aggregates <- function(df, aggregations, cw_list, params) { zip5, start_dt) - msg_plain(paste0("Producing aggregates...")) - ## For the date range lookups we do on df, use a data.table key. This puts the - ## table in sorted order so data.table can use a binary search to find - ## matching dates, rather than a linear scan, and is important for very large - ## input files. - df <- as.data.table(df) - setkeyv(df, "start_dt") - - # Keep only obs in desired date range. - df <- df[start_dt >= params$start_time & start_dt <= params$end_time] - agg_groups <- unique(aggregations[c("group_by", "geo_level")]) # For each unique combination of groupby_vars and geo level, run aggregation process once @@ -168,14 +168,11 @@ post_process_aggs <- function(df, aggregations, cw_list) { # each unique level/response code; multi-select used for grouping are left as-is. # - multiple choice items are left as-is - #### TODO: How do we want to handle multi-select items when used for grouping? + #### TODO: How do we want to handle multi-select items used for grouping? agg_groups <- unique(aggregations$group_by) group_cols_to_convert <- unique(do.call(c, agg_groups)) - group_cols_to_convert <- group_cols_to_convert[startsWith(group_cols_to_convert, "b_")] - - metric_cols_to_convert <- unique(aggregations$metric) - - for (col_var in c(group_cols_to_convert, metric_cols_to_convert)) { + for (col_var in group_cols_to_convert) { + if (col_var == "geo_id") { next } if ( is.null(df[[col_var]]) ) { aggregations <- aggregations[aggregations$metric != col_var & !mapply(aggregations$group_by, @@ -190,14 +187,41 @@ post_process_aggs <- function(df, aggregations, cw_list) { if (startsWith(col_var, "b_")) { # Binary output <- code_binary(df, aggregations, col_var) - } else if (startsWith(col_var, "ms_")) { # Multiselect - output <- code_multiselect(df, aggregations, col_var) } else if (startsWith(col_var, "n_")) { # Numeric free response output <- code_numeric_freeresponse(df, aggregations, col_var) - } else if (startsWith(col_var, "mc_")) { # Multiple choice + } else { + # Multiple choice, multi-select, and variables that are formatted differently output <- list(df, aggregations) } + df <- output[[1]] + aggregations <- output[[2]] + } + + metric_cols_to_convert <- unique(aggregations$metric) + for (col_var in metric_cols_to_convert) { + if ( is.null(df[[col_var]]) ) { + aggregations <- aggregations[aggregations$metric != col_var & + !mapply(aggregations$group_by, + FUN=function(x) {col_var %in% x}), ] + msg_plain( + paste0( + col_var, " is not defined. Removing all aggregations that use it. 
", + nrow(aggregations), " remaining") + ) + next + } + if (startsWith(col_var, "b_")) { # Binary + output <- code_binary(df, aggregations, col_var) + } else if (startsWith(col_var, "n_")) { # Numeric free response + output <- code_numeric_freeresponse(df, aggregations, col_var) + } + else if (startsWith(col_var, "ms_")) { # Multi-select + output <- code_multiselect(df, aggregations, col_var) + } else { + # Multiple choice and variables that are formatted differently + output <- list(df, aggregations) + } df <- output[[1]] aggregations <- output[[2]] } diff --git a/facebook/delphiFacebook/R/contingency_variables.R b/facebook/delphiFacebook/R/contingency_variables.R index c95a554a6..3d419de3a 100644 --- a/facebook/delphiFacebook/R/contingency_variables.R +++ b/facebook/delphiFacebook/R/contingency_variables.R @@ -545,6 +545,7 @@ code_multiselect <- function(df, aggregations, col_var) { sep="_") } )) + #### TODO: eval(parse()) here is not the best approach, but I can't find another # way to get col_var (a string) to be used as a var that references a column # rather than as an actual string. This approach causes a shallow copy to be From 65f158f8269642347f26d010fe737e4ebdf4f21d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 12 Mar 2021 11:02:51 -0500 Subject: [PATCH 05/23] reduce logic complexity in input file choice --- facebook/delphiFacebook/NAMESPACE | 1 - facebook/delphiFacebook/R/contingency_utils.R | 89 +++++-------------- .../man/get_date_range_from_filenames.Rd | 17 ---- 3 files changed, 21 insertions(+), 86 deletions(-) delete mode 100644 facebook/delphiFacebook/man/get_date_range_from_filenames.Rd diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE index 9ef255f84..00c0dc09b 100644 --- a/facebook/delphiFacebook/NAMESPACE +++ b/facebook/delphiFacebook/NAMESPACE @@ -17,7 +17,6 @@ export(filter_complete_responses) export(filter_data_for_aggregation) export(filter_responses) export(floor_epiweek) -export(get_date_range_from_filenames) export(get_filenames_in_range) export(get_range_prev_full_month) export(get_range_prev_full_period) diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index a8cc22a38..f52dac9f8 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -52,49 +52,25 @@ read_contingency_params <- function(path = "params.json", template_path = "param #' #' @export update_params <- function(params) { + # Fill in end_time, if missing, with current time. + if (is.null(params$end_time)) { + params$end_time <- Sys.time() + } + if ( !is.null(params$start_date) ) { - # Use all data within the date range, either as explicitly set or assuming - # "now" is the end_date. - if (is.null(params$end_time)) { - params$end_time <- Sys.time() - } date_range <- list(params$start_time, params$end_time) - - if ( is.null(params$input) ) { - params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) - } - - } else if ( is.null(params$end_date) & (is.null(params$input) | length(params$input) == 0) ) { - # Neither end_date nor list of input files is provided, assume want to use - # most current full time period and data. 
- date_range <- get_range_prev_full_period(Sys.Date(), params$aggregate_range) - params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) - - } else if ( !is.null(params$end_date) & (is.null(params$input) | length(params$input) == 0) ) { - # List of input files is not provided, assume want to use the full period - # preceding the provided end_date - date_range <- get_range_prev_full_period( - as_date(params$end_date), params$aggregate_range) - params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) - - } else if ( is.null(params$end_date) & !is.null(params$input) & length(params$input) != 0 ) { - # Use list of input files provided, even if it does not constitute a full - # period. Use dates in input files to select range. - date_range <- get_date_range_from_filenames(params) - - } else if ( !is.null(params$end_date) & !is.null(params$input) & length(params$input) != 0 ) { - # Use the full period preceding the provided end_date AND use only the list - # of input files provided, even if they don't span the full period. - date_range <- get_range_prev_full_period( - as_date(params$end_date), params$aggregate_range) + } else { + # If start_date is not provided, assume want to use preceding full time period. + date_range <- get_range_prev_full_period(as_date(params$end_date), params$aggregate_range) } - + + params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) if (length(params$input) == 0) { stop("no input files to read in") } + params$start_time <- date_range[[1]] params$end_time <- date_range[[2]] - params$start_date <- as_date(date_range[[1]]) params$end_date <- as_date(date_range[[2]]) @@ -115,12 +91,17 @@ update_params <- function(params) { get_filenames_in_range <- function(start_date, end_date, params) { start_date <- as_date(start_date) - days(params$backfill_days) end_date <- as_date(end_date) - date_pattern <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$" - youtube_pattern <- ".*YouTube[.]csv$" - - filenames <- list.files(path=params$input_dir) - filenames <- filenames[grepl(date_pattern, filenames) & !grepl(youtube_pattern, filenames)] + if ( is.null(params$input) | length(params$input) == 0 ) { + date_pattern <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$" + youtube_pattern <- ".*YouTube[.]csv$" + + filenames <- list.files(path=params$input_dir) + filenames <- filenames[grepl(date_pattern, filenames) & !grepl(youtube_pattern, filenames)] + } else { + filenames <- params$input + } + file_end_dates <- as_date(substr(filenames, 1, 10)) file_start_dates <- as_date(substr(filenames, 12, 21)) @@ -133,34 +114,6 @@ get_filenames_in_range <- function(start_date, end_date, params) { return(filenames) } - -#' Get date range based on list of input files provided. 
-#' -#' @param params Params object produced by read_params -#' -#' @return Unnamed list of two dates -#' -#' @export -get_date_range_from_filenames <- function(params) { - date_pattern <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}.*[.]csv$" - youtube_pattern <- ".*YouTube[.]csv$" - - filenames <- params$input - filenames <- filenames[grepl(date_pattern, filenames) & !grepl(youtube_pattern, filenames)] - - dates <- as.Date(unlist(lapply(filenames, function(filename) { - file_end_date <- as_date(substr(filenames, 1, 10)) - file_start_date <- as_date(substr(filenames, 12, 21)) - - return(c(file_end_date, file_start_date)) - })), origin="1970-01-01") - - date_range <- list(min(dates), max(dates)) - - return(date_range) -} - - #' Check user-set aggregations for basic validity and add a few necessary cols. #' #' @param aggregations Data frame with columns `name`, `var_weight`, `metric`, diff --git a/facebook/delphiFacebook/man/get_date_range_from_filenames.Rd b/facebook/delphiFacebook/man/get_date_range_from_filenames.Rd deleted file mode 100644 index 3727e5cd9..000000000 --- a/facebook/delphiFacebook/man/get_date_range_from_filenames.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/contingency_utils.R -\name{get_date_range_from_filenames} -\alias{get_date_range_from_filenames} -\title{Get date range based on list of input files provided.} -\usage{ -get_date_range_from_filenames(params) -} -\arguments{ -\item{params}{Params object produced by read_params} -} -\value{ -Unnamed list of two dates -} -\description{ -Get date range based on list of input files provided. -} From 1f10a1b2bacccf3fbe92e82fde7c8b69b6085020 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 12 Mar 2021 15:18:27 -0500 Subject: [PATCH 06/23] fix relevant tests. 
support with new params setting --- .../delphiFacebook/R/contingency_aggregate.R | 3 +- facebook/delphiFacebook/R/contingency_utils.R | 13 +++++++-- .../tests/testthat/params-full.json | 1 + .../tests/testthat/params-test.json | 1 + .../tests/testthat/test-contingency-utils.R | 29 ++++--------------- 5 files changed, 19 insertions(+), 28 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 0af4ddbff..bebf92786 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -215,8 +215,7 @@ post_process_aggs <- function(df, aggregations, cw_list) { output <- code_binary(df, aggregations, col_var) } else if (startsWith(col_var, "n_")) { # Numeric free response output <- code_numeric_freeresponse(df, aggregations, col_var) - } - else if (startsWith(col_var, "ms_")) { # Multi-select + } else if (startsWith(col_var, "ms_")) { # Multi-select output <- code_multiselect(df, aggregations, col_var) } else { # Multiple choice and variables that are formatted differently diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R index f52dac9f8..8e2edbdde 100644 --- a/facebook/delphiFacebook/R/contingency_utils.R +++ b/facebook/delphiFacebook/R/contingency_utils.R @@ -33,6 +33,9 @@ read_contingency_params <- function(path = "params.json", template_path = "param contingency_params$num_filter <- if_else(contingency_params$debug, 2L, 100L) contingency_params$s_weight <- if_else(contingency_params$debug, 1.00, 0.01) contingency_params$s_mix_coef <- if_else(contingency_params$debug, 0.05, 0.05) + contingency_params$use_input_asis <- if_else( + is.null(contingency_params$use_input_asis), FALSE, contingency_params$use_input_asis + ) return(contingency_params) } @@ -57,15 +60,19 @@ update_params <- function(params) { params$end_time <- Sys.time() } + # Construct aggregate date range. if ( !is.null(params$start_date) ) { date_range <- list(params$start_time, params$end_time) } else { # If start_date is not provided, assume want to use preceding full time period. 
- date_range <- get_range_prev_full_period(as_date(params$end_date), params$aggregate_range) + date_range <- get_range_prev_full_period( + as_date(params$end_date) + , params$aggregate_range + ) } params$input <- get_filenames_in_range(date_range[[1]], date_range[[2]], params) - if (length(params$input) == 0) { + if ( length(params[["input"]]) == 0 || all(is.na(params[["input"]])) ) { stop("no input files to read in") } @@ -89,6 +96,8 @@ update_params <- function(params) { #' #' @export get_filenames_in_range <- function(start_date, end_date, params) { + if (params$use_input_asis) { return(params$input) } + start_date <- as_date(start_date) - days(params$backfill_days) end_date <- as_date(end_date) diff --git a/facebook/delphiFacebook/tests/testthat/params-full.json b/facebook/delphiFacebook/tests/testthat/params-full.json index 0d0b7f009..295827f08 100644 --- a/facebook/delphiFacebook/tests/testthat/params-full.json +++ b/facebook/delphiFacebook/tests/testthat/params-full.json @@ -27,6 +27,7 @@ "input": [ "full_synthetic.csv" ], + "use_input_asis": true, "aggregate_range": "month", "debug": false } diff --git a/facebook/delphiFacebook/tests/testthat/params-test.json b/facebook/delphiFacebook/tests/testthat/params-test.json index 7b73cf159..052ba4539 100644 --- a/facebook/delphiFacebook/tests/testthat/params-test.json +++ b/facebook/delphiFacebook/tests/testthat/params-test.json @@ -29,6 +29,7 @@ "input": [ "responses.csv" ], + "use_input_asis": true, "aggregate_range": "month", "debug": false } diff --git a/facebook/delphiFacebook/tests/testthat/test-contingency-utils.R b/facebook/delphiFacebook/tests/testthat/test-contingency-utils.R index c81729a25..165017dc9 100644 --- a/facebook/delphiFacebook/tests/testthat/test-contingency-utils.R +++ b/facebook/delphiFacebook/tests/testthat/test-contingency-utils.R @@ -7,6 +7,7 @@ test_that("testing update_params command", { # Empty input list params <- list( input = c(), + use_input_asis = TRUE, aggregate_range = "month", end_date = "2020-02-01", input_dir = "./input" @@ -17,6 +18,7 @@ test_that("testing update_params command", { # Use specified end date input_params <- list( input = c("full_response.csv"), + use_input_asis = TRUE, aggregate_range = "month", end_date = "2020-02-01" ) @@ -25,15 +27,15 @@ test_that("testing update_params command", { expected_output <- list( input = c("full_response.csv"), + use_input_asis = TRUE, aggregate_range = "month", end_date = ymd("2020-01-31"), - start_time = ymd_hms("2020-01-01 00:00:00", tz=timezone), end_time = ymd_hms("2020-01-31 23:59:59", tz=timezone), + start_time = ymd_hms("2020-01-01 00:00:00", tz=timezone), start_date = ymd("2020-01-01") ) out <- update_params(input_params) - expect_identical(out, expected_output) }) @@ -56,6 +58,7 @@ test_that("testing get_filenames_in_range command", { params <- list( input = c(), + use_input_asis = FALSE, backfill_days = 4, input_dir = tdir ) @@ -73,28 +76,6 @@ test_that("testing get_filenames_in_range command", { expect_equal(out, expected_output) }) -test_that("testing get_date_range_from_filenames command", { - files <- c( - "2019-11-06.2019-10-30.2020-11-06.Survey_of_COVID-Like_Illness_-_TODEPLOY_......_-_US_Expansion.csv", - "2019-12-31.2019-12-24_With_Translations.csv", - "2020-01-06.2019-12-31_Wave_4.csv", - "2020-01-16.2020-01-09_YouTube.csv", - "2020-01-16.2020-01-09_Wave_4.csv", - "2020-02-06.2020-01-31_Wave_4.csv", - "2020-02-16.2020-02-09_Wave_3.csv" - ) - - params <- list( - backfill_days = 4, - input = files - ) - - expected_output <- 
list(ymd("2019-10-30"), ymd("2020-02-16")) - out <- get_date_range_from_filenames(params) - - expect_equal(out, expected_output) -}) - test_that("testing verify_aggs command", { # Duplicate rows input_aggs <- tribble( From 7328a4f0da7eabfc2cc78b1a9f2b04131a91ddae Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 12 Mar 2021 16:40:28 -0500 Subject: [PATCH 07/23] simplify hesitant_* definitions --- .../delphiFacebook/R/contingency_aggregate.R | 4 +- .../delphiFacebook/R/contingency_variables.R | 76 +++++++------------ 2 files changed, 28 insertions(+), 52 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index bebf92786..036c95009 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -39,7 +39,7 @@ produce_aggregates <- function(df, aggregations, cw_list, params) { # Keep only obs in desired date range. df <- df[start_dt >= params$start_time & start_dt <= params$end_time] - + output <- post_process_aggs(df, aggregations, cw_list) df <- output[[1]] aggregations <- output[[2]] @@ -168,7 +168,7 @@ post_process_aggs <- function(df, aggregations, cw_list) { # each unique level/response code; multi-select used for grouping are left as-is. # - multiple choice items are left as-is - #### TODO: How do we want to handle multi-select items used for grouping? + #### TODO: How do we want to handle multi-select items when used for grouping? agg_groups <- unique(aggregations$group_by) group_cols_to_convert <- unique(do.call(c, agg_groups)) for (col_var in group_cols_to_convert) { diff --git a/facebook/delphiFacebook/R/contingency_variables.R b/facebook/delphiFacebook/R/contingency_variables.R index 3d419de3a..1f26a6ce3 100644 --- a/facebook/delphiFacebook/R/contingency_variables.R +++ b/facebook/delphiFacebook/R/contingency_variables.R @@ -345,14 +345,14 @@ create_derivative_columns <- function(df) { } else { df$b_65_or_older <- NA_real_ } - - if ("mc_accept_cov_vaccine" %in% names(df)) { - df$b_hesitant_cov_vaccine <- as.numeric( - df$mc_accept_cov_vaccine == "prob not vaccinate" | df$mc_accept_cov_vaccine == "def not vaccinate" - ) - } else { - df$b_hesitant_cov_vaccine <- NA_real_ - } + + if ("mc_accept_cov_vaccine" %in% names(df)) { + df$b_hesitant_cov_vaccine <- as.numeric( + df$mc_accept_cov_vaccine == "prob not vaccinate" | df$mc_accept_cov_vaccine == "def not vaccinate" + ) + } else { + df$b_hesitant_cov_vaccine <- NA_real_ + } if ("mc_concerned_sideeffects" %in% names(df)) { df$b_concerned_sideeffects <- as.numeric( @@ -363,13 +363,9 @@ create_derivative_columns <- function(df) { } df$b_hesitant_sideeffects <- case_when( - is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, - is.na(df$b_concerned_sideeffects) == TRUE ~ NA, - df$wave < 7 ~ NA, - df$b_hesitant_cov_vaccine == 0 ~ NA, - df$b_hesitant_cov_vaccine == 1 & df$b_concerned_sideeffects == 1 ~ TRUE, - df$b_hesitant_cov_vaccine == 1 & df$b_concerned_sideeffects == 0 ~ FALSE, - TRUE ~ NA + df$b_hesitant_cov_vaccine == 1 & df$b_concerned_sideeffects == 1 ~ 1, + df$b_hesitant_cov_vaccine == 1 & df$b_concerned_sideeffects == 0 ~ 0, + TRUE ~ NA_real_ ) if ( "b_vaccine_likely_friends" %in% names(df) & @@ -378,49 +374,29 @@ create_derivative_columns <- function(df) { "b_vaccine_likely_govt_health" %in% names(df) & "b_vaccine_likely_politicians" %in% names(df) ) { df$b_hesitant_trust_fam <- case_when( - is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, - 
is.na(df$b_vaccine_likely_friends) == TRUE ~ NA, - df$wave < 7 ~ NA, - df$b_hesitant_cov_vaccine == 0 ~ NA, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_friends == 1 ~ TRUE, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_friends == 0 ~ FALSE, - TRUE ~ NA + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_friends == 1 ~ 1, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_friends == 0 ~ 0, + TRUE ~ NA_real_ ) df$b_hesitant_trust_healthcare <- case_when( - is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, - is.na(df$b_vaccine_likely_local_health) == TRUE ~ NA, - df$wave < 7 ~ NA, - df$b_hesitant_cov_vaccine == 0 ~ NA, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_local_health == 1 ~ TRUE, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_local_health == 0 ~ FALSE, - TRUE ~ NA + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_local_health == 1 ~ 1, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_local_health == 0 ~ 0, + TRUE ~ NA_real_ ) df$b_hesitant_trust_who <- case_when( - is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, - is.na(df$b_vaccine_likely_who) == TRUE ~ NA, - df$wave < 7 ~ NA, - df$b_hesitant_cov_vaccine == 0 ~ NA, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_who == 1 ~ TRUE, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_who == 0 ~ FALSE, - TRUE ~ NA + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_who == 1 ~ 1, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_who == 0 ~ 0, + TRUE ~ NA_real_ ) df$b_hesitant_trust_govt <- case_when( - is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, - is.na(df$b_vaccine_likely_govt_health) == TRUE ~ NA, - df$wave < 7 ~ NA, - df$b_hesitant_cov_vaccine == 0 ~ NA, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_govt_health == 1 ~ TRUE, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_govt_health == 0 ~ FALSE, - TRUE ~ NA + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_govt_health == 1 ~ 1, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_govt_health == 0 ~ 0, + TRUE ~ NA_real_ ) df$b_hesitant_trust_politicians <- case_when( - is.na(df$b_hesitant_cov_vaccine) == TRUE ~ NA, - is.na(df$b_vaccine_likely_politicians) == TRUE ~ NA, - df$wave < 7 ~ NA, - df$b_hesitant_cov_vaccine == 0 ~ NA, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_politicians == 1 ~ TRUE, - df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_politicians == 0 ~ FALSE, - TRUE ~ NA + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_politicians == 1 ~ 1, + df$b_hesitant_cov_vaccine == 1 & df$b_vaccine_likely_politicians == 0 ~ 0, + TRUE ~ NA_real_ ) } else { From c59b3f4de3f444f69537308ecccd4ac1d4ba3e20 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 12 Mar 2021 17:03:28 -0500 Subject: [PATCH 08/23] set weekly aggs to same as monthly aggs --- facebook/delphiFacebook/R/contingency_run.R | 121 ++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/facebook/delphiFacebook/R/contingency_run.R b/facebook/delphiFacebook/R/contingency_run.R index 1e97da99c..d079c9dff 100644 --- a/facebook/delphiFacebook/R/contingency_run.R +++ b/facebook/delphiFacebook/R/contingency_run.R @@ -23,6 +23,127 @@ set_aggs <- function() { weekly_aggs <- tribble( ~name, ~metric, ~group_by, ~compute_fn, ~post_fn, + #### Cut 1: side effects if hesitant about getting vaccine and generally + # National + "pct_concerned_sideeffects", "b_concerned_sideeffects", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, 
jeffreys_binary, + "pct_hesitant_sideeffects", "b_hesitant_sideeffects", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + + # State + "pct_concerned_sideeffects", "b_concerned_sideeffects", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_sideeffects", "b_hesitant_sideeffects", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + # State marginal + "pct_concerned_sideeffects", "b_concerned_sideeffects", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_concerned_sideeffects", "b_concerned_sideeffects", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_concerned_sideeffects", "b_concerned_sideeffects", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_sideeffects", "b_hesitant_sideeffects", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_sideeffects", "b_hesitant_sideeffects", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_sideeffects", "b_hesitant_sideeffects", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + + + #### Cut 2: trust various institutions if hesitant about getting vaccine + # National + "pct_hesitant_trust_fam", "b_hesitant_trust_fam", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_healthcare", "b_hesitant_trust_healthcare", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_who", "b_hesitant_trust_who", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_govt", "b_hesitant_trust_govt", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_politicians", "b_hesitant_trust_politicians", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + + # State + "pct_hesitant_trust_fam", "b_hesitant_trust_fam", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_healthcare", "b_hesitant_trust_healthcare", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_who", "b_hesitant_trust_who", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_govt", "b_hesitant_trust_govt", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_politicians", "b_hesitant_trust_politicians", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + # State marginal + "pct_hesitant_trust_fam", "b_hesitant_trust_fam", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_fam", "b_hesitant_trust_fam", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_fam", "b_hesitant_trust_fam", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_healthcare", "b_hesitant_trust_healthcare", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_healthcare", "b_hesitant_trust_healthcare", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_healthcare", "b_hesitant_trust_healthcare", c("mc_race", "b_hispanic", "state"), 
compute_binary, jeffreys_binary, + "pct_hesitant_trust_who", "b_hesitant_trust_who", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_who", "b_hesitant_trust_who", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_who", "b_hesitant_trust_who", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_govt", "b_hesitant_trust_govt", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_govt", "b_hesitant_trust_govt", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_govt", "b_hesitant_trust_govt", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_politicians", "b_hesitant_trust_politicians", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_politicians", "b_hesitant_trust_politicians", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_hesitant_trust_politicians", "b_hesitant_trust_politicians", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + + + #### Cut 3: trust various institutions + # National + "pct_trust_fam", "b_vaccine_likely_friends", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_trust_healthcare", "b_vaccine_likely_local_health", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_trust_who", "b_vaccine_likely_who", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_trust_govt", "b_vaccine_likely_govt_health", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_trust_politicians", "b_vaccine_likely_politicians", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + + # State + "pct_trust_fam", "b_vaccine_likely_friends", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_healthcare", "b_vaccine_likely_local_health", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_who", "b_vaccine_likely_who", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_govt", "b_vaccine_likely_govt_health", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_politicians", "b_vaccine_likely_politicians", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + # State marginal + "pct_trust_fam", "b_vaccine_likely_friends", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_trust_fam", "b_vaccine_likely_friends", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_trust_fam", "b_vaccine_likely_friends", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_healthcare", "b_vaccine_likely_local_health", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_trust_healthcare", "b_vaccine_likely_local_health", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_trust_healthcare", "b_vaccine_likely_local_health", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_who", "b_vaccine_likely_who", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_trust_who", "b_vaccine_likely_who", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_trust_who", 
"b_vaccine_likely_who", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_govt", "b_vaccine_likely_govt_health", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_trust_govt", "b_vaccine_likely_govt_health", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_trust_govt", "b_vaccine_likely_govt_health", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_trust_politicians", "b_vaccine_likely_politicians", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_trust_politicians", "b_vaccine_likely_politicians", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_trust_politicians", "b_vaccine_likely_politicians", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + + + #### Cuts 4, 5, 6: vaccinated and accepting if senior, in healthcare, or generally + # National + "pct_vaccinated", "b_had_cov_vaccine", c("b_work_in_healthcare", "mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("b_65_or_older", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_work_in_healthcare", "mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_65_or_older", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "nation"), compute_binary, jeffreys_binary, + + # State + "pct_vaccinated", "b_had_cov_vaccine", c("b_work_in_healthcare", "mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("b_65_or_older", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_work_in_healthcare", "mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_65_or_older", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("mc_age", "mc_gender", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + + # State marginal + "pct_vaccinated", "b_had_cov_vaccine", c("b_work_in_healthcare", "mc_age", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("b_work_in_healthcare","mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("b_work_in_healthcare", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("b_65_or_older", "mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("b_65_or_older", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_vaccinated", "b_had_cov_vaccine", c("mc_race", 
"b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_work_in_healthcare", "mc_age", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_work_in_healthcare","mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_work_in_healthcare", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_65_or_older", "mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("b_65_or_older", "mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("mc_age", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("mc_gender", "state"), compute_binary, jeffreys_binary, + "pct_accepting", "b_accept_cov_vaccine", c("mc_race", "b_hispanic", "state"), compute_binary, jeffreys_binary, ) monthly_aggs <- tribble( From eb6874ed945c2a0a612f7e427067a0025f0e4001 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 12 Mar 2021 17:23:01 -0500 Subject: [PATCH 09/23] deduplicate process of checking column existence and format --- .../delphiFacebook/R/contingency_aggregate.R | 57 ++++++------------- 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 036c95009..9d34355f8 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -170,47 +170,26 @@ post_process_aggs <- function(df, aggregations, cw_list) { #### TODO: How do we want to handle multi-select items when used for grouping? agg_groups <- unique(aggregations$group_by) - group_cols_to_convert <- unique(do.call(c, agg_groups)) - for (col_var in group_cols_to_convert) { - if (col_var == "geo_id") { next } - if ( is.null(df[[col_var]]) ) { - aggregations <- aggregations[aggregations$metric != col_var & - !mapply(aggregations$group_by, - FUN=function(x) {col_var %in% x}), ] - msg_plain( - paste0( - col_var, " is not defined. Removing all aggregations that use it. ", - nrow(aggregations), " remaining") - ) - next - } - - if (startsWith(col_var, "b_")) { # Binary - output <- code_binary(df, aggregations, col_var) - } else if (startsWith(col_var, "n_")) { # Numeric free response - output <- code_numeric_freeresponse(df, aggregations, col_var) - } else { - # Multiple choice, multi-select, and variables that are formatted differently - output <- list(df, aggregations) - } - df <- output[[1]] - aggregations <- output[[2]] - } + group_cols <- unique(do.call(c, agg_groups)) + group_cols <- group_cols[group_cols != "geo_id"] metric_cols_to_convert <- unique(aggregations$metric) - for (col_var in metric_cols_to_convert) { - if ( is.null(df[[col_var]]) ) { - aggregations <- aggregations[aggregations$metric != col_var & - !mapply(aggregations$group_by, - FUN=function(x) {col_var %in% x}), ] - msg_plain( - paste0( - col_var, " is not defined. Removing all aggregations that use it. 
", - nrow(aggregations), " remaining") - ) - next - } - + + cols_check_available <- c(group_cols, metric_cols_to_convert) + cols_not_available <- cols_check_available[ !(cols_check_available %in% names(df)) ] + for (col_var in cols_not_available) { + # Remove from aggregations + aggregations <- aggregations[aggregations$metric != col_var & + !mapply(aggregations$group_by, + FUN=function(x) {col_var %in% x}), ] + msg_plain( + paste0( + col_var, " is not defined. Removing all aggregations that use it. ", + nrow(aggregations), " remaining") + } + + group_cols_to_convert <- group_cols[startsWith(group_cols, "b_")] + for (col_var in c(group_cols_to_convert, metric_cols_to_convert) ) { if (startsWith(col_var, "b_")) { # Binary output <- code_binary(df, aggregations, col_var) } else if (startsWith(col_var, "n_")) { # Numeric free response From 0257d95282a8654a78cdddaf39c75472cfcc700b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Sat, 13 Mar 2021 10:03:34 -0500 Subject: [PATCH 10/23] add missing paren --- facebook/delphiFacebook/R/contingency_aggregate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 9d34355f8..c08fee77c 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -182,10 +182,10 @@ post_process_aggs <- function(df, aggregations, cw_list) { aggregations <- aggregations[aggregations$metric != col_var & !mapply(aggregations$group_by, FUN=function(x) {col_var %in% x}), ] - msg_plain( - paste0( + msg_plain(paste0( col_var, " is not defined. Removing all aggregations that use it. ", nrow(aggregations), " remaining") + ) } group_cols_to_convert <- group_cols[startsWith(group_cols, "b_")] From 1aaf82696ab5ec43c9b2ba47e51dcb2576e99f1c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 16 Mar 2021 17:19:38 -0400 Subject: [PATCH 11/23] create local wave variable --- facebook/delphiFacebook/R/variables.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/facebook/delphiFacebook/R/variables.R b/facebook/delphiFacebook/R/variables.R index 49d0080a1..632190b57 100644 --- a/facebook/delphiFacebook/R/variables.R +++ b/facebook/delphiFacebook/R/variables.R @@ -291,6 +291,9 @@ code_testing <- function(input_data) { #' #' @importFrom dplyr coalesce code_vaccines <- function(input_data) { + wave <- unique(input_data$wave) + assert(length(wave) == 1, "can only code one wave at a time") + if ("V1" %in% names(input_data)) { # coded as 1 = Yes, 2 = No, 3 = don't know. We assume that don't know = no, # because, well, you'd know. 
From 972aa8979aee3cf47c19cb8e4eec377f5b8a6889 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 16 Mar 2021 17:50:58 -0400 Subject: [PATCH 12/23] update tests to match --- facebook/delphiFacebook/tests/testthat/test-variables.R | 1 + 1 file changed, 1 insertion(+) diff --git a/facebook/delphiFacebook/tests/testthat/test-variables.R b/facebook/delphiFacebook/tests/testthat/test-variables.R index 0b76f231a..bc8bf60ac 100644 --- a/facebook/delphiFacebook/tests/testthat/test-variables.R +++ b/facebook/delphiFacebook/tests/testthat/test-variables.R @@ -149,6 +149,7 @@ test_that("household size correctly imputes zeros", { test_that("vaccine acceptance is correctly coded", { input_data <- data.frame( + wave = 1, V1 = c(2, 3, 2, NA, 1, NA), V3 = c(1, 2, 3, 4, NA, NA) ) From c0a59e206816df3a10af3bcc6dfb8bab8a5c80ef Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Sun, 14 Mar 2021 10:27:33 -0400 Subject: [PATCH 13/23] remove trailing whitespace. add all_of for explicitness --- .../delphiFacebook/R/contingency_aggregate.R | 25 +++++++++---------- facebook/delphiFacebook/R/contingency_write.R | 20 +++++++-------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index c08fee77c..49a11009b 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -43,16 +43,16 @@ produce_aggregates <- function(df, aggregations, cw_list, params) { output <- post_process_aggs(df, aggregations, cw_list) df <- output[[1]] aggregations <- output[[2]] - + ## Keep only columns used in indicators, plus supporting columns. group_vars <- unique( unlist(aggregations$group_by) ) - df <- select(df, - all_of(unique(aggregations$metric)), - all_of(unique(aggregations$var_weight)), - all_of( group_vars[group_vars != "geo_id"] ), + df <- select(df, + all_of(unique(aggregations$metric)), + all_of(unique(aggregations$var_weight)), + all_of( group_vars[group_vars != "geo_id"] ), zip5, start_dt) - + agg_groups <- unique(aggregations[c("group_by", "geo_level")]) # For each unique combination of groupby_vars and geo level, run aggregation process once @@ -167,14 +167,14 @@ post_process_aggs <- function(df, aggregations, cw_list) { # - multi-select items are converted to a series of binary columns, one for # each unique level/response code; multi-select used for grouping are left as-is. # - multiple choice items are left as-is - + #### TODO: How do we want to handle multi-select items when used for grouping? agg_groups <- unique(aggregations$group_by) group_cols <- unique(do.call(c, agg_groups)) group_cols <- group_cols[group_cols != "geo_id"] - + metric_cols_to_convert <- unique(aggregations$metric) - + cols_check_available <- c(group_cols, metric_cols_to_convert) cols_not_available <- cols_check_available[ !(cols_check_available %in% names(df)) ] for (col_var in cols_not_available) { @@ -183,11 +183,11 @@ post_process_aggs <- function(df, aggregations, cw_list) { !mapply(aggregations$group_by, FUN=function(x) {col_var %in% x}), ] msg_plain(paste0( - col_var, " is not defined. Removing all aggregations that use it. ", + col_var, " is not defined. Removing all aggregations that use it. 
", nrow(aggregations), " remaining") ) } - + group_cols_to_convert <- group_cols[startsWith(group_cols, "b_")] for (col_var in c(group_cols_to_convert, metric_cols_to_convert) ) { if (startsWith(col_var, "b_")) { # Binary @@ -226,7 +226,7 @@ post_process_aggs <- function(df, aggregations, cw_list) { #' @param params a named list with entries "s_weight", "s_mix_coef", #' "num_filter" #' -#' @importFrom dplyr inner_join bind_rows +#' @importFrom dplyr inner_join bind_rows filter group_by summarize across all_of #' @importFrom parallel mclapply #' @importFrom stats complete.cases #' @@ -261,7 +261,6 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params) return(list()) } - ## Set an index on the groupby var columns so that the groupby step can be ## faster; data.table stores the sort order of the column and ## uses a binary search to find matching values, rather than a linear scan. diff --git a/facebook/delphiFacebook/R/contingency_write.R b/facebook/delphiFacebook/R/contingency_write.R index 97dc07ba9..8f0fd671f 100644 --- a/facebook/delphiFacebook/R/contingency_write.R +++ b/facebook/delphiFacebook/R/contingency_write.R @@ -1,5 +1,5 @@ #' Write csv file for sharing with researchers. -#' +#' #' CSV name includes date specifying start of time period aggregated, geo level, #' and grouping variables. #' @@ -15,36 +15,36 @@ #' @importFrom readr write_csv #' @importFrom dplyr arrange across #' @importFrom stringi stri_trim -#' +#' #' @export write_contingency_tables <- function(data, params, geo_level, groupby_vars) { if (!is.null(data) && nrow(data) != 0) { - data <- arrange(data, across(groupby_vars)) - + data <- arrange(data, across(all_of(groupby_vars))) + # Format reported columns. - data <- mutate_at(data, vars(-c(groupby_vars)), + data <- mutate_at(data, vars(-c(groupby_vars)), function(x) { stri_trim( formatC(as.numeric(x), digits=7, format="f", drop0trailing=TRUE) ) }) - + # Reduce verbosity of grouping vars for output purposes groupby_vars <- gsub("_", "", sub( ".+?_", "", groupby_vars[groupby_vars != "geo_id"])) filename <- sprintf("%s_%s.csv", format(params$start_date, "%Y%m%d"), paste(c(geo_level, groupby_vars), collapse="_")) file_out <- file.path(params$export_dir, filename) - + create_dir_not_exist(params$export_dir) - + msg_df(sprintf("saving contingency table data to %-35s", filename), data) write_csv(data, file_out) - + } else { msg_plain(sprintf( - "no aggregations produced for grouping variables %s (%s); CSV will not be saved", + "no aggregations produced for grouping variables %s (%s); CSV will not be saved", paste(groupby_vars, collapse=", "), geo_level )) } From 15df269991df585c22adf0d74f498c64097cc4e6 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Sun, 14 Mar 2021 11:07:37 -0400 Subject: [PATCH 14/23] more column-existence/format deduplication --- .../delphiFacebook/R/contingency_aggregate.R | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 49a11009b..3cdab7448 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -169,14 +169,14 @@ post_process_aggs <- function(df, aggregations, cw_list) { # - multiple choice items are left as-is #### TODO: How do we want to handle multi-select items when used for grouping? 
- agg_groups <- unique(aggregations$group_by) - group_cols <- unique(do.call(c, agg_groups)) + group_cols <- unique(do.call(c, aggregations$group_by)) group_cols <- group_cols[group_cols != "geo_id"] - metric_cols_to_convert <- unique(aggregations$metric) - - cols_check_available <- c(group_cols, metric_cols_to_convert) - cols_not_available <- cols_check_available[ !(cols_check_available %in% names(df)) ] + metric_cols <- unique(aggregations$metric) + + cols_check_available <- unique(c(group_cols, metric_cols)) + available <- cols_check_available %in% names(df) + cols_not_available <- cols_check_available[ !available ] for (col_var in cols_not_available) { # Remove from aggregations aggregations <- aggregations[aggregations$metric != col_var & @@ -188,8 +188,12 @@ post_process_aggs <- function(df, aggregations, cw_list) { ) } - group_cols_to_convert <- group_cols[startsWith(group_cols, "b_")] - for (col_var in c(group_cols_to_convert, metric_cols_to_convert) ) { + cols_available <- cols_check_available[ available ] + for (col_var in cols_available) { + if ( col_var %in% group_cols & !(col_var %in% metric_cols) & !startsWith(col_var, "b_") ) { + next + } + if (startsWith(col_var, "b_")) { # Binary output <- code_binary(df, aggregations, col_var) } else if (startsWith(col_var, "n_")) { # Numeric free response From cdc5c3ecda4edcabb2e4157e83ef8162eed8de6a Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 17 Mar 2021 15:12:00 -0400 Subject: [PATCH 15/23] remove unused imports --- facebook/delphiFacebook/R/contingency_aggregate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 3cdab7448..eeeb8e75c 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -230,7 +230,7 @@ post_process_aggs <- function(df, aggregations, cw_list) { #' @param params a named list with entries "s_weight", "s_mix_coef", #' "num_filter" #' -#' @importFrom dplyr inner_join bind_rows filter group_by summarize across all_of +#' @importFrom dplyr inner_join bind_rows #' @importFrom parallel mclapply #' @importFrom stats complete.cases #' From 2ee7cd5e882883f7f9562463fb9985ed6144ab4a Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 9 Dec 2020 02:02:16 -0800 Subject: [PATCH 16/23] Add hhs and nation --- .../delphi_doctor_visits/geo_maps.py | 34 +++++++++++++++++++ doctor_visits/delphi_doctor_visits/run.py | 2 +- .../delphi_doctor_visits/update_sensor.py | 11 ++++-- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doctor_visits/delphi_doctor_visits/geo_maps.py b/doctor_visits/delphi_doctor_visits/geo_maps.py index 9ba935683..3bc0cc2b1 100644 --- a/doctor_visits/delphi_doctor_visits/geo_maps.py +++ b/doctor_visits/delphi_doctor_visits/geo_maps.py @@ -61,6 +61,40 @@ def county_to_state(self, data): return data.groupby("state_id"), "state_id" + def county_to_hhs(self, data): + """Aggregate county data to the HHS region resolution. 
+ + Args: + data: dataframe aggregated to the daily-county resolution (all 7 cols expected) + + Returns: tuple of dataframe at the daily-HHS resolution, and geo_id column name + """ + data = self.gmpr.add_geocode(data, + "fips", + "hhs", + from_col="PatCountyFIPS") + data.drop(columns="PatCountyFIPS", inplace=True) + data = data.groupby(["ServiceDate", "hhs"]).sum().reset_index() + + return data.groupby("hhs"), "hhs" + + def county_to_nation(self, data): + """Aggregate county data to the nation resolution. + + Args: + data: dataframe aggregated to the daily-county resolution (all 7 cols expected) + + Returns: tuple of dataframe at the daily-nation resolution, and geo_id column name + """ + data = self.gmpr.add_geocode(data, + "fips", + "nation", + from_col="PatCountyFIPS") + data.drop(columns="PatCountyFIPS", inplace=True) + data = data.groupby(["ServiceDate", "nation"]).sum().reset_index() + + return data.groupby("nation"), "nation" + def county_to_hrr(self, data): """Aggregate county data to the HRR resolution. diff --git a/doctor_visits/delphi_doctor_visits/run.py b/doctor_visits/delphi_doctor_visits/run.py index d85e0b632..1feb6c945 100644 --- a/doctor_visits/delphi_doctor_visits/run.py +++ b/doctor_visits/delphi_doctor_visits/run.py @@ -68,7 +68,7 @@ def run_module(params): logging.info("n_waiting_days:\t{n_waiting_days}") ## geographies - geos = ["state", "msa", "hrr", "county"] + geos = ["state", "msa", "hrr", "county", "hhs", "nation"] ## print out other vars diff --git a/doctor_visits/delphi_doctor_visits/update_sensor.py b/doctor_visits/delphi_doctor_visits/update_sensor.py index 725d4ca4f..b2981ad32 100644 --- a/doctor_visits/delphi_doctor_visits/update_sensor.py +++ b/doctor_visits/delphi_doctor_visits/update_sensor.py @@ -78,7 +78,7 @@ def update_sensor( startdate: first sensor date (YYYY-mm-dd) enddate: last sensor date (YYYY-mm-dd) dropdate: data drop date (YYYY-mm-dd) - geo: geographic resolution, one of ["county", "state", "msa", "hrr"] + geo: geographic resolution, one of ["county", "state", "msa", "hrr", "nation", "hhs"] parallel: boolean to run the sensor update in parallel weekday: boolean to adjust for weekday effects se: boolean to write out standard errors, if true, use an obfuscated name @@ -142,8 +142,15 @@ def update_sensor( data_groups, _ = geo_map.county_to_msa(data) elif geo.lower() == "hrr": data_groups, _ = geo_map.county_to_hrr(data) + elif geo.lower() == "hhs": + data_groups, _ = geo_map.county_to_hhs(data) + elif geo.lower() == "nation": + data_groups, _ = geo_map.county_to_nation(data) else: - logging.error(f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr'") + + logging.error( + f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr', 'hhs', 'nation'" + ) return {} unique_geo_ids = list(data_groups.groups.keys()) From 5a0b2712391dcb30efbf74ae937f4b80367537ed Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 17 Mar 2021 14:44:23 -0700 Subject: [PATCH 17/23] Change ifelse block to a dict of functions --- .../delphi_doctor_visits/geo_maps.py | 9 ++++++++ .../delphi_doctor_visits/update_sensor.py | 22 ++----------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/doctor_visits/delphi_doctor_visits/geo_maps.py b/doctor_visits/delphi_doctor_visits/geo_maps.py index 3bc0cc2b1..716e8899d 100644 --- a/doctor_visits/delphi_doctor_visits/geo_maps.py +++ b/doctor_visits/delphi_doctor_visits/geo_maps.py @@ -7,6 +7,7 @@ Created: 2020-04-18 Last modified: 2020-04-30 by Aaron Rumack (add megacounty code) """ +from functools 
import partial import pandas as pd from delphi_utils.geomap import GeoMapper @@ -20,6 +21,14 @@ class GeoMaps: def __init__(self): """Create the underlying GeoMapper.""" self.gmpr = GeoMapper() + self.geo_func = {"county": partial(self.county_to_megacounty, + threshold_visits=Config.MIN_RECENT_VISITS, + threshold_len=Config.RECENT_LENGTH), + "state": self.county_to_state, + "msa": self.county_to_msa, + "hrr": self.county_to_hrr, + "hhs": self.county_to_hhs, + "nation": self.county_to_nation} @staticmethod def convert_fips(x): diff --git a/doctor_visits/delphi_doctor_visits/update_sensor.py b/doctor_visits/delphi_doctor_visits/update_sensor.py index b2981ad32..01e1647fe 100644 --- a/doctor_visits/delphi_doctor_visits/update_sensor.py +++ b/doctor_visits/delphi_doctor_visits/update_sensor.py @@ -132,26 +132,8 @@ def update_sensor( # get right geography geo_map = GeoMaps() - if geo.lower() == "county": - data_groups, _ = geo_map.county_to_megacounty( - data, Config.MIN_RECENT_VISITS, Config.RECENT_LENGTH - ) - elif geo.lower() == "state": - data_groups, _ = geo_map.county_to_state(data) - elif geo.lower() == "msa": - data_groups, _ = geo_map.county_to_msa(data) - elif geo.lower() == "hrr": - data_groups, _ = geo_map.county_to_hrr(data) - elif geo.lower() == "hhs": - data_groups, _ = geo_map.county_to_hhs(data) - elif geo.lower() == "nation": - data_groups, _ = geo_map.county_to_nation(data) - else: - - logging.error( - f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr', 'hhs', 'nation'" - ) - return {} + mapping_func = geo_map.geo_func[geo.lower()] + data_groups, _ = mapping_func(data) unique_geo_ids = list(data_groups.groups.keys()) # run sensor fitting code (maybe in parallel) From 65ab02db24ed036da7d01adf672a14212c59890d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 18 Mar 2021 13:12:46 -0400 Subject: [PATCH 18/23] unify group_vars definition between funcs --- facebook/delphiFacebook/R/contingency_aggregate.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index eeeb8e75c..1c091f437 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -169,12 +169,12 @@ post_process_aggs <- function(df, aggregations, cw_list) { # - multiple choice items are left as-is #### TODO: How do we want to handle multi-select items when used for grouping? 
- group_cols <- unique(do.call(c, aggregations$group_by)) - group_cols <- group_cols[group_cols != "geo_id"] + group_vars <- unique( unlist(aggregations$group_by) ) + group_vars <- group_vars[group_vars != "geo_id"] metric_cols <- unique(aggregations$metric) - cols_check_available <- unique(c(group_cols, metric_cols)) + cols_check_available <- unique(c(group_vars, metric_cols)) available <- cols_check_available %in% names(df) cols_not_available <- cols_check_available[ !available ] for (col_var in cols_not_available) { @@ -190,7 +190,7 @@ post_process_aggs <- function(df, aggregations, cw_list) { cols_available <- cols_check_available[ available ] for (col_var in cols_available) { - if ( col_var %in% group_cols & !(col_var %in% metric_cols) & !startsWith(col_var, "b_") ) { + if ( col_var %in% group_vars & !(col_var %in% metric_cols) & !startsWith(col_var, "b_") ) { next } From 572819fef3d040049e4c172c3f6b4563b69633d9 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 18 Mar 2021 14:08:04 -0400 Subject: [PATCH 19/23] unify group_vars name between funcs --- .../delphiFacebook/R/contingency_aggregate.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R index 7aa8e5ddc..0fec6e66f 100644 --- a/facebook/delphiFacebook/R/contingency_aggregate.R +++ b/facebook/delphiFacebook/R/contingency_aggregate.R @@ -55,7 +55,7 @@ produce_aggregates <- function(df, aggregations, cw_list, params) { agg_groups <- unique(aggregations[c("group_by", "geo_level")]) - # For each unique combination of groupby_vars and geo level, run aggregation process once + # For each unique combination of group_vars and geo level, run aggregation process once # and calculate all desired aggregations on the grouping. Rename columns. Save # to individual files for (group_ind in seq_along(agg_groups$group_by)) { @@ -248,16 +248,16 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params) ## inefficient; profiling shows the cost to be negligible, so shut it up df <- suppressWarnings(inner_join(df, crosswalk_data, by = "zip5")) - groupby_vars <- aggregations$group_by[[1]] + group_vars <- aggregations$group_by[[1]] - if (all(groupby_vars %in% names(df))) { - unique_group_combos <- unique(df[, groupby_vars, with=FALSE]) + if (all(group_vars %in% names(df))) { + unique_group_combos <- unique(df[, group_vars, with=FALSE]) unique_group_combos <- unique_group_combos[complete.cases(unique_group_combos)] } else { msg_plain( sprintf( "not all of groupby columns %s available in data; skipping aggregation", - paste(groupby_vars, collapse=", ") + paste(group_vars, collapse=", ") )) } @@ -268,7 +268,7 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params) ## Set an index on the groupby var columns so that the groupby step can be ## faster; data.table stores the sort order of the column and ## uses a binary search to find matching values, rather than a linear scan. - setindexv(df, groupby_vars) + setindexv(df, group_vars) calculate_group <- function(ii) { target_group <- unique_group_combos[ii] @@ -301,15 +301,15 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params) ## Do post-processing. 
for (row in seq_len(nrow(aggregations))) { aggregation <- aggregations$id[row] - groupby_vars <- aggregations$group_by[[row]] + group_vars <- aggregations$group_by[[row]] post_fn <- aggregations$post_fn[[row]] dfs_out[[aggregation]] <- dfs_out[[aggregation]][ - rowSums(is.na(dfs_out[[aggregation]][, c("val", "sample_size", groupby_vars)])) == 0, + rowSums(is.na(dfs_out[[aggregation]][, c("val", "sample_size", group_vars)])) == 0, ] if (geo_level == "county") { - df_megacounties <- megacounty(dfs_out[[aggregation]], params$num_filter, groupby_vars) + df_megacounties <- megacounty(dfs_out[[aggregation]], params$num_filter, group_vars) dfs_out[[aggregation]] <- bind_rows(dfs_out[[aggregation]], df_megacounties) } From 64883a1667a8903323a01831f17e2fecca5804cf Mon Sep 17 00:00:00 2001 From: Maria Jahja Date: Fri, 19 Mar 2021 13:57:36 -0400 Subject: [PATCH 20/23] update n_backfill_days to 70, fix logging format --- claims_hosp/params.json.template | 2 +- doctor_visits/delphi_doctor_visits/run.py | 10 +++++----- doctor_visits/params.json.template | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/claims_hosp/params.json.template b/claims_hosp/params.json.template index dc0dfe024..e93fb1d2b 100644 --- a/claims_hosp/params.json.template +++ b/claims_hosp/params.json.template @@ -8,7 +8,7 @@ "start_date": "2020-02-01", "end_date": null, "drop_date": null, - "n_backfill_days": 60, + "n_backfill_days": 70, "n_waiting_days": 3, "write_se": false, "obfuscated_prefix": "foo_obfuscated", diff --git a/doctor_visits/delphi_doctor_visits/run.py b/doctor_visits/delphi_doctor_visits/run.py index 1feb6c945..f3417d283 100644 --- a/doctor_visits/delphi_doctor_visits/run.py +++ b/doctor_visits/delphi_doctor_visits/run.py @@ -61,11 +61,11 @@ def run_module(params): startdate_dt = enddate_dt - timedelta(days=n_backfill_days) enddate = str(enddate_dt.date()) startdate = str(startdate_dt.date()) - logging.info("drop date:\t\t{dropdate}") - logging.info("first sensor date:\t{startdate}") - logging.info("last sensor date:\t{enddate}") - logging.info("n_backfill_days:\t{n_backfill_days}") - logging.info("n_waiting_days:\t{n_waiting_days}") + logging.info("drop date:\t\t%s", dropdate) + logging.info("first sensor date:\t%s", startdate) + logging.info("last sensor date:\t%s", enddate) + logging.info("n_backfill_days:\t%s", n_backfill_days) + logging.info("n_waiting_days:\t%s", n_waiting_days) ## geographies geos = ["state", "msa", "hrr", "county", "hhs", "nation"] diff --git a/doctor_visits/params.json.template b/doctor_visits/params.json.template index 9fc5b5aed..98b958926 100644 --- a/doctor_visits/params.json.template +++ b/doctor_visits/params.json.template @@ -5,7 +5,7 @@ "indicator": { "input_file": "./input/SYNEDI_AGG_OUTPATIENT_18052020_1455CDT.csv.gz", "drop_date": "", - "n_backfill_days": 60, + "n_backfill_days": 70, "n_waiting_days": 3, "weekday": [true, false], "se": false, From db86563d8e2de576646970c8a78726a822f38c32 Mon Sep 17 00:00:00 2001 From: Mike O'Brien Date: Fri, 19 Mar 2021 16:21:18 -0400 Subject: [PATCH 21/23] validation params for safegraph --- .../templates/safegraph-params-prod.json.j2 | 28 +++++++++++++++++-- safegraph/params.json.template | 28 +++++++++++++++++-- safegraph/run-safegraph.sh | 1 + 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/ansible/templates/safegraph-params-prod.json.j2 b/ansible/templates/safegraph-params-prod.json.j2 index 41550a8ae..4fc9d4785 100644 --- a/ansible/templates/safegraph-params-prod.json.j2 +++ 
b/ansible/templates/safegraph-params-prod.json.j2 @@ -14,7 +14,31 @@ "sync": true, "wip_signal" : [] }, - "archive": { - "cache_dir": "./cache" + "validation": { + "common": { + "data_source": "safegraph", + "span_length": 14, + "end_date": "today", + "suppressed_errors": [ + {"signal": "bars_visit_num"}, + {"signal": "bars_visit_prop"}, + {"signal": "restaurants_visit_num"}, + {"signal": "restaurants_visit_prop"} + ] + }, + "static": { + "minimum_sample_size": 100, + "missing_se_allowed": false, + "missing_sample_size_allowed": false + }, + "dynamic": { + "ref_window_size": 7, + "smoothed_signals": [ + "completely_home_prop_7dav", + "full_time_work_prop_7dav", + "part_time_work_prop_7dav", + "median_home_dwell_time_7dav" + ] + } } } diff --git a/safegraph/params.json.template b/safegraph/params.json.template index 0c830c41c..3649cacbd 100644 --- a/safegraph/params.json.template +++ b/safegraph/params.json.template @@ -17,7 +17,31 @@ "part_time_work_prop_7dav", "full_time_work_prop_7dav"] }, - "archive": { - "cache_dir": "./cache" + "validation": { + "common": { + "data_source": "safegraph", + "span_length": 14, + "end_date": "today", + "suppressed_errors": [ + {"signal": "bars_visit_num"}, + {"signal": "bars_visit_prop"}, + {"signal": "restaurants_visit_num"}, + {"signal": "restaurants_visit_prop"} + ] + }, + "static": { + "minimum_sample_size": 100, + "missing_se_allowed": false, + "missing_sample_size_allowed": false + }, + "dynamic": { + "ref_window_size": 7, + "smoothed_signals": [ + "completely_home_prop_7dav", + "full_time_work_prop_7dav", + "part_time_work_prop_7dav", + "median_home_dwell_time_7dav" + ] + } } } diff --git a/safegraph/run-safegraph.sh b/safegraph/run-safegraph.sh index 01c54f0e6..a8ba8b90e 100644 --- a/safegraph/run-safegraph.sh +++ b/safegraph/run-safegraph.sh @@ -12,6 +12,7 @@ rm -f ./receiving/* # Run the indicator code. echo "Running the indicator..." env/bin/python -m delphi_safegraph +env/bin/python -m delphi_utils.validator # Copy the files to the ingestion directory. echo "Copying files to the ingestion directory..." 
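The validation block added above lists four safegraph signals whose known failures are muted ("suppressed_errors"), static checks (minimum sample size, whether missing SE / sample size are allowed), and dynamic checks against a 7-day reference window for the smoothed signals; run-safegraph.sh then invokes the validator right after the indicator run. As a rough illustration of how a suppression list of this shape could be applied — using a hypothetical apply_suppressions helper and made-up failure records, not the actual delphi_utils.validator internals:

    # Illustrative sketch only: apply_suppressions is a hypothetical helper,
    # not part of the real delphi_utils.validator API.
    def apply_suppressions(failures, suppressed_errors):
        """Drop validation failures that match any suppression entry.

        A failure (a dict of attributes) is suppressed when every key/value
        pair in some suppression entry, e.g. {"signal": "bars_visit_num"},
        matches it.
        """
        def is_suppressed(failure):
            return any(
                all(failure.get(key) == value for key, value in entry.items())
                for entry in suppressed_errors
            )

        return [failure for failure in failures if not is_suppressed(failure)]


    if __name__ == "__main__":
        # Suppression list mirroring the "suppressed_errors" block above.
        suppressed = [
            {"signal": "bars_visit_num"},
            {"signal": "restaurants_visit_prop"},
        ]
        # Hypothetical failure records for demonstration.
        failures = [
            {"signal": "bars_visit_num", "check": "min_sample_size"},
            {"signal": "completely_home_prop_7dav", "check": "smooth_diff"},
        ]
        # Only the completely_home_prop_7dav failure should survive.
        print(apply_suppressions(failures, suppressed))

Note that the same validation block is added to both the Ansible prod template and safegraph/params.json.template, so local and production runs would suppress the same four bars/restaurants visit signals.
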
From 797ec0a9fbf73e04f81b52d9c001b1cc631b5b33 Mon Sep 17 00:00:00 2001 From: Maria Jahja Date: Sun, 21 Mar 2021 01:11:21 -0400 Subject: [PATCH 22/23] add vi/gu to state count --- claims_hosp/delphi_claims_hosp/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/claims_hosp/delphi_claims_hosp/config.py b/claims_hosp/delphi_claims_hosp/config.py index d21b46c21..0ed640ca1 100644 --- a/claims_hosp/delphi_claims_hosp/config.py +++ b/claims_hosp/delphi_claims_hosp/config.py @@ -63,7 +63,7 @@ class GeoConstants: NUM_COUNTIES = 3141 + 52 NUM_HRRS = 308 NUM_MSAS = 392 + 52 # MSA + States - NUM_STATES = 52 # including DC and PR + NUM_STATES = 54 # including DC, PR, VI, GU NUM_HHSS = 10 NUM_NATIONS = 1 From 6eabe7fdcb4e9860d72f72d3bfa909599553388b Mon Sep 17 00:00:00 2001 From: Kathryn M Mazaitis Date: Mon, 22 Mar 2021 10:50:40 -0400 Subject: [PATCH 23/23] Drop combo nmf and fb vaccine_likely_local_health from sirCAL --- ansible/templates/sir_complainsalot-params-prod.json.j2 | 4 ++-- sir_complainsalot/params.json.template | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2 index b605c536f..a15c7d70f 100644 --- a/ansible/templates/sir_complainsalot-params-prod.json.j2 +++ b/ansible/templates/sir_complainsalot-params-prod.json.j2 @@ -34,12 +34,12 @@ "fb-survey": { "max_age": 3, "maintainers": ["U01069KCRS7"], - "retired-signals": ["smoothed_anxious_5d", "smoothed_wanxious_5d", "smoothed_depressed_5d", "smoothed_wdepressed_5d", "smoothed_felt_isolated_5d", "smoothed_wfelt_isolated_5d", "smoothed_large_event_1d", "smoothed_wlarge_event_1d", "smoothed_restaurant_1d", "smoothed_wrestaurant_1d", "smoothed_shop_1d", "smoothed_wshop_1d", "smoothed_spent_time_1d", "smoothed_wspent_time_1d", "smoothed_travel_outside_state_5d", "smoothed_wtravel_outside_state_5d", "smoothed_work_outside_home_1d", "smoothed_wwork_outside_home_1d", "smoothed_wearing_mask", "smoothed_wwearing_mask"] + "retired-signals": ["smoothed_anxious_5d", "smoothed_wanxious_5d", "smoothed_depressed_5d", "smoothed_wdepressed_5d", "smoothed_felt_isolated_5d", "smoothed_wfelt_isolated_5d", "smoothed_large_event_1d", "smoothed_wlarge_event_1d", "smoothed_restaurant_1d", "smoothed_wrestaurant_1d", "smoothed_shop_1d", "smoothed_wshop_1d", "smoothed_spent_time_1d", "smoothed_wspent_time_1d", "smoothed_travel_outside_state_5d", "smoothed_wtravel_outside_state_5d", "smoothed_work_outside_home_1d", "smoothed_wwork_outside_home_1d", "smoothed_wearing_mask", "smoothed_wwearing_mask", "smoothed_vaccine_likely_local_health", "smoothed_wvaccine_likely_local_health"] }, "indicator-combination": { "max_age": 4, "maintainers": ["U01AP8GSWG3","U01069KCRS7"], - "retired-signals": ["nmf_day_doc_fbs_ght"] + "retired-signals": ["nmf_day_doc_fbs_ght", "nmf_day_doc_fbc_fbs_ght"] }, "quidel": { "max_age":6, diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template index 140e31538..1e2ee3d1a 100644 --- a/sir_complainsalot/params.json.template +++ b/sir_complainsalot/params.json.template @@ -35,12 +35,12 @@ "fb-survey": { "max_age": 3, "maintainers": ["U01069KCRS7"], - "retired-signals": ["smoothed_anxious_5d", "smoothed_wanxious_5d", "smoothed_depressed_5d", "smoothed_wdepressed_5d", "smoothed_felt_isolated_5d", "smoothed_wfelt_isolated_5d", "smoothed_large_event_1d", "smoothed_wlarge_event_1d", "smoothed_restaurant_1d", "smoothed_wrestaurant_1d", "smoothed_shop_1d", 
"smoothed_wshop_1d", "smoothed_spent_time_1d", "smoothed_wspent_time_1d", "smoothed_travel_outside_state_5d", "smoothed_wtravel_outside_state_5d", "smoothed_work_outside_home_1d", "smoothed_wwork_outside_home_1d", "smoothed_wearing_mask", "smoothed_wwearing_mask"] + "retired-signals": ["smoothed_anxious_5d", "smoothed_wanxious_5d", "smoothed_depressed_5d", "smoothed_wdepressed_5d", "smoothed_felt_isolated_5d", "smoothed_wfelt_isolated_5d", "smoothed_large_event_1d", "smoothed_wlarge_event_1d", "smoothed_restaurant_1d", "smoothed_wrestaurant_1d", "smoothed_shop_1d", "smoothed_wshop_1d", "smoothed_spent_time_1d", "smoothed_wspent_time_1d", "smoothed_travel_outside_state_5d", "smoothed_wtravel_outside_state_5d", "smoothed_work_outside_home_1d", "smoothed_wwork_outside_home_1d", "smoothed_wearing_mask", "smoothed_wwearing_mask", "smoothed_vaccine_likely_local_health", "smoothed_wvaccine_likely_local_health"] }, "indicator-combination": { "max_age": 3, "maintainers": ["U01AP8GSWG3","U01069KCRS7"], - "retired-signals": ["nmf_day_doc_fbs_ght"] + "retired-signals": ["nmf_day_doc_fbs_ght", "nmf_day_doc_fbc_fbs_ght"] }, "quidel": { "max_age":6,