diff --git a/.github/workflows/R-CMD-check-full.yaml b/.github/workflows/R-CMD-check-full.yaml index f2b5fc3d..84b8898a 100644 --- a/.github/workflows/R-CMD-check-full.yaml +++ b/.github/workflows/R-CMD-check-full.yaml @@ -19,21 +19,26 @@ jobs: fail-fast: false matrix: config: + # Mac latest release - { os: macos-latest, r: "release" } + # Oldest R version we claim to support + - { os: macos-latest, r: "3.5" } + # Windows latest release - { os: windows-latest, r: "release" } - # Use 3.6 to trigger usage of RTools35 - - { os: windows-latest, r: "3.6" } + # Use 3.5 to trigger usage of RTools35 + - { os: windows-latest, r: "3.5" } # use 4.1 to check with rtools40's older compiler - { os: windows-latest, r: "4.1" } + # Ubuntu latest release - { os: ubuntu-latest, r: "devel", http-user-agent: "release" } - { os: ubuntu-latest, r: "release" } - { os: ubuntu-latest, r: "oldrel-1" } - { os: ubuntu-latest, r: "oldrel-2" } - { os: ubuntu-latest, r: "oldrel-3" } - { os: ubuntu-latest, r: "oldrel-4" } - # The oldest version of R we claim to support + # Oldest R version we claim to support - { os: ubuntu-latest, r: "3.5" } env: diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 745873b3..74ccb483 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -47,10 +47,10 @@ jobs: - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v5 diff --git a/.lintr b/.lintr index c7c90554..def0c185 100644 --- a/.lintr +++ b/.lintr @@ -1,7 +1,7 @@ linters: linters_with_defaults( line_length_linter(120), - cyclocomp_linter = NULL, - object_length_linter(length = 40L) + object_length_linter(length = 40L), + return_linter = NULL ) exclusions: list( "renv", diff --git a/DESCRIPTION b/DESCRIPTION index 18b78398..9a6c5f76 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epidatr Title: Client for Delphi's 'Epidata' API -Version: 1.2.0 +Version: 1.2.1 Authors@R: c( person("Logan", "Brooks", , "lcbrooks@andrew.cmu.edu", role = "aut"), person("Dmitry", "Shemetov", , "dshemeto@andrew.cmu.edu", role = "aut"), @@ -30,7 +30,7 @@ URL: https://cmu-delphi.github.io/epidatr/, https://cmu-delphi.github.io/delphi-epidata/, https://github.com/cmu-delphi/epidatr BugReports: https://github.com/cmu-delphi/epidatr/issues -Depends: +Depends: R (>= 3.5.0) Imports: cachem, @@ -45,6 +45,7 @@ Imports: purrr, rappdirs, readr, + rlang, tibble, usethis, xml2 @@ -54,13 +55,12 @@ Suggests: knitr, mapproj, maps, - rlang, rmarkdown, testthat (>= 3.1.5), withr -VignetteBuilder: +VignetteBuilder: knitr -Remotes: +Remotes: cmu-delphi/delphidocs Config/Needs/website: cmu-delphi/delphidocs Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index c6fed522..19eefb85 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ import(cachem) import(glue) importFrom(MMWRweek,MMWRweek) importFrom(MMWRweek,MMWRweek2Date) +importFrom(cachem,is.key_missing) importFrom(checkmate,assert) importFrom(checkmate,assert_character) importFrom(checkmate,assert_integerish) @@ -80,7 +81,8 @@ importFrom(magrittr,"%>%") importFrom(openssl,md5) importFrom(purrr,map_chr) importFrom(purrr,map_lgl) -importFrom(readr,read_csv) +importFrom(rlang,dots_list) +importFrom(rlang,inject) importFrom(stats,na.omit) importFrom(tibble,as_tibble) importFrom(tibble,tibble) diff --git a/NEWS.md b/NEWS.md index c8e837c3..38c3c8b5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# epidatr 1.2.1 +## Patches +- Fix so that `covidcast_epidata()` will still print if fields are missing. + # epidatr 1.2.0 ## Changes @@ -8,6 +12,7 @@ - Support more date formats in function to convert dates to epiweeks. Use `parse_api_date` since it already supports both common formats. #276 - `EPIDATR_USE_CACHE` only supported exactly "TRUE" before. Now it supports all logical values and includes a warning when any value that can't be converted to logical is provided. #273 - `missing` doesn't count default values as non-missing. If a user doesn't pass `geo_values` or `time_values` (both of which default to `"*"` in `pub_covidcast`), or `dates` (in `pub_covid_hosp_state_timeseries`), the missing check fails. To avoid this, just don't check missingness of those two arguments. +- `fetch_args_list` now has an `refresh_cache` argument, which is `FALSE` by default. # epidatr 1.1.1 diff --git a/R/cache.R b/R/cache.R index bc30b48f..c9810f23 100644 --- a/R/cache.R +++ b/R/cache.R @@ -3,6 +3,7 @@ cache_environ <- new.env(parent = emptyenv()) cache_environ$use_cache <- NULL cache_environ$epidatr_cache <- NULL +cache_environ$cache_args <- NULL #' Create or renew a cache for this session #' @aliases set_cache @@ -169,6 +170,12 @@ set_cache <- function(cache_dir = NULL, max_age = days * 24 * 60 * 60, logfile = file.path(cache_dir, logfile) ) + cache_environ$cache_args <- list( + cache_dir = cache_dir, + days = days, + max_size = max_size, + logfile = logfile + ) } cli::cli_inform(c( @@ -183,9 +190,9 @@ set_cache <- function(cache_dir = NULL, #' Manually reset the cache, deleting all currently saved data and starting afresh #' @description #' Deletes the current cache and resets a new cache. Deletes local data! If you -#' are using a session unique cache, you will have to pass the arguments you -#' used for `set_cache` earlier, otherwise the system-wide `.Renviron`-based -#' defaults will be used. +#' are using a session unique cache, the previous settings will be reused. If +#' you pass in new `set_cache` arguments, they will take precedence over the +#' previous settings. #' @param disable instead of setting a new cache, disable caching entirely; #' defaults to `FALSE` #' @inheritDotParams set_cache @@ -195,14 +202,26 @@ set_cache <- function(cache_dir = NULL, #' [`disable_cache`] to only disable without deleting, and [`cache_info`] #' @export #' @import cachem +#' @importFrom rlang dots_list inject clear_cache <- function(..., disable = FALSE) { if (any(!is.na(cache_environ$epidatr_cache))) { cache_environ$epidatr_cache$destroy() + recovered_args <- cache_environ$cache_args + cache_environ$cache_args <- NULL + } else { + recovered_args <- list() } + args <- dots_list( + ..., + confirm = FALSE, + !!!recovered_args, + .homonyms = "first", + .ignore_empty = "all" + ) if (disable) { cache_environ$epidatr_cache <- NULL } else { - set_cache(...) + inject(set_cache(!!!args)) } } @@ -234,68 +253,85 @@ disable_cache <- function() { #' disable without deleting #' @export cache_info <- function() { - if (is.null(cache_environ$epidatr_cache)) { - return("there is no cache") - } else { + if (is_cache_enabled()) { return(cache_environ$epidatr_cache$info()) + } else { + return("there is no cache") } } -#' Dispatch caching +#' Check if the cache is enabled +#' @keywords internal +is_cache_enabled <- function() { + !is.null(cache_environ$epidatr_cache) +} + +#' Helper that checks whether a call is actually cachable +#' +#' The cacheable endpoints are those with `as_of` or `issues` parameters: +#' - pub_covidcast +#' - pub_covid_hosp_state_timeseries +#' - pub_ecdc_ili +#' - pub_flusurv +#' - pub_fluview_clinical +#' - pub_fluview +#' - pub_kcdc_ili +#' - pub_nidss_flu +#' - pub_paho_dengue +#' +#' @keywords internal +check_is_cachable <- function(epidata_call, fetch_args) { + as_of_cachable <- !is.null(epidata_call$params$as_of) && !identical(epidata_call$params$as_of, "*") + issues_cachable <- !is.null(epidata_call$params$issues) && !identical(epidata_call$params$issues, "*") + is_cachable <- ( + # Cache should be enabled + is_cache_enabled() && + # Call should be cachable + (as_of_cachable || issues_cachable) && + # This should not be a dry run + !fetch_args$dry_run && + # Base url should be null + is.null(fetch_args$base_url) && + # Don't cache debug calls + !fetch_args$debug && + # Format type should be json + fetch_args$format_type == "json" && + # Fields should be null + is.null(fetch_args$fields) && + # Disable date parsing should be false + !fetch_args$disable_date_parsing && + # Disable data frame parsing should be false + !fetch_args$disable_data_frame_parsing && + # Refresh cache should be false + fetch_args$refresh_cache == FALSE + ) + return(is_cachable) +} + +#' Check for warnings for the cache #' #' @description -#' The guts of caching, its interposed between fetch and the specific fetch -#' methods. Internal method only. +#' Adds warnings when arguments are potentially too recent to use with the cache. #' #' @param epidata_call the `epidata_call` object #' @param fetch_args the args list for fetch as generated by [`fetch_args_list()`] #' @keywords internal -#' @importFrom openssl md5 -cache_epidata_call <- function(epidata_call, fetch_args = fetch_args_list()) { - is_cachable <- check_is_cachable(epidata_call, fetch_args) - if (is_cachable) { - target <- request_url(epidata_call) - hashed <- md5(target) - cached <- cache_environ$epidatr_cache$get(hashed) - as_of_recent <- check_is_recent(epidata_call$params$as_of, 7) - issues_recent <- check_is_recent(epidata_call$params$issues, 7) - if (as_of_recent || issues_recent) { - cli::cli_warn( - c( - "Using cached results with `as_of` within the past week (or the future!). +check_for_cache_warnings <- function(epidata_call, fetch_args) { + as_of_recent <- check_is_recent(epidata_call$params$as_of, 7) + issues_recent <- check_is_recent(epidata_call$params$issues, 7) + if (as_of_recent || issues_recent) { + cli::cli_warn( + c( + "Using cached results with `as_of` within the past week (or the future!). This will likely result in an invalid cache. Consider", - "i" = "disabling the cache for this session with `disable_cache` or + "i" = "disabling the cache for this session with `disable_cache` or permanently with environmental variable `EPIDATR_USE_CACHE=FALSE`", - "i" = "setting `EPIDATR_CACHE_MAX_AGE_DAYS={Sys.getenv('EPIDATR_CACHE_MAX_AGE_DAYS + "i" = "setting `EPIDATR_CACHE_MAX_AGE_DAYS={Sys.getenv('EPIDATR_CACHE_MAX_AGE_DAYS ', unset = 1)}` to e.g. `3/24` (3 hours)." - ), - .frequency = "regularly", - .frequency_id = "cache timing issues", - class = "cache_recent_data" - ) - } - if (!is.key_missing(cached)) { - cli::cli_warn( - c( - "Loading from the cache at {cache_environ$epidatr_cache$info()$dir}; - see {cache_environ$epidatr_cache$info()$logfile} for more details." - ), - .frequency = "regularly", - .frequency_id = "using the cache", - class = "cache_access" - ) - return(cached[[1]]) - } - } - # need to actually get the data, since its either not in the cache or we're not caching - runtime <- system.time(if (epidata_call$only_supports_classic) { - fetched <- fetch_classic(epidata_call, fetch_args) - } else { - fetched <- fetch_tbl(epidata_call, fetch_args) - }) - # add it to the cache if appropriate - if (is_cachable) { - cache_environ$epidatr_cache$set(hashed, list(fetched, Sys.time(), runtime)) + ), + .frequency = "regularly", + .frequency_id = "cache timing issues", + class = "cache_recent_data" + ) } - return(fetched) } diff --git a/R/covidcast.R b/R/covidcast.R index 32868499..74c8a6f7 100644 --- a/R/covidcast.R +++ b/R/covidcast.R @@ -69,18 +69,18 @@ parse_source <- function(source, base_url) { #' @export as_tibble.covidcast_data_signal_list <- function(x, ...) { tib <- list() - tib$source <- unname(map_chr(x, "source")) - tib$signal <- unname(map_chr(x, "signal")) - tib$name <- unname(map_chr(x, "name")) - tib$active <- unname(map_lgl(x, "active")) - tib$short_description <- unname(map_chr(x, "short_description")) - tib$description <- unname(map_chr(x, "description")) - tib$time_type <- unname(map_chr(x, "time_type")) - tib$time_label <- unname(map_chr(x, "time_label")) - tib$value_label <- unname(map_chr(x, "value_label")) - tib$format <- unname(map_chr(x, "format")) - tib$category <- unname(map_chr(x, "category")) - tib$high_values_are <- unname(map_chr(x, "high_values_are")) + chr_fields <- c( + "source", "signal", "name", "short_description", + "description", "time_type", "time_label", "value_label", + "format", "category", "high_values_are" + ) + for (field in chr_fields) { + tib[[field]] <- unname(map_chr(x, field, .default = "")) + } + lgl_fields <- c("active") + for (field in lgl_fields) { + tib[[field]] <- unname(map_lgl(x, field, .default = "")) + } as_tibble(tib) } @@ -184,11 +184,10 @@ covidcast_epidata <- function(base_url = global_base_url, timeout_seconds = 30) #' @export as_tibble.covidcast_data_source_list <- function(x, ...) { tib <- list() - tib$source <- unname(map_chr(x, "source")) - tib$name <- unname(map_chr(x, "name")) - tib$description <- unname(map_chr(x, "description")) - tib$reference_signal <- unname(map_chr(x, "reference_signal")) - tib$license <- unname(map_chr(x, "license")) + fields <- c("source", "name", "description", "reference_signal", "license") + for (field in fields) { + tib[[field]] <- unname(map_chr(x, field, .default = "")) + } as_tibble(tib) } diff --git a/R/epidatacall.R b/R/epidatacall.R index 958e00aa..c97a6d16 100644 --- a/R/epidatacall.R +++ b/R/epidatacall.R @@ -154,6 +154,8 @@ print.epidata_call <- function(x, ...) { #' @param debug if `TRUE`, return the raw response from the API #' @param format_type the format to request from the API, one of classic, json, #' csv; this is only used by `fetch_debug`, and by default is `"json"` +#' @param refresh_cache if `TRUE`, ignore the cache, fetch the data from the +#' API, and update the cache, if it is enabled #' @return A `fetch_args` object containing all the specified options #' @export #' @aliases fetch_args @@ -168,7 +170,8 @@ fetch_args_list <- function( base_url = NULL, dry_run = FALSE, debug = FALSE, - format_type = c("json", "classic", "csv")) { + format_type = c("json", "classic", "csv"), + refresh_cache = FALSE) { rlang::check_dots_empty() assert_character(fields, null.ok = TRUE, any.missing = FALSE) @@ -180,6 +183,7 @@ fetch_args_list <- function( assert_logical(dry_run, null.ok = FALSE, len = 1L, any.missing = TRUE) assert_logical(debug, null.ok = FALSE, len = 1L, any.missing = FALSE) format_type <- match.arg(format_type) + assert_logical(refresh_cache, null.ok = FALSE, len = 1L, any.missing = FALSE) structure( list( @@ -191,7 +195,8 @@ fetch_args_list <- function( base_url = base_url, dry_run = dry_run, debug = debug, - format_type = format_type + format_type = format_type, + refresh_cache = refresh_cache ), class = "fetch_args" ) @@ -219,6 +224,9 @@ print.fetch_args <- function(x, ...) { #' - For `fetch`: a tibble or a JSON-like list #' @export #' @include cache.R +#' @importFrom openssl md5 +#' @importFrom cachem is.key_missing +#' @importFrom tibble tibble as_tibble #' fetch <- function(epidata_call, fetch_args = fetch_args_list()) { stopifnot(inherits(epidata_call, "epidata_call")) @@ -228,48 +236,49 @@ fetch <- function(epidata_call, fetch_args = fetch_args_list()) { epidata_call <- with_base_url(epidata_call, fetch_args$base_url) } + # Just display the epidata_call object, don't fetch the data if (fetch_args$dry_run) { return(epidata_call) } + # Just display the raw response from the API, don't parse if (fetch_args$debug) { return(fetch_debug(epidata_call, fetch_args)) } - cache_epidata_call(epidata_call, fetch_args = fetch_args) -} - -#' Fetches the data and returns a tibble -#' @rdname fetch_tbl -#' -#' @param epidata_call an instance of `epidata_call` -#' @param fetch_args a `fetch_args` object -#' @importFrom readr read_csv -#' @importFrom httr stop_for_status content -#' @importFrom tibble as_tibble tibble -#' @return -#' - For `fetch_tbl`: a [`tibble::tibble`] -#' @keywords internal -fetch_tbl <- function(epidata_call, fetch_args = fetch_args_list()) { - stopifnot(inherits(epidata_call, "epidata_call")) - stopifnot(inherits(fetch_args, "fetch_args")) + # Check if the data is cachable + is_cachable <- check_is_cachable(epidata_call, fetch_args) + if (is_cachable) { + check_for_cache_warnings(epidata_call, fetch_args) - if (epidata_call$only_supports_classic) { - cli::cli_abort( - c( - "This endpoint only supports the classic message format, due to non-standard behavior. - Use fetch_classic instead." - ), - epidata_call = epidata_call, - class = "only_supports_classic_format" - ) + # Check if the data is in the cache + target <- request_url(epidata_call) + hashed <- md5(target) + cached <- cache_environ$epidatr_cache$get(hashed) + if (!is.key_missing(cached)) { + return(cached[[1]]) # extract `fetched` from `fetch()`, no metadata + } } - response_content <- fetch_classic(epidata_call, fetch_args = fetch_args) - if (fetch_args$return_empty && length(response_content) == 0) { - return(tibble()) + # Need to actually get the data, since its either not in the cache or we're not caching + runtime <- system.time(if (epidata_call$only_supports_classic) { + fetch_args[["disable_data_frame_parsing"]] <- TRUE + fetched <- fetch_classic(epidata_call, fetch_args) + } else { + response_content <- fetch_classic(epidata_call, fetch_args = fetch_args) + if (fetch_args$return_empty && length(response_content) == 0) { + fetched <- tibble() + } else { + fetched <- parse_data_frame(epidata_call, response_content, fetch_args$disable_date_parsing) %>% as_tibble() + } + }) + + # Add it to the cache if appropriate + if (is_cachable || (fetch_args$refresh_cache && is_cache_enabled())) { + cache_environ$epidatr_cache$set(hashed, list(fetched, Sys.time(), runtime)) } - return(parse_data_frame(epidata_call, response_content, fetch_args$disable_date_parsing) %>% as_tibble()) + + return(fetched) } #' Fetches the data, raises on epidata errors, and returns the results as a @@ -303,6 +312,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) { ) } } + if (response_content$message != "success") { cli::cli_warn( c( @@ -311,6 +321,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) { class = "epidata_warning" ) } + return(response_content$epidata) } diff --git a/R/utils.R b/R/utils.R index 15356734..c74e9492 100644 --- a/R/utils.R +++ b/R/utils.R @@ -30,26 +30,6 @@ check_is_recent <- function(dates, max_age) { (!is.null(dates) && any(dates >= threshold)) } -#' helper that checks whether a call is actually cachable -#' -#' @keywords internal -check_is_cachable <- function(epidata_call, fetch_args) { - as_of_cachable <- (!is.null(epidata_call$params$as_of) && !identical(epidata_call$params$as_of, "*")) - issues_cachable <- (!is.null(epidata_call$params$issues) && all(!identical(epidata_call$params$issues, "*"))) - is_cachable <- ( - !is.null(cache_environ$epidatr_cache) && - (as_of_cachable || issues_cachable) && - !(fetch_args$dry_run) && - is.null(fetch_args$base_url) && - !fetch_args$debug && - fetch_args$format_type == "json" && - is.null(fetch_args$fields) && - !fetch_args$disable_date_parsing && - !fetch_args$disable_data_frame_parsing - ) - return(is_cachable) -} - #' helper to convert a date wildcard ("*") to an appropriate epirange #' #' @keywords internal diff --git a/man/cache_epidata_call.Rd b/man/check_for_cache_warnings.Rd similarity index 55% rename from man/cache_epidata_call.Rd rename to man/check_for_cache_warnings.Rd index 5d3c3f8b..978dbe42 100644 --- a/man/cache_epidata_call.Rd +++ b/man/check_for_cache_warnings.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cache.R -\name{cache_epidata_call} -\alias{cache_epidata_call} -\title{Dispatch caching} +\name{check_for_cache_warnings} +\alias{check_for_cache_warnings} +\title{Check for warnings for the cache} \usage{ -cache_epidata_call(epidata_call, fetch_args = fetch_args_list()) +check_for_cache_warnings(epidata_call, fetch_args) } \arguments{ \item{epidata_call}{the \code{epidata_call} object} @@ -12,7 +12,6 @@ cache_epidata_call(epidata_call, fetch_args = fetch_args_list()) \item{fetch_args}{the args list for fetch as generated by \code{\link[=fetch_args_list]{fetch_args_list()}}} } \description{ -The guts of caching, its interposed between fetch and the specific fetch -methods. Internal method only. +Adds warnings when arguments are potentially too recent to use with the cache. } \keyword{internal} diff --git a/man/check_is_cachable.Rd b/man/check_is_cachable.Rd index 5238f89c..7b812594 100644 --- a/man/check_is_cachable.Rd +++ b/man/check_is_cachable.Rd @@ -1,12 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cache.R \name{check_is_cachable} \alias{check_is_cachable} -\title{helper that checks whether a call is actually cachable} +\title{Helper that checks whether a call is actually cachable} \usage{ check_is_cachable(epidata_call, fetch_args) } \description{ -helper that checks whether a call is actually cachable +The cacheable endpoints are those with \code{as_of} or \code{issues} parameters: +\itemize{ +\item pub_covidcast +\item pub_covid_hosp_state_timeseries +\item pub_ecdc_ili +\item pub_flusurv +\item pub_fluview_clinical +\item pub_fluview +\item pub_kcdc_ili +\item pub_nidss_flu +\item pub_paho_dengue +} } \keyword{internal} diff --git a/man/clear_cache.Rd b/man/clear_cache.Rd index f5d232b9..c34b0715 100644 --- a/man/clear_cache.Rd +++ b/man/clear_cache.Rd @@ -38,9 +38,9 @@ environment } \description{ Deletes the current cache and resets a new cache. Deletes local data! If you -are using a session unique cache, you will have to pass the arguments you -used for \code{set_cache} earlier, otherwise the system-wide \code{.Renviron}-based -defaults will be used. +are using a session unique cache, the previous settings will be reused. If +you pass in new \code{set_cache} arguments, they will take precedence over the +previous settings. } \seealso{ \code{\link{set_cache}} to start a new cache (and general caching info), diff --git a/man/covidcast_epidata.Rd b/man/covidcast_epidata.Rd index e11c63b1..fbf3f1cc 100644 --- a/man/covidcast_epidata.Rd +++ b/man/covidcast_epidata.Rd @@ -27,7 +27,7 @@ an object containing fields for every signal: \if{html}{\out{