|
23 | 23 | from .pull import pull_gs_data |
24 | 24 |
|
25 | 25 |
|
| 26 | +# pylint: disable=R0912 |
| 27 | +# pylint: disable=R0915 |
26 | 28 | def run_module(params): |
27 | 29 | """ |
28 | 30 | Run Google Symptoms module. |
@@ -57,22 +59,37 @@ def run_module(params): |
57 | 59 | num_export_days = params["indicator"]["num_export_days"] |
58 | 60 |
|
59 | 61 | if num_export_days is None: |
60 | | - # Calculate number of days based on what's missing from the API. |
| 62 | + # Generate a list of signals we expect to produce |
| 63 | + sensor_names = set( |
| 64 | + "_".join([metric, smoother, "search"]) |
| 65 | + for metric, smoother in product(COMBINED_METRIC, SMOOTHERS) |
| 66 | + ) |
| 67 | + |
| 68 | + # Fetch metadata to check how recent each signal is |
61 | 69 | metadata = covidcast.metadata() |
62 | | - gs_metadata = metadata[(metadata.data_source == "google-symptoms")] |
63 | | - |
64 | | - # Calculate number of days based on what validator expects. |
65 | | - max_expected_lag = lag_converter( |
66 | | - params["validation"]["common"].get("max_expected_lag", {"all": 4}) |
67 | | - ) |
68 | | - global_max_expected_lag = max( list(max_expected_lag.values()) ) |
69 | | - |
70 | | - # Select the larger number of days. Prevents validator from complaining about missing dates, |
71 | | - # and backfills in case of an outage. |
72 | | - num_export_days = max( |
73 | | - (datetime.today() - to_datetime(min(gs_metadata.max_time))).days + 1, |
74 | | - params["validation"]["common"].get("span_length", 14) + global_max_expected_lag |
75 | | - ) |
| 70 | + # Filter to only those we currently want to produce, ignore any old or deprecated signals |
| 71 | + gs_metadata = metadata[(metadata.data_source == "google-symptoms") & |
| 72 | + (metadata.signal.isin(sensor_names))] |
| 73 | + |
| 74 | + if sensor_names.difference(set(gs_metadata.signal)): |
| 75 | + # If any signal not in metadata yet, we need to backfill its full history. |
| 76 | + num_export_days = "all" |
| 77 | + else: |
| 78 | + # Calculate number of days based on what's missing from the API and |
| 79 | + # what the validator expects. |
| 80 | + max_expected_lag = lag_converter( |
| 81 | + params["validation"]["common"].get("max_expected_lag", {"all": 4}) |
| 82 | + ) |
| 83 | + global_max_expected_lag = max( list(max_expected_lag.values()) ) |
| 84 | + |
| 85 | + # Select the larger number of days. Prevents validator from complaining about |
| 86 | + # missing dates, and backfills in case of an outage. |
| 87 | + num_export_days = max( |
| 88 | + (datetime.today() - to_datetime(min(gs_metadata.max_time))).days + 1, |
| 89 | + params["validation"]["common"].get("span_length", 14) + global_max_expected_lag |
| 90 | + ) |
| 91 | + if num_export_days == "all": |
| 92 | + num_export_days = (export_end_date - export_start_date).days + 1 |
76 | 93 |
|
77 | 94 | logger = get_structured_logger( |
78 | 95 | __name__, filename=params["common"].get("log_filename"), |
|
0 commit comments