Skip to content

Commit 44ee8b2

Browse files
authored
Merge pull request #665 from cmu-delphi/krivard/sigsdash-coverage
Signals dash generator: store a longer time series by using covidcast/coverage endpoint
2 parents eb0f0be + 73182cd commit 44ee8b2

File tree

2 files changed: 21 additions and 68 deletions

src/acquisition/covidcast/signal_dash_data_generator.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pandas as pd
1010

1111
from dataclasses import dataclass
12+
from epiweeks import Week
1213
from typing import List
1314

1415
# first party
@@ -17,7 +18,9 @@
1718
from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
1819

1920

20-
LOOKBACK_DAYS_FOR_COVERAGE = 28
21+
LOOKBACK_DAYS_FOR_COVERAGE = 56
22+
BASE_COVIDCAST = covidcast.covidcast.Epidata.BASE_URL[:-len("api.php")] + "covidcast"
23+
COVERAGE_URL = f"{BASE_COVIDCAST}/coverage?format=csv&signal={{source}}:{{signal}}&days={LOOKBACK_DAYS_FOR_COVERAGE}"
2124

2225
@dataclass
2326
class DashboardSignal:
@@ -195,27 +198,23 @@ def get_latest_time_value_from_metadata(dashboard_signal, metadata):
195198
def get_coverage(dashboard_signal: DashboardSignal,
196199
metadata) -> List[DashboardSignalCoverage]:
197200
"""Get the most recent coverage for the signal."""
198-
latest_time_value = get_latest_time_value_from_metadata(
199-
dashboard_signal, metadata)
200-
start_day = latest_time_value - datetime.timedelta(days = LOOKBACK_DAYS_FOR_COVERAGE)
201-
latest_data = covidcast.signal(
202-
dashboard_signal.source,
203-
dashboard_signal.covidcast_signal,
204-
end_day = latest_time_value,
205-
start_day = start_day)
206-
latest_data_without_megacounties = latest_data[~latest_data['geo_value'].str.endswith(
207-
'000')]
208-
count_by_geo_type_df = latest_data_without_megacounties.groupby(
209-
['geo_type', 'data_source', 'time_value', 'signal']).size().to_frame(
210-
'count').reset_index()
201+
count_by_geo_type_df = pd.read_csv(
202+
COVERAGE_URL.format(source=dashboard_signal.source,
203+
signal=dashboard_signal.covidcast_signal))
204+
try:
205+
count_by_geo_type_df["time_value"] = count_by_geo_type_df["time_value"].apply(
206+
lambda x: pd.to_datetime(str(x), format="%Y%m%d"))
207+
except:
208+
count_by_geo_type_df["time_value"] = count_by_geo_type_df["time_value"].apply(
209+
lambda x: pd.to_datetime(Week(x // 100, x % 100).startdate()))
211210

212211
signal_coverage_list = []
213212

214213
for _, row in count_by_geo_type_df.iterrows():
215214
signal_coverage = DashboardSignalCoverage(
216215
signal_id=dashboard_signal.db_id,
217216
date=row['time_value'].date(),
218-
geo_type=row['geo_type'],
217+
geo_type='county',
219218
count=row['count'])
220219
signal_coverage_list.append(signal_coverage)
221220

tests/acquisition/covidcast/test_signal_dash_data_generator.py

Lines changed: 7 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,8 @@ def test_get_latest_time_value_from_metadata(self):
182182
data_date = get_latest_time_value_from_metadata(signal, metadata)
183183
self.assertEqual(data_date, date(2021, 1, 1))
184184

185-
@patch("covidcast.signal")
185+
#@patch("covidcast.signal")
186+
@patch("pandas.read_csv")
186187
def test_get_coverage(self, mock_signal):
187188
signal = DashboardSignal(
188189
db_id=1, name="Change", source="chng",
@@ -198,18 +199,16 @@ def test_get_coverage(self, mock_signal):
198199
'signal'])
199200

200201
epidata_data = [
201-
['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "state", "PA"],
202-
['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "state", "NY"],
203-
['chng', 'chng-sig', pd.Timestamp("2020-01-02"), "state", "NY"],
202+
['chng', 'chng-sig', 20200101, 2],
203+
['chng', 'chng-sig', 20200102, 1],
204204
]
205205
epidata_df = pd.DataFrame(
206206
epidata_data,
207207
columns=[
208-
'data_source',
208+
'source',
209209
'signal',
210210
'time_value',
211-
'geo_type',
212-
'geo_value'])
211+
'count'])
213212

214213
mock_signal.return_value = epidata_df
215214

@@ -222,59 +221,14 @@ def test_get_coverage(self, mock_signal):
222221
2020,
223222
1,
224223
1),
225-
geo_type='state',
224+
geo_type='county',
226225
count=2),
227226
DashboardSignalCoverage(
228227
signal_id=1,
229228
date=date(
230229
2020,
231230
1,
232231
2),
233-
geo_type='state',
234-
count=1),
235-
]
236-
237-
self.assertListEqual(coverage, expected_coverage)
238-
239-
@patch("covidcast.signal")
240-
def test_get_coverage_megacounties_dropped(self, mock_signal):
241-
signal = DashboardSignal(
242-
db_id=1, name="Change", source="chng",
243-
covidcast_signal="chng-sig",
244-
latest_coverage_update=date(2021, 1, 1),
245-
latest_status_update=date(2021, 1, 1))
246-
data = [['chng', pd.Timestamp("2020-01-01"), "chng-sig"]]
247-
metadata = pd.DataFrame(
248-
data,
249-
columns=[
250-
'data_source',
251-
'max_time',
252-
'signal'])
253-
254-
epidata_data = [
255-
['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "county", "11111"],
256-
['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "county", "10000"],
257-
]
258-
epidata_df = pd.DataFrame(
259-
epidata_data,
260-
columns=[
261-
'data_source',
262-
'signal',
263-
'time_value',
264-
'geo_type',
265-
'geo_value'])
266-
267-
mock_signal.return_value = epidata_df
268-
269-
coverage = get_coverage(signal, metadata)
270-
271-
expected_coverage = [
272-
DashboardSignalCoverage(
273-
signal_id=1,
274-
date=date(
275-
2020,
276-
1,
277-
1),
278232
geo_type='county',
279233
count=1),
280234
]

0 commit comments

Comments
 (0)