Skip to content

Commit 9641f02

Browse files
committed
Update functions to combine tables
1 parent 48a1b5a commit 9641f02

File tree

1 file changed

+38
-23
lines changed

1 file changed

+38
-23
lines changed

src/acquisition/rvdss/utils.py

Lines changed: 38 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -246,38 +246,55 @@ def get_detections_data(base_url,headers,update_date):
246246

247247
def expand_detections_columns(new_data):
    """Add derived percent-positivity columns and index the detections table.

    For each virus whose ``<virus>_positive_tests`` and ``<virus>_tests``
    columns are both present, a ``<virus>_pct_positive`` column is added
    (positive / tests * 100). Derived columns whose inputs are missing are
    skipped rather than raising.

    Influenza A/B have no test counts of their own here: ``flua_tests`` and
    ``flub_tests`` are copied from ``flu_tests``, which is also used as the
    denominator for their percentages. ``hpiv_positive_tests`` is the sum of
    the five HPIV subtype counts.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Detections table holding the five index columns as regular columns.
        It is modified in place (new columns are added to it).

    Returns
    -------
    pandas.DataFrame
        A frame indexed by ``epiweek``, ``time_value``, ``issue``,
        ``geo_type`` and ``geo_value``.

    Raises
    ------
    KeyError
        If any of the five index columns is missing from ``new_data``.

    Notes
    -----
    Zero test counts are not special-cased; the division is done as-is.
    """

    def has_columns(names):
        # True only when every input needed for a derived column exists.
        return all(name in new_data.columns for name in names)

    def add_pct_positive(virus):
        positive_col = f"{virus}_positive_tests"
        tests_col = f"{virus}_tests"
        if has_columns([positive_col, tests_col]):
            new_data[f"{virus}_pct_positive"] = (
                new_data[positive_col] / new_data[tests_col] * 100
            )

    # The order of the steps below fixes the order of the added columns.
    for virus in ("adv", "evrv", "flu", "sarscov2"):
        add_pct_positive(virus)

    # flu_tests is read below, so it must be part of the guard as well.
    if has_columns(["flua_positive_tests", "flub_positive_tests", "flu_tests"]):
        new_data["flua_tests"] = new_data["flu_tests"]
        new_data["flub_tests"] = new_data["flu_tests"]
        new_data["flua_pct_positive"] = (
            new_data["flua_positive_tests"] / new_data["flu_tests"] * 100
        )
        new_data["flub_pct_positive"] = (
            new_data["flub_positive_tests"] / new_data["flu_tests"] * 100
        )

    for virus in ("hcov", "hmpv", "rsv"):
        add_pct_positive(virus)

    # Every subtype column is read, so every one of them must be present.
    hpiv_parts = [
        f"hpiv{subtype}_positive_tests"
        for subtype in ("1", "2", "3", "4", "other")
    ]
    if has_columns(hpiv_parts + ["hpiv_tests"]):
        # Plain `+` (not DataFrame.sum) so missing values propagate as before.
        hpiv_total = new_data[hpiv_parts[0]]
        for part in hpiv_parts[1:]:
            hpiv_total = hpiv_total + new_data[part]
        new_data["hpiv_positive_tests"] = hpiv_total
        new_data["hpiv_pct_positive"] = (
            new_data["hpiv_positive_tests"] / new_data["hpiv_tests"] * 100
        )

    return new_data.set_index(
        ['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']
    )
271281

272282
def duplicate_provincial_detections(data):
    """Append a copy of the provincial rows re-labelled with geo_type "province".

    Rows whose ``geo_value`` is listed in ``PROVINCES`` are duplicated with
    ``geo_type`` set to ``"province"`` and appended to ``data``. ``data``
    itself is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Detections table. Presumably already indexed by ``epiweek``,
        ``time_value``, ``issue``, ``geo_type`` and ``geo_value`` (it is
        concatenated as-is with rows indexed that way) -- confirm at callers.

    Returns
    -------
    pandas.DataFrame
        ``data`` followed by the re-labelled provincial rows. If no row
        matches ``PROVINCES``, a re-indexed copy of ``data`` is returned.
    """
    index_columns = ['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']

    # reset_index() already returns a new frame, so no explicit copy is needed
    # to keep the caller's data untouched.
    dat = data.reset_index()

    # provincial data
    provincial_detections = dat.loc[dat['geo_value'].isin(PROVINCES)]

    if provincial_detections.empty:
        return dat.set_index(index_columns)

    # assign() builds a new frame instead of writing into the filtered slice,
    # which avoids pandas' SettingWithCopyWarning.
    provincial_detections = provincial_detections.assign(geo_type="province")
    provincial_detections = provincial_detections.set_index(index_columns)

    return pd.concat([data, provincial_detections])
281298

282299
def combine_tables(data_dict):
283300
num_tables = len(data_dict)
@@ -301,21 +318,19 @@ def combine_tables(data_dict):
301318
t[col] = np.where(t[colx].isnull(), t[coly], t[colx])
302319
t = t.drop([colx, coly],axis=1)
303320

304-
provincial_detections = duplicate_provincial_detections(t)
305-
combined_table = pd.concat([t,provincial_detections])
321+
table = duplicate_provincial_detections(t)
306322

307323
else:
308324
raise ValueError("Unexpected number of tables")
309325

310-
return(combined_table)
326+
return(table)
311327

312328
def fetch_archived_dashboard_data(url):
313329
headers = {
314330
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
315331
}
316332

317333
update_date = get_dashboard_update_date(url, headers)
318-
319334
detections_data = get_detections_data(url,headers,update_date)
320335
positive_data = get_positive_data(url,headers,update_date)
321336

0 commit comments

Comments
 (0)