Skip to content

Commit 57368e9

Browse files
authored
2034 add retry loop to Google symptoms data pull (#2057)
* first implementation * add testing and more robust conditions * revert unneeded change * only retry once and added other applicable error * lint * fixed test * lint
1 parent 3450dfc commit 57368e9

File tree

2 files changed

+61
-6
lines changed

2 files changed

+61
-6
lines changed

google_symptoms/delphi_google_symptoms/pull.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Retrieve data and wrangle into appropriate format."""
22
# -*- coding: utf-8 -*-
3+
import random
34
import re
5+
import time
46
from datetime import date, datetime # pylint: disable=unused-import
57

68
import numpy as np
79
import pandas as pd
810
import pandas_gbq
11+
from google.api_core.exceptions import BadRequest, InternalServerError, ServerError
912
from google.oauth2 import service_account
1013

1114
from .constants import COMBINED_METRIC, DC_FIPS, DTYPE_CONVERSIONS, METRICS, SYMPTOM_SETS
@@ -184,16 +187,30 @@ def pull_gs_data_one_geolevel(level, date_range):
184187
pd.DataFrame
185188
"""
186189
query = produce_query(level, date_range)
190+
df = None
191+
192+
# it is recommended to retry only once on a 500/503 error
193+
try:
194+
df = pandas_gbq.read_gbq(query, progress_bar_type=None, dtypes=DTYPE_CONVERSIONS)
195+
# pylint: disable=W0703
196+
except Exception as e:
197+
# Google sometimes reports a 400 error when the failure is actually a 500
198+
# https://github.com/googleapis/python-bigquery/issues/23
199+
if (
200+
# pylint: disable=E1101
201+
(isinstance(e, BadRequest) and e.reason == "backendError")
202+
or isinstance(e, (ServerError, InternalServerError))
203+
):
204+
time.sleep(2 + random.randint(0, 1000) / 1000.0)
205+
else:
206+
raise e
207+
if df is None:
208+
df = pandas_gbq.read_gbq(query, progress_bar_type=None, dtypes=DTYPE_CONVERSIONS)
187209

188-
df = pandas_gbq.read_gbq(query, progress_bar_type=None, dtypes = DTYPE_CONVERSIONS)
189210
if len(df) == 0:
190-
df = pd.DataFrame(
191-
columns=["open_covid_region_code", "date"] +
192-
list(colname_map.keys())
193-
)
211+
df = pd.DataFrame(columns=["open_covid_region_code", "date"] + list(colname_map.keys()))
194212

195213
df = preprocess(df, level)
196-
197214
return df
198215

199216

google_symptoms/tests/test_pull.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
import mock
33
from freezegun import freeze_time
44
from datetime import date, datetime
5+
from google.api_core.exceptions import BadRequest, ServerError
6+
57
import pandas as pd
8+
from google.rpc import error_details_pb2
69
from pandas.testing import assert_frame_equal
710

811
from delphi_google_symptoms.pull import (
@@ -120,6 +123,41 @@ def test_pull_one_gs_no_dates(self, mock_read_gbq):
120123
expected = pd.DataFrame(columns=new_keep_cols)
121124
assert_frame_equal(output, expected, check_dtype = False)
122125

126+
def test_pull_one_gs_retry_success(self):
127+
info = error_details_pb2.ErrorInfo(
128+
reason="backendError",
129+
)
130+
badRequestException = BadRequest(message="message", error_info=info)
131+
serverErrorException = ServerError(message="message")
132+
133+
with mock.patch("pandas_gbq.read_gbq") as mock_read_gbq:
134+
mock_read_gbq.side_effect = [badRequestException, pd.DataFrame()]
135+
136+
output = pull_gs_data_one_geolevel("state", ["", ""])
137+
expected = pd.DataFrame(columns=new_keep_cols)
138+
assert_frame_equal(output, expected, check_dtype = False)
139+
assert mock_read_gbq.call_count == 2
140+
141+
def test_pull_one_gs_retry_too_many(self):
142+
info = error_details_pb2.ErrorInfo(
143+
reason="backendError",
144+
)
145+
badRequestException = BadRequest(message="message", error_info=info)
146+
147+
with mock.patch("pandas_gbq.read_gbq") as mock_read_gbq:
148+
with pytest.raises(BadRequest):
149+
mock_read_gbq.side_effect = [badRequestException, badRequestException, pd.DataFrame()]
150+
pull_gs_data_one_geolevel("state", ["", ""])
151+
152+
153+
def test_pull_one_gs_retry_bad(self):
154+
badRequestException = BadRequest(message="message", )
155+
156+
with mock.patch("pandas_gbq.read_gbq") as mock_read_gbq:
157+
with pytest.raises(BadRequest):
158+
mock_read_gbq.side_effect = [badRequestException,pd.DataFrame()]
159+
pull_gs_data_one_geolevel("state", ["", ""])
160+
123161
def test_preprocess_no_data(self):
124162
output = preprocess(pd.DataFrame(columns=keep_cols), "state")
125163
expected = pd.DataFrame(columns=new_keep_cols)

0 commit comments

Comments
 (0)