Skip to content

Commit 7423be6

Browse files
committed
Pull dtype into a constant, read ints if you can, and lean on floaty_int if you can't.
1 parent 5d9aee7 commit 7423be6

File tree

1 file changed

+14
-7
lines changed

1 file changed

+14
-7
lines changed

src/acquisition/covidcast/csv_importer.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,16 @@ class CsvImporter:
3838
MIN_YEAR = 2019
3939
MAX_YEAR = 2030
4040

41+
DTYPES = {
42+
"geo_id": str,
43+
"val": float,
44+
"se": float,
45+
"sample_size": float,
46+
"missing_val": int,
47+
"missing_se": int,
48+
"missing_sample_size": int
49+
}
50+
4151
# NOTE: this should be a Python 3.7+ `dataclass`, but the server is on 3.4
4252
# See https://docs.python.org/3/library/dataclasses.html
4353
class RowValues:
@@ -183,10 +193,9 @@ def floaty_int(value):
183193
"""
184194

185195
float_value = float(value)
186-
int_value = round(float_value)
187-
if float_value != int_value:
196+
if not float_value.is_integer():
188197
raise ValueError('not an int: "%s"' % str(value))
189-
return int_value
198+
return int(float_value)
190199

191200
@staticmethod
192201
def maybe_apply(func, quantity):
@@ -341,12 +350,10 @@ def load_csv(filepath, geo_type, pandas=pandas):
341350
logger = get_structured_logger('load_csv')
342351

343352
try:
344-
dtypes = {"geo_id": str, "val": float, "se": float, "sample_size": float, "missing_val": float, "missing_se": float, "missing_sample_size": float}
345-
table = pandas.read_csv(filepath, dtype=dtypes)
353+
table = pandas.read_csv(filepath, dtype=CsvImporter.DTYPES)
346354
except ValueError as e:
347355
logger.warning(event='Failed to open CSV with specified dtypes, switching to str', detail=str(e), file=filepath)
348-
dtypes = {"geo_id": str, "val": str, "se": str, "sample_size": str, "missing_val": float, "missing_se": float, "missing_sample_size": float}
349-
table = pandas.read_csv(filepath, dtype=dtypes)
356+
table = pandas.read_csv(filepath, dtype='str')
350357

351358
if not CsvImporter.is_header_valid(table.columns):
352359
logger.warning(event='invalid header', detail=table.columns, file=filepath)

0 commit comments

Comments
 (0)