Skip to content

Commit dbea6ad

Browse files
authored
Merge pull request #1416 from cmu-delphi/7dav_new_geo_ids
7dav new geo ids
2 parents 03e1d65 + a96d888 commit dbea6ad

File tree

4 files changed

+61
-17
lines changed

4 files changed

+61
-17
lines changed

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ def validate(self, all_frames, report):
110110
self.check_max_allowed_max_date(
111111
max_date, geo_type, signal_type, report)
112112

113+
self.check_na_vals(geo_sig_df, geo_type, signal_type, report)
114+
113115
# Get relevant reference data from API dictionary.
114116
api_df_or_error = all_api_df[(geo_type, signal_type)]
115117

@@ -168,6 +170,43 @@ def validate(self, all_frames, report):
168170
if self.test_mode and kroc == 2:
169171
break
170172

173+
def check_na_vals(self, geo_sig_df, geo_type, signal_type, report):
    """Check for unexpected NA values in the `val` column.

    For every geo_id, the six earliest rows are allowed to be NA so that
    newly introduced geo IDs do not raise an error. Any other NA value whose
    `time_value` is on or after the start of the validation time window is
    reported as a "check_val_missing" failure, one failure per offending row.

    Arguments:
        - geo_sig_df: pd.DataFrame; rows to check, read through its `geo_id`,
          `time_value` and `val` columns
        - geo_type: str; geo type name (county, msa, hrr, state) as in the CSV name
        - signal_type: str; signal name as in the CSV name
        - report: ValidationReport; report where results are added

    Returns:
        - None
    """
    # Number of leading rows per geo_id that may legitimately be NA.
    # NOTE(review): presumably the warm-up period of a 7-day average -- confirm.
    allowed_leading_na = 6
    start_date = self.params.time_window.start_date

    # Order each geo_id's rows chronologically so that the exemption really
    # covers the earliest dates and does not depend on the input row order.
    ordered = geo_sig_df.sort_values(["geo_id", "time_value"])

    # 0-based position of every row within its geo_id group.
    position = ordered.groupby("geo_id").cumcount()

    unexpected_na = (
        ordered["val"].isnull()
        & (position >= allowed_leading_na)
        & (ordered["time_value"] >= start_date)
    )
    flagged = ordered.loc[unexpected_na, ["geo_id", "time_value"]]

    # Iterate the flagged rows directly: this avoids `Series.iteritems()`
    # (removed in pandas 2.0) and the shape-dependent result of
    # `groupby.apply`, which may be a Series or a DataFrame.
    for geo_id, time_value in zip(flagged["geo_id"], flagged["time_value"]):
        report.add_raised_error(
            ValidationFailure("check_val_missing",
                              geo_type=geo_type,
                              signal=signal_type,
                              date=time_value,
                              message=f"geo_id {geo_id}"
                              )
        )

    report.increment_total_checks()
209+
171210
def check_min_allowed_max_date(self, max_date, geo_type, signal_type, report):
172211
"""Check if time since data was generated is reasonable or too long ago.
173212

_delphi_utils_python/delphi_utils/validator/static.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,6 @@ def check_bad_val(self, df_to_test, nameformat, signal_type, report):
295295

296296
report.increment_total_checks()
297297

298-
if df_to_test['val'].isnull().values.any():
299-
report.add_raised_error(
300-
ValidationFailure("check_val_missing",
301-
filename=nameformat,
302-
message="val column can't have any cell that is NA"))
303-
304-
report.increment_total_checks()
305-
306298
if not df_to_test[(df_to_test['val'] < 0)].empty:
307299
report.add_raised_error(
308300
ValidationFailure("check_val_lt_0",

_delphi_utils_python/tests/validator/test_dynamic.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,28 @@ def test_0_vs_many(self):
106106
assert len(report.raised_errors) == 1
107107
assert report.raised_errors[0].check_name == "check_rapid_change_num_rows"
108108

109+
class TestCheckNaVals:
    """Tests for DynamicValidator.check_na_vals."""

    params = {
        "common": {"data_source": "", "span_length": 14, "end_date": "2020-09-02"}
    }

    def test_missing(self):
        """All-NA input: expect exactly one failure each for geo_id 0 and geo_id 1."""
        validator = DynamicValidator(self.params)
        report = ValidationReport([])

        # geo_id 0 and 1 alternate over the first 14 rows (7 rows each);
        # geo_id 2 contributes a single, earlier-dated row.
        frame = pd.DataFrame({
            "val": [np.nan] * 15,
            "geo_id": [0, 1] * 7 + [2],
            "time_value": ["2021-08-30"] * 14 + ["2021-05-01"],
        })
        frame.time_value = pd.to_datetime(frame.time_value).dt.date

        validator.check_na_vals(frame, "geo", "signal", report)

        raised = report.raised_errors
        assert len(raised) == 2
        for failure, expected_message in zip(raised, ("geo_id 0", "geo_id 1")):
            assert failure.check_name == "check_val_missing"
            assert failure.message == expected_message
109131

110132
class TestCheckAvgValDiffs:
111133
params = {

_delphi_utils_python/tests/validator/test_static.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -362,15 +362,6 @@ def test_empty_df(self):
362362

363363
assert len(report.raised_errors) == 0
364364

365-
def test_missing(self):
366-
validator = StaticValidator(self.params)
367-
report = ValidationReport([])
368-
df = pd.DataFrame([np.nan], columns=["val"])
369-
validator.check_bad_val(df, FILENAME, "signal", report)
370-
371-
assert len(report.raised_errors) == 1
372-
assert report.raised_errors[0].check_name == "check_val_missing"
373-
374365
def test_lt_0(self):
375366
validator = StaticValidator(self.params)
376367
report = ValidationReport([])

0 commit comments

Comments
 (0)