From 1098180ac63e1e60ffdf64138f850aac571fb6da Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Fri, 10 Dec 2021 05:20:34 -0500 Subject: [PATCH 01/18] Update check_missing_values Moved check_val_missing to dynamic checks. Increased granularity, now informs for each geo_id+date combination for missing values Ignores first 6 days of the checking period since 7dav signals return NAs as part of smoothing --- .../delphi_utils/validator/dynamic.py | 36 +++++++++++++++++++ .../delphi_utils/validator/static.py | 8 ----- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index b81a445d1..82c2697c3 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -110,6 +110,8 @@ def validate(self, all_frames, report): self.check_max_allowed_max_date( max_date, geo_type, signal_type, report) + self.check_na_vals(geo_sig_df, geo_type, signal_type, report) + # Get relevant reference data from API dictionary. api_df_or_error = all_api_df[(geo_type, signal_type)] @@ -168,6 +170,40 @@ def validate(self, all_frames, report): if self.test_mode and kroc == 2: break + def check_na_vals(self, geo_sig_df, geo_type, signal_type, report): + """Check if there are any NA values. + + In particular, make sure that error doesn't occur for new Geo IDs introduced. 
+ + Arguments: + - geo_type: str; geo type name (county, msa, hrr, state) as in the CSV name + - signal_type: str; signal name as in the CSV name + - report: ValidationReport; report where results are added + + Returns: + - None + """ + def replace_first_six(df): + x = df.val.isnull() + # First 6 days have to be null + x.iloc[:6] = False + return df.time_value[x] + + grouped_df = geo_sig_df.groupby('geo_id') + error_df = grouped_df.apply(replace_first_six) + + if not error_df.empty: + for index, value in error_df.iteritems(): + report.add_raised_error( + ValidationFailure(f"check_val_missing (geo_id {index[0]})", + geo_type=geo_type, + signal=signal_type, + date=value + ) + ) + + report.increment_total_checks() + def check_min_allowed_max_date(self, max_date, geo_type, signal_type, report): """Check if time since data was generated is reasonable or too long ago. diff --git a/_delphi_utils_python/delphi_utils/validator/static.py b/_delphi_utils_python/delphi_utils/validator/static.py index 767b5761c..48b17b888 100644 --- a/_delphi_utils_python/delphi_utils/validator/static.py +++ b/_delphi_utils_python/delphi_utils/validator/static.py @@ -295,14 +295,6 @@ def check_bad_val(self, df_to_test, nameformat, signal_type, report): report.increment_total_checks() - if df_to_test['val'].isnull().values.any(): - report.add_raised_error( - ValidationFailure("check_val_missing", - filename=nameformat, - message="val column can't have any cell that is NA")) - - report.increment_total_checks() - if not df_to_test[(df_to_test['val'] < 0)].empty: report.add_raised_error( ValidationFailure("check_val_lt_0", From 39891310d62a58d2214f7e9fa2cdbdaefa928413 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Fri, 10 Dec 2021 05:21:33 -0500 Subject: [PATCH 02/18] Fix NA_Values Testing Move check_na_values to test_dynamic, changed test assertions to fit --- .../tests/validator/test_dynamic.py | 20 ++++++++++++++++++- .../tests/validator/test_static.py | 9 
--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index 321ce63fb..ddef07579 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -105,7 +105,25 @@ def test_0_vs_many(self): assert len(report.raised_errors) == 1 assert report.raised_errors[0].check_name == "check_rapid_change_num_rows" - +class TestCheckNaVals: + params = { + "common": { + "data_source": "", + "span_length": 1, + "end_date": "2020-09-02" + } + } + def test_missing(self): + validator = DynamicValidator(self.params) + report = ValidationReport([]) + data = {"val": [np.nan] * 14, "geo_id": [0,1] * 7, "time_value": ["2021-09-01"] * 14 } + df = pd.DataFrame(data) + #df.set_index(range(7), inplace=True) + validator.check_na_vals(df, "geo", "signal", report) + + assert len(report.raised_errors) == 2 + assert report.raised_errors[0].check_name == "check_val_missing (geo_id 0)" + assert report.raised_errors[1].check_name == "check_val_missing (geo_id 1)" class TestCheckAvgValDiffs: params = { diff --git a/_delphi_utils_python/tests/validator/test_static.py b/_delphi_utils_python/tests/validator/test_static.py index 09286ba9c..bf270b4fd 100644 --- a/_delphi_utils_python/tests/validator/test_static.py +++ b/_delphi_utils_python/tests/validator/test_static.py @@ -362,15 +362,6 @@ def test_empty_df(self): assert len(report.raised_errors) == 0 - def test_missing(self): - validator = StaticValidator(self.params) - report = ValidationReport([]) - df = pd.DataFrame([np.nan], columns=["val"]) - validator.check_bad_val(df, FILENAME, "signal", report) - - assert len(report.raised_errors) == 1 - assert report.raised_errors[0].check_name == "check_val_missing" - def test_lt_0(self): validator = StaticValidator(self.params) report = ValidationReport([]) From 78af92f988c192ff17e77dd7234475d28e510520 Mon Sep 17 00:00:00 
2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Wed, 19 Jan 2022 17:13:52 -0500 Subject: [PATCH 03/18] Clarify error messaging Keeping error_name as check_val_missing for easier processing, shifted relevant info to message --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 82c2697c3..68fc6b87e 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -195,10 +195,11 @@ def replace_first_six(df): if not error_df.empty: for index, value in error_df.iteritems(): report.add_raised_error( - ValidationFailure(f"check_val_missing (geo_id {index[0]})", + ValidationFailure("check_val_missing", geo_type=geo_type, signal=signal_type, - date=value + date=value, + message=f"geo_id {index[0]}" ) ) From 29c84d2ce9e09b4fec8d7ff05d87422ef7967826 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Wed, 19 Jan 2022 17:15:05 -0500 Subject: [PATCH 04/18] Improved testing Added na value outside of checking range to test for filtering --- _delphi_utils_python/tests/validator/test_dynamic.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index ddef07579..133b8f28d 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -105,25 +105,29 @@ def test_0_vs_many(self): assert len(report.raised_errors) == 1 assert report.raised_errors[0].check_name == "check_rapid_change_num_rows" + class TestCheckNaVals: params = { "common": { "data_source": "", - "span_length": 1, + "span_length": 14, "end_date": "2020-09-02" } } def test_missing(self): validator = 
DynamicValidator(self.params) report = ValidationReport([]) - data = {"val": [np.nan] * 14, "geo_id": [0,1] * 7, "time_value": ["2021-09-01"] * 14 } + data = {"val": [np.nan] * 15, "geo_id": [0,1] * 7 + [2], + "time_value": ["2021-08-30"] * 14 + ["2021-05-01"]} df = pd.DataFrame(data) #df.set_index(range(7), inplace=True) validator.check_na_vals(df, "geo", "signal", report) assert len(report.raised_errors) == 2 - assert report.raised_errors[0].check_name == "check_val_missing (geo_id 0)" - assert report.raised_errors[1].check_name == "check_val_missing (geo_id 1)" + assert report.raised_errors[0].check_name == "check_val_missing" + assert report.raised_errors[0].message == "geo_id 0" + assert report.raised_errors[1].check_name == "check_val_missing" + assert report.raised_errors[1].message == "geo_id 1" class TestCheckAvgValDiffs: params = { From 13a253880e3a17b714c574b650e13f69d16a9a10 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Wed, 19 Jan 2022 17:43:09 -0500 Subject: [PATCH 05/18] Limit check_na_vals to checking window Apply filter to df to only consider dates within the checking window --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 1 + _delphi_utils_python/delphi_utils/validator/validate.py | 2 +- _delphi_utils_python/tests/validator/test_dynamic.py | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 68fc6b87e..4183e65f7 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -191,6 +191,7 @@ def replace_first_six(df): grouped_df = geo_sig_df.groupby('geo_id') error_df = grouped_df.apply(replace_first_six) + error_df = error_df[error_df.time_value >= self.params.time_window.start_date] if not error_df.empty: for index, value in error_df.iteritems(): diff --git 
a/_delphi_utils_python/delphi_utils/validator/validate.py b/_delphi_utils_python/delphi_utils/validator/validate.py index 9c4861b76..1adf0f41b 100644 --- a/_delphi_utils_python/delphi_utils/validator/validate.py +++ b/_delphi_utils_python/delphi_utils/validator/validate.py @@ -57,7 +57,7 @@ def validate(self): report = ValidationReport(self.suppressed_errors, self.data_source, self.dry_run) frames_list = load_all_files(self.export_dir, self.time_window.start_date, self.time_window.end_date) - self.static_validation.validate(frames_list, report) + #self.static_validation.validate(frames_list, report) all_frames = aggregate_frames(frames_list) self.dynamic_validation.validate(all_frames, report) return report diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index 133b8f28d..b2adf0f28 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -122,6 +122,8 @@ def test_missing(self): df = pd.DataFrame(data) #df.set_index(range(7), inplace=True) validator.check_na_vals(df, "geo", "signal", report) + import pdb + pdb.set_trace() assert len(report.raised_errors) == 2 assert report.raised_errors[0].check_name == "check_val_missing" From 91d6decf948497fd99cdb5fd0cf0d70110553f68 Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Thu, 20 Jan 2022 00:55:58 -0500 Subject: [PATCH 06/18] Prevent filtering on empty df --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 4183e65f7..85ecd225e 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -191,9 +191,10 @@ def replace_first_six(df): grouped_df = geo_sig_df.groupby('geo_id') error_df = 
grouped_df.apply(replace_first_six) - error_df = error_df[error_df.time_value >= self.params.time_window.start_date] if not error_df.empty: + error_df = error_df[error_df.time_value >= + self.params.time_window.start_date] for index, value in error_df.iteritems(): report.add_raised_error( ValidationFailure("check_val_missing", From 7bbd515886d7a8b676d28d5cc5a08fd5c90a76bb Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Thu, 20 Jan 2022 01:14:10 -0500 Subject: [PATCH 07/18] Fix lint --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 85ecd225e..0e12a1df8 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -193,7 +193,7 @@ def replace_first_six(df): error_df = grouped_df.apply(replace_first_six) if not error_df.empty: - error_df = error_df[error_df.time_value >= + error_df = error_df[error_df.time_value >= self.params.time_window.start_date] for index, value in error_df.iteritems(): report.add_raised_error( From 8e188158db176c3a5bbb28afff3b6d0fa5e8d7ef Mon Sep 17 00:00:00 2001 From: QX Teo <37101453+qx-teo@users.noreply.github.com> Date: Thu, 20 Jan 2022 01:53:36 -0500 Subject: [PATCH 08/18] Fix test Test dataframe had time_value in the wrong type, should be datetime.date --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 10 +++++----- .../delphi_utils/validator/validate.py | 2 +- _delphi_utils_python/tests/validator/test_dynamic.py | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 0e12a1df8..82bfd869e 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ 
b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -183,18 +183,18 @@ def check_na_vals(self, geo_sig_df, geo_type, signal_type, report): Returns: - None """ - def replace_first_six(df): + def replace_first_six(df, start_date): x = df.val.isnull() # First 6 days have to be null x.iloc[:6] = False - return df.time_value[x] + df = df[x] + return df.time_value[df.time_value >= start_date] grouped_df = geo_sig_df.groupby('geo_id') - error_df = grouped_df.apply(replace_first_six) + error_df = grouped_df.apply(replace_first_six, + start_date = self.params.time_window.start_date) if not error_df.empty: - error_df = error_df[error_df.time_value >= - self.params.time_window.start_date] for index, value in error_df.iteritems(): report.add_raised_error( ValidationFailure("check_val_missing", diff --git a/_delphi_utils_python/delphi_utils/validator/validate.py b/_delphi_utils_python/delphi_utils/validator/validate.py index 1adf0f41b..9c4861b76 100644 --- a/_delphi_utils_python/delphi_utils/validator/validate.py +++ b/_delphi_utils_python/delphi_utils/validator/validate.py @@ -57,7 +57,7 @@ def validate(self): report = ValidationReport(self.suppressed_errors, self.data_source, self.dry_run) frames_list = load_all_files(self.export_dir, self.time_window.start_date, self.time_window.end_date) - #self.static_validation.validate(frames_list, report) + self.static_validation.validate(frames_list, report) all_frames = aggregate_frames(frames_list) self.dynamic_validation.validate(all_frames, report) return report diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index b2adf0f28..1f0348315 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -120,10 +120,8 @@ def test_missing(self): data = {"val": [np.nan] * 15, "geo_id": [0,1] * 7 + [2], "time_value": ["2021-08-30"] * 14 + ["2021-05-01"]} df = pd.DataFrame(data) - 
#df.set_index(range(7), inplace=True) + df.time_value = (pd.to_datetime(df.time_value)).dt.date validator.check_na_vals(df, "geo", "signal", report) - import pdb - pdb.set_trace() assert len(report.raised_errors) == 2 assert report.raised_errors[0].check_name == "check_val_missing" From 9ca21f925efd087e143d4463e7623ec83323f255 Mon Sep 17 00:00:00 2001 From: Kathryn M Mazaitis Date: Mon, 31 Jan 2022 13:34:13 -0500 Subject: [PATCH 09/18] Flip order of filename and asset id Make it easier for a human to browse the input_cache folder --- .../delphi_dsew_community_profile/pull.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dsew_community_profile/delphi_dsew_community_profile/pull.py b/dsew_community_profile/delphi_dsew_community_profile/pull.py index a65b26a07..39dcd0c7d 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/pull.py +++ b/dsew_community_profile/delphi_dsew_community_profile/pull.py @@ -276,9 +276,13 @@ def _parse_sheet(self, sheet): def as_cached_filename(params, config): """Formulate a filename to uniquely identify this report in the input cache.""" + # eg "Community Profile Report 20220128.xlsx" + # but delimiters vary; don't get tripped up if they do something wacky like + # Community.Profile.Report.20220128.xlsx + name, _, ext = config['filename'].rpartition(".") return os.path.join( params['indicator']['input_cache'], - f"{config['assetId']}--{config['filename']}" + f"{name}--{config['assetId']}.{ext}" ) def fetch_listing(params): From 36820c44259f93a692a142782b8918ee060aa435 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 31 Jan 2022 17:29:32 -0500 Subject: [PATCH 10/18] initial add prop signals finish prop generation --- .../constants.py | 18 +++++-- .../delphi_dsew_community_profile/pull.py | 48 ++++++++++++++++--- .../delphi_dsew_community_profile/run.py | 7 +-- 3 files changed, 58 insertions(+), 15 deletions(-) diff --git 
a/dsew_community_profile/delphi_dsew_community_profile/constants.py b/dsew_community_profile/delphi_dsew_community_profile/constants.py index 51c62b5ea..7286398b4 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/constants.py +++ b/dsew_community_profile/delphi_dsew_community_profile/constants.py @@ -50,22 +50,30 @@ class Transform: SIGNALS = { "total": { "is_rate" : False, - "api_name": "naats_total_7dav" + "api_name": "naats_total_7dav", + "make_prop": False }, "positivity": { "is_rate" : True, - "api_name": "naats_positivity_7dav" + "api_name": "naats_positivity_7dav", + "make_prop": False }, "confirmed covid-19 admissions": { "is_rate" : False, - "api_name": "confirmed_admissions_covid_1d_7dav" + "api_name": "confirmed_admissions_covid_1d_7dav", + "make_prop": True, + "api_prop_name": "confirmed_admissions_covid_1d_prop_7dav" } } COUNTS_7D_SIGNALS = {key for key, value in SIGNALS.items() if not value["is_rate"]} -def make_signal_name(key): +def make_signal_name(key, is_prop=False): """Convert a signal key to the corresponding signal name for the API.""" + if is_prop: + return SIGNALS[key]["api_prop_name"] return SIGNALS[key]["api_name"] -NEWLINE="\n" +NEWLINE = "\n" +NOT_PROP = "not prop" +IS_PROP = "prop" diff --git a/dsew_community_profile/delphi_dsew_community_profile/pull.py b/dsew_community_profile/delphi_dsew_community_profile/pull.py index a65b26a07..416310b79 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/pull.py +++ b/dsew_community_profile/delphi_dsew_community_profile/pull.py @@ -11,8 +11,9 @@ from delphi_utils.geomap import GeoMapper -from .constants import TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE -from .constants import DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING +from .constants import (TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE, + IS_PROP, NOT_PROP, + DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING) # YYYYMMDD # example: "Community Profile Report 20211104.xlsx" @@ -248,7 +249,7 @@ def _parse_sheet(self, sheet): if 
(sheet.level == "msa" or sheet.level == "county") \ and self.publish_date < datetime.date(2021, 1, 8) \ and sig == "confirmed covid-19 admissions": - self.dfs[(sheet.level, sig)] = pd.DataFrame( + self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame( columns = ["geo_id", "timestamp", "val", \ "se", "sample_size", "publish_date"] ) @@ -258,7 +259,7 @@ def _parse_sheet(self, sheet): assert len(sig_select) > 0, \ f"No {sig} in any of {select}\n\nAll headers:\n{NEWLINE.join(list(df.columns))}" - self.dfs[(sheet.level, sig)] = pd.concat([ + self.dfs[(sheet.level, sig, NOT_PROP)] = pd.concat([ pd.DataFrame({ "geo_id": sheet.geo_id_select(df).apply(sheet.geo_id_apply), "timestamp": pd.to_datetime(self.times[si[0]][sig]), @@ -271,7 +272,7 @@ def _parse_sheet(self, sheet): ]) for sig in COUNTS_7D_SIGNALS: - self.dfs[(sheet.level, sig)]["val"] /= 7 # 7-day total -> 7-day average + self.dfs[(sheet.level, sig, NOT_PROP)]["val"] /= 7 # 7-day total -> 7-day average def as_cached_filename(params, config): @@ -390,13 +391,46 @@ def fetch_new_reports(params, logger=None): # add nation from state geomapper = GeoMapper() for sig in SIGNALS: - state_key = ("state", sig) + state_key = ("state", sig, NOT_PROP) if state_key not in ret: continue - ret[("nation", sig)] = nation_from_state( + ret[("nation", sig, NOT_PROP)] = nation_from_state( ret[state_key].rename(columns={"geo_id": "state_id"}), sig, geomapper ) + for key, df in ret.copy().items(): + (geo, sig, _) = key + if SIGNALS[sig]["make_prop"]: + ret[(geo, sig, IS_PROP)] = generate_prop_signal(df, geo, geomapper) + return ret + +def generate_prop_signal(df, geo, geo_mapper): + """Transform base df into a proportion (per 100k population).""" + if geo == "state": + geo = "state_id" + if geo == "county": + geo = "fips" + + # Add population data + if geo == "msa": + map_df = geo_mapper.get_crosswalk("fips", geo) + map_df = geo_mapper.add_population_column( + map_df, "fips" + ).drop( + "fips", axis=1 + ).groupby( + geo + ).sum( + 
).reset_index( + ) + df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner") + else: + df = geo_mapper.add_population_column(df, geo, geocode_col="geo_id") + + df["val"] = round(df["val"]/df["population"]*100000, 7) + df.drop(["population", geo], axis=1, inplace=True) + + return df diff --git a/dsew_community_profile/delphi_dsew_community_profile/run.py b/dsew_community_profile/delphi_dsew_community_profile/run.py index d27c96216..27ce1fe5a 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/run.py +++ b/dsew_community_profile/delphi_dsew_community_profile/run.py @@ -20,7 +20,7 @@ from delphi_utils.export import create_export_csv import pandas as pd -from .constants import make_signal_name +from .constants import make_signal_name, IS_PROP from .pull import fetch_new_reports @@ -58,14 +58,15 @@ def replace_date_param(p): run_stats = [] dfs = fetch_new_reports(params, logger) for key, df in dfs.items(): - (geo, sig) = key + (geo, sig, prop_flag) = key if sig not in params["indicator"]["export_signals"]: continue + is_prop = prop_flag == IS_PROP dates = create_export_csv( df, params['common']['export_dir'], geo, - make_signal_name(sig), + make_signal_name(sig, is_prop), **export_params ) if len(dates)>0: From 5b77a1d56fb9d67cdaf332693d2f9930132b918b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Feb 2022 15:03:43 -0500 Subject: [PATCH 11/18] tests --- dsew_community_profile/tests/test_pull.py | 83 ++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/dsew_community_profile/tests/test_pull.py b/dsew_community_profile/tests/test_pull.py index 60f0fa5dd..b898e21b6 100644 --- a/dsew_community_profile/tests/test_pull.py +++ b/dsew_community_profile/tests/test_pull.py @@ -9,7 +9,7 @@ from delphi_dsew_community_profile.pull import DatasetTimes from delphi_dsew_community_profile.pull import Dataset -from delphi_dsew_community_profile.pull import fetch_listing, 
nation_from_state +from delphi_dsew_community_profile.pull import fetch_listing, nation_from_state, generate_prop_signal example = namedtuple("example", "given expected") @@ -213,3 +213,84 @@ def test_nation_from_state(self): 'sample_size': [None],}), check_like=True ) + + def test_generate_prop_signal_msa(self): + geomapper = GeoMapper() + county_pop = geomapper.get_crosswalk("fips", "pop") + county_msa = geomapper.get_crosswalk("fips", "msa") + msa_pop = county_pop.merge(county_msa, on="fips", how="inner").groupby("msa").sum().reset_index() + + test_df = pd.DataFrame({ + 'geo_id': ['35620', '31080'], + 'timestamp': [datetime(year=2020, month=1, day=1)]*2, + 'val': [15., 150.], + 'se': [None, None], + 'sample_size': [None, None],}) + + nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"]) + la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"]) + + expected_df = pd.DataFrame({ + 'geo_id': ['35620', '31080'], + 'timestamp': [datetime(year=2020, month=1, day=1)]*2, + 'val': [15. / nyc_pop * 100000, 150. 
/ la_pop * 100000], + 'se': [None, None], + 'sample_size': [None, None],}) + + pd.testing.assert_frame_equal( + generate_prop_signal( + test_df.copy(), + "msa", + geomapper + ), + expected_df, + check_like=True + ) + def test_generate_prop_signal_non_msa(self): + geomapper = GeoMapper() + + geos = { + "state": { + "code_name": "state_id", + "geo_names": ['pa', 'wv'] + }, + "county": { + "code_name": "fips", + "geo_names": ['36061', '06037'] + }, + # nation uses the same logic path so no need to test separately + "hhs": { + "code_name": "hhs", + "geo_names": ["1", "4"] + } + } + + for geo, settings in geos.items(): + geo_pop = geomapper.get_crosswalk(settings["code_name"], "pop") + + test_df = pd.DataFrame({ + 'geo_id': settings["geo_names"], + 'timestamp': [datetime(year=2020, month=1, day=1)]*2, + 'val': [15., 150.], + 'se': [None, None], + 'sample_size': [None, None],}) + + pop1 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][0], "pop"]) + pop2 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][1], "pop"]) + + expected_df = pd.DataFrame({ + 'geo_id': settings["geo_names"], + 'timestamp': [datetime(year=2020, month=1, day=1)]*2, + 'val': [15. / pop1 * 100000, 150. 
/ pop2 * 100000], + 'se': [None, None], + 'sample_size': [None, None],}) + + pd.testing.assert_frame_equal( + generate_prop_signal( + test_df.copy(), + geo, + geomapper + ), + expected_df, + check_like=True + ) From 8b6c649fc4a7bc16bf462d99bc1a9360b07f5989 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Feb 2022 15:06:28 -0500 Subject: [PATCH 12/18] add to validation smooth sigs list --- ansible/templates/dsew_community_profile-params-prod.json.j2 | 4 +++- dsew_community_profile/tests/params.json.template | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ansible/templates/dsew_community_profile-params-prod.json.j2 b/ansible/templates/dsew_community_profile-params-prod.json.j2 index fd377d758..ec3e254c3 100644 --- a/ansible/templates/dsew_community_profile-params-prod.json.j2 +++ b/ansible/templates/dsew_community_profile-params-prod.json.j2 @@ -26,7 +26,9 @@ "ref_window_size": 7, "smoothed_signals": [ "naats_total_7dav", - "naats_positivity_7dav" + "naats_positivity_7dav", + "confirmed_admissions_covid_1d_prop_7dav", + "confirmed_admissions_covid_1d_7dav" ] } } diff --git a/dsew_community_profile/tests/params.json.template b/dsew_community_profile/tests/params.json.template index 89cee4bf0..645bd253f 100644 --- a/dsew_community_profile/tests/params.json.template +++ b/dsew_community_profile/tests/params.json.template @@ -25,7 +25,9 @@ "ref_window_size": 7, "smoothed_signals": [ "naats_total_7dav", - "naats_positivity_7dav" + "naats_positivity_7dav", + "confirmed_admissions_covid_1d_prop_7dav", + "confirmed_admissions_covid_1d_7dav" ] } } From 68365b3f7962e7c181a69bd831ef58c9dd3bf596 Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Feb 2022 17:32:37 -0500 Subject: [PATCH 13/18] Document make_signal_name Co-authored-by: Katie Mazaitis --- .../delphi_dsew_community_profile/constants.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) 
diff --git a/dsew_community_profile/delphi_dsew_community_profile/constants.py b/dsew_community_profile/delphi_dsew_community_profile/constants.py index 7286398b4..85f2e7e73 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/constants.py +++ b/dsew_community_profile/delphi_dsew_community_profile/constants.py @@ -69,7 +69,11 @@ class Transform: COUNTS_7D_SIGNALS = {key for key, value in SIGNALS.items() if not value["is_rate"]} def make_signal_name(key, is_prop=False): - """Convert a signal key to the corresponding signal name for the API.""" + """Convert a signal key to the corresponding signal name for the API. + + Note, this function gets called twice with the same `key` for signals that support + population-proportion ("prop") variants. + """ if is_prop: return SIGNALS[key]["api_prop_name"] return SIGNALS[key]["api_name"] From 0fc1743771cd3713754a1d421ad448a843e68c2d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Feb 2022 18:01:58 -0500 Subject: [PATCH 14/18] prop constants to boolean --- .../delphi_dsew_community_profile/constants.py | 8 ++++---- .../delphi_dsew_community_profile/run.py | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dsew_community_profile/delphi_dsew_community_profile/constants.py b/dsew_community_profile/delphi_dsew_community_profile/constants.py index 85f2e7e73..1404e52f4 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/constants.py +++ b/dsew_community_profile/delphi_dsew_community_profile/constants.py @@ -70,8 +70,8 @@ class Transform: def make_signal_name(key, is_prop=False): """Convert a signal key to the corresponding signal name for the API. - - Note, this function gets called twice with the same `key` for signals that support + + Note, this function gets called twice with the same `key` for signals that support population-proportion ("prop") variants. 
""" if is_prop: @@ -79,5 +79,5 @@ def make_signal_name(key, is_prop=False): return SIGNALS[key]["api_name"] NEWLINE = "\n" -NOT_PROP = "not prop" -IS_PROP = "prop" +IS_PROP = True +NOT_PROP = False diff --git a/dsew_community_profile/delphi_dsew_community_profile/run.py b/dsew_community_profile/delphi_dsew_community_profile/run.py index 27ce1fe5a..3ce69b325 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/run.py +++ b/dsew_community_profile/delphi_dsew_community_profile/run.py @@ -20,7 +20,7 @@ from delphi_utils.export import create_export_csv import pandas as pd -from .constants import make_signal_name, IS_PROP +from .constants import make_signal_name from .pull import fetch_new_reports @@ -58,10 +58,9 @@ def replace_date_param(p): run_stats = [] dfs = fetch_new_reports(params, logger) for key, df in dfs.items(): - (geo, sig, prop_flag) = key + (geo, sig, is_prop) = key if sig not in params["indicator"]["export_signals"]: continue - is_prop = prop_flag == IS_PROP dates = create_export_csv( df, params['common']['export_dir'], From ccc5ed6abab54f0763fca50178437b49de42973d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Feb 2022 18:02:18 -0500 Subject: [PATCH 15/18] drop duplicate rounding --- dsew_community_profile/delphi_dsew_community_profile/pull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsew_community_profile/delphi_dsew_community_profile/pull.py b/dsew_community_profile/delphi_dsew_community_profile/pull.py index 416310b79..b1ac7069b 100644 --- a/dsew_community_profile/delphi_dsew_community_profile/pull.py +++ b/dsew_community_profile/delphi_dsew_community_profile/pull.py @@ -430,7 +430,7 @@ def generate_prop_signal(df, geo, geo_mapper): else: df = geo_mapper.add_population_column(df, geo, geocode_col="geo_id") - df["val"] = round(df["val"]/df["population"]*100000, 7) + df["val"] = df["val"] / df["population"] * 100000 df.drop(["population", geo], axis=1, 
inplace=True) return df From 043e485953bef40aa9128c532e3852397d9035c3 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Feb 2022 18:05:10 -0500 Subject: [PATCH 16/18] add admissions to validator smoothing setting --- dsew_community_profile/params.json.template | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dsew_community_profile/params.json.template b/dsew_community_profile/params.json.template index 3a64d71ab..42fc7faad 100644 --- a/dsew_community_profile/params.json.template +++ b/dsew_community_profile/params.json.template @@ -32,7 +32,9 @@ "ref_window_size": 7, "smoothed_signals": [ "naats_total_7dav", - "naats_positivity_7dav" + "naats_positivity_7dav", + "confirmed_admissions_covid_1d_prop_7dav", + "confirmed_admissions_covid_1d_7dav" ] } } From 21aebd59b3948c17f36134f90e0bd11d78aaf21c Mon Sep 17 00:00:00 2001 From: Delphi Deploy Bot Date: Wed, 2 Feb 2022 15:24:08 +0000 Subject: [PATCH 17/18] chore: bump delphi_utils to 0.3.0 --- _delphi_utils_python/.bumpversion.cfg | 2 +- _delphi_utils_python/delphi_utils/__init__.py | 2 +- _delphi_utils_python/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/.bumpversion.cfg b/_delphi_utils_python/.bumpversion.cfg index 2d63919ca..8ce49f20b 100644 --- a/_delphi_utils_python/.bumpversion.cfg +++ b/_delphi_utils_python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.11 +current_version = 0.3.0 commit = True message = chore: bump delphi_utils to {new_version} tag = False diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py index 2475e9ac4..898e40e4b 100644 --- a/_delphi_utils_python/delphi_utils/__init__.py +++ b/_delphi_utils_python/delphi_utils/__init__.py @@ -15,4 +15,4 @@ from .nancodes import Nans from .weekday import Weekday -__version__ = "0.2.11" +__version__ = "0.3.0" diff --git a/_delphi_utils_python/setup.py 
b/_delphi_utils_python/setup.py index 5cde1e57a..779ef0fd4 100644 --- a/_delphi_utils_python/setup.py +++ b/_delphi_utils_python/setup.py @@ -26,7 +26,7 @@ setup( name="delphi_utils", - version="0.2.11", + version="0.3.0", description="Shared Utility Functions for Indicators", long_description=long_description, long_description_content_type="text/markdown", From 806aa43b2265cf0b90224d7907063ed69a0e27c4 Mon Sep 17 00:00:00 2001 From: Delphi Deploy Bot Date: Wed, 2 Feb 2022 15:24:08 +0000 Subject: [PATCH 18/18] chore: bump covidcast-indicators to 0.3.0 --- .bumpversion.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5a589e53a..a3af113e1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.25 +current_version = 0.3.0 commit = True message = chore: bump covidcast-indicators to {new_version} tag = False