Skip to content

Commit c5d5dcc

Browse files
committed
hsanci -> hsa-nci + add comment about min() for dedupe
1 parent a1c2f61 commit c5d5dcc

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

nssp/delphi_nssp/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
"state",
1010
"county",
1111
"hhs",
12-
"hsanci",
12+
"hsa-nci",
1313
]
1414

1515
SIGNALS_MAP = {

nssp/delphi_nssp/run.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,10 +137,13 @@ def run_module(params, logger=None):
137137
df = geo_mapper.add_geocode(df, "state_code", "hhs", from_col="state_code", new_col="geo_id")
138138
df = geo_mapper.aggregate_by_weighted_sum(df, "geo_id", "val", "timestamp", "population")
139139
df = df.rename(columns={"weighted_val": "val"})
140-
elif geo == "hsanci":
140+
elif geo == "hsa-nci":
141141
df = df[["hsa_nci_id", "val", "timestamp"]]
142142
df = df[df["hsa_nci_id"] != "All"]
143-
df = df.groupby(["hsa_nci_id", "timestamp"])['val'].min().reset_index()
143+
# We use min() below just to pick a representative value, since all
144+
# the values in a given HSA-NCI level are the same (the data is
145+
# reported at the HSA-NCI level).
146+
df = df.groupby(["hsa_nci_id", "timestamp"])["val"].min().reset_index()
144147
df = df.rename(columns={"hsa_nci_id": "geo_id"})
145148
else:
146149
df = df[df["county"] != "All"]

0 commit comments

Comments
 (0)