@@ -45,6 +45,7 @@ def add_needed_columns(df, col_names=None):
     df = add_default_nancodes(df)
     return df
 
+
 def logging(start_time, run_stats, logger):
     """Boilerplate making logs."""
     elapsed_time_in_seconds = round(time.time() - start_time, 2)
@@ -137,6 +138,14 @@ def run_module(params, logger=None):
             df = geo_mapper.add_geocode(df, "state_code", "hhs", from_col="state_code", new_col="geo_id")
             df = geo_mapper.aggregate_by_weighted_sum(df, "geo_id", "val", "timestamp", "population")
             df = df.rename(columns={"weighted_val": "val"})
+        elif geo == "hsa_nci":
+            df = df[["hsa_nci_id", "val", "timestamp"]]
+            df = df[df["hsa_nci_id"] != "All"]
+            # We use drop_duplicates below just to pick a representative value,
+            # since all the values in a given HSA-NCI level are the same
+            # (the data is reported at the HSA-NCI level).
+            df.drop_duplicates(["hsa_nci_id", "timestamp", "val"], inplace=True)
+            df = df.rename(columns={"hsa_nci_id": "geo_id"})
         else:
             df = df[df["county"] != "All"]
             df["geo_id"] = df["fips"]
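For context, here is a minimal standalone sketch of what the new `hsa_nci` branch does, using `pandas.DataFrame.drop_duplicates` to keep one representative row per region and date. The toy DataFrame, its values, and the example HSA-NCI id are invented for illustration and are not part of the patch.

```python
# Illustrative sketch only: mirrors the hsa_nci branch above on made-up data.
import pandas as pd

df = pd.DataFrame({
    "hsa_nci_id": ["101", "101", "101", "All"],
    "timestamp": ["2024-01-06"] * 4,
    "val": [3.2, 3.2, 3.2, 9.9],  # identical within HSA-NCI "101"
})

df = df[["hsa_nci_id", "val", "timestamp"]]
df = df[df["hsa_nci_id"] != "All"]
# Keep a single representative row per (hsa_nci_id, timestamp, val) combination.
df = df.drop_duplicates(["hsa_nci_id", "timestamp", "val"])
df = df.rename(columns={"hsa_nci_id": "geo_id"})
print(df)  # one row: geo_id "101", val 3.2, timestamp "2024-01-06"
```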