66import pandas as pd
77from sodapy import Socrata
88
9- from .constants import NEWLINE , SIGNALS , SIGNALS_MAP , TYPE_DICT
9+ from .constants import *
1010
1111
1212def warn_string (df , type_dict ):
@@ -28,19 +28,13 @@ def warn_string(df, type_dict):
2828
2929
3030def pull_nssp_data (socrata_token : str ):
31- """Pull the latest NSSP ER visits data, and conforms it into a dataset.
32-
33- The output dataset has:
34-
35- - Each row corresponds to a single observation
36- - Each row additionally has columns for the signals in SIGNALS
31+ """Pull the latest NSSP ER visits primary dataset
32+ https://data.cdc.gov/Public-Health-Surveillance/NSSP-Emergency-Department-Visit-Trajectories-by-St/rdmq-nq56/data_preview
3733
3834 Parameters
3935 ----------
4036 socrata_token: str
41- My App Token for pulling the NWSS data (could be the same as the nchs data)
42- test_file: Optional[str]
43- When not null, name of file from which to read test data
37+ My App Token for pulling the NSSP data (could be the same as the nchs data)
4438
4539 Returns
4640 -------
@@ -72,3 +66,57 @@ def pull_nssp_data(socrata_token: str):
7266
7367 keep_columns = ["timestamp" , "geography" , "county" , "fips" ]
7468 return df_ervisits [SIGNALS + keep_columns ]
69+
70+
def secondary_pull_nssp_data(socrata_token: str):
    """Pull the latest NSSP ER visits secondary dataset.

    Source:
    https://data.cdc.gov/Public-Health-Surveillance/2023-Respiratory-Virus-Response-NSSP-Emergency-Dep/7mra-9cq9/data_preview

    The output dataset has:

    - Each row corresponds to a single observation
    - A ``geo_type`` column inferred from ``geo_value``: "nation" for
      "National", "hhs" for values starting with "Region " (the prefix is
      stripped from ``geo_value``), and "state" for everything else
    - ``signal`` values translated through SECONDARY_SIGNALS_MAP
    - Only the columns in SECONDARY_KEEP_COLS, cast per SECONDARY_TYPE_DICT

    Parameters
    ----------
    socrata_token: str
        My App Token for pulling the NSSP data (could be the same as the nchs data)

    Returns
    -------
    pd.DataFrame
        Dataframe as described above.

    Raises
    ------
    ValueError
        If casting to SECONDARY_TYPE_DICT raises a KeyError (e.g. an expected
        column is absent); the message is built by ``warn_string``.
    """
    # Pull data from Socrata API
    client = Socrata("data.cdc.gov", socrata_token)
    results = []
    offset = 0
    limit = 50000  # maximum limit allowed by SODA 2.0
    try:
        while True:
            # An explicit order is required for offset paging to be stable:
            # without it the API may return rows in a different order on each
            # request, duplicating or dropping rows across pages.
            page = client.get("7mra-9cq9", limit=limit, offset=offset, order=":id")
            if not page:
                break  # exit the loop if no more results
            results.extend(page)
            offset += limit
    finally:
        # Release the underlying HTTP session even if a request fails.
        client.close()

    df_ervisits = pd.DataFrame.from_records(results)
    # NOTE(review): the code below relies on SECONDARY_COLS_MAP producing
    # "geo_value" and "signal" columns -- confirm against constants.
    df_ervisits = df_ervisits.rename(columns=SECONDARY_COLS_MAP)

    # geo_type is not provided in the dataset, so we infer it from the geo_value
    # which is either state names, "National" or hhs region numbers
    df_ervisits["geo_type"] = "state"

    df_ervisits.loc[df_ervisits["geo_value"] == "National", "geo_type"] = "nation"

    hhs_region_mask = df_ervisits["geo_value"].str.startswith("Region ")
    # regex=False: "Region " is a literal, and the default for `regex` differs
    # between pandas versions.
    df_ervisits.loc[hhs_region_mask, "geo_value"] = df_ervisits.loc[
        hhs_region_mask, "geo_value"
    ].str.replace("Region ", "", regex=False)
    df_ervisits.loc[hhs_region_mask, "geo_type"] = "hhs"

    # Values absent from the mapping become NaN (pandas Series.map semantics).
    df_ervisits["signal"] = df_ervisits["signal"].map(SECONDARY_SIGNALS_MAP)

    df_ervisits = df_ervisits[SECONDARY_KEEP_COLS]

    try:
        df_ervisits = df_ervisits.astype(SECONDARY_TYPE_DICT)
    except KeyError as exc:
        raise ValueError(warn_string(df_ervisits, SECONDARY_TYPE_DICT)) from exc

    return df_ervisits
0 commit comments