1010 pull_nhsn_data ,
1111 pull_data ,
1212 pull_data_from_file ,
13- pull_preliminary_nhsn_data , check_last_updated
13+ check_last_updated
1414)
1515from delphi_nhsn .constants import TYPE_DICT , PRELIM_TYPE_DICT , PRELIM_DATASET_ID , MAIN_DATASET_ID
1616
2121 "test_data" : TEST_DATA ,
2222 "msg_prefix" : "" ,
2323 "prelim_flag" : False ,
24+ "expected_data" : f"{ TEST_DIR } /test_data/expected_df.csv" ,
25+ "type_dict" : TYPE_DICT ,
2426 },
2527
2628 {"id" :PRELIM_DATASET_ID ,
2729 "test_data" :PRELIM_TEST_DATA ,
2830 "msg_prefix" : "Preliminary " ,
2931 "prelim_flag" : True ,
32+ "expected_data" : f"{ TEST_DIR } /test_data/expected_df_prelim.csv" ,
33+ "type_dict" : PRELIM_TYPE_DICT ,
3034 }
3135 ]
3236
@@ -75,123 +79,63 @@ def test_pull_from_file(self, caplog, dataset, params_w_patch):
7579
7680 @patch ("delphi_nhsn.pull.Socrata" )
7781 @patch ("delphi_nhsn.pull.create_backup_csv" )
78- def test_pull_nhsn_data_output (self , mock_create_backup , mock_socrata , caplog , params ):
82+ @pytest .mark .parametrize ('dataset' , DATASETS , ids = ["data" , "prelim_data" ])
83+ def test_pull_nhsn_data_output (self , mock_create_backup , mock_socrata , dataset , caplog , params ):
7984 now = time .time ()
8085 # Mock Socrata client and its get method
8186 mock_client = MagicMock ()
8287 mock_socrata .return_value = mock_client
83- mock_client .get .side_effect = [TEST_DATA ,[]]
84-
88+ mock_client .get .side_effect = [dataset ["test_data" ],[]]
8589 mock_client .get_metadata .return_value = {"rowsUpdatedAt" : now }
8690
8791 backup_dir = params ["common" ]["backup_dir" ]
8892 test_token = params ["indicator" ]["socrata_token" ]
8993 custom_run = params ["common" ]["custom_run" ]
90-
9194 logger = get_structured_logger ()
9295
93- result = pull_nhsn_data (test_token , backup_dir , custom_run , issue_date = None , logger = logger )
96+ expected_df = pd .read_csv (dataset ["expected_data" ])
97+
98+ result = pull_nhsn_data (test_token , backup_dir , custom_run , issue_date = None , logger = logger , preliminary = dataset ["prelim_flag" ])
9499 mock_create_backup .assert_called_once ()
95100
96- expected_columns = set (TYPE_DICT . keys () )
101+ expected_columns = set (expected_df . columns )
97102 assert set (result .columns ) == expected_columns
98103
99104 for column in list (result .columns ):
100105 # some states don't report confirmed admissions rsv
101- if column == "confirmed_admissions_rsv_ew" :
106+ if column == "confirmed_admissions_rsv_ew" and not dataset ["prelim_flag" ]:
107+ continue
108+ if column == "confirmed_admissions_rsv_ew_prelim" and dataset ["prelim_flag" ]:
102109 continue
103110 assert result [column ].notnull ().all (), f"{ column } has rogue NaN"
104111
112+ expected_df = expected_df .astype (dataset ["type_dict" ])
105113
106- @patch ("delphi_nhsn.pull.Socrata" )
107- def test_pull_nhsn_data_backup (self , mock_socrata , caplog , params ):
108- now = time .time ()
109- # Mock Socrata client and its get method
110- mock_client = MagicMock ()
111- mock_socrata .return_value = mock_client
112- mock_client .get .side_effect = [TEST_DATA , []]
113-
114- mock_client .get_metadata .return_value = {"rowsUpdatedAt" : now }
114+ pd .testing .assert_frame_equal (expected_df , result )
115115
116- today = pd .Timestamp .today ().strftime ("%Y%m%d" )
117- backup_dir = params ["common" ]["backup_dir" ]
118- custom_run = params ["common" ]["custom_run" ]
119- test_token = params ["indicator" ]["socrata_token" ]
120-
121- # Load test data
122- expected_data = pd .DataFrame (TEST_DATA )
123-
124- logger = get_structured_logger ()
125- # Call function with test token
126- pull_nhsn_data (test_token , backup_dir , custom_run , issue_date = None , logger = logger )
127-
128- # Check logger used:
129- assert "Backup file created" in caplog .text
130-
131- # Check that backup file was created
132- backup_files = glob .glob (f"{ backup_dir } /{ today } *" )
133- assert len (backup_files ) == 2 , "Backup file was not created"
134-
135- for backup_file in backup_files :
136- if backup_file .endswith (".csv.gz" ):
137- dtypes = expected_data .dtypes .to_dict ()
138- actual_data = pd .read_csv (backup_file , dtype = dtypes )
139- else :
140- actual_data = pd .read_parquet (backup_file )
141- pd .testing .assert_frame_equal (expected_data , actual_data )
142-
143- # clean up
144- for file in backup_files :
145- os .remove (file )
146116
147117 @patch ("delphi_nhsn.pull.Socrata" )
148- @patch ( "delphi_nhsn.pull.create_backup_csv" )
149- def test_pull_prelim_nhsn_data_output (self , mock_create_backup , mock_socrata , caplog , params ):
118+ @pytest . mark . parametrize ( 'dataset' , DATASETS , ids = [ "data" , "prelim_data" ] )
119+ def test_pull_nhsn_data_backup (self , mock_socrata , dataset , caplog , params ):
150120 now = time .time ()
151121 # Mock Socrata client and its get method
152122 mock_client = MagicMock ()
153123 mock_socrata .return_value = mock_client
154- mock_client .get .side_effect = [PRELIM_TEST_DATA , []]
124+ mock_client .get .side_effect = [dataset [ "test_data" ] , []]
155125
156126 mock_client .get_metadata .return_value = {"rowsUpdatedAt" : now }
157127
158- backup_dir = params ["common" ]["backup_dir" ]
159- test_token = params ["indicator" ]["socrata_token" ]
160- custom_run = params ["common" ]["custom_run" ]
161-
162- logger = get_structured_logger ()
163-
164- result = pull_preliminary_nhsn_data (test_token , backup_dir , custom_run , issue_date = None , logger = logger )
165- mock_create_backup .assert_called_once ()
166-
167- expected_columns = set (PRELIM_TYPE_DICT .keys ())
168- assert set (result .columns ) == expected_columns
169-
170- for column in list (result .columns ):
171- # some states don't report confirmed admissions rsv
172- if column == "confirmed_admissions_rsv_ew_prelim" :
173- continue
174- assert result [column ].notnull ().all (), f"{ column } has rogue NaN"
175- @patch ("delphi_nhsn.pull.Socrata" )
176- def test_pull_prelim_nhsn_data_backup (self , mock_socrata , caplog , params ):
177- now = time .time ()
178- # Mock Socrata client and its get method
179- mock_client = MagicMock ()
180- mock_socrata .return_value = mock_client
181- mock_client .get .side_effect = [PRELIM_TEST_DATA , []]
182-
183- mock_client .get_metadata .return_value = {"rowsUpdatedAt" : now }
184128 today = pd .Timestamp .today ().strftime ("%Y%m%d" )
185129 backup_dir = params ["common" ]["backup_dir" ]
186130 custom_run = params ["common" ]["custom_run" ]
187131 test_token = params ["indicator" ]["socrata_token" ]
188132
189133 # Load test data
190- expected_data = pd .DataFrame (PRELIM_TEST_DATA )
134+ expected_data = pd .DataFrame (dataset [ "test_data" ] )
191135
192136 logger = get_structured_logger ()
193137 # Call function with test token
194- pull_preliminary_nhsn_data (test_token , backup_dir , custom_run , issue_date = None , logger = logger )
138+ pull_nhsn_data (test_token , backup_dir , custom_run , issue_date = None , logger = logger , preliminary = dataset [ "prelim_flag" ] )
195139
196140 # Check logger used:
197141 assert "Backup file created" in caplog .text
@@ -212,6 +156,7 @@ def test_pull_prelim_nhsn_data_backup(self, mock_socrata, caplog, params):
212156 for file in backup_files :
213157 os .remove (file )
214158
159+
215160 @pytest .mark .parametrize ('dataset' , DATASETS , ids = ["data" , "prelim_data" ])
216161 @pytest .mark .parametrize ("updatedAt" , [time .time (), time .time () - 172800 ], ids = ["updated" , "stale" ])
217162 @patch ("delphi_nhsn.pull.Socrata" )
0 commit comments