|
| 1 | +import glob |
1 | 2 | import os |
2 | 3 | import pytest |
3 | 4 |
|
4 | 5 | import pandas as pd |
| 6 | + |
| 7 | +from delphi_utils import get_structured_logger |
5 | 8 | from delphi_utils.geomap import GeoMapper |
6 | 9 |
|
7 | 10 | from delphi_nchs_mortality.pull import pull_nchs_mortality_data, standardize_columns |
@@ -98,13 +101,30 @@ def test_bad_file_with_missing_cols(self): |
98 | 101 | with pytest.raises(ValueError): |
99 | 102 | pull_nchs_mortality_data(SOCRATA_TOKEN, backup_dir = "", custom_run = True, test_file = "bad_data_with_missing_cols.csv") |
100 | 103 |
|
101 | | - def test_backup_today_data(self): |
| 104 | + def test_backup_today_data(self, caplog): |
102 | 105 | today = pd.Timestamp.today().strftime("%Y%m%d") |
103 | 106 | backup_dir = "./raw_data_backups" |
104 | | - pull_nchs_mortality_data(SOCRATA_TOKEN, backup_dir = backup_dir, custom_run = False, test_file = "test_data.csv") |
105 | | - backup_file = f"{backup_dir}/{today}.csv.gz" |
106 | | - backup_df = pd.read_csv(backup_file) |
| 107 | + logger = get_structured_logger() |
| 108 | + pull_nchs_mortality_data(SOCRATA_TOKEN, backup_dir = backup_dir, custom_run = False, test_file = "test_data.csv", logger=logger) |
| 109 | + |
| 110 | + # Check logger used: |
| 111 | + assert "Backup file created" in caplog.text |
| 112 | + |
| 113 | + # Check that backup file was created |
| 114 | + backup_files = glob.glob(f"{backup_dir}/{today}*") |
| 115 | + assert len(backup_files) == 2, "Backup file was not created" |
| 116 | + |
107 | 117 | source_df = pd.read_csv("test_data/test_data.csv") |
| 118 | + for backup_file in backup_files: |
| 119 | + if backup_file.endswith(".csv.gz"): |
| 120 | + backup_df = pd.read_csv(backup_file) |
| 121 | + else: |
| 122 | + backup_df = pd.read_parquet(backup_file) |
| 123 | + pd.testing.assert_frame_equal(source_df, backup_df) |
| 124 | + |
| 125 | + backup_file_parquet = f"{backup_dir}/{today}.parquet" |
| 126 | + backup_df = pd.read_parquet(backup_file_parquet) |
108 | 127 | pd.testing.assert_frame_equal(source_df, backup_df) |
| 128 | + |
109 | 129 | if os.path.exists(backup_file): |
110 | 130 | os.remove(backup_file) |
0 commit comments