Skip to content

Commit cbc458b

Browse files
committed
also writing into parquet
1 parent 870024e commit cbc458b

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

_delphi_utils_python/delphi_utils/export.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,17 +189,18 @@ def create_backup_csv(
189189
issue = datetime.today().strftime("%Y%m%d")
190190

191191
backup_filename = [issue, geo_res, metric, sensor]
192-
backup_filename = "_".join(filter(None, backup_filename)) + ".csv.gz"
192+
backup_filename = "_".join(filter(None, backup_filename))
193193
backup_file = join(backup_dir, backup_filename)
194194
try:
195-
with gzip.open(backup_file, "wt", newline="") as f:
196-
df.to_csv(f, index=False, na_rep="NA")
195+
# de facto data format is csv, but parquet preserves data types (keeping both as an intermediary measure)
196+
df.to_csv(f"{backup_file}.csv.gz", index=False, na_rep="NA", compression='gzip')
197+
df.to_parquet(f"{backup_file}.parquet", index=False)
197198

198199
if logger:
199200
logger.info(
200201
"Backup file created",
201202
backup_file=backup_file,
202-
backup_size=getsize(backup_file),
203+
backup_size=getsize(f"{backup_file}.csv.gz"),
203204
)
204205
#pylint: disable=W0703
205206
except Exception as e:

_delphi_utils_python/tests/test_export.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,7 @@ def test_create_backup_regular(self, caplog, tmp_path):
402402

403403
actual = pd.read_csv(join(tmp_path, f"{today}_{geo_res}_{metric}_{sensor}.csv.gz"), dtype=dtypes, parse_dates=["timestamp"])
404404
assert self.DF.equals(actual)
405+
406+
actual_parquet = pd.read_parquet(join(tmp_path, f"{today}_{geo_res}_{metric}_{sensor}.parquet"))
407+
assert actual_parquet.equals(actual)
408+

0 commit comments

Comments
 (0)