From 2cd8ebc5b71ac77977a6e7d9c15183f3a7802839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Po=C5=BAniak?= Date: Thu, 25 Sep 2025 04:03:50 -0700 Subject: [PATCH] Unzip dataset if it is in .bz2 format --- benchmark/dataset.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/benchmark/dataset.py b/benchmark/dataset.py index 3b992ecb..968f37d0 100644 --- a/benchmark/dataset.py +++ b/benchmark/dataset.py @@ -1,6 +1,7 @@ import os import shutil import tarfile +import bz2 import urllib.request import urllib.parse from dataclasses import dataclass, field @@ -201,6 +202,19 @@ def _extract_or_move_file(self, tmp_path, target_path): with tarfile.open(tmp_path) as file: file.extractall(target_path) os.remove(tmp_path) + elif tmp_path.endswith(".bz2"): + print(f"Extracting bz2: {tmp_path} -> {target_path}") + Path(target_path).parent.mkdir(exist_ok=True) + # Remove .bz2 extension from target path if present + if str(target_path).endswith(".bz2"): + final_target_path = str(target_path)[:-4] # Remove .bz2 + else: + final_target_path = target_path + + with bz2.BZ2File(tmp_path, 'rb') as f_in: + with open(final_target_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + os.remove(tmp_path) else: print(f"Moving: {tmp_path} -> {target_path}") Path(target_path).parent.mkdir(exist_ok=True)