22import os
33import re
44import shutil
5+ import sys
56import tarfile
67import tempfile
78import urllib
@@ -52,17 +53,32 @@ def is_link_in_dir(info, base):
5253 return is_path_in_dir (info .linkname , base_dir = tip )
5354
5455
55- def filter_safe_paths (members ):
def filter_safe_zipinfos(members):
    """Yields the zip members whose path is accepted by `is_path_in_dir`.

    Each member's `filename` is checked against the resolved current
    directory (`resolve_path(".")`). Members that fail the check are not
    yielded; a warning naming the skipped entry is emitted instead.

    Args:
        members: Iterable of objects exposing a `filename` attribute, such
            as the `zipfile.ZipInfo` entries from `ZipFile.infolist()`.

    Yields:
        The members that passed the path check, in their original order.
    """
    base_dir = resolve_path(".")
    for finfo in members:
        if is_path_in_dir(finfo.filename, base_dir):
            yield finfo
        else:
            # Zip members carry their path in `filename`; `name` is the
            # `TarInfo` attribute and does not exist on `ZipInfo`, so using
            # it here would raise `AttributeError` instead of warning.
            warnings.warn(
                "Skipping invalid path during archive extraction: "
                f"'{finfo.filename}'.",
                stacklevel=2,
            )
69+
70+
71+ def filter_safe_tarinfos (members ):
72+ base_dir = resolve_path ("." )
73+ for finfo in members :
74+ valid_path = False
75+ if finfo .issym () or finfo .islnk ():
6376 if is_link_in_dir (finfo , base_dir ):
6477 valid_path = True
6578 yield finfo
79+ elif is_path_in_dir (finfo .name , base_dir ):
80+ valid_path = True
81+ yield finfo
6682 if not valid_path :
6783 warnings .warn (
6884 "Skipping invalid path during archive extraction: "
@@ -71,6 +87,35 @@ def filter_safe_paths(members):
7187 )
7288
7389
def extract_open_archive(archive, path="."):
    """Extracts an open tar or zip archive to the provided directory.

    This function filters unsafe paths during extraction: zip members go
    through `filter_safe_zipinfos` and tar members through
    `filter_safe_tarinfos` before being handed to `extractall`.

    Args:
        archive: The archive object, either a `TarFile` or a `ZipFile`.
        path: Where to extract the archive file.
    """
    if isinstance(archive, zipfile.ZipFile):
        # Zip archive.
        archive.extractall(
            path, members=filter_safe_zipinfos(archive.infolist())
        )
        return

    # Tar archive.
    extractall_kwargs = {}
    # Extraction filters (`filter="data"`) were introduced in Python 3.12 and
    # backported to security releases of older versions. Detect the feature
    # instead of comparing version numbers so that every interpreter that
    # supports the filter uses it. Passing it explicitly is harmless on
    # versions where "data" is already the default.
    if hasattr(tarfile, "data_filter"):
        extractall_kwargs["filter"] = "data"
    archive.extractall(
        path,
        members=filter_safe_tarinfos(archive),
        **extractall_kwargs,
    )
117+
118+
74119def extract_archive (file_path , path = "." , archive_format = "auto" ):
75120 """Extracts an archive if it matches a support format.
76121
@@ -112,14 +157,7 @@ def extract_archive(file_path, path=".", archive_format="auto"):
112157 if is_match_fn (file_path ):
113158 with open_fn (file_path ) as archive :
114159 try :
115- if zipfile .is_zipfile (file_path ):
116- # Zip archive.
117- archive .extractall (path )
118- else :
119- # Tar archive, perhaps unsafe. Filter paths.
120- archive .extractall (
121- path , members = filter_safe_paths (archive )
122- )
160+ extract_open_archive (archive , path )
123161 except (tarfile .TarError , RuntimeError , KeyboardInterrupt ):
124162 if os .path .exists (path ):
125163 if os .path .isfile (path ):
0 commit comments