Skip to content

Commit ec8418f

Browse files
humengyu2012, hanouticelina, github-actions[bot]
authored
Expand file lock scope to resolve concurrency issues during downloads (#3063)
* Fix race conditions on symlink-less filesystems by extending lock coverage
* Update src/huggingface_hub/file_download.py
  Co-authored-by: célina <hanouticelina@gmail.com>
* move normalization of lock_path before mkdir
* Apply style fixes
---------
Co-authored-by: célina <hanouticelina@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent a7ee9ed commit ec8418f

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

src/huggingface_hub/file_download.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,16 +1130,6 @@ def _hf_hub_download_to_cache_dir(
11301130
# In that case store a ref.
11311131
_cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
11321132

1133-
# If file already exists, return it (except if force_download=True)
1134-
if not force_download:
1135-
if os.path.exists(pointer_path):
1136-
return pointer_path
1137-
1138-
if os.path.exists(blob_path):
1139-
# we have the blob already, but not the pointer
1140-
_create_symlink(blob_path, pointer_path, new_blob=False)
1141-
return pointer_path
1142-
11431133
# Prevent parallel downloads of the same file with a lock.
11441134
# etag could be duplicated across repos,
11451135
lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
@@ -1152,9 +1142,21 @@ def _hf_hub_download_to_cache_dir(
11521142
if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
11531143
blob_path = "\\\\?\\" + os.path.abspath(blob_path)
11541144

1145+
Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
1146+
1147+
# pointer already exists -> immediate return
1148+
if not force_download and os.path.exists(pointer_path):
1149+
return pointer_path
1150+
1151+
# Blob exists but pointer must be (safely) created -> take the lock
1152+
if not force_download and os.path.exists(blob_path):
1153+
with WeakFileLock(lock_path):
1154+
if not os.path.exists(pointer_path):
1155+
_create_symlink(blob_path, pointer_path, new_blob=False)
1156+
return pointer_path
1157+
11551158
# Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
11561159

1157-
Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
11581160
with WeakFileLock(lock_path):
11591161
_download_to_tmp_and_move(
11601162
incomplete_path=Path(blob_path + ".incomplete"),

0 commit comments

Comments
 (0)