Skip to content

Commit b1d8098

Browse files
authored
Fix 416 requested range not satisfiable (#2511)
* Fix 416 requested range not satisfiable * hashtag
1 parent ebac1b2 commit b1d8098

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

src/huggingface_hub/file_download.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,10 @@ def http_get(
427427
The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
428428
not set, the filename is guessed from the URL or the `Content-Disposition` header.
429429
"""
430+
if expected_size is not None and resume_size == expected_size:
431+
# If the file is already fully downloaded, we don't need to download it again.
432+
return
433+
430434
hf_transfer = None
431435
if constants.HF_HUB_ENABLE_HF_TRANSFER:
432436
if resume_size != 0:

tests/test_file_download.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,31 @@ def _mocked_hf_file_metadata(*args, **kwargs):
740740
with self.assertRaises(EnvironmentError):
741741
hf_hub_download(DUMMY_MODEL_ID, filename="pytorch_model.bin", cache_dir=cache_dir)
742742

743+
def test_hf_hub_download_when_tmp_file_is_complete(self):
744+
"""Regression test for #2511.
745+
746+
See https://github.com/huggingface/huggingface_hub/issues/2511.
747+
748+
When downloading a file, we first download to a temporary file and then move it to the final location.
749+
If the temporary file is already partially downloaded, we resume from where we left off.
750+
However, if the temporary file is already fully downloaded, we should not try to make a GET call with an empty range.
751+
This was causing a "416 Range Not Satisfiable" error.
752+
"""
753+
with SoftTemporaryDirectory() as tmpdir:
754+
# Download the file once
755+
filepath = Path(hf_hub_download(DUMMY_MODEL_ID, filename="pytorch_model.bin", cache_dir=tmpdir))
756+
757+
# Fake tmp file
758+
incomplete_filepath = Path(str(filepath.resolve()) + ".incomplete")
759+
incomplete_filepath.write_bytes(filepath.read_bytes()) # fake a fully downloaded (complete) tmp file
760+
filepath.resolve().unlink()
761+
762+
# delete snapshot folder to re-trigger a download
763+
shutil.rmtree(filepath.parents[2] / "snapshots")
764+
765+
# Download must not fail
766+
hf_hub_download(DUMMY_MODEL_ID, filename="pytorch_model.bin", cache_dir=tmpdir)
767+
743768
@expect_deprecation("cached_download")
744769
@expect_deprecation("url_to_filename")
745770
def test_cached_download_from_github(self):

0 commit comments

Comments
 (0)