Skip to content

Commit

Permalink
Raise after download if file size is not consistent (#1403)
Browse files Browse the repository at this point in the history
* Raise after download if file size is not consistent

* typo
  • Loading branch information
Wauplin authored Mar 24, 2023
1 parent 129994a commit 0d2fc77
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,15 @@ def http_get(
if chunk: # filter out keep-alive new chunks
progress.update(len(chunk))
temp_file.write(chunk)

if total is not None and total != temp_file.tell():
raise EnvironmentError(
f"Consistency check failed: file should be of size {total} but has size"
f" {temp_file.tell()} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
" pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
" know by opening an issue on https://github.com/huggingface/huggingface_hub."
)

progress.close()


Expand Down
35 changes: 35 additions & 0 deletions tests/test_file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from huggingface_hub.file_download import (
_CACHED_NO_EXIST,
_create_symlink,
_request_wrapper,
cached_download,
filename_to_url,
get_hf_file_metadata,
Expand Down Expand Up @@ -489,6 +490,40 @@ def test_get_hf_file_metadata_from_a_lfs_file(self) -> None:
self.assertIn("cdn-lfs", metadata.location) # Redirection
self.assertEqual(metadata.size, 497933648) # Size of LFS file, not pointer

def test_file_consistency_check_fails_regular_file(self):
    """Check that a size mismatch aborts the download of a regular file.

    Regression test for #1396: the `Content-Length` header of the response is
    overridden so that the expected size (from headers metadata) differs from
    the number of bytes actually downloaded.
    See https://github.com/huggingface/huggingface_hub/pull/1396.
    """

    def _tampered_request(*args, **kwargs):
        # Perform the real request, then lie about the size: 450 bytes are
        # announced while 496 bytes will be downloaded.
        resp = _request_wrapper(*args, **kwargs)
        resp.headers["Content-Length"] = "450"
        return resp

    # The patcher only takes effect once entered in the `with` statement below.
    patcher = patch("huggingface_hub.file_download._request_wrapper", _tampered_request)

    with SoftTemporaryDirectory() as cache_dir:
        with patcher, self.assertRaises(EnvironmentError):
            hf_hub_download(DUMMY_MODEL_ID, filename=CONFIG_NAME, cache_dir=cache_dir)

def test_file_consistency_check_fails_LFS_file(self):
    """Check that a size mismatch aborts the download of an LFS file.

    Regression test for #1396: the `Content-Length` header of the response is
    overridden so that the expected size (from headers metadata) differs from
    the number of bytes actually downloaded.
    See https://github.com/huggingface/huggingface_hub/pull/1396.
    """

    def _tampered_request(*args, **kwargs):
        # Perform the real request, then lie about the size: 65000 bytes are
        # announced while 65074 bytes will be downloaded.
        resp = _request_wrapper(*args, **kwargs)
        resp.headers["Content-Length"] = "65000"
        return resp

    # The patcher only takes effect once entered in the `with` statement below.
    patcher = patch("huggingface_hub.file_download._request_wrapper", _tampered_request)

    with SoftTemporaryDirectory() as cache_dir:
        with patcher, self.assertRaises(EnvironmentError):
            hf_hub_download(DUMMY_MODEL_ID, filename="pytorch_model.bin", cache_dir=cache_dir)


@with_production_testing
@pytest.mark.usefixtures("fx_cache_dir")
Expand Down

0 comments on commit 0d2fc77

Please sign in to comment.