Skip to content

Commit

Permalink
Compute sha by chunks when uploading
Browse files Browse the repository at this point in the history
  • Loading branch information
Wauplin committed Jan 17, 2023
1 parent 66c328b commit cb1c89b
Showing 1 changed file with 12 additions and 22 deletions.
34 changes: 12 additions & 22 deletions src/huggingface_hub/utils/sha.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,6 @@
"""Utilities to efficiently compute the SHA 256 hash of a bunch of bytes"""

from functools import partial
"""Utilities to efficiently compute the SHA 256 hash of a bunch of bytes."""
from hashlib import sha256
from typing import BinaryIO, Iterable, Optional


def iter_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> Iterable[bytes]:
    """Iterate over the content of ``fileobj`` in pieces of ``chunk_size`` bytes.

    Args:
        fileobj: Binary file-like object; only its ``read`` method is used.
        chunk_size: Number of bytes requested per ``read`` call. A falsy value
            (``None`` or ``0``) is replaced by ``-1``, which is passed to
            ``read`` as the size argument.

    Returns:
        An iterator that calls ``fileobj.read`` repeatedly and stops as soon as
        a call returns ``b""``.
    """
    # Bind the reader once, up front, so a missing ``read`` attribute fails here
    # rather than on first iteration.
    read = fileobj.read
    size = chunk_size if chunk_size else -1
    # Two-argument iter(): keep calling until the sentinel b"" comes back.
    return iter(lambda: read(size), b"")


def sha_iter(iterable: Iterable[bytes]):
    """Compute the SHA-256 digest of a sequence of byte chunks.

    Args:
        iterable: Chunks of bytes, fed to the hash one after another in
            iteration order.

    Returns:
        The raw digest (as returned by ``digest()``) of everything that was fed.
    """
    hasher = sha256()
    for piece in iterable:
        hasher.update(piece)
    return hasher.digest()
from typing import BinaryIO, Optional


def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
Expand All @@ -28,10 +11,17 @@ def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
fileobj (file-like object):
The File object to compute sha256 for, typically obtained with `open(path, "rb")`
chunk_size (`int`, *optional*):
The number of bytes to read from `fileobj` at once, defaults to 512
The number of bytes to read from `fileobj` at once, defaults to 1MB.
Returns:
`bytes`: `fileobj`'s sha256 hash as bytes
"""
chunk_size = chunk_size if chunk_size is not None else 512
return sha_iter(iter_fileobj(fileobj))
chunk_size = chunk_size if chunk_size is not None else 1024 * 1024

sha = sha256()
while True:
chunk = fileobj.read(chunk_size)
sha.update(chunk)
if not chunk:
break
return sha.digest()

0 comments on commit cb1c89b

Please sign in to comment.