Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to skip large files #472

Merged
merged 2 commits into from
Nov 25, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/huggingface_hub/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ def __init__(
git_email: Optional[str] = None,
revision: Optional[str] = None,
private: bool = False,
skip_lfs: bool = False,
NielsRogge marked this conversation as resolved.
Show resolved Hide resolved
):
"""
Instantiate a local clone of a git repo.
Expand All @@ -384,7 +385,7 @@ def __init__(
repository url (e.g. ``'https://huggingface.co/philschmid/playground-tests'``).
repo_type (``str``, `optional`):
To set when creating a repo: set to "dataset" or "space" if creating a dataset or space, default is model.
use_auth_token (``str`` or ``bool``, `optional`, defaults ``None``):
use_auth_token (``str`` or ``bool``, `optional`, defaults to ``True``):
NielsRogge marked this conversation as resolved.
Show resolved Hide resolved
huggingface_token can be extracted from ``HfApi().login(username, password)`` and is used to authenticate against the hub
(useful from Google Colab for instance).
git_user (``str``, `optional`):
Expand All @@ -394,15 +395,18 @@ def __init__(
revision (``str``, `optional`):
Revision to checkout after initializing the repository. If the revision doesn't exist, a
branch will be created with that revision name from the default branch's current HEAD.
private (``bool``, `optional`):
private (``bool``, `optional`, defaults to ``False``):
whether the repository is private or not.
skip_lfs (``bool``, `optional`, defaults to ``False``):
whether to skip git-LFS files or not.
"""

os.makedirs(local_dir, exist_ok=True)
self.local_dir = os.path.join(os.getcwd(), local_dir)
self.repo_type = repo_type
self.command_queue = []
self.private = private
self.skip_lfs = skip_lfs

self.check_git_versions()

Expand Down Expand Up @@ -566,9 +570,12 @@ def clone_from(self, repo_url: str, use_auth_token: Union[bool, str, None] = Non
# checks if repository is initialized in an empty repository or in one with files
if len(os.listdir(self.local_dir)) == 0:
logger.warning(f"Cloning {clean_repo_url} into local empty directory.")
prefix = ""
if self.skip_lfs:
prefix = "GIT_LFS_SKIP_SMUDGE=1 "
NielsRogge marked this conversation as resolved.
Show resolved Hide resolved
with lfs_log_progress():
subprocess.run(
f"git lfs clone {repo_url} .".split(),
f"{prefix}git lfs clone {repo_url} .".split(),
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
check=True,
Expand Down