Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise issue if trying to upload .git/ folder + ignore .git/ folder in upload_folder #1408

Merged
merged 10 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/guides/upload.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ Use the `allow_patterns` and `ignore_patterns` arguments to specify which files
Patterns are Standard Wildcards (globbing patterns) as documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm).
If both `allow_patterns` and `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded.

Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore` file is not taken into account.
This means you must use `allow_patterns` and `ignore_patterns` to specify which files to upload instead.

```py
>>> api.upload_folder(
... folder_path="/path/to/local/folder",
Expand Down
5 changes: 5 additions & 0 deletions src/huggingface_hub/_commit_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ def _validate_path_in_repo(path_in_repo: str) -> str:
raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
if path_in_repo.startswith("./"):
path_in_repo = path_in_repo[2:]
if any(part == ".git" for part in path_in_repo.split("/")):
raise ValueError(
"Invalid `path_in_repo` in CommitOperation: cannot update files under a '.git/' folder (path:"
f" '{path_in_repo}')."
)
return path_in_repo


Expand Down
13 changes: 12 additions & 1 deletion src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import requests
from requests.exceptions import HTTPError

from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, get_session
from huggingface_hub.utils import IGNORE_GIT_FOLDER_PATTERNS, EntryNotFoundError, RepositoryNotFoundError, get_session

from ._commit_api import (
CommitOperation,
Expand Down Expand Up @@ -86,6 +86,7 @@
USERNAME_PLACEHOLDER = "hf_user"
_REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$")


logger = logging.get_logger(__name__)


Expand Down Expand Up @@ -2606,6 +2607,9 @@ def upload_folder(
will delete any remote file under `experiment/logs/`. Note that the `.gitattributes` file will not be deleted
even if it matches the patterns.

Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore`
file is not taken into account.

Uses `HfApi.create_commit` under the hood.

Args:
Expand Down Expand Up @@ -2720,6 +2724,13 @@ def upload_folder(
if path_in_repo is None:
path_in_repo = ""

# Do not upload .git folder
if ignore_patterns is None:
ignore_patterns = []
elif isinstance(ignore_patterns, str):
ignore_patterns = [ignore_patterns]
ignore_patterns += IGNORE_GIT_FOLDER_PATTERNS

commit_message = (
commit_message if commit_message is not None else f"Upload {path_in_repo} with huggingface_hub"
)
Expand Down
2 changes: 1 addition & 1 deletion src/huggingface_hub/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from ._headers import build_hf_headers, get_token_to_send
from ._hf_folder import HfFolder
from ._http import configure_http_backend, get_session, http_backoff
from ._paths import filter_repo_objects
from ._paths import filter_repo_objects, IGNORE_GIT_FOLDER_PATTERNS
from ._runtime import (
dump_environment_info,
get_fastai_version,
Expand Down
2 changes: 2 additions & 0 deletions src/huggingface_hub/utils/_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

T = TypeVar("T")

IGNORE_GIT_FOLDER_PATTERNS = [".git", ".git/*", "*/.git", "**/.git/**"]


def filter_repo_objects(
items: Iterable[T],
Expand Down
38 changes: 38 additions & 0 deletions tests/test_commit_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,44 @@ def test_path_in_repo_invalid(self) -> None:
CommitOperationDelete(path_in_repo=input)


class TestCommitOperationForbiddenPathInRepo(unittest.TestCase):
"""Commit operations must throw an error on files in the .git/ folder.

Server would error anyway so it's best to prevent early.
"""

INVALID_PATHS_IN_REPO = {
".git",
".git/path/to/file",
"./.git/path/to/file",
"subfolder/path/.git/to/file",
"./subfolder/path/.git/to/file",
}

VALID_PATHS_IN_REPO = {
".gitignore",
"path/to/.gitignore",
"path/to/something.git",
"path/to/something.git/more",
}

def test_cannot_update_file_in_git_folder(self):
for path in self.INVALID_PATHS_IN_REPO:
with self.subTest(msg=f"Add: '{path}'"):
with self.assertRaises(ValueError):
CommitOperationAdd(path_in_repo=path, path_or_fileobj=b"content")
with self.subTest(msg=f"Delete: '{path}'"):
with self.assertRaises(ValueError):
CommitOperationDelete(path_in_repo=path)

def test_valid_path_in_repo_containing_git(self):
for path in self.VALID_PATHS_IN_REPO:
with self.subTest(msg=f"Add: '{path}'"):
CommitOperationAdd(path_in_repo=path, path_or_fileobj=b"content")
with self.subTest(msg=f"Delete: '{path}'"):
CommitOperationDelete(path_in_repo=path)


class TestWarnOnOverwritingOperations(unittest.TestCase):
add_file_ab = CommitOperationAdd(path_in_repo="a/b.txt", path_or_fileobj=b"data")
add_file_abc = CommitOperationAdd(path_in_repo="a/b/c.md", path_or_fileobj=b"data")
Expand Down
Loading