From e55a027f7c3fb4dcda697a4dc7d0729e6659ad84 Mon Sep 17 00:00:00 2001
From: Lysandre
Date: Mon, 19 Jul 2021 13:29:56 +0200
Subject: [PATCH 1/2] Add `filename` option to `lfs_track`

---
 src/huggingface_hub/repository.py | 10 ++++++++--
 tests/test_repository.py          | 24 ++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/huggingface_hub/repository.py b/src/huggingface_hub/repository.py
index 7a92c18080..e76490ccf5 100644
--- a/src/huggingface_hub/repository.py
+++ b/src/huggingface_hub/repository.py
@@ -438,16 +438,22 @@ def list_deleted_files(self) -> List[str]:
 
         return deleted_files
 
-    def lfs_track(self, patterns: Union[str, List[str]]):
+    def lfs_track(self, patterns: Union[str, List[str]], filename: bool = False):
         """
         Tell git-lfs to track those files.
+
+        Setting the `filename` argument to `True` will treat the arguments as literal filenames,
+        not as patterns. Any special glob characters in the filename will be escaped when
+        writing the .gitattributes file.
         """
         if isinstance(patterns, str):
             patterns = [patterns]
+        # Pass argv as a list (never a split string) so names with spaces stay intact.
+        lfs_track_cmd = ["git", "lfs", "track"] + (["--filename"] if filename else [])
         try:
             for pattern in patterns:
                 subprocess.run(
-                    ["git", "lfs", "track", pattern],
+                    lfs_track_cmd + [pattern],
                     stderr=subprocess.PIPE,
                     stdout=subprocess.PIPE,
                     check=True,
diff --git a/tests/test_repository.py b/tests/test_repository.py
index f3403d28f6..19f13d1d34 100644
--- a/tests/test_repository.py
+++ b/tests/test_repository.py
@@ -107,6 +107,30 @@ def test_init_clone_in_empty_folder(self):
 
         self.assertIn("random_file.txt", os.listdir(WORKING_REPO_DIR))
 
+    def test_git_lfs_filename(self):
+        os.mkdir(WORKING_REPO_DIR)
+        subprocess.run(
+            ["git", "init"],
+            stderr=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            check=True,
+            cwd=WORKING_REPO_DIR,
+        )
+
+        repo = Repository(WORKING_REPO_DIR)
+
+        large_file = [100] * int(4e6)
+        with open(os.path.join(WORKING_REPO_DIR, "[].txt"), "w") as f:
+            f.write(json.dumps(large_file))
+
+        repo.git_add()
+
+        repo.lfs_track(["[].txt"])
+        self.assertFalse(is_tracked_with_lfs(f"{WORKING_REPO_DIR}/[].txt"))
+
+        repo.lfs_track(["[].txt"], filename=True)
+        self.assertTrue(is_tracked_with_lfs(f"{WORKING_REPO_DIR}/[].txt"))
+
     def test_init_clone_in_nonempty_folder(self):
         # Create dummy files
         # one is lfs-tracked, the other is not.
From 0e8a614c43458ae382139552e7bfb84f1cd9aa51 Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Tue, 20 Jul 2021 09:25:25 +0200 Subject: [PATCH 2/2] Correction of the official git-lfs doc Co-authored-by: Julien Chaumond --- src/huggingface_hub/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/repository.py b/src/huggingface_hub/repository.py index e76490ccf5..172e2690ba 100644 --- a/src/huggingface_hub/repository.py +++ b/src/huggingface_hub/repository.py @@ -444,7 +444,7 @@ def lfs_track(self, patterns: Union[str, List[str]], filename: bool = False): Setting the `filename` argument to `True` will treat the arguments as literal filenames, not as patterns. Any special glob characters in the filename will be escaped when - writing the .gitattributes file. + writing to the .gitattributes file. """ if isinstance(patterns, str): patterns = [patterns]