From 79805259abae88769ccec9fdba7a9e06c696e2a9 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Fri, 28 Apr 2023 15:37:44 +0200 Subject: [PATCH 01/17] POC to run HfApi methods in the background --- src/huggingface_hub/hf_api.py | 40 ++++++++++++++++++++++-- src/huggingface_hub/utils/_typing.py | 4 +++ src/huggingface_hub/utils/_validators.py | 8 ++--- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 5ee78ebca9..895f3b1191 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -17,8 +17,10 @@ import re import textwrap import warnings +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field from datetime import datetime +from functools import wraps from itertools import islice from pathlib import Path from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -829,6 +831,38 @@ def __init__( self.library_version = library_version self.user_agent = user_agent + # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main + # thread. Can be useful to upload data during a training. ThreadPoolExecutor is initialized the first time it's + # used. All methods flagged as `@detachable` can be run in the background. + self._thread_executor: Optional[ThreadPoolExecutor] = None + + def __getattribute__(self, key: str) -> Any: + """Wrap all methods of the class to make them detachable. + + Detachable methods can be run in the background by calling `.detach()` on them. Note that static type checking + is not possible on `.detach()` calls with the current implementation. + """ + attribute = super().__getattribute__(key) + if key.startswith("_") or not hasattr(attribute, "__self__"): + return attribute + + # If __self__ attribute is set, it means we are accessing a bounded method of the class. + # We want to wrap it to make it detachable. 
Only public methods (i.e. not starting with "_") are wrapped. + @wraps(attribute, updated=()) # hack to wrap a class as a function + class _detachable_method: + def __call__(_inner_self, *args, **kwargs): + # Default use case: run synchronously + return attribute(*args, **kwargs) + + def detach(_inner_self, *args, **kwargs): + # If .detach is called, queue it to the executor + if self._thread_executor is None: + self._thread_executor = ThreadPoolExecutor(max_workers=1) + return self._thread_executor.submit(attribute, *args, **kwargs) + + return _detachable_method() + + @validate_hf_hub_args def whoami(self, token: Optional[str] = None) -> Dict: """ Call HF API to know "whoami". @@ -874,7 +908,9 @@ def _is_valid_token(self, token: str) -> bool: return False def get_model_tags(self) -> ModelTags: - "Gets all valid model tags as a nested namespace object" + """ + List all valid model tags as a nested namespace object + """ path = f"{self.endpoint}/api/models-tags-by-type" r = get_session().get(path) hf_raise_for_status(r) @@ -883,7 +919,7 @@ def get_model_tags(self) -> ModelTags: def get_dataset_tags(self) -> DatasetTags: """ - Gets all valid dataset tags as a nested namespace object. + List all valid dataset tags as a nested namespace object. """ path = f"{self.endpoint}/api/datasets-tags-by-type" r = get_session().get(path) diff --git a/src/huggingface_hub/utils/_typing.py b/src/huggingface_hub/utils/_typing.py index 812c65ea39..c8885eb1eb 100644 --- a/src/huggingface_hub/utils/_typing.py +++ b/src/huggingface_hub/utils/_typing.py @@ -14,6 +14,7 @@ # limitations under the License. 
"""Handle typing imports based on system compatibility.""" import sys +from typing import Callable, TypeVar if sys.version_info >= (3, 8): @@ -22,3 +23,6 @@ from typing_extensions import Literal, TypedDict # noqa: F401 HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] + +# type hint meaning "function signature not changed by decorator" +CallableT = TypeVar("CallableT", bound=Callable) diff --git a/src/huggingface_hub/utils/_validators.py b/src/huggingface_hub/utils/_validators.py index 5ec3de775f..5dd64fa514 100644 --- a/src/huggingface_hub/utils/_validators.py +++ b/src/huggingface_hub/utils/_validators.py @@ -18,7 +18,9 @@ import warnings from functools import wraps from itertools import chain -from typing import Any, Callable, Dict, TypeVar +from typing import Any, Dict + +from ._typing import CallableT REPO_ID_REGEX = re.compile( @@ -41,10 +43,6 @@ class HFValidationError(ValueError): """ -# type hint meaning "function signature not changed by decorator" -CallableT = TypeVar("CallableT", bound=Callable) - - def validate_hf_hub_args(fn: CallableT) -> CallableT: """Validate values received as argument for any public method of `huggingface_hub`. From 9ec1e0d76672e25395ca8f56707ce905e5d8822c Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 9 May 2023 11:48:52 +0200 Subject: [PATCH 02/17] Renamed detach to threaded + fix repr --- src/huggingface_hub/hf_api.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 7473e32774..3dd66a154c 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -833,8 +833,8 @@ def __init__( # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main # thread. Can be useful to upload data during a training. ThreadPoolExecutor is initialized the first time it's - # used. 
All methods flagged as `@detachable` can be run in the background. - self._thread_executor: Optional[ThreadPoolExecutor] = None + # used. + self._thread_pool: Optional[ThreadPoolExecutor] = None def __getattribute__(self, key: str) -> Any: """Wrap all methods of the class to make them detachable. @@ -846,21 +846,29 @@ def __getattribute__(self, key: str) -> Any: if key.startswith("_") or not hasattr(attribute, "__self__"): return attribute + # Attribute is a method to wrap + method = attribute + # If __self__ attribute is set, it means we are accessing a bounded method of the class. - # We want to wrap it to make it detachable. Only public methods (i.e. not starting with "_") are wrapped. - @wraps(attribute, updated=()) # hack to wrap a class as a function - class _detachable_method: + # We want to wrap it to make it "threaded". Only public methods (i.e. not starting with "_") are wrapped. + class _threaded_method: + def __getattr__(self, key: str) -> Any: + return getattr(method, key) + def __call__(_inner_self, *args, **kwargs): # Default use case: run synchronously - return attribute(*args, **kwargs) + return method(*args, **kwargs) + + def threaded(_inner_self, *args, **kwargs): + # If .threaded is called, queue it to the executor + if self._thread_pool is None: + self._thread_pool = ThreadPoolExecutor(max_workers=1) + return self._thread_pool.submit(method, *args, **kwargs) - def detach(_inner_self, *args, **kwargs): - # If .detach is called, queue it to the executor - if self._thread_executor is None: - self._thread_executor = ThreadPoolExecutor(max_workers=1) - return self._thread_executor.submit(attribute, *args, **kwargs) + def __repr__(self) -> str: + return repr(method) - return _detachable_method() + return wraps(method)(_threaded_method()) @validate_hf_hub_args def whoami(self, token: Optional[str] = None) -> Dict: From 9e28cbaa68c1982fb257615fbf67636e14538b95 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 9 May 2023 16:28:37 +0200 Subject: 
[PATCH 03/17] Complete Refacto --- Makefile | 2 + pyproject.toml | 3 + setup.cfg | 2 + src/huggingface_hub/_multi_commits.py | 4 +- src/huggingface_hub/_threaded_hf_api.py | 1292 +++++++++++++++++++++++ src/huggingface_hub/hf_api.py | 50 +- utils/check_threaded_hf_api.py | 195 ++++ 7 files changed, 1504 insertions(+), 44 deletions(-) create mode 100644 src/huggingface_hub/_threaded_hf_api.py create mode 100644 utils/check_threaded_hf_api.py diff --git a/Makefile b/Makefile index 3ba5f3dae5..3a6b8accf4 100644 --- a/Makefile +++ b/Makefile @@ -10,12 +10,14 @@ quality: mypy src python utils/check_contrib_list.py python utils/check_static_imports.py + python utils/check_threaded_hf_api.py style: black $(check_dirs) ruff $(check_dirs) --fix python utils/check_contrib_list.py --update python utils/check_static_imports.py --update + python utils/check_threaded_hf_api.py --update repocard: python utils/push_repocard_examples.py diff --git a/pyproject.toml b/pyproject.toml index c7e8c4c3ed..d02165f52e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ ignore = ["E501", "F821"] select = ["E", "F", "I", "W"] line-length = 119 +[tool.ruff.per-file-ignores] +"src/huggingface_hub/_threaded_hf_api.py" = ["F405"] # generate code => we assume it's good + [tool.ruff.isort] lines-after-imports = 2 known-first-party = ["huggingface_hub"] diff --git a/setup.cfg b/setup.cfg index 9cc27b091c..50db28b53b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,6 +52,8 @@ use_parentheses = True exclude = .git,__pycache__,old,build,dist,.venv* ignore = B028, E203, E501, E741, W503 max-line-length = 119 +per-file-ignores = + src/huggingface_hub/_threaded_hf_api.py:F405 [tool:pytest] # -Werror::FutureWarning -> test fails if FutureWarning is thrown diff --git a/src/huggingface_hub/_multi_commits.py b/src/huggingface_hub/_multi_commits.py index 20576eaa86..585180a950 100644 --- a/src/huggingface_hub/_multi_commits.py +++ b/src/huggingface_hub/_multi_commits.py @@ -25,7 +25,7 @@ if 
TYPE_CHECKING: - from .hf_api import HfApi + from .hf_api import _HfApi class MultiCommitException(Exception): @@ -267,7 +267,7 @@ def __post_init__(self) -> None: def multi_commit_create_pull_request( - api: "HfApi", + api: "_HfApi", repo_id: str, commit_message: str, commit_description: Optional[str], diff --git a/src/huggingface_hub/_threaded_hf_api.py b/src/huggingface_hub/_threaded_hf_api.py new file mode 100644 index 0000000000..1ba008e586 --- /dev/null +++ b/src/huggingface_hub/_threaded_hf_api.py @@ -0,0 +1,1292 @@ +# coding=utf-8 +# Copyright 2022-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +WARNING: this file is automatically generated by `utils/check_threaded_hf_api.py`. Do not edit it manually. +You can check it is up-to-date by running `make quality` and update its content with `make style` if needed. + +The content of this file is mostly based on HfApi implementation. +""" +from concurrent.futures import Future, ThreadPoolExecutor +from typing import Dict, Optional, Union + +from .hf_api import * # noqa: F403 +from .hf_api import _HfApi + + +class _ThreadedHfApi(_HfApi): + _thread_pool: Optional[ThreadPoolExecutor] = None + + @property + def thread_pool(self) -> ThreadPoolExecutor: + # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main + # thread. Can be useful to upload data during a training. 
ThreadPoolExecutor is initialized the first time it's + # used. Non-blocking methods are suffixed by `_threaded`. + if self._thread_pool is None: + self._thread_pool = ThreadPoolExecutor(max_workers=1) + return self._thread_pool + + def add_space_secret_threaded( + self, repo_id: str, key: str, value: str, *, token: Optional[str] = None + ) -> Future[None]: + """ + Adds or updates a secret in a Space. + + This is a non-blocking method. Check out [`add_space_secret`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.add_space_secret, repo_id, key, value, token=token) + + def change_discussion_status_threaded( + self, + repo_id: str, + discussion_num: int, + new_status: Literal["open", "closed"], + *, + token: Optional[str] = None, + comment: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[DiscussionStatusChange]: + """ + Closes or re-opens a Discussion or Pull Request. + + This is a non-blocking method. Check out [`change_discussion_status`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.change_discussion_status, + repo_id, + discussion_num, + new_status, + token=token, + comment=comment, + repo_type=repo_type, + ) + + def comment_discussion_threaded( + self, + repo_id: str, + discussion_num: int, + comment: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[DiscussionComment]: + """ + Creates a new comment on the given Discussion. + + This is a non-blocking method. Check out [`comment_discussion`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.comment_discussion, repo_id, discussion_num, comment, token=token, repo_type=repo_type + ) + + def create_branch_threaded( + self, + repo_id: str, + *, + branch: str, + revision: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + ) -> Future[None]: + """ + Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`). + + This is a non-blocking method. Check out [`create_branch`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.create_branch, + repo_id, + branch=branch, + revision=revision, + token=token, + repo_type=repo_type, + exist_ok=exist_ok, + ) + + def create_commit_threaded( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + ) -> Future[CommitInfo]: + """ + Creates a commit in the given repo, deleting & uploading files as needed. + + This is a non-blocking method. Check out [`create_commit`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.create_commit, + repo_id, + operations, + commit_message=commit_message, + commit_description=commit_description, + token=token, + repo_type=repo_type, + revision=revision, + create_pr=create_pr, + num_threads=num_threads, + parent_commit=parent_commit, + ) + + def create_commits_on_pr_threaded( + self, + *, + repo_id: str, + addition_commits: List[List[CommitOperationAdd]], + deletion_commits: List[List[CommitOperationDelete]], + commit_message: str, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + merge_pr: bool = True, + num_threads: int = 5, # TODO: use to multithread uploads + verbose: bool = False, + ) -> Future[str]: + """ + Push changes to the Hub in multiple commits. + + This is a non-blocking method. Check out [`create_commits_on_pr`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.create_commits_on_pr, + repo_id=repo_id, + addition_commits=addition_commits, + deletion_commits=deletion_commits, + commit_message=commit_message, + commit_description=commit_description, + token=token, + repo_type=repo_type, + merge_pr=merge_pr, + num_threads=num_threads, + verbose=verbose, + ) + + def create_discussion_threaded( + self, + repo_id: str, + title: str, + *, + token: Optional[str] = None, + description: Optional[str] = None, + repo_type: Optional[str] = None, + pull_request: bool = False, + ) -> Future[DiscussionWithDetails]: + """ + Creates a Discussion or Pull Request. + + This is a non-blocking method. Check out [`create_discussion`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.create_discussion, + repo_id, + title, + token=token, + description=description, + repo_type=repo_type, + pull_request=pull_request, + ) + + def create_pull_request_threaded( + self, + repo_id: str, + title: str, + *, + token: Optional[str] = None, + description: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[DiscussionWithDetails]: + """ + Creates a Pull Request . Pull Requests created programmatically will be in `"draft"` status. + + This is a non-blocking method. Check out [`create_pull_request`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.create_pull_request, repo_id, title, token=token, description=description, repo_type=repo_type + ) + + def create_repo_threaded( + self, + repo_id: str, + *, + token: Optional[str] = None, + private: bool = False, + repo_type: Optional[str] = None, + exist_ok: bool = False, + space_sdk: Optional[str] = None, + space_hardware: Optional[str] = None, + ) -> Future[RepoUrl]: + """ + Create an empty repo on the HuggingFace Hub. + + This is a non-blocking method. Check out [`create_repo`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.create_repo, + repo_id, + token=token, + private=private, + repo_type=repo_type, + exist_ok=exist_ok, + space_sdk=space_sdk, + space_hardware=space_hardware, + ) + + def create_tag_threaded( + self, + repo_id: str, + *, + tag: str, + tag_message: Optional[str] = None, + revision: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + ) -> Future[None]: + """ + Tag a given commit of a repo on the Hub. + + This is a non-blocking method. Check out [`create_tag`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.create_tag, + repo_id, + tag=tag, + tag_message=tag_message, + revision=revision, + token=token, + repo_type=repo_type, + exist_ok=exist_ok, + ) + + def dataset_info_threaded( + self, + repo_id: str, + *, + revision: Optional[str] = None, + timeout: Optional[float] = None, + files_metadata: bool = False, + token: Optional[Union[bool, str]] = None, + ) -> Future[DatasetInfo]: + """ + Get info on one specific dataset on huggingface.co. + + This is a non-blocking method. Check out [`dataset_info`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.dataset_info, repo_id, revision=revision, timeout=timeout, files_metadata=files_metadata, token=token + ) + + def delete_branch_threaded( + self, + repo_id: str, + *, + branch: str, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[None]: + """ + Delete a branch from a repo on the Hub. + + This is a non-blocking method. Check out [`delete_branch`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.delete_branch, repo_id, branch=branch, token=token, repo_type=repo_type) + + def delete_file_threaded( + self, + path_in_repo: str, + repo_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + ) -> Future[CommitInfo]: + """ + Deletes a file in the given repo. + + This is a non-blocking method. Check out [`delete_file`] documentation to learn how to use it. 
The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.delete_file, + path_in_repo, + repo_id, + token=token, + repo_type=repo_type, + revision=revision, + commit_message=commit_message, + commit_description=commit_description, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + def delete_folder_threaded( + self, + path_in_repo: str, + repo_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + ) -> Future[CommitInfo]: + """ + Deletes a folder in the given repo. + + This is a non-blocking method. Check out [`delete_folder`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.delete_folder, + path_in_repo, + repo_id, + token=token, + repo_type=repo_type, + revision=revision, + commit_message=commit_message, + commit_description=commit_description, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + def delete_repo_threaded( + self, + repo_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ): + """ + Delete a repo from the HuggingFace Hub. CAUTION: this is irreversible. + + This is a non-blocking method. Check out [`delete_repo`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.delete_repo, repo_id, token=token, repo_type=repo_type) + + def delete_space_secret_threaded(self, repo_id: str, key: str, *, token: Optional[str] = None) -> Future[None]: + """ + Deletes a secret from a Space. + + This is a non-blocking method. Check out [`delete_space_secret`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.delete_space_secret, repo_id, key, token=token) + + def delete_tag_threaded( + self, + repo_id: str, + *, + tag: str, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[None]: + """ + Delete a tag from a repo on the Hub. + + This is a non-blocking method. Check out [`delete_tag`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.delete_tag, repo_id, tag=tag, token=token, repo_type=repo_type) + + def duplicate_space_threaded( + self, + from_id: str, + to_id: Optional[str] = None, + *, + private: Optional[bool] = None, + token: Optional[str] = None, + exist_ok: bool = False, + ) -> Future[RepoUrl]: + """ + Duplicate a Space. + + This is a non-blocking method. Check out [`duplicate_space`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.duplicate_space, from_id, to_id, private=private, token=token, exist_ok=exist_ok + ) + + def edit_discussion_comment_threaded( + self, + repo_id: str, + discussion_num: int, + comment_id: str, + new_content: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[DiscussionComment]: + """ + Edits a comment on a Discussion / Pull Request. + + This is a non-blocking method. Check out [`edit_discussion_comment`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.edit_discussion_comment, + repo_id, + discussion_num, + comment_id, + new_content, + token=token, + repo_type=repo_type, + ) + + def get_dataset_tags_threaded(self) -> Future[DatasetTags]: + """ + List all valid dataset tags as a nested namespace object. + + This is a non-blocking method. Check out [`get_dataset_tags`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by
+        parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve
+        order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
+        documentation.
+        """
+        return self.thread_pool.submit(
+            self.get_dataset_tags,
+        )
+
+    def get_discussion_details_threaded(
+        self,
+        repo_id: str,
+        discussion_num: int,
+        *,
+        repo_type: Optional[str] = None,
+        token: Optional[str] = None,
+    ) -> Future[DiscussionWithDetails]:
+        """
+        Fetches a Discussion's / Pull Request 's details from the Hub.
+
+        This is a non-blocking method. Check out [`get_discussion_details`] documentation to learn how to use it. The threaded version
+        starts a background job in a separate thread and returns a Future object. The goal of background jobs is to
+        avoid blocking the main thread for example in a training. You should not expect a gain in performances by
+        parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve
+        order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
+        documentation.
+        """
+        return self.thread_pool.submit(
+            self.get_discussion_details, repo_id, discussion_num, repo_type=repo_type, token=token
+        )
+
+    def get_full_repo_name_threaded(
+        self,
+        model_id: str,
+        *,
+        organization: Optional[str] = None,
+        token: Optional[Union[bool, str]] = None,
+    ):
+        """
+        Returns the repository name for a given model ID and optional organization.
+
+        This is a non-blocking method. Check out [`get_full_repo_name`] documentation to learn how to use it. The threaded version
+        starts a background job in a separate thread and returns a Future object. The goal of background jobs is to
+        avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.get_full_repo_name, model_id, organization=organization, token=token) + + def get_model_tags_threaded(self) -> Future[ModelTags]: + """ + List all valid model tags as a nested namespace object + + This is a non-blocking method. Check out [`get_model_tags`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.get_model_tags, + ) + + def get_repo_discussions_threaded( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Optional[str] = None, + ) -> Future[Iterator[Discussion]]: + """ + Fetches Discussions and Pull Requests for the given repo. + + This is a non-blocking method. Check out [`get_repo_discussions`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.get_repo_discussions, repo_id, repo_type=repo_type, token=token) + + def get_space_runtime_threaded(self, repo_id: str, *, token: Optional[str] = None) -> Future[SpaceRuntime]: + """ + Gets runtime information about a Space. + + This is a non-blocking method. Check out [`get_space_runtime`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.get_space_runtime, repo_id, token=token) + + def hide_discussion_comment_threaded( + self, + repo_id: str, + discussion_num: int, + comment_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[DiscussionComment]: + """ + Hides a comment on a Discussion / Pull Request. + + This is a non-blocking method. Check out [`hide_discussion_comment`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.hide_discussion_comment, repo_id, discussion_num, comment_id, token=token, repo_type=repo_type + ) + + def like_threaded( + self, + repo_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[None]: + """ + Like a given repo on the Hub (e.g. set as favorite). + + This is a non-blocking method. Check out [`like`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.like, repo_id, token=token, repo_type=repo_type) + + def list_datasets_threaded( + self, + *, + filter: Union[DatasetFilter, str, Iterable[str], None] = None, + author: Optional[str] = None, + search: Optional[str] = None, + sort: Union[Literal["lastModified"], str, None] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + cardData: Optional[bool] = None, # deprecated + full: Optional[bool] = None, + token: Optional[str] = None, + ) -> Future[List[DatasetInfo]]: + """ + Get the list of all the datasets on huggingface.co + + This is a non-blocking method. Check out [`list_datasets`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.list_datasets, + filter=filter, + author=author, + search=search, + sort=sort, + direction=direction, + limit=limit, + cardData=cardData, + full=full, + token=token, + ) + + def list_files_info_threaded( + self, + repo_id: str, + paths: Union[List[str], str, None] = None, + *, + expand: bool = False, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Optional[Union[bool, str]] = None, + ) -> Future[Iterable[RepoFile]]: + """ + List files on a repo and get information about them. + + This is a non-blocking method. Check out [`list_files_info`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.list_files_info, repo_id, paths, expand=expand, revision=revision, repo_type=repo_type, token=token + ) + + def list_liked_repos_threaded( + self, + user: Optional[str] = None, + *, + token: Optional[str] = None, + ) -> Future[UserLikes]: + """ + List all public repos liked by a user on huggingface.co. 
+ + This is a non-blocking method. Check out [`list_liked_repos`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.list_liked_repos, user, token=token) + + def list_metrics_threaded(self) -> Future[List[MetricInfo]]: + """ + Get the public list of all the metrics on huggingface.co + + This is a non-blocking method. Check out [`list_metrics`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.list_metrics, + ) + + def list_models_threaded( + self, + *, + filter: Union[ModelFilter, str, Iterable[str], None] = None, + author: Optional[str] = None, + search: Optional[str] = None, + emissions_thresholds: Optional[Tuple[float, float]] = None, + sort: Union[Literal["lastModified"], str, None] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + full: Optional[bool] = None, + cardData: bool = False, + fetch_config: bool = False, + token: Optional[Union[bool, str]] = None, + ) -> Future[List[ModelInfo]]: + """ + Get the list of all the models on huggingface.co + + This is a non-blocking method. Check out [`list_models`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.list_models, + filter=filter, + author=author, + search=search, + emissions_thresholds=emissions_thresholds, + sort=sort, + direction=direction, + limit=limit, + full=full, + cardData=cardData, + fetch_config=fetch_config, + token=token, + ) + + def list_repo_commits_threaded( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Optional[Union[bool, str]] = None, + revision: Optional[str] = None, + formatted: bool = False, + ) -> Future[List[GitCommitInfo]]: + """ + Get the list of commits of a given revision for a repo on the Hub. + + This is a non-blocking method. Check out [`list_repo_commits`] documentation to learn how to use it. 
The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.list_repo_commits, repo_id, repo_type=repo_type, token=token, revision=revision, formatted=formatted + ) + + def list_repo_files_threaded( + self, + repo_id: str, + *, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + timeout: Optional[float] = None, + token: Optional[Union[bool, str]] = None, + ) -> Future[List[str]]: + """ + Get the list of files in a given repo. + + This is a non-blocking method. Check out [`list_repo_files`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.list_repo_files, repo_id, revision=revision, repo_type=repo_type, timeout=timeout, token=token + ) + + def list_repo_refs_threaded( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Optional[Union[bool, str]] = None, + ) -> Future[GitRefs]: + """ + Get the list of refs of a given repo (both tags and branches). 
+ + This is a non-blocking method. Check out [`list_repo_refs`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.list_repo_refs, repo_id, repo_type=repo_type, token=token) + + def list_spaces_threaded( + self, + *, + filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + search: Optional[str] = None, + sort: Union[Literal["lastModified"], str, None] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + datasets: Union[str, Iterable[str], None] = None, + models: Union[str, Iterable[str], None] = None, + linked: bool = False, + full: Optional[bool] = None, + token: Optional[str] = None, + ) -> Future[List[SpaceInfo]]: + """ + Get the public list of all Spaces on huggingface.co + + This is a non-blocking method. Check out [`list_spaces`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.list_spaces, + filter=filter, + author=author, + search=search, + sort=sort, + direction=direction, + limit=limit, + datasets=datasets, + models=models, + linked=linked, + full=full, + token=token, + ) + + def merge_pull_request_threaded( + self, + repo_id: str, + discussion_num: int, + *, + token: Optional[str] = None, + comment: Optional[str] = None, + repo_type: Optional[str] = None, + ): + """ + Merges a Pull Request. + + This is a non-blocking method. Check out [`merge_pull_request`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.merge_pull_request, repo_id, discussion_num, token=token, comment=comment, repo_type=repo_type + ) + + def model_info_threaded( + self, + repo_id: str, + *, + revision: Optional[str] = None, + timeout: Optional[float] = None, + securityStatus: Optional[bool] = None, + files_metadata: bool = False, + token: Optional[Union[bool, str]] = None, + ) -> Future[ModelInfo]: + """ + Get info on one specific model on huggingface.co + + This is a non-blocking method. Check out [`model_info`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.model_info, + repo_id, + revision=revision, + timeout=timeout, + securityStatus=securityStatus, + files_metadata=files_metadata, + token=token, + ) + + def move_repo_threaded( + self, + from_id: str, + to_id: str, + *, + repo_type: Optional[str] = None, + token: Optional[str] = None, + ): + """ + Moving a repository from namespace1/repo_name1 to namespace2/repo_name2 + + This is a non-blocking method. Check out [`move_repo`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.move_repo, from_id, to_id, repo_type=repo_type, token=token) + + def pause_space_threaded(self, repo_id: str, *, token: Optional[str] = None) -> Future[SpaceRuntime]: + """ + Pause your Space. + + This is a non-blocking method. Check out [`pause_space`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.pause_space, repo_id, token=token) + + def rename_discussion_threaded( + self, + repo_id: str, + discussion_num: int, + new_title: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[DiscussionTitleChange]: + """ + Renames a Discussion. + + This is a non-blocking method. Check out [`rename_discussion`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.rename_discussion, repo_id, discussion_num, new_title, token=token, repo_type=repo_type + ) + + def repo_info_threaded( + self, + repo_id: str, + *, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + timeout: Optional[float] = None, + files_metadata: bool = False, + token: Optional[Union[bool, str]] = None, + ) -> Future[Union[ModelInfo, DatasetInfo, SpaceInfo]]: + """ + Get the info object for a given repo of a given type. + + This is a non-blocking method. Check out [`repo_info`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.repo_info, + repo_id, + revision=revision, + repo_type=repo_type, + timeout=timeout, + files_metadata=files_metadata, + token=token, + ) + + def request_space_hardware_threaded( + self, + repo_id: str, + hardware: SpaceHardware, + *, + token: Optional[str] = None, + sleep_time: Optional[int] = None, + ) -> Future[SpaceRuntime]: + """ + Request new hardware for a Space. + + This is a non-blocking method. Check out [`request_space_hardware`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.request_space_hardware, repo_id, hardware, token=token, sleep_time=sleep_time + ) + + def restart_space_threaded(self, repo_id: str, *, token: Optional[str] = None) -> Future[SpaceRuntime]: + """ + Restart your Space. + + This is a non-blocking method. Check out [`restart_space`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to
+        avoid blocking the main thread for example in a training. You should not expect a gain in performances by
+        parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve
+        order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
+        documentation.
+        """
+        return self.thread_pool.submit(self.restart_space, repo_id, token=token)
+
+    def set_space_sleep_time_threaded(
+        self, repo_id: str, sleep_time: int, *, token: Optional[str] = None
+    ) -> Future[SpaceRuntime]:
+        """
+        Set a custom sleep time for a Space running on upgraded hardware.
+
+        This is a non-blocking method. Check out [`set_space_sleep_time`] documentation to learn how to use it. The threaded version
+        starts a background job in a separate thread and returns a Future object. The goal of background jobs is to
+        avoid blocking the main thread for example in a training. You should not expect a gain in performances by
+        parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve
+        order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
+        documentation.
+        """
+        return self.thread_pool.submit(self.set_space_sleep_time, repo_id, sleep_time, token=token)
+
+    def space_info_threaded(
+        self,
+        repo_id: str,
+        *,
+        revision: Optional[str] = None,
+        timeout: Optional[float] = None,
+        files_metadata: bool = False,
+        token: Optional[Union[bool, str]] = None,
+    ) -> Future[SpaceInfo]:
+        """
+        Get info on one specific Space on huggingface.co.
+
+        This is a non-blocking method. Check out [`space_info`] documentation to learn how to use it. The threaded version
+        starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit( + self.space_info, repo_id, revision=revision, timeout=timeout, files_metadata=files_metadata, token=token + ) + + def unlike_threaded( + self, + repo_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> Future[None]: + """ + Unlike a given repo on the Hub (e.g. remove from favorite list). + + This is a non-blocking method. Check out [`unlike`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. + """ + return self.thread_pool.submit(self.unlike, repo_id, token=token, repo_type=repo_type) + + def update_repo_visibility_threaded( + self, + repo_id: str, + private: bool = False, + *, + token: Optional[str] = None, + organization: Optional[str] = None, + repo_type: Optional[str] = None, + name: Optional[str] = None, + ) -> Future[Dict[str, bool]]: + """ + Update the visibility setting of a repository. + + This is a non-blocking method. Check out [`update_repo_visibility`] documentation to learn how to use it. 
The threaded version
+        starts a background job in a separate thread and returns a Future object. The goal of background jobs is to
+        avoid blocking the main thread for example in a training. You should not expect a gain in performances by
+        parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve
+        order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
+        documentation.
+        """
+        return self.thread_pool.submit(
+            self.update_repo_visibility,
+            repo_id,
+            private,
+            token=token,
+            organization=organization,
+            repo_type=repo_type,
+            name=name,
+        )
+
+    def upload_file_threaded(
+        self,
+        *,
+        path_or_fileobj: Union[str, Path, bytes, BinaryIO],
+        path_in_repo: str,
+        repo_id: str,
+        token: Optional[str] = None,
+        repo_type: Optional[str] = None,
+        revision: Optional[str] = None,
+        commit_message: Optional[str] = None,
+        commit_description: Optional[str] = None,
+        create_pr: Optional[bool] = None,
+        parent_commit: Optional[str] = None,
+    ) -> Future[str]:
+        """
+        Upload a local file (up to 50 GB) to the given repo. The upload is done through a HTTP post request.
+
+        This is a non-blocking method. Check out [`upload_file`] documentation to learn how to use it. The threaded version
+        starts a background job in a separate thread and returns a Future object. The goal of background jobs is to
+        avoid blocking the main thread for example in a training. You should not expect a gain in performances by
+        parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve
+        order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
+        documentation. 
+ """ + return self.thread_pool.submit( + self.upload_file, + path_or_fileobj=path_or_fileobj, + path_in_repo=path_in_repo, + repo_id=repo_id, + token=token, + repo_type=repo_type, + revision=revision, + commit_message=commit_message, + commit_description=commit_description, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + def upload_folder_threaded( + self, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + multi_commits: bool = False, + multi_commits_verbose: bool = False, + ): + """ + Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or + + This is a non-blocking method. Check out [`upload_folder`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit( + self.upload_folder, + repo_id=repo_id, + folder_path=folder_path, + path_in_repo=path_in_repo, + commit_message=commit_message, + commit_description=commit_description, + token=token, + repo_type=repo_type, + revision=revision, + create_pr=create_pr, + parent_commit=parent_commit, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + delete_patterns=delete_patterns, + multi_commits=multi_commits, + multi_commits_verbose=multi_commits_verbose, + ) + + def whoami_threaded(self, token: Optional[str] = None) -> Future[Dict]: + """ + Call HF API to know "whoami". + + This is a non-blocking method. Check out [`whoami`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation. 
+ """ + return self.thread_pool.submit(self.whoami, token) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 3dd66a154c..963c2f086f 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -17,10 +17,8 @@ import re import textwrap import warnings -from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field from datetime import datetime -from functools import wraps from itertools import islice from pathlib import Path from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -791,7 +789,7 @@ class UserLikes: spaces: List[str] -class HfApi: +class _HfApi: def __init__( self, endpoint: Optional[str] = None, @@ -831,45 +829,6 @@ def __init__( self.library_version = library_version self.user_agent = user_agent - # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main - # thread. Can be useful to upload data during a training. ThreadPoolExecutor is initialized the first time it's - # used. - self._thread_pool: Optional[ThreadPoolExecutor] = None - - def __getattribute__(self, key: str) -> Any: - """Wrap all methods of the class to make them detachable. - - Detachable methods can be run in the background by calling `.detach()` on them. Note that static type checking - is not possible on `.detach()` calls with the current implementation. - """ - attribute = super().__getattribute__(key) - if key.startswith("_") or not hasattr(attribute, "__self__"): - return attribute - - # Attribute is a method to wrap - method = attribute - - # If __self__ attribute is set, it means we are accessing a bounded method of the class. - # We want to wrap it to make it "threaded". Only public methods (i.e. not starting with "_") are wrapped. 
- class _threaded_method: - def __getattr__(self, key: str) -> Any: - return getattr(method, key) - - def __call__(_inner_self, *args, **kwargs): - # Default use case: run synchronously - return method(*args, **kwargs) - - def threaded(_inner_self, *args, **kwargs): - # If .threaded is called, queue it to the executor - if self._thread_pool is None: - self._thread_pool = ThreadPoolExecutor(max_workers=1) - return self._thread_pool.submit(method, *args, **kwargs) - - def __repr__(self) -> str: - return repr(method) - - return wraps(method)(_threaded_method()) - @validate_hf_hub_args def whoami(self, token: Optional[str] = None) -> Dict: """ @@ -4922,6 +4881,13 @@ def _parse_revision_from_pr_url(pr_url: str) -> str: return f"refs/pr/{re_match[1]}" +from ._threaded_hf_api import _ThreadedHfApi # noqa: E402 to avoid circular import + + +class HfApi(_ThreadedHfApi): + pass + + api = HfApi() whoami = api.whoami diff --git a/utils/check_threaded_hf_api.py b/utils/check_threaded_hf_api.py new file mode 100644 index 0000000000..66510987e7 --- /dev/null +++ b/utils/check_threaded_hf_api.py @@ -0,0 +1,195 @@ +# coding=utf-8 +# Copyright 2022-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Contains a tool to add/check the definition of "async" methods of `HfApi` in `huggingface_hub.hf_api.py`.""" +import argparse +import inspect +import os +import re +import tempfile +from pathlib import Path +from typing import NoReturn + +import black +from ruff.__main__ import find_ruff_bin + +from huggingface_hub.hf_api import _HfApi + + +TEMPLATE_START = ''' +# coding=utf-8 +# Copyright 2022-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +WARNING: this file is automatically generated by `utils/check_threaded_hf_api.py`. Do not edit it manually. +You can check it is up-to-date by running `make quality` and update its content with `make style` if needed. + +The content of this file is mostly based on HfApi implementation. +""" +from concurrent.futures import Future, ThreadPoolExecutor +from typing import Dict, Optional, Union + +from .hf_api import * # noqa: F403 +from .hf_api import _HfApi + + +class _ThreadedHfApi(_HfApi): + _thread_pool: Optional[ThreadPoolExecutor] = None + + @property + def thread_pool(self) -> ThreadPoolExecutor: + # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main + # thread. Can be useful to upload data during a training. ThreadPoolExecutor is initialized the first time it's + # used. Non-blocking methods are suffixed by `_threaded`. 
+ if self._thread_pool is None: + self._thread_pool = ThreadPoolExecutor(max_workers=1) + return self._thread_pool + +''' + +TEMPLATE_DOCSTRING = """ + This is a non-blocking method. Check out [`{method_name}`] documentation to learn how to use it. The threaded version + starts a background job in a separate thread and returns a Future object. The goal of background jobs is to + avoid blocking the main thread for example in a training. You should not expect a gain in performances by + parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve + order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) + documentation.""" + +INDENT = " " * 8 + +HF_API_FILE_PATH = Path(__file__).parents[1] / "src" / "huggingface_hub" / "hf_api.py" +HF_API_FILE_CONTENT = HF_API_FILE_PATH.read_text() + +THREADED_HF_API_FILE_PATH = Path(__file__).parents[1] / "src" / "huggingface_hub" / "_threaded_hf_api.py" + + +def generate_threaded_method(method_name: str) -> str: + """Generate code to define a threaded version of a method of HfApi.""" + group = re.search(rf'(def {method_name}\(.*?)"""(.*?)"""', HF_API_FILE_CONTENT, re.DOTALL) + assert group is not None + + # signature + signature = group.group(1).strip() + signature = signature.replace(method_name, f"{method_name}_threaded", 1) + signature = re.sub(r"-> (.*)?:", r"-> Future[\1]:", signature) + signature = f"\n {signature}" + + # docstring + for line in group.group(2).strip().splitlines(): + if line.strip(): + line = line.strip() + break + else: + raise ValueError(f"No docstring found for {method_name}.") + docstring = f'{INDENT}"""\n{INDENT}{line}\n{TEMPLATE_DOCSTRING.format(method_name=method_name)}\n{INDENT}"""' + + # code + submit_args = "" + for parameter in inspect.signature(getattr(_HfApi, method_name)).parameters.values(): + if parameter.name == "self": + continue + elif 
parameter.kind == parameter.POSITIONAL_ONLY or parameter.kind == parameter.POSITIONAL_OR_KEYWORD: + submit_args += f"{parameter.name}," + elif parameter.kind == parameter.VAR_POSITIONAL: + submit_args += f"*{parameter.name}," + elif parameter.kind == parameter.KEYWORD_ONLY: + submit_args += f"{parameter.name}={parameter.name}," + elif parameter.kind == parameter.VAR_KEYWORD: + submit_args += f"**{parameter.name}," + else: + raise ValueError(f"Unknown Parameter kind: {parameter}") + submit_line = f"{INDENT}return self.thread_pool.submit(self.{method_name}, {submit_args.strip(',')})" + + return signature + "\n" + docstring + "\n" + submit_line + "\n" + + +def generate_threaded_hf_api() -> str: + raw_code = TEMPLATE_START + for method_name, _ in inspect.getmembers(_HfApi, predicate=inspect.isfunction): + if method_name.startswith("_"): # skip private methods + continue + raw_code += generate_threaded_method(method_name) + return format_generated_code(raw_code) + + +def format_generated_code(code: str) -> str: + """ + Format some code with black+ruff. Cannot be done "on the fly" so we first save the code in a temporary file. + """ + # Format with black + code = black.format_file_contents(code, fast=False, mode=black.FileMode(line_length=119)) + + # Format with ruff + with tempfile.TemporaryDirectory() as tmpdir: + filepath = Path(tmpdir) / "__init__.py" + filepath.write_text(code) + ruff_bin = find_ruff_bin() + os.spawnv(os.P_WAIT, ruff_bin, ["ruff", str(filepath), "--fix", "--quiet", "--ignore=F405"]) + return filepath.read_text() + + +def check_threaded_hf_api(update: bool) -> NoReturn: + """Check that the code defining the threaded version of HfApi is up-to-date.""" + # If expected `__init__.py` content is different, test fails. If '--update-init-file' + # is used, `__init__.py` file is updated before the test fails. 
+ content = THREADED_HF_API_FILE_PATH.read_text() + expected_content = generate_threaded_hf_api() + if content != expected_content: + if update: + with THREADED_HF_API_FILE_PATH.open("w") as f: + f.write(expected_content) + + print( + "✅ _ThreadedHfApi implementation has been updated in `./src/huggingface_hub/_threaded_hf_api.py`." + "\n Please make sure the changes are accurate and commit them." + ) + exit(0) + else: + print( + "❌ Expected content mismatch in `./src/huggingface_hub/_threaded_hf_api.py`.\n It is most likely" + " that you modified `./src/huggingface_hub/hf_api.py`.\n Please run `make style` or `python" + " utils/check_threaded_hf_api.py --update`." + ) + exit(1) + + print("✅ All good! (threaded HfApi)") + exit(0) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--update", + action="store_true", + help="Whether to override `./src/huggingface_hub/_threaded_hf_api.py` if a change is detected.", + ) + args = parser.parse_args() + + check_threaded_hf_api(update=args.update) + +print(generate_threaded_hf_api()) +# import pdb + +# pdb.set_trace() From af118bc52041be643f454b9689628ce28459f746 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 9 May 2023 16:45:54 +0200 Subject: [PATCH 04/17] fix python37 --- src/huggingface_hub/_threaded_hf_api.py | 2 ++ utils/check_threaded_hf_api.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/huggingface_hub/_threaded_hf_api.py b/src/huggingface_hub/_threaded_hf_api.py index 1ba008e586..a0fd007357 100644 --- a/src/huggingface_hub/_threaded_hf_api.py +++ b/src/huggingface_hub/_threaded_hf_api.py @@ -18,6 +18,8 @@ The content of this file is mostly based on HfApi implementation. 
""" +from __future__ import annotations + from concurrent.futures import Future, ThreadPoolExecutor from typing import Dict, Optional, Union diff --git a/utils/check_threaded_hf_api.py b/utils/check_threaded_hf_api.py index 66510987e7..68d83cc7d9 100644 --- a/utils/check_threaded_hf_api.py +++ b/utils/check_threaded_hf_api.py @@ -48,6 +48,8 @@ The content of this file is mostly based on HfApi implementation. """ +from __future__ import annotations + from concurrent.futures import Future, ThreadPoolExecutor from typing import Dict, Optional, Union From c78917f104ac0fb008bb9c9b0fc6eb7ee3a0efbf Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 9 May 2023 16:49:58 +0200 Subject: [PATCH 05/17] useless changes --- src/huggingface_hub/utils/_typing.py | 4 ---- src/huggingface_hub/utils/_validators.py | 6 ++++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/huggingface_hub/utils/_typing.py b/src/huggingface_hub/utils/_typing.py index c8885eb1eb..812c65ea39 100644 --- a/src/huggingface_hub/utils/_typing.py +++ b/src/huggingface_hub/utils/_typing.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Handle typing imports based on system compatibility.""" import sys -from typing import Callable, TypeVar if sys.version_info >= (3, 8): @@ -23,6 +22,3 @@ from typing_extensions import Literal, TypedDict # noqa: F401 HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] - -# type hint meaning "function signature not changed by decorator" -CallableT = TypeVar("CallableT", bound=Callable) diff --git a/src/huggingface_hub/utils/_validators.py b/src/huggingface_hub/utils/_validators.py index 5dd64fa514..d6db7b1081 100644 --- a/src/huggingface_hub/utils/_validators.py +++ b/src/huggingface_hub/utils/_validators.py @@ -18,9 +18,11 @@ import warnings from functools import wraps from itertools import chain -from typing import Any, Dict +from typing import Any, Callable, Dict, TypeVar -from ._typing import CallableT + +# type hint meaning "function signature not changed by decorator" +CallableT = TypeVar("CallableT", bound=Callable) REPO_ID_REGEX = re.compile( From 9ec4ee1ebe5d2e8a83609006c9f3cd74e4a8a625 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 9 May 2023 17:09:24 +0200 Subject: [PATCH 06/17] add test --- tests/test_hf_api.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 47105ad872..edb0ac7001 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -2429,6 +2429,45 @@ def test_pause_and_restart_space(self) -> None: self.assertIn(runtime_after_restart.stage, (SpaceStage.BUILDING, SpaceStage.RUNNING_BUILDING)) +class TestThreadedAPI(HfApiCommonTest): + def test_create_upload_and_delete_in_background(self) -> None: + repo_id = f"{USER}/{repo_name()}" + + t0 = time.time() + create_repo_future = self._api.create_repo_threaded(repo_id) + upload_future_1 = self._api.upload_file_threaded( + path_or_fileobj=b"1", path_in_repo="file.txt", repo_id=repo_id, commit_message="Upload 1" + ) + upload_future_2 = self._api.upload_file_threaded( 
+ path_or_fileobj=b"2", path_in_repo="file.txt", repo_id=repo_id, commit_message="Upload 2" + ) + delete_file_future = self._api.delete_file_threaded( + path_in_repo="file.txt", repo_id=repo_id, commit_message="Delete 1" + ) + commits_future = self._api.list_repo_commits_threaded(repo_id=repo_id) + t1 = time.time() + + # all futures are queued instantly + self.assertLessEqual(t1 - t0, 0.01) + + # wait for the last job to complete + commits = commits_future.result() + + # all of them are not complete (ran in order) + self.assertTrue(create_repo_future.done()) + self.assertTrue(upload_future_1.done()) + self.assertTrue(upload_future_2.done()) + self.assertTrue(delete_file_future.done()) + self.assertTrue(commits_future.done()) + + # 4 commits, sorted in reverse order of creation + self.assertEqual(len(commits), 4) + self.assertEqual(commits[0].title, "Delete 1") + self.assertEqual(commits[1].title, "Upload 2") + self.assertEqual(commits[2].title, "Upload 1") + self.assertEqual(commits[3].title, "initial commit") + + class TestSpaceAPIMocked(unittest.TestCase): """ Testing Space hardware requests is resource intensive for the server (need to spawn From a40f4e28583fd533547c8b503327f93c624a9cc7 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 9 May 2023 17:19:35 +0200 Subject: [PATCH 07/17] update CI --- .github/workflows/python-quality.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml index 9c785d7109..21f5ea8fd2 100644 --- a/.github/workflows/python-quality.yml +++ b/.github/workflows/python-quality.yml @@ -31,6 +31,7 @@ jobs: - run: ruff tests src contrib - run: python utils/check_contrib_list.py - run: python utils/check_static_imports.py + - run: python utils/check_threaded_hf_api.py # Run type checking at least on huggingface_hub root file to check all modules # that can be lazy-loaded actually exist. 
From 164cec8404906e49e9642558b265cb01160dd42d Mon Sep 17 00:00:00 2001
From: Lucain Pouget
Date: Tue, 16 May 2023 16:53:21 +0200
Subject: [PATCH 08/17] first step to revert to as_future=True flag

---
 .github/workflows/python-quality.yml     |    2 +-
 Makefile                                 |    4 +-
 src/huggingface_hub/_multi_commits.py    |    4 +-
 src/huggingface_hub/_threaded_hf_api.py  | 1294 ----------------------
 src/huggingface_hub/hf_api.py            |   48 +-
 src/huggingface_hub/utils/_typing.py     |    4 +
 src/huggingface_hub/utils/_validators.py |    6 +-
 utils/check_threaded_hf_api.py           |  197 ----
 8 files changed, 50 insertions(+), 1509 deletions(-)
 delete mode 100644 src/huggingface_hub/_threaded_hf_api.py
 delete mode 100644 utils/check_threaded_hf_api.py

diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml
index 21f5ea8fd2..a9a0a3e1b6 100644
--- a/.github/workflows/python-quality.yml
+++ b/.github/workflows/python-quality.yml
@@ -31,7 +31,7 @@ jobs:
       - run: ruff tests src contrib
       - run: python utils/check_contrib_list.py
       - run: python utils/check_static_imports.py
-      - run: python utils/check_threaded_hf_api.py
+      #- run: python utils/check_threaded_hf_api.py

 # Run type checking at least on huggingface_hub root file to check all modules
 # that can be lazy-loaded actually exist.
diff --git a/Makefile b/Makefile index 3a6b8accf4..4584ba803d 100644 --- a/Makefile +++ b/Makefile @@ -10,14 +10,14 @@ quality: mypy src python utils/check_contrib_list.py python utils/check_static_imports.py - python utils/check_threaded_hf_api.py + # python utils/check_threaded_hf_api.py style: black $(check_dirs) ruff $(check_dirs) --fix python utils/check_contrib_list.py --update python utils/check_static_imports.py --update - python utils/check_threaded_hf_api.py --update + # python utils/check_threaded_hf_api.py --update repocard: python utils/push_repocard_examples.py diff --git a/src/huggingface_hub/_multi_commits.py b/src/huggingface_hub/_multi_commits.py index 585180a950..20576eaa86 100644 --- a/src/huggingface_hub/_multi_commits.py +++ b/src/huggingface_hub/_multi_commits.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: - from .hf_api import _HfApi + from .hf_api import HfApi class MultiCommitException(Exception): @@ -267,7 +267,7 @@ def __post_init__(self) -> None: def multi_commit_create_pull_request( - api: "_HfApi", + api: "HfApi", repo_id: str, commit_message: str, commit_description: Optional[str], diff --git a/src/huggingface_hub/_threaded_hf_api.py b/src/huggingface_hub/_threaded_hf_api.py deleted file mode 100644 index a0fd007357..0000000000 --- a/src/huggingface_hub/_threaded_hf_api.py +++ /dev/null @@ -1,1294 +0,0 @@ -# coding=utf-8 -# Copyright 2022-present, the HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -WARNING: this file is automatically generated by `utils/check_threaded_hf_api.py`. Do not edit it manually. -You can check it is up-to-date by running `make quality` and update its content with `make style` if needed. - -The content of this file is mostly based on HfApi implementation. -""" -from __future__ import annotations - -from concurrent.futures import Future, ThreadPoolExecutor -from typing import Dict, Optional, Union - -from .hf_api import * # noqa: F403 -from .hf_api import _HfApi - - -class _ThreadedHfApi(_HfApi): - _thread_pool: Optional[ThreadPoolExecutor] = None - - @property - def thread_pool(self) -> ThreadPoolExecutor: - # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main - # thread. Can be useful to upload data during a training. ThreadPoolExecutor is initialized the first time it's - # used. Non-blocking methods are suffixed by `_threaded`. - if self._thread_pool is None: - self._thread_pool = ThreadPoolExecutor(max_workers=1) - return self._thread_pool - - def add_space_secret_threaded( - self, repo_id: str, key: str, value: str, *, token: Optional[str] = None - ) -> Future[None]: - """ - Adds or updates a secret in a Space. - - This is a non-blocking method. Check out [`add_space_secret`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit(self.add_space_secret, repo_id, key, value, token=token) - - def change_discussion_status_threaded( - self, - repo_id: str, - discussion_num: int, - new_status: Literal["open", "closed"], - *, - token: Optional[str] = None, - comment: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[DiscussionStatusChange]: - """ - Closes or re-opens a Discussion or Pull Request. - - This is a non-blocking method. Check out [`change_discussion_status`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.change_discussion_status, - repo_id, - discussion_num, - new_status, - token=token, - comment=comment, - repo_type=repo_type, - ) - - def comment_discussion_threaded( - self, - repo_id: str, - discussion_num: int, - comment: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[DiscussionComment]: - """ - Creates a new comment on the given Discussion. - - This is a non-blocking method. Check out [`comment_discussion`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.comment_discussion, repo_id, discussion_num, comment, token=token, repo_type=repo_type - ) - - def create_branch_threaded( - self, - repo_id: str, - *, - branch: str, - revision: Optional[str] = None, - token: Optional[str] = None, - repo_type: Optional[str] = None, - exist_ok: bool = False, - ) -> Future[None]: - """ - Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`). - - This is a non-blocking method. Check out [`create_branch`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.create_branch, - repo_id, - branch=branch, - revision=revision, - token=token, - repo_type=repo_type, - exist_ok=exist_ok, - ) - - def create_commit_threaded( - self, - repo_id: str, - operations: Iterable[CommitOperation], - *, - commit_message: str, - commit_description: Optional[str] = None, - token: Optional[str] = None, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - create_pr: Optional[bool] = None, - num_threads: int = 5, - parent_commit: Optional[str] = None, - ) -> Future[CommitInfo]: - """ - Creates a commit in the given repo, deleting & uploading files as needed. - - This is a non-blocking method. 
Check out [`create_commit`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.create_commit, - repo_id, - operations, - commit_message=commit_message, - commit_description=commit_description, - token=token, - repo_type=repo_type, - revision=revision, - create_pr=create_pr, - num_threads=num_threads, - parent_commit=parent_commit, - ) - - def create_commits_on_pr_threaded( - self, - *, - repo_id: str, - addition_commits: List[List[CommitOperationAdd]], - deletion_commits: List[List[CommitOperationDelete]], - commit_message: str, - commit_description: Optional[str] = None, - token: Optional[str] = None, - repo_type: Optional[str] = None, - merge_pr: bool = True, - num_threads: int = 5, # TODO: use to multithread uploads - verbose: bool = False, - ) -> Future[str]: - """ - Push changes to the Hub in multiple commits. - - This is a non-blocking method. Check out [`create_commits_on_pr`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.create_commits_on_pr, - repo_id=repo_id, - addition_commits=addition_commits, - deletion_commits=deletion_commits, - commit_message=commit_message, - commit_description=commit_description, - token=token, - repo_type=repo_type, - merge_pr=merge_pr, - num_threads=num_threads, - verbose=verbose, - ) - - def create_discussion_threaded( - self, - repo_id: str, - title: str, - *, - token: Optional[str] = None, - description: Optional[str] = None, - repo_type: Optional[str] = None, - pull_request: bool = False, - ) -> Future[DiscussionWithDetails]: - """ - Creates a Discussion or Pull Request. - - This is a non-blocking method. Check out [`create_discussion`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.create_discussion, - repo_id, - title, - token=token, - description=description, - repo_type=repo_type, - pull_request=pull_request, - ) - - def create_pull_request_threaded( - self, - repo_id: str, - title: str, - *, - token: Optional[str] = None, - description: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[DiscussionWithDetails]: - """ - Creates a Pull Request . Pull Requests created programmatically will be in `"draft"` status. - - This is a non-blocking method. 
Check out [`create_pull_request`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.create_pull_request, repo_id, title, token=token, description=description, repo_type=repo_type - ) - - def create_repo_threaded( - self, - repo_id: str, - *, - token: Optional[str] = None, - private: bool = False, - repo_type: Optional[str] = None, - exist_ok: bool = False, - space_sdk: Optional[str] = None, - space_hardware: Optional[str] = None, - ) -> Future[RepoUrl]: - """ - Create an empty repo on the HuggingFace Hub. - - This is a non-blocking method. Check out [`create_repo`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit( - self.create_repo, - repo_id, - token=token, - private=private, - repo_type=repo_type, - exist_ok=exist_ok, - space_sdk=space_sdk, - space_hardware=space_hardware, - ) - - def create_tag_threaded( - self, - repo_id: str, - *, - tag: str, - tag_message: Optional[str] = None, - revision: Optional[str] = None, - token: Optional[str] = None, - repo_type: Optional[str] = None, - exist_ok: bool = False, - ) -> Future[None]: - """ - Tag a given commit of a repo on the Hub. - - This is a non-blocking method. Check out [`create_tag`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.create_tag, - repo_id, - tag=tag, - tag_message=tag_message, - revision=revision, - token=token, - repo_type=repo_type, - exist_ok=exist_ok, - ) - - def dataset_info_threaded( - self, - repo_id: str, - *, - revision: Optional[str] = None, - timeout: Optional[float] = None, - files_metadata: bool = False, - token: Optional[Union[bool, str]] = None, - ) -> Future[DatasetInfo]: - """ - Get info on one specific dataset on huggingface.co. - - This is a non-blocking method. Check out [`dataset_info`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.dataset_info, repo_id, revision=revision, timeout=timeout, files_metadata=files_metadata, token=token - ) - - def delete_branch_threaded( - self, - repo_id: str, - *, - branch: str, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[None]: - """ - Delete a branch from a repo on the Hub. - - This is a non-blocking method. Check out [`delete_branch`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.delete_branch, repo_id, branch=branch, token=token, repo_type=repo_type) - - def delete_file_threaded( - self, - path_in_repo: str, - repo_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - create_pr: Optional[bool] = None, - parent_commit: Optional[str] = None, - ) -> Future[CommitInfo]: - """ - Deletes a file in the given repo. - - This is a non-blocking method. Check out [`delete_file`] documentation to learn how to use it. 
The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.delete_file, - path_in_repo, - repo_id, - token=token, - repo_type=repo_type, - revision=revision, - commit_message=commit_message, - commit_description=commit_description, - create_pr=create_pr, - parent_commit=parent_commit, - ) - - def delete_folder_threaded( - self, - path_in_repo: str, - repo_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - create_pr: Optional[bool] = None, - parent_commit: Optional[str] = None, - ) -> Future[CommitInfo]: - """ - Deletes a folder in the given repo. - - This is a non-blocking method. Check out [`delete_folder`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit( - self.delete_folder, - path_in_repo, - repo_id, - token=token, - repo_type=repo_type, - revision=revision, - commit_message=commit_message, - commit_description=commit_description, - create_pr=create_pr, - parent_commit=parent_commit, - ) - - def delete_repo_threaded( - self, - repo_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ): - """ - Delete a repo from the HuggingFace Hub. CAUTION: this is irreversible. - - This is a non-blocking method. Check out [`delete_repo`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.delete_repo, repo_id, token=token, repo_type=repo_type) - - def delete_space_secret_threaded(self, repo_id: str, key: str, *, token: Optional[str] = None) -> Future[None]: - """ - Deletes a secret from a Space. - - This is a non-blocking method. Check out [`delete_space_secret`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.delete_space_secret, repo_id, key, token=token) - - def delete_tag_threaded( - self, - repo_id: str, - *, - tag: str, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[None]: - """ - Delete a tag from a repo on the Hub. - - This is a non-blocking method. Check out [`delete_tag`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.delete_tag, repo_id, tag=tag, token=token, repo_type=repo_type) - - def duplicate_space_threaded( - self, - from_id: str, - to_id: Optional[str] = None, - *, - private: Optional[bool] = None, - token: Optional[str] = None, - exist_ok: bool = False, - ) -> Future[RepoUrl]: - """ - Duplicate a Space. - - This is a non-blocking method. Check out [`duplicate_space`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.duplicate_space, from_id, to_id, private=private, token=token, exist_ok=exist_ok - ) - - def edit_discussion_comment_threaded( - self, - repo_id: str, - discussion_num: int, - comment_id: str, - new_content: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[DiscussionComment]: - """ - Edits a comment on a Discussion / Pull Request. - - This is a non-blocking method. Check out [`edit_discussion_comment`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.edit_discussion_comment, - repo_id, - discussion_num, - comment_id, - new_content, - token=token, - repo_type=repo_type, - ) - - def get_dataset_tags_threaded(self) -> Future[DatasetTags]: - """ - List all valid dataset tags as a nested namespace object. - - This is a non-blocking method. Check out [`get_dataset_tags`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.get_dataset_tags, - ) - - def get_discussion_details_threaded( - self, - repo_id: str, - discussion_num: int, - *, - repo_type: Optional[str] = None, - token: Optional[str] = None, - ) -> Future[DiscussionWithDetails]: - """ - Fetches a Discussion's / Pull Request 's details from the Hub. - - This is a non-blocking method. Check out [`get_discussion_details`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.get_discussion_details, repo_id, discussion_num, repo_type=repo_type, token=token - ) - - def get_full_repo_name_threaded( - self, - model_id: str, - *, - organization: Optional[str] = None, - token: Optional[Union[bool, str]] = None, - ): - """ - Returns the repository name for a given model ID and optional - - This is a non-blocking method. Check out [`get_full_repo_name`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.get_full_repo_name, model_id, organization=organization, token=token) - - def get_model_tags_threaded(self) -> Future[ModelTags]: - """ - List all valid model tags as a nested namespace object - - This is a non-blocking method. Check out [`get_model_tags`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.get_model_tags, - ) - - def get_repo_discussions_threaded( - self, - repo_id: str, - *, - repo_type: Optional[str] = None, - token: Optional[str] = None, - ) -> Future[Iterator[Discussion]]: - """ - Fetches Discussions and Pull Requests for the given repo. - - This is a non-blocking method. Check out [`get_repo_discussions`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.get_repo_discussions, repo_id, repo_type=repo_type, token=token) - - def get_space_runtime_threaded(self, repo_id: str, *, token: Optional[str] = None) -> Future[SpaceRuntime]: - """ - Gets runtime information about a Space. - - This is a non-blocking method. Check out [`get_space_runtime`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.get_space_runtime, repo_id, token=token) - - def hide_discussion_comment_threaded( - self, - repo_id: str, - discussion_num: int, - comment_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[DiscussionComment]: - """ - Hides a comment on a Discussion / Pull Request. - - This is a non-blocking method. Check out [`hide_discussion_comment`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. 
If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.hide_discussion_comment, repo_id, discussion_num, comment_id, token=token, repo_type=repo_type - ) - - def like_threaded( - self, - repo_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[None]: - """ - Like a given repo on the Hub (e.g. set as favorite). - - This is a non-blocking method. Check out [`like`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.like, repo_id, token=token, repo_type=repo_type) - - def list_datasets_threaded( - self, - *, - filter: Union[DatasetFilter, str, Iterable[str], None] = None, - author: Optional[str] = None, - search: Optional[str] = None, - sort: Union[Literal["lastModified"], str, None] = None, - direction: Optional[Literal[-1]] = None, - limit: Optional[int] = None, - cardData: Optional[bool] = None, # deprecated - full: Optional[bool] = None, - token: Optional[str] = None, - ) -> Future[List[DatasetInfo]]: - """ - Get the list of all the datasets on huggingface.co - - This is a non-blocking method. Check out [`list_datasets`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.list_datasets, - filter=filter, - author=author, - search=search, - sort=sort, - direction=direction, - limit=limit, - cardData=cardData, - full=full, - token=token, - ) - - def list_files_info_threaded( - self, - repo_id: str, - paths: Union[List[str], str, None] = None, - *, - expand: bool = False, - revision: Optional[str] = None, - repo_type: Optional[str] = None, - token: Optional[Union[bool, str]] = None, - ) -> Future[Iterable[RepoFile]]: - """ - List files on a repo and get information about them. - - This is a non-blocking method. Check out [`list_files_info`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.list_files_info, repo_id, paths, expand=expand, revision=revision, repo_type=repo_type, token=token - ) - - def list_liked_repos_threaded( - self, - user: Optional[str] = None, - *, - token: Optional[str] = None, - ) -> Future[UserLikes]: - """ - List all public repos liked by a user on huggingface.co. 
- - This is a non-blocking method. Check out [`list_liked_repos`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.list_liked_repos, user, token=token) - - def list_metrics_threaded(self) -> Future[List[MetricInfo]]: - """ - Get the public list of all the metrics on huggingface.co - - This is a non-blocking method. Check out [`list_metrics`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit( - self.list_metrics, - ) - - def list_models_threaded( - self, - *, - filter: Union[ModelFilter, str, Iterable[str], None] = None, - author: Optional[str] = None, - search: Optional[str] = None, - emissions_thresholds: Optional[Tuple[float, float]] = None, - sort: Union[Literal["lastModified"], str, None] = None, - direction: Optional[Literal[-1]] = None, - limit: Optional[int] = None, - full: Optional[bool] = None, - cardData: bool = False, - fetch_config: bool = False, - token: Optional[Union[bool, str]] = None, - ) -> Future[List[ModelInfo]]: - """ - Get the list of all the models on huggingface.co - - This is a non-blocking method. Check out [`list_models`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.list_models, - filter=filter, - author=author, - search=search, - emissions_thresholds=emissions_thresholds, - sort=sort, - direction=direction, - limit=limit, - full=full, - cardData=cardData, - fetch_config=fetch_config, - token=token, - ) - - def list_repo_commits_threaded( - self, - repo_id: str, - *, - repo_type: Optional[str] = None, - token: Optional[Union[bool, str]] = None, - revision: Optional[str] = None, - formatted: bool = False, - ) -> Future[List[GitCommitInfo]]: - """ - Get the list of commits of a given revision for a repo on the Hub. - - This is a non-blocking method. Check out [`list_repo_commits`] documentation to learn how to use it. 
The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.list_repo_commits, repo_id, repo_type=repo_type, token=token, revision=revision, formatted=formatted - ) - - def list_repo_files_threaded( - self, - repo_id: str, - *, - revision: Optional[str] = None, - repo_type: Optional[str] = None, - timeout: Optional[float] = None, - token: Optional[Union[bool, str]] = None, - ) -> Future[List[str]]: - """ - Get the list of files in a given repo. - - This is a non-blocking method. Check out [`list_repo_files`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.list_repo_files, repo_id, revision=revision, repo_type=repo_type, timeout=timeout, token=token - ) - - def list_repo_refs_threaded( - self, - repo_id: str, - *, - repo_type: Optional[str] = None, - token: Optional[Union[bool, str]] = None, - ) -> Future[GitRefs]: - """ - Get the list of refs of a given repo (both tags and branches). 
- - This is a non-blocking method. Check out [`list_repo_refs`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.list_repo_refs, repo_id, repo_type=repo_type, token=token) - - def list_spaces_threaded( - self, - *, - filter: Union[str, Iterable[str], None] = None, - author: Optional[str] = None, - search: Optional[str] = None, - sort: Union[Literal["lastModified"], str, None] = None, - direction: Optional[Literal[-1]] = None, - limit: Optional[int] = None, - datasets: Union[str, Iterable[str], None] = None, - models: Union[str, Iterable[str], None] = None, - linked: bool = False, - full: Optional[bool] = None, - token: Optional[str] = None, - ) -> Future[List[SpaceInfo]]: - """ - Get the public list of all Spaces on huggingface.co - - This is a non-blocking method. Check out [`list_spaces`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit( - self.list_spaces, - filter=filter, - author=author, - search=search, - sort=sort, - direction=direction, - limit=limit, - datasets=datasets, - models=models, - linked=linked, - full=full, - token=token, - ) - - def merge_pull_request_threaded( - self, - repo_id: str, - discussion_num: int, - *, - token: Optional[str] = None, - comment: Optional[str] = None, - repo_type: Optional[str] = None, - ): - """ - Merges a Pull Request. - - This is a non-blocking method. Check out [`merge_pull_request`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.merge_pull_request, repo_id, discussion_num, token=token, comment=comment, repo_type=repo_type - ) - - def model_info_threaded( - self, - repo_id: str, - *, - revision: Optional[str] = None, - timeout: Optional[float] = None, - securityStatus: Optional[bool] = None, - files_metadata: bool = False, - token: Optional[Union[bool, str]] = None, - ) -> Future[ModelInfo]: - """ - Get info on one specific model on huggingface.co - - This is a non-blocking method. Check out [`model_info`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.model_info, - repo_id, - revision=revision, - timeout=timeout, - securityStatus=securityStatus, - files_metadata=files_metadata, - token=token, - ) - - def move_repo_threaded( - self, - from_id: str, - to_id: str, - *, - repo_type: Optional[str] = None, - token: Optional[str] = None, - ): - """ - Moving a repository from namespace1/repo_name1 to namespace2/repo_name2 - - This is a non-blocking method. Check out [`move_repo`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.move_repo, from_id, to_id, repo_type=repo_type, token=token) - - def pause_space_threaded(self, repo_id: str, *, token: Optional[str] = None) -> Future[SpaceRuntime]: - """ - Pause your Space. - - This is a non-blocking method. Check out [`pause_space`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. 
You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.pause_space, repo_id, token=token) - - def rename_discussion_threaded( - self, - repo_id: str, - discussion_num: int, - new_title: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[DiscussionTitleChange]: - """ - Renames a Discussion. - - This is a non-blocking method. Check out [`rename_discussion`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.rename_discussion, repo_id, discussion_num, new_title, token=token, repo_type=repo_type - ) - - def repo_info_threaded( - self, - repo_id: str, - *, - revision: Optional[str] = None, - repo_type: Optional[str] = None, - timeout: Optional[float] = None, - files_metadata: bool = False, - token: Optional[Union[bool, str]] = None, - ) -> Future[Union[ModelInfo, DatasetInfo, SpaceInfo]]: - """ - Get the info object for a given repo of a given type. - - This is a non-blocking method. Check out [`repo_info`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.repo_info, - repo_id, - revision=revision, - repo_type=repo_type, - timeout=timeout, - files_metadata=files_metadata, - token=token, - ) - - def request_space_hardware_threaded( - self, - repo_id: str, - hardware: SpaceHardware, - *, - token: Optional[str] = None, - sleep_time: Optional[int] = None, - ) -> Future[SpaceRuntime]: - """ - Request new hardware for a Space. - - This is a non-blocking method. Check out [`request_space_hardware`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.request_space_hardware, repo_id, hardware, token=token, sleep_time=sleep_time - ) - - def restart_space_threaded(self, repo_id: str, *, token: Optional[str] = None) -> Future[SpaceRuntime]: - """ - Restart your Space. - - This is a non-blocking method. Check out [`restart_space`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.restart_space, repo_id, token=token) - - def set_space_sleep_time_threaded( - self, repo_id: str, sleep_time: int, *, token: Optional[str] = None - ) -> Future[SpaceRuntime]: - """ - Set a custom sleep time for a Space running on upgraded hardware.. - - This is a non-blocking method. Check out [`set_space_sleep_time`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.set_space_sleep_time, repo_id, sleep_time, token=token) - - def space_info_threaded( - self, - repo_id: str, - *, - revision: Optional[str] = None, - timeout: Optional[float] = None, - files_metadata: bool = False, - token: Optional[Union[bool, str]] = None, - ) -> Future[SpaceInfo]: - """ - Get info on one specific Space on huggingface.co. - - This is a non-blocking method. Check out [`space_info`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. 
The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.space_info, repo_id, revision=revision, timeout=timeout, files_metadata=files_metadata, token=token - ) - - def unlike_threaded( - self, - repo_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> Future[None]: - """ - Unlike a given repo on the Hub (e.g. remove from favorite list). - - This is a non-blocking method. Check out [`unlike`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.unlike, repo_id, token=token, repo_type=repo_type) - - def update_repo_visibility_threaded( - self, - repo_id: str, - private: bool = False, - *, - token: Optional[str] = None, - organization: Optional[str] = None, - repo_type: Optional[str] = None, - name: Optional[str] = None, - ) -> Future[Dict[str, bool]]: - """ - Update the visibility setting of a repository. - - This is a non-blocking method. Check out [`update_repo_visibility`] documentation to learn how to use it. 
The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit( - self.update_repo_visibility, - repo_id, - private, - token=token, - organization=organization, - repo_type=repo_type, - name=name, - ) - - def upload_file_threaded( - self, - *, - path_or_fileobj: Union[str, Path, bytes, BinaryIO], - path_in_repo: str, - repo_id: str, - token: Optional[str] = None, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - create_pr: Optional[bool] = None, - parent_commit: Optional[str] = None, - ) -> Future[str]: - """ - Upload a local file (up to 50 GB) to the given repo. The upload is done - - This is a non-blocking method. Check out [`upload_file`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit( - self.upload_file, - path_or_fileobj=path_or_fileobj, - path_in_repo=path_in_repo, - repo_id=repo_id, - token=token, - repo_type=repo_type, - revision=revision, - commit_message=commit_message, - commit_description=commit_description, - create_pr=create_pr, - parent_commit=parent_commit, - ) - - def upload_folder_threaded( - self, - *, - repo_id: str, - folder_path: Union[str, Path], - path_in_repo: Optional[str] = None, - commit_message: Optional[str] = None, - commit_description: Optional[str] = None, - token: Optional[str] = None, - repo_type: Optional[str] = None, - revision: Optional[str] = None, - create_pr: Optional[bool] = None, - parent_commit: Optional[str] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, - delete_patterns: Optional[Union[List[str], str]] = None, - multi_commits: bool = False, - multi_commits_verbose: bool = False, - ): - """ - Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or - - This is a non-blocking method. Check out [`upload_folder`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. 
- """ - return self.thread_pool.submit( - self.upload_folder, - repo_id=repo_id, - folder_path=folder_path, - path_in_repo=path_in_repo, - commit_message=commit_message, - commit_description=commit_description, - token=token, - repo_type=repo_type, - revision=revision, - create_pr=create_pr, - parent_commit=parent_commit, - allow_patterns=allow_patterns, - ignore_patterns=ignore_patterns, - delete_patterns=delete_patterns, - multi_commits=multi_commits, - multi_commits_verbose=multi_commits_verbose, - ) - - def whoami_threaded(self, token: Optional[str] = None) -> Future[Dict]: - """ - Call HF API to know "whoami". - - This is a non-blocking method. Check out [`whoami`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation. - """ - return self.thread_pool.submit(self.whoami, token) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 63df591c5d..1c3c61e8df 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import inspect import json import pprint import re @@ -19,6 +20,7 @@ import warnings from dataclasses import dataclass, field from datetime import datetime +from functools import wraps from itertools import islice from pathlib import Path from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -91,7 +93,7 @@ from .utils._deprecation import ( _deprecate_arguments, ) -from .utils._typing import Literal, TypedDict +from .utils._typing import CallableT, Literal, TypedDict from .utils.endpoint_helpers import ( AttributeDictionary, DatasetFilter, @@ -788,7 +790,42 @@ class UserLikes: spaces: List[str] -class _HfApi: +def future_compatible(fn: CallableT) -> CallableT: + """Wrap a method of `HfApi` to handle `as_future=True`. + + A method flagged as "future_compatible" will be called in a thread if `as_future=True` and return a + `concurrent.futures.Future` instance. Otherwise, it will be called normally and return the result. + + This decorator is useful to make a method compatible with both synchronous and asynchronous code. 
+ """ + sig = inspect.signature(fn) + args_params = list(sig.parameters)[1:] # remove "self" from list + + @wraps(fn) + def _inner(self, *args, **kwargs): + # Get `as_future` value if provided (default to False) + if "as_future" in kwargs: + as_future = kwargs["as_future"] + kwargs["as_future"] = False # avoid recursion error + else: + as_future = False + for param, value in zip(args_params, args): + if param == "as_future": + as_future = value + break + + # Call the function in a thread if `as_future=True` + if as_future: + return self.pool.submit(fn, self, *args, **kwargs) + + # Otherwise, call the function normally + return fn(self, *args, **kwargs) + + _inner.is_future_compatible = True # type: ignore + return _inner # type: ignore + + +class HfApi: def __init__( self, endpoint: Optional[str] = None, @@ -4866,13 +4903,6 @@ def _parse_revision_from_pr_url(pr_url: str) -> str: return f"refs/pr/{re_match[1]}" -from ._threaded_hf_api import _ThreadedHfApi # noqa: E402 to avoid circular import - - -class HfApi(_ThreadedHfApi): - pass - - api = HfApi() whoami = api.whoami diff --git a/src/huggingface_hub/utils/_typing.py b/src/huggingface_hub/utils/_typing.py index 812c65ea39..c8885eb1eb 100644 --- a/src/huggingface_hub/utils/_typing.py +++ b/src/huggingface_hub/utils/_typing.py @@ -14,6 +14,7 @@ # limitations under the License. 
"""Handle typing imports based on system compatibility.""" import sys +from typing import Callable, TypeVar if sys.version_info >= (3, 8): @@ -22,3 +23,6 @@ from typing_extensions import Literal, TypedDict # noqa: F401 HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] + +# type hint meaning "function signature not changed by decorator" +CallableT = TypeVar("CallableT", bound=Callable) diff --git a/src/huggingface_hub/utils/_validators.py b/src/huggingface_hub/utils/_validators.py index d6db7b1081..5dd64fa514 100644 --- a/src/huggingface_hub/utils/_validators.py +++ b/src/huggingface_hub/utils/_validators.py @@ -18,11 +18,9 @@ import warnings from functools import wraps from itertools import chain -from typing import Any, Callable, Dict, TypeVar +from typing import Any, Dict - -# type hint meaning "function signature not changed by decorator" -CallableT = TypeVar("CallableT", bound=Callable) +from ._typing import CallableT REPO_ID_REGEX = re.compile( diff --git a/utils/check_threaded_hf_api.py b/utils/check_threaded_hf_api.py deleted file mode 100644 index 68d83cc7d9..0000000000 --- a/utils/check_threaded_hf_api.py +++ /dev/null @@ -1,197 +0,0 @@ -# coding=utf-8 -# Copyright 2022-present, the HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Contains a tool to add/check the definition of "async" methods of `HfApi` in `huggingface_hub.hf_api.py`.""" -import argparse -import inspect -import os -import re -import tempfile -from pathlib import Path -from typing import NoReturn - -import black -from ruff.__main__ import find_ruff_bin - -from huggingface_hub.hf_api import _HfApi - - -TEMPLATE_START = ''' -# coding=utf-8 -# Copyright 2022-present, the HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -WARNING: this file is automatically generated by `utils/check_threaded_hf_api.py`. Do not edit it manually. -You can check it is up-to-date by running `make quality` and update its content with `make style` if needed. - -The content of this file is mostly based on HfApi implementation. -""" -from __future__ import annotations - -from concurrent.futures import Future, ThreadPoolExecutor -from typing import Dict, Optional, Union - -from .hf_api import * # noqa: F403 -from .hf_api import _HfApi - - -class _ThreadedHfApi(_HfApi): - _thread_pool: Optional[ThreadPoolExecutor] = None - - @property - def thread_pool(self) -> ThreadPoolExecutor: - # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main - # thread. Can be useful to upload data during a training. ThreadPoolExecutor is initialized the first time it's - # used. Non-blocking methods are suffixed by `_threaded`. 
- if self._thread_pool is None: - self._thread_pool = ThreadPoolExecutor(max_workers=1) - return self._thread_pool - -''' - -TEMPLATE_DOCSTRING = """ - This is a non-blocking method. Check out [`{method_name}`] documentation to learn how to use it. The threaded version - starts a background job in a separate thread and returns a Future object. The goal of background jobs is to - avoid blocking the main thread for example in a training. You should not expect a gain in performances by - parallelizing tasks with multiple threads as we favored a solution where jobs are run sequentially to preserve - order. If you need more flexibility, you can have a look to the [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) - documentation.""" - -INDENT = " " * 8 - -HF_API_FILE_PATH = Path(__file__).parents[1] / "src" / "huggingface_hub" / "hf_api.py" -HF_API_FILE_CONTENT = HF_API_FILE_PATH.read_text() - -THREADED_HF_API_FILE_PATH = Path(__file__).parents[1] / "src" / "huggingface_hub" / "_threaded_hf_api.py" - - -def generate_threaded_method(method_name: str) -> str: - """Generate code to define a threaded version of a method of HfApi.""" - group = re.search(rf'(def {method_name}\(.*?)"""(.*?)"""', HF_API_FILE_CONTENT, re.DOTALL) - assert group is not None - - # signature - signature = group.group(1).strip() - signature = signature.replace(method_name, f"{method_name}_threaded", 1) - signature = re.sub(r"-> (.*)?:", r"-> Future[\1]:", signature) - signature = f"\n {signature}" - - # docstring - for line in group.group(2).strip().splitlines(): - if line.strip(): - line = line.strip() - break - else: - raise ValueError(f"No docstring found for {method_name}.") - docstring = f'{INDENT}"""\n{INDENT}{line}\n{TEMPLATE_DOCSTRING.format(method_name=method_name)}\n{INDENT}"""' - - # code - submit_args = "" - for parameter in inspect.signature(getattr(_HfApi, method_name)).parameters.values(): - if parameter.name == "self": - continue - elif 
parameter.kind == parameter.POSITIONAL_ONLY or parameter.kind == parameter.POSITIONAL_OR_KEYWORD: - submit_args += f"{parameter.name}," - elif parameter.kind == parameter.VAR_POSITIONAL: - submit_args += f"*{parameter.name}," - elif parameter.kind == parameter.KEYWORD_ONLY: - submit_args += f"{parameter.name}={parameter.name}," - elif parameter.kind == parameter.VAR_KEYWORD: - submit_args += f"**{parameter.name}," - else: - raise ValueError(f"Unknown Parameter kind: {parameter}") - submit_line = f"{INDENT}return self.thread_pool.submit(self.{method_name}, {submit_args.strip(',')})" - - return signature + "\n" + docstring + "\n" + submit_line + "\n" - - -def generate_threaded_hf_api() -> str: - raw_code = TEMPLATE_START - for method_name, _ in inspect.getmembers(_HfApi, predicate=inspect.isfunction): - if method_name.startswith("_"): # skip private methods - continue - raw_code += generate_threaded_method(method_name) - return format_generated_code(raw_code) - - -def format_generated_code(code: str) -> str: - """ - Format some code with black+ruff. Cannot be done "on the fly" so we first save the code in a temporary file. - """ - # Format with black - code = black.format_file_contents(code, fast=False, mode=black.FileMode(line_length=119)) - - # Format with ruff - with tempfile.TemporaryDirectory() as tmpdir: - filepath = Path(tmpdir) / "__init__.py" - filepath.write_text(code) - ruff_bin = find_ruff_bin() - os.spawnv(os.P_WAIT, ruff_bin, ["ruff", str(filepath), "--fix", "--quiet", "--ignore=F405"]) - return filepath.read_text() - - -def check_threaded_hf_api(update: bool) -> NoReturn: - """Check that the code defining the threaded version of HfApi is up-to-date.""" - # If expected `__init__.py` content is different, test fails. If '--update-init-file' - # is used, `__init__.py` file is updated before the test fails. 
- content = THREADED_HF_API_FILE_PATH.read_text() - expected_content = generate_threaded_hf_api() - if content != expected_content: - if update: - with THREADED_HF_API_FILE_PATH.open("w") as f: - f.write(expected_content) - - print( - "✅ _ThreadedHfApi implementation has been updated in `./src/huggingface_hub/_threaded_hf_api.py`." - "\n Please make sure the changes are accurate and commit them." - ) - exit(0) - else: - print( - "❌ Expected content mismatch in `./src/huggingface_hub/_threaded_hf_api.py`.\n It is most likely" - " that you modified `./src/huggingface_hub/hf_api.py`.\n Please run `make style` or `python" - " utils/check_threaded_hf_api.py --update`." - ) - exit(1) - - print("✅ All good! (threaded HfApi)") - exit(0) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--update", - action="store_true", - help="Whether to override `./src/huggingface_hub/_threaded_hf_api.py` if a change is detected.", - ) - args = parser.parse_args() - - check_threaded_hf_api(update=args.update) - -print(generate_threaded_hf_api()) -# import pdb - -# pdb.set_trace() From 0ccb4ff821bed4a3ea1c8a1122e9de7fd0d6aac9 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 09:49:03 +0200 Subject: [PATCH 09/17] Make create_commit, upload_file and upload_folder future-compatible --- .github/workflows/python-quality.yml | 1 - Makefile | 2 - pyproject.toml | 3 - setup.cfg | 2 - src/huggingface_hub/hf_api.py | 167 +++++++++++++++++++++++++-- 5 files changed, 156 insertions(+), 19 deletions(-) diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml index a9a0a3e1b6..9c785d7109 100644 --- a/.github/workflows/python-quality.yml +++ b/.github/workflows/python-quality.yml @@ -31,7 +31,6 @@ jobs: - run: ruff tests src contrib - run: python utils/check_contrib_list.py - run: python utils/check_static_imports.py - #- run: python utils/check_threaded_hf_api.py # Run type checking at least on huggingface_hub 
root file to check all modules # that can be lazy-loaded actually exist. diff --git a/Makefile b/Makefile index 4584ba803d..3ba5f3dae5 100644 --- a/Makefile +++ b/Makefile @@ -10,14 +10,12 @@ quality: mypy src python utils/check_contrib_list.py python utils/check_static_imports.py - # python utils/check_threaded_hf_api.py style: black $(check_dirs) ruff $(check_dirs) --fix python utils/check_contrib_list.py --update python utils/check_static_imports.py --update - # python utils/check_threaded_hf_api.py --update repocard: python utils/push_repocard_examples.py diff --git a/pyproject.toml b/pyproject.toml index d02165f52e..c7e8c4c3ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,9 +16,6 @@ ignore = ["E501", "F821"] select = ["E", "F", "I", "W"] line-length = 119 -[tool.ruff.per-file-ignores] -"src/huggingface_hub/_threaded_hf_api.py" = ["F405"] # generate code => we assume it's good - [tool.ruff.isort] lines-after-imports = 2 known-first-party = ["huggingface_hub"] diff --git a/setup.cfg b/setup.cfg index 50db28b53b..9cc27b091c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,8 +52,6 @@ use_parentheses = True exclude = .git,__pycache__,old,build,dist,.venv* ignore = B028, E203, E501, E741, W503 max-line-length = 119 -per-file-ignores = - src/huggingface_hub/_threaded_hf_api.py:F405 [tool:pytest] # -Werror::FutureWarning -> test fails if FutureWarning is thrown diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 1c3c61e8df..42df2e5981 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -18,12 +18,13 @@ import re import textwrap import warnings +from concurrent.futures import Future, ThreadPoolExecutor from dataclasses import dataclass, field from datetime import datetime from functools import wraps from itertools import islice from pathlib import Path -from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import Any, BinaryIO, Dict, Iterable, Iterator, 
List, Optional, Tuple, Union, overload from urllib.parse import quote import requests @@ -816,7 +817,7 @@ def _inner(self, *args, **kwargs): # Call the function in a thread if `as_future=True` if as_future: - return self.pool.submit(fn, self, *args, **kwargs) + return self.thread_pool.submit(fn, self, *args, **kwargs) # Otherwise, call the function normally return fn(self, *args, **kwargs) @@ -864,6 +865,17 @@ def __init__( self.library_name = library_name self.library_version = library_version self.user_agent = user_agent + self._thread_pool: Optional[ThreadPoolExecutor] = None + + @property + def thread_pool(self) -> ThreadPoolExecutor: + # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main + # thread. This can prove useful to upload data during a training. ThreadPoolExecutor is initialized the first + # time it's used. + # To run a method in the background, pass `as_future=True` when calling it. + if self._thread_pool is None: + self._thread_pool = ThreadPoolExecutor(max_workers=1) + return self._thread_pool @validate_hf_hub_args def whoami(self, token: Optional[str] = None) -> Dict: @@ -1394,12 +1406,8 @@ def list_spaces( @validate_hf_hub_args def like( - self, - repo_id: str, - *, - token: Optional[str] = None, - repo_type: Optional[str] = None, - ) -> None: + self, repo_id: str, *, token: Optional[str] = None, repo_type: Optional[str] = None + ) -> Union[None, Future[None]]: """ Like a given repo on the Hub (e.g. set as favorite). 
@@ -1439,6 +1447,7 @@ def like( headers=self._build_hf_headers(token=token), ) hf_raise_for_status(response) + return None @validate_hf_hub_args def unlike( @@ -2444,7 +2453,44 @@ def move_repo( ) raise + @overload + def create_commit( # type: ignore + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + as_future: Literal[False] = ..., + ) -> CommitInfo: + ... + + @overload + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + as_future: Literal[True] = ..., + ) -> Future[CommitInfo]: + ... + @validate_hf_hub_args + @future_compatible def create_commit( self, repo_id: str, @@ -2458,7 +2504,8 @@ def create_commit( create_pr: Optional[bool] = None, num_threads: int = 5, parent_commit: Optional[str] = None, - ) -> CommitInfo: + as_future: bool = False, + ) -> Union[CommitInfo, Future[CommitInfo]]: """ Creates a commit in the given repo, deleting & uploading files as needed. @@ -2509,6 +2556,10 @@ def create_commit( is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be especially useful if the repo is updated / committed to concurrently. + as_future (`bool`, *optional*): + Whether or not to run this method in the background. Background jobs are run sequentially without + blocking the main thread. 
Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`. Returns: [`CommitInfo`]: @@ -2940,7 +2991,44 @@ def create_commits_on_pr( return pr.url + @overload + def upload_file( # type: ignore + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + as_future: Literal[False] = ..., + ) -> str: + ... + + @overload + def upload_file( + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + as_future: Literal[True] = ..., + ) -> Future[str]: + ... + @validate_hf_hub_args + @future_compatible def upload_file( self, *, @@ -2954,7 +3042,8 @@ def upload_file( commit_description: Optional[str] = None, create_pr: Optional[bool] = None, parent_commit: Optional[str] = None, - ) -> str: + as_future: bool = False, + ) -> Union[str, Future[str]]: """ Upload a local file (up to 50 GB) to the given repo. The upload is done through a HTTP post request, and doesn't require git or git-lfs to be @@ -2995,6 +3084,10 @@ def upload_file( If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be especially useful if the repo is updated / committed to concurrently. + as_future (`bool`, *optional*): + Whether or not to run this method in the background. 
Background jobs are run sequentially without + blocking the main thread. Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`. Returns: @@ -3089,7 +3182,54 @@ def upload_file( # Similar to `hf_hub_url` but it's "blob" instead of "resolve" return f"{self.endpoint}/{repo_id}/blob/{revision}/{path_in_repo}" + @overload + def upload_folder( # type: ignore + self, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + multi_commits: bool = False, + multi_commits_verbose: bool = False, + as_future: Literal[False] = ..., + ) -> str: + ... + + @overload + def upload_folder( + self, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + multi_commits: bool = False, + multi_commits_verbose: bool = False, + as_future: Literal[True] = ..., + ) -> Future[str]: + ... 
+ @validate_hf_hub_args + @future_compatible def upload_folder( self, *, @@ -3108,7 +3248,8 @@ def upload_folder( delete_patterns: Optional[Union[List[str], str]] = None, multi_commits: bool = False, multi_commits_verbose: bool = False, - ): + as_future: bool = False, + ) -> Union[str, Future[str]]: """ Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or git-lfs to be installed. @@ -3179,6 +3320,10 @@ def upload_folder( If True, changes are pushed to a PR using a multi-commit process. Defaults to `False`. multi_commits_verbose (`bool`): If True and `multi_commits` is used, more information will be displayed to the user. + as_future (`bool`, *optional*): + Whether or not to run this method in the background. Background jobs are run sequentially without + blocking the main thread. Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`. Returns: `str`: A URL to visualize the uploaded folder on the hub From 1d516d7b04b4fe37908c70e75bc9aafdec34c549 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 11:21:16 +0200 Subject: [PATCH 10/17] Finally settle with a solution + tests + docs --- docs/source/guides/upload.mdx | 48 ++++++++++++ src/huggingface_hub/__init__.py | 2 + src/huggingface_hub/hf_api.py | 108 ++++++++++++++++++--------- src/huggingface_hub/utils/_typing.py | 5 ++ tests/test_hf_api.py | 61 ++++++++++----- 5 files changed, 171 insertions(+), 53 deletions(-) diff --git a/docs/source/guides/upload.mdx b/docs/source/guides/upload.mdx index 19be2c4c61..4de2333086 100644 --- a/docs/source/guides/upload.mdx +++ b/docs/source/guides/upload.mdx @@ -108,6 +108,54 @@ but before that, all previous logs on the repo on deleted. All of this in a sing ... ) ``` +### Non-blocking uploads + +In some cases, you want to push data without blocking your main thread. 
This is particularly useful to upload logs and
+artifacts while continuing a training. To do so, you can use the `run_as_future` argument in both [`upload_file`] and
+[`upload_folder`]. This will return a [`concurrent.futures.Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
+object that you can use to check the status of the upload.
+
+```py
+>>> from huggingface_hub import HfApi
+>>> api = HfApi()
+>>> future = api.upload_folder( # Upload in the background (non-blocking action)
+...     repo_id="username/my-model",
+...     folder_path="checkpoints-001",
+...     run_as_future=True,
+... )
+>>> future
+Future(...)
+>>> future.done()
+False
+>>> future.result() # Wait for the upload to complete (blocking action)
+...
+```
+
+<Tip>
+
+Background jobs are queued when using `run_as_future=True`. This means that you are guaranteed that the jobs will be
+executed in the correct order.
+
+</Tip>
+
+Even though background jobs are mostly useful to upload data/create commits, you can queue any method you like using
+[`run_as_future`]. For instance, you can use it to create a repo and then upload data to it in the background. The
+built-in `run_as_future` argument in upload methods is just an alias around it.
+
+```py
+>>> from huggingface_hub import HfApi
+>>> api = HfApi()
+>>> api.run_as_future(api.create_repo, "username/my-model", exist_ok=True)
+Future(...)
+>>> api.upload_file(
+...     repo_id="username/my-model",
+...     path_in_repo="file.txt",
+...     path_or_fileobj=b"file content",
+...     run_as_future=True,
+... )
+Future(...)
+```
+
 ### Upload a folder by chunks
 
 [`upload_folder`] makes it easy to upload an entire folder to the Hub.
However, for large folders (thousands of files or diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 600fc80cb7..579e152315 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -176,6 +176,7 @@ "repo_type_and_id_from_hf_id", "request_space_hardware", "restart_space", + "run_as_future", "set_space_sleep_time", "space_info", "unlike", @@ -462,6 +463,7 @@ def __dir__(): repo_type_and_id_from_hf_id, # noqa: F401 request_space_hardware, # noqa: F401 restart_space, # noqa: F401 + run_as_future, # noqa: F401 set_space_sleep_time, # noqa: F401 space_info, # noqa: F401 unlike, # noqa: F401 diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 42df2e5981..0cab61a188 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -24,7 +24,7 @@ from functools import wraps from itertools import islice from pathlib import Path -from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union, overload +from typing import Any, BinaryIO, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union, overload from urllib.parse import quote import requests @@ -94,7 +94,7 @@ from .utils._deprecation import ( _deprecate_arguments, ) -from .utils._typing import CallableT, Literal, TypedDict +from .utils._typing import CallableT, Literal, ParamSpec, TypedDict from .utils.endpoint_helpers import ( AttributeDictionary, DatasetFilter, @@ -105,6 +105,9 @@ ) +P = ParamSpec("P") # Arguments +R = TypeVar("R") # Return type + USERNAME_PLACEHOLDER = "hf_user" _REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$") @@ -792,9 +795,9 @@ class UserLikes: def future_compatible(fn: CallableT) -> CallableT: - """Wrap a method of `HfApi` to handle `as_future=True`. + """Wrap a method of `HfApi` to handle `run_as_future=True`. 
- A method flagged as "future_compatible" will be called in a thread if `as_future=True` and return a + A method flagged as "future_compatible" will be called in a thread if `run_as_future=True` and return a `concurrent.futures.Future` instance. Otherwise, it will be called normally and return the result. This decorator is useful to make a method compatible with both synchronous and asynchronous code. @@ -804,20 +807,20 @@ def future_compatible(fn: CallableT) -> CallableT: @wraps(fn) def _inner(self, *args, **kwargs): - # Get `as_future` value if provided (default to False) - if "as_future" in kwargs: - as_future = kwargs["as_future"] - kwargs["as_future"] = False # avoid recursion error + # Get `run_as_future` value if provided (default to False) + if "run_as_future" in kwargs: + run_as_future = kwargs["run_as_future"] + kwargs["run_as_future"] = False # avoid recursion error else: - as_future = False + run_as_future = False for param, value in zip(args_params, args): - if param == "as_future": - as_future = value + if param == "run_as_future": + run_as_future = value break - # Call the function in a thread if `as_future=True` - if as_future: - return self.thread_pool.submit(fn, self, *args, **kwargs) + # Call the function in a thread if `run_as_future=True` + if run_as_future: + return self.run_as_future(fn, self, *args, **kwargs) # Otherwise, call the function normally return fn(self, *args, **kwargs) @@ -867,15 +870,45 @@ def __init__( self.user_agent = user_agent self._thread_pool: Optional[ThreadPoolExecutor] = None - @property - def thread_pool(self) -> ThreadPoolExecutor: - # Calls to the Hub can be run in the background. Tasks are queued to preserve order but do not block the main - # thread. This can prove useful to upload data during a training. ThreadPoolExecutor is initialized the first - # time it's used. - # To run a method in the background, pass `as_future=True` when calling it. 
+    def run_as_future(self, fn: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> Future[R]:
+        """
+        Run a method in the background and return a Future instance.
+
+        The main goal is to run methods without blocking the main thread (e.g. to push data during a training).
+        Background jobs are queued to preserve order but are not run in parallel. If you need to speed up your scripts
+        by parallelizing lots of calls to the API, you must set up and use your own [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor).
+
+        Note: Most-used methods like [`upload_file`], [`upload_folder`] and [`create_commit`] have a `run_as_future: bool`
+        argument to directly call them in the background. This is equivalent to calling `api.run_as_future(...)` on them
+        but less verbose.
+
+        Args:
+            fn (`Callable`):
+                The method to run in the background.
+            *args, **kwargs:
+                Arguments with which the method will be called.
+
+        Return:
+            [`Future`](https://docs.python.org/3/library/concurrent.futures.html#future-objects): a Future instance to
+            get the result of the task.
+
+        Example:
+            ```py
+            >>> from huggingface_hub import HfApi
+            >>> api = HfApi()
+            >>> future = api.run_as_future(api.whoami) # instant
+            >>> future.done()
+            False
+            >>> future.result() # wait until complete and return result
+            (...)
+            >>> future.done()
+            True
+            ```
+        """
         if self._thread_pool is None:
             self._thread_pool = ThreadPoolExecutor(max_workers=1)
-        return self._thread_pool
+        self._thread_pool
+        return self._thread_pool.submit(fn, *args, **kwargs)
 
     @validate_hf_hub_args
     def whoami(self, token: Optional[str] = None) -> Dict:
@@ -2467,7 +2500,7 @@ def create_commit(  # type: ignore
         create_pr: Optional[bool] = None,
         num_threads: int = 5,
         parent_commit: Optional[str] = None,
-        as_future: Literal[False] = ...,
+        run_as_future: Literal[False] = ...,
     ) -> CommitInfo:
         ...
@@ -2485,7 +2518,7 @@ def create_commit( create_pr: Optional[bool] = None, num_threads: int = 5, parent_commit: Optional[str] = None, - as_future: Literal[True] = ..., + run_as_future: Literal[True] = ..., ) -> Future[CommitInfo]: ... @@ -2504,7 +2537,7 @@ def create_commit( create_pr: Optional[bool] = None, num_threads: int = 5, parent_commit: Optional[str] = None, - as_future: bool = False, + run_as_future: bool = False, ) -> Union[CommitInfo, Future[CommitInfo]]: """ Creates a commit in the given repo, deleting & uploading files as needed. @@ -2556,9 +2589,9 @@ def create_commit( is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be especially useful if the repo is updated / committed to concurrently. - as_future (`bool`, *optional*): + run_as_future (`bool`, *optional*): Whether or not to run this method in the background. Background jobs are run sequentially without - blocking the main thread. Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) object. Defaults to `False`. Returns: @@ -3005,7 +3038,7 @@ def upload_file( # type: ignore commit_description: Optional[str] = None, create_pr: Optional[bool] = None, parent_commit: Optional[str] = None, - as_future: Literal[False] = ..., + run_as_future: Literal[False] = ..., ) -> str: ... @@ -3023,7 +3056,7 @@ def upload_file( commit_description: Optional[str] = None, create_pr: Optional[bool] = None, parent_commit: Optional[str] = None, - as_future: Literal[True] = ..., + run_as_future: Literal[True] = ..., ) -> Future[str]: ... 
@@ -3042,7 +3075,7 @@ def upload_file( commit_description: Optional[str] = None, create_pr: Optional[bool] = None, parent_commit: Optional[str] = None, - as_future: bool = False, + run_as_future: bool = False, ) -> Union[str, Future[str]]: """ Upload a local file (up to 50 GB) to the given repo. The upload is done @@ -3084,9 +3117,9 @@ def upload_file( If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be especially useful if the repo is updated / committed to concurrently. - as_future (`bool`, *optional*): + run_as_future (`bool`, *optional*): Whether or not to run this method in the background. Background jobs are run sequentially without - blocking the main thread. Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) object. Defaults to `False`. @@ -3201,7 +3234,7 @@ def upload_folder( # type: ignore delete_patterns: Optional[Union[List[str], str]] = None, multi_commits: bool = False, multi_commits_verbose: bool = False, - as_future: Literal[False] = ..., + run_as_future: Literal[False] = ..., ) -> str: ... @@ -3224,7 +3257,7 @@ def upload_folder( delete_patterns: Optional[Union[List[str], str]] = None, multi_commits: bool = False, multi_commits_verbose: bool = False, - as_future: Literal[True] = ..., + run_as_future: Literal[True] = ..., ) -> Future[str]: ... @@ -3248,7 +3281,7 @@ def upload_folder( delete_patterns: Optional[Union[List[str], str]] = None, multi_commits: bool = False, multi_commits_verbose: bool = False, - as_future: bool = False, + run_as_future: bool = False, ) -> Union[str, Future[str]]: """ Upload a local folder to the given repo. 
The upload is done through a HTTP requests, and doesn't require git or @@ -3320,9 +3353,9 @@ def upload_folder( If True, changes are pushed to a PR using a multi-commit process. Defaults to `False`. multi_commits_verbose (`bool`): If True and `multi_commits` is used, more information will be displayed to the user. - as_future (`bool`, *optional*): + run_as_future (`bool`, *optional*): Whether or not to run this method in the background. Background jobs are run sequentially without - blocking the main thread. Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) object. Defaults to `False`. Returns: @@ -5088,6 +5121,9 @@ def _parse_revision_from_pr_url(pr_url: str) -> str: delete_tag = api.delete_tag get_full_repo_name = api.get_full_repo_name +# Background jobs +run_as_future = api.run_as_future + # Activity API list_liked_repos = api.list_liked_repos like = api.like diff --git a/src/huggingface_hub/utils/_typing.py b/src/huggingface_hub/utils/_typing.py index c8885eb1eb..d5ef802bac 100644 --- a/src/huggingface_hub/utils/_typing.py +++ b/src/huggingface_hub/utils/_typing.py @@ -22,6 +22,11 @@ else: from typing_extensions import Literal, TypedDict # noqa: F401 +if sys.version_info >= (3, 10): + from typing import ParamSpec +else: + from typing_extensions import ParamSpec # noqa: F401 + HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] # type hint meaning "function signature not changed by decorator" diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 7e097b077d..695f0b108c 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -20,6 +20,7 @@ import types import unittest import warnings +from concurrent.futures import Future from functools import partial from io import BytesIO from pathlib import 
Path @@ -2384,44 +2385,70 @@ def test_pause_and_restart_space(self) -> None: self.assertIn(runtime_after_restart.stage, (SpaceStage.BUILDING, SpaceStage.RUNNING_BUILDING)) -class TestThreadedAPI(HfApiCommonTest): - def test_create_upload_and_delete_in_background(self) -> None: - repo_id = f"{USER}/{repo_name()}" +@pytest.mark.usefixtures("fx_cache_dir") +class TestCommitInBackground(HfApiCommonTest): + cache_dir: Path + + @use_tmp_repo() + def test_commit_to_repo_in_background(self, repo_url: RepoUrl) -> None: + repo_id = repo_url.repo_id + (self.cache_dir / "file.txt").write_text("content") + (self.cache_dir / "lfs.bin").write_text("content") t0 = time.time() - create_repo_future = self._api.create_repo_threaded(repo_id) - upload_future_1 = self._api.upload_file_threaded( - path_or_fileobj=b"1", path_in_repo="file.txt", repo_id=repo_id, commit_message="Upload 1" + upload_future_1 = self._api.upload_file( + path_or_fileobj=b"1", path_in_repo="1.txt", repo_id=repo_id, commit_message="Upload 1", run_as_future=True ) - upload_future_2 = self._api.upload_file_threaded( - path_or_fileobj=b"2", path_in_repo="file.txt", repo_id=repo_id, commit_message="Upload 2" + upload_future_2 = self._api.upload_file( + path_or_fileobj=b"2", path_in_repo="2.txt", repo_id=repo_id, commit_message="Upload 2", run_as_future=True ) - delete_file_future = self._api.delete_file_threaded( - path_in_repo="file.txt", repo_id=repo_id, commit_message="Delete 1" + upload_future_3 = self._api.upload_folder( + repo_id=repo_id, folder_path=self.cache_dir, commit_message="Upload folder", run_as_future=True ) - commits_future = self._api.list_repo_commits_threaded(repo_id=repo_id) t1 = time.time() # all futures are queued instantly self.assertLessEqual(t1 - t0, 0.01) # wait for the last job to complete - commits = commits_future.result() + upload_future_3.result() - # all of them are not complete (ran in order) - self.assertTrue(create_repo_future.done()) + # all of them are now complete (ran in order) 
self.assertTrue(upload_future_1.done()) self.assertTrue(upload_future_2.done()) - self.assertTrue(delete_file_future.done()) - self.assertTrue(commits_future.done()) + self.assertTrue(upload_future_3.done()) # 4 commits, sorted in reverse order of creation + commits = self._api.list_repo_commits(repo_id=repo_id) self.assertEqual(len(commits), 4) - self.assertEqual(commits[0].title, "Delete 1") + self.assertEqual(commits[0].title, "Upload folder") self.assertEqual(commits[1].title, "Upload 2") self.assertEqual(commits[2].title, "Upload 1") self.assertEqual(commits[3].title, "initial commit") + @use_tmp_repo() + def test_run_as_future(self, repo_url: RepoUrl) -> None: + repo_id = repo_url.repo_id + self._api.run_as_future(self._api.like, repo_id) + future_1 = self._api.run_as_future(self._api.model_info, repo_id=repo_id) + self._api.run_as_future(self._api.unlike, repo_id) + future_2 = self._api.run_as_future(self._api.model_info, repo_id=repo_id) + + self.assertIsInstance(future_1, Future) + self.assertIsInstance(future_2, Future) + + # Wait for first info future + info_1 = future_1.result() + self.assertFalse(future_2.done()) + + # Wait for second info future + info_2 = future_2.result() + self.assertTrue(future_2.done()) + + # Like/unlike is correct + self.assertEqual(info_1.likes, 1) + self.assertEqual(info_2.likes, 0) + class TestSpaceAPIMocked(unittest.TestCase): """ From 522fbc43394b465364bed949e04ab2e89016fcc1 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 11:22:05 +0200 Subject: [PATCH 11/17] add legacy script for reference --- ...gacy_check_future_compatible_signatures.py | 231 ++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 utils/_legacy_check_future_compatible_signatures.py diff --git a/utils/_legacy_check_future_compatible_signatures.py b/utils/_legacy_check_future_compatible_signatures.py new file mode 100644 index 0000000000..bbafc93598 --- /dev/null +++ b/utils/_legacy_check_future_compatible_signatures.py 
@@ -0,0 +1,231 @@ +# coding=utf-8 +# Copyright 2022-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains a tool to add/check the definition of "async" methods of `HfApi` in `huggingface_hub.hf_api.py`. + +WARNING: this is a script kept to help with `@future_compatible` methods of `HfApi` but it is not 100% correct. +Keeping it here for reference but it is not used in the CI/Makefile. + +What is done correctly: +1. Add "as_future" as argument to the method signature +2. Set Union[T, Future[T]] as return type to the method signature +3. Document "as_future" argument in the docstring of the method + +What is NOT done correctly: +1. Generated stubs are grouped at the top of the `HfApi` class. They must be copy-pasted (overload definition must be +just before the method implementation) +2. `#type: ignore` must be adjusted in the first stub (if multiline definition) +""" +import argparse +import inspect +import os +import re +import tempfile +from pathlib import Path +from typing import Callable, NoReturn + +import black +from ruff.__main__ import find_ruff_bin + +from huggingface_hub.hf_api import HfApi + + +STUBS_SECTION_TEMPLATE = """ + ### Stubs section start ### + + # This section contains stubs for the methods that are marked as `@future_compatible`. Those methods have a + # different return type depending on the `as_future: bool` value. For better integrations with IDEs, we provide + # stubs for both return types. 
The actual implementation of those methods is written below. + + # WARNING: this section have been generated automatically. Do not modify it manually. If you modify it manually, your + # changes will be overwritten. To re-generate this section, run `make style` (or `python utils/check_future_compatible_signatures.py` + # directly). + + # FAQ: + # 1. Why should we have these? For better type annotation which helps with IDE features like autocompletion. + # 2. Why not a separate `hf_api.pyi` file? Would require to re-defined all the existing annotations from `hf_api.py`. + # 3. Why not at the end of the module? Because `@overload` methods must be defined first. + # 4. Why not another solution? I'd be glad, but this is the "less worse" I could find. + # For more details, see https://github.com/huggingface/huggingface_hub/pull/1458 + + + {stubs} + + # WARNING: this section have been generated automatically. Do not modify it manually. If you modify it manually, your + # changes will be overwritten. To re-generate this section, run `make style` (or `python utils/check_future_compatible_signatures.py` + # directly). + + ### Stubs section end ### +""" + +STUBS_SECTION_TEMPLATE_REGEX = re.compile(r"### Stubs section start ###.*### Stubs section end ###", re.DOTALL) + +AS_FUTURE_SIGNATURE_TEMPLATE = "as_future: bool = False" + +AS_FUTURE_DOCSTRING_TEMPLATE = """ + as_future (`bool`, *optional*): + Whether or not to run this method in the background. Background jobs are run sequentially without + blocking the main thread. Passing `as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`.""" + +ARGS_DOCSTRING_REGEX = re.compile( + """ +^[ ]{8}Args: # Match args section ... +(.*?) # ... everything ... +^[ ]{8}\\S # ... 
until next section or end of docstring +""", + re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL, +) + +SIGNATURE_REGEX_FULL = re.compile(r"^\s*def.*?-> (.*?):", re.DOTALL | re.MULTILINE) +SIGNATURE_REGEX_RETURN_TYPE = re.compile(r"-> (.*?):") +SIGNATURE_REGEX_RETURN_TYPE_WITH_FUTURE = re.compile(r"-> Union\[(.*?), (.*?)\]:") + + +HF_API_FILE_PATH = Path(__file__).parents[1] / "src" / "huggingface_hub" / "hf_api.py" +HF_API_FILE_CONTENT = HF_API_FILE_PATH.read_text() + + +def generate_future_compatible_method(method: Callable, method_source: str) -> str: + # 1. Document `as_future` parameter + if AS_FUTURE_DOCSTRING_TEMPLATE not in method_source: + match = ARGS_DOCSTRING_REGEX.search(method_source) + if match is None: + raise ValueError(f"Could not find `Args` section in docstring of {method}.") + args_docs = match.group(1).strip() + method_source = method_source.replace(args_docs, args_docs + AS_FUTURE_DOCSTRING_TEMPLATE) + + # 2. Update signature + # 2.a. Add `as_future` parameter + if AS_FUTURE_SIGNATURE_TEMPLATE not in method_source: + match = SIGNATURE_REGEX_FULL.search(method_source) + if match is None: + raise ValueError(f"Could not find signature of {method} in source.") + method_source = method_source.replace( + match.group(), match.group().replace(") ->", f" {AS_FUTURE_SIGNATURE_TEMPLATE}) ->"), 1 + ) + + # 2.b. Update return value + if "Future[" not in method_source: + match = SIGNATURE_REGEX_RETURN_TYPE.search(method_source) + if match is None: + raise ValueError(f"Could not find return type of {method} in source.") + base_type = match.group(1).strip() + return_type = f"Union[{base_type}, Future[{base_type}]]" + return_value_replaced = match.group().replace(match.group(1), return_type) + method_source = method_source.replace(match.group(), return_value_replaced) + + # 3. 
Generate @overload stubs + match = SIGNATURE_REGEX_FULL.search(method_source) + if match is None: + raise ValueError(f"Could not find signature of {method} in source.") + method_sig = match.group() + + match = SIGNATURE_REGEX_RETURN_TYPE_WITH_FUTURE.search(method_sig) + if match is None: + raise ValueError(f"Could not find return type (with Future) of {method} in source.") + no_future_return_type = match.group(1).strip() + with_future_return_type = match.group(2).strip() + + # 3.a. Stub when `as_future=False` + no_future_stub = " @overload\n" + method_sig + no_future_stub = no_future_stub.replace(AS_FUTURE_SIGNATURE_TEMPLATE, "as_future: Literal[False] = ...") + no_future_stub = SIGNATURE_REGEX_RETURN_TYPE.sub(rf"-> {no_future_return_type}:", no_future_stub) + no_future_stub += " # type: ignore\n ..." # only the first stub requires "type: ignore" + + # 3.b. Stub when `as_future=True` + with_future_stub = " @overload\n" + method_sig + with_future_stub = with_future_stub.replace(AS_FUTURE_SIGNATURE_TEMPLATE, "as_future: Literal[True] = ...") + with_future_stub = SIGNATURE_REGEX_RETURN_TYPE.sub(rf"-> {with_future_return_type}:", with_future_stub) + with_future_stub += "\n ..." + + stubs_source = no_future_stub + "\n\n" + with_future_stub + "\n\n" + + # 4. All good! 
+ return method_source, stubs_source + + +def generate_hf_api_module() -> str: + raw_code = HF_API_FILE_CONTENT + + # Process all Future-compatible methods + all_stubs_source = "" + for _, method in inspect.getmembers(HfApi, predicate=inspect.isfunction): + if not getattr(method, "is_future_compatible", False): + continue + source = inspect.getsource(method) + method_source, stubs_source = generate_future_compatible_method(method, source) + + raw_code = raw_code.replace(source, method_source) + all_stubs_source += "\n\n" + stubs_source + + # Generate code with stubs + generated_code = STUBS_SECTION_TEMPLATE_REGEX.sub(STUBS_SECTION_TEMPLATE.format(stubs=all_stubs_source), raw_code) + + # Format (black+ruff) + return format_generated_code(generated_code) + + +def format_generated_code(code: str) -> str: + """ + Format some code with black+ruff. Cannot be done "on the fly" so we first save the code in a temporary file. + """ + # Format with black + code = black.format_file_contents(code, fast=False, mode=black.FileMode(line_length=119)) + + # Format with ruff + with tempfile.TemporaryDirectory() as tmpdir: + filepath = Path(tmpdir) / "__init__.py" + filepath.write_text(code) + ruff_bin = find_ruff_bin() + os.spawnv(os.P_WAIT, ruff_bin, ["ruff", str(filepath), "--fix", "--quiet"]) + return filepath.read_text() + + +def check_future_compatible_hf_api(update: bool) -> NoReturn: + """Check that the code defining the threaded version of HfApi is up-to-date.""" + # If expected `__init__.py` content is different, test fails. If '--update-init-file' + # is used, `__init__.py` file is updated before the test fails. 
+ expected_content = generate_hf_api_module() + if expected_content != HF_API_FILE_CONTENT: + if update: + with HF_API_FILE_PATH.open("w") as f: + f.write(expected_content) + + print( + "✅ Signature/docstring/annotations for Future-compatible methods have been updated in" + " `./src/huggingface_hub/hf_api.py`.\n Please make sure the changes are accurate and commit them." + ) + exit(0) + else: + print( + "❌ Expected content mismatch for Future compatible methods in `./src/huggingface_hub/hf_api.py`.\n " + " Please run `make style` or `python utils/check_future_compatible_signatures.py --update`." + ) + exit(1) + + print("✅ All good! (Future-compatible methods)") + exit(0) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--update", + action="store_true", + help="Whether to override `./src/huggingface_hub/hf_api.py` if a change is detected.", + ) + args = parser.parse_args() + + check_future_compatible_hf_api(update=args.update) From d440337f93025012cafd992d80c9bf99985c21d5 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 11:34:44 +0200 Subject: [PATCH 12/17] fix tests --- tests/test_init_lazy_loading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_init_lazy_loading.py b/tests/test_init_lazy_loading.py index 8faa320673..9312543128 100644 --- a/tests/test_init_lazy_loading.py +++ b/tests/test_init_lazy_loading.py @@ -27,7 +27,7 @@ def test_autocomplete_on_root_imports(self) -> None: # Assert docstring is find. This means autocomplete can also provide # the help section. 
signature_list = goto_list[0].get_signatures() - self.assertEqual(len(signature_list), 1) + self.assertEqual(len(signature_list), 2) # create_commit has 2 signatures (normal and `run_as_future`) self.assertTrue(signature_list[0].docstring().startswith("create_commit(repo_id: str,")) break else: From 5de0cdddd65b05e3c3803d5d893ac00d1b219f9a Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 11:36:23 +0200 Subject: [PATCH 13/17] fix typing in 3.7 --- src/huggingface_hub/hf_api.py | 5 ++--- src/huggingface_hub/utils/_typing.py | 5 ----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 0cab61a188..484aed266e 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -94,7 +94,7 @@ from .utils._deprecation import ( _deprecate_arguments, ) -from .utils._typing import CallableT, Literal, ParamSpec, TypedDict +from .utils._typing import CallableT, Literal, TypedDict from .utils.endpoint_helpers import ( AttributeDictionary, DatasetFilter, @@ -105,7 +105,6 @@ ) -P = ParamSpec("P") # Arguments R = TypeVar("R") # Return type USERNAME_PLACEHOLDER = "hf_user" @@ -870,7 +869,7 @@ def __init__( self.user_agent = user_agent self._thread_pool: Optional[ThreadPoolExecutor] = None - def run_as_future(self, fn: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> Future[R]: + def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]: """ Run a method in the background and return a Future instance. 
diff --git a/src/huggingface_hub/utils/_typing.py b/src/huggingface_hub/utils/_typing.py index d5ef802bac..c8885eb1eb 100644 --- a/src/huggingface_hub/utils/_typing.py +++ b/src/huggingface_hub/utils/_typing.py @@ -22,11 +22,6 @@ else: from typing_extensions import Literal, TypedDict # noqa: F401 -if sys.version_info >= (3, 10): - from typing import ParamSpec -else: - from typing_extensions import ParamSpec # noqa: F401 - HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] # type hint meaning "function signature not changed by decorator" From 51cf2bd4dc885396c5ce22ac71cec343d278a3be Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 11:44:20 +0200 Subject: [PATCH 14/17] fix typing for 3.7 --- src/huggingface_hub/hf_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 484aed266e..6227083513 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import inspect import json import pprint From 714920c58590b5d83ee88adc880a227bffaed842 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 11:58:17 +0200 Subject: [PATCH 15/17] minor changes --- docs/source/guides/upload.mdx | 2 +- src/huggingface_hub/hf_api.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/guides/upload.mdx b/docs/source/guides/upload.mdx index 4de2333086..e76f087db2 100644 --- a/docs/source/guides/upload.mdx +++ b/docs/source/guides/upload.mdx @@ -108,7 +108,7 @@ but before that, all previous logs on the repo on deleted. All of this in a sing ... ) ``` -### Non-blocking uploads +### Non-blocking upload In some cases, you want to push data without blocking your main thread. 
This is particularly useful to upload logs and artifacts while continuing a training. To do so, you can use the `run_as_future` argument in both [`upload_file] and diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 6227083513..20fbeeef49 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -799,8 +799,6 @@ def future_compatible(fn: CallableT) -> CallableT: A method flagged as "future_compatible" will be called in a thread if `run_as_future=True` and return a `concurrent.futures.Future` instance. Otherwise, it will be called normally and return the result. - - This decorator is useful to make a method compatible with both synchronous and asynchronous code. """ sig = inspect.signature(fn) args_params = list(sig.parameters)[1:] # remove "self" from list @@ -1439,7 +1437,11 @@ def list_spaces( @validate_hf_hub_args def like( - self, repo_id: str, *, token: Optional[str] = None, repo_type: Optional[str] = None + self, + repo_id: str, + *, + token: Optional[str] = None, + repo_type: Optional[str] = None, ) -> Union[None, Future[None]]: """ Like a given repo on the Hub (e.g. set as favorite). @@ -1480,7 +1482,6 @@ def like( headers=self._build_hf_headers(token=token), ) hf_raise_for_status(response) - return None @validate_hf_hub_args def unlike( From 1686cfc6fa35f4a03df0d42d0e6ecd0c8436fbd1 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 17 May 2023 12:01:37 +0200 Subject: [PATCH 16/17] make style --- src/huggingface_hub/hf_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 20fbeeef49..572f57a732 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from __future__ import annotations
+
 import inspect
 import json
 import pprint

From 741e201437a4372dbb02142cfbce49fb8670fca8 Mon Sep 17 00:00:00 2001
From: Lucain Pouget 
Date: Wed, 17 May 2023 12:04:29 +0200
Subject: [PATCH 17/17] code quality

---
 src/huggingface_hub/hf_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py
index 572f57a732..58e0884c87 100644
--- a/src/huggingface_hub/hf_api.py
+++ b/src/huggingface_hub/hf_api.py
@@ -1443,7 +1443,7 @@ def like(
         *,
         token: Optional[str] = None,
         repo_type: Optional[str] = None,
-    ) -> Union[None, Future[None]]:
+    ) -> None:
     """
     Like a given repo on the Hub (e.g. set as favorite).