From a53c47bc509461a751ccde980fc142408c90c664 Mon Sep 17 00:00:00 2001 From: Chen Mulong Date: Tue, 12 Sep 2023 16:01:46 +0800 Subject: [PATCH] HF_HUB_KEEP_LOCK_FILES to prevent rare concurrency issue The lock file could be removed when the 2nd process gets the lock. And then the 3rd process will lock on a different lock file handle. Although the lock path stays the same. 1st proc gets the lock 2nd proc waits for the lock 1st proc releases the lock 1st proc removes the lock file 2nd proc gets the lock 3rd proc creates a new lock file and gets the lock Add ENV 'HF_HUB_KEEP_LOCK_FILES' to give user option to keep the lock files to prevent concurrency issues. --- .../en/package_reference/environment_variables.md | 6 ++++++ src/huggingface_hub/constants.py | 5 +++++ src/huggingface_hub/file_download.py | 10 ++++++---- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md index 32eeb553d77..f6bd8113f26 100644 --- a/docs/source/en/package_reference/environment_variables.md +++ b/docs/source/en/package_reference/environment_variables.md @@ -74,6 +74,12 @@ small files will be duplicated to ease user experience while bigger files are sy For more details, see the [download guide](../guides/download#download-files-to-local-folder). +### HF_HUB_KEEP_LOCK_FILES + +Lock files are used to ensure the downloading in parallel won't overwrite. By default, the lock file will be deleted +automatically. However, the deletion would cause concurrency issues in rare cases. Set this to `True` to prevent this +kind of concurrency issue. + ## Boolean values The following environment variables expect a boolean value. The variable will be considered diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index fec3285ce97..06b277d931b 100644 --- a/src/huggingface_hub/constants.py +++ b/src/huggingface_hub/constants.py @@ -130,6 +130,11 @@ def _as_int(value: Optional[str]) -> Optional[int]: _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024 ) +# Lock files are used to ensure the downloading in parallel won't overwrite. By default, the lock file will be deleted +# automatically. However, the deletion would cause concurrency issues in rare cases. Set this to TRUE to prevent this +# kind of concurrency issue. +HF_HUB_KEEP_LOCK_FILES: bool = _is_true(os.environ.get("HF_HUB_KEEP_LOCK_FILES")) + # List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are # deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by # default. We still keep the full list of supported frameworks in case we want to scan all of them. diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index 9191f2d8672..b3899d174e0 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -29,6 +29,7 @@ ENDPOINT, HF_HUB_DISABLE_SYMLINKS_WARNING, HF_HUB_ENABLE_HF_TRANSFER, + HF_HUB_KEEP_LOCK_FILES, HUGGINGFACE_CO_URL_TEMPLATE, HUGGINGFACE_HEADER_X_LINKED_ETAG, HUGGINGFACE_HEADER_X_LINKED_SIZE, @@ -1475,10 +1476,11 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]: _chmod_and_replace(temp_file.name, local_dir_filepath) pointer_path = local_dir_filepath # for return value - try: - os.remove(lock_path) - except OSError: - pass + if not HF_HUB_KEEP_LOCK_FILES: + try: + os.remove(lock_path) + except OSError: + pass return pointer_path