From 3b84e279e99b678de0189a0229313d10592d51fe Mon Sep 17 00:00:00 2001 From: Maximilien de Bayser Date: Fri, 7 Feb 2025 02:37:41 -0300 Subject: [PATCH] Prevent unnecessary requests to huggingface hub (#12837) --- .../offline_mode/test_offline_mode.py | 21 ++++ vllm/transformers_utils/config.py | 115 ++++++++++++------ 2 files changed, 96 insertions(+), 40 deletions(-) diff --git a/tests/entrypoints/offline_mode/test_offline_mode.py b/tests/entrypoints/offline_mode/test_offline_mode.py index eac76f2ba0fa5..85156d6931c8c 100644 --- a/tests/entrypoints/offline_mode/test_offline_mode.py +++ b/tests/entrypoints/offline_mode/test_offline_mode.py @@ -4,6 +4,7 @@ import sys import pytest +import urllib3 from vllm import LLM from vllm.distributed import cleanup_dist_env_and_memory @@ -28,6 +29,15 @@ "tensor_parallel_size": 1, "tokenizer_mode": "mistral", }, + { + "model": "sentence-transformers/all-MiniLM-L12-v2", + "enforce_eager": True, + "gpu_memory_utilization": 0.20, + "max_model_len": 64, + "max_num_batched_tokens": 64, + "max_num_seqs": 64, + "tensor_parallel_size": 1, + }, ] @@ -47,6 +57,16 @@ def test_offline_mode(monkeypatch): # Set HF to offline mode and ensure we can still construct an LLM try: monkeypatch.setenv("HF_HUB_OFFLINE", "1") + monkeypatch.setenv("VLLM_NO_USAGE_STATS", "1") + + def disable_connect(*args, **kwargs): + raise RuntimeError("No http calls allowed") + + monkeypatch.setattr(urllib3.connection.HTTPConnection, "connect", + disable_connect) + monkeypatch.setattr(urllib3.connection.HTTPSConnection, "connect", + disable_connect) + + # Need to re-import huggingface_hub and friends to setup offline mode _re_import_modules() # Cached model files should be used in offline mode @@ -56,6 +76,7 @@ def test_offline_mode(monkeypatch): # Reset the environment after the test # NB: Assuming tests are run in online mode monkeypatch.delenv("HF_HUB_OFFLINE") + monkeypatch.delenv("VLLM_NO_USAGE_STATS") _re_import_modules() pass diff --git 
a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 85056158bab54..fb5cc3ec0722b 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -10,7 +10,7 @@ from huggingface_hub import (file_exists, hf_hub_download, list_repo_files, try_to_load_from_cache) from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError, - LocalEntryNotFoundError, + HFValidationError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError) from torch import nn @@ -265,49 +265,66 @@ def get_config( return config +def try_get_local_file(model: Union[str, Path], + file_name: str, + revision: Optional[str] = 'main') -> Optional[Path]: + file_path = Path(model) / file_name + if file_path.is_file(): + return file_path + else: + try: + cached_filepath = try_to_load_from_cache(repo_id=model, + filename=file_name, + revision=revision) + if isinstance(cached_filepath, str): + return Path(cached_filepath) + except HFValidationError: + ... + return None + + def get_hf_file_to_dict(file_name: str, model: Union[str, Path], revision: Optional[str] = 'main'): """ - Downloads a file from the Hugging Face Hub and returns + Downloads a file from the Hugging Face Hub and returns its contents as a dictionary. Parameters: - file_name (str): The name of the file to download. - model (str): The name of the model on the Hugging Face Hub. - - revision (str): The specific version of the model. + - revision (str): The specific version of the model. Returns: - - config_dict (dict): A dictionary containing + - config_dict (dict): A dictionary containing the contents of the downloaded file. 
""" - file_path = Path(model) / file_name - if file_or_path_exists(model=model, - config_name=file_name, - revision=revision): + file_path = try_get_local_file(model=model, + file_name=file_name, + revision=revision) - if not file_path.is_file(): - try: - hf_hub_file = hf_hub_download(model, - file_name, - revision=revision) - except (RepositoryNotFoundError, RevisionNotFoundError, - EntryNotFoundError, LocalEntryNotFoundError) as e: - logger.debug("File or repository not found in hf_hub_download", - e) - return None - except HfHubHTTPError as e: - logger.warning( - "Cannot connect to Hugging Face Hub. Skipping file " - "download for '%s':", - file_name, - exc_info=e) - return None - file_path = Path(hf_hub_file) + if file_path is None and file_or_path_exists( + model=model, config_name=file_name, revision=revision): + try: + hf_hub_file = hf_hub_download(model, file_name, revision=revision) + except (RepositoryNotFoundError, RevisionNotFoundError, + EntryNotFoundError, LocalEntryNotFoundError) as e: + logger.debug("File or repository not found in hf_hub_download", e) + return None + except HfHubHTTPError as e: + logger.warning( + "Cannot connect to Hugging Face Hub. 
Skipping file " + "download for '%s':", + file_name, + exc_info=e) + return None + file_path = Path(hf_hub_file) + if file_path is not None and file_path.is_file(): with open(file_path) as file: return json.load(file) + return None @@ -328,7 +345,12 @@ def get_pooling_config(model: str, revision: Optional[str] = 'main'): """ modules_file_name = "modules.json" - modules_dict = get_hf_file_to_dict(modules_file_name, model, revision) + + modules_dict = None + if file_or_path_exists(model=model, + config_name=modules_file_name, + revision=revision): + modules_dict = get_hf_file_to_dict(modules_file_name, model, revision) if modules_dict is None: return None @@ -382,17 +404,17 @@ def get_sentence_transformer_tokenizer_config(model: str, revision: Optional[str] = 'main' ): """ - Returns the tokenization configuration dictionary for a + Returns the tokenization configuration dictionary for a given Sentence Transformer BERT model. Parameters: - - model (str): The name of the Sentence Transformer + - model (str): The name of the Sentence Transformer BERT model. - revision (str, optional): The revision of the m odel to use. Defaults to 'main'. Returns: - - dict: A dictionary containing the configuration parameters + - dict: A dictionary containing the configuration parameters for the Sentence Transformer BERT model. 
""" sentence_transformer_config_files = [ @@ -404,20 +426,33 @@ def get_sentence_transformer_tokenizer_config(model: str, "sentence_xlm-roberta_config.json", "sentence_xlnet_config.json", ] - try: - # If model is on HuggingfaceHub, get the repo files - repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN) - except Exception as e: - logger.debug("Error getting repo files", e) - repo_files = [] - encoder_dict = None - for config_name in sentence_transformer_config_files: - if config_name in repo_files or Path(model).exists(): - encoder_dict = get_hf_file_to_dict(config_name, model, revision) + + for config_file in sentence_transformer_config_files: + if try_get_local_file(model=model, + file_name=config_file, + revision=revision) is not None: + encoder_dict = get_hf_file_to_dict(config_file, model, revision) if encoder_dict: break + if not encoder_dict: + try: + # If model is on HuggingfaceHub, get the repo files + repo_files = list_repo_files(model, + revision=revision, + token=HF_TOKEN) + except Exception as e: + logger.debug("Error getting repo files", e) + repo_files = [] + + for config_name in sentence_transformer_config_files: + if config_name in repo_files: + encoder_dict = get_hf_file_to_dict(config_name, model, + revision) + if encoder_dict: + break + if not encoder_dict: return None