From 904941cc9e0828fe2afa7c628ff8fc716b0614df Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Sat, 7 Dec 2024 14:08:37 +0000
Subject: [PATCH 1/2] Explicitly state that InternVL 2.5 is supported

Signed-off-by: DarkLight1337
---
 docs/source/models/supported_models.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
index 5b416e04da745..d915def588e08 100644
--- a/docs/source/models/supported_models.rst
+++ b/docs/source/models/supported_models.rst
@@ -547,9 +547,9 @@ Text Generation
     - ✅︎
     -
   * - :code:`InternVLChatModel`
-    - InternVL2
+    - InternVL 2.5, Mono-InternVL, InternVL 2.0
     - T + I\ :sup:`E+`
-    - :code:`OpenGVLab/Mono-InternVL-2B`, :code:`OpenGVLab/InternVL2-4B`, :code:`OpenGVLab/InternVL2-8B`, etc.
+    - :code:`OpenGVLab/InternVL2_5-4B`, :code:`OpenGVLab/Mono-InternVL-2B`, :code:`OpenGVLab/InternVL2-4B`, etc.
     -
     - ✅︎
   * - :code:`LlavaForConditionalGeneration`

From 92a1bf26238647dd4497bc0876e8c4cb9921e6c8 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Sat, 7 Dec 2024 14:53:45 +0000
Subject: [PATCH 2/2] Update links

Signed-off-by: DarkLight1337
---
 examples/offline_inference_vision_language.py             | 2 +-
 examples/offline_inference_vision_language_multi_image.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py
index f08f22eec164a..56209c3c36ed4 100644
--- a/examples/offline_inference_vision_language.py
+++ b/examples/offline_inference_vision_language.py
@@ -223,7 +223,7 @@ def run_internvl(question: str, modality: str):
     # Stop tokens for InternVL
     # models variants may have different stop tokens
     # please refer to the model card for the correct "stop words":
-    # https://huggingface.co/OpenGVLab/InternVL2-2B#service
+    # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py
     stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
     stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
     return llm, prompt, stop_token_ids

diff --git a/examples/offline_inference_vision_language_multi_image.py b/examples/offline_inference_vision_language_multi_image.py
index 788b604cfd4a0..928bbef54eab7 100644
--- a/examples/offline_inference_vision_language_multi_image.py
+++ b/examples/offline_inference_vision_language_multi_image.py
@@ -165,7 +165,7 @@ def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData:
     # Stop tokens for InternVL
     # models variants may have different stop tokens
     # please refer to the model card for the correct "stop words":
-    # https://huggingface.co/OpenGVLab/InternVL2-2B#service
+    # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py
     stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
     stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
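
Note (not part of the patch): below is a minimal sketch of how the stop_token_ids built in run_internvl()/load_internvl() are typically consumed by these example scripts. The model name, prompt, and sampling values here are illustrative assumptions; the real examples additionally build the prompt via the tokenizer's chat template before calling generate().

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Assumed checkpoint for illustration; any InternVL variant supported by vLLM
# should follow the same pattern.
model_name = "OpenGVLab/InternVL2_5-4B"

llm = LLM(model=model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Same stop-token handling as in the patched example functions: look up the
# token IDs for the model's chat terminators.
stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in stop_tokens]

# Passing the IDs via SamplingParams makes generation halt at those tokens
# instead of running on to max_tokens.
sampling_params = SamplingParams(temperature=0.2,
                                 max_tokens=64,
                                 stop_token_ids=stop_token_ids)

outputs = llm.generate("Describe the InternVL model family.", sampling_params)
print(outputs[0].outputs[0].text)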