[VLM] Separate text-only and vision variants of the same model architecture (vllm-project#13157)
DarkLight1337 authored and I746365 committed Feb 15, 2025
1 parent d88e8b1 commit 815a5fd
Showing 15 changed files with 1,729 additions and 1,643 deletions.
17 changes: 8 additions & 9 deletions docs/source/models/supported_models.md
@@ -699,10 +699,10 @@ See [this page](#generative-models) for more information on how to use generativ
*
* ✅︎
* ✅︎
- * `DeepseekVLV2ForCausalLM`
- * `DeepseekVLV2ForCausalLM`<sup>^</sup>
* DeepSeek-VL2
* T + I<sup>+</sup>
* `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
* `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc.
*
* ✅︎
* ✅︎
@@ -713,10 +713,10 @@ See [this page](#generative-models) for more information on how to use generativ
*
* ✅︎
* ✅︎
- * `ChatGLMModel`
- * `GLM4VForCausalLM`<sup>^</sup>
* GLM-4V
* T + I
* `THUDM/glm-4v-9b` etc.
* `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220` etc.
* ✅︎
* ✅︎
* ✅︎
@@ -825,7 +825,7 @@ See [this page](#generative-models) for more information on how to use generativ
*
* ✅︎
* ✅︎
- * `QWenLMHeadModel`
- * `QwenVLForConditionalGeneration`<sup>^</sup>
* Qwen-VL
* T + I<sup>E+</sup>
* `Qwen/Qwen-VL`, `Qwen/Qwen-VL-Chat`, etc.
@@ -862,13 +862,12 @@ See [this page](#generative-models) for more information on how to use generativ
* ✅︎
:::

<sup>^</sup> You need to set the architecture name via `--hf-overrides` to match the one in vLLM.
&nbsp;&nbsp;&nbsp;&nbsp;• For example, to use DeepSeek-VL2 series models:
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`
<sup>E</sup> Pre-computed embeddings can be inputted for this modality.
<sup>+</sup> Multiple items can be inputted per text prompt for this modality.

:::{note}
To use DeepSeek-VL2 series models, you have to pass `--hf_overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'` when running vLLM.
:::

:::{note}
H2O-VL series models will be available in V1 once we support backends other than FlashAttention.
:::
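A minimal offline-inference sketch of the `<sup>^</sup>` override described above, using the DeepSeek-VL2 entry from the table; aside from the model name and `hf_overrides` value, which come from this diff, the snippet is an illustrative assumption rather than part of the commit:

```python
from vllm import LLM

# The override sets the architecture name to the one vLLM registers for the
# vision variant, as described in the footnote above.
llm = LLM(
    model="deepseek-ai/deepseek-vl2-tiny",
    hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
)
```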
3 changes: 3 additions & 0 deletions examples/offline_inference/vision_language.py
@@ -105,7 +105,9 @@ def run_glm4v(question: str, modality: str):
max_num_seqs=2,
trust_remote_code=True,
enforce_eager=True,
hf_overrides={"architectures": ["GLM4VForCausalLM"]},
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache)

prompt = f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>\
{question}<|assistant|>"

@@ -495,6 +497,7 @@ def run_qwen_vl(question: str, modality: str):
trust_remote_code=True,
max_model_len=1024,
max_num_seqs=2,
hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
)

5 changes: 3 additions & 2 deletions examples/offline_inference/vision_language_multi_image.py
@@ -77,7 +77,7 @@ def load_deepseek_vl2(question: str, image_urls: List[str]):
)


def load_h2onvl(question: str, image_urls: List[str]) -> ModelRequestData:
def load_h2ovl(question: str, image_urls: List[str]) -> ModelRequestData:
model_name = "h2oai/h2ovl-mississippi-2b"

llm = LLM(
@@ -302,6 +302,7 @@ def load_qwen_vl_chat(question: str,
trust_remote_code=True,
max_model_len=1024,
max_num_seqs=2,
hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
limit_mm_per_prompt={"image": len(image_urls)},
)
placeholders = "".join(f"Picture {i}: <img></img>\n"
@@ -452,7 +453,7 @@ def load_qwen2_5_vl(question, image_urls: List[str]) -> ModelRequestData:
model_example_map = {
"aria": load_aria,
"deepseek_vl_v2": load_deepseek_vl2,
"h2ovl_chat": load_h2onvl,
"h2ovl_chat": load_h2ovl,
"idefics3": load_idefics3,
"internvl_chat": load_internvl,
"mllama": load_mllama,
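Similarly, a minimal sketch of the multi-image Qwen-VL setup after this change; the keyword arguments follow `load_qwen_vl_chat` in the diff, and the model name matches the supported-models table, while the image URLs are hypothetical placeholders.

```python
from vllm import LLM

image_urls = [
    "https://example.com/first.jpg",   # hypothetical URLs for illustration
    "https://example.com/second.jpg",
]

llm = LLM(
    model="Qwen/Qwen-VL-Chat",
    trust_remote_code=True,
    max_model_len=1024,
    max_num_seqs=2,
    hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    limit_mm_per_prompt={"image": len(image_urls)},
)

# One <img></img> placeholder per image, as in the example script.
placeholders = "".join(f"Picture {i}: <img></img>\n"
                       for i, _ in enumerate(image_urls, start=1))
```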