Skip to content

Commit

Permalink
Support Ovis2 models (#3163)
Browse files Browse the repository at this point in the history
* add transformers in gitignore

* fix a typo in text-caps

* add .run into gitignore

* add vlmeval to gitignore

* add my_model/ to gitignore

* support ovis2 models

* modify format
  • Loading branch information
DaozeZhang authored Feb 18, 2025
1 parent 805a12f commit 96eeecc
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/source/Instruction/支持的模型和数据集.md
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,12 @@
|[AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4)|
|[AIDC-AI/Ovis1.6-Gemma2-27B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-27B)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-27B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-27B)|
|[AIDC-AI/Ovis1.6-Llama3.2-3B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Llama3.2-3B)|ovis1_6_llama3|ovis1_6_llama3|-|vision|[AIDC-AI/Ovis1.6-Llama3.2-3B](https://huggingface.co/AIDC-AI/Ovis1.6-Llama3.2-3B)|
|[AIDC-AI/Ovis2-1B](https://modelscope.cn/models/AIDC-AI/Ovis2-1B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-1B](https://huggingface.co/AIDC-AI/Ovis2-1B)|
|[AIDC-AI/Ovis2-2B](https://modelscope.cn/models/AIDC-AI/Ovis2-2B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-2B](https://huggingface.co/AIDC-AI/Ovis2-2B)|
|[AIDC-AI/Ovis2-4B](https://modelscope.cn/models/AIDC-AI/Ovis2-4B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-4B](https://huggingface.co/AIDC-AI/Ovis2-4B)|
|[AIDC-AI/Ovis2-8B](https://modelscope.cn/models/AIDC-AI/Ovis2-8B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-8B](https://huggingface.co/AIDC-AI/Ovis2-8B)|
|[AIDC-AI/Ovis2-16B](https://modelscope.cn/models/AIDC-AI/Ovis2-16B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-16B](https://huggingface.co/AIDC-AI/Ovis2-16B)|
|[AIDC-AI/Ovis2-34B](https://modelscope.cn/models/AIDC-AI/Ovis2-34B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-34B](https://huggingface.co/AIDC-AI/Ovis2-34B)|
|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42,<4.45|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|[ZhipuAI/cogagent-9b-20241220](https://modelscope.cn/models/ZhipuAI/cogagent-9b-20241220)|glm4v|glm4v|transformers>=4.42|-|[THUDM/cogagent-9b-20241220](https://huggingface.co/THUDM/cogagent-9b-20241220)|
|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|
Expand Down
6 changes: 6 additions & 0 deletions docs/source_en/Instruction/Supported-models-and-datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,12 @@ The table below introduces the models integrated with ms-swift:
|[AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4)|
|[AIDC-AI/Ovis1.6-Gemma2-27B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-27B)|ovis1_6|ovis1_6|transformers>=4.42|vision|[AIDC-AI/Ovis1.6-Gemma2-27B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-27B)|
|[AIDC-AI/Ovis1.6-Llama3.2-3B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Llama3.2-3B)|ovis1_6_llama3|ovis1_6_llama3|-|vision|[AIDC-AI/Ovis1.6-Llama3.2-3B](https://huggingface.co/AIDC-AI/Ovis1.6-Llama3.2-3B)|
|[AIDC-AI/Ovis2-1B](https://modelscope.cn/models/AIDC-AI/Ovis2-1B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-1B](https://huggingface.co/AIDC-AI/Ovis2-1B)|
|[AIDC-AI/Ovis2-2B](https://modelscope.cn/models/AIDC-AI/Ovis2-2B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-2B](https://huggingface.co/AIDC-AI/Ovis2-2B)|
|[AIDC-AI/Ovis2-4B](https://modelscope.cn/models/AIDC-AI/Ovis2-4B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-4B](https://huggingface.co/AIDC-AI/Ovis2-4B)|
|[AIDC-AI/Ovis2-8B](https://modelscope.cn/models/AIDC-AI/Ovis2-8B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-8B](https://huggingface.co/AIDC-AI/Ovis2-8B)|
|[AIDC-AI/Ovis2-16B](https://modelscope.cn/models/AIDC-AI/Ovis2-16B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-16B](https://huggingface.co/AIDC-AI/Ovis2-16B)|
|[AIDC-AI/Ovis2-34B](https://modelscope.cn/models/AIDC-AI/Ovis2-34B)|ovis2|ovis2|transformers>=4.46.2|vision|[AIDC-AI/Ovis2-34B](https://huggingface.co/AIDC-AI/Ovis2-34B)|
|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b)|glm4v|glm4v|transformers>=4.42,<4.45|-|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|[ZhipuAI/cogagent-9b-20241220](https://modelscope.cn/models/ZhipuAI/cogagent-9b-20241220)|glm4v|glm4v|transformers>=4.42|-|[THUDM/cogagent-9b-20241220](https://huggingface.co/THUDM/cogagent-9b-20241220)|
|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b)|glm_edge_v|glm_edge_v|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|
Expand Down
1 change: 1 addition & 0 deletions swift/llm/model/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class MLLMModelType:
qvq = 'qvq'
ovis1_6 = 'ovis1_6'
ovis1_6_llama3 = 'ovis1_6_llama3'
ovis2 = 'ovis2'

glm4v = 'glm4v'
glm_edge_v = 'glm_edge_v'
Expand Down
21 changes: 21 additions & 0 deletions swift/llm/model/model/qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,27 @@ def update(self, key_states: torch.Tensor, value_states: torch.Tensor, layer_idx
tags=['vision'],
))

# Ovis2 family: each checkpoint is registered with the same repo id passed twice
# to Model. The entry reuses get_model_tokenizer_ovis and ModelArch.ovis1_6, and
# pairs with the ovis2 template type.
register_model(
    ModelMeta(
        MLLMModelType.ovis2,
        [
            ModelGroup([
                Model(repo_id, repo_id) for repo_id in (
                    'AIDC-AI/Ovis2-1B',
                    'AIDC-AI/Ovis2-2B',
                    'AIDC-AI/Ovis2-4B',
                    'AIDC-AI/Ovis2-8B',
                    'AIDC-AI/Ovis2-16B',
                    'AIDC-AI/Ovis2-34B',
                )
            ]),
        ],
        TemplateType.ovis2,
        get_model_tokenizer_ovis,
        model_arch=ModelArch.ovis1_6,
        architectures=['Ovis'],
        tags=['vision'],
        requires=['transformers>=4.46.2'],
    ))

register_model(
ModelMeta(
RMModelType.qwen2_reward,
Expand Down
1 change: 1 addition & 0 deletions swift/llm/template/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class MLLMTemplateType:
qvq = 'qvq'
ovis1_6 = 'ovis1_6'
ovis1_6_llama3 = 'ovis1_6_llama3'
ovis2 = 'ovis2'

llama3_1_omni = 'llama3_1_omni'
llama3_2_vision = 'llama3_2_vision'
Expand Down
7 changes: 7 additions & 0 deletions swift/llm/template/template/qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,13 @@ def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: Optional[in
template_cls=Ovis1_6Template,
))

# ovis2 template: a Qwen2_5TemplateMeta that uses Ovis1_6Template as its template
# class and declares the image/video pad tokens as placeholder tokens.
register_template(
    Qwen2_5TemplateMeta(
        MLLMTemplateType.ovis2,
        placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
        template_cls=Ovis1_6Template,
    ))


@dataclass
class MarcoO1TemplateMeta(QwenTemplateMeta):
Expand Down
7 changes: 7 additions & 0 deletions tests/test_align/test_template/test_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ def test_ovis1_6_llama3():
assert response == '这是一只小猫。从图中可见的特征如大眼睛、细长的白色鼻毛和毛发的图案,表明它可能属于常见的猫种。猫的表情和毛发的质感显示出它年轻,可能是幼猫。'


def test_ovis2():
    """Check AIDC-AI/Ovis2-2B's reply from `_infer_model` (default arguments) against the reference answer."""
    expected = '这是一张插画风格的图像,展示了一只可爱的猫咪。这只猫有灰白相间的毛发,大大的蓝色眼睛和竖立的耳朵,显得非常可爱和迷人。背景模糊,突出了猫咪的细节和表情。'
    engine = PtEngine('AIDC-AI/Ovis2-2B')
    assert _infer_model(engine) == expected


def test_paligemma():
pt_engine = PtEngine('AI-ModelScope/paligemma-3b-mix-224')
response = _infer_model(pt_engine, messages=[{'role': 'user', 'content': 'detect cat'}])
Expand Down Expand Up @@ -446,6 +452,7 @@ def test_ui_tars():
# test_llava()
# test_ovis1_6()
# test_ovis1_6_llama3()
# test_ovis2()
# test_yi_vl()
# test_deepseek_vl()
test_deepseek_janus()
Expand Down

0 comments on commit 96eeecc

Please sign in to comment.