Skip to content

Commit

Permalink
support moonlight (#3232)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jintao-Huang authored Feb 24, 2025
1 parent 7746db2 commit a0cf96b
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/source/Instruction/支持的模型和数据集.md
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,8 @@
|[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)|
|[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)|
|[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)|
|[moonshotai/Moonlight-16B-A3B](https://modelscope.cn/models/moonshotai/Moonlight-16B-A3B)|moonlight|moonlight|transformers<4.49|-|[moonshotai/Moonlight-16B-A3B](https://huggingface.co/moonshotai/Moonlight-16B-A3B)|
|[moonshotai/Moonlight-16B-A3B-Instruct](https://modelscope.cn/models/moonshotai/Moonlight-16B-A3B-Instruct)|moonlight|moonlight|transformers<4.49|-|[moonshotai/Moonlight-16B-A3B-Instruct](https://huggingface.co/moonshotai/Moonlight-16B-A3B-Instruct)|
|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
|[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)|
Expand Down
2 changes: 2 additions & 0 deletions docs/source_en/Instruction/Supported-models-and-datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,8 @@ The table below introduces the models integrated with ms-swift:
|[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)|
|[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)|
|[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)|
|[moonshotai/Moonlight-16B-A3B](https://modelscope.cn/models/moonshotai/Moonlight-16B-A3B)|moonlight|moonlight|transformers<4.49|-|[moonshotai/Moonlight-16B-A3B](https://huggingface.co/moonshotai/Moonlight-16B-A3B)|
|[moonshotai/Moonlight-16B-A3B-Instruct](https://modelscope.cn/models/moonshotai/Moonlight-16B-A3B-Instruct)|moonlight|moonlight|transformers<4.49|-|[moonshotai/Moonlight-16B-A3B-Instruct](https://huggingface.co/moonshotai/Moonlight-16B-A3B-Instruct)|
|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
|[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)|
Expand Down
1 change: 1 addition & 0 deletions swift/llm/model/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ class LLMModelType:
mamba = 'mamba'
polylm = 'polylm'
aya = 'aya'
moonlight = 'moonlight'


class BertModelType:
Expand Down
16 changes: 16 additions & 0 deletions swift/llm/model/model/deepseek.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,19 @@ def get_model_tokenizer_deepseek_vl2(model_dir: str, *args, **kwargs):
architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'],
model_arch=ModelArch.llama,
))

# Register the Moonlight checkpoints under the `moonlight` model type.
register_model(
    ModelMeta(
        LLMModelType.moonlight,
        [
            # Base and Instruct variants of the same 16B-A3B checkpoint.
            # NOTE(review): the two ids per Model look like (ModelScope id, HuggingFace id),
            # matching the link columns of the supported-models table - confirm against `Model`.
            ModelGroup([
                Model('moonshotai/Moonlight-16B-A3B', 'moonshotai/Moonlight-16B-A3B'),
                Model('moonshotai/Moonlight-16B-A3B-Instruct', 'moonshotai/Moonlight-16B-A3B-Instruct'),
            ]),
        ],
        # Chat template registered under the same `moonlight` key.
        TemplateType.moonlight,
        # NOTE(review): presumably the generic loader with optional flash-attention support - confirm.
        get_model_tokenizer_with_flash_attn,
        # The checkpoints are matched on the DeepseekV3 architecture name, while the
        # module layout is mapped to the existing deepseek_v2 model arch.
        architectures=['DeepseekV3ForCausalLM'],
        model_arch=ModelArch.deepseek_v2,
        # Same upper bound as listed in the supported-models documentation table.
        requires=['transformers<4.49'],
    ))
1 change: 1 addition & 0 deletions swift/llm/template/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class LLMTemplateType:
xverse = 'xverse'
bluelm = 'bluelm'
orion = 'orion'
moonlight = 'moonlight'

aya = 'aya'
c4ai = 'c4ai'
Expand Down
11 changes: 11 additions & 0 deletions swift/llm/template/template/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,14 @@ class TeleChatTemplateMeta(TemplateMeta):
suffix=['<|END_OF_TURN_TOKEN|>'],
default_system=AYA_SYSTEM,
system_prefix=['<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{SYSTEM}}<|END_OF_TURN_TOKEN|']))

# Register the chat template used by the Moonlight models.
# NOTE(review): the exact meaning of each field is defined by TemplateMeta, which is not
# visible here; the per-field notes below are inferred from the field names and the
# placeholder tokens - confirm against TemplateMeta.
register_template(
    TemplateMeta(
        LLMTemplateType.moonlight,
        # No tokens are configured as a conversation prefix.
        prefix=[],
        # System turn; {{SYSTEM}} is the placeholder for the system message text.
        system_prefix=['<|im_system|>system<|im_middle|>{{SYSTEM}}<|im_end|>'],
        # User turn ({{QUERY}} placeholder) followed directly by the assistant header,
        # so the model's reply starts right after `<|im_middle|>`.
        prompt=['<|im_user|>user<|im_middle|>{{QUERY}}<|im_end|><|im_assistant|>assistant<|im_middle|>'],
        # The same `<|im_end|>` token is used between rounds and at the end of the reply.
        chat_sep=['<|im_end|>'],
        suffix=['<|im_end|>'],
        # System message configured as the default.
        default_system='You are a helpful assistant',
    ))
11 changes: 10 additions & 1 deletion tests/test_align/test_template/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,14 @@ def test_mistral_small():
assert response == response2


def test_moonlight():
    """Check that Moonlight-Instruct inference is unchanged by the 'jinja' template backend.

    Runs `_infer_model` once with the engine's default template settings, switches the
    default template's `template_backend` to 'jinja', runs it again, and asserts that
    both results are equal.
    """
    engine = PtEngine('moonshotai/Moonlight-16B-A3B-Instruct')

    # First pass: template backend as configured by default on the engine.
    res = _infer_model(engine)

    # Second pass: same engine, template backend switched to 'jinja'.
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)

    assert res == res2, f'res: {res}, res2: {res2}'


if __name__ == '__main__':
from swift.llm import PtEngine, RequestConfig, get_template, get_model_tokenizer
from swift.utils import get_logger, seed_everything
Expand Down Expand Up @@ -351,4 +359,5 @@ def test_mistral_small():
# test_internlm3()
# test_deepseek_r1_distill()
# test_qwen2_5_prm()
test_mistral_small()
# test_mistral_small()
test_moonlight()

0 comments on commit a0cf96b

Please sign in to comment.