From 3d5d7a41f0cfa19ec68abd2f3144c726e4a8dd58 Mon Sep 17 00:00:00 2001
From: Guang Yang
Date: Wed, 10 Apr 2024 21:11:41 -0700
Subject: [PATCH] Add mistral models

---
 .ci/scripts/gather_test_models.py |  3 +++
 model.py                          | 16 ++++++++++++++++
 scripts/workflow.sh               |  6 +++---
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
index 0583a569c..2d3caee8f 100644
--- a/.ci/scripts/gather_test_models.py
+++ b/.ci/scripts/gather_test_models.py
@@ -15,6 +15,9 @@
     "tinyllamas/stories15M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
     # "tinyllamas/stories42M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
     "tinyllamas/stories110M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
+    "mistralai/Mistral-7B-v0.1": "https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/model-00001-of-00002.safetensors,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/model-00002-of-00002.safetensors,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/model.safetensors.index.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model-00001-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model-00002-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer_config.json",
+    "mistralai/Mistral-7B-Instruct-v0.1": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/model-00001-of-00002.safetensors,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/model-00002-of-00002.safetensors,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/model.safetensors.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model-00001-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model-00002-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer_config.json",
+    "mistralai/Mistral-7B-Instruct-v0.2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/model-00001-of-00003.safetensors,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/model-00002-of-00003.safetensors,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/model-00003-of-00003.safetensors,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/model.safetensors.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00001-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00002-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00003-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer_config.json",
 }

 JOB_RUNNERS = {
diff --git a/model.py b/model.py
index 972c1f736..df1aa9696 100644
--- a/model.py
+++ b/model.py
@@ -92,6 +92,22 @@ def from_name(cls, name: str):
         intermediate_size=14336,
         vocab_size=32000,
     ),
+    "Mistral-7B-Instruct-v0.1": dict(
+        n_layer=32,
+        n_head=32,
+        n_local_heads=8,
+        dim=4096,
+        intermediate_size=14336,
+        vocab_size=32000,
+    ),
+    "Mistral-7B-Instruct-v0.2": dict(
+        n_layer=32,
+        n_head=32,
+        n_local_heads=8,
+        dim=4096,
+        intermediate_size=14336,
+        vocab_size=32000,
+    ),
     "stories15M": dict(n_layer=6, n_head=6, dim=288),
     "stories110M": dict(n_layer=12, n_head=12, dim=768),
 }
diff --git a/scripts/workflow.sh b/scripts/workflow.sh
index 00c31266e..fb8bf8590 100644
--- a/scripts/workflow.sh
+++ b/scripts/workflow.sh
@@ -55,9 +55,9 @@ MODEL_REPOS=(
   "tinyllamas/stories15M"
   # "tinyllamas/stories42M"
   "tinyllamas/stories110M"
-  # "mistralai/Mistral-7B-v0.1"
-  # "mistralai/Mistral-7B-Instruct-v0.1"
-  # "mistralai/Mistral-7B-Instruct-v0.2"
+  "mistralai/Mistral-7B-v0.1"
+  "mistralai/Mistral-7B-Instruct-v0.1"
+  "mistralai/Mistral-7B-Instruct-v0.2"
   # "openlm-research/open_llama_7b"
   # "codellama/CodeLlama-7b-Python-hf"
   # "codellama/CodeLlama-34b-Python-hf"