Skip to content

Commit

Permalink
Support Qwen model in chat task (#89)
Browse files · Browse the repository at this point in the history
  • Loading branch information
ling0322 authored Aug 19, 2024
1 parent 712c47d commit b78c02a
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 4 deletions.
2 changes: 2 additions & 0 deletions go/skill/prompt_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ func newPromptBuilder(modelName string) (promptBuilder, error) {
return &Llama{}, nil
} else if modelName == "index" {
return &BilibiliIndex{}, nil
} else if modelName == "qwen" {
return &Qwen{}, nil
} else {
return nil, ErrInvalidModelForChat
}
Expand Down
61 changes: 61 additions & 0 deletions go/skill/qwen.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package skill

import (
"errors"
"fmt"

"github.com/ling0322/libllm/go/llm"
)

// Qwen is a promptBuilder for the Qwen model family. It renders a chat
// history using "<|im_start|>" / "<|im_end|>" delimited turns (see Build).
type Qwen struct {
}

// Build renders a chat history into a llm.Prompt using Qwen's turn format:
// "<|im_start|>" + "role\ncontent" + "<|im_end|>" + "\n".
//
// Every message except the last is emitted as a complete turn. The last
// message must be either:
//   - role "user": its turn is closed and an open assistant turn header is
//     appended, so generation produces the assistant reply; or
//   - role "assistant": its turn is left open (no "<|im_end|>"), so
//     generation continues the partial assistant message.
//
// Returns an error when history is empty or the last message has any other
// role.
func (q *Qwen) Build(history []Message) (llm.Prompt, error) {
	if len(history) == 0 {
		return nil, errors.New("history is empty")
	}

	prompt := llm.NewPrompt()
	for _, message := range history[:len(history)-1] {
		appendQwenTurn(prompt, message)
		prompt.AppendText("\n")
	}

	lastMessage := history[len(history)-1]
	switch lastMessage.Role {
	case "user":
		appendQwenTurn(prompt, lastMessage)
		prompt.AppendText("\n")
		// Open the assistant turn so the model generates the reply.
		prompt.AppendControlToken("<|im_start|>")
		prompt.AppendText("assistant\n")
	case "assistant":
		// Leave the turn unterminated so the model completes the
		// partial assistant message.
		prompt.AppendControlToken("<|im_start|>")
		prompt.AppendText(fmt.Sprintf("%s\n%s", lastMessage.Role, lastMessage.Content))
	default:
		return nil, errors.New("last message should be either user or assistant")
	}

	return prompt, nil
}

// appendQwenTurn appends one complete "<|im_start|>role\ncontent<|im_end|>"
// turn (without the trailing newline) to prompt.
func appendQwenTurn(prompt llm.Prompt, message Message) {
	prompt.AppendControlToken("<|im_start|>")
	prompt.AppendText(fmt.Sprintf("%s\n%s", message.Role, message.Content))
	prompt.AppendControlToken("<|im_end|>")
}
2 changes: 1 addition & 1 deletion tools/llama_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def run_llama_chat(huggingface_name):
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B"
MODEL_BIN = "model.bin"
MODEL_INI = "model.ini"
TOKENIZER_BIN = "tokenizer.bin"
Expand Down
5 changes: 2 additions & 3 deletions tools/qwen_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def run_qwen(huggingface_name):
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
MODEL_NAME = "Qwen/Qwen2-7B-Instruct"
MODEL_BIN = "model.bin"
MODEL_INI = "model.ini"
TOKENIZER_BIN = "tokenizer.bin"
Expand All @@ -130,8 +130,7 @@ def run_qwen(huggingface_name):
sys.exit(0)

tokenizer = AutoTokenizer.from_pretrained(args.huggingface_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(args.huggingface_name, device_map="auto").eval()
model.to("cpu")
model = AutoModelForCausalLM.from_pretrained(args.huggingface_name, device_map="cpu").eval()

with zipfile.ZipFile(args.output, "w", compression=zipfile.ZIP_STORED) as package:
with package.open(MODEL_BIN, "w", force_zip64=True) as fp:
Expand Down

0 comments on commit b78c02a

Please sign in to comment.