Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ling0322 committed Jul 30, 2024
1 parent 7e88156 commit 7a24b93
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 20 deletions.
42 changes: 26 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ Welcome to libLLM, an open-source project designed for efficient inference of la
| Index-1.9B-Character (Role-playing) | 🤗[Huggingface](https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/blob/main/bilibili-index-1.9b-character-q4.llmpkg) |
| Index-1.9B-Chat | 🤗[Huggingface](https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/blob/main/bilibili-index-1.9b-chat-q4.llmpkg) |

## Quickstart

To run and chat with Bilibili-Index-1.9B-Character:

```bash
$ llm chat -model index-character
```

## Key Features

- Optimized for everyday devices: libLLM has been optimized to run smoothly on common personal computers, ensuring the powerful capabilities of large language models are accessible to a wider range of users.
Expand Down Expand Up @@ -65,22 +73,24 @@ $ make -j
## Run libllm command line

```bash
$ ./src/llm/llm -config ../model/chatglm3-6b-libllm-q4/chatglm3.config
INFO 2023-12-19T08:56:47Z lymath.cc:42] lymath: Use Avx512 backend.
INFO 2023-12-19T08:56:48Z cuda_operators.cc:46] cuda numDevices = 1
INFO 2023-12-19T08:56:48Z cuda_operators.cc:47] cuda:0 maxThreadsPerMultiProcessor = 2048
INFO 2023-12-19T08:56:48Z cuda_operators.cc:49] cuda:0 multiProcessorCount = 20
INFO 2023-12-19T08:56:48Z llm.cc:123] OMP max_threads = 20
INFO 2023-12-19T08:56:48Z bpe_model.cc:34] read tokenizer from ../model/chatglm3-6b-libllm-q4/chatglm3.tokenizer.bin
INFO 2023-12-19T08:56:48Z model_factory.cc:35] model_type = chatglm3
INFO 2023-12-19T08:56:48Z model_factory.cc:36] device = cuda
INFO 2023-12-19T08:56:48Z state_map.cc:58] read state map from ../model/chatglm3-6b-libllm-q4/chatglm3.q4.bin
INFO 2023-12-19T08:56:51Z state_map.cc:68] reading ... 100.0%
INFO 2023-12-19T08:56:51Z state_map.cc:69] 200 tensors read.
> 你好

你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。
(29 token, time=0.92s, 31.75ms per token)
$ src/libllm/llm chat -model index-character
INFO 2024-07-30T12:02:28Z interface.cc:67] ISA support: AVX2=1 F16C=1 AVX512F=1
INFO 2024-07-30T12:02:28Z interface.cc:71] Use Avx512 backend.
INFO 2024-07-30T12:02:30Z matmul.cc:43] Use GEMM from cuBLAS.
INFO 2024-07-30T12:02:30Z cuda_operators.cc:51] cuda numDevices = 2
INFO 2024-07-30T12:02:30Z cuda_operators.cc:52] cuda:0 maxThreadsPerMultiProcessor = 2048
INFO 2024-07-30T12:02:30Z cuda_operators.cc:54] cuda:0 multiProcessorCount = 20
INFO 2024-07-30T12:02:30Z thread_pool.cc:73] ThreadPool started. numThreads=20
INFO 2024-07-30T12:02:30Z llm.cc:204] read model package: /home/xiaoych/.libllm/models/bilibili-index-1.9b-character-q4.llmpkg
INFO 2024-07-30T12:02:30Z model_for_generation.cc:43] model_type = index
INFO 2024-07-30T12:02:30Z model_for_generation.cc:44] device = cuda
INFO 2024-07-30T12:02:31Z state_map.cc:66] 220 tensors read.
Please input your question.
Type ':new' to start a new session (clean history).
Type ':sys <system_prompt>' to set the system prompt and start a new session .
> hi
您好!我是Index,请问有什么我可以帮助您的吗?
(12 tokens, time=0.76s, 63.47ms per token)
>
```

Expand Down
3 changes: 1 addition & 2 deletions go/bin/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"strings"
"time"

"github.com/ling0322/libllm/go/llm"
"github.com/ling0322/libllm/go/skill"
)

Expand Down Expand Up @@ -58,7 +57,7 @@ func chatMain(args []string) {
}

modelName := getModelArg(fs)
model, err := llm.NewModel(modelName, getDeviceArg())
model, err := createModelAutoDownload(modelName, getDeviceArg())
if err != nil {
log.Fatal(err)
}
Expand Down
187 changes: 187 additions & 0 deletions go/bin/download.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package main

import (
"errors"
"flag"
"fmt"
"io"
"log"
"log/slog"
"net/http"
"os"
"path"
"runtime"

"github.com/ling0322/libllm/go/llm"
"github.com/schollz/progressbar/v3"
)

// ErrInvalidModelName is returned when a model name is not one of the known
// downloadable models (the keys of modelUrls / modelFilenames).
var ErrInvalidModelName = errors.New("invalid model name")

// ModelCacheDir is the local directory where downloaded model packages are
// stored; resolved once at startup.
var ModelCacheDir = getModelCacheDir()

// modelUrls maps a short model name to the URL its .llmpkg package is
// downloaded from.
var modelUrls = map[string]string{
"index-chat":      "https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/resolve/main/bilibili-index-1.9b-chat-q4.llmpkg",
"index-character": "https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/resolve/main/bilibili-index-1.9b-character-q4.llmpkg",
}

// modelFilenames maps a short model name to the filename used for it inside
// ModelCacheDir. Its key set must stay in sync with modelUrls.
var modelFilenames = map[string]string{
"index-chat":      "bilibili-index-1.9b-chat-q4.llmpkg",
"index-character": "bilibili-index-1.9b-character-q4.llmpkg",
}

// getModelCacheDir returns the directory used to cache downloaded model
// packages: a "models" directory next to the llm executable on Windows, and
// ~/.libllm/models everywhere else.
//
// NOTE(review): filepath.Join would be the canonical choice for OS paths on
// Windows, but Go's os package accepts slash-separated paths there too, so
// path.Join is kept for consistency with the rest of this file.
func getModelCacheDir() string {
	if runtime.GOOS == "windows" {
		// Keep models next to the binary so the install stays self-contained.
		binFile, err := os.Executable()
		if err != nil {
			log.Fatal(err)
		}
		return path.Join(path.Dir(binFile), "models")
	}

	// linux, darwin and any other unix-like GOOS: ~/.libllm/models.
	// Previously an unrecognized GOOS fell through both branches and silently
	// returned "", which made the cache directory relative to the CWD.
	userDir, err := os.UserHomeDir()
	if err != nil {
		log.Fatal(err)
	}
	return path.Join(userDir, ".libllm", "models")
}

// downloadModel downloads the model package identified by name into
// ModelCacheDir and returns the path of the downloaded file. The payload is
// written to a temporary "<file>.download" file and renamed into place on
// success, so an interrupted download never leaves a truncated package under
// the final name.
func downloadModel(name string) (modelPath string, err error) {
	url, ok := modelUrls[name]
	if !ok {
		// Return the typed error instead of log.Fatal: callers such as
		// createModelAutoDownload can report it without killing the process,
		// and this matches checkModelInCache.
		return "", ErrInvalidModelName
	}

	filename, ok := modelFilenames[name]
	if !ok {
		return "", ErrInvalidModelName
	}
	modelPath = path.Join(ModelCacheDir, filename)
	slog.Info("download model", "url", url)

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return "", err
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Without this check a 404/503 error page would be saved as the model
	// package and only fail much later, at load time.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status)
	}

	modelDir := path.Dir(modelPath)
	err = os.MkdirAll(modelDir, os.ModePerm)
	if err != nil {
		return "", fmt.Errorf("unable to create model cache directory: %w", err)
	}

	downloadPath := modelPath + ".download"
	f, err := os.OpenFile(downloadPath, os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return "", err
	}

	bar := progressbar.DefaultBytes(
		resp.ContentLength,
		"Downloading",
	)
	_, err = io.Copy(io.MultiWriter(f, bar), resp.Body)
	// Close before renaming: on Windows renaming a still-open file fails, and
	// Close may also surface delayed write errors that io.Copy did not see.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		return "", err
	}

	err = os.Rename(downloadPath, modelPath)
	if err != nil {
		return "", err
	}

	slog.Info("Save model", "path", modelPath)
	return modelPath, nil
}

// checkModelInCache reports whether the model package for name is already
// present in the cache directory. On success it returns the model path;
// otherwise it returns the error (ErrInvalidModelName for an unknown name,
// or the os.Stat error when the file is missing).
func checkModelInCache(name string) (modelPath string, err error) {
	filename, ok := modelFilenames[name]
	if !ok {
		return "", ErrInvalidModelName
	}

	modelPath = path.Join(ModelCacheDir, filename)
	if _, err = os.Stat(modelPath); err != nil {
		return "", err
	}
	return modelPath, nil
}

// getOrDownloadModel returns the local path of the model package for name,
// downloading it into the cache directory first when it is not already there.
func getOrDownloadModel(name string) (modelPath string, err error) {
	if cached, cacheErr := checkModelInCache(name); cacheErr == nil {
		return cached, nil
	}
	return downloadModel(name)
}

// createModelAutoDownload resolves nameOrPath to a local model package and
// loads it on the given device. A known model name (a key of modelFilenames)
// is served from the cache, downloading it on first use; any other string is
// treated as a filesystem path to an .llmpkg file.
func createModelAutoDownload(nameOrPath string, device llm.Device) (llm.Model, error) {
	modelPath := nameOrPath
	if _, ok := modelFilenames[nameOrPath]; ok {
		var err error
		modelPath, err = getOrDownloadModel(nameOrPath)
		if err != nil {
			return nil, err
		}
	}

	if _, err := os.Stat(modelPath); err != nil {
		// Wrap the stat error (%w) so the caller can distinguish a missing
		// file from, say, a permission problem; the original discarded it.
		return nil, fmt.Errorf("model does not exist: %s: %w", modelPath, err)
	}

	return llm.NewModel(modelPath, device)
}

// printDownloadUsage writes the help text of the "llm download" sub-command
// to stderr, including the defaults of every flag registered on fs.
func printDownloadUsage(fs *flag.FlagSet) {
	w := os.Stderr
	fmt.Fprintln(w, "Usage: llm download [OPTIONS]")
	fmt.Fprintln(w, "")
	fmt.Fprintln(w, "Options:")
	fs.PrintDefaults()
	fmt.Fprintln(w, "")
}

// downloadMain implements the "llm download" sub-command: it downloads the
// model selected by the -model flag into the local cache directory.
func downloadMain(args []string) {
	fs := flag.NewFlagSet("", flag.ExitOnError)
	fs.Usage = func() {
		printDownloadUsage(fs)
	}

	addModelFlag(fs)
	_ = fs.Parse(args)

	if fs.NArg() != 0 {
		fs.Usage()
		os.Exit(1)
	}

	modelName := getModelArg(fs)
	if modelPath, err := checkModelInCache(modelName); err == nil {
		// Nothing to do. Previously this printed the message but then fell
		// through and downloaded the package a second time anyway.
		fmt.Printf("model \"%s\" already downloaded. Path is \"%s\"\n", modelName, modelPath)
		return
	}

	if _, err := downloadModel(modelName); err != nil {
		log.Fatal(err)
	}
}
8 changes: 8 additions & 0 deletions go/bin/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,11 @@ require (
github.com/ling0322/libllm/go/llm v1.0.0
github.com/ling0322/libllm/go/skill v1.0.0
)

require (
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v3 v3.14.5 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/term v0.22.0 // indirect
)
18 changes: 18 additions & 0 deletions go/bin/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/schollz/progressbar/v3 v3.14.5 h1:97RrSxbBASxQuZN9yemnyGrFZ/swnG6IrEe2R0BseX8=
github.com/schollz/progressbar/v3 v3.14.5/go.mod h1:Nrzpuw3Nl0srLY0VlTvC4V6RL50pcEymjy6qyJAaLa0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk=
golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4=
3 changes: 3 additions & 0 deletions go/bin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ func printCommandUsage() {
fmt.Fprintln(os.Stderr, "Commands:")
fmt.Fprintln(os.Stderr, " chat Chat with LLM")
fmt.Fprintln(os.Stderr, " transcribe Transcribe audio or video file to text")
fmt.Fprintln(os.Stderr, " download Download model to local")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Run 'llm COMMAND -h' for more information on a command.")
}
Expand All @@ -46,6 +47,8 @@ func main() {
chatMain(os.Args[2:])
case "transcribe":
transcribeMain(os.Args[2:])
case "download":
downloadMain(os.Args[2:])
default:
fmt.Fprintf(os.Stderr, "Invalid command \"%s\"\n\n", command)
printCommandUsage()
Expand Down
3 changes: 1 addition & 2 deletions go/bin/transcribe.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"log"
"os"

"github.com/ling0322/libllm/go/llm"
"github.com/ling0322/libllm/go/skill"
)

Expand Down Expand Up @@ -57,7 +56,7 @@ func transcribeMain(args []string) {
os.Exit(1)
}

model, err := llm.NewModel(modelFile, device)
model, err := createModelAutoDownload(modelFile, device)
if err != nil {
log.Fatal(err)
}
Expand Down

0 comments on commit 7a24b93

Please sign in to comment.