Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ling0322 committed Jul 30, 2024
1 parent 7e88156 commit 7a24b93
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 20 deletions.
42 changes: 26 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ Welcome to libLLM, an open-source project designed for efficient inference of la
| Index-1.9B-Character (Role-playing) | 🤗[Huggingface](https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/blob/main/bilibili-index-1.9b-character-q4.llmpkg) |
| Index-1.9B-Chat | 🤗[Huggingface](https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/blob/main/bilibili-index-1.9b-chat-q4.llmpkg) |

## Quickstart

To run and chat with Bilibili-Index-1.9B-Character:

```bash
$ llm chat -model index-character
```

## Key Features

- Optimized for everyday devices: libLLM has been optimized to run smoothly on common personal computers, ensuring the powerful capabilities of large language models are accessible to a wider range of users.
Expand Down Expand Up @@ -65,22 +73,24 @@ $ make -j
## Run libllm command line

```bash
$ ./src/llm/llm -config ../model/chatglm3-6b-libllm-q4/chatglm3.config
INFO 2023-12-19T08:56:47Z lymath.cc:42] lymath: Use Avx512 backend.
INFO 2023-12-19T08:56:48Z cuda_operators.cc:46] cuda numDevices = 1
INFO 2023-12-19T08:56:48Z cuda_operators.cc:47] cuda:0 maxThreadsPerMultiProcessor = 2048
INFO 2023-12-19T08:56:48Z cuda_operators.cc:49] cuda:0 multiProcessorCount = 20
INFO 2023-12-19T08:56:48Z llm.cc:123] OMP max_threads = 20
INFO 2023-12-19T08:56:48Z bpe_model.cc:34] read tokenizer from ../model/chatglm3-6b-libllm-q4/chatglm3.tokenizer.bin
INFO 2023-12-19T08:56:48Z model_factory.cc:35] model_type = chatglm3
INFO 2023-12-19T08:56:48Z model_factory.cc:36] device = cuda
INFO 2023-12-19T08:56:48Z state_map.cc:58] read state map from ../model/chatglm3-6b-libllm-q4/chatglm3.q4.bin
INFO 2023-12-19T08:56:51Z state_map.cc:68] reading ... 100.0%
INFO 2023-12-19T08:56:51Z state_map.cc:69] 200 tensors read.
> 你好

你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。
(29 token, time=0.92s, 31.75ms per token)
$ src/libllm/llm chat -model index-character
INFO 2024-07-30T12:02:28Z interface.cc:67] ISA support: AVX2=1 F16C=1 AVX512F=1
INFO 2024-07-30T12:02:28Z interface.cc:71] Use Avx512 backend.
INFO 2024-07-30T12:02:30Z matmul.cc:43] Use GEMM from cuBLAS.
INFO 2024-07-30T12:02:30Z cuda_operators.cc:51] cuda numDevices = 2
INFO 2024-07-30T12:02:30Z cuda_operators.cc:52] cuda:0 maxThreadsPerMultiProcessor = 2048
INFO 2024-07-30T12:02:30Z cuda_operators.cc:54] cuda:0 multiProcessorCount = 20
INFO 2024-07-30T12:02:30Z thread_pool.cc:73] ThreadPool started. numThreads=20
INFO 2024-07-30T12:02:30Z llm.cc:204] read model package: /home/xiaoych/.libllm/models/bilibili-index-1.9b-character-q4.llmpkg
INFO 2024-07-30T12:02:30Z model_for_generation.cc:43] model_type = index
INFO 2024-07-30T12:02:30Z model_for_generation.cc:44] device = cuda
INFO 2024-07-30T12:02:31Z state_map.cc:66] 220 tensors read.
Please input your question.
Type ':new' to start a new session (clean history).
Type ':sys <system_prompt>' to set the system prompt and start a new session .
> hi
您好!我是Index,请问有什么我可以帮助您的吗?
(12 tokens, time=0.76s, 63.47ms per token)
>
```

Expand Down
3 changes: 1 addition & 2 deletions go/bin/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"strings"
"time"

"github.com/ling0322/libllm/go/llm"
"github.com/ling0322/libllm/go/skill"
)

Expand Down Expand Up @@ -58,7 +57,7 @@ func chatMain(args []string) {
}

modelName := getModelArg(fs)
model, err := llm.NewModel(modelName, getDeviceArg())
model, err := createModelAutoDownload(modelName, getDeviceArg())
if err != nil {
log.Fatal(err)
}
Expand Down
187 changes: 187 additions & 0 deletions go/bin/download.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package main

import (
"errors"
"flag"
"fmt"
"io"
"log"
"log/slog"
"net/http"
"os"
"path"
"runtime"

"github.com/ling0322/libllm/go/llm"
"github.com/schollz/progressbar/v3"
)

// ErrInvalidModelName is returned when a model name is not one of the known
// downloadable models (the keys of modelUrls / modelFilenames).
var ErrInvalidModelName = errors.New("invalid model name")

// ModelCacheDir is the local directory where downloaded model packages are
// stored; resolved once at startup.
var ModelCacheDir = getModelCacheDir()

// modelUrls maps a short model name to the URL its .llmpkg package is
// downloaded from.
var modelUrls = map[string]string{
"index-chat":      "https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/resolve/main/bilibili-index-1.9b-chat-q4.llmpkg",
"index-character": "https://huggingface.co/ling0322/bilibili-index-1.9b-libllm/resolve/main/bilibili-index-1.9b-character-q4.llmpkg",
}

// modelFilenames maps a short model name to the filename used for it inside
// ModelCacheDir. Its key set must stay in sync with modelUrls.
var modelFilenames = map[string]string{
"index-chat":      "bilibili-index-1.9b-chat-q4.llmpkg",
"index-character": "bilibili-index-1.9b-character-q4.llmpkg",
}

// getModelCacheDir returns the directory used to cache downloaded model
// packages: a "models" directory next to the llm executable on Windows, and
// ~/.libllm/models everywhere else.
//
// NOTE(review): filepath.Join would be the canonical choice for OS paths on
// Windows, but Go's os package accepts slash-separated paths there too, so
// path.Join is kept for consistency with the rest of this file.
func getModelCacheDir() string {
	if runtime.GOOS == "windows" {
		// Keep models next to the binary so the install stays self-contained.
		binFile, err := os.Executable()
		if err != nil {
			log.Fatal(err)
		}
		return path.Join(path.Dir(binFile), "models")
	}

	// linux, darwin and any other unix-like GOOS: ~/.libllm/models.
	// Previously an unrecognized GOOS fell through both branches and silently
	// returned "", which made the cache directory relative to the CWD.
	userDir, err := os.UserHomeDir()
	if err != nil {
		log.Fatal(err)
	}
	return path.Join(userDir, ".libllm", "models")
}

// downloadModel downloads the model package identified by name into
// ModelCacheDir and returns the path of the downloaded file. The payload is
// written to a temporary "<file>.download" file and renamed into place on
// success, so an interrupted download never leaves a truncated package under
// the final name.
func downloadModel(name string) (modelPath string, err error) {
	url, ok := modelUrls[name]
	if !ok {
		// Return the typed error instead of log.Fatal: callers such as
		// createModelAutoDownload can report it without killing the process,
		// and this matches checkModelInCache.
		return "", ErrInvalidModelName
	}

	filename, ok := modelFilenames[name]
	if !ok {
		return "", ErrInvalidModelName
	}
	modelPath = path.Join(ModelCacheDir, filename)
	slog.Info("download model", "url", url)

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return "", err
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Without this check a 404/503 error page would be saved as the model
	// package and only fail much later, at load time.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status)
	}

	modelDir := path.Dir(modelPath)
	err = os.MkdirAll(modelDir, os.ModePerm)
	if err != nil {
		return "", fmt.Errorf("unable to create model cache directory: %w", err)
	}

	downloadPath := modelPath + ".download"
	f, err := os.OpenFile(downloadPath, os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return "", err
	}

	bar := progressbar.DefaultBytes(
		resp.ContentLength,
		"Downloading",
	)
	_, err = io.Copy(io.MultiWriter(f, bar), resp.Body)
	// Close before renaming: on Windows renaming a still-open file fails, and
	// Close may also surface delayed write errors that io.Copy did not see.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		return "", err
	}

	err = os.Rename(downloadPath, modelPath)
	if err != nil {
		return "", err
	}

	slog.Info("Save model", "path", modelPath)
	return modelPath, nil
}

// checkModelInCache reports whether the model package for name is already
// present in the cache directory. On success it returns the model path;
// otherwise it returns the error (ErrInvalidModelName for an unknown name,
// or the os.Stat error when the file is missing).
func checkModelInCache(name string) (modelPath string, err error) {
	filename, ok := modelFilenames[name]
	if !ok {
		return "", ErrInvalidModelName
	}

	modelPath = path.Join(ModelCacheDir, filename)
	if _, err = os.Stat(modelPath); err != nil {
		return "", err
	}
	return modelPath, nil
}

// getOrDownloadModel returns the local path of the model package for name,
// downloading it into the cache directory first when it is not already there.
func getOrDownloadModel(name string) (modelPath string, err error) {
	if cached, cacheErr := checkModelInCache(name); cacheErr == nil {
		return cached, nil
	}
	return downloadModel(name)
}

// createModelAutoDownload resolves nameOrPath to a local model package and
// loads it on the given device. A known model name (a key of modelFilenames)
// is served from the cache, downloading it on first use; any other string is
// treated as a filesystem path to an .llmpkg file.
func createModelAutoDownload(nameOrPath string, device llm.Device) (llm.Model, error) {
	modelPath := nameOrPath
	if _, ok := modelFilenames[nameOrPath]; ok {
		var err error
		modelPath, err = getOrDownloadModel(nameOrPath)
		if err != nil {
			return nil, err
		}
	}

	if _, err := os.Stat(modelPath); err != nil {
		// Wrap the stat error (%w) so the caller can distinguish a missing
		// file from, say, a permission problem; the original discarded it.
		return nil, fmt.Errorf("model does not exist: %s: %w", modelPath, err)
	}

	return llm.NewModel(modelPath, device)
}

// printDownloadUsage writes the help text of the "llm download" sub-command
// to stderr, including the defaults of every flag registered on fs.
func printDownloadUsage(fs *flag.FlagSet) {
	w := os.Stderr
	fmt.Fprintln(w, "Usage: llm download [OPTIONS]")
	fmt.Fprintln(w, "")
	fmt.Fprintln(w, "Options:")
	fs.PrintDefaults()
	fmt.Fprintln(w, "")
}

// downloadMain implements the "llm download" sub-command: it downloads the
// model selected by the -model flag into the local cache directory.
func downloadMain(args []string) {
	fs := flag.NewFlagSet("", flag.ExitOnError)
	fs.Usage = func() {
		printDownloadUsage(fs)
	}

	addModelFlag(fs)
	_ = fs.Parse(args)

	if fs.NArg() != 0 {
		fs.Usage()
		os.Exit(1)
	}

	modelName := getModelArg(fs)
	if modelPath, err := checkModelInCache(modelName); err == nil {
		// Nothing to do. Previously this printed the message but then fell
		// through and downloaded the package a second time anyway.
		fmt.Printf("model \"%s\" already downloaded. Path is \"%s\"\n", modelName, modelPath)
		return
	}

	if _, err := downloadModel(modelName); err != nil {
		log.Fatal(err)
	}
}
8 changes: 8 additions & 0 deletions go/bin/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,11 @@ require (
github.com/ling0322/libllm/go/llm v1.0.0
github.com/ling0322/libllm/go/skill v1.0.0
)

require (
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v3 v3.14.5 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/term v0.22.0 // indirect
)
18 changes: 18 additions & 0 deletions go/bin/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/schollz/progressbar/v3 v3.14.5 h1:97RrSxbBASxQuZN9yemnyGrFZ/swnG6IrEe2R0BseX8=
github.com/schollz/progressbar/v3 v3.14.5/go.mod h1:Nrzpuw3Nl0srLY0VlTvC4V6RL50pcEymjy6qyJAaLa0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk=
golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4=
3 changes: 3 additions & 0 deletions go/bin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ func printCommandUsage() {
fmt.Fprintln(os.Stderr, "Commands:")
fmt.Fprintln(os.Stderr, " chat Chat with LLM")
fmt.Fprintln(os.Stderr, " transcribe Transcribe audio or video file to text")
fmt.Fprintln(os.Stderr, " download Download model to local")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Run 'llm COMMAND -h' for more information on a command.")
}
Expand All @@ -46,6 +47,8 @@ func main() {
chatMain(os.Args[2:])
case "transcribe":
transcribeMain(os.Args[2:])
case "download":
downloadMain(os.Args[2:])
default:
fmt.Fprintf(os.Stderr, "Invalid command \"%s\"\n\n", command)
printCommandUsage()
Expand Down
3 changes: 1 addition & 2 deletions go/bin/transcribe.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"log"
"os"

"github.com/ling0322/libllm/go/llm"
"github.com/ling0322/libllm/go/skill"
)

Expand Down Expand Up @@ -57,7 +56,7 @@ func transcribeMain(args []string) {
os.Exit(1)
}

model, err := llm.NewModel(modelFile, device)
model, err := createModelAutoDownload(modelFile, device)
if err != nil {
log.Fatal(err)
}
Expand Down

0 comments on commit 7a24b93

Please sign in to comment.