Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support chat with LLM in parallel #947

Merged
merged 10 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 0 additions & 32 deletions base/parallel/future.go

This file was deleted.

30 changes: 0 additions & 30 deletions base/parallel/future_test.go

This file was deleted.

File renamed without changes.
File renamed without changes.
47 changes: 47 additions & 0 deletions common/parallel/pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package parallel

import "sync"

// Pool is the common interface for task executors in this package. In this
// file it is implemented by SequentialPool, which runs each task inline, and by
// ConcurrentPool, which runs tasks in goroutines.
type Pool interface {
// Run submits runner for execution. Whether runner has finished by the time
// Run returns depends on the implementation.
Run(runner func())
// Wait is called after submitting tasks; ConcurrentPool blocks here until
// every submitted task has finished, while SequentialPool returns at once.
Wait()
}

// SequentialPool executes every submitted task synchronously on the caller's
// goroutine. It carries no state.
type SequentialPool struct{}

// NewSequentialPool returns a ready-to-use SequentialPool.
func NewSequentialPool() *SequentialPool {
	return new(SequentialPool)
}

// Run invokes runner immediately and returns only after runner has returned.
func (*SequentialPool) Run(runner func()) {
	runner()
}

// Wait returns immediately: every task has already completed inside Run.
func (*SequentialPool) Wait() {
	// Nothing to wait for.
}

// ConcurrentPool runs each submitted task in its own goroutine while allowing
// at most a fixed number of tasks to execute at the same time.
//
// Create it with NewConcurrentPool. It must not be copied after first use
// because it contains a sync.WaitGroup.
type ConcurrentPool struct {
	wg sync.WaitGroup
	// pool is a counting semaphore: a task holds one buffered slot for as
	// long as it is executing.
	pool chan struct{}
}

// NewConcurrentPool creates a pool that executes at most size tasks at once.
//
// A size below 1 is treated as 1. Without this clamp a size of 0 would create
// an unbuffered semaphore on which every task blocks forever (so Wait would
// never return), and a negative size would panic inside make.
func NewConcurrentPool(size int) *ConcurrentPool {
	if size < 1 {
		size = 1
	}
	return &ConcurrentPool{
		pool: make(chan struct{}, size),
	}
}

// Run schedules runner on a new goroutine and returns without waiting for it.
// The goroutine first acquires a semaphore slot, so runner itself only starts
// once fewer than the configured number of tasks are executing.
func (p *ConcurrentPool) Run(runner func()) {
	// Register the task before spawning so that a Wait issued right after Run
	// is guaranteed to observe it.
	p.wg.Add(1)
	go func() {
		p.pool <- struct{}{}
		defer func() {
			// Release the slot and mark the task finished.
			<-p.pool
			p.wg.Done()
		}()
		runner()
	}()
}

// Wait blocks until every task submitted through Run has finished.
func (p *ConcurrentPool) Wait() {
	p.wg.Wait()
}
32 changes: 32 additions & 0 deletions common/parallel/pool_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package parallel

import (
"sync/atomic"
"testing"

"github.com/stretchr/testify/assert"
)

// TestSequentialPool verifies that every task handed to a SequentialPool has
// been executed by the time Wait returns. The counter is a plain int because
// the tasks run on the calling goroutine.
func TestSequentialPool(t *testing.T) {
	const tasks = 100
	var executed int
	seq := NewSequentialPool()
	for n := 0; n < tasks; n++ {
		seq.Run(func() {
			executed++
		})
	}
	seq.Wait()
	assert.Equal(t, tasks, executed)
}

// TestConcurrentPool verifies that a ConcurrentPool executes every submitted
// task and never runs more tasks simultaneously than its configured size.
//
// The pool is deliberately much smaller than the number of tasks; with a pool
// as large as the task count the concurrency limit would never be exercised.
func TestConcurrentPool(t *testing.T) {
	const (
		limit = 4
		tasks = 100
	)
	pool := NewConcurrentPool(limit)
	var count, running, peak atomic.Int64
	for i := 0; i < tasks; i++ {
		pool.Run(func() {
			cur := running.Add(1)
			// Record the highest number of simultaneously running tasks.
			for {
				seen := peak.Load()
				if cur <= seen || peak.CompareAndSwap(seen, cur) {
					break
				}
			}
			count.Add(1)
			running.Add(-1)
		})
	}
	pool.Wait()
	assert.Equal(t, int64(tasks), count.Load())
	assert.LessOrEqual(t, peak.Load(), int64(limit))
}
46 changes: 46 additions & 0 deletions common/parallel/ratelimit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package parallel

import (
"time"

"github.com/juju/ratelimit"
)

// Package-level throttling state for the chat completion and embedding APIs.
// Every limiter starts out as Unlimited and every backoff as zero; the
// InitChatCompletionLimiters and InitEmbeddingLimiters functions replace them.
var (
	// ChatCompletionBackoff is set to one minute divided by the configured
	// chat completion requests-per-minute limit.
	ChatCompletionBackoff time.Duration
	// ChatCompletionRequestsLimiter throttles chat completion requests.
	ChatCompletionRequestsLimiter RateLimiter = new(Unlimited)
	// ChatCompletionTokensLimiter throttles chat completion tokens.
	ChatCompletionTokensLimiter RateLimiter = new(Unlimited)

	// EmbeddingBackoff is set to one minute divided by the configured
	// embedding requests-per-minute limit.
	EmbeddingBackoff time.Duration
	// EmbeddingRequestsLimiter throttles embedding requests.
	EmbeddingRequestsLimiter RateLimiter = new(Unlimited)
	// EmbeddingTokensLimiter throttles embedding tokens.
	EmbeddingTokensLimiter RateLimiter = new(Unlimited)
)

func InitChatCompletionLimiters(rpm, tpm int) {
if rpm > 0 {
ChatCompletionBackoff = time.Minute / time.Duration(rpm)
ChatCompletionRequestsLimiter = ratelimit.NewBucketWithQuantum(time.Second, int64(rpm/60), int64(rpm/60))
}
if tpm > 0 {
ChatCompletionTokensLimiter = ratelimit.NewBucketWithQuantum(time.Second, int64(tpm/60), int64(tpm/60))
}
}

func InitEmbeddingLimiters(rpm, tpm int) {
if rpm > 0 {
EmbeddingBackoff = time.Minute / time.Duration(rpm)
EmbeddingRequestsLimiter = ratelimit.NewBucketWithQuantum(time.Second, int64(rpm/60), int64(rpm/60))
}
if tpm > 0 {
EmbeddingTokensLimiter = ratelimit.NewBucketWithQuantum(time.Second, int64(tpm/60), int64(tpm/60))
}
}

// RateLimiter is the throttling interface used by the limiters in this
// package.
type RateLimiter interface {
	// Take requests count units and returns a duration; Unlimited always
	// returns zero.
	Take(count int64) time.Duration
}

// Unlimited is a RateLimiter that never throttles.
type Unlimited struct{}

// Take always returns a zero duration, whatever count is requested.
func (*Unlimited) Take(int64) time.Duration {
	return time.Duration(0)
}
29 changes: 29 additions & 0 deletions common/parallel/ratelimit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package parallel

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// TestUnlimited checks that the no-op limiter returns a zero duration.
func TestUnlimited(t *testing.T) {
	var limiter Unlimited
	wait := limiter.Take(1)
	assert.Zero(t, wait)
}

func TestInitEmbeddingLimiters(t *testing.T) {
InitEmbeddingLimiters(120, 180)
assert.Equal(t, time.Duration(0), EmbeddingRequestsLimiter.Take(1))
assert.InDelta(t, time.Second, EmbeddingRequestsLimiter.Take(2), float64(time.Millisecond))
assert.Equal(t, time.Duration(0), EmbeddingTokensLimiter.Take(2))
assert.InDelta(t, 2*time.Second, EmbeddingTokensLimiter.Take(5), float64(time.Millisecond))
}

func TestInitChatCompletionLimiters(t *testing.T) {
InitChatCompletionLimiters(120, 180)
assert.Equal(t, time.Duration(0), ChatCompletionRequestsLimiter.Take(1))
assert.InDelta(t, time.Second, ChatCompletionRequestsLimiter.Take(2), float64(time.Millisecond))
assert.Equal(t, time.Duration(0), ChatCompletionTokensLimiter.Take(2))
assert.InDelta(t, 2*time.Second, ChatCompletionTokensLimiter.Take(5), float64(time.Millisecond))
}
4 changes: 4 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,12 @@ type OpenAIConfig struct {
BaseURL string `mapstructure:"base_url"`
AuthToken string `mapstructure:"auth_token"`
ChatCompletionModel string `mapstructure:"chat_completion_model"`
ChatCompletionRPM int `mapstructure:"chat_completion_rpm"`
ChatCompletionTPM int `mapstructure:"chat_completion_tpm"`
EmbeddingModel string `mapstructure:"embedding_model"`
EmbeddingDimensions int `mapstructure:"embedding_dimensions"`
EmbeddingRPM int `mapstructure:"embedding_rpm"`
EmbeddingTPM int `mapstructure:"embedding_tpm"`
LogFile string `mapstructure:"log_file"`
}

Expand Down
15 changes: 15 additions & 0 deletions config/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,23 @@ auth_token = "ollama"
# Name of chat completion model.
chat_completion_model = "qwen2.5"

# Maximum requests per minute for chat completion.
chat_completion_rpm = 15000

# Maximum tokens per minute for chat completion.
chat_completion_tpm = 1200000

# Name of embedding model.
embedding_model = "mxbai-embed-large"

# Dimensions of embedding vectors.
embedding_dimensions = 1024

# Maximum requests per minute for embedding.
embedding_rpm = 1800

# Maximum tokens per minute for embedding.
embedding_tpm = 1200000

# Log file for OpenAI API.
log_file = "openai.log"
4 changes: 4 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,12 @@ func TestUnmarshal(t *testing.T) {
assert.Equal(t, "http://localhost:11434/v1", config.OpenAI.BaseURL)
assert.Equal(t, "ollama", config.OpenAI.AuthToken)
assert.Equal(t, "qwen2.5", config.OpenAI.ChatCompletionModel)
assert.Equal(t, 15000, config.OpenAI.ChatCompletionRPM)
assert.Equal(t, 1200000, config.OpenAI.ChatCompletionTPM)
assert.Equal(t, "mxbai-embed-large", config.OpenAI.EmbeddingModel)
assert.Equal(t, 1024, config.OpenAI.EmbeddingDimensions)
assert.Equal(t, 1800, config.OpenAI.EmbeddingRPM)
assert.Equal(t, 1200000, config.OpenAI.EmbeddingTPM)
})
}
}
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de
github.com/benhoyt/goawk v1.20.0
github.com/bits-and-blooms/bitset v1.2.1
github.com/cenkalti/backoff/v5 v5.0.2
github.com/chewxy/math32 v1.11.1
github.com/coreos/go-oidc/v3 v3.11.0
github.com/deckarep/golang-set/v2 v2.3.1
Expand All @@ -29,6 +30,7 @@ require (
github.com/jellydator/ttlcache/v3 v3.3.0
github.com/json-iterator/go v1.1.12
github.com/juju/errors v1.0.0
github.com/juju/ratelimit v1.0.2
github.com/klauspost/cpuid/v2 v2.2.3
github.com/lafikl/consistent v0.0.0-20220512074542-bdd3606bfc3e
github.com/lib/pq v1.10.6
Expand All @@ -52,6 +54,7 @@ require (
github.com/steinfletcher/apitest v1.5.17
github.com/stretchr/testify v1.10.0
github.com/thoas/go-funk v0.9.2
github.com/tiktoken-go/tokenizer v0.5.1
github.com/yuin/goldmark v1.7.8
go.mongodb.org/mongo-driver v1.16.1
go.opentelemetry.io/contrib/instrumentation/github.com/emicklei/go-restful/otelrestful v0.36.4
Expand Down Expand Up @@ -96,6 +99,7 @@ require (
github.com/chewxy/hm v1.0.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/dlclark/regexp2 v1.11.5 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8=
github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
Expand Down Expand Up @@ -126,6 +128,8 @@ github.com/deckarep/golang-set/v2 v2.3.1 h1:vjmkvJt/IV27WXPyYQpAh4bRyWJc5Y435D17
github.com/deckarep/golang-set/v2 v2.3.1/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
Expand Down Expand Up @@ -405,6 +409,8 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/juju/errors v1.0.0 h1:yiq7kjCLll1BiaRuNY53MGI0+EQ3rF6GB+wvboZDefM=
github.com/juju/errors v1.0.0/go.mod h1:B5x9thDqx0wIMH3+aLIMP9HjItInYWObRovoCFM5Qe8=
github.com/juju/ratelimit v1.0.2 h1:sRxmtRiajbvrcLQT7S+JbqU0ntsb9W2yhSdNN8tWfaI=
github.com/juju/ratelimit v1.0.2/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
Expand Down Expand Up @@ -637,6 +643,8 @@ github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSW
github.com/takama/daemon v0.0.0-20180403113744-aa76b0035d12/go.mod h1:So5Nv647d/sgbZNAfiWtw6egowH8vNNrPXAwooWeElk=
github.com/thoas/go-funk v0.9.2 h1:oKlNYv0AY5nyf9g+/GhMgS/UO2ces0QRdPKwkhY3VCk=
github.com/thoas/go-funk v0.9.2/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
github.com/tiktoken-go/tokenizer v0.5.1 h1:EOpjlSAVLPX+6ioMUufTI9xmzHU4SI4ARK0DgkBdz+g=
github.com/tiktoken-go/tokenizer v0.5.1/go.mod h1:6UCYI/DtOallbmL7sSy30p6YQv60qNyU/4aVigPOx6w=
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
Expand Down
Loading
Loading