Skip to content

Commit

Permalink
Cache optimized regexp matchers (#465)
Browse files Browse the repository at this point in the history
* Cache optimized regexp matchers

Signed-off-by: Marco Pracucci <[email protected]>

* Added BenchmarkNewFastRegexMatcher_CacheMisses

Signed-off-by: Marco Pracucci <[email protected]>

* Improved benchmark

Signed-off-by: Marco Pracucci <[email protected]>

* Improved benchmark

Signed-off-by: Marco Pracucci <[email protected]>

* Use LRU cache v2

Signed-off-by: Marco Pracucci <[email protected]>

* Run gofumpt

Signed-off-by: Marco Pracucci <[email protected]>

---------

Signed-off-by: Marco Pracucci <[email protected]>
  • Loading branch information
pracucci authored Mar 31, 2023
1 parent 174b26a commit 05a3a79
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 3 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require (
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/consul/api v1.20.0
github.com/hashicorp/golang-lru/v2 v2.0.2
github.com/hashicorp/nomad/api v0.0.0-20230308192510-48e7d70fcd4b
github.com/hetznercloud/hcloud-go v1.41.0
github.com/ionos-cloud/sdk-go/v6 v6.1.4
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,8 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU=
github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
Expand Down
27 changes: 27 additions & 0 deletions model/labels/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
lru "github.com/hashicorp/golang-lru/v2"
)

const (
Expand All @@ -29,6 +30,14 @@ const (
optimizeEqualStringMatchersThreshold = 16
)

var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher]

func init() {
	// Build the package-wide matcher cache once at startup. The constructor
	// only reports an error for an invalid size, and the size passed here is
	// a hardcoded constant, so the error is intentionally discarded.
	cache, _ := lru.New[string, *FastRegexMatcher](10000)
	fastRegexMatcherCache = cache
}

type FastRegexMatcher struct {
re *regexp.Regexp

Expand All @@ -43,6 +52,24 @@ type FastRegexMatcher struct {
}

// NewFastRegexMatcher returns a FastRegexMatcher for the regexp v.
//
// Matchers are looked up in fastRegexMatcherCache keyed by the regexp string;
// on a hit the cached instance is returned as-is. On a miss a new matcher is
// built via newFastRegexMatcherWithoutCache and stored in the cache before
// being returned. If building the matcher fails, the error is returned and
// nothing is cached.
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
	cached, found := fastRegexMatcherCache.Get(v)
	if found {
		return cached, nil
	}

	// Cache miss: build the matcher from scratch.
	built, err := newFastRegexMatcherWithoutCache(v)
	if err != nil {
		return nil, err
	}

	// Remember it so later calls with the same regexp skip the build.
	fastRegexMatcherCache.Add(v, built)
	return built, nil
}

func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) {
parsed, err := syntax.Parse(v, syntax.Perl)
if err != nil {
return nil, err
Expand Down
43 changes: 40 additions & 3 deletions model/labels/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"fmt"
"math/rand"
"os"
"strconv"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -89,10 +90,46 @@ func TestNewFastRegexMatcher(t *testing.T) {
}

func BenchmarkNewFastRegexMatcher(b *testing.B) {
for _, r := range regexes {
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
runBenchmark := func(newFunc func(v string) (*FastRegexMatcher, error)) func(b *testing.B) {
return func(b *testing.B) {
for _, r := range regexes {
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
for n := 0; n < b.N; n++ {
_, err := newFunc(r)
if err != nil {
b.Fatal(err)
}
}
})
}
}
}

b.Run("with cache", runBenchmark(NewFastRegexMatcher))
b.Run("without cache", runBenchmark(newFastRegexMatcherWithoutCache))
}

func BenchmarkNewFastRegexMatcher_CacheMisses(b *testing.B) {
// Init the random seed with a constant, so that it doesn't change between runs.
randGenerator := rand.New(rand.NewSource(1))

tests := map[string]string{
"simple regexp": randString(randGenerator, 10),
"complex regexp": strings.Join(randStrings(randGenerator, 100, 10), "|"),
}

for testName, regexpPrefix := range tests {
b.Run(testName, func(b *testing.B) {
// Ensure the cache is empty.
fastRegexMatcherCache.Purge()

b.ResetTimer()

for n := 0; n < b.N; n++ {
_, err := NewFastRegexMatcher(r)
// Unique regexp to emulate 100% cache misses.
regexp := regexpPrefix + strconv.Itoa(n)

_, err := NewFastRegexMatcher(regexp)
if err != nil {
b.Fatal(err)
}
Expand Down

0 comments on commit 05a3a79

Please sign in to comment.