Skip to content

Commit a1efafd

Browse files
authored
perf: Limit tokens per pattern to 128 (#13376)
1 parent 11e1976 commit a1efafd

File tree

3 files changed

+31
-18
lines changed

3 files changed

+31
-18
lines changed

pkg/pattern/drain/drain_test.go

+4-5
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
4444
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_> op=CREATE`,
4545
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_>.<_> op=CREATE`,
4646
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/hosted-grafana_.<_>/<_>/0.log.<_>.<_> op=CREATE`,
47-
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=\"<_>\", pod_template_hash=\"<_>\", resource_version=\"<_>\", slug=\"<_>\", stackId=\"<_>\"}"`,
47+
`ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=<_>`,
4848
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/<_>/<_>/<_>.log ..."`,
4949
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/hosted-grafana_.<_>/<_>/0.log ..."`,
5050
`ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Seeked /var/log/pods/<_>/<_>/0.log - &{Offset:0 Whence:0}"`,
@@ -118,12 +118,12 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
118118
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with CreateContainerConfigError: \"secret \\\"<_>\\\" not found\"" pod="<_>/<_>" podUID="<_>"`,
119119
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/hosted-grafana/<_>:<_>.<_>.<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`,
120120
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/kubernetes-dev/<_>:<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`,
121-
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found, failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden]\"" pod="hosted-grafana/<_>" podUID="<_>"`,
121+
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not <_>`,
122122
`E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"pdc\" with ErrImageNeverPull: \"Container image \\\"us.gcr.io/hosted-grafana/pdc:0.1.415\\\" is not present with pull policy of Never\"" pod="pdc/<_>" podUID="<_>"`,
123123
`E0507 11:59:<_>.<_> <_> prober.go:104] "Probe errored" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" probeType="Readiness" pod="hosted-grafana/<_>" podUID="<_>" containerName="grafana"`,
124124
`E0507 11:59:<_>.<_> <_> prober.go:239] "Unable to write all bytes from execInContainer" err="short write" expectedBytes=<_> actualBytes=10240`,
125125
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = NotFound desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`,
126-
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`,
126+
`E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/<_>`,
127127
`E0507 11:59:<_>.<_> <_> remote_runtime.go:432] "ContainerStatus from runtime service failed" err="rpc error: code = NotFound desc = an error occurred when try to find container \"<_>\": not found" containerID="<_>"`,
128128
`E0507 11:59:<_>.<_> <_> remote_runtime.go:496] "ExecSync cmd from runtime service failed" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" containerID="<_>" cmd=["/bin/hgrun","check"]`,
129129
`I0507 11:59:31.815514 2791 azure_credentials.go:220] image(us.gcr.io/hosted-grafana/hosted-grafana-pro) is not from ACR, return empty authentication`,
@@ -311,10 +311,9 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
311311
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 686: Reconcile against kernel programming ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
312312
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 880: Processing route: 254 <_> 10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
313313
`2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 915: Route is correct dest=10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`,
314-
`2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "", "", "", "", "", "", "", "", "tVnHkvAo15HuiPy0", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="raw"`,
315314
`2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "6gwbT8clXdHdC1b1"} chainName="PREROUTING" expectedRuleIDs=[]string{"6gwbT8clXdHdC1b1", "", "", "", ""} ipVersion=0x4 table="raw"`,
316-
`2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", ""} chainName="INPUT" expectedRuleIDs=[]string{"Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`,
317315
`2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "tVnHkvAo15HuiPy0", "", "", "", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`,
316+
`2024-05-08 15:23:57.<_> [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "<_><_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> <_>", "", ""<_> <_><_><_> <_>"<_> <_><_><_><_><_> <_><_><_><_><_><_><_><_>", "", "", "", "", "", "", "", "", ""<_>`,
318317
`2024-05-08 15:23:58.169 [INFO][2333] felix/summary.go 100: Summarising 35 dataplane reconciliation loops over 1m2s: avg=12ms longest=46ms (resync-filter-v4,resync-filter-v6,resync-mangle-v4,resync-mangle-v6,update-filter-v4,update-filter-v6)`,
319318
`2024-05-08 15:23:58.566 [DEBUG][3576126] felix/int_dataplane.go 957: Examining link for MTU calculation mtu=1500 name="eth0"`,
320319
`2024-05-08 15:23:58.680 [DEBUG][216945] felix/int_dataplane.go 1785: Reschedule kick received`,

pkg/pattern/drain/line_tokenizer.go

+10-13
Original file line numberDiff line numberDiff line change
@@ -38,39 +38,36 @@ func newPunctuationTokenizer() *punctuationTokenizer {
3838
}
3939

4040
func (p *punctuationTokenizer) Tokenize(line string) ([]string, interface{}) {
41-
tokens := make([]string, len(line)) // Maximum size is every character is punctuation
42-
spacesAfter := make([]int, strings.Count(line, " ")) // Could be a bitmap, but it's not worth it for a few bytes.
41+
tokens := make([]string, 0, 128)
42+
spacesAfter := make([]int, 0, 64)
4343

4444
start := 0
45-
nextTokenIdx := 0
46-
nextSpaceIdx := 0
4745
for i, char := range line {
46+
if len(tokens) >= cap(tokens)-1 {
47+
break
48+
}
4849
if unicode.IsLetter(char) || unicode.IsNumber(char) || char < 128 && p.excludeDelimiters[char] != 0 {
4950
continue
5051
}
5152
included := char < 128 && p.includeDelimiters[char] != 0
5253
if char == ' ' || included || unicode.IsPunct(char) {
5354
if i > start {
54-
tokens[nextTokenIdx] = line[start:i]
55-
nextTokenIdx++
55+
tokens = append(tokens, line[start:i])
5656
}
5757
if char == ' ' {
58-
spacesAfter[nextSpaceIdx] = nextTokenIdx - 1
59-
nextSpaceIdx++
58+
spacesAfter = append(spacesAfter, len(tokens)-1)
6059
} else {
61-
tokens[nextTokenIdx] = line[i : i+1]
62-
nextTokenIdx++
60+
tokens = append(tokens, line[i:i+1])
6361
}
6462
start = i + 1
6563
}
6664
}
6765

6866
if start < len(line) {
69-
tokens[nextTokenIdx] = line[start:]
70-
nextTokenIdx++
67+
tokens = append(tokens, line[start:])
7168
}
7269

73-
return tokens[:nextTokenIdx], spacesAfter[:nextSpaceIdx]
70+
return tokens, spacesAfter
7471
}
7572

7673
func (p *punctuationTokenizer) Join(tokens []string, state interface{}) string {

pkg/pattern/drain/line_tokenizer_test.go

+17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package drain
22

33
import (
4+
"strings"
45
"testing"
56

67
"github.com/stretchr/testify/require"
@@ -88,6 +89,22 @@ var testCases = []TestCase{
8889
typeSplitting: {"ts=", "2024-05-30T12:50:36.648377186Z", "caller=", "scheduler_processor.go:143", "level=", "warn", "msg=", "\"error", "contacting", "scheduler\"", "err=", "\"rpc", "error:", "code", "=", ``, "Unavailable", "desc", "=", ``, "connection", "error:", "desc", "=", ``, `\"error`, "reading", "server", "preface:", `EOF\""`, "addr=", "10.0.151.101:9095"},
8990
},
9091
},
92+
{
93+
name: "Exactly 128 tokens are not combined",
94+
line: strings.Repeat(`A `, 126) + "127 128",
95+
want: map[string][]string{
96+
typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"),
97+
typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"),
98+
},
99+
},
100+
{
101+
name: "More than 128 tokens combined suffix into one token",
102+
line: strings.Repeat(`A `, 126) + "127 128 129",
103+
want: map[string][]string{
104+
typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128 129"),
105+
typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128", "129"),
106+
},
107+
},
91108
{
92109
name: "Only punctation",
93110
line: `!@£$%^&*()`,

0 commit comments

Comments
 (0)