Skip to content

Commit f6f8bab

Browse files
authored
feat: Add utf8 support to Pattern Lexer to support utf8 chars (#13085)
1 parent 21dd4af commit f6f8bab

File tree

7 files changed

+252
-174
lines changed

7 files changed

+252
-174
lines changed

pkg/logql/log/pattern/lexer.go

+4 -1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package pattern
22

3+
import "unicode/utf8"
4+
35
type lexer struct {
46
data []byte
57
p, pe, cs int
@@ -57,6 +59,7 @@ func (lex *lexer) identifier(out *exprSymType) (int, error) {
5759

5860
// nolint
5961
// literal stores the rune for the current token in out.literal and returns
// the LITERAL token id with a nil error.
// NOTE(review): lex.ts and lex.te are presumably the token start/end offsets
// into lex.data; their declaration is outside this hunk, so confirm.
func (lex *lexer) literal(out *exprSymType) (int, error) {
// Removed line: converts only the single byte at lex.ts to a rune.
60-
out.literal = rune(lex.data[lex.ts])
// Added lines: decode the first rune in lex.data[lex.ts:lex.te]; the byte
// width that utf8.DecodeRune also returns is discarded.
62+
decoded, _ := utf8.DecodeRune(lex.data[lex.ts:lex.te])
63+
out.literal = decoded
6164
return LITERAL, nil
6265
}

pkg/logql/log/pattern/lexer.rl

+15 -1
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,25 @@ package pattern
1313
}
1414
}%%
1515

16+
// utf8 is a Ragel machine matching one UTF-8 encoded character of one to four
// bytes. Each alternative pairs a lead-byte range with the continuation-byte
// ranges allowed after it. The narrowed second-byte ranges after 0xE0, 0xED,
// 0xF0 and 0xF4 exclude overlong forms, surrogate code points and values
// above U+10FFFF.
%%{
17+
utf8 = (
18+
0x00..0x7F |
19+
0xC2..0xDF 0x80..0xBF |
20+
0xE0 0xA0..0xBF 0x80..0xBF |
21+
0xE1..0xEC 0x80..0xBF 0x80..0xBF |
22+
0xED 0x80..0x9F 0x80..0xBF |
23+
0xEE..0xEF 0x80..0xBF 0x80..0xBF |
24+
0xF0 0x90..0xBF 0x80..0xBF 0x80..0xBF |
25+
0xF1..0xF3 0x80..0xBF 0x80..0xBF 0x80..0xBF |
26+
0xF4 0x80..0x8F 0x80..0xBF 0x80..0xBF
27+
);
28+
}%%
29+
1630
const LEXER_ERROR = 0
1731

1832
// Token machines. identifier matches '<', then a letter or '_', then any run
// of alphanumerics or '_', then '>'. In this diff the literal machine changes
// from Ragel's built-in `any` to the `utf8` machine.
%%{
1933
identifier = '<' (alpha| '_') (alnum | '_' )* '>';
20-
literal = any;
34+
literal = utf8;
2135
}%%
2236

2337
func (lex *lexer) Lex(out *exprSymType) int {

0 commit comments

Comments
 (0)