Skip to content

Commit ce02cc2

Browse files
authored
feat(structured-metadata-api): add structured metadata to /detected_fields API (#13604)
1 parent 1008315 commit ce02cc2

File tree

2 files changed

+160
-11
lines changed

2 files changed

+160
-11
lines changed

pkg/querier/querier.go

+52-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package querier
33
import (
44
"context"
55
"flag"
6-
"fmt"
76
"net/http"
87
"sort"
98
"strconv"
@@ -25,6 +24,7 @@ import (
2524
"golang.org/x/sync/errgroup"
2625
"google.golang.org/grpc/health/grpc_health_v1"
2726

27+
"github.com/grafana/loki/pkg/push"
2828
"github.com/grafana/loki/v3/pkg/compactor/deletion"
2929
"github.com/grafana/loki/v3/pkg/indexgateway"
3030
"github.com/grafana/loki/v3/pkg/iter"
@@ -1116,7 +1116,7 @@ func (q *SingleTenantQuerier) DetectedFields(ctx context.Context, req *logproto.
11161116
return nil, err
11171117
}
11181118

1119-
detectedFields := parseDetectedFields(ctx, req.FieldLimit, streams)
1119+
detectedFields := parseDetectedFields(req.FieldLimit, streams)
11201120

11211121
fields := make([]*logproto.DetectedField, len(detectedFields))
11221122
fieldCount := 0
@@ -1209,16 +1209,39 @@ func determineType(value string) logproto.DetectedFieldType {
12091209
return logproto.DetectedFieldString
12101210
}
12111211

1212-
func parseDetectedFields(ctx context.Context, limit uint32, streams logqlmodel.Streams) map[string]*parsedFields {
1212+
func parseDetectedFields(limit uint32, streams logqlmodel.Streams) map[string]*parsedFields {
12131213
detectedFields := make(map[string]*parsedFields, limit)
12141214
fieldCount := uint32(0)
1215+
emtpyparser := ""
12151216

12161217
for _, stream := range streams {
1217-
level.Debug(spanlogger.FromContext(ctx)).Log(
1218-
"detected_fields", "true",
1219-
"msg", fmt.Sprintf("looking for detected fields in stream %d with %d lines", stream.Hash, len(stream.Entries)))
1220-
12211218
for _, entry := range stream.Entries {
1219+
structuredMetadata := getStructuredMetadata(entry)
1220+
for k, vals := range structuredMetadata {
1221+
df, ok := detectedFields[k]
1222+
if !ok && fieldCount < limit {
1223+
df = newParsedFields(&emtpyparser)
1224+
detectedFields[k] = df
1225+
fieldCount++
1226+
}
1227+
1228+
if df == nil {
1229+
continue
1230+
}
1231+
1232+
detectType := true
1233+
for _, v := range vals {
1234+
parsedFields := detectedFields[k]
1235+
if detectType {
1236+
// only the first value seen for this field on this entry is used to detect the type; the remaining values are just inserted
1237+
parsedFields.DetermineType(v)
1238+
detectType = false
1239+
}
1240+
1241+
parsedFields.Insert(v)
1242+
}
1243+
}
1244+
12221245
detected, parser := parseLine(entry.Line)
12231246
for k, vals := range detected {
12241247
df, ok := detectedFields[k]
@@ -1247,17 +1270,35 @@ func parseDetectedFields(ctx context.Context, limit uint32, streams logqlmodel.S
12471270

12481271
parsedFields.Insert(v)
12491272
}
1250-
1251-
level.Debug(spanlogger.FromContext(ctx)).Log(
1252-
"detected_fields", "true",
1253-
"msg", fmt.Sprintf("detected field %s with %d values", k, len(vals)))
12541273
}
12551274
}
12561275
}
12571276

12581277
return detectedFields
12591278
}
12601279

1280+
func getStructuredMetadata(entry push.Entry) map[string][]string {
1281+
labels := map[string]map[string]struct{}{}
1282+
for _, lbl := range entry.StructuredMetadata {
1283+
if values, ok := labels[lbl.Name]; ok {
1284+
values[lbl.Value] = struct{}{}
1285+
} else {
1286+
labels[lbl.Name] = map[string]struct{}{lbl.Value: {}}
1287+
}
1288+
}
1289+
1290+
result := make(map[string][]string, len(labels))
1291+
for lbl, values := range labels {
1292+
vals := make([]string, 0, len(values))
1293+
for v := range values {
1294+
vals = append(vals, v)
1295+
}
1296+
result[lbl] = vals
1297+
}
1298+
1299+
return result
1300+
}
1301+
12611302
func parseLine(line string) (map[string][]string, *string) {
12621303
parser := "logfmt"
12631304
logFmtParser := logql_log.NewLogfmtParser(true, false)

pkg/querier/querier_test.go

+108
Original file line numberDiff line numberDiff line change
@@ -1759,6 +1759,52 @@ func TestQuerier_DetectedFields(t *testing.T) {
17591759
}
17601760
})
17611761

1762+
t.Run("returns detected fields with structured metadata from queried logs", func(t *testing.T) {
1763+
store := newStoreMock()
1764+
store.On("SelectLogs", mock.Anything, mock.Anything).
1765+
Return(mockLogfmtStreamIterator(1, 5), nil)
1766+
1767+
queryClient := newQueryClientMock()
1768+
queryClient.On("Recv").
1769+
Return(mockQueryResponse([]logproto.Stream{mockLogfmtStreamWithStructuredMetadata(1, 5)}), nil)
1770+
1771+
ingesterClient := newQuerierClientMock()
1772+
ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything).
1773+
Return(queryClient, nil)
1774+
1775+
querier, err := newQuerier(
1776+
conf,
1777+
mockIngesterClientConfig(),
1778+
newIngesterClientMockFactory(ingesterClient),
1779+
mockReadRingWithOneActiveIngester(),
1780+
&mockDeleteGettter{},
1781+
store, limits)
1782+
require.NoError(t, err)
1783+
1784+
resp, err := querier.DetectedFields(ctx, &request)
1785+
require.NoError(t, err)
1786+
1787+
detectedFields := resp.Fields
1788+
// log lines come from querier_mock_test.go
1789+
// message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t
1790+
assert.Len(t, detectedFields, 9)
1791+
expectedCardinality := map[string]uint64{
1792+
"variable": 5,
1793+
"constant": 1,
1794+
"message": 5,
1795+
"count": 5,
1796+
"fake": 1,
1797+
"bytes": 5,
1798+
"duration": 5,
1799+
"percent": 5,
1800+
"even": 2,
1801+
}
1802+
for _, d := range detectedFields {
1803+
card := expectedCardinality[d.Label]
1804+
assert.Equal(t, card, d.Cardinality, "Expected cardinality mismatch for: %s", d.Label)
1805+
}
1806+
})
1807+
17621808
t.Run("correctly identifies different field types", func(t *testing.T) {
17631809
store := newStoreMock()
17641810
store.On("SelectLogs", mock.Anything, mock.Anything).
@@ -1814,6 +1860,68 @@ func TestQuerier_DetectedFields(t *testing.T) {
18141860
assert.Equal(t, logproto.DetectedFieldFloat, floatField.Type)
18151861
assert.Equal(t, logproto.DetectedFieldBoolean, evenField.Type)
18161862
})
1863+
1864+
t.Run("correctly identifies parser to use with logfmt and structured metadata", func(t *testing.T) {
1865+
store := newStoreMock()
1866+
store.On("SelectLogs", mock.Anything, mock.Anything).
1867+
Return(mockLogfmtStreamIterator(1, 2), nil)
1868+
1869+
queryClient := newQueryClientMock()
1870+
queryClient.On("Recv").
1871+
Return(mockQueryResponse([]logproto.Stream{mockLogfmtStreamWithStructuredMetadata(1, 2)}), nil)
1872+
1873+
ingesterClient := newQuerierClientMock()
1874+
ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything).
1875+
Return(queryClient, nil)
1876+
1877+
querier, err := newQuerier(
1878+
conf,
1879+
mockIngesterClientConfig(),
1880+
newIngesterClientMockFactory(ingesterClient),
1881+
mockReadRingWithOneActiveIngester(),
1882+
&mockDeleteGettter{},
1883+
store, limits)
1884+
require.NoError(t, err)
1885+
1886+
resp, err := querier.DetectedFields(ctx, &request)
1887+
require.NoError(t, err)
1888+
1889+
detectedFields := resp.Fields
1890+
// log lines come from querier_mock_test.go
1891+
// message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t
1892+
assert.Len(t, detectedFields, 9)
1893+
1894+
var messageField, countField, bytesField, durationField, floatField, evenField, constantField, variableField *logproto.DetectedField
1895+
for _, field := range detectedFields {
1896+
switch field.Label {
1897+
case "message":
1898+
messageField = field
1899+
case "count":
1900+
countField = field
1901+
case "bytes":
1902+
bytesField = field
1903+
case "duration":
1904+
durationField = field
1905+
case "percent":
1906+
floatField = field
1907+
case "even":
1908+
evenField = field
1909+
case "constant":
1910+
constantField = field
1911+
case "variable":
1912+
variableField = field
1913+
}
1914+
}
1915+
1916+
assert.Equal(t, []string{"logfmt"}, messageField.Parsers)
1917+
assert.Equal(t, []string{"logfmt"}, countField.Parsers)
1918+
assert.Equal(t, []string{"logfmt"}, bytesField.Parsers)
1919+
assert.Equal(t, []string{"logfmt"}, durationField.Parsers)
1920+
assert.Equal(t, []string{"logfmt"}, floatField.Parsers)
1921+
assert.Equal(t, []string{"logfmt"}, evenField.Parsers)
1922+
assert.Equal(t, []string{""}, constantField.Parsers)
1923+
assert.Equal(t, []string{""}, variableField.Parsers)
1924+
})
18171925
}
18181926

18191927
func BenchmarkQuerierDetectedFields(b *testing.B) {

0 commit comments

Comments
 (0)