diff --git a/CHANGELOG.md b/CHANGELOG.md index 901f48e3586..a15d93c1621 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## main / unreleased * [CHANGE] Disable tempo-query by default in Jsonnet libs. [#2462](https://github.com/grafana/tempo/pull/2462) (@electron0zero) +* [ENHANCEMENT] Fill parent ID column and nested set columns [#2487](https://github.com/grafana/tempo/pull/2487) (@stoewer) * [ENHANCEMENT] log client ip to help identify which client is no org id [#2436](https://github.com/grafana/tempo/pull/2436) * [ENHANCEMENT] Add `spss` parameter to `/api/search/tags`[#2308] to configure the spans per span set in response * [BUGFIX] Fix Search SLO by routing tags to a new handler. [#2468](https://github.com/grafana/tempo/issues/2468) (@electron0zero) diff --git a/pkg/util/traceid.go b/pkg/util/traceid.go index c3f99c95392..4ef48376513 100644 --- a/pkg/util/traceid.go +++ b/pkg/util/traceid.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "strings" + "unsafe" ) func HexStringToTraceID(id string) ([]byte, error) { @@ -59,6 +60,41 @@ func SpanIDToHexString(byteID []byte) string { return fmt.Sprintf("%016s", id) } +// spanKindFNVHashes contains pre-calculated FNV hashes for all span kind values (and two spares) +// defined in the OTEL spec. +var spanKindFNVHashes = [...]uint64{ + 0xa8c7f832281a39c5, // unspecified + 0xe3757ca7d64666ea, // internal + 0x1e23011d8472940f, // server + 0x58d08593329ec134, // client + 0x937e0a08e0caee59, // producer + 0xce2b8e7e8ef71b7e, // consumer + 0x8d912f43d2348a3, // spare 1 + 0x43869769eb4f75c8, // spare 2 +} + +// SpanIDAndKindToToken converts a span ID into a token for use as key in a hash map. The token is generated such +// that it has a low collision probability. In zipkin traces the span id is not guaranteed to be unique as it +// is shared between client and server spans. Therefore, it is sometimes required to take the span kind into account. +func SpanIDAndKindToToken(id []byte, kind int) uint64 { + return SpanIDToUint64(id) ^ spanKindFNVHashes[kind] +} + +// SpanIDToUint64 converts a span ID into an uint64 representation. This is useful when using a span ID as key +// in a map. If the ID is longer than 8 bytes, the bytes at larger positions are discarded. The function does +// not make any guarantees about the endianess or ordering of converted IDs. +// +// Note: span IDs are not always unique within a trace (e.g. zipkin traces) SpanIDAndKindToToken could be more +// appropriate in some cases. +func SpanIDToUint64(id []byte) uint64 { + if len(id) < 8 { + var idArray [8]byte + copy(idArray[:], id) + return *(*uint64)(unsafe.Pointer(&idArray[0])) + } + return *(*uint64)(unsafe.Pointer(&id[0])) +} + // EqualHexStringTraceIDs compares two trace ID strings and compares the // resulting bytes after padding. Returns true unless there is a reason not // to. diff --git a/pkg/util/traceid_test.go b/pkg/util/traceid_test.go index c75a0f6c904..a725cb54ee9 100644 --- a/pkg/util/traceid_test.go +++ b/pkg/util/traceid_test.go @@ -2,6 +2,7 @@ package util import ( "errors" + "math/rand" "testing" "github.com/stretchr/testify/assert" @@ -129,6 +130,114 @@ func TestSpanIDToHexString(t *testing.T) { } } +func TestSpanIDToUint64(t *testing.T) { + tc := []struct { + spanID []byte + expected uint64 + }{ + { + spanID: []byte{0x60, 0xd8, 0xa9, 0xbd}, + expected: 0xbd_a9_d8_60, + }, + { + spanID: []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43}, + expected: 0x43_b7_57_22_90_37_f6_8e, + }, + { + spanID: []byte{0x18, 0xcc, 0xd9, 0x6d, 0x70, 0xc1, 0xbd, 0xf9}, + expected: 0xf9_bd_c1_70_6d_d9_cc_18, + }, + { + spanID: []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43, 0xff}, + expected: 0x43_b7_57_22_90_37_f6_8e, + }, + } + + for _, tt := range tc { + token := SpanIDToUint64(tt.spanID) + assert.Equalf(t, tt.expected, token, "SpanIDToToken(%v) reurned 0x%x but 0x%x was expected", tt.spanID, token, tt.expected) + } +} + +func TestSpanIDAndKindToToken(t *testing.T) { + tc := []struct { + spanID []byte + expected uint64 + }{ + { + spanID: []byte{0x60, 0xd8, 0xa9, 0xbd}, + }, + { + spanID: []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43}, + }, + { + spanID: []byte{0x18, 0xcc, 0xd9, 0x6d, 0x70, 0xc1, 0xbd, 0xf9}, + }, + { + spanID: []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43, 0xff}, + }, + } + + for _, tt := range tc { + tokenIDOnly := SpanIDToUint64(tt.spanID) + tokensForKind := map[uint64]struct{}{} + + for kind := 0; kind < 8; kind++ { + token := SpanIDAndKindToToken(tt.spanID, kind) + + _, exists := tokensForKind[token] + assert.False(t, exists, "token expected to be unique for different span kind") + assert.NotEqual(t, tokenIDOnly, token) + tokensForKind[token] = struct{}{} + } + } +} + +var tokenToPreventOptimization uint64 + +func BenchmarkSpanIDAndKindToToken(b *testing.B) { + type testDataSpanID struct { + SpanID []byte + Kind int + } + + randomTestCasesSpanID := func(n int, idLen int) []testDataSpanID { + testCases := make([]testDataSpanID, 0, n) + for i := 0; i < n; i++ { + id := make([]byte, idLen) + for j := range id { + id[j] = byte(rand.Intn(256)) + } + testCases = append(testCases, testDataSpanID{SpanID: id, Kind: rand.Intn(6)}) + } + return testCases + } + + benchmarks := []struct { + name string + data []testDataSpanID + }{ + { + name: "id length 4", + data: randomTestCasesSpanID(1_000, 4), + }, + { + name: "id length 8", + data: randomTestCasesSpanID(1_000, 8), + }, + } + for _, bc := range benchmarks { + b.Run(bc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + d := bc.data[i%len(bc.data)] + tokenToPreventOptimization = SpanIDAndKindToToken(d.SpanID, d.Kind) + } + b.ReportAllocs() + }) + } +} + func TestEqualHexStringTraceIDs(t *testing.T) { a := "82f6471b46d25e23418a0a99d4c2cda" b := "082f6471b46d25e23418a0a99d4c2cda" diff --git a/tempodb/encoding/vparquet/combiner.go b/tempodb/encoding/vparquet/combiner.go index 0b901212f20..2b0ba1007ce 100644 --- a/tempodb/encoding/vparquet/combiner.go +++ b/tempodb/encoding/vparquet/combiner.go @@ -2,34 +2,10 @@ package vparquet import ( "bytes" - "encoding/binary" - "hash" - "hash/fnv" "sort" -) - -// token is uint64 to reduce hash collision rates. Experimentally, it was observed -// that fnv32 could approach a collision rate of 1 in 10,000. fnv64 avoids collisions -// when tested against traces with up to 1M spans (see matching test). A collision -// results in a dropped span during combine. -type token uint64 - -func newHash() hash.Hash64 { - return fnv.New64() -} -// tokenForID returns a token for use in a hash map given a span id and span kind -// buffer must be a 4 byte slice and is reused for writing the span kind to the hashing function -// kind is used along with the actual id b/c in zipkin traces span id is not guaranteed to be unique -// as it is shared between client and server spans. -func tokenForID(h hash.Hash64, buffer []byte, kind int32, b []byte) token { - binary.LittleEndian.PutUint32(buffer, uint32(kind)) - - h.Reset() - _, _ = h.Write(b) - _, _ = h.Write(buffer) - return token(h.Sum64()) -} + "github.com/grafana/tempo/pkg/util" +) func CombineTraces(traces ...*Trace) *Trace { if len(traces) == 1 { @@ -52,7 +28,7 @@ func CombineTraces(traces ...*Trace) *Trace { // * Don't scan/hash the spans for the last input (final=true). type Combiner struct { result *Trace - spans map[token]struct{} + spans map[uint64]struct{} combined bool } @@ -72,9 +48,6 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) { return } - h := newHash() - buffer := make([]byte, 4) - // First call? if c.result == nil { c.result = tr @@ -87,12 +60,12 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) { n += len(ils.Spans) } } - c.spans = make(map[token]struct{}, n) + c.spans = make(map[uint64]struct{}, n) for _, b := range c.result.ResourceSpans { for _, ils := range b.ScopeSpans { for _, s := range ils.Spans { - c.spans[tokenForID(h, buffer, int32(s.Kind), s.ID)] = struct{}{} + c.spans[util.SpanIDAndKindToToken(s.ID, s.Kind)] = struct{}{} } } } @@ -122,7 +95,7 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) { notFoundSpans := ils.Spans[:0] for _, s := range ils.Spans { // if not already encountered, then keep - token := tokenForID(h, buffer, int32(s.Kind), s.ID) + token := util.SpanIDAndKindToToken(s.ID, s.Kind) _, ok := c.spans[token] if !ok { notFoundSpans = append(notFoundSpans, s) diff --git a/tempodb/encoding/vparquet2/combiner.go b/tempodb/encoding/vparquet2/combiner.go index a723e02f32d..7ea7a54cd5a 100644 --- a/tempodb/encoding/vparquet2/combiner.go +++ b/tempodb/encoding/vparquet2/combiner.go @@ -2,34 +2,10 @@ package vparquet2 import ( "bytes" - "encoding/binary" - "hash" - "hash/fnv" "sort" -) - -// token is uint64 to reduce hash collision rates. Experimentally, it was observed -// that fnv32 could approach a collision rate of 1 in 10,000. fnv64 avoids collisions -// when tested against traces with up to 1M spans (see matching test). A collision -// results in a dropped span during combine. -type token uint64 - -func newHash() hash.Hash64 { - return fnv.New64() -} -// tokenForID returns a token for use in a hash map given a span id and span kind -// buffer must be a 4 byte slice and is reused for writing the span kind to the hashing function -// kind is used along with the actual id b/c in zipkin traces span id is not guaranteed to be unique -// as it is shared between client and server spans. -func tokenForID(h hash.Hash64, buffer []byte, kind int32, b []byte) token { - binary.LittleEndian.PutUint32(buffer, uint32(kind)) - - h.Reset() - _, _ = h.Write(b) - _, _ = h.Write(buffer) - return token(h.Sum64()) -} + "github.com/grafana/tempo/pkg/util" +) func CombineTraces(traces ...*Trace) *Trace { if len(traces) == 1 { @@ -52,7 +28,7 @@ func CombineTraces(traces ...*Trace) *Trace { // * Don't scan/hash the spans for the last input (final=true). type Combiner struct { result *Trace - spans map[token]struct{} + spans map[uint64]struct{} combined bool } @@ -72,9 +48,6 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) { return } - h := newHash() - buffer := make([]byte, 4) - // First call? if c.result == nil { c.result = tr @@ -87,12 +60,12 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) { n += len(ils.Spans) } } - c.spans = make(map[token]struct{}, n) + c.spans = make(map[uint64]struct{}, n) for _, b := range c.result.ResourceSpans { for _, ils := range b.ScopeSpans { for _, s := range ils.Spans { - c.spans[tokenForID(h, buffer, int32(s.Kind), s.SpanID)] = struct{}{} + c.spans[util.SpanIDAndKindToToken(s.SpanID, s.Kind)] = struct{}{} } } } @@ -122,7 +95,7 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) { notFoundSpans := ils.Spans[:0] for _, s := range ils.Spans { // if not already encountered, then keep - token := tokenForID(h, buffer, int32(s.Kind), s.SpanID) + token := util.SpanIDAndKindToToken(s.SpanID, s.Kind) _, ok := c.spans[token] if !ok { notFoundSpans = append(notFoundSpans, s) @@ -160,6 +133,7 @@ func (c *Combiner) Result() (*Trace, int) { if c.result != nil && c.combined { // Only if anything combined SortTrace(c.result) + assignNestedSetModelBounds(c.result) spanCount = len(c.spans) } diff --git a/tempodb/encoding/vparquet2/combiner_test.go b/tempodb/encoding/vparquet2/combiner_test.go index b5f6b9803e6..dff666e5cc7 100644 --- a/tempodb/encoding/vparquet2/combiner_test.go +++ b/tempodb/encoding/vparquet2/combiner_test.go @@ -135,8 +135,10 @@ func TestCombiner(t *testing.T) { { Spans: []Span{ { - SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - StatusCode: 0, + SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + StatusCode: 0, + NestedSetLeft: 1, + NestedSetRight: 2, }, }, }, @@ -179,8 +181,10 @@ func TestCombiner(t *testing.T) { { Spans: []Span{ { - SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - StatusCode: 0, + SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + StatusCode: 0, + NestedSetLeft: 1, + NestedSetRight: 4, }, }, }, @@ -194,9 +198,12 @@ func TestCombiner(t *testing.T) { { Spans: []Span{ { - SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02}, - ParentSpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - StatusCode: 0, + SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02}, + ParentSpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + StatusCode: 0, + ParentID: 1, + NestedSetLeft: 2, + NestedSetRight: 3, }, }, }, diff --git a/tempodb/encoding/vparquet2/nested_set_model.go b/tempodb/encoding/vparquet2/nested_set_model.go new file mode 100644 index 00000000000..fd6de609905 --- /dev/null +++ b/tempodb/encoding/vparquet2/nested_set_model.go @@ -0,0 +1,148 @@ +package vparquet2 + +import ( + v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1" + "github.com/grafana/tempo/pkg/util" +) + +// spanNode is a wrapper around a span that is used to build and travers spans as a tree. +type spanNode struct { + parent *spanNode + span *Span + children []*spanNode + nextChild int +} + +// assignNestedSetModelBounds calculates and assigns the values Span.NestedSetLeft, Span.NestedSetRight, +// and Span.ParentID for all spans in a trace. +func assignNestedSetModelBounds(trace *Trace) { + // count spans in order be able to pre-allocate tree nodes + var spanCount int + for _, rs := range trace.ResourceSpans { + for _, ss := range rs.ScopeSpans { + spanCount += len(ss.Spans) + } + } + + // find root spans and map span IDs to tree nodes + var ( + undoAssignment bool + allNodes = make([]spanNode, 0, spanCount) + nodesByID = make(map[uint64][]*spanNode, spanCount) + rootNodes []*spanNode + ) + + for _, rs := range trace.ResourceSpans { + for _, ss := range rs.ScopeSpans { + for i, s := range ss.Spans { + allNodes = append(allNodes, spanNode{span: &ss.Spans[i]}) + node := &allNodes[len(allNodes)-1] + + if s.IsRoot() { + rootNodes = append(rootNodes, node) + } + + id := util.SpanIDToUint64(s.SpanID) + if nodes, ok := nodesByID[id]; ok { + // zipkin traces may contain client/server spans with the same IDs + nodesByID[id] = append(nodes, node) + if len(nodes) > 2 { + undoAssignment = true + } + } else { + nodesByID[id] = []*spanNode{node} + } + } + } + } + + // check preconditions before assignment + if len(rootNodes) == 0 { + return + } + if undoAssignment { + for _, nodes := range nodesByID { + for _, n := range nodes { + n.span.NestedSetLeft = 0 + n.span.NestedSetRight = 0 + n.span.ParentID = 0 + } + } + return + } + + // build the tree + for i := range allNodes { + node := &allNodes[i] + parent := findParentNodeInMap(nodesByID, node) + if parent == nil { + continue + } + node.parent = parent + parent.children = append(parent.children, node) + } + + // traverse the tree depth first. When going down the tree, assign NestedSetLeft + // and assign NestedSetRight when going up. + nestedSetBound := int32(1) + for _, root := range rootNodes { + node := root + node.span.NestedSetLeft = nestedSetBound + nestedSetBound++ + + for node != nil { + if node.nextChild < len(node.children) { + // the current node has children that were not visited: go down to next child + + next := node.children[node.nextChild] + node.nextChild++ + + next.span.NestedSetLeft = nestedSetBound + next.span.ParentID = node.span.NestedSetLeft // the left bound of the parent serves as numeric span ID + nestedSetBound++ + node = next + } else { + // all children of the current node were visited: go up + + node.span.NestedSetRight = nestedSetBound + nestedSetBound++ + + node = node.parent + } + } + } +} + +// findParentNodeInMap finds the tree node containing the parent span for another node. zipkin traces can +// contain client/server span pairs with identical span IDs. In those cases the span kind is used to find +// the matching parent span. +func findParentNodeInMap(nodesByID map[uint64][]*spanNode, node *spanNode) *spanNode { + if node.span.IsRoot() { + return nil + } + + parentID := util.SpanIDToUint64(node.span.ParentSpanID) + nodes := nodesByID[parentID] + + switch len(nodes) { + case 0: + return nil + case 1: + return nodes[0] + case 2: + // handle client/server spans with the same span ID + kindWant := int(v1.Span_SPAN_KIND_SERVER) + if node.span.Kind == int(v1.Span_SPAN_KIND_SERVER) { + kindWant = int(v1.Span_SPAN_KIND_CLIENT) + } + + if nodes[0].span.Kind == kindWant { + return nodes[0] + } + if nodes[1].span.Kind == kindWant { + return nodes[1] + } + } + + return nil +} diff --git a/tempodb/encoding/vparquet2/nested_set_model_test.go b/tempodb/encoding/vparquet2/nested_set_model_test.go new file mode 100644 index 00000000000..426eb0ea442 --- /dev/null +++ b/tempodb/encoding/vparquet2/nested_set_model_test.go @@ -0,0 +1,264 @@ +package vparquet2 + +import ( + "testing" + + v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1" + + "github.com/grafana/tempo/pkg/util" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAssignNestedSetModelBounds(t *testing.T) { + tests := []struct { + name string + trace [][]Span + expected [][]Span + }{ + { + name: "single span", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 2}, + }, + }, + }, + { + name: "linear trace", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 6}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 5, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 3, NestedSetRight: 4, ParentID: 2}, + }, + }, + }, + { + name: "branched trace", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("dddddddd")}, + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("dddddddd")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 12}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 11, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 3, NestedSetRight: 4, ParentID: 2}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 5, NestedSetRight: 10, ParentID: 2}, + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("dddddddd"), NestedSetLeft: 6, NestedSetRight: 7, ParentID: 5}, + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("dddddddd"), NestedSetLeft: 8, NestedSetRight: 9, ParentID: 5}, + }, + }, + }, + { + name: "multiple scope spans", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb")}, + }, + { + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("dddddddd")}, + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("dddddddd")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 12}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 11, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 3, NestedSetRight: 4, ParentID: 2}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 5, NestedSetRight: 10, ParentID: 2}, + }, + { + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("dddddddd"), NestedSetLeft: 6, NestedSetRight: 7, ParentID: 5}, + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("dddddddd"), NestedSetLeft: 8, NestedSetRight: 9, ParentID: 5}, + }, + }, + }, + { + name: "multiple roots", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("dddddddd")}, + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("dddddddd")}, + + {SpanID: []byte("gggggggg")}, + {SpanID: []byte("iiiiiiii"), ParentSpanID: []byte("hhhhhhhh")}, + {SpanID: []byte("hhhhhhhh"), ParentSpanID: []byte("gggggggg")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 12}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 11, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 3, NestedSetRight: 4, ParentID: 2}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 5, NestedSetRight: 10, ParentID: 2}, + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("dddddddd"), NestedSetLeft: 6, NestedSetRight: 7, ParentID: 5}, + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("dddddddd"), NestedSetLeft: 8, NestedSetRight: 9, ParentID: 5}, + + {SpanID: []byte("gggggggg"), NestedSetLeft: 13, NestedSetRight: 18}, + {SpanID: []byte("hhhhhhhh"), ParentSpanID: []byte("gggggggg"), NestedSetLeft: 14, NestedSetRight: 17, ParentID: 13}, + {SpanID: []byte("iiiiiiii"), ParentSpanID: []byte("hhhhhhhh"), NestedSetLeft: 15, NestedSetRight: 16, ParentID: 14}, + }, + }, + }, + { + name: "interrupted", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb")}, + + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("xxxxxxxx")}, // <- interrupted + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("eeeeeeee")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 8}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 7, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 3, NestedSetRight: 4, ParentID: 2}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 5, NestedSetRight: 6, ParentID: 2}, + + {SpanID: []byte("eeeeeeee"), ParentSpanID: []byte("xxxxxxxx")}, // <- interrupted + {SpanID: []byte("ffffffff"), ParentSpanID: []byte("eeeeeeee")}, + }, + }, + }, + { + name: "partially assigned", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 4}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 0, ParentID: 1}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 4}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), NestedSetLeft: 2, NestedSetRight: 3, ParentID: 1}, + }, + }, + }, + { + name: "non unique IDs", + trace: [][]Span{ + { + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), Kind: int(v1.Span_SPAN_KIND_CLIENT)}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("bbbbbbbb"), Kind: int(v1.Span_SPAN_KIND_SERVER)}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb")}, + {SpanID: []byte("aaaaaaaa")}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 10}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("bbbbbbbb"), Kind: int(v1.Span_SPAN_KIND_SERVER), NestedSetLeft: 3, NestedSetRight: 8, ParentID: 2}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), Kind: int(v1.Span_SPAN_KIND_CLIENT), NestedSetLeft: 2, NestedSetRight: 9, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 4, NestedSetRight: 5, ParentID: 3}, + {SpanID: []byte("dddddddd"), ParentSpanID: []byte("bbbbbbbb"), NestedSetLeft: 6, NestedSetRight: 7, ParentID: 3}, + }, + }, + }, + { + name: "non unique IDs 2x", + trace: [][]Span{ + { + {SpanID: []byte("aaaaaaaa")}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), Kind: int(v1.Span_SPAN_KIND_CLIENT)}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("bbbbbbbb"), Kind: int(v1.Span_SPAN_KIND_SERVER)}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("cccccccc"), Kind: int(v1.Span_SPAN_KIND_SERVER)}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), Kind: int(v1.Span_SPAN_KIND_CLIENT)}, + }, + }, + expected: [][]Span{ + { + {SpanID: []byte("aaaaaaaa"), NestedSetLeft: 1, NestedSetRight: 10}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("aaaaaaaa"), Kind: int(v1.Span_SPAN_KIND_CLIENT), NestedSetLeft: 2, NestedSetRight: 9, ParentID: 1}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("bbbbbbbb"), Kind: int(v1.Span_SPAN_KIND_CLIENT), NestedSetLeft: 4, NestedSetRight: 7, ParentID: 3}, + {SpanID: []byte("bbbbbbbb"), ParentSpanID: []byte("bbbbbbbb"), Kind: int(v1.Span_SPAN_KIND_SERVER), NestedSetLeft: 3, NestedSetRight: 8, ParentID: 2}, + {SpanID: []byte("cccccccc"), ParentSpanID: []byte("cccccccc"), Kind: int(v1.Span_SPAN_KIND_SERVER), NestedSetLeft: 5, NestedSetRight: 6, ParentID: 4}, + }, + }, + }, + } + + makeTrace := func(traceSpans [][]Span) *Trace { + var resourceSpans ResourceSpans + for _, spans := range traceSpans { + scopeSpans := ScopeSpans{Spans: append([]Span{}, spans...)} + resourceSpans.ScopeSpans = append(resourceSpans.ScopeSpans, scopeSpans) + } + return &Trace{ResourceSpans: []ResourceSpans{resourceSpans}} + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + trace := makeTrace(tt.trace) + expected := makeTrace(tt.expected) + assignNestedSetModelBounds(trace) + assertEqualNestedSetModelBounds(t, trace, expected) + }) + } +} + +func assertEqualNestedSetModelBounds(t testing.TB, actual, expected *Trace) { + t.Helper() + + actualSpans := map[uint64]*Span{} + actualCount := 0 + for _, rs := range actual.ResourceSpans { + for _, ss := range rs.ScopeSpans { + for i, s := range ss.Spans { + actualSpans[util.SpanIDAndKindToToken(s.SpanID, s.Kind)] = &ss.Spans[i] + actualCount++ + } + } + } + + expectedCount := 0 + for _, rs := range expected.ResourceSpans { + for _, ss := range rs.ScopeSpans { + for _, exp := range ss.Spans { + expectedCount++ + act, ok := actualSpans[util.SpanIDAndKindToToken(exp.SpanID, exp.Kind)] + require.Truef(t, ok, "span '%v' expected but was missing", string(exp.SpanID)) + assert.Equalf(t, exp.NestedSetLeft, act.NestedSetLeft, "span '%v' NestedSetLeft is expected %d but was %d", string(exp.SpanID), exp.NestedSetLeft, act.NestedSetLeft) + assert.Equalf(t, exp.NestedSetRight, act.NestedSetRight, "span '%v' NestedSetRight is expected %d but was %d", string(exp.SpanID), exp.NestedSetRight, act.NestedSetRight) + assert.Equalf(t, exp.ParentID, act.ParentID, "span '%v' ParentID is expected %d but was %d", string(exp.SpanID), exp.ParentID, act.ParentID) + assert.Equalf(t, exp.ParentSpanID, act.ParentSpanID, "span '%v' ParentSpanID is expected %d but was %d", string(exp.SpanID), string(exp.ParentSpanID), string(act.ParentSpanID)) + assert.Equalf(t, exp.Kind, act.Kind, "span '%v' Kind is expected %d but was %d", string(exp.SpanID), exp.Kind, act.Kind) + } + } + } + + assert.Equalf(t, expectedCount, actualCount, "expected %d spans but found %d instead", expectedCount, actualCount) +} diff --git a/tempodb/encoding/vparquet2/schema.go b/tempodb/encoding/vparquet2/schema.go index 2c272929010..0c2ce2ab6e0 100644 --- a/tempodb/encoding/vparquet2/schema.go +++ b/tempodb/encoding/vparquet2/schema.go @@ -135,8 +135,8 @@ type Span struct { // friendly like trace ID, and []byte is half the size of string. SpanID []byte `parquet:","` ParentSpanID []byte `parquet:","` - ParentID int32 `parquet:",delta"` - NestedSetLeft int32 `parquet:",delta"` + ParentID int32 `parquet:",delta"` // can be zero for non-root spans, use IsRoot to check for root spans + NestedSetLeft int32 `parquet:",delta"` // doubles as numeric ID and is used to fill ParentID of child spans NestedSetRight int32 `parquet:",delta"` Name string `parquet:",snappy,dict"` Kind int `parquet:",delta"` @@ -158,6 +158,10 @@ type Span struct { HttpStatusCode *int64 `parquet:",snappy,optional"` } +func (s *Span) IsRoot() bool { + return len(s.ParentSpanID) == 0 +} + type InstrumentationScope struct { Name string `parquet:",snappy,dict"` Version string `parquet:",snappy,dict"` @@ -422,6 +426,8 @@ func traceToParquet(id common.ID, tr *tempopb.Trace, ot *Trace) *Trace { } } + assignNestedSetModelBounds(ot) + return ot } diff --git a/tempodb/encoding/vparquet2/schema_test.go b/tempodb/encoding/vparquet2/schema_test.go index e58cb61b80d..be1d8f18d2c 100644 --- a/tempodb/encoding/vparquet2/schema_test.go +++ b/tempodb/encoding/vparquet2/schema_test.go @@ -17,6 +17,7 @@ import ( v1_resource "github.com/grafana/tempo/pkg/tempopb/resource/v1" v1_trace "github.com/grafana/tempo/pkg/tempopb/trace/v1" "github.com/grafana/tempo/pkg/util/test" + "github.com/grafana/tempo/tempodb/encoding/common" ) func TestProtoParquetRoundTrip(t *testing.T) { @@ -106,6 +107,188 @@ func TestFieldsAreCleared(t *testing.T) { require.Equal(t, simpleTrace, actualTrace) } +func TestTraceToParquet(t *testing.T) { + strPtr := func(s string) *string { return &s } + intPtr := func(i int64) *int64 { return &i } + + traceID := common.ID{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F} + + tsc := []struct { + name string + id common.ID + trace tempopb.Trace + expected Trace + }{ + { + name: "span and resource attributes", + id: traceID, + trace: tempopb.Trace{ + Batches: []*v1_trace.ResourceSpans{{ + Resource: &v1_resource.Resource{ + Attributes: []*v1.KeyValue{ + {Key: "res.attr", Value: &v1.AnyValue{Value: &v1.AnyValue_IntValue{IntValue: 123}}}, + {Key: "service.name", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "service-a"}}}, + {Key: "cluster", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "cluster-a"}}}, + {Key: "namespace", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "namespace-a"}}}, + {Key: "pod", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "pod-a"}}}, + {Key: "container", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "container-a"}}}, + {Key: "k8s.cluster.name", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "k8s-cluster-a"}}}, + {Key: "k8s.namespace.name", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "k8s-namespace-a"}}}, + {Key: "k8s.pod.name", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "k8s-pod-a"}}}, + {Key: "k8s.container.name", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "k8s-container-a"}}}, + }, + }, + ScopeSpans: []*v1_trace.ScopeSpans{{ + Scope: &v1.InstrumentationScope{}, + Spans: []*v1_trace.Span{{ + Name: "span-a", + SpanId: common.ID{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + Attributes: []*v1.KeyValue{ + {Key: "span.attr", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "aaa"}}}, + {Key: "http.method", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "POST"}}}, + {Key: "http.url", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "https://example.com"}}}, + {Key: "http.status_code", Value: &v1.AnyValue{Value: &v1.AnyValue_IntValue{IntValue: 201}}}, + }, + }}, + }}, + }}, + }, + expected: Trace{ + TraceID: traceID, + TraceIDText: "102030405060708090a0b0c0d0e0f", + RootSpanName: "span-a", + RootServiceName: "service-a", + ResourceSpans: []ResourceSpans{{ + Resource: Resource{ + ServiceName: "service-a", + Cluster: strPtr("cluster-a"), + Namespace: strPtr("namespace-a"), + Pod: strPtr("pod-a"), + Container: strPtr("container-a"), + K8sClusterName: strPtr("k8s-cluster-a"), + K8sNamespaceName: strPtr("k8s-namespace-a"), + K8sPodName: strPtr("k8s-pod-a"), + K8sContainerName: strPtr("k8s-container-a"), + Attrs: []Attribute{ + {Key: "res.attr", ValueInt: intPtr(int64(123))}, + }, + }, + ScopeSpans: []ScopeSpans{{ + Spans: []Span{{ + Name: "span-a", + SpanID: []byte{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + NestedSetLeft: 1, + NestedSetRight: 2, + HttpMethod: strPtr("POST"), + HttpUrl: strPtr("https://example.com"), + HttpStatusCode: intPtr(201), + Attrs: []Attribute{ + {Key: "span.attr", Value: strPtr("aaa")}, + }, + }}, + }}, + }}, + }, + }, + { + name: "nested set model bounds", + id: traceID, + trace: tempopb.Trace{ + Batches: []*v1_trace.ResourceSpans{{ + Resource: &v1_resource.Resource{ + Attributes: []*v1.KeyValue{ + {Key: "service.name", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "service-a"}}}, + }, + }, + ScopeSpans: []*v1_trace.ScopeSpans{{ + Scope: &v1.InstrumentationScope{}, + Spans: []*v1_trace.Span{ + { + Name: "span-a", + SpanId: common.ID{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + Attributes: []*v1.KeyValue{ + {Key: "span.attr", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "aaa"}}}, + }, + }, + { + Name: "span-b", + SpanId: common.ID{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02}, + ParentSpanId: common.ID{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + Attributes: []*v1.KeyValue{ + {Key: "span.attr", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "bbb"}}}, + }, + }, + { + Name: "span-c", + SpanId: common.ID{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}, + ParentSpanId: common.ID{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + Attributes: []*v1.KeyValue{ + {Key: "span.attr", Value: &v1.AnyValue{Value: &v1.AnyValue_StringValue{StringValue: "ccc"}}}, + }, + }, + }, + }}, + }}, + }, + expected: Trace{ + TraceID: traceID, + TraceIDText: "102030405060708090a0b0c0d0e0f", + RootSpanName: "span-a", + RootServiceName: "service-a", + ResourceSpans: []ResourceSpans{{ + Resource: Resource{ + ServiceName: "service-a", + Attrs: []Attribute{}, + }, + ScopeSpans: []ScopeSpans{{ + Spans: []Span{ + { + Name: "span-a", + SpanID: []byte{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + NestedSetLeft: 1, + NestedSetRight: 6, + Attrs: []Attribute{ + {Key: "span.attr", Value: strPtr("aaa")}, + }, + }, + { + Name: "span-b", + SpanID: []byte{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02}, + ParentSpanID: []byte{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + ParentID: 1, + NestedSetLeft: 2, + NestedSetRight: 3, + Attrs: []Attribute{ + {Key: "span.attr", Value: strPtr("bbb")}, + }, + }, + { + Name: "span-c", + SpanID: []byte{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}, + ParentSpanID: []byte{0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + ParentID: 1, + NestedSetLeft: 4, + NestedSetRight: 5, + Attrs: []Attribute{ + {Key: "span.attr", Value: strPtr("ccc")}, + }, + }, + }, + }}, + }}, + }, + }, + } + + for _, tt := range tsc { + t.Run(tt.name, func(t *testing.T) { + var actual Trace + traceToParquet(tt.id, &tt.trace, &actual) + assert.Equal(t, tt.expected, actual) + }) + } +} + func BenchmarkProtoToParquet(b *testing.B) { batchCount := 100 spanCounts := []int{