grafana · stoewer · Jun 1, 2023 · May 11, 2023 · May 4, 2023 · May 12, 2023
@@ -1,6 +1,7 @@
 ## main / unreleased
 
 * [CHANGE] Disable tempo-query by default in Jsonnet libs. [#2462](https://github.com/grafana/tempo/pull/2462) (@electron0zero)
+* [ENHANCEMENT] Fill parent ID column and nested set columns [#2487](https://github.com/grafana/tempo/pull/2487) (@stoewer)
 * [ENHANCEMENT] log client ip to help identify which client is no org id [#2436](https://github.com/grafana/tempo/pull/2436)
 * [ENHANCEMENT] Add `spss` parameter to `/api/search/tags`[#2308] to configure the spans per span set in response
 * [BUGFIX] Fix Search SLO by routing tags to a new handler. [#2468](https://github.com/grafana/tempo/issues/2468) (@electron0zero)

@@ -6,6 +6,7 @@ import (
 	"errors"
 	"fmt"
 	"strings"
+	"unsafe"
 )
 
 func HexStringToTraceID(id string) ([]byte, error) {
@@ -59,6 +60,41 @@ func SpanIDToHexString(byteID []byte) string {
 	return fmt.Sprintf("%016s", id)
 }
 
+// spanKindFNVHashes holds one pre-calculated 64 bit FNV hash per span kind. Entries 0 to 5 belong
+// to the span kinds defined in the OTEL spec, entries 6 and 7 are spares. The table is indexed by
+// the numeric span kind value.
+var spanKindFNVHashes = [8]uint64{
+	0: 0xa8c7f832281a39c5, // unspecified
+	1: 0xe3757ca7d64666ea, // internal
+	2: 0x1e23011d8472940f, // server
+	3: 0x58d08593329ec134, // client
+	4: 0x937e0a08e0caee59, // producer
+	5: 0xce2b8e7e8ef71b7e, // consumer
+	6: 0x08d912f43d2348a3, // spare 1
+	7: 0x43869769eb4f75c8, // spare 2
+}
+
+// SpanIDAndKindToToken converts a span ID and its span kind into a token for use as key in a hash map.
+// The token is the uint64 form of the ID XORed with the pre-calculated hash of the kind, so that equal
+// IDs with different kinds yield different tokens with a low collision probability. This is needed for
+// zipkin traces, where the span ID is not guaranteed to be unique as it is shared between client and
+// server spans.
+//
+// kind is expected to be an index into spanKindFNVHashes. Callers pass the kind found in span data, so
+// a value outside of that range is treated like kind 0 (unspecified) instead of causing an index out
+// of range panic.
+func SpanIDAndKindToToken(id []byte, kind int) uint64 {
+	if kind < 0 || kind >= len(spanKindFNVHashes) {
+		kind = 0
+	}
+	return SpanIDToUint64(id) ^ spanKindFNVHashes[kind]
+}
+
+// SpanIDToUint64 reinterprets the first 8 bytes of a span ID as an uint64, e.g. for use as key in a map.
+// IDs shorter than 8 bytes are padded with trailing zero bytes before the conversion, bytes beyond the
+// 8th position are discarded. The bytes are read in the native byte order of the machine, therefore no
+// guarantees are made about the endianness or ordering of converted IDs.
+//
+// Note: span IDs are not always unique within a trace (e.g. zipkin traces). SpanIDAndKindToToken could
+// be more appropriate in such cases.
+func SpanIDToUint64(id []byte) uint64 {
+	if len(id) >= 8 {
+		// enough bytes available: read them directly from the slice
+		return *(*uint64)(unsafe.Pointer(&id[0]))
+	}
+
+	// short or empty ID: copy into a zeroed array first so that 8 bytes can be read
+	var padded [8]byte
+	copy(padded[:], id)
+	return *(*uint64)(unsafe.Pointer(&padded))
+}
+
 // EqualHexStringTraceIDs compares two trace ID strings and compares the
 // resulting bytes after padding.  Returns true unless there is a reason not
 // to.

@@ -2,6 +2,7 @@ package util
 
 import (
 	"errors"
+	"math/rand"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -129,6 +130,114 @@ func TestSpanIDToHexString(t *testing.T) {
 	}
 }
 
+// TestSpanIDToUint64 checks the conversion of span IDs that are empty, shorter than 8 bytes, exactly
+// 8 bytes and longer than 8 bytes.
+//
+// NOTE(review): the expected values list the ID bytes in little-endian order while SpanIDToUint64
+// reads them in native byte order, so this presumably only passes on little-endian machines.
+func TestSpanIDToUint64(t *testing.T) {
+	tc := []struct {
+		spanID   []byte
+		expected uint64
+	}{
+		{
+			spanID:   nil,
+			expected: 0,
+		},
+		{
+			spanID:   []byte{},
+			expected: 0,
+		},
+		{
+			spanID:   []byte{0x60, 0xd8, 0xa9, 0xbd},
+			expected: 0xbd_a9_d8_60,
+		},
+		{
+			spanID:   []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43},
+			expected: 0x43_b7_57_22_90_37_f6_8e,
+		},
+		{
+			spanID:   []byte{0x18, 0xcc, 0xd9, 0x6d, 0x70, 0xc1, 0xbd, 0xf9},
+			expected: 0xf9_bd_c1_70_6d_d9_cc_18,
+		},
+		{
+			// the 9th byte is expected to be ignored
+			spanID:   []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43, 0xff},
+			expected: 0x43_b7_57_22_90_37_f6_8e,
+		},
+	}
+
+	for _, tt := range tc {
+		token := SpanIDToUint64(tt.spanID)
+		assert.Equalf(t, tt.expected, token, "SpanIDToUint64(%v) returned 0x%x but 0x%x was expected", tt.spanID, token, tt.expected)
+	}
+}
+
+// TestSpanIDAndKindToToken verifies for a set of span IDs that every kind covered by spanKindFNVHashes
+// yields a distinct token, and that none of these tokens equals the token of the ID alone.
+func TestSpanIDAndKindToToken(t *testing.T) {
+	spanIDs := [][]byte{
+		{0x60, 0xd8, 0xa9, 0xbd},
+		{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43},
+		{0x18, 0xcc, 0xd9, 0x6d, 0x70, 0xc1, 0xbd, 0xf9},
+		{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43, 0xff},
+	}
+
+	for _, spanID := range spanIDs {
+		tokenIDOnly := SpanIDToUint64(spanID)
+		tokensForKind := make(map[uint64]struct{}, len(spanKindFNVHashes))
+
+		// iterate over the table itself so the test follows the number of supported kinds
+		for kind := range spanKindFNVHashes {
+			token := SpanIDAndKindToToken(spanID, kind)
+
+			_, exists := tokensForKind[token]
+			assert.False(t, exists, "token expected to be unique for different span kind")
+			assert.NotEqual(t, tokenIDOnly, token)
+			tokensForKind[token] = struct{}{}
+		}
+	}
+}
+
+// tokenToPreventOptimization is a package level sink for benchmark results. As the name says, storing
+// the result here is meant to keep the benchmarked call from being optimized away.
+var tokenToPreventOptimization uint64
+
+// BenchmarkSpanIDAndKindToToken measures SpanIDAndKindToToken with randomly generated span IDs of
+// length 4 and 8, each combined with a random span kind in the range [0, 6).
+func BenchmarkSpanIDAndKindToToken(b *testing.B) {
+	type testDataSpanID struct {
+		SpanID []byte
+		Kind   int
+	}
+
+	// randomTestCasesSpanID creates n inputs with a random ID of idLen bytes and a random kind
+	randomTestCasesSpanID := func(n int, idLen int) []testDataSpanID {
+		testCases := make([]testDataSpanID, n)
+		for i := range testCases {
+			id := make([]byte, idLen)
+			for j := range id {
+				id[j] = byte(rand.Intn(256))
+			}
+			testCases[i] = testDataSpanID{SpanID: id, Kind: rand.Intn(6)}
+		}
+		return testCases
+	}
+
+	benchmarks := []struct {
+		name string
+		data []testDataSpanID
+	}{
+		{name: "id length 4", data: randomTestCasesSpanID(1_000, 4)},
+		{name: "id length 8", data: randomTestCasesSpanID(1_000, 8)},
+	}
+
+	for _, bc := range benchmarks {
+		b.Run(bc.name, func(b *testing.B) {
+			inputs := bc.data
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				// cycle through the prepared inputs
+				d := inputs[i%len(inputs)]
+				tokenToPreventOptimization = SpanIDAndKindToToken(d.SpanID, d.Kind)
+			}
+		})
+	}
+}
+
 func TestEqualHexStringTraceIDs(t *testing.T) {
 	a := "82f6471b46d25e23418a0a99d4c2cda"
 	b := "082f6471b46d25e23418a0a99d4c2cda"

@@ -2,34 +2,10 @@ package vparquet
 
 import (
 	"bytes"
-	"encoding/binary"
-	"hash"
-	"hash/fnv"
 	"sort"
-)
-
-// token is uint64 to reduce hash collision rates.  Experimentally, it was observed
-// that fnv32 could approach a collision rate of 1 in 10,000. fnv64 avoids collisions
-// when tested against traces with up to 1M spans (see matching test). A collision
-// results in a dropped span during combine.
-type token uint64
-
-func newHash() hash.Hash64 {
-	return fnv.New64()
-}
 
-// tokenForID returns a token for use in a hash map given a span id and span kind
-// buffer must be a 4 byte slice and is reused for writing the span kind to the hashing function
-// kind is used along with the actual id b/c in zipkin traces span id is not guaranteed to be unique
-// as it is shared between client and server spans.
-func tokenForID(h hash.Hash64, buffer []byte, kind int32, b []byte) token {
-	binary.LittleEndian.PutUint32(buffer, uint32(kind))
-
-	h.Reset()
-	_, _ = h.Write(b)
-	_, _ = h.Write(buffer)
-	return token(h.Sum64())
-}
+	"github.com/grafana/tempo/pkg/util"
+)
 
 func CombineTraces(traces ...*Trace) *Trace {
 	if len(traces) == 1 {
@@ -52,7 +28,7 @@ func CombineTraces(traces ...*Trace) *Trace {
 // * Don't scan/hash the spans for the last input (final=true).
 type Combiner struct {
 	result   *Trace
-	spans    map[token]struct{}
+	spans    map[uint64]struct{}
 	combined bool
 }
 
@@ -72,9 +48,6 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
 		return
 	}
 
-	h := newHash()
-	buffer := make([]byte, 4)
-
 	// First call?
 	if c.result == nil {
 		c.result = tr
@@ -87,12 +60,12 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
 				n += len(ils.Spans)
 			}
 		}
-		c.spans = make(map[token]struct{}, n)
+		c.spans = make(map[uint64]struct{}, n)
 
 		for _, b := range c.result.ResourceSpans {
 			for _, ils := range b.ScopeSpans {
 				for _, s := range ils.Spans {
-					c.spans[tokenForID(h, buffer, int32(s.Kind), s.ID)] = struct{}{}
+					c.spans[util.SpanIDAndKindToToken(s.ID, s.Kind)] = struct{}{}
 				}
 			}
 		}
@@ -122,7 +95,7 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
 			notFoundSpans := ils.Spans[:0]
 			for _, s := range ils.Spans {
 				// if not already encountered, then keep
-				token := tokenForID(h, buffer, int32(s.Kind), s.ID)
+				token := util.SpanIDAndKindToToken(s.ID, s.Kind)
 				_, ok := c.spans[token]
 				if !ok {
 					notFoundSpans = append(notFoundSpans, s)

@@ -2,34 +2,10 @@ package vparquet2
 
 import (
 	"bytes"
-	"encoding/binary"
-	"hash"
-	"hash/fnv"
 	"sort"
-)
-
-// token is uint64 to reduce hash collision rates.  Experimentally, it was observed
-// that fnv32 could approach a collision rate of 1 in 10,000. fnv64 avoids collisions
-// when tested against traces with up to 1M spans (see matching test). A collision
-// results in a dropped span during combine.
-type token uint64
-
-func newHash() hash.Hash64 {
-	return fnv.New64()
-}
 
-// tokenForID returns a token for use in a hash map given a span id and span kind
-// buffer must be a 4 byte slice and is reused for writing the span kind to the hashing function
-// kind is used along with the actual id b/c in zipkin traces span id is not guaranteed to be unique
-// as it is shared between client and server spans.
-func tokenForID(h hash.Hash64, buffer []byte, kind int32, b []byte) token {
-	binary.LittleEndian.PutUint32(buffer, uint32(kind))
-
-	h.Reset()
-	_, _ = h.Write(b)
-	_, _ = h.Write(buffer)
-	return token(h.Sum64())
-}
+	"github.com/grafana/tempo/pkg/util"
+)
 
 func CombineTraces(traces ...*Trace) *Trace {
 	if len(traces) == 1 {
@@ -52,7 +28,7 @@ func CombineTraces(traces ...*Trace) *Trace {
 // * Don't scan/hash the spans for the last input (final=true).
 type Combiner struct {
 	result   *Trace
-	spans    map[token]struct{}
+	spans    map[uint64]struct{}
 	combined bool
 }
 
@@ -72,9 +48,6 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
 		return
 	}
 
-	h := newHash()
-	buffer := make([]byte, 4)
-
 	// First call?
 	if c.result == nil {
 		c.result = tr
@@ -87,12 +60,12 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
 				n += len(ils.Spans)
 			}
 		}
-		c.spans = make(map[token]struct{}, n)
+		c.spans = make(map[uint64]struct{}, n)
 
 		for _, b := range c.result.ResourceSpans {
 			for _, ils := range b.ScopeSpans {
 				for _, s := range ils.Spans {
-					c.spans[tokenForID(h, buffer, int32(s.Kind), s.SpanID)] = struct{}{}
+					c.spans[util.SpanIDAndKindToToken(s.SpanID, s.Kind)] = struct{}{}
 				}
 			}
 		}
@@ -122,7 +95,7 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
 			notFoundSpans := ils.Spans[:0]
 			for _, s := range ils.Spans {
 				// if not already encountered, then keep
-				token := tokenForID(h, buffer, int32(s.Kind), s.SpanID)
+				token := util.SpanIDAndKindToToken(s.SpanID, s.Kind)
 				_, ok := c.spans[token]
 				if !ok {
 					notFoundSpans = append(notFoundSpans, s)
@@ -160,6 +133,7 @@ func (c *Combiner) Result() (*Trace, int) {
 	if c.result != nil && c.combined {
 		// Only if anything combined
 		SortTrace(c.result)
+		assignNestedSetModelBounds(c.result)
 		spanCount = len(c.spans)
 	}
 

@@ -135,8 +135,10 @@ func TestCombiner(t *testing.T) {
 							{
 								Spans: []Span{
 									{
-										SpanID:     []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
-										StatusCode: 0,
+										SpanID:         []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+										StatusCode:     0,
+										NestedSetLeft:  1,
+										NestedSetRight: 2,
 									},
 								},
 							},
@@ -179,8 +181,10 @@ func TestCombiner(t *testing.T) {
 							{
 								Spans: []Span{
 									{
-										SpanID:     []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
-										StatusCode: 0,
+										SpanID:         []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+										StatusCode:     0,
+										NestedSetLeft:  1,
+										NestedSetRight: 4,
 									},
 								},
 							},
@@ -194,9 +198,12 @@ func TestCombiner(t *testing.T) {
 							{
 								Spans: []Span{
 									{
-										SpanID:       []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02},
-										ParentSpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
-										StatusCode:   0,
+										SpanID:         []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02},
+										ParentSpanID:   []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+										StatusCode:     0,
+										ParentID:       1,
+										NestedSetLeft:  2,
+										NestedSetRight: 3,
 									},
 								},
 							},