Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fill parent ID column and nested set columns #2487

Merged
merged 23 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9a0a803
Add helper function to convert span IDs into uint64
stoewer May 11, 2023
eab5c68
Add function to assign nested set model bounds to traces
stoewer May 4, 2023
0da8134
Apply nested set model bounds to created and combined traces
stoewer May 12, 2023
e42060c
Unit test for traceToParquet function
stoewer May 16, 2023
964424f
Add PR to CHANGELOG
stoewer May 18, 2023
35183e2
Fix linter issues
stoewer May 18, 2023
54c2796
Small optimizations
stoewer May 18, 2023
df43352
Use [8]byte array instead of uint64 for IDs
stoewer May 18, 2023
4c9aece
Merge branch 'main' into fill-nested-set-model-columns
stoewer May 23, 2023
4252752
Force nested set bounds reassignment for combined traces
stoewer May 25, 2023
c6e3197
Merge branch 'main' into fill-nested-set-model-columns
stoewer May 25, 2023
2807b39
Safe method to check whether a span is a root span
stoewer May 25, 2023
af7e6b6
Add comments to explain algorithm
stoewer May 25, 2023
62023e2
Make stack implementation more generic and move it to pkg/util
stoewer May 25, 2023
b063eb7
Change the assignment algorithm to account for zipkin spans
stoewer May 26, 2023
1350a0a
Remove unused stack implementation
stoewer May 29, 2023
10f68ac
Use the same span ID to token function for nested set values and comb…
stoewer May 29, 2023
2891818
Improve unit tests
stoewer May 29, 2023
6e0ba8f
Improve comments
stoewer May 29, 2023
2d971fd
Reformat code
stoewer May 29, 2023
ac38e8f
Remove unused parameter from unit test
stoewer May 31, 2023
3a6834d
Merge branch 'main' into fill-nested-set-model-columns
stoewer May 31, 2023
5e5ae4e
Remove check to skip the assignment (and the force assignment parameter)
stoewer Jun 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## main / unreleased

* [CHANGE] Disable tempo-query by default in Jsonnet libs. [#2462](https://github.com/grafana/tempo/pull/2462) (@electron0zero)
* [ENHANCEMENT] Fill parent ID column and nested set columns [#2487](https://github.com/grafana/tempo/pull/2487) (@stoewer)
* [ENHANCEMENT] Log client IP to help identify which client sends no org ID [#2436](https://github.com/grafana/tempo/pull/2436)
* [ENHANCEMENT] Add `spss` parameter to `/api/search/tags` [#2308] to configure the number of spans per span set in the response
* [BUGFIX] Fix Search SLO by routing tags to a new handler. [#2468](https://github.com/grafana/tempo/issues/2468) (@electron0zero)
Expand Down
36 changes: 36 additions & 0 deletions pkg/util/traceid.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"strings"
"unsafe"
)

func HexStringToTraceID(id string) ([]byte, error) {
Expand Down Expand Up @@ -59,6 +60,41 @@ func SpanIDToHexString(byteID []byte) string {
return fmt.Sprintf("%016s", id)
}

// spanKindFNVHashes is a lookup table of pre-calculated FNV hashes, indexed by span kind value.
// It has one entry for each span kind defined in the OTEL spec, followed by two spare entries.
var spanKindFNVHashes = [8]uint64{
	0: 0xa8c7f832281a39c5, // unspecified
	1: 0xe3757ca7d64666ea, // internal
	2: 0x1e23011d8472940f, // server
	3: 0x58d08593329ec134, // client
	4: 0x937e0a08e0caee59, // producer
	5: 0xce2b8e7e8ef71b7e, // consumer
	6: 0x08d912f43d2348a3, // spare 1
	7: 0x43869769eb4f75c8, // spare 2
}

// SpanIDAndKindToToken converts a span ID into a token for use as key in a hash map. The token is generated such
// that it has a low collision probability. In zipkin traces the span id is not guaranteed to be unique as it
// is shared between client and server spans. Therefore, it is sometimes required to take the span kind into account.
func SpanIDAndKindToToken(id []byte, kind int) uint64 {
return SpanIDToUint64(id) ^ spanKindFNVHashes[kind]
}

// SpanIDToUint64 converts a span ID into a uint64 representation. This is useful when using a span ID as key
// in a map. IDs shorter than 8 bytes are padded with trailing zero bytes. If the ID is longer than 8 bytes,
// the bytes at larger positions are discarded. A nil or empty ID converts to 0.
//
// The bytes are interpreted in the byte order of the host, so the function does not make any guarantees about
// the endianness or ordering of converted IDs (on a little-endian host id[0] is the least significant byte).
//
// Note: span IDs are not always unique within a trace (e.g. zipkin traces). SpanIDAndKindToToken could be more
// appropriate in some cases.
func SpanIDToUint64(id []byte) uint64 {
	var token uint64

	// Copy the ID into the integer's own storage instead of casting &id[0] to *uint64. A []byte carries no
	// 8-byte alignment guarantee, so that cast is a misaligned pointer conversion whenever the ID is a
	// sub-slice starting at an odd offset. Viewing a *uint64 as *[8]byte is always valid, and copy handles
	// both short IDs (the remaining bytes stay zero) and long IDs (only the first 8 bytes are used).
	copy((*[8]byte)(unsafe.Pointer(&token))[:], id)

	return token
}

// EqualHexStringTraceIDs compares two trace ID strings and compares the
// resulting bytes after padding. Returns true unless there is a reason not
// to.
Expand Down
109 changes: 109 additions & 0 deletions pkg/util/traceid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package util

import (
"errors"
"math/rand"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -129,6 +130,114 @@ func TestSpanIDToHexString(t *testing.T) {
}
}

// TestSpanIDToUint64 checks the conversion of span IDs that are empty, shorter than, exactly, and longer
// than 8 bytes. The expected values assume a little-endian host: the first ID byte is the least
// significant byte of the result.
func TestSpanIDToUint64(t *testing.T) {
	tc := []struct {
		spanID   []byte
		expected uint64
	}{
		{
			// an empty ID is padded with zeros only
			spanID:   nil,
			expected: 0,
		},
		{
			// short ID: the missing bytes are treated as zero
			spanID:   []byte{0x60, 0xd8, 0xa9, 0xbd},
			expected: 0xbd_a9_d8_60,
		},
		{
			spanID:   []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43},
			expected: 0x43_b7_57_22_90_37_f6_8e,
		},
		{
			spanID:   []byte{0x18, 0xcc, 0xd9, 0x6d, 0x70, 0xc1, 0xbd, 0xf9},
			expected: 0xf9_bd_c1_70_6d_d9_cc_18,
		},
		{
			// long ID: the ninth byte is discarded
			spanID:   []byte{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43, 0xff},
			expected: 0x43_b7_57_22_90_37_f6_8e,
		},
	}

	for _, tt := range tc {
		token := SpanIDToUint64(tt.spanID)
		assert.Equalf(t, tt.expected, token, "SpanIDToUint64(%v) returned 0x%x but 0x%x was expected", tt.spanID, token, tt.expected)
	}
}

// TestSpanIDAndKindToToken verifies, for several span IDs, that every span kind covered by
// spanKindFNVHashes produces a distinct token and that no token equals the plain SpanIDToUint64 value
// of the same ID.
func TestSpanIDAndKindToToken(t *testing.T) {
	spanIDs := [][]byte{
		{0x60, 0xd8, 0xa9, 0xbd},
		{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43},
		{0x18, 0xcc, 0xd9, 0x6d, 0x70, 0xc1, 0xbd, 0xf9},
		{0x8e, 0xf6, 0x37, 0x90, 0x22, 0x57, 0xb7, 0x43, 0xff},
	}

	for _, spanID := range spanIDs {
		tokenIDOnly := SpanIDToUint64(spanID)
		tokensForKind := make(map[uint64]struct{}, len(spanKindFNVHashes))

		// Iterate over the table itself so the test keeps covering every supported kind.
		for kind := range spanKindFNVHashes {
			token := SpanIDAndKindToToken(spanID, kind)

			_, exists := tokensForKind[token]
			assert.False(t, exists, "token expected to be unique for different span kind")
			assert.NotEqual(t, tokenIDOnly, token)
			tokensForKind[token] = struct{}{}
		}
	}
}

// tokenToPreventOptimization is a package-level sink for benchmark results, so the compiler cannot
// discard the benchmarked call as dead code.
var tokenToPreventOptimization uint64

// BenchmarkSpanIDAndKindToToken measures SpanIDAndKindToToken on 1,000 randomly generated inputs with
// span IDs of 4 bytes and of 8 bytes.
func BenchmarkSpanIDAndKindToToken(b *testing.B) {
	type spanInput struct {
		SpanID []byte
		Kind   int
	}

	// generate builds count inputs with random IDs of idLen bytes and a random kind in [0, 6).
	generate := func(count int, idLen int) []spanInput {
		inputs := make([]spanInput, count)
		for i := range inputs {
			spanID := make([]byte, idLen)
			for pos := 0; pos < idLen; pos++ {
				spanID[pos] = byte(rand.Intn(256))
			}
			inputs[i] = spanInput{SpanID: spanID, Kind: rand.Intn(6)}
		}
		return inputs
	}

	cases := []struct {
		name string
		data []spanInput
	}{
		{name: "id length 4", data: generate(1_000, 4)},
		{name: "id length 8", data: generate(1_000, 8)},
	}

	for _, c := range cases {
		inputs := c.data
		b.Run(c.name, func(b *testing.B) {
			b.ReportAllocs()
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				in := inputs[iter%len(inputs)]
				tokenToPreventOptimization = SpanIDAndKindToToken(in.SpanID, in.Kind)
			}
		})
	}
}

func TestEqualHexStringTraceIDs(t *testing.T) {
a := "82f6471b46d25e23418a0a99d4c2cda"
b := "082f6471b46d25e23418a0a99d4c2cda"
Expand Down
39 changes: 6 additions & 33 deletions tempodb/encoding/vparquet/combiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,10 @@ package vparquet

import (
"bytes"
"encoding/binary"
"hash"
"hash/fnv"
"sort"
)

// token is the hash value used as a map key to recognize spans that were already seen.
//
// It is a uint64 to reduce hash collision rates. Experimentally, it was observed
// that fnv32 could approach a collision rate of 1 in 10,000. fnv64 avoids collisions
// when tested against traces with up to 1M spans (see matching test). A collision
// results in a dropped span during combine.
type token uint64

// newHash returns a new 64-bit FNV-1 hasher for computing span tokens.
func newHash() hash.Hash64 {
	var hasher hash.Hash64 = fnv.New64()
	return hasher
}

// tokenForID computes the hash map token for a span from its ID b and its kind.
//
// The hasher h is reset and then fed the ID followed by the little-endian encoded kind. buffer is
// caller-provided scratch space for that encoding: it must be a 4 byte slice and is overwritten on
// every call. The kind is hashed along with the ID because in zipkin traces the span ID is not
// guaranteed to be unique; it is shared between client and server spans.
func tokenForID(h hash.Hash64, buffer []byte, kind int32, b []byte) token {
	// Encode the kind first, exactly as before, so the scratch buffer is filled before any hashing.
	binary.LittleEndian.PutUint32(buffer, uint32(kind))

	h.Reset()
	for _, part := range [][]byte{b, buffer} {
		_, _ = h.Write(part)
	}

	sum := h.Sum64()
	return token(sum)
}
"github.com/grafana/tempo/pkg/util"
)

func CombineTraces(traces ...*Trace) *Trace {
if len(traces) == 1 {
Expand All @@ -52,7 +28,7 @@ func CombineTraces(traces ...*Trace) *Trace {
// * Don't scan/hash the spans for the last input (final=true).
type Combiner struct {
result *Trace
spans map[token]struct{}
spans map[uint64]struct{}
combined bool
}

Expand All @@ -72,9 +48,6 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
return
}

h := newHash()
buffer := make([]byte, 4)

// First call?
if c.result == nil {
c.result = tr
Expand All @@ -87,12 +60,12 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
n += len(ils.Spans)
}
}
c.spans = make(map[token]struct{}, n)
c.spans = make(map[uint64]struct{}, n)

for _, b := range c.result.ResourceSpans {
for _, ils := range b.ScopeSpans {
for _, s := range ils.Spans {
c.spans[tokenForID(h, buffer, int32(s.Kind), s.ID)] = struct{}{}
c.spans[util.SpanIDAndKindToToken(s.ID, s.Kind)] = struct{}{}
}
}
}
Expand Down Expand Up @@ -122,7 +95,7 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
notFoundSpans := ils.Spans[:0]
for _, s := range ils.Spans {
// if not already encountered, then keep
token := tokenForID(h, buffer, int32(s.Kind), s.ID)
token := util.SpanIDAndKindToToken(s.ID, s.Kind)
_, ok := c.spans[token]
if !ok {
notFoundSpans = append(notFoundSpans, s)
Expand Down
40 changes: 7 additions & 33 deletions tempodb/encoding/vparquet2/combiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,10 @@ package vparquet2

import (
"bytes"
"encoding/binary"
"hash"
"hash/fnv"
"sort"
)

// token is the hash value used as a map key to recognize spans that were already seen.
//
// It is a uint64 to reduce hash collision rates. Experimentally, it was observed
// that fnv32 could approach a collision rate of 1 in 10,000. fnv64 avoids collisions
// when tested against traces with up to 1M spans (see matching test). A collision
// results in a dropped span during combine.
type token uint64

// newHash returns a new 64-bit FNV-1 hasher for computing span tokens.
func newHash() hash.Hash64 {
	var hasher hash.Hash64 = fnv.New64()
	return hasher
}

// tokenForID computes the hash map token for a span from its ID b and its kind.
//
// The hasher h is reset and then fed the ID followed by the little-endian encoded kind. buffer is
// caller-provided scratch space for that encoding: it must be a 4 byte slice and is overwritten on
// every call. The kind is hashed along with the ID because in zipkin traces the span ID is not
// guaranteed to be unique; it is shared between client and server spans.
func tokenForID(h hash.Hash64, buffer []byte, kind int32, b []byte) token {
	// Encode the kind first, exactly as before, so the scratch buffer is filled before any hashing.
	binary.LittleEndian.PutUint32(buffer, uint32(kind))

	h.Reset()
	for _, part := range [][]byte{b, buffer} {
		_, _ = h.Write(part)
	}

	sum := h.Sum64()
	return token(sum)
}
"github.com/grafana/tempo/pkg/util"
)

func CombineTraces(traces ...*Trace) *Trace {
if len(traces) == 1 {
Expand All @@ -52,7 +28,7 @@ func CombineTraces(traces ...*Trace) *Trace {
// * Don't scan/hash the spans for the last input (final=true).
type Combiner struct {
result *Trace
spans map[token]struct{}
spans map[uint64]struct{}
combined bool
}

Expand All @@ -72,9 +48,6 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
return
}

h := newHash()
buffer := make([]byte, 4)

// First call?
if c.result == nil {
c.result = tr
Expand All @@ -87,12 +60,12 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
n += len(ils.Spans)
}
}
c.spans = make(map[token]struct{}, n)
c.spans = make(map[uint64]struct{}, n)

for _, b := range c.result.ResourceSpans {
for _, ils := range b.ScopeSpans {
for _, s := range ils.Spans {
c.spans[tokenForID(h, buffer, int32(s.Kind), s.SpanID)] = struct{}{}
c.spans[util.SpanIDAndKindToToken(s.SpanID, s.Kind)] = struct{}{}
}
}
}
Expand Down Expand Up @@ -122,7 +95,7 @@ func (c *Combiner) ConsumeWithFinal(tr *Trace, final bool) (spanCount int) {
notFoundSpans := ils.Spans[:0]
for _, s := range ils.Spans {
// if not already encountered, then keep
token := tokenForID(h, buffer, int32(s.Kind), s.SpanID)
token := util.SpanIDAndKindToToken(s.SpanID, s.Kind)
_, ok := c.spans[token]
if !ok {
notFoundSpans = append(notFoundSpans, s)
Expand Down Expand Up @@ -160,6 +133,7 @@ func (c *Combiner) Result() (*Trace, int) {
if c.result != nil && c.combined {
// Only if anything combined
SortTrace(c.result)
assignNestedSetModelBounds(c.result)
spanCount = len(c.spans)
}

Expand Down
21 changes: 14 additions & 7 deletions tempodb/encoding/vparquet2/combiner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,10 @@ func TestCombiner(t *testing.T) {
{
Spans: []Span{
{
SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
StatusCode: 0,
SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
StatusCode: 0,
NestedSetLeft: 1,
NestedSetRight: 2,
},
},
},
Expand Down Expand Up @@ -179,8 +181,10 @@ func TestCombiner(t *testing.T) {
{
Spans: []Span{
{
SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
StatusCode: 0,
SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
StatusCode: 0,
NestedSetLeft: 1,
NestedSetRight: 4,
},
},
},
Expand All @@ -194,9 +198,12 @@ func TestCombiner(t *testing.T) {
{
Spans: []Span{
{
SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02},
ParentSpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
StatusCode: 0,
SpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02},
ParentSpanID: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
StatusCode: 0,
ParentID: 1,
NestedSetLeft: 2,
NestedSetRight: 3,
},
},
},
Expand Down
Loading