From 7560336c15b55901dc80e7fc11d2c8976b20967b Mon Sep 17 00:00:00 2001 From: Bertrand Mermet Date: Wed, 25 Jul 2018 15:44:08 +0200 Subject: [PATCH 1/2] Force sampling of transactions with priority 2 Makes sure that when a span from a trace with a sampling priority of 2 matches the critera to be analyzed it is kept even if the rate based sampling policy would discard it. --- cmd/trace-agent/agent.go | 11 ++++++-- cmd/trace-agent/transaction_sampler.go | 10 +++++-- cmd/trace-agent/transaction_sampler_test.go | 31 ++++++++++++++------- sampler/prioritysampler.go | 5 ++-- sampler/prioritysampler_test.go | 14 +++++----- 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/cmd/trace-agent/agent.go b/cmd/trace-agent/agent.go index ef666ab4b..6857d2f65 100644 --- a/cmd/trace-agent/agent.go +++ b/cmd/trace-agent/agent.go @@ -21,7 +21,6 @@ import ( const ( processStatsInterval = time.Minute - samplingPriorityKey = "_sampling_priority_v1" ) type processedTrace struct { @@ -39,6 +38,14 @@ func (pt *processedTrace) weight() float64 { return pt.Root.Weight() } +func (pt *processedTrace) getSamplingPriority() (int, bool) { + if pt.Root == nil { + return 0, false + } + priorityFloat, hasPriority := pt.Root.Metrics[sampler.SamplingPriorityKey] + return int(priorityFloat), hasPriority +} + // Agent struct holds all the sub-routines structs and make the data flow between them type Agent struct { Receiver *HTTPReceiver @@ -193,7 +200,7 @@ func (a *Agent) Process(t model.Trace) { samplers = append(samplers, a.ScoreSampler) } - priority, hasPriority := root.Metrics[samplingPriorityKey] + priority, hasPriority := root.Metrics[sampler.SamplingPriorityKey] if hasPriority { // If Priority is defined, send to priority sampling, regardless of priority value. // The sampler will keep or discard the trace, but we send everything so that it diff --git a/cmd/trace-agent/transaction_sampler.go b/cmd/trace-agent/transaction_sampler.go index 295e7b349..204ace254 100644 --- a/cmd/trace-agent/transaction_sampler.go +++ b/cmd/trace-agent/transaction_sampler.go @@ -43,9 +43,11 @@ func newTransactionSampler(analyzedSpansByService map[string]map[string]float64) func (s *transactionSampler) Extract(t processedTrace) []*model.Span { var transactions []*model.Span + // Get the trace priority + priority, hasPriority := t.getSamplingPriority() // inspect the WeightedTrace so that we can identify top-level spans for _, span := range t.WeightedTrace { - if s.shouldAnalyze(span) { + if s.shouldAnalyze(span, hasPriority, priority) { transactions = append(transactions, span.Span) } } @@ -53,10 +55,12 @@ func (s *transactionSampler) Extract(t processedTrace) []*model.Span { return transactions } -func (s *transactionSampler) shouldAnalyze(span *model.WeightedSpan) bool { +func (s *transactionSampler) shouldAnalyze(span *model.WeightedSpan, hasPriority bool, priority int) bool { if operations, ok := s.analyzedSpansByService[span.Service]; ok { if analyzeRate, ok := operations[span.Name]; ok { - if sampler.SampleByRate(span.TraceID, analyzeRate) { + // If the trace has been manually sampled, we keep all matching spans + highPriority := hasPriority && priority >= 2 + if highPriority || sampler.SampleByRate(span.TraceID, analyzeRate) { return true } } diff --git a/cmd/trace-agent/transaction_sampler_test.go b/cmd/trace-agent/transaction_sampler_test.go index d0bed201d..e0b24acf3 100644 --- a/cmd/trace-agent/transaction_sampler_test.go +++ b/cmd/trace-agent/transaction_sampler_test.go @@ -5,13 +5,18 @@ import ( "testing" "github.com/DataDog/datadog-trace-agent/model" + "github.com/DataDog/datadog-trace-agent/sampler" "github.com/stretchr/testify/assert" ) -func createTrace(serviceName string, operationName string, topLevel bool) processedTrace { +func createTrace(serviceName string, operationName string, topLevel bool, hasPriority bool, priority int) processedTrace { ws := model.WeightedSpan{TopLevel: topLevel, Span: &model.Span{Service: serviceName, Name: operationName}} + if hasPriority { + ws.Metrics = make(map[string]float64) + ws.Metrics[sampler.SamplingPriorityKey] = float64(priority) + } wt := model.WeightedTrace{&ws} - return processedTrace{WeightedTrace: wt} + return processedTrace{WeightedTrace: wt, Root: ws.Span} } func TestTransactionSampler(t *testing.T) { @@ -21,19 +26,25 @@ func TestTransactionSampler(t *testing.T) { config["myService"] = make(map[string]float64) config["myService"]["myOperation"] = 1 + config["mySampledService"] = make(map[string]float64) + config["mySampledService"]["myOperation"] = 0 + tests := []struct { name string trace processedTrace expectedSampling bool }{ - {"Top-level service and span name match", createTrace("myService", "myOperation", true), true}, - {"Top-level service name doesn't match", createTrace("otherService", "myOperation", true), false}, - {"Top-level span name doesn't match", createTrace("myService", "otherOperation", true), false}, - {"Top-level service and span name don't match", createTrace("otherService", "otherOperation", true), false}, - {"Non top-level service and span name match", createTrace("myService", "myOperation", false), true}, - {"Non top-level service name doesn't match", createTrace("otherService", "myOperation", false), false}, - {"Non top-level span name doesn't match", createTrace("myService", "otherOperation", false), false}, - {"Non top-level service and span name don't match", createTrace("otherService", "otherOperation", false), false}, + {"Top-level service and span name match", createTrace("myService", "myOperation", true, false, 0), true}, + {"Top-level service name doesn't match", createTrace("otherService", "myOperation", true, false, 0), false}, + {"Top-level span name doesn't match", createTrace("myService", "otherOperation", true, false, 0), false}, + {"Top-level service and span name don't match", createTrace("otherService", "otherOperation", true, false, 0), false}, + {"Non top-level service and span name match", createTrace("myService", "myOperation", false, false, 0), true}, + {"Non top-level service name doesn't match", createTrace("otherService", "myOperation", false, false, 0), false}, + {"Non top-level span name doesn't match", createTrace("myService", "otherOperation", false, false, 0), false}, + {"Non top-level service and span name don't match", createTrace("otherService", "otherOperation", false, false, 0), false}, + {"Match, sampling rate 0, no priority", createTrace("mySampledService", "myOperation", true, false, 0), false}, + {"Match, sampling rate 0, priority 1", createTrace("mySampledService", "myOperation", true, true, 1), false}, + {"Match, sampling rate 0, priority 2", createTrace("mySampledService", "myOperation", true, true, 2), true}, } for _, test := range tests { diff --git a/sampler/prioritysampler.go b/sampler/prioritysampler.go index ccea03cda..e84214ae4 100644 --- a/sampler/prioritysampler.go +++ b/sampler/prioritysampler.go @@ -22,7 +22,8 @@ import ( ) const ( - samplingPriorityKey = "_sampling_priority_v1" + // SamplingPriorityKey is the key of the sampling priority value in the metrics dictionnary of the root span + SamplingPriorityKey = "_sampling_priority_v1" syncPeriod = 3 * time.Second ) @@ -91,7 +92,7 @@ func (s *PriorityEngine) Sample(trace model.Trace, root *model.Span, env string) return false } - samplingPriority := root.Metrics[samplingPriorityKey] + samplingPriority := root.Metrics[SamplingPriorityKey] // Regardless of rates, sampling here is based on the metadata set // by the client library. Which, is turn, is based on agent hints, diff --git a/sampler/prioritysampler_test.go b/sampler/prioritysampler_test.go index 492e0dfe4..40babed39 100644 --- a/sampler/prioritysampler_test.go +++ b/sampler/prioritysampler_test.go @@ -48,7 +48,7 @@ func getTestTraceWithService(t *testing.T, service string, s *PriorityEngine) (m if r <= rate { priority = 1 } - trace[0].Metrics = map[string]float64{samplingPriorityKey: priority} + trace[0].Metrics = map[string]float64{SamplingPriorityKey: priority} return trace, trace[0] } @@ -67,7 +67,7 @@ func TestPrioritySample(t *testing.T) { s = getTestPriorityEngine() trace, root = getTestTraceWithService(t, "my-service", s) - root.Metrics[samplingPriorityKey] = -1 + root.Metrics[SamplingPriorityKey] = -1 assert.False(s.Sample(trace, root, env), "trace with negative priority is dropped") assert.Equal(0.0, s.Sampler.Backend.GetTotalScore(), "sampling a priority -1 trace should *NOT* impact sampler backend") assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a priority -1 trace should *NOT* impact sampler backend") @@ -75,7 +75,7 @@ func TestPrioritySample(t *testing.T) { s = getTestPriorityEngine() trace, root = getTestTraceWithService(t, "my-service", s) - root.Metrics[samplingPriorityKey] = 0 + root.Metrics[SamplingPriorityKey] = 0 assert.False(s.Sample(trace, root, env), "trace with priority 0 is dropped") assert.True(0.0 < s.Sampler.Backend.GetTotalScore(), "sampling a priority 0 trace should increase total score") assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a priority 0 trace should *NOT* increase sampled score") @@ -83,7 +83,7 @@ func TestPrioritySample(t *testing.T) { s = getTestPriorityEngine() trace, root = getTestTraceWithService(t, "my-service", s) - root.Metrics[samplingPriorityKey] = 1 + root.Metrics[SamplingPriorityKey] = 1 assert.True(s.Sample(trace, root, env), "trace with priority 1 is kept") assert.True(0.0 < s.Sampler.Backend.GetTotalScore(), "sampling a priority 0 trace should increase total score") assert.True(0.0 < s.Sampler.Backend.GetSampledScore(), "sampling a priority 0 trace should increase sampled score") @@ -91,7 +91,7 @@ func TestPrioritySample(t *testing.T) { s = getTestPriorityEngine() trace, root = getTestTraceWithService(t, "my-service", s) - root.Metrics[samplingPriorityKey] = 2 + root.Metrics[SamplingPriorityKey] = 2 assert.True(s.Sample(trace, root, env), "trace with priority 2 is kept") assert.Equal(0.0, s.Sampler.Backend.GetTotalScore(), "sampling a priority 2 trace should *NOT* increase total score") assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a priority 2 trace should *NOT* increase sampled score") @@ -99,12 +99,12 @@ func TestPrioritySample(t *testing.T) { s = getTestPriorityEngine() trace, root = getTestTraceWithService(t, "my-service", s) - root.Metrics[samplingPriorityKey] = 999 + root.Metrics[SamplingPriorityKey] = 999 assert.True(s.Sample(trace, root, env), "trace with high priority is kept") assert.Equal(0.0, s.Sampler.Backend.GetTotalScore(), "sampling a high priority trace should *NOT* increase total score") assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a high priority trace should *NOT* increase sampled score") - delete(root.Metrics, samplingPriorityKey) + delete(root.Metrics, SamplingPriorityKey) assert.False(s.Sample(trace, root, env), "this should not happen but a trace without priority sampling set should be dropped") } From f903d5f45272d77893ffc34e250ea40454ebe5a3 Mon Sep 17 00:00:00 2001 From: Bertrand Mermet Date: Fri, 27 Jul 2018 18:50:53 +0200 Subject: [PATCH 2/2] Address comments --- cmd/trace-agent/agent.go | 4 ++-- sampler/prioritysampler.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/trace-agent/agent.go b/cmd/trace-agent/agent.go index 6857d2f65..42c32d2fb 100644 --- a/cmd/trace-agent/agent.go +++ b/cmd/trace-agent/agent.go @@ -42,8 +42,8 @@ func (pt *processedTrace) getSamplingPriority() (int, bool) { if pt.Root == nil { return 0, false } - priorityFloat, hasPriority := pt.Root.Metrics[sampler.SamplingPriorityKey] - return int(priorityFloat), hasPriority + p, ok := pt.Root.Metrics[sampler.SamplingPriorityKey] + return int(p), ok } // Agent struct holds all the sub-routines structs and make the data flow between them diff --git a/sampler/prioritysampler.go b/sampler/prioritysampler.go index e84214ae4..87b0d1029 100644 --- a/sampler/prioritysampler.go +++ b/sampler/prioritysampler.go @@ -22,7 +22,7 @@ import ( ) const ( - // SamplingPriorityKey is the key of the sampling priority value in the metrics dictionnary of the root span + // SamplingPriorityKey is the key of the sampling priority value in the metrics map of the root span SamplingPriorityKey = "_sampling_priority_v1" syncPeriod = 3 * time.Second )