From 7560336c15b55901dc80e7fc11d2c8976b20967b Mon Sep 17 00:00:00 2001
From: Bertrand Mermet <bertrand.mermet@datadoghq.com>
Date: Wed, 25 Jul 2018 15:44:08 +0200
Subject: [PATCH 1/2] Force sampling of transactions with priority 2

Makes sure that when a span from a trace with a sampling priority of 2
matches the critera to be analyzed it is kept even if the rate based
sampling policy would discard it.
---
 cmd/trace-agent/agent.go                    | 11 ++++++--
 cmd/trace-agent/transaction_sampler.go      | 10 +++++--
 cmd/trace-agent/transaction_sampler_test.go | 31 ++++++++++++++-------
 sampler/prioritysampler.go                  |  5 ++--
 sampler/prioritysampler_test.go             | 14 +++++-----
 5 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/cmd/trace-agent/agent.go b/cmd/trace-agent/agent.go
index ef666ab4b..6857d2f65 100644
--- a/cmd/trace-agent/agent.go
+++ b/cmd/trace-agent/agent.go
@@ -21,7 +21,6 @@ import (
 
 const (
 	processStatsInterval = time.Minute
-	samplingPriorityKey  = "_sampling_priority_v1"
 )
 
 type processedTrace struct {
@@ -39,6 +38,14 @@ func (pt *processedTrace) weight() float64 {
 	return pt.Root.Weight()
 }
 
+func (pt *processedTrace) getSamplingPriority() (int, bool) {
+	if pt.Root == nil {
+		return 0, false
+	}
+	priorityFloat, hasPriority := pt.Root.Metrics[sampler.SamplingPriorityKey]
+	return int(priorityFloat), hasPriority
+}
+
 // Agent struct holds all the sub-routines structs and make the data flow between them
 type Agent struct {
 	Receiver           *HTTPReceiver
@@ -193,7 +200,7 @@ func (a *Agent) Process(t model.Trace) {
 		samplers = append(samplers, a.ScoreSampler)
 	}
 
-	priority, hasPriority := root.Metrics[samplingPriorityKey]
+	priority, hasPriority := root.Metrics[sampler.SamplingPriorityKey]
 	if hasPriority {
 		// If Priority is defined, send to priority sampling, regardless of priority value.
 		// The sampler will keep or discard the trace, but we send everything so that it
diff --git a/cmd/trace-agent/transaction_sampler.go b/cmd/trace-agent/transaction_sampler.go
index 295e7b349..204ace254 100644
--- a/cmd/trace-agent/transaction_sampler.go
+++ b/cmd/trace-agent/transaction_sampler.go
@@ -43,9 +43,11 @@ func newTransactionSampler(analyzedSpansByService map[string]map[string]float64)
 func (s *transactionSampler) Extract(t processedTrace) []*model.Span {
 	var transactions []*model.Span
 
+	// Get the trace priority
+	priority, hasPriority := t.getSamplingPriority()
 	// inspect the WeightedTrace so that we can identify top-level spans
 	for _, span := range t.WeightedTrace {
-		if s.shouldAnalyze(span) {
+		if s.shouldAnalyze(span, hasPriority, priority) {
 			transactions = append(transactions, span.Span)
 		}
 	}
@@ -53,10 +55,12 @@ func (s *transactionSampler) Extract(t processedTrace) []*model.Span {
 	return transactions
 }
 
-func (s *transactionSampler) shouldAnalyze(span *model.WeightedSpan) bool {
+func (s *transactionSampler) shouldAnalyze(span *model.WeightedSpan, hasPriority bool, priority int) bool {
 	if operations, ok := s.analyzedSpansByService[span.Service]; ok {
 		if analyzeRate, ok := operations[span.Name]; ok {
-			if sampler.SampleByRate(span.TraceID, analyzeRate) {
+			// If the trace has been manually sampled, we keep all matching spans
+			highPriority := hasPriority && priority >= 2
+			if highPriority || sampler.SampleByRate(span.TraceID, analyzeRate) {
 				return true
 			}
 		}
diff --git a/cmd/trace-agent/transaction_sampler_test.go b/cmd/trace-agent/transaction_sampler_test.go
index d0bed201d..e0b24acf3 100644
--- a/cmd/trace-agent/transaction_sampler_test.go
+++ b/cmd/trace-agent/transaction_sampler_test.go
@@ -5,13 +5,18 @@ import (
 	"testing"
 
 	"github.com/DataDog/datadog-trace-agent/model"
+	"github.com/DataDog/datadog-trace-agent/sampler"
 	"github.com/stretchr/testify/assert"
 )
 
-func createTrace(serviceName string, operationName string, topLevel bool) processedTrace {
+func createTrace(serviceName string, operationName string, topLevel bool, hasPriority bool, priority int) processedTrace {
 	ws := model.WeightedSpan{TopLevel: topLevel, Span: &model.Span{Service: serviceName, Name: operationName}}
+	if hasPriority {
+		ws.Metrics = make(map[string]float64)
+		ws.Metrics[sampler.SamplingPriorityKey] = float64(priority)
+	}
 	wt := model.WeightedTrace{&ws}
-	return processedTrace{WeightedTrace: wt}
+	return processedTrace{WeightedTrace: wt, Root: ws.Span}
 }
 
 func TestTransactionSampler(t *testing.T) {
@@ -21,19 +26,25 @@ func TestTransactionSampler(t *testing.T) {
 	config["myService"] = make(map[string]float64)
 	config["myService"]["myOperation"] = 1
 
+	config["mySampledService"] = make(map[string]float64)
+	config["mySampledService"]["myOperation"] = 0
+
 	tests := []struct {
 		name             string
 		trace            processedTrace
 		expectedSampling bool
 	}{
-		{"Top-level service and span name match", createTrace("myService", "myOperation", true), true},
-		{"Top-level service name doesn't match", createTrace("otherService", "myOperation", true), false},
-		{"Top-level span name doesn't match", createTrace("myService", "otherOperation", true), false},
-		{"Top-level service and span name don't match", createTrace("otherService", "otherOperation", true), false},
-		{"Non top-level service and span name match", createTrace("myService", "myOperation", false), true},
-		{"Non top-level service name doesn't match", createTrace("otherService", "myOperation", false), false},
-		{"Non top-level span name doesn't match", createTrace("myService", "otherOperation", false), false},
-		{"Non top-level service and span name don't match", createTrace("otherService", "otherOperation", false), false},
+		{"Top-level service and span name match", createTrace("myService", "myOperation", true, false, 0), true},
+		{"Top-level service name doesn't match", createTrace("otherService", "myOperation", true, false, 0), false},
+		{"Top-level span name doesn't match", createTrace("myService", "otherOperation", true, false, 0), false},
+		{"Top-level service and span name don't match", createTrace("otherService", "otherOperation", true, false, 0), false},
+		{"Non top-level service and span name match", createTrace("myService", "myOperation", false, false, 0), true},
+		{"Non top-level service name doesn't match", createTrace("otherService", "myOperation", false, false, 0), false},
+		{"Non top-level span name doesn't match", createTrace("myService", "otherOperation", false, false, 0), false},
+		{"Non top-level service and span name don't match", createTrace("otherService", "otherOperation", false, false, 0), false},
+		{"Match, sampling rate 0, no priority", createTrace("mySampledService", "myOperation", true, false, 0), false},
+		{"Match, sampling rate 0, priority 1", createTrace("mySampledService", "myOperation", true, true, 1), false},
+		{"Match, sampling rate 0, priority 2", createTrace("mySampledService", "myOperation", true, true, 2), true},
 	}
 
 	for _, test := range tests {
diff --git a/sampler/prioritysampler.go b/sampler/prioritysampler.go
index ccea03cda..e84214ae4 100644
--- a/sampler/prioritysampler.go
+++ b/sampler/prioritysampler.go
@@ -22,7 +22,8 @@ import (
 )
 
 const (
-	samplingPriorityKey = "_sampling_priority_v1"
+	// SamplingPriorityKey is the key of the sampling priority value in the metrics dictionnary of the root span
+	SamplingPriorityKey = "_sampling_priority_v1"
 	syncPeriod          = 3 * time.Second
 )
 
@@ -91,7 +92,7 @@ func (s *PriorityEngine) Sample(trace model.Trace, root *model.Span, env string)
 		return false
 	}
 
-	samplingPriority := root.Metrics[samplingPriorityKey]
+	samplingPriority := root.Metrics[SamplingPriorityKey]
 
 	// Regardless of rates, sampling here is based on the metadata set
 	// by the client library. Which, is turn, is based on agent hints,
diff --git a/sampler/prioritysampler_test.go b/sampler/prioritysampler_test.go
index 492e0dfe4..40babed39 100644
--- a/sampler/prioritysampler_test.go
+++ b/sampler/prioritysampler_test.go
@@ -48,7 +48,7 @@ func getTestTraceWithService(t *testing.T, service string, s *PriorityEngine) (m
 	if r <= rate {
 		priority = 1
 	}
-	trace[0].Metrics = map[string]float64{samplingPriorityKey: priority}
+	trace[0].Metrics = map[string]float64{SamplingPriorityKey: priority}
 	return trace, trace[0]
 }
 
@@ -67,7 +67,7 @@ func TestPrioritySample(t *testing.T) {
 	s = getTestPriorityEngine()
 	trace, root = getTestTraceWithService(t, "my-service", s)
 
-	root.Metrics[samplingPriorityKey] = -1
+	root.Metrics[SamplingPriorityKey] = -1
 	assert.False(s.Sample(trace, root, env), "trace with negative priority is dropped")
 	assert.Equal(0.0, s.Sampler.Backend.GetTotalScore(), "sampling a priority -1 trace should *NOT* impact sampler backend")
 	assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a priority -1 trace should *NOT* impact sampler backend")
@@ -75,7 +75,7 @@ func TestPrioritySample(t *testing.T) {
 	s = getTestPriorityEngine()
 	trace, root = getTestTraceWithService(t, "my-service", s)
 
-	root.Metrics[samplingPriorityKey] = 0
+	root.Metrics[SamplingPriorityKey] = 0
 	assert.False(s.Sample(trace, root, env), "trace with priority 0 is dropped")
 	assert.True(0.0 < s.Sampler.Backend.GetTotalScore(), "sampling a priority 0 trace should increase total score")
 	assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a priority 0 trace should *NOT* increase sampled score")
@@ -83,7 +83,7 @@ func TestPrioritySample(t *testing.T) {
 	s = getTestPriorityEngine()
 	trace, root = getTestTraceWithService(t, "my-service", s)
 
-	root.Metrics[samplingPriorityKey] = 1
+	root.Metrics[SamplingPriorityKey] = 1
 	assert.True(s.Sample(trace, root, env), "trace with priority 1 is kept")
 	assert.True(0.0 < s.Sampler.Backend.GetTotalScore(), "sampling a priority 0 trace should increase total score")
 	assert.True(0.0 < s.Sampler.Backend.GetSampledScore(), "sampling a priority 0 trace should increase sampled score")
@@ -91,7 +91,7 @@ func TestPrioritySample(t *testing.T) {
 	s = getTestPriorityEngine()
 	trace, root = getTestTraceWithService(t, "my-service", s)
 
-	root.Metrics[samplingPriorityKey] = 2
+	root.Metrics[SamplingPriorityKey] = 2
 	assert.True(s.Sample(trace, root, env), "trace with priority 2 is kept")
 	assert.Equal(0.0, s.Sampler.Backend.GetTotalScore(), "sampling a priority 2 trace should *NOT* increase total score")
 	assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a priority 2 trace should *NOT* increase sampled score")
@@ -99,12 +99,12 @@ func TestPrioritySample(t *testing.T) {
 	s = getTestPriorityEngine()
 	trace, root = getTestTraceWithService(t, "my-service", s)
 
-	root.Metrics[samplingPriorityKey] = 999
+	root.Metrics[SamplingPriorityKey] = 999
 	assert.True(s.Sample(trace, root, env), "trace with high priority is kept")
 	assert.Equal(0.0, s.Sampler.Backend.GetTotalScore(), "sampling a high priority trace should *NOT* increase total score")
 	assert.Equal(0.0, s.Sampler.Backend.GetSampledScore(), "sampling a high priority trace should *NOT* increase sampled score")
 
-	delete(root.Metrics, samplingPriorityKey)
+	delete(root.Metrics, SamplingPriorityKey)
 	assert.False(s.Sample(trace, root, env), "this should not happen but a trace without priority sampling set should be dropped")
 }
 

From f903d5f45272d77893ffc34e250ea40454ebe5a3 Mon Sep 17 00:00:00 2001
From: Bertrand Mermet <bertrand.mermet@datadoghq.com>
Date: Fri, 27 Jul 2018 18:50:53 +0200
Subject: [PATCH 2/2] Address comments

---
 cmd/trace-agent/agent.go   | 4 ++--
 sampler/prioritysampler.go | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmd/trace-agent/agent.go b/cmd/trace-agent/agent.go
index 6857d2f65..42c32d2fb 100644
--- a/cmd/trace-agent/agent.go
+++ b/cmd/trace-agent/agent.go
@@ -42,8 +42,8 @@ func (pt *processedTrace) getSamplingPriority() (int, bool) {
 	if pt.Root == nil {
 		return 0, false
 	}
-	priorityFloat, hasPriority := pt.Root.Metrics[sampler.SamplingPriorityKey]
-	return int(priorityFloat), hasPriority
+	p, ok := pt.Root.Metrics[sampler.SamplingPriorityKey]
+	return int(p), ok
 }
 
 // Agent struct holds all the sub-routines structs and make the data flow between them
diff --git a/sampler/prioritysampler.go b/sampler/prioritysampler.go
index e84214ae4..87b0d1029 100644
--- a/sampler/prioritysampler.go
+++ b/sampler/prioritysampler.go
@@ -22,7 +22,7 @@ import (
 )
 
 const (
-	// SamplingPriorityKey is the key of the sampling priority value in the metrics dictionnary of the root span
+	// SamplingPriorityKey is the key of the sampling priority value in the metrics map of the root span
 	SamplingPriorityKey = "_sampling_priority_v1"
 	syncPeriod          = 3 * time.Second
 )