From 75c993cb402cb66dd75f3a3050f5cd784197cb75 Mon Sep 17 00:00:00 2001 From: Yang Song Date: Tue, 24 Sep 2024 11:04:52 -0400 Subject: [PATCH 1/2] [connector/datadog] Support obfuscating sql queries in APM stats --- .chloggen/dd-conn-obfuscate.yaml | 27 +++++++ .../datadogconnector/connector_native.go | 39 +++++++++- .../datadogconnector/connector_native_test.go | 74 +++++++++++++++++++ connector/datadogconnector/go.mod | 2 +- 4 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 .chloggen/dd-conn-obfuscate.yaml diff --git a/.chloggen/dd-conn-obfuscate.yaml b/.chloggen/dd-conn-obfuscate.yaml new file mode 100644 index 000000000000..11a4871eabc8 --- /dev/null +++ b/.chloggen/dd-conn-obfuscate.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: datadogconnector + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Support obfuscating sql queries in APM stats + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]'
+change_logs: []
diff --git a/connector/datadogconnector/connector_native.go b/connector/datadogconnector/connector_native.go
index a78f410845b1..5a80f54a1fbf 100644
--- a/connector/datadogconnector/connector_native.go
+++ b/connector/datadogconnector/connector_native.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/DataDog/datadog-agent/comp/otelcol/otlp/components/statsprocessor"
+	"github.com/DataDog/datadog-agent/pkg/obfuscate"
 	pb "github.com/DataDog/datadog-agent/pkg/proto/pbgo/trace"
 	"github.com/DataDog/datadog-agent/pkg/trace/config"
 	"github.com/DataDog/datadog-agent/pkg/trace/stats"
@@ -48,6 +49,10 @@ type traceToMetricConnectorNative struct {
 	// resulting from ingested traces.
 	statsout chan *pb.StatsPayload
 
+	// obfuscator is used to obfuscate sensitive data from various span
+	// tags based on their type.
+	obfuscator *obfuscate.Obfuscator
+
 	// exit specifies the exit channel, which will be closed upon shutdown.
 	exit chan struct{}
 
@@ -73,6 +78,8 @@ func newTraceToMetricConnectorNative(set component.TelemetrySettings, cfg compon
 	}
 
 	tcfg := getTraceAgentCfg(set.Logger, cfg.(*Config).Traces, attributesTranslator)
+	oconf := tcfg.Obfuscation.Export(tcfg)
+	oconf.Statsd = metricsClient
 	return &traceToMetricConnectorNative{
 		logger:          set.Logger,
 		translator:      trans,
@@ -82,6 +89,7 @@ func newTraceToMetricConnectorNative(set component.TelemetrySettings, cfg compon
 		concentrator:    stats.NewConcentrator(tcfg, statsWriter, time.Now(), metricsClient),
 		statsout:        statsout,
 		metricsConsumer: metricsConsumer,
+		obfuscator:      obfuscate.NewObfuscator(oconf),
 		exit:            make(chan struct{}),
 	}, nil
 }
@@ -103,8 +111,9 @@ func (c *traceToMetricConnectorNative) Shutdown(context.Context) error {
 		return nil
 	}
 	c.logger.Info("Shutting down datadog connector")
-	c.logger.Info("Stopping concentrator")
-	// stop the concentrator and wait for the run loop to exit
+	c.logger.Info("Stopping obfuscator and concentrator")
+	// stop the obfuscator and concentrator and wait for the run loop to exit
+	c.obfuscator.Stop()
 	c.concentrator.Stop()
 	c.exit <- struct{}{} // signal exit
 	<-c.exit             // wait for close
@@ -135,6 +144,15 @@ func (c *traceToMetricConnectorNative) run() {
 			if len(stats.Stats) == 0 {
 				continue
 			}
+
+			for _, csp := range stats.Stats {
+				for _, group := range csp.Stats {
+					for _, b := range group.Stats {
+						c.obfuscateStatsGroup(b)
+					}
+				}
+			}
+
 			var mx pmetric.Metrics
 			var err error
 
@@ -158,3 +176,20 @@ func (c *traceToMetricConnectorNative) run() {
 		}
 	}
 }
+
+// obfuscateStatsGroup rewrites b.Resource in place based on b.Type; on SQL failure only the error is logged, not the raw query.
+// fork of https://github.com/DataDog/datadog-agent/blob/7642cf1aa659f82744038602044a8a00aa1a0dfb/pkg/trace/agent/obfuscate.go#L109
+func (c *traceToMetricConnectorNative) obfuscateStatsGroup(b *pb.ClientGroupedStats) {
+	switch b.Type {
+	case "sql", "cassandra":
+		oq, err := c.obfuscator.ObfuscateSQLString(b.Resource)
+		if err != nil {
+			c.logger.Error(fmt.Sprintf("Error obfuscating stats group resource: %v", err))
+			b.Resource = "Non-parsable SQL query"
+			return
+		}
+		b.Resource = oq.Query
+	case "redis":
+		b.Resource = c.obfuscator.QuantizeRedisString(b.Resource)
+	}
+}
diff --git a/connector/datadogconnector/connector_native_test.go b/connector/datadogconnector/connector_native_test.go
index 991565941eaa..4ea743365bd5 100644
--- a/connector/datadogconnector/connector_native_test.go
+++ b/connector/datadogconnector/connector_native_test.go
@@ -249,3 +249,77 @@ func TestMeasuredAndClientKindNative(t *testing.T) {
 		t.Errorf("Diff between APM stats -want +got:\n%v", diff)
 	}
 }
+
+func TestObfuscate(t *testing.T) {
+	cfg := NewFactory().CreateDefaultConfig().(*Config)
+	cfg.Traces.BucketInterval = time.Second
+	connector, metricsSink := creteConnectorNativeWithCfg(t, cfg)
+	err := connector.Start(context.Background(), componenttest.NewNopHost())
+	require.NoError(t, err)
+	defer func() {
+		require.NoError(t, connector.Shutdown(context.Background()))
+	}()
+
+	td := ptrace.NewTraces()
+	res := td.ResourceSpans().AppendEmpty().Resource()
+	res.Attributes().PutStr("service.name", "svc")
+	res.Attributes().PutStr(conventions127.AttributeDeploymentEnvironmentName, "my-env")
+
+	ss := td.ResourceSpans().At(0).ScopeSpans().AppendEmpty().Spans()
+	s := ss.AppendEmpty()
+	s.SetName("name")
+	s.SetKind(ptrace.SpanKindClient)
+	s.SetTraceID(testTraceID)
+	s.SetSpanID(testSpanID1)
+	s.Attributes().PutStr("span.type", "sql")
+	s.Attributes().PutStr("operation.name", "sql_query")
+	s.Attributes().PutStr("resource.name", "SELECT username FROM users WHERE id = 123") // id value 123 should be obfuscated
+
+	err = connector.ConsumeTraces(context.Background(), td)
+	require.NoError(t, err)
+
+	timeout := time.Now().Add(1 * time.Minute)
+	for time.Now().Before(timeout) {
+		if len(metricsSink.AllMetrics()) > 0 {
+			break
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+
+	metrics := metricsSink.AllMetrics()
+	require.Len(t, metrics, 1)
+
+	ch := make(chan []byte, 100)
+	tr := newTranslatorWithStatsChannel(t, zap.NewNop(), ch)
+	_, err = tr.MapMetrics(context.Background(), metrics[0], nil)
+	require.NoError(t, err)
+	msg := <-ch
+	sp := &pb.StatsPayload{}
+
+	err = proto.Unmarshal(msg, sp)
+	require.NoError(t, err)
+	assert.Len(t, sp.Stats, 1)
+	assert.Len(t, sp.Stats[0].Stats, 1)
+	assert.Equal(t, "my-env", sp.Stats[0].Env)
+	assert.Len(t, sp.Stats[0].Stats[0].Stats, 1)
+	cgss := sp.Stats[0].Stats[0].Stats
+	expected := []*pb.ClientGroupedStats{
+		{
+			Service:      "svc",
+			Name:         "sql_query",
+			Resource:     "SELECT username FROM users WHERE id = ?",
+			Type:         "sql",
+			Hits:         1,
+			TopLevelHits: 1,
+			SpanKind:     "client",
+			IsTraceRoot:  pb.Trilean_TRUE,
+		},
+	}
+	if diff := cmp.Diff(
+		expected,
+		cgss,
+		protocmp.Transform(),
+		protocmp.IgnoreFields(&pb.ClientGroupedStats{}, "duration", "okSummary", "errorSummary")); diff != "" {
+		t.Errorf("Diff between APM stats -want +got:\n%v", diff)
+	}
+}
diff --git a/connector/datadogconnector/go.mod b/connector/datadogconnector/go.mod
index 7da950b0e2c8..d6ced9f4bb1a 100644
---
a/connector/datadogconnector/go.mod +++ b/connector/datadogconnector/go.mod @@ -5,6 +5,7 @@ go 1.22.0 require ( github.com/DataDog/datadog-agent/comp/otelcol/otlp/components/metricsclient v0.57.1 github.com/DataDog/datadog-agent/comp/otelcol/otlp/components/statsprocessor v0.57.1 + github.com/DataDog/datadog-agent/pkg/obfuscate v0.59.0-devel.0.20240911192058-0c2181220f85 github.com/DataDog/datadog-agent/pkg/proto v0.57.1 github.com/DataDog/datadog-agent/pkg/trace v0.59.0-devel.0.20240911192058-0c2181220f85 github.com/DataDog/datadog-go/v5 v5.5.0 @@ -73,7 +74,6 @@ require ( github.com/DataDog/datadog-agent/pkg/logs/sources v0.57.1 // indirect github.com/DataDog/datadog-agent/pkg/logs/status/statusinterface v0.57.1 // indirect github.com/DataDog/datadog-agent/pkg/logs/status/utils v0.57.1 // indirect - github.com/DataDog/datadog-agent/pkg/obfuscate v0.59.0-devel.0.20240911192058-0c2181220f85 // indirect github.com/DataDog/datadog-agent/pkg/remoteconfig/state v0.57.1 // indirect github.com/DataDog/datadog-agent/pkg/status/health v0.57.1 // indirect github.com/DataDog/datadog-agent/pkg/telemetry v0.57.1 // indirect From 040d29c9ac038aed36f052b2373356845c02febd Mon Sep 17 00:00:00 2001 From: Yang Song Date: Tue, 24 Sep 2024 11:29:06 -0400 Subject: [PATCH 2/2] Update .chloggen/dd-conn-obfuscate.yaml --- .chloggen/dd-conn-obfuscate.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/dd-conn-obfuscate.yaml b/.chloggen/dd-conn-obfuscate.yaml index 11a4871eabc8..07f2df2a1e08 100644 --- a/.chloggen/dd-conn-obfuscate.yaml +++ b/.chloggen/dd-conn-obfuscate.yaml @@ -10,7 +10,7 @@ component: datadogconnector note: Support obfuscating sql queries in APM stats # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. -issues: [] +issues: [35401] # (Optional) One or more lines of additional information to render under the primary note. 
# These lines will be padded with 2 spaces and then inserted directly into the document.