fix(metrics): don't forward duplicate metrics from annotated Pods #3171

Merged: 1 commit, Jul 28, 2023
1 change: 1 addition & 0 deletions .changelog/3171.fixed.txt
@@ -0,0 +1 @@
fix(metrics): don't forward duplicate metrics from annotated Pods
@@ -38,7 +38,7 @@ processors:
metrics:
metric:
# we let the metrics from annotations ("kubernetes-pods") through as they are
Contributor suggested a change:
- # we let the metrics from annotations ("kubernetes-pods") through as they are
+ # we let the metrics from annotations ("pod-annotations") through as they are

- - resource.attributes["service.name"] != "kubernetes-pods" and IsMatch(name, "scrape_.*")
+ - resource.attributes["service.name"] != "pod-annotations" and IsMatch(name, "scrape_.*")

receivers:
prometheus:
@@ -51,7 +51,7 @@ receivers:
## - prometheus.io/path: /metrics - path which the metric should be scraped from
## - prometheus.io/port: 9113 - port which the metric should be scraped from
## rel: https://github.com/prometheus-operator/kube-prometheus/pull/16#issuecomment-424318647
- job_name: "kubernetes-pods"
- job_name: "pod-annotations"
kubernetes_sd_configs:
- role: pod
relabel_configs:
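For context, the renamed pod-annotations job only scrapes Pods that opt in through the prometheus.io annotations described in the comments above. A minimal sketch of an annotated Pod, with illustrative name, image, and port values:

apiVersion: v1
kind: Pod
metadata:
  name: nginx
  annotations:
    prometheus.io/scrape: "true"    # opt this Pod in to annotation-based scraping
    prometheus.io/path: "/metrics"  # path the metrics are scraped from
    prometheus.io/port: "9113"      # port the metrics are scraped from
spec:
  containers:
    - name: nginx
      image: nginx:stable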
7 changes: 7 additions & 0 deletions deploy/helm/sumologic/conf/metrics/otelcol/processors.yaml
@@ -156,6 +156,13 @@ routing:
- exporters:
- sumologic/state
value: /prometheus.metrics.state
## custom metrics
## This entry is necessary due to a bug in the routing processor that prevents the routing key from being deleted
## if the default exporter is chosen
## See: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/24644
- exporters:
- sumologic/default
value: /prometheus.metrics.applications.custom

## Configuration for Source Processor
## Source processor adds Sumo Logic related metadata
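For reference, the overall shape of the routing configuration with this workaround, as a minimal sketch; the from_attribute key and the drop_resource_routing_attribute setting are illustrative assumptions, not part of this diff:

routing:
  attribute_source: resource
  from_attribute: http_listener_v2_path    # assumed routing key
  drop_resource_routing_attribute: true    # assumed; delete the key once routed
  default_exporters:
    - sumologic/default
  table:
    - value: /prometheus.metrics.state
      exporters:
        - sumologic/state
    ## explicit table entry targeting the default exporter: table matches get the
    ## routing key deleted, while the implicit default route currently does not
    - value: /prometheus.metrics.applications.custom
      exporters:
        - sumologic/default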
65 changes: 64 additions & 1 deletion deploy/helm/sumologic/values.yaml

Large diffs are not rendered by default.

@@ -220,6 +220,9 @@ data:
- exporters:
- sumologic/state
value: /prometheus.metrics.state
- exporters:
- sumologic/default
value: /prometheus.metrics.applications.custom
source:
collector: kubernetes
sumologic_schema:
@@ -220,6 +220,9 @@ data:
- exporters:
- sumologic/state
value: /prometheus.metrics.state
- exporters:
- sumologic/default
value: /prometheus.metrics.applications.custom
source:
collector: kubernetes
sumologic_schema:
@@ -93,7 +93,7 @@ spec:
metrics:
metric:
# we let the metrics from annotations ("kubernetes-pods") through as they are
- - resource.attributes["service.name"] != "kubernetes-pods" and IsMatch(name, "scrape_.*")
+ - resource.attributes["service.name"] != "pod-annotations" and IsMatch(name, "scrape_.*")

receivers:
prometheus:
@@ -106,7 +106,7 @@ spec:
## - prometheus.io/path: /metrics - path which the metric should be scraped from
## - prometheus.io/port: 9113 - port which the metric should be scraped from
## rel: https://github.com/prometheus-operator/kube-prometheus/pull/16#issuecomment-424318647
- - job_name: "kubernetes-pods"
+ - job_name: "pod-annotations"
kubernetes_sd_configs:
- role: pod
relabel_configs:
@@ -115,7 +115,7 @@ spec:
metrics:
metric:
# we let the metrics from annotations ("kubernetes-pods") through as they are
- - resource.attributes["service.name"] != "kubernetes-pods" and IsMatch(name, "scrape_.*")
+ - resource.attributes["service.name"] != "pod-annotations" and IsMatch(name, "scrape_.*")

receivers:
prometheus:
@@ -128,7 +128,7 @@ spec:
## - prometheus.io/path: /metrics - path which the metric should be scraped from
## - prometheus.io/port: 9113 - port which the metric should be scraped from
## rel: https://github.com/prometheus-operator/kube-prometheus/pull/16#issuecomment-424318647
- - job_name: "kubernetes-pods"
+ - job_name: "pod-annotations"
kubernetes_sd_configs:
- role: pod
relabel_configs:
98 changes: 53 additions & 45 deletions tests/integration/features.go
@@ -51,9 +51,6 @@ func GetMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollect
Assess("expected metrics are present",
stepfuncs.WaitUntilExpectedMetricsPresent(
expectedMetrics,
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
2*time.Minute, // take longer to account for recording rule metrics
tickDuration,
),
@@ -131,6 +128,59 @@ func GetMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollect
Feature()
}

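// GetTelegrafMetricsFeature returns a feature that deploys the annotated nginx test workload and verifies its metrics arrive with the expected labels.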
func GetTelegrafMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollector, errOnExtra bool) features.Feature {
return features.New("telegraf_metrics").
Setup(stepfuncs.KubectlApplyFOpt(internal.NginxTelegrafMetricsTest, internal.NginxTelegrafNamespace)).
Assess("expected metrics are present",
stepfuncs.WaitUntilExpectedMetricsPresentWithFilters(
expectedMetrics,
receivermock.MetadataFilters{"deployment": "nginx", "endpoint": "/metrics"},
errOnExtra,
waitDuration,
tickDuration,
),
).
Assess("expected labels are present for annotation metrics",
func(ctx context.Context, t *testing.T, envConf *envconf.Config) context.Context {
metricFilters := receivermock.MetadataFilters{"__name__": "nginx_accepts", "endpoint": "/metrics"}
releaseName := ctxopts.HelmRelease(ctx)
namespace := ctxopts.Namespace(ctx)
expectedLabels := receivermock.Labels{
"cluster": "kubernetes",
"_origin": "kubernetes",
"deployment": "nginx",
"endpoint": "/metrics",
"namespace": internal.NginxTelegrafNamespace,
"node": internal.NodeNameRegex,
"pod_labels_app": "nginx",
"pod_labels_pod-template-hash": ".+",
"pod": "nginx-.+",
"replicaset": "nginx-.*",
"service": "nginx",
"app": "nginx",
"host": "nginx-.+",
"port": "80",
"server": "localhost",
"pod_template_hash": ".+",
}
expectedLabels = addCollectorSpecificMetricLabels(expectedLabels, releaseName, namespace, metricsCollector)

// drop some unnecessary labels
delete(expectedLabels, "k8s.node.name")
delete(expectedLabels, "prometheus_service")

// Prometheus currently suffers from a bug where it removes the job label, but Otel keeps it
if metricsCollector == Otelcol {
expectedLabels["job"] = "pod-annotations"
}

return stepfuncs.WaitUntilExpectedMetricLabelsPresent(metricFilters, expectedLabels, waitDuration, tickDuration)(ctx, t, envConf)
},
).
Teardown(stepfuncs.KubectlDeleteFOpt(internal.NginxTelegrafMetricsTest, internal.NginxTelegrafNamespace)).
Feature()
}

// addCollectorSpecificMetricLabels adds labels which are present only for the specific metric collector or metadata Service
func addCollectorSpecificMetricLabels(labels receivermock.Labels, releaseName string, serviceMonitorNamespace string, collector MetricsCollector) receivermock.Labels {
outputLabels := make(receivermock.Labels, len(labels))
@@ -194,9 +244,6 @@ func GetLogsFeature() features.Feature {
"pod_labels_app": internal.LogsGeneratorName,
"deployment": internal.LogsGeneratorName,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -207,9 +254,6 @@ func GetLogsFeature() features.Feature {
"pod_labels_app": internal.LogsGeneratorName,
"daemonset": internal.LogsGeneratorName,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -242,9 +286,6 @@ func GetLogsFeature() features.Feature {
),
"_sourceHost": internal.EmptyRegex,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -276,9 +317,6 @@ func GetLogsFeature() features.Feature {
),
"_sourceHost": internal.EmptyRegex,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -290,9 +328,6 @@ func GetLogsFeature() features.Feature {
"_sourceCategory": "kubernetes/system",
"_sourceHost": internal.NodeNameRegex,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -304,9 +339,6 @@ func GetLogsFeature() features.Feature {
"_sourceCategory": "kubernetes/kubelet",
"_sourceHost": internal.NodeNameRegex,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -338,9 +370,6 @@ func GetMultilineLogsFeature() features.Feature {
"namespace": internal.MultilineLogsNamespace,
"pod_labels_example": internal.MultilineLogsPodName,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -357,9 +386,6 @@ func GetEventsFeature() features.Feature {
"_sourceCategory": fmt.Sprintf("%s/events", internal.ClusterName),
"cluster": "kubernetes",
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -391,9 +417,6 @@ func GetTracesFeature() features.Feature {
// "_sourceCategory": "kubernetes/customer/trace/tester/customer/trace/tester",
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -413,9 +436,6 @@ func GetTracesFeature() features.Feature {
// "_sourceCategory": "kubernetes/customer/trace/tester/customer/trace/tester",
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
Comment on lines -416 to -418
Contributor:
We are not going to check receiver-mock metrics anymore?

Author:
This doesn't change any of the test checks; I just moved this logic to a separate function, which gets these values from module constants.

Author:
I do want to get rid of checking for receiver-mock metrics, because they're annoying to work with - receiver-mock only emits some of them based on ingested data, so they're different for each test - but I didn't do that in this PR.

waitDuration,
tickDuration,
)).
@@ -435,9 +455,6 @@ func GetTracesFeature() features.Feature {
// "_sourceCategory": "kubernetes/customer/trace/tester/customer/trace/tester",
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -458,18 +475,12 @@ func GetTracesFeature() features.Feature {
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
"otel.library.name": "jaegerThriftHttp",
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Assess("wait for all spans", stepfuncs.WaitUntilExpectedSpansPresent(
4*tracesPerExporter*spansPerTrace, // there are 4 exporters
map[string]string{},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
@@ -492,9 +503,6 @@ func GetTailingSidecarFeature() features.Feature {
"namespace": internal.TailingSidecarTestNamespace,
"deployment": internal.TailingSidecarTestDeploymentName,
},
- internal.ReceiverMockNamespace,
- internal.ReceiverMockServiceName,
- internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
4 changes: 3 additions & 1 deletion tests/integration/helm_fluentbit_fluentd_test.go
@@ -25,9 +25,11 @@ func Test_Helm_FluentBit_Fluentd(t *testing.T) {

featMetrics := GetMetricsFeature(expectedMetrics, Fluentd)

featTelegrafMetrics := GetTelegrafMetricsFeature(internal.NginxMetrics, Fluentd, false)

featLogs := GetLogsFeature()

featEvents := GetEventsFeature()

- testenv.Test(t, featInstall, featMetrics, featLogs, featEvents)
+ testenv.Test(t, featInstall, featMetrics, featTelegrafMetrics, featLogs, featEvents)
}
4 changes: 3 additions & 1 deletion tests/integration/helm_ot_default_test.go
@@ -29,6 +29,8 @@ func Test_Helm_Default_OT(t *testing.T) {

featMetrics := GetMetricsFeature(expectedMetrics, Prometheus)

featTelegrafMetrics := GetTelegrafMetricsFeature(internal.NginxMetrics, Prometheus, false)

featLogs := GetLogsFeature()

featMultilineLogs := GetMultilineLogsFeature()
@@ -37,5 +39,5 @@

featTraces := GetTracesFeature()

- testenv.Test(t, featInstall, featMetrics, featLogs, featMultilineLogs, featEvents, featTraces)
+ testenv.Test(t, featInstall, featMetrics, featTelegrafMetrics, featLogs, featMultilineLogs, featEvents, featTraces)
}
4 changes: 3 additions & 1 deletion tests/integration/helm_ot_metrics_test.go
@@ -58,5 +58,7 @@ func Test_Helm_OT_Metrics(t *testing.T) {

featMetrics := GetMetricsFeature(expectedMetrics, Otelcol)

- testenv.Test(t, featInstall, featMetrics)
featTelegrafMetrics := GetTelegrafMetricsFeature(internal.NginxMetrics, Otelcol, false)

+ testenv.Test(t, featInstall, featMetrics, featTelegrafMetrics)
}
13 changes: 13 additions & 0 deletions tests/integration/internal/constants.go
@@ -41,6 +41,9 @@ const (
TailingSidecarTest = "yamls/tailing-sidecar-test.yaml"
TailingSidecarTestDeploymentName = "test-tailing-sidecar-operator"

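// manifest and namespace for the annotated nginx workload used by the Telegraf metrics test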
NginxTelegrafMetricsTest = "yamls/nginx.yaml"
NginxTelegrafNamespace = "nginx"

// useful regular expressions for matching metadata
PodDeploymentSuffixRegex = "-[a-z0-9]{9,10}-[a-z0-9]{4,5}" // the Pod suffix for Deployments
PodDaemonSetSuffixRegex = "-[a-z0-9]{4,5}"
@@ -387,6 +390,16 @@ var (
"receiver_mock_logs_bytes_ip_count",
"receiver_mock_metrics_ip_count",
}

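// NginxMetrics lists the metric names expected from the nginx test workload.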
NginxMetrics = []string{
"nginx_accepts",
"nginx_active",
"nginx_handled",
"nginx_reading",
"nginx_requests",
"nginx_waiting",
"nginx_writing",
}
)

var (
14 changes: 14 additions & 0 deletions tests/integration/internal/k8s/pods.go
@@ -1,10 +1,13 @@
package k8s

import (
"context"
"fmt"
"testing"
"time"

"github.com/SumoLogic/sumologic-kubernetes-collection/tests/integration/internal"
"github.com/SumoLogic/sumologic-kubernetes-collection/tests/integration/internal/ctxopts"
"github.com/gruntwork-io/terratest/modules/k8s"
"github.com/gruntwork-io/terratest/modules/retry"
"github.com/stretchr/testify/require"
@@ -72,6 +75,17 @@ func WaitUntilPodsAvailableE(
return nil
}

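// WaitUntilReceiverMockAvailable waits until the receiver-mock Service is available, taking the namespace and Service name from package constants rather than from arguments.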
func WaitUntilReceiverMockAvailable(
ctx context.Context,
t *testing.T,
waitDuration time.Duration,
tickDuration time.Duration,
) {
kubectlOpts := *ctxopts.KubectlOptions(ctx)
kubectlOpts.Namespace = internal.ReceiverMockNamespace
k8s.WaitUntilServiceAvailable(t, &kubectlOpts, internal.ReceiverMockServiceName, int(waitDuration), tickDuration)
}

func formatSelectors(listOptions v1.ListOptions) string {
return fmt.Sprintf("LabelSelector: %q, FieldSelector: %q",
listOptions.LabelSelector, listOptions.FieldSelector,