From 8fcbd6462f2274c2aec33c61bf1a2fb3bae1ff80 Mon Sep 17 00:00:00 2001 From: jvoravong <47871238+jvoravong@users.noreply.github.com> Date: Fri, 11 Mar 2022 11:06:07 -0700 Subject: [PATCH] [receiver/k8sclusterreceiver] Fix k8s node and container cpu metrics not being reported properly (#8245) * Added a feature gate that enables k8s node and container cpu metrics to be reported as double values * Refactored testutils/metrics.go methods to support asserting int and double metrics * Added tests for node cpu metrics being reported as double cpu units with the feature gate enabled * patch2 code review changes * patch3 code review changes Co-authored-by: Alex Boten --- CHANGELOG.md | 3 + receiver/k8sclusterreceiver/README.md | 37 +++++++++ .../internal/collection/collector.go | 29 +++++++ .../internal/collection/containers.go | 17 ++-- .../internal/collection/cronjobs_test.go | 2 +- .../internal/collection/daemonsets_test.go | 8 +- .../internal/collection/deployments_test.go | 4 +- .../internal/collection/hpa_test.go | 8 +- .../internal/collection/jobs_test.go | 16 ++-- .../internal/collection/namespaces_test.go | 2 +- .../internal/collection/nodes.go | 63 ++++++++------- .../internal/collection/nodes_test.go | 78 +++++++++++++++++-- .../internal/collection/pods_test.go | 78 +++++++++++++++++-- .../internal/collection/replicasets_test.go | 4 +- .../internal/collection/statefulsets_test.go | 8 +- .../internal/testutils/metrics.go | 35 ++++++--- .../internal/utils/timeseries.go | 11 +++ 17 files changed, 322 insertions(+), 81 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 431f18f530dd..b4989413a76e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,9 @@ - `zipkinexporter`: Set "error" tag value when status is set to error (#8187) - `prometheusremotewriteexporter`: Correctly handle metric labels which collide after sanitization (#8378) +- `k8sclusterreceiver`: Add support to enable k8s node and container cpu metrics to be reported as double values (#8245) + - Use "--feature-gates=receiver.k8sclusterreceiver.reportCpuMetricsAsDouble" to enable reporting node and container + cpu metrics as a double values. ### 🚀 New components 🚀 diff --git a/receiver/k8sclusterreceiver/README.md b/receiver/k8sclusterreceiver/README.md index 65cfc2d8d4f7..6959dc4cfd6a 100644 --- a/receiver/k8sclusterreceiver/README.md +++ b/receiver/k8sclusterreceiver/README.md @@ -52,6 +52,43 @@ Example: The full list of settings exposed for this receiver are documented [here](./config.go) with detailed sample configurations [here](./testdata/config.yaml). +### Feature Gate Configurations +- `receiver.k8sclusterreceiver.reportCpuMetricsAsDouble` + - Description + - The k8s container and node cpu metrics being reported by the k8sclusterreceiver are transitioning from being + reported as integer millicpu units to being reported as double cpu units to adhere to opentelemetry cpu metric + specifications. Please update any monitoring this might affect, the change will cause cpu metrics to be double + instead of integer values as well as metric values will be scaled down by 1000x. You can control whether the + k8sclusterreceiver reports container and node cpu metrics in double cpu units instead of integer millicpu units + with the feature gate listed below. + - Affected Metrics + - k8s.container.cpu_request + - k8s.container.cpu_limit + - k8s.node.allocatable_cpu + - Stages and Timeline + - Alpha + - In this stage the feature is disabled by default and must be enabled by the user. + - Target version + - v0.47.0 + - Beta + - In this stage the feature enabled by default but can be disabled by the user. + - Target version + - v0.50.0 + - Generally Available + - In this stage the feature is permanently enabled and the feature gate is no longer available. + - Target version + - v0.53.0 + - Usage + - Feature gate identifiers prefixed with - will disable the gate and prefixing with + or with no prefix will enable the gate. + - Start the otelcol with the feature gate enabled: + - otelcol {other_arguments} --feature-gates=receiver.k8sclusterreceiver.reportCpuMetricsAsDouble + - Start the otelcol with the feature gate disabled: + - otelcol {other_arguments} --feature-gates=-receiver.k8sclusterreceiver.reportCpuMetricsAsDouble + - More Information + - [collector.go where the the feature gate is registered](./internal/collection/collector.go) + - https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8115 + - https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/system-metrics.md#systemcpu---processor-metrics + ### node_conditions_to_report For example, with the config below the receiver will emit two metrics diff --git a/receiver/k8sclusterreceiver/internal/collection/collector.go b/receiver/k8sclusterreceiver/internal/collection/collector.go index 9c374f673ab3..14de5ec63b46 100644 --- a/receiver/k8sclusterreceiver/internal/collection/collector.go +++ b/receiver/k8sclusterreceiver/internal/collection/collector.go @@ -21,6 +21,7 @@ import ( agentmetricspb "github.com/census-instrumentation/opencensus-proto/gen-go/agent/metrics/v1" quotav1 "github.com/openshift/api/quota/v1" "go.opentelemetry.io/collector/model/pdata" + "go.opentelemetry.io/collector/service/featuregate" "go.uber.org/zap" appsv1 "k8s.io/api/apps/v1" autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2" @@ -64,6 +65,9 @@ const ( k8sKindReplicationController = "ReplicationController" k8sKindReplicaSet = "ReplicaSet" k8sStatefulSet = "StatefulSet" + + // ID for a temporary feature gate + reportCPUMetricsAsDoubleFeatureGateID = "receiver.k8sclusterreceiver.reportCpuMetricsAsDouble" ) // DataCollector wraps around a metricsStore and a metadaStore exposing @@ -78,8 +82,33 @@ type DataCollector struct { allocatableTypesToReport []string } +var reportCPUMetricsAsDoubleFeatureGate = featuregate.Gate{ + ID: reportCPUMetricsAsDoubleFeatureGateID, + Enabled: false, + Description: "The k8s container and node cpu metrics being reported by the k8sclusterreceiver are transitioning " + + "from being reported as integer millicpu units to being reported as double cpu units to adhere to " + + "opentelemetry cpu metric specifications. You can control whether the k8sclusterreceiver reports container " + + "and node cpu metrics in double cpu units instead of integer millicpu units with the " + + "receiver.k8sclusterreceiver.reportCpuMetricsAsDouble feature gate. " + + "For more details see: " + + "https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/k8sclusterreceiver/README.md#feature-gate-configurations ", +} + +func init() { + featuregate.Register(reportCPUMetricsAsDoubleFeatureGate) +} + // NewDataCollector returns a DataCollector. func NewDataCollector(logger *zap.Logger, nodeConditionsToReport, allocatableTypesToReport []string) *DataCollector { + if featuregate.IsEnabled(reportCPUMetricsAsDoubleFeatureGateID) { + logger.Info("The receiver.k8sclusterreceiver.reportCpuMetricsAsDouble feature gate is enabled. This " + + "otel collector will report double cpu units, which is good for future support!") + } else { + logger.Info("WARNING - Breaking Change: " + reportCPUMetricsAsDoubleFeatureGate.Description) + logger.Info("The receiver.k8sclusterreceiver.reportCpuMetricsAsDouble feature gate is disabled. This " + + "otel collector will report integer cpu units, be aware this will not be supported in the future.") + } + return &DataCollector{ logger: logger, metricsStore: &metricsStore{ diff --git a/receiver/k8sclusterreceiver/internal/collection/containers.go b/receiver/k8sclusterreceiver/internal/collection/containers.go index e7535b9137e3..d24c72f4299d 100644 --- a/receiver/k8sclusterreceiver/internal/collection/containers.go +++ b/receiver/k8sclusterreceiver/internal/collection/containers.go @@ -20,6 +20,7 @@ import ( metricspb "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1" resourcepb "github.com/census-instrumentation/opencensus-proto/gen-go/resource/v1" conventions "go.opentelemetry.io/collector/model/semconv/v1.6.1" + "go.opentelemetry.io/collector/service/featuregate" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" @@ -110,20 +111,26 @@ func getSpecMetricsForContainer(c corev1.Container) []*metricspb.Metric { }, } { for k, v := range t.rl { - val := v.Value() + val := utils.GetInt64TimeSeries(v.Value()) + valType := metricspb.MetricDescriptor_GAUGE_INT64 if k == corev1.ResourceCPU { - val = v.MilliValue() + if featuregate.IsEnabled(reportCPUMetricsAsDoubleFeatureGateID) { + // cpu metrics must be of the double type to adhere to opentelemetry system.cpu metric specifications + valType = metricspb.MetricDescriptor_GAUGE_DOUBLE + val = utils.GetDoubleTimeSeries(float64(v.MilliValue()) / 1000.0) + } else { + val = utils.GetInt64TimeSeries(v.MilliValue()) + } } - metrics = append(metrics, &metricspb.Metric{ MetricDescriptor: &metricspb.MetricDescriptor{ Name: fmt.Sprintf("k8s.container.%s_%s", k, t.typ), Description: t.description, - Type: metricspb.MetricDescriptor_GAUGE_INT64, + Type: valType, }, Timeseries: []*metricspb.TimeSeries{ - utils.GetInt64TimeSeries(val), + val, }, }, ) diff --git a/receiver/k8sclusterreceiver/internal/collection/cronjobs_test.go b/receiver/k8sclusterreceiver/internal/collection/cronjobs_test.go index 1b76cd6fe6e3..d4d89f9465c3 100644 --- a/receiver/k8sclusterreceiver/internal/collection/cronjobs_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/cronjobs_test.go @@ -44,7 +44,7 @@ func TestCronJobMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[0], "k8s.cronjob.active_jobs", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[0], "k8s.cronjob.active_jobs", metricspb.MetricDescriptor_GAUGE_INT64, 2) } diff --git a/receiver/k8sclusterreceiver/internal/collection/daemonsets_test.go b/receiver/k8sclusterreceiver/internal/collection/daemonsets_test.go index b5db650aa89c..f75032004640 100644 --- a/receiver/k8sclusterreceiver/internal/collection/daemonsets_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/daemonsets_test.go @@ -44,16 +44,16 @@ func TestDaemonsetMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.metrics[0], "k8s.daemonset.current_scheduled_nodes", + testutils.AssertMetricsInt(t, rm.metrics[0], "k8s.daemonset.current_scheduled_nodes", metricspb.MetricDescriptor_GAUGE_INT64, 3) - testutils.AssertMetrics(t, rm.metrics[1], "k8s.daemonset.desired_scheduled_nodes", + testutils.AssertMetricsInt(t, rm.metrics[1], "k8s.daemonset.desired_scheduled_nodes", metricspb.MetricDescriptor_GAUGE_INT64, 5) - testutils.AssertMetrics(t, rm.metrics[2], "k8s.daemonset.misscheduled_nodes", + testutils.AssertMetricsInt(t, rm.metrics[2], "k8s.daemonset.misscheduled_nodes", metricspb.MetricDescriptor_GAUGE_INT64, 1) - testutils.AssertMetrics(t, rm.metrics[3], "k8s.daemonset.ready_nodes", + testutils.AssertMetricsInt(t, rm.metrics[3], "k8s.daemonset.ready_nodes", metricspb.MetricDescriptor_GAUGE_INT64, 2) } diff --git a/receiver/k8sclusterreceiver/internal/collection/deployments_test.go b/receiver/k8sclusterreceiver/internal/collection/deployments_test.go index 324f69b0f6b5..cca72b2816c7 100644 --- a/receiver/k8sclusterreceiver/internal/collection/deployments_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/deployments_test.go @@ -44,10 +44,10 @@ func TestDeploymentMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.metrics[0], "k8s.deployment.desired", + testutils.AssertMetricsInt(t, rm.metrics[0], "k8s.deployment.desired", metricspb.MetricDescriptor_GAUGE_INT64, 10) - testutils.AssertMetrics(t, rm.metrics[1], "k8s.deployment.available", + testutils.AssertMetricsInt(t, rm.metrics[1], "k8s.deployment.available", metricspb.MetricDescriptor_GAUGE_INT64, 3) } diff --git a/receiver/k8sclusterreceiver/internal/collection/hpa_test.go b/receiver/k8sclusterreceiver/internal/collection/hpa_test.go index 1d0f95a8ca48..988b148a60f9 100644 --- a/receiver/k8sclusterreceiver/internal/collection/hpa_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/hpa_test.go @@ -44,16 +44,16 @@ func TestHPAMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.metrics[0], "k8s.hpa.max_replicas", + testutils.AssertMetricsInt(t, rm.metrics[0], "k8s.hpa.max_replicas", metricspb.MetricDescriptor_GAUGE_INT64, 10) - testutils.AssertMetrics(t, rm.metrics[1], "k8s.hpa.min_replicas", + testutils.AssertMetricsInt(t, rm.metrics[1], "k8s.hpa.min_replicas", metricspb.MetricDescriptor_GAUGE_INT64, 2) - testutils.AssertMetrics(t, rm.metrics[2], "k8s.hpa.current_replicas", + testutils.AssertMetricsInt(t, rm.metrics[2], "k8s.hpa.current_replicas", metricspb.MetricDescriptor_GAUGE_INT64, 5) - testutils.AssertMetrics(t, rm.metrics[3], "k8s.hpa.desired_replicas", + testutils.AssertMetricsInt(t, rm.metrics[3], "k8s.hpa.desired_replicas", metricspb.MetricDescriptor_GAUGE_INT64, 7) } diff --git a/receiver/k8sclusterreceiver/internal/collection/jobs_test.go b/receiver/k8sclusterreceiver/internal/collection/jobs_test.go index fe94bc21f4e9..0cb3decfb2f5 100644 --- a/receiver/k8sclusterreceiver/internal/collection/jobs_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/jobs_test.go @@ -43,19 +43,19 @@ func TestJobMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[0], "k8s.job.active_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[0], "k8s.job.active_pods", metricspb.MetricDescriptor_GAUGE_INT64, 2) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[1], "k8s.job.failed_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[1], "k8s.job.failed_pods", metricspb.MetricDescriptor_GAUGE_INT64, 0) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[2], "k8s.job.successful_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[2], "k8s.job.successful_pods", metricspb.MetricDescriptor_GAUGE_INT64, 3) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[3], "k8s.job.desired_successful_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[3], "k8s.job.desired_successful_pods", metricspb.MetricDescriptor_GAUGE_INT64, 10) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[4], "k8s.job.max_parallel_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[4], "k8s.job.max_parallel_pods", metricspb.MetricDescriptor_GAUGE_INT64, 2) // Test with nil values. @@ -65,13 +65,13 @@ func TestJobMetrics(t *testing.T) { require.Equal(t, 1, len(actualResourceMetrics)) require.Equal(t, 3, len(actualResourceMetrics[0].metrics)) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[0], "k8s.job.active_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[0], "k8s.job.active_pods", metricspb.MetricDescriptor_GAUGE_INT64, 2) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[1], "k8s.job.failed_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[1], "k8s.job.failed_pods", metricspb.MetricDescriptor_GAUGE_INT64, 0) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[2], "k8s.job.successful_pods", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[2], "k8s.job.successful_pods", metricspb.MetricDescriptor_GAUGE_INT64, 3) } diff --git a/receiver/k8sclusterreceiver/internal/collection/namespaces_test.go b/receiver/k8sclusterreceiver/internal/collection/namespaces_test.go index 3bf1a315a7a5..9a150e7e575b 100644 --- a/receiver/k8sclusterreceiver/internal/collection/namespaces_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/namespaces_test.go @@ -42,7 +42,7 @@ func TestNamespaceMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[0], "k8s.namespace.phase", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[0], "k8s.namespace.phase", metricspb.MetricDescriptor_GAUGE_INT64, 0) } diff --git a/receiver/k8sclusterreceiver/internal/collection/nodes.go b/receiver/k8sclusterreceiver/internal/collection/nodes.go index a4e7d1e9f674..fd29d42e4bb8 100644 --- a/receiver/k8sclusterreceiver/internal/collection/nodes.go +++ b/receiver/k8sclusterreceiver/internal/collection/nodes.go @@ -22,6 +22,7 @@ import ( resourcepb "github.com/census-instrumentation/opencensus-proto/gen-go/resource/v1" "github.com/iancoleman/strcase" conventions "go.opentelemetry.io/collector/model/semconv/v1.6.1" + "go.opentelemetry.io/collector/service/featuregate" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" @@ -61,27 +62,46 @@ func getMetricsForNode(node *corev1.Node, nodeConditionTypesToReport, allocatabl }, }) } + // Adding 'node allocatable type' metrics for _, nodeAllocatableTypeValue := range allocatableTypesToReport { nodeAllocatableMetric := getNodeAllocatableMetric(nodeAllocatableTypeValue) v1NodeAllocatableTypeValue := corev1.ResourceName(nodeAllocatableTypeValue) - metricValue, err := nodeAllocatableValue(node, v1NodeAllocatableTypeValue) - - // metrics will be skipped if metric not present in node or value is not convertable to int64 - if err != nil { - logger.Debug(err.Error()) + valType := metricspb.MetricDescriptor_GAUGE_INT64 + quantity, ok := node.Status.Allocatable[v1NodeAllocatableTypeValue] + if !ok { + logger.Debug(fmt.Errorf("allocatable type %v not found in node %v", nodeAllocatableTypeValue, + node.GetName()).Error()) + continue + } + val := utils.GetInt64TimeSeries(quantity.Value()) + + if featuregate.IsEnabled(reportCPUMetricsAsDoubleFeatureGateID) { + // cpu metrics must be of the double type to adhere to opentelemetry system.cpu metric specifications + if v1NodeAllocatableTypeValue == corev1.ResourceCPU { + val = utils.GetDoubleTimeSeries(float64(quantity.MilliValue()) / 1000.0) + valType = metricspb.MetricDescriptor_GAUGE_DOUBLE + } } else { - metrics = append(metrics, &metricspb.Metric{ - MetricDescriptor: &metricspb.MetricDescriptor{ - Name: nodeAllocatableMetric, - Description: allocatableDesciption[v1NodeAllocatableTypeValue.String()], - Type: metricspb.MetricDescriptor_GAUGE_INT64, - }, - Timeseries: []*metricspb.TimeSeries{ - utils.GetInt64TimeSeries(metricValue), - }, - }) + // metrics will be skipped if metric not present in node or value is not convertable to int64 + valInt64, ok := quantity.AsInt64() + if !ok { + logger.Debug(fmt.Errorf("metric %s has value %v which is not convertable to int64", + v1NodeAllocatableTypeValue, node.GetName()).Error()) + continue + } + val = utils.GetInt64TimeSeries(valInt64) } + metrics = append(metrics, &metricspb.Metric{ + MetricDescriptor: &metricspb.MetricDescriptor{ + Name: nodeAllocatableMetric, + Description: allocatableDesciption[v1NodeAllocatableTypeValue.String()], + Type: valType, + }, + Timeseries: []*metricspb.TimeSeries{ + val, + }, + }) } return []*resourceMetrics{ @@ -117,19 +137,6 @@ var nodeConditionValues = map[corev1.ConditionStatus]int64{ corev1.ConditionUnknown: -1, } -func nodeAllocatableValue(node *corev1.Node, allocatableType corev1.ResourceName) (int64, error) { - value, ok := node.Status.Allocatable[allocatableType] - if !ok { - return 0, fmt.Errorf("allocatable type %v not found in node %v", allocatableType, node.GetName()) - } - - val, ok := value.AsInt64() - if !ok { - return 0, fmt.Errorf("metric %s has value %v which is not convertable to int64", allocatableType, value) - } - return val, nil -} - func nodeConditionValue(node *corev1.Node, condType corev1.NodeConditionType) int64 { status := corev1.ConditionUnknown for _, c := range node.Status.Conditions { diff --git a/receiver/k8sclusterreceiver/internal/collection/nodes_test.go b/receiver/k8sclusterreceiver/internal/collection/nodes_test.go index 85d23bcf070d..9446de03662a 100644 --- a/receiver/k8sclusterreceiver/internal/collection/nodes_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/nodes_test.go @@ -19,6 +19,7 @@ import ( metricspb "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/service/featuregate" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -28,7 +29,9 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/testutils" ) -func TestNodeMetrics(t *testing.T) { +func TestNodeMetricsReportCPUMetricsAsInt(t *testing.T) { + // disable the feature gate + featuregate.Apply(map[string]bool{reportCPUMetricsAsDoubleFeatureGateID: false}) n := newNode("1") actualResourceMetrics := getMetricsForNode(n, []string{"Ready", "MemoryPressure"}, []string{"cpu", "memory", "ephemeral-storage", "storage"}, zap.NewNop()) @@ -44,24 +47,87 @@ func TestNodeMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[0], "k8s.node.condition_ready", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[0], "k8s.node.condition_ready", metricspb.MetricDescriptor_GAUGE_INT64, 1) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[1], "k8s.node.condition_memory_pressure", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[1], "k8s.node.condition_memory_pressure", metricspb.MetricDescriptor_GAUGE_INT64, 0) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[2], "k8s.node.allocatable_cpu", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[2], "k8s.node.allocatable_cpu", metricspb.MetricDescriptor_GAUGE_INT64, 123) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[3], "k8s.node.allocatable_memory", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[3], "k8s.node.allocatable_memory", metricspb.MetricDescriptor_GAUGE_INT64, 456) - testutils.AssertMetrics(t, actualResourceMetrics[0].metrics[4], "k8s.node.allocatable_ephemeral_storage", + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[4], "k8s.node.allocatable_ephemeral_storage", metricspb.MetricDescriptor_GAUGE_INT64, 1234) +} + +func TestNodeMetricsReportCPUMetricsAsDouble(t *testing.T) { + // enable the feature gate + featuregate.Apply(map[string]bool{reportCPUMetricsAsDoubleFeatureGateID: true}) + n := newNode("1") + + actualResourceMetrics := getMetricsForNode(n, []string{"Ready", "MemoryPressure"}, []string{"cpu", "memory", "ephemeral-storage", "storage"}, zap.NewNop()) + + require.Equal(t, 1, len(actualResourceMetrics)) + + require.Equal(t, 5, len(actualResourceMetrics[0].metrics)) + testutils.AssertResource(t, actualResourceMetrics[0].resource, k8sType, + map[string]string{ + "k8s.node.uid": "test-node-1-uid", + "k8s.node.name": "test-node-1", + "k8s.cluster.name": "test-cluster", + }, + ) + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[0], "k8s.node.condition_ready", + metricspb.MetricDescriptor_GAUGE_INT64, 1) + + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[1], "k8s.node.condition_memory_pressure", + metricspb.MetricDescriptor_GAUGE_INT64, 0) + + testutils.AssertMetricsDouble(t, actualResourceMetrics[0].metrics[2], "k8s.node.allocatable_cpu", + metricspb.MetricDescriptor_GAUGE_DOUBLE, 3.14) + + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[3], "k8s.node.allocatable_memory", + metricspb.MetricDescriptor_GAUGE_INT64, 456) + + testutils.AssertMetricsInt(t, actualResourceMetrics[0].metrics[4], "k8s.node.allocatable_ephemeral_storage", + metricspb.MetricDescriptor_GAUGE_INT64, 1234) } func newNode(id string) *corev1.Node { + if featuregate.IsEnabled(reportCPUMetricsAsDoubleFeatureGateID) { + return &corev1.Node{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-node-" + id, + UID: types.UID("test-node-" + id + "-uid"), + ClusterName: "test-cluster", + Labels: map[string]string{ + "foo": "bar", + "foo1": "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + { + Status: corev1.ConditionFalse, + Type: corev1.NodeMemoryPressure, + }, + }, + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(3140, resource.DecimalSI), + corev1.ResourceMemory: *resource.NewQuantity(456, resource.DecimalSI), + corev1.ResourceEphemeralStorage: *resource.NewQuantity(1234, resource.DecimalSI), + }, + }, + } + } return &corev1.Node{ ObjectMeta: v1.ObjectMeta{ Name: "test-node-" + id, diff --git a/receiver/k8sclusterreceiver/internal/collection/pods_test.go b/receiver/k8sclusterreceiver/internal/collection/pods_test.go index 3e5ae3a0f264..fa3263db6a74 100644 --- a/receiver/k8sclusterreceiver/internal/collection/pods_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/pods_test.go @@ -22,6 +22,7 @@ import ( metricspb "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1" resourcepb "github.com/census-instrumentation/opencensus-proto/gen-go/resource/v1" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/service/featuregate" "go.uber.org/zap" "go.uber.org/zap/zapcore" "go.uber.org/zap/zaptest/observer" @@ -34,7 +35,10 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/testutils" ) -func TestPodAndContainerMetrics(t *testing.T) { +func TestPodAndContainerMetricsReportCPUMetricsAsInt(t *testing.T) { + // disable the feature gate + featuregate.Apply(map[string]bool{reportCPUMetricsAsDoubleFeatureGateID: false}) + pod := newPodWithContainer( "1", podSpecWithContainer("container-name"), @@ -60,7 +64,7 @@ func TestPodAndContainerMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.Metrics[0], "k8s.pod.phase", + testutils.AssertMetricsInt(t, rm.Metrics[0], "k8s.pod.phase", metricspb.MetricDescriptor_GAUGE_INT64, 3) rm = rms[1] @@ -80,19 +84,81 @@ func TestPodAndContainerMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.Metrics[0], "k8s.container.restarts", + testutils.AssertMetricsInt(t, rm.Metrics[0], "k8s.container.restarts", metricspb.MetricDescriptor_GAUGE_INT64, 3) - testutils.AssertMetrics(t, rm.Metrics[1], "k8s.container.ready", + testutils.AssertMetricsInt(t, rm.Metrics[1], "k8s.container.ready", metricspb.MetricDescriptor_GAUGE_INT64, 1) - testutils.AssertMetrics(t, rm.Metrics[2], "k8s.container.cpu_request", + testutils.AssertMetricsInt(t, rm.Metrics[2], "k8s.container.cpu_request", metricspb.MetricDescriptor_GAUGE_INT64, 10000) - testutils.AssertMetrics(t, rm.Metrics[3], "k8s.container.cpu_limit", + testutils.AssertMetricsInt(t, rm.Metrics[3], "k8s.container.cpu_limit", metricspb.MetricDescriptor_GAUGE_INT64, 20000) } +func TestPodAndContainerMetricsReportCPUMetricsAsDouble(t *testing.T) { + // enable the feature gate + featuregate.Apply(map[string]bool{reportCPUMetricsAsDoubleFeatureGateID: true}) + + pod := newPodWithContainer( + "1", + podSpecWithContainer("container-name"), + podStatusWithContainer("container-name", containerIDWithPreifx("container-id")), + ) + dc := NewDataCollector(zap.NewNop(), []string{}, []string{}) + + dc.SyncMetrics(pod) + actualResourceMetrics := dc.metricsStore.metricsCache + + rms := actualResourceMetrics["test-pod-1-uid"] + require.NotNil(t, rms) + + rm := rms[0] + require.Equal(t, 1, len(rm.Metrics)) + testutils.AssertResource(t, rm.Resource, k8sType, + map[string]string{ + "k8s.pod.uid": "test-pod-1-uid", + "k8s.pod.name": "test-pod-1", + "k8s.node.name": "test-node", + "k8s.namespace.name": "test-namespace", + "k8s.cluster.name": "test-cluster", + }, + ) + + testutils.AssertMetricsInt(t, rm.Metrics[0], "k8s.pod.phase", + metricspb.MetricDescriptor_GAUGE_INT64, 3) + + rm = rms[1] + + require.Equal(t, 4, len(rm.Metrics)) + testutils.AssertResource(t, rm.Resource, "container", + map[string]string{ + "container.id": "container-id", + "k8s.container.name": "container-name", + "container.image.name": "container-image-name", + "container.image.tag": "latest", + "k8s.pod.uid": "test-pod-1-uid", + "k8s.pod.name": "test-pod-1", + "k8s.node.name": "test-node", + "k8s.namespace.name": "test-namespace", + "k8s.cluster.name": "test-cluster", + }, + ) + + testutils.AssertMetricsInt(t, rm.Metrics[0], "k8s.container.restarts", + metricspb.MetricDescriptor_GAUGE_INT64, 3) + + testutils.AssertMetricsInt(t, rm.Metrics[1], "k8s.container.ready", + metricspb.MetricDescriptor_GAUGE_INT64, 1) + + testutils.AssertMetricsDouble(t, rm.Metrics[2], "k8s.container.cpu_request", + metricspb.MetricDescriptor_GAUGE_DOUBLE, 10.0) + + testutils.AssertMetricsDouble(t, rm.Metrics[3], "k8s.container.cpu_limit", + metricspb.MetricDescriptor_GAUGE_DOUBLE, 20.0) +} + func newPodWithContainer(id string, spec *corev1.PodSpec, status *corev1.PodStatus) *corev1.Pod { return &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ diff --git a/receiver/k8sclusterreceiver/internal/collection/replicasets_test.go b/receiver/k8sclusterreceiver/internal/collection/replicasets_test.go index 4decff0e8c91..d1685bd38662 100644 --- a/receiver/k8sclusterreceiver/internal/collection/replicasets_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/replicasets_test.go @@ -44,10 +44,10 @@ func TestReplicasetMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.metrics[0], "k8s.replicaset.desired", + testutils.AssertMetricsInt(t, rm.metrics[0], "k8s.replicaset.desired", metricspb.MetricDescriptor_GAUGE_INT64, 3) - testutils.AssertMetrics(t, rm.metrics[1], "k8s.replicaset.available", + testutils.AssertMetricsInt(t, rm.metrics[1], "k8s.replicaset.available", metricspb.MetricDescriptor_GAUGE_INT64, 2) } diff --git a/receiver/k8sclusterreceiver/internal/collection/statefulsets_test.go b/receiver/k8sclusterreceiver/internal/collection/statefulsets_test.go index 7db719b657ad..a100e2404ed2 100644 --- a/receiver/k8sclusterreceiver/internal/collection/statefulsets_test.go +++ b/receiver/k8sclusterreceiver/internal/collection/statefulsets_test.go @@ -44,16 +44,16 @@ func TestStatefulsettMetrics(t *testing.T) { }, ) - testutils.AssertMetrics(t, rm.metrics[0], "k8s.statefulset.desired_pods", + testutils.AssertMetricsInt(t, rm.metrics[0], "k8s.statefulset.desired_pods", metricspb.MetricDescriptor_GAUGE_INT64, 10) - testutils.AssertMetrics(t, rm.metrics[1], "k8s.statefulset.ready_pods", + testutils.AssertMetricsInt(t, rm.metrics[1], "k8s.statefulset.ready_pods", metricspb.MetricDescriptor_GAUGE_INT64, 7) - testutils.AssertMetrics(t, rm.metrics[2], "k8s.statefulset.current_pods", + testutils.AssertMetricsInt(t, rm.metrics[2], "k8s.statefulset.current_pods", metricspb.MetricDescriptor_GAUGE_INT64, 5) - testutils.AssertMetrics(t, rm.metrics[3], "k8s.statefulset.updated_pods", + testutils.AssertMetricsInt(t, rm.metrics[3], "k8s.statefulset.updated_pods", metricspb.MetricDescriptor_GAUGE_INT64, 3) } diff --git a/receiver/k8sclusterreceiver/internal/testutils/metrics.go b/receiver/k8sclusterreceiver/internal/testutils/metrics.go index a03fd971e4ae..35e22cefa6d8 100644 --- a/receiver/k8sclusterreceiver/internal/testutils/metrics.go +++ b/receiver/k8sclusterreceiver/internal/testutils/metrics.go @@ -53,12 +53,33 @@ func AssertMetricsWithLabels(t *testing.T, actualMetric *metricspb.Metric, "mismatching labels", ) - AssertMetrics(t, actualMetric, expectedMetric, expectedType, expectedValue) + AssertMetricsInt(t, actualMetric, expectedMetric, expectedType, expectedValue) } -func AssertMetrics(t *testing.T, actualMetric *metricspb.Metric, - expectedMetric string, expectedType metricspb.MetricDescriptor_Type, - expectedValue int64) { +func AssertMetricsInt(t *testing.T, actualMetric *metricspb.Metric, expectedMetric string, + expectedType metricspb.MetricDescriptor_Type, expectedValue int64) { + assertMetricsBase(t, actualMetric, expectedMetric, expectedType) + + require.Equal(t, + expectedValue, + actualMetric.Timeseries[0].Points[0].GetInt64Value(), + "mismatching metric values", + ) +} + +func AssertMetricsDouble(t *testing.T, actualMetric *metricspb.Metric, expectedMetric string, + expectedType metricspb.MetricDescriptor_Type, expectedValue float64) { + assertMetricsBase(t, actualMetric, expectedMetric, expectedType) + + require.Equal(t, + expectedValue, + actualMetric.Timeseries[0].Points[0].GetDoubleValue(), + "mismatching metric values", + ) +} + +func assertMetricsBase(t *testing.T, actualMetric *metricspb.Metric, expectedMetric string, + expectedType metricspb.MetricDescriptor_Type) { require.Equal(t, expectedMetric, @@ -76,12 +97,6 @@ func AssertMetrics(t *testing.T, actualMetric *metricspb.Metric, actualMetric.MetricDescriptor.Type, "mismatching metric types", ) - - require.Equal(t, - expectedValue, - actualMetric.Timeseries[0].Points[0].GetInt64Value(), - "mismatching metric values", - ) } // getLabelsMap returns a map of labels. diff --git a/receiver/k8sclusterreceiver/internal/utils/timeseries.go b/receiver/k8sclusterreceiver/internal/utils/timeseries.go index f8af70071964..ab8917f09694 100644 --- a/receiver/k8sclusterreceiver/internal/utils/timeseries.go +++ b/receiver/k8sclusterreceiver/internal/utils/timeseries.go @@ -26,3 +26,14 @@ func GetInt64TimeSeriesWithLabels(val int64, labelVals []*v1.LabelValue) *v1.Tim Points: []*v1.Point{{Value: &v1.Point_Int64Value{Int64Value: val}}}, } } + +func GetDoubleTimeSeries(val float64) *v1.TimeSeries { + return GetDoubleTimeSeriesWithLabels(val, nil) +} + +func GetDoubleTimeSeriesWithLabels(val float64, labelVals []*v1.LabelValue) *v1.TimeSeries { + return &v1.TimeSeries{ + LabelValues: labelVals, + Points: []*v1.Point{{Value: &v1.Point_DoubleValue{DoubleValue: val}}}, + } +}