diff --git a/api/v1beta1/flowcollector_types.go b/api/v1beta1/flowcollector_types.go
index 18c48d90f..040b1dfec 100644
--- a/api/v1beta1/flowcollector_types.go
+++ b/api/v1beta1/flowcollector_types.go
@@ -352,13 +352,13 @@ type FLPMetrics struct {
// +optional
IgnoreTags []string `json:"ignoreTags"`
- // `includeList` is a list of metric names to specify which metrics to generate.
- // The names correspond to the name in Prometheus, without the prefix. For example,
+ // `includeList` is a list of metric names to specify which ones to generate.
+ // The names correspond to the names in Prometheus without the prefix. For example,
// `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus.
- // Available names are: `namespace_egress_bytes_total`, `namespace_egress_packets_total`, `namespace_ingress_bytes_total`,
- // `namespace_ingress_packets_total`, `namespace_flows_total`, `node_egress_bytes_total`, `node_egress_packets_total`,
- // `node_ingress_bytes_total`, `node_ingress_packets_total`, `node_flows_total`, `workload_egress_bytes_total`,
- // `workload_egress_packets_total`, `workload_ingress_bytes_total`, `workload_ingress_packets_total`, `workload_flows_total`.
+ // Note that the more metrics you add, the bigger is the impact on Prometheus workload resources.
+ // Metrics enabled by default are:
+ // `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled).
+ // More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
// +optional
IncludeList *[]string `json:"includeList,omitempty"`
diff --git a/api/v1beta1/flowcollector_webhook_test.go b/api/v1beta1/flowcollector_webhook_test.go
index 1617dae0a..2876b6a31 100644
--- a/api/v1beta1/flowcollector_webhook_test.go
+++ b/api/v1beta1/flowcollector_webhook_test.go
@@ -118,14 +118,14 @@ func TestBeta1ConversionRoundtrip_Metrics(t *testing.T) {
assert.Equal([]v1beta2.FLPAlert{v1beta2.AlertLokiError}, converted.Spec.Processor.Metrics.DisableAlerts)
assert.NotNil(converted.Spec.Processor.Metrics.IncludeList)
- assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total"}, *converted.Spec.Processor.Metrics.IncludeList)
+ assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total"}, *converted.Spec.Processor.Metrics.IncludeList)
// Other way
var back FlowCollector
err = back.ConvertFrom(&converted)
assert.NoError(err)
// Here, includeList is preserved; it takes precedence over ignoreTags
- assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total"}, *back.Spec.Processor.Metrics.IncludeList)
+ assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total"}, *back.Spec.Processor.Metrics.IncludeList)
assert.Equal(initial.Spec.Processor.Metrics.DisableAlerts, back.Spec.Processor.Metrics.DisableAlerts)
assert.Equal(initial.Spec.Processor.Metrics.Server, back.Spec.Processor.Metrics.Server)
}
diff --git a/api/v1beta2/flowcollector_types.go b/api/v1beta2/flowcollector_types.go
index d5e6fc1ad..7839ccdfe 100644
--- a/api/v1beta2/flowcollector_types.go
+++ b/api/v1beta2/flowcollector_types.go
@@ -344,13 +344,13 @@ type FLPMetrics struct {
// +optional
Server MetricsServerConfig `json:"server,omitempty"`
- // `includeList` is a list of metric names to specify which metrics to generate.
- // The names correspond to the name in Prometheus, without the prefix. For example,
+ // `includeList` is a list of metric names to specify which ones to generate.
+ // The names correspond to the names in Prometheus without the prefix. For example,
// `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus.
- // Available names are: `namespace_egress_bytes_total`, `namespace_egress_packets_total`, `namespace_ingress_bytes_total`,
- // `namespace_ingress_packets_total`, `namespace_flows_total`, `node_egress_bytes_total`, `node_egress_packets_total`,
- // `node_ingress_bytes_total`, `node_ingress_packets_total`, `node_flows_total`, `workload_egress_bytes_total`,
- // `workload_egress_packets_total`, `workload_ingress_bytes_total`, `workload_ingress_packets_total`, `workload_flows_total`.
+ // Note that the more metrics you add, the bigger is the impact on Prometheus workload resources.
+ // Metrics enabled by default are:
+ // `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled).
+ // More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
// +optional
IncludeList *[]string `json:"includeList,omitempty"`
diff --git a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml
index 278f34968..a0a7678e5 100644
--- a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml
+++ b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml
@@ -4827,17 +4827,16 @@ spec:
type: array
includeList:
description: '`includeList` is a list of metric names to specify
- which metrics to generate. The names correspond to the name
- in Prometheus, without the prefix. For example, `namespace_egress_packets_total`
+ which ones to generate. The names correspond to the names
+ in Prometheus without the prefix. For example, `namespace_egress_packets_total`
will show up as `netobserv_namespace_egress_packets_total`
- in Prometheus. Available names are: `namespace_egress_bytes_total`,
- `namespace_egress_packets_total`, `namespace_ingress_bytes_total`,
- `namespace_ingress_packets_total`, `namespace_flows_total`,
- `node_egress_bytes_total`, `node_egress_packets_total`,
- `node_ingress_bytes_total`, `node_ingress_packets_total`,
- `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`,
- `workload_ingress_bytes_total`, `workload_ingress_packets_total`,
- `workload_flows_total`.'
+ in Prometheus. Note that the more metrics you add, the bigger
+ is the impact on Prometheus workload resources. Metrics
+ enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`,
+ `workload_ingress_bytes_total`, `namespace_drop_packets_total`
+ (when `PacketDrop` feature is enabled), `namespace_rtt_seconds`
+ (when `FlowRTT` feature is enabled). More information, with
+ full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
type: string
type: array
@@ -7704,17 +7703,16 @@ spec:
type: array
includeList:
description: '`includeList` is a list of metric names to specify
- which metrics to generate. The names correspond to the name
- in Prometheus, without the prefix. For example, `namespace_egress_packets_total`
+ which ones to generate. The names correspond to the names
+ in Prometheus without the prefix. For example, `namespace_egress_packets_total`
will show up as `netobserv_namespace_egress_packets_total`
- in Prometheus. Available names are: `namespace_egress_bytes_total`,
- `namespace_egress_packets_total`, `namespace_ingress_bytes_total`,
- `namespace_ingress_packets_total`, `namespace_flows_total`,
- `node_egress_bytes_total`, `node_egress_packets_total`,
- `node_ingress_bytes_total`, `node_ingress_packets_total`,
- `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`,
- `workload_ingress_bytes_total`, `workload_ingress_packets_total`,
- `workload_flows_total`.'
+ in Prometheus. Note that the more metrics you add, the bigger
+ is the impact on Prometheus workload resources. Metrics
+ enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`,
+ `workload_ingress_bytes_total`, `namespace_drop_packets_total`
+ (when `PacketDrop` feature is enabled), `namespace_rtt_seconds`
+ (when `FlowRTT` feature is enabled). More information, with
+ full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
type: string
type: array
diff --git a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml
index 72f629672..699acb2e9 100644
--- a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml
+++ b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml
@@ -4813,17 +4813,16 @@ spec:
type: array
includeList:
description: '`includeList` is a list of metric names to specify
- which metrics to generate. The names correspond to the name
- in Prometheus, without the prefix. For example, `namespace_egress_packets_total`
+ which ones to generate. The names correspond to the names
+ in Prometheus without the prefix. For example, `namespace_egress_packets_total`
will show up as `netobserv_namespace_egress_packets_total`
- in Prometheus. Available names are: `namespace_egress_bytes_total`,
- `namespace_egress_packets_total`, `namespace_ingress_bytes_total`,
- `namespace_ingress_packets_total`, `namespace_flows_total`,
- `node_egress_bytes_total`, `node_egress_packets_total`,
- `node_ingress_bytes_total`, `node_ingress_packets_total`,
- `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`,
- `workload_ingress_bytes_total`, `workload_ingress_packets_total`,
- `workload_flows_total`.'
+ in Prometheus. Note that the more metrics you add, the bigger
+ is the impact on Prometheus workload resources. Metrics
+ enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`,
+ `workload_ingress_bytes_total`, `namespace_drop_packets_total`
+ (when `PacketDrop` feature is enabled), `namespace_rtt_seconds`
+ (when `FlowRTT` feature is enabled). More information, with
+ full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
type: string
type: array
@@ -7690,17 +7689,16 @@ spec:
type: array
includeList:
description: '`includeList` is a list of metric names to specify
- which metrics to generate. The names correspond to the name
- in Prometheus, without the prefix. For example, `namespace_egress_packets_total`
+ which ones to generate. The names correspond to the names
+ in Prometheus without the prefix. For example, `namespace_egress_packets_total`
will show up as `netobserv_namespace_egress_packets_total`
- in Prometheus. Available names are: `namespace_egress_bytes_total`,
- `namespace_egress_packets_total`, `namespace_ingress_bytes_total`,
- `namespace_ingress_packets_total`, `namespace_flows_total`,
- `node_egress_bytes_total`, `node_egress_packets_total`,
- `node_ingress_bytes_total`, `node_ingress_packets_total`,
- `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`,
- `workload_ingress_bytes_total`, `workload_ingress_packets_total`,
- `workload_flows_total`.'
+ in Prometheus. Note that the more metrics you add, the bigger
+ is the impact on Prometheus workload resources. Metrics
+ enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`,
+ `workload_ingress_bytes_total`, `namespace_drop_packets_total`
+ (when `PacketDrop` feature is enabled), `namespace_rtt_seconds`
+ (when `FlowRTT` feature is enabled). More information, with
+ full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
type: string
type: array
diff --git a/controllers/flowlogspipeline/flp_test.go b/controllers/flowlogspipeline/flp_test.go
index 04981295e..8d61e552d 100644
--- a/controllers/flowlogspipeline/flp_test.go
+++ b/controllers/flowlogspipeline/flp_test.go
@@ -915,10 +915,13 @@ func TestMergeMetricsConfiguration_Default(t *testing.T) {
jsonStages, _ := json.Marshal(stages)
assert.Equal(`[{"name":"ipfix"},{"name":"extract_conntrack","follows":"ipfix"},{"name":"enrich","follows":"extract_conntrack"},{"name":"loki","follows":"enrich"},{"name":"stdout","follows":"enrich"},{"name":"prometheus","follows":"enrich"}]`, string(jsonStages))
names := getSortedMetricsNames(parameters[5].Encode.Prom.Metrics)
- assert.Len(names, 3)
- assert.Equal("namespace_flows_total", names[0])
- assert.Equal("node_ingress_bytes_total", names[1])
- assert.Equal("workload_ingress_bytes_total", names[2])
+ assert.Equal([]string{
+ "namespace_drop_packets_total",
+ "namespace_flows_total",
+ "namespace_rtt_seconds",
+ "node_ingress_bytes_total",
+ "workload_ingress_bytes_total",
+ }, names)
assert.Equal("netobserv_", parameters[5].Encode.Prom.Prefix)
}
diff --git a/docs/FlowCollector.md b/docs/FlowCollector.md
index b5f52f3a4..97affae55 100644
--- a/docs/FlowCollector.md
+++ b/docs/FlowCollector.md
@@ -8556,7 +8556,7 @@ target specifies the target value for the given metric
includeList |
[]string |
- `includeList` is a list of metric names to specify which metrics to generate. The names correspond to the name in Prometheus, without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Available names are: `namespace_egress_bytes_total`, `namespace_egress_packets_total`, `namespace_ingress_bytes_total`, `namespace_ingress_packets_total`, `namespace_flows_total`, `node_egress_bytes_total`, `node_egress_packets_total`, `node_ingress_bytes_total`, `node_ingress_packets_total`, `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`, `workload_ingress_bytes_total`, `workload_ingress_packets_total`, `workload_flows_total`.
+ `includeList` is a list of metric names to specify which ones to generate. The names correspond to the names in Prometheus without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Note that the more metrics you add, the bigger is the impact on Prometheus workload resources. Metrics enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled). More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
|
false |
@@ -13717,7 +13717,7 @@ target specifies the target value for the given metric
includeList |
[]string |
- `includeList` is a list of metric names to specify which metrics to generate. The names correspond to the name in Prometheus, without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Available names are: `namespace_egress_bytes_total`, `namespace_egress_packets_total`, `namespace_ingress_bytes_total`, `namespace_ingress_packets_total`, `namespace_flows_total`, `node_egress_bytes_total`, `node_egress_packets_total`, `node_ingress_bytes_total`, `node_ingress_packets_total`, `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`, `workload_ingress_bytes_total`, `workload_ingress_packets_total`, `workload_flows_total`.
+ `includeList` is a list of metric names to specify which ones to generate. The names correspond to the names in Prometheus without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Note that the more metrics you add, the bigger is the impact on Prometheus workload resources. Metrics enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled). More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
|
false |
diff --git a/docs/Metrics.md b/docs/Metrics.md
index 599af7d74..461fa67d5 100644
--- a/docs/Metrics.md
+++ b/docs/Metrics.md
@@ -1,35 +1,39 @@
# Metrics in the NetObserv Operator
-Configuration of metrics to be collected are stored in the metrics_definitions folder.
-These are defined in yaml files according to the format handled by the flp confgenerator.
-The flp confgenerator was modified to produce output that can be easily consumed by the NetObserv Operator.
-The flp confgenerator was further modified so that it may be called as a module, and provides its output as a data structure returned from a function rather than a yaml file.
-All metrics that may be produced are included in the metrics_definitions library, and they are associated with tags.
-A parameter is added to the Operator CRD to specify tags of metrics to not produce.
-
-On each iteration of the Operator, the Operator checks whether the CRD has been modified.
-If the CRD has changed, the Operator reconciles the state of the cluster to the specification in the CRD.
-
-The implementation of the Operator specifies the flp Network Transform enrichment (in particular, kubernetes features).
-The actual metrics to produce are taken from the metrics_definitions, based on the enrichment defined in the Operator.
-The Operator allocates the extract_aggregate and encode_prom Stage structures for the flp pipeline,
-and extract_aggregate and encode_prom entries are filled in using the results from the confgenerator.
-The configuration is placed into a configMap.
-Flp is then deployed using this combined configuration.
-The configuration is not changed during runtime.
-In order to change the configuration (e.g. exclude a different set of metrics), flp must be redeployed.
-
-Note that there are 2 data paths in flp. Data that is ingested is enriched and is then passed directly to Loki.
-In addition, after the enrichment, we derive metrics (from the metrics_definitions), aggregate them, and report to prometheus.
-The metrics_definitions does not impact the data that is sent to Loki.
-
-In the metrics_definitions yaml files, there are tags associated with each metric.
-A user may specify to skip metrics that have a particular tag.
-This is specified by a field in the CRD.
-These tags are then specified to the confgenerator module to produce metrics that are not associated with the specified tag.
-
-## Parameters added to CRD to support metrics
-Note: These parameters may be changed between interations, in which case the Operator redeploys flp.
-- ignoreMetrics (list of tags to specify which metrics to ignore)
-
-
+The NetObserv operator uses [flowlogs-pipeline](https://github.com/netobserv/flowlogs-pipeline/) to generate metrics out of flow logs.
+
+Metrics are configured in the `FlowCollector` custom resource, via `spec.processor.metrics.includeList`, which is a list of the names of the metrics to generate.
+
+The names correspond to the names in Prometheus without their prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus.
+
+Note that the more metrics you add, the greater the impact on Prometheus workload resources. Some metrics have a higher cardinality, in particular all metrics starting with `workload_`, which may put Prometheus under stress if too many of them are enabled. It is recommended to monitor the impact on Prometheus when adding more metrics.
+
+Available names are (names followed by `*` are enabled by default):
+- `namespace_egress_bytes_total`
+- `namespace_egress_packets_total`
+- `namespace_ingress_bytes_total`
+- `namespace_ingress_packets_total`
+- `namespace_flows_total` `*`
+- `node_egress_bytes_total`
+- `node_egress_packets_total`
+- `node_ingress_bytes_total` `*`
+- `node_ingress_packets_total`
+- `node_flows_total`
+- `workload_egress_bytes_total`
+- `workload_egress_packets_total`
+- `workload_ingress_bytes_total` `*`
+- `workload_ingress_packets_total`
+- `workload_flows_total`
+
+When the `PacketDrop` feature is enabled in `spec.agent.ebpf.features` (with privileged mode), additional metrics are available:
+- `namespace_drop_bytes_total`
+- `namespace_drop_packets_total` `*`
+- `node_drop_bytes_total`
+- `node_drop_packets_total`
+- `workload_drop_bytes_total`
+- `workload_drop_packets_total`
+
+When the `FlowRTT` feature is enabled in `spec.agent.ebpf.features`, additional metrics are available:
+- `namespace_rtt_seconds` `*`
+- `node_rtt_seconds`
+- `workload_rtt_seconds`
diff --git a/hack/cloned.flows.netobserv.io_flowcollectors.yaml b/hack/cloned.flows.netobserv.io_flowcollectors.yaml
index 6d06de434..d0bf5a406 100644
--- a/hack/cloned.flows.netobserv.io_flowcollectors.yaml
+++ b/hack/cloned.flows.netobserv.io_flowcollectors.yaml
@@ -3339,7 +3339,7 @@ spec:
type: string
type: array
includeList:
- description: '`includeList` is a list of metric names to specify which metrics to generate. The names correspond to the name in Prometheus, without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Available names are: `namespace_egress_bytes_total`, `namespace_egress_packets_total`, `namespace_ingress_bytes_total`, `namespace_ingress_packets_total`, `namespace_flows_total`, `node_egress_bytes_total`, `node_egress_packets_total`, `node_ingress_bytes_total`, `node_ingress_packets_total`, `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`, `workload_ingress_bytes_total`, `workload_ingress_packets_total`, `workload_flows_total`.'
+ description: '`includeList` is a list of metric names to specify which ones to generate. The names correspond to the names in Prometheus without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Note that the more metrics you add, the bigger is the impact on Prometheus workload resources. Metrics enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled). More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
type: string
type: array
@@ -5320,7 +5320,7 @@ spec:
type: string
type: array
includeList:
- description: '`includeList` is a list of metric names to specify which metrics to generate. The names correspond to the name in Prometheus, without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Available names are: `namespace_egress_bytes_total`, `namespace_egress_packets_total`, `namespace_ingress_bytes_total`, `namespace_ingress_packets_total`, `namespace_flows_total`, `node_egress_bytes_total`, `node_egress_packets_total`, `node_ingress_bytes_total`, `node_ingress_packets_total`, `node_flows_total`, `workload_egress_bytes_total`, `workload_egress_packets_total`, `workload_ingress_bytes_total`, `workload_ingress_packets_total`, `workload_flows_total`.'
+ description: '`includeList` is a list of metric names to specify which ones to generate. The names correspond to the names in Prometheus without the prefix. For example, `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus. Note that the more metrics you add, the bigger is the impact on Prometheus workload resources. Metrics enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled). More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
type: string
type: array
diff --git a/pkg/dashboards/dashboard.go b/pkg/dashboards/dashboard.go
index 6f52c7559..76e72ceed 100644
--- a/pkg/dashboards/dashboard.go
+++ b/pkg/dashboards/dashboard.go
@@ -16,19 +16,21 @@ type rowInfo struct {
// Queries
const (
- layerApps = "Applications"
- layerInfra = "Infrastructure"
- appsFilters1 = `SrcK8S_Namespace!~"|$NETOBSERV_NS|openshift.*"`
- appsFilters2 = `SrcK8S_Namespace=~"$NETOBSERV_NS|openshift.*",DstK8S_Namespace!~"|$NETOBSERV_NS|openshift.*"`
- infraFilters1 = `SrcK8S_Namespace=~"$NETOBSERV_NS|openshift.*"`
- infraFilters2 = `SrcK8S_Namespace!~"$NETOBSERV_NS|openshift.*",DstK8S_Namespace=~"$NETOBSERV_NS|openshift.*"`
- metricTagNamespaces = "namespaces"
- metricTagNodes = "nodes"
- metricTagWorkloads = "workloads"
- metricTagIngress = "ingress"
- metricTagEgress = "egress"
- metricTagBytes = "bytes"
- metricTagPackets = "packets"
+ layerApps = "Applications"
+ layerInfra = "Infrastructure"
+ appsFilters1 = `SrcK8S_Namespace!~"|$NETOBSERV_NS|openshift.*"`
+ appsFilters2 = `SrcK8S_Namespace=~"$NETOBSERV_NS|openshift.*",DstK8S_Namespace!~"|$NETOBSERV_NS|openshift.*"`
+ infraFilters1 = `SrcK8S_Namespace=~"$NETOBSERV_NS|openshift.*"`
+ infraFilters2 = `SrcK8S_Namespace!~"$NETOBSERV_NS|openshift.*",DstK8S_Namespace=~"$NETOBSERV_NS|openshift.*"`
+ metricTagNamespaces = "namespaces"
+ metricTagNodes = "nodes"
+ metricTagWorkloads = "workloads"
+ metricTagIngress = "ingress"
+ metricTagEgress = "egress"
+ metricTagBytes = "bytes"
+ metricTagPackets = "packets"
+ metricTagDropBytes = "drop_bytes"
+ metricTagDropPackets = "drop_packets"
)
var (
@@ -85,6 +87,7 @@ var (
func init() {
for _, group := range []string{metricTagNodes, metricTagNamespaces, metricTagWorkloads} {
groupTrimmed := strings.TrimSuffix(group, "s")
+ // byte/pkt rates
for _, vt := range []string{metricTagBytes, metricTagPackets} {
for _, dir := range []string{metricTagEgress, metricTagIngress} {
rowsInfo = append(rowsInfo, rowInfo{
@@ -95,6 +98,15 @@ func init() {
})
}
}
+ // drops
+ for _, vt := range []string{metricTagDropBytes, metricTagDropPackets} {
+ rowsInfo = append(rowsInfo, rowInfo{
+ metric: fmt.Sprintf("netobserv_%s_%s_total", groupTrimmed, vt),
+ group: group,
+ valueType: vt,
+ })
+ }
+ // TODO: RTT dashboard (after dashboard refactoring for exposed metrics; need to handle histogram queries)
}
}
@@ -217,6 +229,10 @@ func flowMetricsRow(netobsNs string, rowInfo rowInfo) string {
vt = "byte"
case metricTagPackets:
vt = "packet"
+ case metricTagDropBytes:
+ vt = "drop bytes"
+ case metricTagDropPackets:
+ vt = "drop packets"
}
title := fmt.Sprintf("Top %s rates %s per source and destination %s", vt, verb, rowInfo.group)
var panels string
diff --git a/pkg/dashboards/dashboard_test.go b/pkg/dashboards/dashboard_test.go
index 01430c265..87bf99a09 100644
--- a/pkg/dashboards/dashboard_test.go
+++ b/pkg/dashboards/dashboard_test.go
@@ -18,7 +18,7 @@ func TestCreateFlowMetricsDashboard_All(t *testing.T) {
assert.NoError(err)
assert.Equal("NetObserv", d.Title)
- assert.Len(d.Rows, 12)
+ assert.Len(d.Rows, 18)
// First row
row := 0
@@ -28,8 +28,8 @@ func TestCreateFlowMetricsDashboard_All(t *testing.T) {
assert.Len(d.Rows[row].Panels[0].Targets, 1)
assert.Contains(d.Rows[row].Panels[0].Targets[0].Expr, "label_replace(label_replace(topk(10,sum(rate(netobserv_node_egress_bytes_total[1m])) by (SrcK8S_HostName, DstK8S_HostName))")
- // 6th row
- row = 5
+ // 8th row
+ row = 7
assert.Equal("Top byte rates received per source and destination namespaces", d.Rows[row].Title)
assert.Len(d.Rows[row].Panels, 2)
assert.Equal("Applications", d.Rows[row].Panels[0].Title)
@@ -42,8 +42,8 @@ func TestCreateFlowMetricsDashboard_All(t *testing.T) {
`label_replace(label_replace(topk(10,sum(rate(netobserv_namespace_ingress_bytes_total{SrcK8S_Namespace=~"netobserv|openshift.*"}[1m]) or rate(netobserv_namespace_ingress_bytes_total{SrcK8S_Namespace!~"netobserv|openshift.*",DstK8S_Namespace=~"netobserv|openshift.*"}[1m])) by (SrcK8S_Namespace, DstK8S_Namespace))`,
)
- // 12th row
- row = 11
+ // 16th row
+ row = 15
assert.Equal("Top packet rates received per source and destination workloads", d.Rows[row].Title)
assert.Len(d.Rows[row].Panels, 2)
assert.Equal("Applications", d.Rows[row].Panels[0].Title)
@@ -88,7 +88,7 @@ func TestCreateFlowMetricsDashboard_DefaultList(t *testing.T) {
assert.NoError(err)
assert.Equal("NetObserv", d.Title)
- assert.Len(d.Rows, 3)
+ assert.Len(d.Rows, 4)
// First row
row := 0
@@ -113,8 +113,8 @@ func TestCreateFlowMetricsDashboard_DefaultList(t *testing.T) {
`label_replace(label_replace(topk(10,sum(rate(netobserv_workload_ingress_bytes_total{SrcK8S_Namespace=~"netobserv|openshift.*"}[1m]) or rate(netobserv_workload_ingress_bytes_total{SrcK8S_Namespace!~"netobserv|openshift.*",DstK8S_Namespace=~"netobserv|openshift.*"}[1m])) by (SrcK8S_Namespace, DstK8S_Namespace))`,
)
- // 3rd row
- row = 2
+ // 4th row
+ row = 3
assert.Equal("Top byte rates received per source and destination workloads", d.Rows[row].Title)
assert.Len(d.Rows[row].Panels, 2)
assert.Equal("Applications", d.Rows[row].Panels[0].Title)
diff --git a/pkg/metrics/predefined_metrics.go b/pkg/metrics/predefined_metrics.go
index c5f49e316..92a9f51c4 100644
--- a/pkg/metrics/predefined_metrics.go
+++ b/pkg/metrics/predefined_metrics.go
@@ -34,7 +34,13 @@ var (
}
predefinedMetrics []taggedMetricDefinition
// Note that we set default in-code rather than in CRD, in order to keep track of value being unset or set intentionnally in FlowCollector
- DefaultIncludeList = []string{"node_ingress_bytes_total", "workload_ingress_bytes_total", "namespace_flows_total"}
+ DefaultIncludeList = []string{
+ "node_ingress_bytes_total",
+ "workload_ingress_bytes_total",
+ "namespace_flows_total",
+ "namespace_drop_packets_total",
+ "namespace_rtt_seconds",
+ }
// Pre-deprecation default IgnoreTags list (1.4) - used before switching to whitelist approach,
// to make sure there is no unintended new metrics being collected
// Don't add anything here: this is not meant to evolve
@@ -61,7 +67,7 @@ func init() {
ValueKey: valueField,
Filters: []flpapi.PromMetricsFilter{
{Key: "Duplicate", Value: "false"},
- {Key: "FlowDirection", Value: mapDirection[dir], Type: "regex"},
+ {Key: "FlowDirection", Value: mapDirection[dir], Type: flpapi.PromFilterRegex},
},
Labels: labels,
},
@@ -78,6 +84,47 @@ func init() {
},
tags: []string{group, group + "-flows", "flows"},
})
+ // RTT metrics
+ predefinedMetrics = append(predefinedMetrics, taggedMetricDefinition{
+ PromMetricsItem: flpapi.PromMetricsItem{
+ Name: fmt.Sprintf("%s_rtt_seconds", groupTrimmed),
+ Type: "histogram",
+ ValueKey: "TimeFlowRttNs",
+ Filters: []flpapi.PromMetricsFilter{
+ {Key: "TimeFlowRttNs", Type: flpapi.PromFilterPresence},
+ },
+ Labels: labels,
+ ValueScale: 1_000_000_000, // ns => s
+ },
+ tags: []string{group, "rtt"},
+ })
+ // Drops metrics
+ predefinedMetrics = append(predefinedMetrics, taggedMetricDefinition{
+ PromMetricsItem: flpapi.PromMetricsItem{
+ Name: fmt.Sprintf("%s_drop_packets_total", groupTrimmed),
+ Type: "counter",
+ ValueKey: "PktDropPackets",
+ Filters: []flpapi.PromMetricsFilter{
+ {Key: "Duplicate", Value: "false"},
+ {Key: "PktDropPackets", Type: flpapi.PromFilterPresence},
+ },
+ Labels: labels,
+ },
+ tags: []string{group, tagPackets, "drops"},
+ })
+ predefinedMetrics = append(predefinedMetrics, taggedMetricDefinition{
+ PromMetricsItem: flpapi.PromMetricsItem{
+ Name: fmt.Sprintf("%s_drop_bytes_total", groupTrimmed),
+ Type: "counter",
+ ValueKey: "PktDropBytes",
+ Filters: []flpapi.PromMetricsFilter{
+ {Key: "Duplicate", Value: "false"},
+ {Key: "PktDropBytes", Type: flpapi.PromFilterPresence},
+ },
+ Labels: labels,
+ },
+ tags: []string{group, tagBytes, "drops"}, // same "drops" tag as the drop_packets metric, so one tag covers both drop counters
+ })
}
}
diff --git a/pkg/metrics/predefined_metrics_test.go b/pkg/metrics/predefined_metrics_test.go
index e2a2108ef..eec99b716 100644
--- a/pkg/metrics/predefined_metrics_test.go
+++ b/pkg/metrics/predefined_metrics_test.go
@@ -11,7 +11,17 @@ func TestIncludeExclude(t *testing.T) {
// IgnoreTags set, Include list unset => resolving ignore tags
res := GetEnabledNames([]string{"egress", "packets", "flows"}, nil)
- assert.Equal([]string{"node_ingress_bytes_total", "namespace_ingress_bytes_total", "workload_ingress_bytes_total"}, res)
+ assert.Equal([]string{
+ "node_ingress_bytes_total",
+ "node_rtt_seconds",
+ "node_drop_bytes_total",
+ "namespace_ingress_bytes_total",
+ "namespace_rtt_seconds",
+ "namespace_drop_bytes_total",
+ "workload_ingress_bytes_total",
+ "workload_rtt_seconds",
+ "workload_drop_bytes_total",
+ }, res)
// IgnoreTags set, Include list set => keep include list
res = GetEnabledNames([]string{"egress", "packets"}, &[]string{"namespace_flows_total"})