Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NETOBSERV-1334: DNS metrics and dashboards #489

Merged
merged 5 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions api/v1alpha1/flowcollector_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func (r *FlowCollector) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.Loki.Enable = restored.Spec.Loki.Enable

if restored.Spec.Processor.Metrics.IncludeList != nil {
list := make([]string, len(*restored.Spec.Processor.Metrics.IncludeList))
list := make([]v1beta2.FLPMetric, len(*restored.Spec.Processor.Metrics.IncludeList))
copy(list, *restored.Spec.Processor.Metrics.IncludeList)
dst.Spec.Processor.Metrics.IncludeList = &list
}
Expand Down Expand Up @@ -184,7 +184,6 @@ func Convert_v1beta2_ServerTLS_To_v1alpha1_ServerTLS(in *v1beta2.ServerTLS, out
// we have newly defined fields in v1beta2 that are not in v1alpha1
// nolint:golint,stylecheck,revive
func Convert_v1alpha1_FLPMetrics_To_v1beta2_FLPMetrics(in *FLPMetrics, out *v1beta2.FLPMetrics, s apiconversion.Scope) error {
includeList := metrics.GetEnabledNames(in.IgnoreTags, nil)
out.IncludeList = &includeList
out.IncludeList = metrics.GetAsIncludeList(in.IgnoreTags, nil)
return autoConvert_v1alpha1_FLPMetrics_To_v1beta2_FLPMetrics(in, out, s)
}
13 changes: 8 additions & 5 deletions api/v1beta1/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,7 @@ type FlowCollectorEBPF struct {
// - `PacketDrop`: enable the packets drop flows logging feature. This feature requires mounting
// the kernel debug filesystem, so the eBPF pod has to run as privileged.
// If the `spec.agent.eBPF.privileged` parameter is not set, an error is reported.<br>
// - `DNSTracking`: enable the DNS tracking feature. This feature requires mounting
// the kernel debug filesystem hence the eBPF pod has to run as privileged.
// If the `spec.agent.eBPF.privileged` parameter is not set, an error is reported.<br>
// - `DNSTracking`: enable the DNS tracking feature.<br>
// - `FlowRTT` [unsupported (*)]: enable flow latency (RTT) calculations in the eBPF agent during TCP handshakes. This feature works better with `sampling` set to 1.<br>
// +optional
Features []AgentFeature `json:"features,omitempty"`
Expand Down Expand Up @@ -338,6 +336,10 @@ const (
// +kubebuilder:validation:Enum:="NetObservNoFlows";"NetObservLokiError"
type FLPAlert string

// Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
// +kubebuilder:validation:Enum:="namespace_egress_bytes_total";"namespace_egress_packets_total";"namespace_ingress_bytes_total";"namespace_ingress_packets_total";"namespace_flows_total";"node_egress_bytes_total";"node_egress_packets_total";"node_ingress_bytes_total";"node_ingress_packets_total";"node_flows_total";"workload_egress_bytes_total";"workload_egress_packets_total";"workload_ingress_bytes_total";"workload_ingress_packets_total";"workload_flows_total";"namespace_drop_bytes_total";"namespace_drop_packets_total";"node_drop_bytes_total";"node_drop_packets_total";"workload_drop_bytes_total";"workload_drop_packets_total";"namespace_rtt_seconds";"node_rtt_seconds";"workload_rtt_seconds";"namespace_dns_latency_seconds";"node_dns_latency_seconds";"workload_dns_latency_seconds"
type FLPMetric string

// `FLPMetrics` defines the desired FLP configuration regarding metrics
type FLPMetrics struct {
// Metrics server endpoint configuration for Prometheus scraper
Expand All @@ -357,10 +359,11 @@ type FLPMetrics struct {
// `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus.
// Note that the more metrics you add, the bigger is the impact on Prometheus workload resources.
// Metrics enabled by default are:
// `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled).
// `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
// `namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
// More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
// +optional
IncludeList *[]string `json:"includeList,omitempty"`
IncludeList *[]FLPMetric `json:"includeList,omitempty"`

// `disableAlerts` is a list of alerts that should be disabled.
// Possible values are:<br>
Expand Down
3 changes: 1 addition & 2 deletions api/v1beta1/flowcollector_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ func Convert_v1beta1_FLPMetrics_To_v1beta2_FLPMetrics(in *FLPMetrics, out *v1bet
if err != nil {
return err
}
includeList := metrics.GetEnabledNames(in.IgnoreTags, in.IncludeList)
out.IncludeList = &includeList
out.IncludeList = metrics.GetAsIncludeList(in.IgnoreTags, out.IncludeList)
return nil
}
40 changes: 36 additions & 4 deletions api/v1beta1/flowcollector_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,21 @@ func TestBeta1ConversionRoundtrip_Metrics(t *testing.T) {
err := initial.ConvertTo(&converted)
assert.NoError(err)

expectedDefaultMetrics := []v1beta2.FLPMetric{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total", "namespace_dns_latency_seconds"}
assert.Equal([]v1beta2.FLPAlert{v1beta2.AlertLokiError}, converted.Spec.Processor.Metrics.DisableAlerts)
assert.NotNil(converted.Spec.Processor.Metrics.IncludeList)
assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total"}, *converted.Spec.Processor.Metrics.IncludeList)
assert.Equal(expectedDefaultMetrics, *converted.Spec.Processor.Metrics.IncludeList)

// Other way
var back FlowCollector
err = back.ConvertFrom(&converted)
assert.NoError(err)
// Here, includeList is preserved; it takes precedence over ignoreTags
assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total"}, *back.Spec.Processor.Metrics.IncludeList)
var expectedBeta1 []FLPMetric
for _, m := range expectedDefaultMetrics {
expectedBeta1 = append(expectedBeta1, FLPMetric(m))
}
assert.Equal(expectedBeta1, *back.Spec.Processor.Metrics.IncludeList)
assert.Equal(initial.Spec.Processor.Metrics.DisableAlerts, back.Spec.Processor.Metrics.DisableAlerts)
assert.Equal(initial.Spec.Processor.Metrics.Server, back.Spec.Processor.Metrics.Server)
}
Expand All @@ -139,7 +144,7 @@ func TestBeta2ConversionRoundtrip_Metrics(t *testing.T) {
Processor: v1beta2.FlowCollectorFLP{
Metrics: v1beta2.FLPMetrics{
DisableAlerts: []v1beta2.FLPAlert{v1beta2.AlertLokiError},
IncludeList: &[]string{"namespace_egress_packets_total", "namespace_flows_total"},
IncludeList: &[]v1beta2.FLPMetric{"namespace_egress_packets_total", "namespace_flows_total"},
},
},
},
Expand All @@ -151,10 +156,37 @@ func TestBeta2ConversionRoundtrip_Metrics(t *testing.T) {

assert.Equal([]FLPAlert{AlertLokiError}, converted.Spec.Processor.Metrics.DisableAlerts)
assert.NotNil(converted.Spec.Processor.Metrics.IncludeList)
assert.Equal([]string{"namespace_egress_packets_total", "namespace_flows_total"}, *converted.Spec.Processor.Metrics.IncludeList)
assert.Equal([]FLPMetric{"namespace_egress_packets_total", "namespace_flows_total"}, *converted.Spec.Processor.Metrics.IncludeList)

var back v1beta2.FlowCollector
err = converted.ConvertTo(&back)
assert.NoError(err)
assert.Equal(initial.Spec.Processor.Metrics, back.Spec.Processor.Metrics)
}

// TestBeta2ConversionRoundtrip_Metrics_Default exercises the
// beta2 -> beta1 -> beta2 roundtrip for a FlowCollector whose metrics
// includeList is left unset. The list is expected to stay nil on both
// sides of the conversion, and the beta1 ignoreTags is expected to be empty.
func TestBeta2ConversionRoundtrip_Metrics_Default(t *testing.T) {
	assert := assert.New(t)

	// Only disableAlerts is populated; includeList is deliberately left nil.
	var src v1beta2.FlowCollector
	src.Spec.Processor.Metrics.DisableAlerts = []v1beta2.FLPAlert{v1beta2.AlertLokiError}

	// beta2 -> beta1
	var beta1 FlowCollector
	assert.NoError(beta1.ConvertFrom(&src))

	assert.Empty(beta1.Spec.Processor.Metrics.IgnoreTags)
	assert.Nil(beta1.Spec.Processor.Metrics.IncludeList)

	// beta1 -> beta2
	var roundtripped v1beta2.FlowCollector
	assert.NoError(beta1.ConvertTo(&roundtripped))
	assert.Nil(roundtripped.Spec.Processor.Metrics.IncludeList)
}
4 changes: 2 additions & 2 deletions api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions api/v1beta2/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,7 @@ type FlowCollectorEBPF struct {
// - `PacketDrop`: enable the packets drop flows logging feature. This feature requires mounting
// the kernel debug filesystem, so the eBPF pod has to run as privileged.
// If the `spec.agent.eBPF.privileged` parameter is not set, an error is reported.<br>
// - `DNSTracking`: enable the DNS tracking feature. This feature requires mounting
// the kernel debug filesystem hence the eBPF pod has to run as privileged.
// If the `spec.agent.eBPF.privileged` parameter is not set, an error is reported.<br>
// - `DNSTracking`: enable the DNS tracking feature.<br>
// - `FlowRTT` [unsupported (*)]: enable flow latency (RTT) calculations in the eBPF agent during TCP handshakes. This feature works better with `sampling` set to 1.<br>
// +optional
Features []AgentFeature `json:"features,omitempty"`
Expand Down Expand Up @@ -338,6 +336,10 @@ const (
// +kubebuilder:validation:Enum:="NetObservNoFlows";"NetObservLokiError"
type FLPAlert string

// Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
// +kubebuilder:validation:Enum:="namespace_egress_bytes_total";"namespace_egress_packets_total";"namespace_ingress_bytes_total";"namespace_ingress_packets_total";"namespace_flows_total";"node_egress_bytes_total";"node_egress_packets_total";"node_ingress_bytes_total";"node_ingress_packets_total";"node_flows_total";"workload_egress_bytes_total";"workload_egress_packets_total";"workload_ingress_bytes_total";"workload_ingress_packets_total";"workload_flows_total";"namespace_drop_bytes_total";"namespace_drop_packets_total";"node_drop_bytes_total";"node_drop_packets_total";"workload_drop_bytes_total";"workload_drop_packets_total";"namespace_rtt_seconds";"node_rtt_seconds";"workload_rtt_seconds";"namespace_dns_latency_seconds";"node_dns_latency_seconds";"workload_dns_latency_seconds"
type FLPMetric string

// `FLPMetrics` defines the desired FLP configuration regarding metrics
type FLPMetrics struct {
// Metrics server endpoint configuration for Prometheus scraper
Expand All @@ -349,10 +351,11 @@ type FLPMetrics struct {
// `namespace_egress_packets_total` will show up as `netobserv_namespace_egress_packets_total` in Prometheus.
// Note that the more metrics you add, the bigger is the impact on Prometheus workload resources.
// Metrics enabled by default are:
// `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled), `namespace_rtt_seconds` (when `FlowRTT` feature is enabled).
// `namespace_flows_total`, `node_ingress_bytes_total`, `workload_ingress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
// `namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
// More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
// +optional
IncludeList *[]string `json:"includeList,omitempty"`
IncludeList *[]FLPMetric `json:"includeList,omitempty"`

// `disableAlerts` is a list of alerts that should be disabled.
// Possible values are:<br>
Expand Down
4 changes: 2 additions & 2 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 66 additions & 12 deletions bundle/manifests/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2554,10 +2554,7 @@ spec:
This feature requires mounting the kernel debug filesystem,
so the eBPF pod has to run as privileged. If the `spec.agent.eBPF.privileged`
parameter is not set, an error is reported.<br> - `DNSTracking`:
enable the DNS tracking feature. This feature requires mounting
the kernel debug filesystem hence the eBPF pod has to run
as privileged. If the `spec.agent.eBPF.privileged` parameter
is not set, an error is reported.<br> - `FlowRTT` [unsupported
enable the DNS tracking feature.<br> - `FlowRTT` [unsupported
(*)]: enable flow latency (RTT) calculations in the eBPF
agent during TCP handshakes. This feature better works with
`sampling` set to 1.<br>'
Expand Down Expand Up @@ -4835,9 +4832,39 @@ spec:
enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`,
`workload_ingress_bytes_total`, `namespace_drop_packets_total`
(when `PacketDrop` feature is enabled), `namespace_rtt_seconds`
(when `FlowRTT` feature is enabled). More information, with
full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
(when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds`
(when `DNSTracking` feature is enabled). More information,
with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
description: Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
enum:
- namespace_egress_bytes_total
- namespace_egress_packets_total
- namespace_ingress_bytes_total
- namespace_ingress_packets_total
- namespace_flows_total
- node_egress_bytes_total
- node_egress_packets_total
- node_ingress_bytes_total
- node_ingress_packets_total
- node_flows_total
- workload_egress_bytes_total
- workload_egress_packets_total
- workload_ingress_bytes_total
- workload_ingress_packets_total
- workload_flows_total
- namespace_drop_bytes_total
- namespace_drop_packets_total
- node_drop_bytes_total
- node_drop_packets_total
- workload_drop_bytes_total
- workload_drop_packets_total
- namespace_rtt_seconds
- node_rtt_seconds
- workload_rtt_seconds
- namespace_dns_latency_seconds
- node_dns_latency_seconds
- workload_dns_latency_seconds
type: string
type: array
server:
Expand Down Expand Up @@ -5206,10 +5233,7 @@ spec:
This feature requires mounting the kernel debug filesystem,
so the eBPF pod has to run as privileged. If the `spec.agent.eBPF.privileged`
parameter is not set, an error is reported.<br> - `DNSTracking`:
enable the DNS tracking feature. This feature requires mounting
the kernel debug filesystem hence the eBPF pod has to run
as privileged. If the `spec.agent.eBPF.privileged` parameter
is not set, an error is reported.<br> - `FlowRTT` [unsupported
enable the DNS tracking feature.<br> - `FlowRTT` [unsupported
(*)]: enable flow latency (RTT) calculations in the eBPF
agent during TCP handshakes. This feature better works with
`sampling` set to 1.<br>'
Expand Down Expand Up @@ -7711,9 +7735,39 @@ spec:
enabled by default are: `namespace_flows_total`, `node_ingress_bytes_total`,
`workload_ingress_bytes_total`, `namespace_drop_packets_total`
(when `PacketDrop` feature is enabled), `namespace_rtt_seconds`
(when `FlowRTT` feature is enabled). More information, with
full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
(when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds`
(when `DNSTracking` feature is enabled). More information,
with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md'
items:
description: Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
enum:
- namespace_egress_bytes_total
- namespace_egress_packets_total
- namespace_ingress_bytes_total
- namespace_ingress_packets_total
- namespace_flows_total
- node_egress_bytes_total
- node_egress_packets_total
- node_ingress_bytes_total
- node_ingress_packets_total
- node_flows_total
- workload_egress_bytes_total
- workload_egress_packets_total
- workload_ingress_bytes_total
- workload_ingress_packets_total
- workload_flows_total
- namespace_drop_bytes_total
- namespace_drop_packets_total
- node_drop_bytes_total
- node_drop_packets_total
- workload_drop_bytes_total
- workload_drop_packets_total
- namespace_rtt_seconds
- node_rtt_seconds
- workload_rtt_seconds
- namespace_dns_latency_seconds
- node_dns_latency_seconds
- workload_dns_latency_seconds
type: string
type: array
server:
Expand Down
Loading
Loading