diff --git a/api/v1beta1/flowcollector_types.go b/api/v1beta1/flowcollector_types.go index 23ea993f3..c3845d409 100644 --- a/api/v1beta1/flowcollector_types.go +++ b/api/v1beta1/flowcollector_types.go @@ -271,6 +271,16 @@ type MetricsServerConfig struct { TLS ServerTLS `json:"tls"` } +const ( + AlertNoFlows = "NetObservNoFlows" + AlertLokiError = "NetObservLokiError" +) + +// Name of a processor alert. +// Possible values are: `NetObservNoFlows`, which is triggered when no flows are being observed for a certain period, and `NetObservLokiError`, which is triggered when flows are being dropped due to Loki errors. +// +kubebuilder:validation:Enum:="NetObservNoFlows";"NetObservLokiError" +type FLPAlert string + // FLPMetrics define the desired FLP configuration regarding metrics type FLPMetrics struct { // metricsServer endpoint configuration for Prometheus scraper @@ -281,9 +291,9 @@ type FLPMetrics struct { //+kubebuilder:default:={"egress","packets"} IgnoreTags []string `json:"ignoreTags,omitempty"` - // disableAlerts is a list of alerts related to FLP that should not be created + // disableAlerts is a list of alerts that should not be created. 
// +optional - DisableAlerts []string `json:"disableAlerts,omitempty"` + DisableAlerts []FLPAlert `json:"disableAlerts,omitempty"` } const ( diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 693f431ea..2b4ae4527 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -129,7 +129,7 @@ func (in *FLPMetrics) DeepCopyInto(out *FLPMetrics) { } if in.DisableAlerts != nil { in, out := &in.DisableAlerts, &out.DisableAlerts - *out = make([]string, len(*in)) + *out = make([]FLPAlert, len(*in)) copy(*out, *in) } } diff --git a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml index 80b9ff3b3..64711e83d 100644 --- a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml +++ b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml @@ -4082,6 +4082,15 @@ spec: description: Metrics define the processor configuration regarding metrics properties: + disableAlerts: + description: disableAlerts is a list of alerts related to + FLP that should not be created + items: + enum: + - NetObservNoFlows + - NetObservLokiError + type: string + type: array ignoreTags: default: - egress diff --git a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml index a0f55f85a..7693bbfc9 100644 --- a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml +++ b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml @@ -304,6 +304,7 @@ metadata: "kafkaConsumerReplicas": 3, "logLevel": "info", "metrics": { + "disableAlerts": [], "ignoreTags": [ "egress", "packets" diff --git a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml index a9b69f749..b9248fe11 100644 --- a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml +++ b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml @@ -4070,9 +4070,18 @@ 
spec: metrics properties: disableAlerts: - description: disableAlerts is a list of alerts related to - FLP that should not be created + description: disableAlerts is a list of alerts that should + not be created. items: + description: 'Name of a processor alert Possible values + are: `NetObservNoFlows`, which is triggered when no flows + are being observed for a certain period, and `NetObservLokiError`, + which is triggered when flows are being dropped due to + Loki errors.disableAlerts is a list of alerts related + to FLP that should not be created' + enum: + - NetObservNoFlows + - NetObservLokiError type: string type: array ignoreTags: diff --git a/controllers/flowlogspipeline/flp_common_objects.go b/controllers/flowlogspipeline/flp_common_objects.go index 93c04aeff..00d44e950 100644 --- a/controllers/flowlogspipeline/flp_common_objects.go +++ b/controllers/flowlogspipeline/flp_common_objects.go @@ -712,7 +712,7 @@ func (b *builder) serviceMonitor() *monitoringv1.ServiceMonitor { return &flpServiceMonitorObject } -func shouldAddAlert(name string, disabledList []string) bool { +func shouldAddAlert(name flowslatest.FLPAlert, disabledList []flowslatest.FLPAlert) bool { for _, disabledAlert := range disabledList { if name == disabledAlert { return false @@ -725,9 +725,9 @@ func (b *builder) prometheusRule() *monitoringv1.PrometheusRule { rules := []monitoringv1.Rule{} // Not receiving flows - if shouldAddAlert("NetObservNoFlows", b.desired.Processor.Metrics.DisableAlerts) { + if shouldAddAlert(flowslatest.AlertNoFlows, b.desired.Processor.Metrics.DisableAlerts) { rules = append(rules, monitoringv1.Rule{ - Alert: "NetObservNoFlows", + Alert: flowslatest.AlertNoFlows, Annotations: map[string]string{ "description": "NetObserv flowlogs-pipeline is not receiving any flow, this is either a connection issue with the agent, or an agent issue", "summary": "NetObserv flowlogs-pipeline is not receiving any flow", @@ -741,9 +741,9 @@ func (b *builder) prometheusRule() 
*monitoringv1.PrometheusRule { } // Flows getting dropped by loki library - if shouldAddAlert("NetObservLokiError", b.desired.Processor.Metrics.DisableAlerts) { + if shouldAddAlert(flowslatest.AlertLokiError, b.desired.Processor.Metrics.DisableAlerts) { rules = append(rules, monitoringv1.Rule{ - Alert: "NetObservLokiError", + Alert: flowslatest.AlertLokiError, Annotations: map[string]string{ "description": "NetObserv flowlogs-pipeline is dropping flows because of loki errors, loki may be down or having issues ingesting every flows. Please check loki and flowlogs-pipeline logs.", "summary": "NetObserv flowlogs-pipeline is dropping flows because of loki errors", diff --git a/docs/FlowCollector.md b/docs/FlowCollector.md index 00045a3e3..6586c6e93 100644 --- a/docs/FlowCollector.md +++ b/docs/FlowCollector.md @@ -7294,9 +7294,9 @@ Metrics define the processor configuration regarding metrics