Skip to content

Commit

Permalink
helm: Added support for operator failover
Browse files Browse the repository at this point in the history
Added support to run multiple instances of the Tetragon
Operator. This also includes slight adjustments to the
rollingUpdate strategy and adding a podAntiAffinity. This
prevents upgrades from getting stuck because of
unreachable maxUnavailable values, and keeps both
replicas from running on the same node.

Signed-off-by: Philip Schmid <[email protected]>
  • Loading branch information
PhilipSchmid committed Feb 27, 2025
1 parent 9f60b23 commit d7193ed
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 10 deletions.
6 changes: 4 additions & 2 deletions docs/content/en/docs/reference/helm-chart.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions install/kubernetes/tetragon/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions install/kubernetes/tetragon/templates/operator_configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ data:
skip-pod-info-crd: {{ not .Values.tetragonOperator.podInfo.enabled | quote }}
skip-tracing-policy-crd: {{ not .Values.tetragonOperator.tracingPolicy.enabled | quote }}
force-update-crds: {{ .Values.tetragonOperator.forceUpdateCRDs | quote }}
{{- if gt (int .Values.tetragonOperator.replicas) 1 }}
leader-elect: "true"
{{- end }}
{{- end }}
20 changes: 16 additions & 4 deletions install/kubernetes/tetragon/templates/operator_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
selector:
matchLabels:
{{- include "tetragon-operator.selectorLabels" . | nindent 6 }}
replicas: 1
replicas: {{ .Values.tetragonOperator.replicas }}
template:
metadata:
{{- with .Values.tetragonOperator.podAnnotations }}
Expand Down Expand Up @@ -107,8 +107,20 @@ spec:
{{- with .Values.tetragonOperator.extraVolumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tetragonOperator.strategy }}
# Ensure operator update on single node k8s clusters, by using rolling update with maxUnavailable=100% in case
# of one replica and no user configured Recreate strategy.
# Otherwise an update might get stuck due to the default maxUnavailable=50% in combination with the
# podAntiAffinity which prevents deployments of multiple operator replicas on the same node.
{{- if and (eq (.Values.tetragonOperator.replicas | toString) "1") (eq .Values.tetragonOperator.strategy.type "RollingUpdate") }}
strategy:
{{- toYaml . | nindent 4 }}
{{- end }}
rollingUpdate:
maxSurge: {{ .Values.tetragonOperator.strategy.rollingUpdate.maxSurge }}
maxUnavailable: 100%
type: RollingUpdate
{{- else }}
{{- with .Values.tetragonOperator.strategy }}
strategy:
{{- toYaml . | trim | nindent 4 }}
{{- end }}
{{- end }}
{{- end }}
22 changes: 22 additions & 0 deletions install/kubernetes/tetragon/templates/operator_role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{- /*
Namespaced Role for the Tetragon operator's leader election.

Rendered only when all of the following hold:
  - .Values.tetragonOperator.enabled is truthy
  - .Values.serviceAccount.create is truthy
  - .Values.tetragonOperator.replicas (cast to int) is greater than 1
*/ -}}
{{- if and .Values.tetragonOperator.enabled .Values.serviceAccount.create (gt (int .Values.tetragonOperator.replicas) 1) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ .Release.Name }}-operator
  # Quoted so that namespace names which look like numbers, booleans or null
  # (e.g. "123", "true", "no") are still rendered as YAML strings.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "tetragon-operator.labels" . | nindent 4 }}
rules:
  # For tetragon-operator running with multiple replicas.
  #
  # Tetragon operator running in HA mode requires the use of a ResourceLock
  # for leader election between multiple running instances, so it needs to
  # create, read and update Lease objects in its own namespace.
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - create
      - get
      - update
{{- end }}
17 changes: 17 additions & 0 deletions install/kubernetes/tetragon/templates/operator_rolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{{- /*
RoleBinding that grants the "<release>-operator" Role to the Tetragon operator
ServiceAccount in the release namespace.

Rendered only when all of the following hold:
  - .Values.tetragonOperator.enabled is truthy
  - .Values.serviceAccount.create is truthy
  - .Values.tetragonOperator.replicas (cast to int) is greater than 1
*/ -}}
{{- if and .Values.tetragonOperator.enabled .Values.serviceAccount.create (gt (int .Values.tetragonOperator.replicas) 1) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ .Release.Name }}-operator-rolebinding
  # Quoted so that namespace names which look like numbers, booleans or null
  # (e.g. "123", "true", "no") are still rendered as YAML strings.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "tetragon-operator.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ .Release.Name }}-operator
subjects:
  - kind: ServiceAccount
    namespace: {{ .Release.Namespace | quote }}
    # NOTE(review): left unquoted because the helper's exact output (e.g.
    # trailing whitespace) is defined elsewhere; confirm before adding quote.
    name: {{ include "tetragon-operator.serviceAccount" . }}
{{- end }}
16 changes: 14 additions & 2 deletions install/kubernetes/tetragon/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ tetragon:
tetragonOperator:
# -- Enables the Tetragon Operator.
enabled: true
# -- Number of replicas to run for the tetragon-operator deployment
replicas: 1
# -- Annotations for the Tetragon Operator Deployment.
annotations: {}
# -- Annotations for the Tetragon Operator Deployment Pods.
Expand Down Expand Up @@ -283,11 +285,21 @@ tetragonOperator:
cpu: 10m
memory: 64Mi
# -- Update strategy for the Tetragon Operator Deployment.
strategy: {}
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 25%
maxUnavailable: 50%
# -- Steer the Tetragon Operator Deployment Pod placement via nodeSelector, tolerations and affinity rules.
nodeSelector: {}
tolerations: []
affinity: {}
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- topologyKey: kubernetes.io/hostname
labelSelector:
matchLabels:
app.kubernetes.io/name: tetragon-operator
# -- tetragon-operator image.
image:
override: ~
Expand Down

0 comments on commit d7193ed

Please sign in to comment.