Skip to content

Commit

Permalink
operator: Added K8s lease configuration options
Browse files Browse the repository at this point in the history
Signed-off-by: Philip Schmid <[email protected]>
  • Loading branch information
PhilipSchmid committed Feb 27, 2025
1 parent d7193ed commit 28f2479
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 23 deletions.
6 changes: 6 additions & 0 deletions docs/content/en/docs/reference/helm-chart.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions install/kubernetes/tetragon/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ data:
skip-tracing-policy-crd: {{ not .Values.tetragonOperator.tracingPolicy.enabled | quote }}
force-update-crds: {{ .Values.tetragonOperator.forceUpdateCRDs | quote }}
{{- if gt (int .Values.tetragonOperator.replicas) 1 }}
leader-elect: "true"
leader-elect: {{ .Values.tetragonOperator.failoverLease.enabled | quote }}
leader-election-namespace: {{ .Values.tetragonOperator.failoverLease.namespace | quote }}
leader-election-lease-duration: {{ .Values.tetragonOperator.failoverLease.leaseDuration | quote }}
leader-election-renew-deadline: {{ .Values.tetragonOperator.failoverLease.leaseRenewDeadline | quote }}
leader-election-retry-period: {{ .Values.tetragonOperator.failoverLease.leaseRetryPeriod | quote }}
{{- end }}
{{- end }}
12 changes: 12 additions & 0 deletions install/kubernetes/tetragon/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,18 @@ tetragonOperator:
enabled: true
# -- Number of replicas to run for the tetragon-operator deployment
replicas: 1
# -- Lease handling for an automated failover when running multiple replicas
failoverLease:
# -- Enable lease failover functionality
enabled: false
# -- Kubernetes namespace in which the Lease resource is created. If empty, defaults to the namespace in which Tetragon is deployed.
namespace: ""
# -- If the lease is not renewed within this duration, the current leader is considered dead and a new leader is picked
leaseDuration: 15s
# -- The duration for which the current leader keeps retrying to renew the lease before giving up leadership
leaseRenewDeadline: 5s
# -- The interval that leader election candidates wait between attempts to acquire or renew the lease
leaseRetryPeriod: 2s
# -- Annotations for the Tetragon Operator Deployment.
annotations: {}
# -- Annotations for the Tetragon Operator Deployment Pods.
Expand Down
51 changes: 29 additions & 22 deletions operator/cmd/serve/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package serve

import (
"fmt"
"time"

"github.com/bombsimon/logrusr/v4"
"github.com/cilium/cilium/pkg/logging"
Expand All @@ -25,11 +26,15 @@ import (
)

var (
metricsAddr string
enableLeaderElection bool
probeAddr string
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
metricsAddr string
enableLeaderElection bool
leaderElectionNamespace string
leaderElectionLeaseDuration time.Duration
leaderElectionRenewDeadline time.Duration
leaderElectionRetryPeriod time.Duration
probeAddr string
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
)

func init() {
Expand All @@ -46,23 +51,17 @@ func New() *cobra.Command {
ctrl.SetLogger(log)
common.Initialize(cmd)
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{BindAddress: metricsAddr},
WebhookServer: webhook.NewServer(webhook.Options{Port: 9443}),
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "f161f714.tetragon.cilium.io",
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
// when the Manager ends. This requires the binary to immediately end when the
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
// speeds up voluntary leader transitions as the new leader don't have to wait
// LeaseDuration time first.
//
// In the default scaffold provided, the program ends immediately after
// the manager stops, so would be fine to enable this option. However,
// if you are doing or is intended to do any operation such as perform cleanups
// after the manager stops then its usage might be unsafe.
// LeaderElectionReleaseOnCancel: true,
Scheme: scheme,
Metrics: metricsserver.Options{BindAddress: metricsAddr},
WebhookServer: webhook.NewServer(webhook.Options{Port: 9443}),
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "tetragon-operator-resource-lock",
LeaderElectionNamespace: leaderElectionNamespace,
LeaderElectionReleaseOnCancel: true,
LeaseDuration: &leaderElectionLeaseDuration,
RenewDeadline: &leaderElectionRenewDeadline,
RetryPeriod: &leaderElectionRetryPeriod,
})
if err != nil {
return fmt.Errorf("unable to start manager: %w", err)
Expand Down Expand Up @@ -95,6 +94,14 @@ func New() *cobra.Command {
cmd.Flags().BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
cmd.Flags().StringVar(&leaderElectionNamespace, "leader-election-namespace", "",
"Kubernetes namespace in which the leader election Lease resource should be created.")
cmd.Flags().DurationVar(&leaderElectionLeaseDuration, "leader-election-lease-duration", 15*time.Second,
"Duration that non-leader operator candidates will wait before forcing to acquire leadership")
cmd.Flags().DurationVar(&leaderElectionRenewDeadline, "leader-election-renew-deadline", 5*time.Second,
"Duration that current acting master will retry refreshing leadership in before giving up the lock")
cmd.Flags().DurationVar(&leaderElectionRetryPeriod, "leader-election-retry-period", 2*time.Second,
"Duration that LeaderElector clients should wait between retries of the actions")
common.AddCommonFlags(&cmd)
viper.BindPFlags(cmd.Flags())
return &cmd
Expand Down

0 comments on commit 28f2479

Please sign in to comment.