Skip to content

Commit

Permalink
Improve logging for fdbclient and lock client to see what subreconcil…
Browse files Browse the repository at this point in the history
…er issued the command
  • Loading branch information
johscheuer committed Aug 27, 2024
1 parent df0ef6d commit a83c6f8
Show file tree
Hide file tree
Showing 19 changed files with 93 additions and 57 deletions.
6 changes: 3 additions & 3 deletions controllers/bounce_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ import (
type bounceProcesses struct{}

// reconcile runs the reconciler's work.
func (bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
if !pointer.BoolDeref(cluster.Spec.AutomationOptions.KillProcesses, true) {
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down Expand Up @@ -111,7 +111,7 @@ func (bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReconc
var lockClient fdbadminclient.LockClient
useLocks := cluster.ShouldUseLocks()
if useLocks {
lockClient, err = r.getLockClient(cluster)
lockClient, err = r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/change_coordinators.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func (c changeCoordinators) reconcile(ctx context.Context, r *FoundationDBCluste
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/check_client_compatibility.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (c checkClientCompatibility) reconcile(_ context.Context, r *FoundationDBCl
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/choose_removals.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func (c chooseRemovals) reconcile(ctx context.Context, r *FoundationDBClusterRec

// If the status is not cached, we have to fetch it.
if status == nil {
adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
28 changes: 19 additions & 9 deletions controllers/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer/yaml"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/uuid"
"regexp"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/handler"
Expand Down Expand Up @@ -123,7 +124,7 @@ func (r *FoundationDBClusterReconciler) Reconcile(ctx context.Context, request c
return ctrl.Result{}, err
}

clusterLog := globalControllerLogger.WithValues("namespace", cluster.Namespace, "cluster", cluster.Name)
clusterLog := globalControllerLogger.WithValues("namespace", cluster.Namespace, "cluster", cluster.Name, "traceID", uuid.NewUUID())
cacheStatus := cluster.CacheDatabaseStatusForReconciliation(r.CacheDatabaseStatusForReconciliationDefault)
// Printout the duration of the reconciliation, independent if the reconciliation was successful or had an error.
startTime := time.Now()
Expand All @@ -142,7 +143,7 @@ func (r *FoundationDBClusterReconciler) Reconcile(ctx context.Context, request c
return ctrl.Result{}, err
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(clusterLog, cluster)
if err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -452,14 +453,23 @@ func (r *FoundationDBClusterReconciler) getDatabaseClientProvider() fdbadminclie
panic("Cluster reconciler does not have a DatabaseClientProvider defined")
}

func (r *FoundationDBClusterReconciler) getLockClient(cluster *fdbv1beta2.FoundationDBCluster) (fdbadminclient.LockClient, error) {
return r.getDatabaseClientProvider().GetLockClient(cluster)
// getAdminClient gets the admin client for a reconciler.
func (r *FoundationDBClusterReconciler) getAdminClient(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster) (fdbadminclient.AdminClient, error) {
if r.DatabaseClientProvider != nil {
return r.DatabaseClientProvider.GetAdminClientWithLogger(cluster, r, logger)
}

panic("Cluster reconciler does not have a DatabaseClientProvider defined")
}

func (r *FoundationDBClusterReconciler) getLockClient(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster) (fdbadminclient.LockClient, error) {
return r.getDatabaseClientProvider().GetLockClientWithLogger(cluster, logger)
}

// takeLock attempts to acquire a lock.
func (r *FoundationDBClusterReconciler) takeLock(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster, action string) (bool, error) {
logger.Info("Taking lock on cluster", "namespace", cluster.Namespace, "cluster", cluster.Name, "action", action)
lockClient, err := r.getLockClient(cluster)
logger.Info("Taking lock on cluster", "action", action)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return false, err
}
Expand All @@ -477,8 +487,8 @@ func (r *FoundationDBClusterReconciler) takeLock(logger logr.Logger, cluster *fd

// releaseLock attempts to release a lock.
func (r *FoundationDBClusterReconciler) releaseLock(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster) error {
logger.Info("Release lock on cluster", "namespace", cluster.Namespace, "cluster", cluster.Name)
lockClient, err := r.getLockClient(cluster)
logger.Info("Release lock on cluster")
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return err
}
Expand Down Expand Up @@ -575,7 +585,7 @@ func (r *FoundationDBClusterReconciler) getStatusFromClusterOrDummyStatus(logger
cluster.Status.ConnectionString = connectionString
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/cluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2516,7 +2516,7 @@ var _ = Describe("cluster_controller", func() {
})

It("should update the deny list", func() {
lockClient, err := clusterReconciler.getLockClient(cluster)
lockClient, err := clusterReconciler.getLockClient(testLogger, cluster)
Expect(err).NotTo(HaveOccurred())
list, err := lockClient.GetDenyList()
Expect(err).NotTo(HaveOccurred())
Expand Down
20 changes: 11 additions & 9 deletions controllers/exclude_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,13 @@ type excludeProcesses struct{}

// reconcile runs the reconciler's work.
func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
defer adminClient.Close()

adminClient.WithValues()
// If the status is not cached, we have to fetch it.
if status == nil {
status, err = adminClient.GetStatus()
Expand All @@ -72,7 +73,7 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR

// Make sure the exclusions are coordinated across multiple operator instances.
if cluster.ShouldUseLocks() {
lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand All @@ -83,9 +84,9 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
}

defer func() {
err = lockClient.ReleaseLock()
if err != nil {
logger.Error(err, "could not release lock")
lockErr := lockClient.ReleaseLock()
if lockErr != nil {
logger.Error(lockErr, "could not release lock")
}
}()
}
Expand Down Expand Up @@ -180,11 +181,12 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
return &requeue{curError: err, delayedRequeue: true}
}

if coordinatorErr != nil {
return &requeue{curError: err, delayedRequeue: true}
}
// Only if a coordinator was excluded we have to check for an error and update the cluster.
if coordinatorExcluded {
if coordinatorErr != nil {
return &requeue{curError: coordinatorErr, delayedRequeue: true}
}

if coordinatorExcluded && coordinatorErr == nil {
err = r.updateOrApply(ctx, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
Expand Down
4 changes: 2 additions & 2 deletions controllers/maintenance_mode_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import (
type maintenanceModeChecker struct{}

// reconcile runs the reconciler's work.
func (maintenanceModeChecker) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c maintenanceModeChecker) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
if !cluster.ResetMaintenanceMode() {
return nil
}

adminClient, err := r.DatabaseClientProvider.GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/remove_incompatible_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import (
type removeIncompatibleProcesses struct{}

// reconcile runs the reconciler's work.
func (removeIncompatibleProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c removeIncompatibleProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
err := processIncompatibleProcesses(ctx, r, logger, cluster, status)

if err != nil {
Expand Down Expand Up @@ -64,7 +64,7 @@ func processIncompatibleProcesses(ctx context.Context, r *FoundationDBClusterRec

// If the status is not cached, we have to fetch it.
if status == nil {
adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r.Client)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return err
}
Expand Down
24 changes: 9 additions & 15 deletions controllers/remove_process_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"context"
"errors"
"fmt"
"github.com/FoundationDB/fdb-kubernetes-operator/pkg/fdbadminclient"
"net"
"strconv"
"time"
Expand All @@ -45,7 +46,7 @@ type removeProcessGroups struct{}

// reconcile runs the reconciler's work.
func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
adminClient, err := r.DatabaseClientProvider.GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down Expand Up @@ -116,7 +117,7 @@ func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClust
// last minute).
waitTime, allowed := removals.RemovalAllowed(lastDeletion, time.Now().Unix(), cluster.GetWaitBetweenRemovalsSeconds())
if !allowed {
return &requeue{message: fmt.Sprintf("not allowed to remove process groups, waiting: %v", waitTime), delay: time.Duration(waitTime) * time.Second}
return &requeue{message: fmt.Sprintf("not allowed to remove process groups, waiting: %vs", waitTime), delay: time.Duration(waitTime) * time.Second}
}
}

Expand All @@ -129,7 +130,7 @@ func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClust

// This will return a map of the newly removed ProcessGroups and the ProcessGroups with the ResourcesTerminating condition
removedProcessGroups := r.removeProcessGroups(ctx, logger, cluster, zoneRemovals, zonedRemovals[removals.TerminatingZone])
err = includeProcessGroup(ctx, logger, r, cluster, removedProcessGroups, status)
err = includeProcessGroup(ctx, logger, r, cluster, removedProcessGroups, status, adminClient)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down Expand Up @@ -257,7 +258,7 @@ func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClus
return true, canBeIncluded, nil
}

func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus) error {
func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus, adminClient fdbadminclient.AdminClient) error {
fdbProcessesToInclude, err := getProcessesToInclude(logger, cluster, removedProcessGroups, status)
if err != nil {
return err
Expand All @@ -269,7 +270,7 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD

// Make sure the inclusion are coordinated across multiple operator instances.
if cluster.ShouldUseLocks() {
lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return err
}
Expand All @@ -280,9 +281,9 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
}

defer func() {
err = lockClient.ReleaseLock()
if err != nil {
logger.Error(err, "could not release lock")
releaseErr := lockClient.ReleaseLock()
if releaseErr != nil {
logger.Error(releaseErr, "could not release lock")
}
}()
}
Expand All @@ -293,14 +294,7 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
return err
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
if err != nil {
return err
}
defer adminClient.Close()

r.Recorder.Event(cluster, corev1.EventTypeNormal, "IncludingProcesses", fmt.Sprintf("Including removed processes: %v", fdbProcessesToInclude))

err = adminClient.IncludeProcesses(fdbProcessesToInclude)
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion controllers/replace_failed_process_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (c replaceFailedProcessGroups) reconcile(ctx context.Context, r *Foundation

// If the status is not cached, we have to fetch it.
if status == nil {
adminClient, err := r.DatabaseClientProvider.GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/update_database_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (u updateDatabaseConfiguration) reconcile(_ context.Context, r *FoundationD
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/update_lock_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import (
type updateLockConfiguration struct{}

// reconcile runs the reconciler's work.
func (updateLockConfiguration) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, _ *fdbv1beta2.FoundationDBStatus, _ logr.Logger) *requeue {
func (updateLockConfiguration) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, _ *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
if len(cluster.Spec.LockOptions.DenyList) == 0 || !cluster.ShouldUseLocks() || !cluster.Status.Configured {
return nil
}

lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/update_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import (
type updatePods struct{}

// reconcile runs the reconciler's work.
func (updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (u updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
// TODO(johscheuer): Remove the pvc map an make direct calls.
pvcs := &corev1.PersistentVolumeClaimList{}
err := r.List(ctx, pvcs, internal.GetPodListOptions(cluster, "", "")...)
Expand Down Expand Up @@ -72,7 +72,7 @@ func (updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconcile
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r.Client)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
6 changes: 3 additions & 3 deletions controllers/update_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ import (
type updateStatus struct{}

// reconcile runs the reconciler's work.
func (updateStatus) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, databaseStatus *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c updateStatus) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, databaseStatus *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
originalStatus := cluster.Status.DeepCopy()
clusterStatus := fdbv1beta2.FoundationDBClusterStatus{}
clusterStatus.Generations.Reconciled = cluster.Status.Generations.Reconciled
Expand Down Expand Up @@ -209,7 +209,7 @@ func (updateStatus) reconcile(ctx context.Context, r *FoundationDBClusterReconci
}

if len(cluster.Spec.LockOptions.DenyList) > 0 && cluster.ShouldUseLocks() && clusterStatus.Configured {
lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down Expand Up @@ -308,7 +308,7 @@ func tryConnectionOptions(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCl
for _, connectionString := range connectionStrings {
logger.Info("Attempting to get connection string from cluster", "connectionString", connectionString)
cluster.Status.ConnectionString = connectionString
adminClient, clientErr := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, clientErr := r.getAdminClient(logger, cluster)
if clientErr != nil {
return originalConnectionString, clientErr
}
Expand Down
Loading

0 comments on commit a83c6f8

Please sign in to comment.