Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve logging for fdbclient and lock client to see what subreconciler issued the command #2116

Merged
merged 2 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions controllers/bounce_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ import (
type bounceProcesses struct{}

// reconcile runs the reconciler's work.
func (bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
if !pointer.BoolDeref(cluster.Spec.AutomationOptions.KillProcesses, true) {
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down Expand Up @@ -111,7 +111,7 @@ func (bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReconc
var lockClient fdbadminclient.LockClient
useLocks := cluster.ShouldUseLocks()
if useLocks {
lockClient, err = r.getLockClient(cluster)
lockClient, err = r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/change_coordinators.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func (c changeCoordinators) reconcile(ctx context.Context, r *FoundationDBCluste
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/check_client_compatibility.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (c checkClientCompatibility) reconcile(_ context.Context, r *FoundationDBCl
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/choose_removals.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func (c chooseRemovals) reconcile(ctx context.Context, r *FoundationDBClusterRec

// If the status is not cached, we have to fetch it.
if status == nil {
adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
28 changes: 19 additions & 9 deletions controllers/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer/yaml"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/uuid"
"regexp"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/handler"
Expand Down Expand Up @@ -123,7 +124,7 @@ func (r *FoundationDBClusterReconciler) Reconcile(ctx context.Context, request c
return ctrl.Result{}, err
}

clusterLog := globalControllerLogger.WithValues("namespace", cluster.Namespace, "cluster", cluster.Name)
clusterLog := globalControllerLogger.WithValues("namespace", cluster.Namespace, "cluster", cluster.Name, "traceID", uuid.NewUUID())
cacheStatus := cluster.CacheDatabaseStatusForReconciliation(r.CacheDatabaseStatusForReconciliationDefault)
// Printout the duration of the reconciliation, independent if the reconciliation was successful or had an error.
startTime := time.Now()
Expand All @@ -142,7 +143,7 @@ func (r *FoundationDBClusterReconciler) Reconcile(ctx context.Context, request c
return ctrl.Result{}, err
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(clusterLog, cluster)
if err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -452,14 +453,23 @@ func (r *FoundationDBClusterReconciler) getDatabaseClientProvider() fdbadminclie
panic("Cluster reconciler does not have a DatabaseClientProvider defined")
}

func (r *FoundationDBClusterReconciler) getLockClient(cluster *fdbv1beta2.FoundationDBCluster) (fdbadminclient.LockClient, error) {
return r.getDatabaseClientProvider().GetLockClient(cluster)
// getAdminClient gets the admin client for a reconciler.
func (r *FoundationDBClusterReconciler) getAdminClient(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster) (fdbadminclient.AdminClient, error) {
if r.DatabaseClientProvider != nil {
return r.DatabaseClientProvider.GetAdminClientWithLogger(cluster, r, logger)
}

panic("Cluster reconciler does not have a DatabaseClientProvider defined")
}

func (r *FoundationDBClusterReconciler) getLockClient(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster) (fdbadminclient.LockClient, error) {
return r.getDatabaseClientProvider().GetLockClientWithLogger(cluster, logger)
}

// takeLock attempts to acquire a lock.
func (r *FoundationDBClusterReconciler) takeLock(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster, action string) (bool, error) {
logger.Info("Taking lock on cluster", "namespace", cluster.Namespace, "cluster", cluster.Name, "action", action)
lockClient, err := r.getLockClient(cluster)
logger.Info("Taking lock on cluster", "action", action)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return false, err
}
Expand All @@ -477,8 +487,8 @@ func (r *FoundationDBClusterReconciler) takeLock(logger logr.Logger, cluster *fd

// releaseLock attempts to release a lock.
func (r *FoundationDBClusterReconciler) releaseLock(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster) error {
logger.Info("Release lock on cluster", "namespace", cluster.Namespace, "cluster", cluster.Name)
lockClient, err := r.getLockClient(cluster)
logger.Info("Release lock on cluster")
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return err
}
Expand Down Expand Up @@ -575,7 +585,7 @@ func (r *FoundationDBClusterReconciler) getStatusFromClusterOrDummyStatus(logger
cluster.Status.ConnectionString = connectionString
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/cluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2516,7 +2516,7 @@ var _ = Describe("cluster_controller", func() {
})

It("should update the deny list", func() {
lockClient, err := clusterReconciler.getLockClient(cluster)
lockClient, err := clusterReconciler.getLockClient(testLogger, cluster)
Expect(err).NotTo(HaveOccurred())
list, err := lockClient.GetDenyList()
Expect(err).NotTo(HaveOccurred())
Expand Down
20 changes: 11 additions & 9 deletions controllers/exclude_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,13 @@ type excludeProcesses struct{}

// reconcile runs the reconciler's work.
func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
defer adminClient.Close()

adminClient.WithValues()
// If the status is not cached, we have to fetch it.
if status == nil {
status, err = adminClient.GetStatus()
Expand All @@ -72,7 +73,7 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR

// Make sure the exclusions are coordinated across multiple operator instances.
if cluster.ShouldUseLocks() {
lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand All @@ -83,9 +84,9 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
}

defer func() {
err = lockClient.ReleaseLock()
if err != nil {
logger.Error(err, "could not release lock")
lockErr := lockClient.ReleaseLock()
if lockErr != nil {
logger.Error(lockErr, "could not release lock")
}
}()
}
Expand Down Expand Up @@ -180,11 +181,12 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
return &requeue{curError: err, delayedRequeue: true}
}

if coordinatorErr != nil {
return &requeue{curError: err, delayedRequeue: true}
}
// Only if a coordinator was excluded we have to check for an error and update the cluster.
if coordinatorExcluded {
if coordinatorErr != nil {
return &requeue{curError: coordinatorErr, delayedRequeue: true}
}

if coordinatorExcluded && coordinatorErr == nil {
err = r.updateOrApply(ctx, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
Expand Down
4 changes: 2 additions & 2 deletions controllers/maintenance_mode_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import (
type maintenanceModeChecker struct{}

// reconcile runs the reconciler's work.
func (maintenanceModeChecker) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c maintenanceModeChecker) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
if !cluster.ResetMaintenanceMode() {
return nil
}

adminClient, err := r.DatabaseClientProvider.GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/remove_incompatible_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import (
type removeIncompatibleProcesses struct{}

// reconcile runs the reconciler's work.
func (removeIncompatibleProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c removeIncompatibleProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
err := processIncompatibleProcesses(ctx, r, logger, cluster, status)

if err != nil {
Expand Down Expand Up @@ -64,7 +64,7 @@ func processIncompatibleProcesses(ctx context.Context, r *FoundationDBClusterRec

// If the status is not cached, we have to fetch it.
if status == nil {
adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r.Client)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return err
}
Expand Down
24 changes: 9 additions & 15 deletions controllers/remove_process_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"context"
"errors"
"fmt"
"github.com/FoundationDB/fdb-kubernetes-operator/pkg/fdbadminclient"
"net"
"strconv"
"time"
Expand All @@ -45,7 +46,7 @@ type removeProcessGroups struct{}

// reconcile runs the reconciler's work.
func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
adminClient, err := r.DatabaseClientProvider.GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down Expand Up @@ -116,7 +117,7 @@ func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClust
// last minute).
waitTime, allowed := removals.RemovalAllowed(lastDeletion, time.Now().Unix(), cluster.GetWaitBetweenRemovalsSeconds())
if !allowed {
return &requeue{message: fmt.Sprintf("not allowed to remove process groups, waiting: %v", waitTime), delay: time.Duration(waitTime) * time.Second}
return &requeue{message: fmt.Sprintf("not allowed to remove process groups, waiting: %vs", waitTime), delay: time.Duration(waitTime) * time.Second}
}
}

Expand All @@ -129,7 +130,7 @@ func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClust

// This will return a map of the newly removed ProcessGroups and the ProcessGroups with the ResourcesTerminating condition
removedProcessGroups := r.removeProcessGroups(ctx, logger, cluster, zoneRemovals, zonedRemovals[removals.TerminatingZone])
err = includeProcessGroup(ctx, logger, r, cluster, removedProcessGroups, status)
err = includeProcessGroup(ctx, logger, r, cluster, removedProcessGroups, status, adminClient)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down Expand Up @@ -257,7 +258,7 @@ func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClus
return true, canBeIncluded, nil
}

func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus) error {
func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus, adminClient fdbadminclient.AdminClient) error {
fdbProcessesToInclude, err := getProcessesToInclude(logger, cluster, removedProcessGroups, status)
if err != nil {
return err
Expand All @@ -269,7 +270,7 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD

// Make sure the inclusion are coordinated across multiple operator instances.
if cluster.ShouldUseLocks() {
lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return err
}
Expand All @@ -280,9 +281,9 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
}

defer func() {
err = lockClient.ReleaseLock()
if err != nil {
logger.Error(err, "could not release lock")
releaseErr := lockClient.ReleaseLock()
if releaseErr != nil {
logger.Error(releaseErr, "could not release lock")
}
}()
}
Expand All @@ -293,14 +294,7 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
return err
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
if err != nil {
return err
}
defer adminClient.Close()

r.Recorder.Event(cluster, corev1.EventTypeNormal, "IncludingProcesses", fmt.Sprintf("Including removed processes: %v", fdbProcessesToInclude))

err = adminClient.IncludeProcesses(fdbProcessesToInclude)
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion controllers/replace_failed_process_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (c replaceFailedProcessGroups) reconcile(ctx context.Context, r *Foundation

// If the status is not cached, we have to fetch it.
if status == nil {
adminClient, err := r.DatabaseClientProvider.GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/update_database_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (u updateDatabaseConfiguration) reconcile(_ context.Context, r *FoundationD
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/update_lock_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import (
type updateLockConfiguration struct{}

// reconcile runs the reconciler's work.
func (updateLockConfiguration) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, _ *fdbv1beta2.FoundationDBStatus, _ logr.Logger) *requeue {
func (updateLockConfiguration) reconcile(_ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, _ *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
if len(cluster.Spec.LockOptions.DenyList) == 0 || !cluster.ShouldUseLocks() || !cluster.Status.Configured {
return nil
}

lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
4 changes: 2 additions & 2 deletions controllers/update_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import (
type updatePods struct{}

// reconcile runs the reconciler's work.
func (updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (u updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
// TODO(johscheuer): Remove the pvc map an make direct calls.
pvcs := &corev1.PersistentVolumeClaimList{}
err := r.List(ctx, pvcs, internal.GetPodListOptions(cluster, "", "")...)
Expand Down Expand Up @@ -72,7 +72,7 @@ func (updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconcile
return nil
}

adminClient, err := r.getDatabaseClientProvider().GetAdminClient(cluster, r.Client)
adminClient, err := r.getAdminClient(logger, cluster)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand Down
6 changes: 3 additions & 3 deletions controllers/update_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ import (
type updateStatus struct{}

// reconcile runs the reconciler's work.
func (updateStatus) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, databaseStatus *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
func (c updateStatus) reconcile(ctx context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, databaseStatus *fdbv1beta2.FoundationDBStatus, logger logr.Logger) *requeue {
originalStatus := cluster.Status.DeepCopy()
clusterStatus := fdbv1beta2.FoundationDBClusterStatus{}
clusterStatus.Generations.Reconciled = cluster.Status.Generations.Reconciled
Expand Down Expand Up @@ -209,7 +209,7 @@ func (updateStatus) reconcile(ctx context.Context, r *FoundationDBClusterReconci
}

if len(cluster.Spec.LockOptions.DenyList) > 0 && cluster.ShouldUseLocks() && clusterStatus.Configured {
lockClient, err := r.getLockClient(cluster)
lockClient, err := r.getLockClient(logger, cluster)
if err != nil {
return &requeue{curError: err}
}
Expand Down Expand Up @@ -308,7 +308,7 @@ func tryConnectionOptions(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCl
for _, connectionString := range connectionStrings {
logger.Info("Attempting to get connection string from cluster", "connectionString", connectionString)
cluster.Status.ConnectionString = connectionString
adminClient, clientErr := r.getDatabaseClientProvider().GetAdminClient(cluster, r)
adminClient, clientErr := r.getAdminClient(logger, cluster)
if clientErr != nil {
return originalConnectionString, clientErr
}
Expand Down
3 changes: 1 addition & 2 deletions fdbclient/admin_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,12 @@ type cliAdminClient struct {
}

// NewCliAdminClient generates an Admin client for a cluster
func NewCliAdminClient(cluster *fdbv1beta2.FoundationDBCluster, _ client.Client, log logr.Logger) (fdbadminclient.AdminClient, error) {
func NewCliAdminClient(cluster *fdbv1beta2.FoundationDBCluster, _ client.Client, logger logr.Logger) (fdbadminclient.AdminClient, error) {
clusterFile, err := createClusterFile(cluster)
if err != nil {
return nil, err
}

logger := log.WithValues("namespace", cluster.Namespace, "cluster", cluster.Name)
return &cliAdminClient{
Cluster: cluster,
clusterFilePath: clusterFile,
Expand Down
Loading
Loading