Skip to content

Commit

Permalink
Add ControlPlaneVersionSkew MS preflight check
Browse files Browse the repository at this point in the history
  • Loading branch information
sbueringer committed Mar 7, 2025
1 parent 3a8728f commit 42bdca3
Show file tree
Hide file tree
Showing 9 changed files with 306 additions and 15 deletions.
17 changes: 14 additions & 3 deletions api/v1beta1/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,27 +218,38 @@ const (

// MachineSetPreflightCheckKubeadmVersionSkew is the name of the preflight check
// that verifies if the machine being created or remediated for the MachineSet conforms to the kubeadm version
// skew policy that requires the machine to be at the same version as the control plane.
// skew policy that requires the machine to be at the same minor version as the control plane.
// Note: This is a stopgap while the root cause of the problem is fixed in kubeadm; this check will become
// a no-op when this check will be available in kubeadm, and then eventually be dropped when all the
// supported Kuberenetes/kubeadm versions have implemented the fix.
// supported Kubernetes/kubeadm versions have implemented the fix.
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster),
// the ControlPlane has a version, the MachineSet has a version and the MachineSet uses the Kubeadm bootstrap
// provider.
MachineSetPreflightCheckKubeadmVersionSkew MachineSetPreflightCheck = "KubeadmVersionSkew"

// MachineSetPreflightCheckKubernetesVersionSkew is the name of the preflight check that verifies
// if the machines being created or remediated for the MachineSet conform to the Kubernetes version skew policy
// that requires the machines to be at a version that is not more than 2 minor lower than the ControlPlane version.
// that requires the machines to be at a version that is not more than 2 (< v1.28) or 3 (>= v1.28) minor
// lower than the ControlPlane version.
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster),
// the ControlPlane has a version and the MachineSet has a version.
MachineSetPreflightCheckKubernetesVersionSkew MachineSetPreflightCheck = "KubernetesVersionSkew"

// MachineSetPreflightCheckControlPlaneIsStable is the name of the preflight check
// that verifies if the control plane is not provisioning and not upgrading.
// For Clusters with a managed topology it also checks if a control plane upgrade is pending.
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster)
// and the ControlPlane has a version.
MachineSetPreflightCheckControlPlaneIsStable MachineSetPreflightCheck = "ControlPlaneIsStable"

// MachineSetPreflightCheckControlPlaneVersionSkew is the name of the preflight check
// that verifies if the machine being created or remediated for the MachineSet has exactly the same version
// as the control plane.
// The idea behind this check is that it doesn't make sense to create a Machine with an old version, if we already
// know based on the control plane version that the Machine has to be replaced soon.
// The preflight check is only run if the Cluster has a managed topology, a ControlPlane is used (controlPlaneRef
// must exist in the Cluster), the ControlPlane has a version and the MachineSet has a version.
MachineSetPreflightCheckControlPlaneVersionSkew MachineSetPreflightCheck = "ControlPlaneVersionSkew"
)

// NodeOutdatedRevisionTaint can be added to Nodes at rolling updates in general triggered by updating MachineDeployment
Expand Down
5 changes: 5 additions & 0 deletions controllers/alias.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ import (
"regexp"
"time"

"k8s.io/apimachinery/pkg/util/sets"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/controllers/clustercache"
runtimeclient "sigs.k8s.io/cluster-api/exp/runtime/client"
clustercontroller "sigs.k8s.io/cluster-api/internal/controllers/cluster"
Expand Down Expand Up @@ -94,6 +96,8 @@ type MachineSetReconciler struct {
APIReader client.Reader
ClusterCache clustercache.ClusterCache

PreflightChecks sets.Set[clusterv1.MachineSetPreflightCheck]

// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string
}
Expand All @@ -103,6 +107,7 @@ func (r *MachineSetReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Ma
Client: r.Client,
APIReader: r.APIReader,
ClusterCache: r.ClusterCache,
PreflightChecks: r.PreflightChecks,
WatchFilterValue: r.WatchFilterValue,
}).SetupWithManager(ctx, mgr, options)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Enabling `MachineSetPreflightChecks` provides safety in such circumstances by ma

### `ControlPlaneIsStable`

* This preflight check ensures that the ControlPlane is currently stable i.e. the ControlPlane is currently neither provisioning, upgrading nor pending an upgrade.
* This preflight check ensures that the ControlPlane is currently stable, i.e. the ControlPlane is currently neither provisioning nor upgrading.
* For Clusters with a managed topology it also checks if a control plane upgrade is pending.
* This preflight check is only performed if:
* The Cluster uses a ControlPlane provider.
* ControlPlane version is defined (`ControlPlane.spec.version` is set).
Expand All @@ -40,11 +41,25 @@ Enabling `MachineSetPreflightChecks` provides safety in such circumstances by ma
* MachineSet version is defined (`MachineSet.spec.template.spec.version` is set).
* MachineSet uses the `Kubeadm` Bootstrap provider.

## Opting out of PreflightChecks
### `ControlPlaneVersionSkew`

Once the feature flag is enabled the preflight checks are enabled for all the MachineSets including new and existing MachineSets.
It is possible to opt-out of one or all of the preflight checks on a per MachineSet basis by specifying a comma-separated list of the preflight checks on the
`machineset.cluster.x-k8s.io/skip-preflight-checks` annotation on the MachineSet.
* This preflight check ensures that the MachineSet and the ControlPlane have the same version. The idea behind this
check is that it doesn't make sense to create a Machine with an old version, if we already know based on the control
plane version that the Machine has to be replaced soon.
* This preflight check is only performed if:
* The Cluster has a managed topology
* The Cluster uses a ControlPlane provider.
* ControlPlane version is defined (`ControlPlane.spec.version` is set).
* MachineSet version is defined (`MachineSet.spec.template.spec.version` is set).

## Configuring MachineSet PreflightChecks

By default, all preflight checks are enabled for all MachineSets, including new and existing MachineSets.
The set of enabled preflight checks can be overridden with the `--machineset-preflight-checks` command-line flag.

It is also possible to opt out of one or all of the preflight checks on a per MachineSet basis by specifying a
comma-separated list of the preflight checks via the `machineset.cluster.x-k8s.io/skip-preflight-checks` annotation
on the MachineSet.

Examples:
* To opt out of all the preflight checks set the `machineset.cluster.x-k8s.io/skip-preflight-checks: All` annotation.
Expand Down
2 changes: 2 additions & 0 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ type Reconciler struct {
APIReader client.Reader
ClusterCache clustercache.ClusterCache

PreflightChecks sets.Set[clusterv1.MachineSetPreflightCheck]

// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string

Expand Down
7 changes: 5 additions & 2 deletions internal/controllers/machineset/machineset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -1711,7 +1712,8 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) {
machines := []*clusterv1.Machine{unhealthyMachine, healthyMachine}
fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneUpgrading, unhealthyMachine, healthyMachine).WithStatusSubresource(&clusterv1.Machine{}).Build()
r := &Reconciler{
Client: fakeClient,
Client: fakeClient,
PreflightChecks: sets.Set[clusterv1.MachineSetPreflightCheck]{}.Insert(clusterv1.MachineSetPreflightCheckAll),
}
s := &scope{
cluster: cluster,
Expand Down Expand Up @@ -2324,7 +2326,8 @@ func TestMachineSetReconciler_syncReplicas(t *testing.T) {

fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneUpgrading, machineSet).WithStatusSubresource(&clusterv1.MachineSet{}).Build()
r := &Reconciler{
Client: fakeClient,
Client: fakeClient,
PreflightChecks: sets.Set[clusterv1.MachineSetPreflightCheck]{}.Insert(clusterv1.MachineSetPreflightCheckAll),
}
s := &scope{
cluster: cluster,
Expand Down
30 changes: 26 additions & 4 deletions internal/controllers/machineset/machineset_preflight.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.

skipped := skippedPreflightChecks(ms)
// If all the preflight checks are skipped then return early.
if skipped.Has(clusterv1.MachineSetPreflightCheckAll) {
if len(r.PreflightChecks) == 0 || skipped.Has(clusterv1.MachineSetPreflightCheckAll) {
return nil, nil
}

Expand Down Expand Up @@ -90,7 +90,7 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
errList := []error{}
preflightCheckErrs := []preflightCheckErrorMessage{}
// Run the control-plane-stable preflight check.
if !skipped.Has(clusterv1.MachineSetPreflightCheckControlPlaneIsStable) {
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckControlPlaneIsStable) {
preflightCheckErr, err := r.controlPlaneStablePreflightCheck(controlPlane, cluster, *cpVersion)
if err != nil {
errList = append(errList, err)
Expand All @@ -109,15 +109,15 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
}

// Run the kubernetes-version skew preflight check.
if !skipped.Has(clusterv1.MachineSetPreflightCheckKubernetesVersionSkew) {
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckKubernetesVersionSkew) {
preflightCheckErr := r.kubernetesVersionPreflightCheck(cpSemver, msSemver)
if preflightCheckErr != nil {
preflightCheckErrs = append(preflightCheckErrs, preflightCheckErr)
}
}

// Run the kubeadm-version skew preflight check.
if !skipped.Has(clusterv1.MachineSetPreflightCheckKubeadmVersionSkew) {
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckKubeadmVersionSkew) {
preflightCheckErr, err := r.kubeadmVersionPreflightCheck(cpSemver, msSemver, ms)
if err != nil {
errList = append(errList, err)
Expand All @@ -126,6 +126,13 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
preflightCheckErrs = append(preflightCheckErrs, preflightCheckErr)
}
}

// Run the control plane version skew preflight check.
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckControlPlaneVersionSkew) {
if preflightCheckErr := r.controlPlaneVersionPreflightCheck(cluster, *cpVersion, msVersion); preflightCheckErr != nil {
preflightCheckErrs = append(preflightCheckErrs, preflightCheckErr)
}
}
}

if len(errList) > 0 {
Expand All @@ -142,6 +149,11 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
return nil, nil
}

// shouldRun reports whether preflightCheck has to be executed.
// A check runs only if it is contained in preflightChecks (either by name or via
// MachineSetPreflightCheckAll) and it is not contained in skippedPreflightChecks
// (again either by name or via MachineSetPreflightCheckAll).
func shouldRun(preflightChecks, skippedPreflightChecks sets.Set[clusterv1.MachineSetPreflightCheck], preflightCheck clusterv1.MachineSetPreflightCheck) bool {
	// A skip entry always wins over an enabled entry.
	if skippedPreflightChecks.Has(clusterv1.MachineSetPreflightCheckAll) || skippedPreflightChecks.Has(preflightCheck) {
		return false
	}

	if preflightChecks.Has(clusterv1.MachineSetPreflightCheckAll) {
		return true
	}
	return preflightChecks.Has(preflightCheck)
}

func (r *Reconciler) controlPlaneStablePreflightCheck(controlPlane *unstructured.Unstructured, cluster *clusterv1.Cluster, controlPlaneVersion string) (preflightCheckErrorMessage, error) {
cpKlogRef := klog.KRef(controlPlane.GetNamespace(), controlPlane.GetName())

Expand Down Expand Up @@ -216,6 +228,16 @@ func (r *Reconciler) kubeadmVersionPreflightCheck(cpSemver, msSemver semver.Vers
return nil, nil
}

// controlPlaneVersionPreflightCheck verifies that the MachineSet version is exactly the same as the
// ControlPlane version.
// The comparison is only done if the ClusterTopology feature gate is enabled and the Cluster has
// spec.topology set; otherwise the check passes.
// It returns nil if the check passes and a message describing the mismatch otherwise.
func (r *Reconciler) controlPlaneVersionPreflightCheck(cluster *clusterv1.Cluster, cpVersion, msVersion string) preflightCheckErrorMessage {
	// Nothing to verify for Clusters without a managed topology.
	if !feature.Gates.Enabled(feature.ClusterTopology) || cluster.Spec.Topology == nil {
		return nil
	}

	// Versions are compared as plain strings, they have to match exactly.
	if cpVersion == msVersion {
		return nil
	}

	msg := fmt.Sprintf("MachineSet version (%s) is not yet the same as the ControlPlane version (%s), waiting for version to be propagated to the MachineSet (%q preflight check failed)", msVersion, cpVersion, clusterv1.MachineSetPreflightCheckControlPlaneVersionSkew)
	return ptr.To(msg)
}

func skippedPreflightChecks(ms *clusterv1.MachineSet) sets.Set[clusterv1.MachineSetPreflightCheck] {
skipped := sets.Set[clusterv1.MachineSetPreflightCheck]{}
if ms == nil {
Expand Down
Loading

0 comments on commit 42bdca3

Please sign in to comment.