✨ Implement MachineDeployment rolloutAfter support #7053

Closed
8 changes: 8 additions & 0 deletions api/v1alpha3/conversion.go
@@ -199,6 +199,10 @@ func (src *MachineDeployment) ConvertTo(dstRaw conversion.Hub) error {

dst.Spec.Template.Spec.NodeDeletionTimeout = restored.Spec.Template.Spec.NodeDeletionTimeout
dst.Spec.Template.Spec.NodeVolumeDetachTimeout = restored.Spec.Template.Spec.NodeVolumeDetachTimeout
if restored.Spec.RolloutAfter != nil {
dst.Spec.RolloutAfter = restored.Spec.RolloutAfter
}

dst.Status.Conditions = restored.Status.Conditions
return nil
}
@@ -316,6 +320,10 @@ func Convert_v1beta1_MachineSpec_To_v1alpha3_MachineSpec(in *clusterv1.MachineSp
return autoConvert_v1beta1_MachineSpec_To_v1alpha3_MachineSpec(in, out, s)
}

func Convert_v1beta1_MachineDeploymentSpec_To_v1alpha3_MachineDeploymentSpec(in *clusterv1.MachineDeploymentSpec, out *MachineDeploymentSpec, s apiconversion.Scope) error {
return autoConvert_v1beta1_MachineDeploymentSpec_To_v1alpha3_MachineDeploymentSpec(in, out, s)
}

func Convert_v1beta1_MachineDeploymentStatus_To_v1alpha3_MachineDeploymentStatus(in *clusterv1.MachineDeploymentStatus, out *MachineDeploymentStatus, s apiconversion.Scope) error {
// Status.Conditions was introduced in v1alpha4, thus requiring a custom conversion function; the value is going to be preserved in an annotation, thus allowing a roundtrip without losing information
return autoConvert_v1beta1_MachineDeploymentStatus_To_v1alpha3_MachineDeploymentStatus(in, out, s)
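The conversion comment above relies on the usual pattern of preserving fields that do not exist in the older API version inside an annotation so they survive a roundtrip. A minimal sketch of that idea, with a hypothetical annotation key (this is not the project's actual conversion helper):

```go
// Illustrative only: stash a value that has no v1alpha3 equivalent into an
// annotation on the converted object so it can be restored on the way back.
package example

import (
	"encoding/json"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// The annotation key is hypothetical.
const conditionsAnnotation = "example.cluster.x-k8s.io/conditions"

func stashConditions(obj metav1.Object, conditions clusterv1.Conditions) error {
	raw, err := json.Marshal(conditions)
	if err != nil {
		return err
	}
	annotations := obj.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[conditionsAnnotation] = string(raw)
	obj.SetAnnotations(annotations)
	return nil
}
```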
16 changes: 6 additions & 10 deletions api/v1alpha3/zz_generated.conversion.go

Some generated files are not rendered by default.

8 changes: 8 additions & 0 deletions api/v1alpha4/conversion.go
@@ -247,6 +247,10 @@ func (src *MachineDeployment) ConvertTo(dstRaw conversion.Hub) error {

dst.Spec.Template.Spec.NodeDeletionTimeout = restored.Spec.Template.Spec.NodeDeletionTimeout
dst.Spec.Template.Spec.NodeVolumeDetachTimeout = restored.Spec.Template.Spec.NodeVolumeDetachTimeout
if restored.Spec.RolloutAfter != nil {
dst.Spec.RolloutAfter = restored.Spec.RolloutAfter
}

return nil
}

@@ -312,6 +316,10 @@ func Convert_v1beta1_MachineSpec_To_v1alpha4_MachineSpec(in *clusterv1.MachineSp
return autoConvert_v1beta1_MachineSpec_To_v1alpha4_MachineSpec(in, out, s)
}

func Convert_v1beta1_MachineDeploymentSpec_To_v1alpha4_MachineDeploymentSpec(in *clusterv1.MachineDeploymentSpec, out *MachineDeploymentSpec, s apiconversion.Scope) error {
return autoConvert_v1beta1_MachineDeploymentSpec_To_v1alpha4_MachineDeploymentSpec(in, out, s)
}

func Convert_v1beta1_Topology_To_v1alpha4_Topology(in *clusterv1.Topology, out *Topology, s apiconversion.Scope) error {
// spec.topology.variables has been added with v1beta1.
return autoConvert_v1beta1_Topology_To_v1alpha4_Topology(in, out, s)
16 changes: 6 additions & 10 deletions api/v1alpha4/zz_generated.conversion.go

Some generated files are not rendered by default.

8 changes: 8 additions & 0 deletions api/v1beta1/machinedeployment_types.go
@@ -67,6 +67,14 @@ type MachineDeploymentSpec struct {
// +kubebuilder:validation:MinLength=1
ClusterName string `json:"clusterName"`

// RolloutAfter indicates that a rollout should be performed after the
// specified time even if no changes have been made to the MachineDeployment.
// Changes to other fields of the spec are not affected by this field and are
// rolled out as normal.
//
// +optional
RolloutAfter *metav1.Time `json:"rolloutAfter,omitempty"`

// Number of desired machines. Defaults to 1.
// This is a pointer to distinguish between explicit zero and not specified.
// +optional
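As context for the field added above: a minimal sketch, assuming a controller-runtime client and an already-fetched MachineDeployment, of how a user or tool could set spec.rolloutAfter to trigger a rollout without changing the machine template. The helper name is made up for illustration.

```go
// Sketch only: setting rolloutAfter to "now" asks the MachineDeployment
// controller to roll out new Machines even though the template is unchanged.
package example

import (
	"context"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func triggerRollout(ctx context.Context, c client.Client, md *clusterv1.MachineDeployment) error {
	// Patch only spec.rolloutAfter, leaving the rest of the spec untouched.
	patch := client.MergeFrom(md.DeepCopy())
	now := metav1.NewTime(time.Now())
	md.Spec.RolloutAfter = &now
	return c.Patch(ctx, md, patch)
}
```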
4 changes: 4 additions & 0 deletions api/v1beta1/machineset_types.go
@@ -29,6 +29,10 @@ const (
// MachineSetTopologyFinalizer is the finalizer used by the topology MachineDeployment controller to
// clean up referenced template resources if necessary when a MachineSet is being deleted.
MachineSetTopologyFinalizer = "machineset.topology.cluster.x-k8s.io"

// MachineSetLastRolloutAfterAnnotation gets set during creation of a MachineSet to the value of
// MachineDeployment.Spec.RolloutAfter, if it is not nil.
MachineSetLastRolloutAfterAnnotation = "machineset.clusters.x-k8s.io/lastRollout"
)

// ANCHOR: MachineSetSpec
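A small sketch of how the annotation above could be populated when the MachineSet is built; the exact call site is not shown in this diff, so the helper below is hypothetical.

```go
// Illustrative only: record the MachineDeployment's rolloutAfter on the new
// MachineSet so later reconciliations can tell which rollout it belongs to.
package example

import (
	"time"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

func setLastRolloutAfter(md *clusterv1.MachineDeployment, ms *clusterv1.MachineSet) {
	if md.Spec.RolloutAfter == nil {
		return
	}
	if ms.Annotations == nil {
		ms.Annotations = map[string]string{}
	}
	// Store the timestamp in RFC3339 so it can be compared on later reconciliations.
	ms.Annotations[clusterv1.MachineSetLastRolloutAfterAnnotation] = md.Spec.RolloutAfter.UTC().Format(time.RFC3339)
}
```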
4 changes: 4 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

8 changes: 7 additions & 1 deletion api/v1beta1/zz_generated.openapi.go

Some generated files are not rendered by default.

7 changes: 7 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml

Some generated files are not rendered by default.

70 changes: 57 additions & 13 deletions internal/controllers/machinedeployment/machinedeployment_sync.go
@@ -19,6 +19,7 @@ package machinedeployment
import (
"context"
"fmt"
"hash/fnv"
"sort"
"strconv"

@@ -77,10 +78,15 @@ func (r *Reconciler) sync(ctx context.Context, d *clusterv1.MachineDeployment, m
// Note that currently the deployment controller is using caches to avoid querying the server for reads.
// This may lead to stale reads of machine sets, thus incorrect deployment status.
func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, d *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, createIfNotExisted bool) (*clusterv1.MachineSet, []*clusterv1.MachineSet, error) {
_, allOldMSs := mdutil.FindOldMachineSets(d, msList)
now := metav1.Now()

_, allOldMSs, err := mdutil.FindOldMachineSets(&now, d, msList)
if err != nil {
return nil, nil, err
}

// Get new machine set with the updated revision number
newMS, err := r.getNewMachineSet(ctx, d, msList, allOldMSs, createIfNotExisted)
newMS, err := r.getNewMachineSet(ctx, &now, d, msList, allOldMSs, createIfNotExisted)
if err != nil {
return nil, nil, err
}
@@ -93,10 +99,13 @@ func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, d *cl
// 2. If there's existing new MS, update its revision number if it's smaller than (maxOldRevision + 1), where maxOldRevision is the max revision number among all old MSes.
// 3. If there's no existing new MS and createIfNotExisted is true, create one with appropriate revision number (maxOldRevision + 1) and replicas.
// Note that the machine-template-hash will be added to adopted MSes and machines.
func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineDeployment, msList, oldMSs []*clusterv1.MachineSet, createIfNotExisted bool) (*clusterv1.MachineSet, error) {
func (r *Reconciler) getNewMachineSet(ctx context.Context, now *metav1.Time, d *clusterv1.MachineDeployment, msList, oldMSs []*clusterv1.MachineSet, createIfNotExisted bool) (*clusterv1.MachineSet, error) {
log := ctrl.LoggerFrom(ctx)

existingNewMS := mdutil.FindNewMachineSet(d, msList)
existingNewMS, err := mdutil.FindNewMachineSet(now, d, msList)
if err != nil {
return nil, err
}

// Calculate the max revision number among all old MSes
maxOldRevision := mdutil.MaxRevision(oldMSs, log)
@@ -116,7 +125,7 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
}

// Set existing new machine set's annotation
annotationsUpdated := mdutil.SetNewMachineSetAnnotations(d, msCopy, newRevision, true, log)
annotationsUpdated := mdutil.SetNewMachineSetAnnotations(now, d, msCopy, newRevision, true, log)

minReadySecondsNeedsUpdate := msCopy.Spec.MinReadySeconds != *d.Spec.MinReadySeconds
deletePolicyNeedsUpdate := d.Spec.Strategy.RollingUpdate.DeletePolicy != nil && msCopy.Spec.DeletePolicy != *d.Spec.Strategy.RollingUpdate.DeletePolicy
@@ -141,19 +150,18 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
return nil, nil
}

// new MachineSet does not exist, create one.
// New MachineSet does not exist, create one.
newMSTemplate := *d.Spec.Template.DeepCopy()
hash, err := mdutil.ComputeSpewHash(&newMSTemplate)
machineSetName, machineDeploymentHash, err := generateMachineSetName(d, now)
if err != nil {
return nil, err
}
machineTemplateSpecHash := fmt.Sprintf("%d", hash)
newMSTemplate.Labels = mdutil.CloneAndAddLabel(d.Spec.Template.Labels,
clusterv1.MachineDeploymentUniqueLabel, machineTemplateSpecHash)

newMSTemplate.Labels = mdutil.CloneAndAddLabel(d.Spec.Template.Labels, clusterv1.MachineDeploymentUniqueLabel, machineDeploymentHash)

// Add machineTemplateHash label to selector.
newMSSelector := mdutil.CloneSelectorAndAddLabel(&d.Spec.Selector,
clusterv1.MachineDeploymentUniqueLabel, machineTemplateSpecHash)
clusterv1.MachineDeploymentUniqueLabel, machineDeploymentHash)

minReadySeconds := int32(0)
if d.Spec.MinReadySeconds != nil {
@@ -164,7 +172,7 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
newMS := clusterv1.MachineSet{
ObjectMeta: metav1.ObjectMeta{
// Make the name deterministic, to ensure idempotence
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
Name: machineSetName,
Namespace: d.Namespace,
Labels: newMSTemplate.Labels,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, machineDeploymentKind)},
@@ -207,7 +215,7 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
*(newMS.Spec.Replicas) = newReplicasCount

// Set new machine set's annotation
mdutil.SetNewMachineSetAnnotations(d, &newMS, newRevision, false, log)
mdutil.SetNewMachineSetAnnotations(now, d, &newMS, newRevision, false, log)
// Create the new MachineSet. If it already exists, then we need to check for possible
// hash collisions. If there is any other error, we need to report it in the status of
// the Deployment.
@@ -254,6 +262,42 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
return createdMS, err
}

func generateMachineSetName(d *clusterv1.MachineDeployment, now *metav1.Time) (string, string, error) {
@fabriziopandini (Member) commented on Sep 29, 2022

Hash is currently used as:

  • UID to identify machines belonging to the MS
  • Adding a unique suffix to the MS set name

Given that, I'm really wondering if we should drop the current spew/hash logic and simply use a random string + a check that verifies that the random string is not already taken by an existing MS (for this MD). It seems that the code could be re-entrant also in this way and we can get rid of all this complex logic (*) ...

@vincepri @enxebre @sbueringer opinions?

(*) this could be a separate PR that we merge ahead of this one

@sbueringer (Member) commented on Nov 29, 2022

@fabriziopandini Sorry missed the mention somehow.

Sounds fine to me, assuming we can make this re-entrant (I didn't look at the code in detail to see how this would be achieved).

+100 to making this a separate PR independent of this work and the propagation work

Would be nice to get rid of the hash early in the v1.4 cycle to give us time to discover potential side effects

template := *d.Spec.Template.DeepCopy()
hash, err := mdutil.ComputeSpewHash(&template)
if err != nil {
return "", "", err
}
machineDeploymentHash := fmt.Sprintf("%d", hash)

if d.Spec.RolloutAfter != nil {
// If the reconciliation time is after spec.rolloutAfter, a rollout should happen or has already happened.
// We include rolloutAfter in the hash used for the MachineSet name, so the first time
// the reconciliation time is after spec.rolloutAfter a new MachineSet is created with a name
// that does not clash with the existing MachineSet built from the same MachineDeployment template,
// and the rollout is orchestrated as usual.
if now.After(d.Spec.RolloutAfter.Time) {
hasher := fnv.New32a()

templateAndRolloutAfter := struct {
Template *clusterv1.MachineTemplateSpec
RolloutAfter *metav1.Time
}{
Template: &template,
RolloutAfter: d.Spec.RolloutAfter,
}
if err := mdutil.SpewHashObject(hasher, templateAndRolloutAfter); err != nil {
return "", "", err
}
rolloutAfterHash := hasher.Sum32()
machineDeploymentHash = fmt.Sprintf("%d", rolloutAfterHash)
}
}
machineSetName := d.Name + "-" + apirand.SafeEncodeString(machineDeploymentHash)

return machineSetName, machineDeploymentHash, nil
}
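For reference, a rough sketch of the alternative raised in the review thread above: a random suffix plus a collision check against the MachineSets of this MachineDeployment, instead of the spew hash. This is not part of the PR; the helper and its name are hypothetical.

```go
// Sketch of the random-suffix idea: pick a random name and verify it is not
// already used by one of the MachineDeployment's existing MachineSets.
package example

import (
	"fmt"

	apirand "k8s.io/apimachinery/pkg/util/rand"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

func randomMachineSetName(d *clusterv1.MachineDeployment, existing []*clusterv1.MachineSet) (string, error) {
	taken := map[string]bool{}
	for _, ms := range existing {
		taken[ms.Name] = true
	}
	// Retry a few times; with an 8-character random suffix collisions are
	// effectively impossible, but the check keeps the function re-entrant.
	for i := 0; i < 10; i++ {
		name := fmt.Sprintf("%s-%s", d.Name, apirand.String(8))
		if !taken[name] {
			return name, nil
		}
	}
	return "", fmt.Errorf("could not generate a unique MachineSet name for %s", d.Name)
}
```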

// scale scales proportionally in order to mitigate risk. Otherwise, scaling up can increase the size
// of the new machine set and scaling down can decrease the sizes of the old ones, both of which would
// have the effect of hastening the rollout progress, which could produce a higher proportion of unavailable