From f1c55a582bf019409fe4ee1862bdb2011817e770 Mon Sep 17 00:00:00 2001 From: sbadiger Date: Mon, 1 Feb 2021 15:43:15 -0800 Subject: [PATCH 1/6] fix make vet errors. Signed-off-by: sbadiger --- api/v1alpha1/rollingupgrade_types.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/v1alpha1/rollingupgrade_types.go b/api/v1alpha1/rollingupgrade_types.go index 5804dc57..f7bb97a6 100644 --- a/api/v1alpha1/rollingupgrade_types.go +++ b/api/v1alpha1/rollingupgrade_types.go @@ -17,7 +17,6 @@ limitations under the License. package v1alpha1 import ( - "fmt" "strconv" "strings" @@ -238,7 +237,7 @@ func (r *RollingUpgrade) Validate() (bool, error) { // validating the Mode value if strategy.Mode == "" { r.Spec.Strategy.Mode = UpdateStrategyModeLazy - } else if !common.ContainsEqualFold(AllowedStrategyMode, strategy.Mode) { + } else if !common.ContainsEqualFold(AllowedStrategyMode, string(strategy.Mode)) { err := fmt.Errorf("%s: Invalid value for startegy Mode - %d", r.Name, strategy.MaxUnavailable.IntVal) return false, err } From 8f349a61a0a9b1eed4c5f7a9c3422600f5b17ef7 Mon Sep 17 00:00:00 2001 From: sbadiger Date: Mon, 1 Feb 2021 15:52:20 -0800 Subject: [PATCH 2/6] Terminate instances and run v2 for first time. Signed-off-by: sbadiger --- api/v1alpha1/rollingupgrade_types.go | 18 ++++++-- controllers/cloud.go | 1 + controllers/providers/aws/utils.go | 15 ++++++- controllers/rollingupgrade_controller.go | 26 ++++++++++-- controllers/upgrade.go | 54 +++++++++++++++++++++++- 5 files changed, 106 insertions(+), 8 deletions(-) diff --git a/api/v1alpha1/rollingupgrade_types.go b/api/v1alpha1/rollingupgrade_types.go index f7bb97a6..829551da 100644 --- a/api/v1alpha1/rollingupgrade_types.go +++ b/api/v1alpha1/rollingupgrade_types.go @@ -116,9 +116,21 @@ const ( ) var ( - FiniteStates = []string{StatusComplete, StatusError} - AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)} - AllowedStrategyMode = []string{string(UpdateStrategyModeLazy), string(UpdateStrategyModeEager)} + FiniteStates = []string{StatusComplete, StatusError} + AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)} + AllowedStrategyMode = []string{string(UpdateStrategyModeLazy), string(UpdateStrategyModeEager)} + ASGNonRunningInstanceStates = []string{Pending, PendingWait, PendingProceed, Terminating, TerminatingWait, TerminatingProceed} +) + +//ASG Instance states +const ( + InService = "InService" + Pending = "Pending" + PendingWait = "Pending:Wait" + PendingProceed = "Pending:Proceed" + Terminating = "Terminating" + TerminatingWait = "Terminating:Wait" + TerminatingProceed = "Terminating:Proceed" ) // RollingUpgradeCondition describes the state of the RollingUpgrade diff --git a/controllers/cloud.go b/controllers/cloud.go index 1bf779e4..81c06137 100644 --- a/controllers/cloud.go +++ b/controllers/cloud.go @@ -29,6 +29,7 @@ import ( var ( instanceStateTagKey = "upgrademgr.keikoproj.io/state" inProgressTagValue = "in-progress" + completedTagValue = "completed" ) type DiscoveredState struct { diff --git a/controllers/providers/aws/utils.go b/controllers/providers/aws/utils.go index f14bc819..ec89b296 100644 --- a/controllers/providers/aws/utils.go +++ b/controllers/providers/aws/utils.go @@ -96,7 +96,6 @@ func GetScalingAZs(instances []*autoscaling.Instance) []string { // return &autoscaling.Instance{} // } - // func ListScalingInstanceIDs(group *autoscaling.Group) []string { // instanceIDs := make([]string, 0) // for _, instance := range group.Instances { @@ -116,3 +115,17 @@ func GetTemplateLatestVersion(templates []*ec2.LaunchTemplate, templateName stri } return "0" } + +func TagEC2instance(instanceID, tagKey, tagValue string, client ec2iface.EC2API) error { + input := &ec2.CreateTagsInput{ + Resources: aws.StringSlice([]string{instanceID}), + Tags: []*ec2.Tag{ + { + Key: aws.String(tagKey), + Value: aws.String(tagValue), + }, + }, + } + _, err := client.CreateTags(input) + return err +} diff --git a/controllers/rollingupgrade_controller.go b/controllers/rollingupgrade_controller.go index 46f954df..8ea4b1a0 100644 --- a/controllers/rollingupgrade_controller.go +++ b/controllers/rollingupgrade_controller.go @@ -123,11 +123,11 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque } r.Info("admitted new rollingupgrade", "name", req.NamespacedName, "scalingGroup", scalingGroupName) - r.AdmissionMap.Store(req.NamespacedName, scalingGroupName) + r.AdmissionMap.Store(rollingUpgrade.NamespacedName(), scalingGroupName) rollingUpgrade.SetCurrentStatus(v1alpha1.StatusInit) - discoveredState := NewDiscoveredState(r.Auth, r.Logger) - if err := discoveredState.Discover(); err != nil { + r.Cloud = NewDiscoveredState(r.Auth, r.Logger) + if err := r.Cloud.Discover(); err != nil { rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError) return ctrl.Result{}, err } @@ -138,6 +138,11 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, err } + // check if all instances are rotated. + if r.IsRollingUpgradeCompleted(rollingUpgrade) { + rollingUpgrade.SetCurrentStatus(v1alpha1.StatusComplete) + } + return reconcile.Result{RequeueAfter: time.Second * 10}, nil } @@ -161,3 +166,18 @@ func (r *RollingUpgradeReconciler) UpdateStatus(rollingUpgrade *v1alpha1.Rolling r.Error(err, "failed to update status", "name", rollingUpgrade.NamespacedName()) } } + +func (r *RollingUpgradeReconciler) IsRollingUpgradeCompleted(rollingUpgrade *v1alpha1.RollingUpgrade) bool { + r.Info("checking if rolling upgrade is completed", "name", rollingUpgrade.NamespacedName()) + if err := r.Cloud.Discover(); err != nil { + rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError) + return false + } + scalingGroup := awsprovider.SelectScalingGroup(rollingUpgrade.ScalingGroupName(), r.Cloud.ScalingGroups) + for _, instance := range scalingGroup.Instances { + if r.IsInstanceDrifted(rollingUpgrade, instance) { + return false + } + } + return true +} diff --git a/controllers/upgrade.go b/controllers/upgrade.go index 5dd7ac33..30cc1d4a 100644 --- a/controllers/upgrade.go +++ b/controllers/upgrade.go @@ -23,6 +23,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/keikoproj/upgrade-manager/api/v1alpha1" + "github.com/keikoproj/upgrade-manager/controllers/common" awsprovider "github.com/keikoproj/upgrade-manager/controllers/providers/aws" kubeprovider "github.com/keikoproj/upgrade-manager/controllers/providers/kubernetes" "k8s.io/apimachinery/pkg/util/intstr" @@ -64,7 +65,9 @@ func (r *RollingUpgradeReconciler) RotateNodes(rollingUpgrade *v1alpha1.RollingU rollingUpgrade.SetTotalNodes(len(scalingGroup.Instances)) rotationTargets := r.SelectTargets(rollingUpgrade, scalingGroup) - r.ReplaceNodeBatch(rollingUpgrade, rotationTargets) + if ok, err := r.ReplaceNodeBatch(rollingUpgrade, rotationTargets); !ok { + return err + } return nil } @@ -79,6 +82,7 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol for _, target := range batch { _ = target // Add in-progress tag + r.SetInstanceTag(rollingUpgrade, target, instanceStateTagKey, inProgressTagValue) // Standby @@ -97,6 +101,10 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol // Is drained? // Terminate - set lastTerminateTime + if ok := r.TerminateNodeInstances(rollingUpgrade, target); !ok { + return true, nil + } + r.SetInstanceTag(rollingUpgrade, target, instanceStateTagKey, completedTagValue) } case v1alpha1.UpdateStrategyModeLazy: for _, target := range batch { @@ -113,6 +121,44 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol return true, nil } +func (r *RollingUpgradeReconciler) SetInstanceTag(rollingUpgrade *v1alpha1.RollingUpgrade, instance *autoscaling.Instance, tagKey string, tagVal string) error { + return awsprovider.TagEC2instance(aws.StringValue(instance.InstanceId), tagKey, tagVal, r.Auth.Ec2Client) +} + +func (r *RollingUpgradeReconciler) CanTerminate(instance *autoscaling.Instance) bool { + instances, err := r.Auth.DescribeTaggedInstanceIDs(instanceStateTagKey, completedTagValue) + if err != nil { + r.Error(err, "unable to get tagged instances") + //Exit. + } + + instanceID := aws.StringValue(instance.InstanceId) + if common.ContainsEqualFold(instances, instanceID) { + return false + } + + if common.ContainsEqualFold(v1alpha1.ASGNonRunningInstanceStates, aws.StringValue(instance.LifecycleState)) { + return false + } + return true +} + +func (r *RollingUpgradeReconciler) TerminateNodeInstances(rollingUpgrade *v1alpha1.RollingUpgrade, instance *autoscaling.Instance) bool { + instanceID := aws.StringValue(instance.InstanceId) + input := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ + InstanceId: aws.String(instanceID), + ShouldDecrementDesiredCapacity: aws.Bool(false), + } + + r.Info("Terminating node-instance", "instance", instanceID) + _, err := r.Auth.AsgClient.TerminateInstanceInAutoScalingGroup(input) + if err != nil { + r.Error(err, "Failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", instanceID) + return false + } + return true +} + func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.RollingUpgrade, scalingGroup *autoscaling.Group) []*autoscaling.Instance { var ( batchSize = rollingUpgrade.MaxUnavailable() @@ -144,6 +190,9 @@ func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.Rollin if rollingUpgrade.UpdateStrategyType() == v1alpha1.RandomUpdateStrategy { for _, instance := range scalingGroup.Instances { if r.IsInstanceDrifted(rollingUpgrade, instance) { + if !r.CanTerminate(instance) { + continue + } targets = append(targets, instance) } } @@ -155,6 +204,9 @@ func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.Rollin } else if rollingUpgrade.UpdateStrategyType() == v1alpha1.UniformAcrossAzUpdateStrategy { for _, instance := range scalingGroup.Instances { if r.IsInstanceDrifted(rollingUpgrade, instance) { + if !r.CanTerminate(instance) { + continue + } targets = append(targets, instance) } } From 21882165c522ebe83c7cc21b65608f5fa9a208e7 Mon Sep 17 00:00:00 2001 From: sbadiger Date: Wed, 3 Feb 2021 13:30:19 -0800 Subject: [PATCH 3/6] Addressing review comments Signed-off-by: sbadiger --- api/v1alpha1/rollingupgrade_types.go | 24 ++++------ controllers/providers/aws/autoscaling.go | 14 ++++++ controllers/providers/aws/ec2.go | 14 ++++++ controllers/providers/aws/utils.go | 14 ------ controllers/rollingupgrade_controller.go | 20 -------- controllers/upgrade.go | 61 +++++++++--------------- 6 files changed, 60 insertions(+), 87 deletions(-) diff --git a/api/v1alpha1/rollingupgrade_types.go b/api/v1alpha1/rollingupgrade_types.go index 829551da..5c7eb7cf 100644 --- a/api/v1alpha1/rollingupgrade_types.go +++ b/api/v1alpha1/rollingupgrade_types.go @@ -21,6 +21,7 @@ import ( "strconv" "strings" + "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/keikoproj/upgrade-manager/controllers/common" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -116,21 +117,14 @@ const ( ) var ( - FiniteStates = []string{StatusComplete, StatusError} - AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)} - AllowedStrategyMode = []string{string(UpdateStrategyModeLazy), string(UpdateStrategyModeEager)} - ASGNonRunningInstanceStates = []string{Pending, PendingWait, PendingProceed, Terminating, TerminatingWait, TerminatingProceed} -) - -//ASG Instance states -const ( - InService = "InService" - Pending = "Pending" - PendingWait = "Pending:Wait" - PendingProceed = "Pending:Proceed" - Terminating = "Terminating" - TerminatingWait = "Terminating:Wait" - TerminatingProceed = "Terminating:Proceed" + FiniteStates = []string{StatusComplete, StatusError} + AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)} + AllowedStrategyMode = []string{string(UpdateStrategyModeLazy), string(UpdateStrategyModeEager)} + ASGTerminatingInstanceStates = []string{ + autoscaling.LifecycleStateTerminating, + autoscaling.LifecycleStateTerminatingWait, + autoscaling.LifecycleStateTerminatingProceed, + } ) // RollingUpgradeCondition describes the state of the RollingUpgrade diff --git a/controllers/providers/aws/autoscaling.go b/controllers/providers/aws/autoscaling.go index 2a49c6bc..90d3b2a6 100644 --- a/controllers/providers/aws/autoscaling.go +++ b/controllers/providers/aws/autoscaling.go @@ -17,6 +17,7 @@ limitations under the License. package aws import ( + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/autoscaling" ) @@ -31,3 +32,16 @@ func (a *AmazonClientSet) DescribeScalingGroups() ([]*autoscaling.Group, error) } return scalingGroups, nil } + +func (a *AmazonClientSet) TerminateInstance(instance *autoscaling.Instance) error { + instanceID := aws.StringValue(instance.InstanceId) + input := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ + InstanceId: aws.String(instanceID), + ShouldDecrementDesiredCapacity: aws.Bool(false), + } + + if _, err := a.AsgClient.TerminateInstanceInAutoScalingGroup(input); err != nil { + return err + } + return nil +} diff --git a/controllers/providers/aws/ec2.go b/controllers/providers/aws/ec2.go index f7c7644a..c0fde6a8 100644 --- a/controllers/providers/aws/ec2.go +++ b/controllers/providers/aws/ec2.go @@ -57,3 +57,17 @@ func (a *AmazonClientSet) DescribeTaggedInstanceIDs(tagKey, tagValue string) ([] }) return instances, err } + +func (a *AmazonClientSet) TagEC2instance(instanceID, tagKey, tagValue string) error { + input := &ec2.CreateTagsInput{ + Resources: aws.StringSlice([]string{instanceID}), + Tags: []*ec2.Tag{ + { + Key: aws.String(tagKey), + Value: aws.String(tagValue), + }, + }, + } + _, err := a.Ec2Client.CreateTags(input) + return err +} diff --git a/controllers/providers/aws/utils.go b/controllers/providers/aws/utils.go index ec89b296..95d22402 100644 --- a/controllers/providers/aws/utils.go +++ b/controllers/providers/aws/utils.go @@ -115,17 +115,3 @@ func GetTemplateLatestVersion(templates []*ec2.LaunchTemplate, templateName stri } return "0" } - -func TagEC2instance(instanceID, tagKey, tagValue string, client ec2iface.EC2API) error { - input := &ec2.CreateTagsInput{ - Resources: aws.StringSlice([]string{instanceID}), - Tags: []*ec2.Tag{ - { - Key: aws.String(tagKey), - Value: aws.String(tagValue), - }, - }, - } - _, err := client.CreateTags(input) - return err -} diff --git a/controllers/rollingupgrade_controller.go b/controllers/rollingupgrade_controller.go index 8ea4b1a0..2ae22aa2 100644 --- a/controllers/rollingupgrade_controller.go +++ b/controllers/rollingupgrade_controller.go @@ -138,11 +138,6 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, err } - // check if all instances are rotated. - if r.IsRollingUpgradeCompleted(rollingUpgrade) { - rollingUpgrade.SetCurrentStatus(v1alpha1.StatusComplete) - } - return reconcile.Result{RequeueAfter: time.Second * 10}, nil } @@ -166,18 +161,3 @@ func (r *RollingUpgradeReconciler) UpdateStatus(rollingUpgrade *v1alpha1.Rolling r.Error(err, "failed to update status", "name", rollingUpgrade.NamespacedName()) } } - -func (r *RollingUpgradeReconciler) IsRollingUpgradeCompleted(rollingUpgrade *v1alpha1.RollingUpgrade) bool { - r.Info("checking if rolling upgrade is completed", "name", rollingUpgrade.NamespacedName()) - if err := r.Cloud.Discover(); err != nil { - rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError) - return false - } - scalingGroup := awsprovider.SelectScalingGroup(rollingUpgrade.ScalingGroupName(), r.Cloud.ScalingGroups) - for _, instance := range scalingGroup.Instances { - if r.IsInstanceDrifted(rollingUpgrade, instance) { - return false - } - } - return true -} diff --git a/controllers/upgrade.go b/controllers/upgrade.go index 30cc1d4a..22978f4a 100644 --- a/controllers/upgrade.go +++ b/controllers/upgrade.go @@ -64,6 +64,13 @@ func (r *RollingUpgradeReconciler) RotateNodes(rollingUpgrade *v1alpha1.RollingU ) rollingUpgrade.SetTotalNodes(len(scalingGroup.Instances)) + + // check if all instances are rotated. + if r.IsScalingGroupDrifted(rollingUpgrade) { + rollingUpgrade.SetCurrentStatus(v1alpha1.StatusComplete) + return nil + } + rotationTargets := r.SelectTargets(rollingUpgrade, scalingGroup) if ok, err := r.ReplaceNodeBatch(rollingUpgrade, rotationTargets); !ok { return err @@ -101,7 +108,8 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol // Is drained? // Terminate - set lastTerminateTime - if ok := r.TerminateNodeInstances(rollingUpgrade, target); !ok { + if err := r.Auth.TerminateInstance(target); err != nil { + r.Info("Instance termination failed. Will retry in next reconcile", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) return true, nil } r.SetInstanceTag(rollingUpgrade, target, instanceStateTagKey, completedTagValue) @@ -122,41 +130,7 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol } func (r *RollingUpgradeReconciler) SetInstanceTag(rollingUpgrade *v1alpha1.RollingUpgrade, instance *autoscaling.Instance, tagKey string, tagVal string) error { - return awsprovider.TagEC2instance(aws.StringValue(instance.InstanceId), tagKey, tagVal, r.Auth.Ec2Client) -} - -func (r *RollingUpgradeReconciler) CanTerminate(instance *autoscaling.Instance) bool { - instances, err := r.Auth.DescribeTaggedInstanceIDs(instanceStateTagKey, completedTagValue) - if err != nil { - r.Error(err, "unable to get tagged instances") - //Exit. - } - - instanceID := aws.StringValue(instance.InstanceId) - if common.ContainsEqualFold(instances, instanceID) { - return false - } - - if common.ContainsEqualFold(v1alpha1.ASGNonRunningInstanceStates, aws.StringValue(instance.LifecycleState)) { - return false - } - return true -} - -func (r *RollingUpgradeReconciler) TerminateNodeInstances(rollingUpgrade *v1alpha1.RollingUpgrade, instance *autoscaling.Instance) bool { - instanceID := aws.StringValue(instance.InstanceId) - input := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ - InstanceId: aws.String(instanceID), - ShouldDecrementDesiredCapacity: aws.Bool(false), - } - - r.Info("Terminating node-instance", "instance", instanceID) - _, err := r.Auth.AsgClient.TerminateInstanceInAutoScalingGroup(input) - if err != nil { - r.Error(err, "Failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", instanceID) - return false - } - return true + return r.Auth.TagEC2instance(aws.StringValue(instance.InstanceId), tagKey, tagVal) } func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.RollingUpgrade, scalingGroup *autoscaling.Group) []*autoscaling.Instance { @@ -190,7 +164,7 @@ func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.Rollin if rollingUpgrade.UpdateStrategyType() == v1alpha1.RandomUpdateStrategy { for _, instance := range scalingGroup.Instances { if r.IsInstanceDrifted(rollingUpgrade, instance) { - if !r.CanTerminate(instance) { + if common.ContainsEqualFold(v1alpha1.ASGTerminatingInstanceStates, aws.StringValue(instance.LifecycleState)) { continue } targets = append(targets, instance) @@ -204,7 +178,7 @@ func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.Rollin } else if rollingUpgrade.UpdateStrategyType() == v1alpha1.UniformAcrossAzUpdateStrategy { for _, instance := range scalingGroup.Instances { if r.IsInstanceDrifted(rollingUpgrade, instance) { - if !r.CanTerminate(instance) { + if common.ContainsEqualFold(v1alpha1.ASGTerminatingInstanceStates, aws.StringValue(instance.LifecycleState)) { continue } targets = append(targets, instance) @@ -309,3 +283,14 @@ func (r *RollingUpgradeReconciler) IsInstanceDrifted(rollingUpgrade *v1alpha1.Ro r.Info("node refresh not required", "name", rollingUpgrade.NamespacedName(), "instance", instanceID) return false } + +func (r *RollingUpgradeReconciler) IsScalingGroupDrifted(rollingUpgrade *v1alpha1.RollingUpgrade) bool { + r.Info("checking if rolling upgrade is completed", "name", rollingUpgrade.NamespacedName()) + scalingGroup := awsprovider.SelectScalingGroup(rollingUpgrade.ScalingGroupName(), r.Cloud.ScalingGroups) + for _, instance := range scalingGroup.Instances { + if r.IsInstanceDrifted(rollingUpgrade, instance) { + return false + } + } + return true +} From 722f48d29ca2625071d89d8e12923877a4bb4931 Mon Sep 17 00:00:00 2001 From: sbadiger Date: Wed, 3 Feb 2021 15:07:19 -0800 Subject: [PATCH 4/6] addressing more review comments Signed-off-by: sbadiger --- api/v1alpha1/rollingupgrade_types.go | 12 +++--------- controllers/cloud.go | 1 - controllers/providers/aws/autoscaling.go | 8 ++++++++ controllers/upgrade.go | 19 ++++++------------- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/api/v1alpha1/rollingupgrade_types.go b/api/v1alpha1/rollingupgrade_types.go index 5c7eb7cf..f7bb97a6 100644 --- a/api/v1alpha1/rollingupgrade_types.go +++ b/api/v1alpha1/rollingupgrade_types.go @@ -21,7 +21,6 @@ import ( "strconv" "strings" - "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/keikoproj/upgrade-manager/controllers/common" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -117,14 +116,9 @@ const ( ) var ( - FiniteStates = []string{StatusComplete, StatusError} - AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)} - AllowedStrategyMode = []string{string(UpdateStrategyModeLazy), string(UpdateStrategyModeEager)} - ASGTerminatingInstanceStates = []string{ - autoscaling.LifecycleStateTerminating, - autoscaling.LifecycleStateTerminatingWait, - autoscaling.LifecycleStateTerminatingProceed, - } + FiniteStates = []string{StatusComplete, StatusError} + AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)} + AllowedStrategyMode = []string{string(UpdateStrategyModeLazy), string(UpdateStrategyModeEager)} ) // RollingUpgradeCondition describes the state of the RollingUpgrade diff --git a/controllers/cloud.go b/controllers/cloud.go index 81c06137..1bf779e4 100644 --- a/controllers/cloud.go +++ b/controllers/cloud.go @@ -29,7 +29,6 @@ import ( var ( instanceStateTagKey = "upgrademgr.keikoproj.io/state" inProgressTagValue = "in-progress" - completedTagValue = "completed" ) type DiscoveredState struct { diff --git a/controllers/providers/aws/autoscaling.go b/controllers/providers/aws/autoscaling.go index 90d3b2a6..d578f1c8 100644 --- a/controllers/providers/aws/autoscaling.go +++ b/controllers/providers/aws/autoscaling.go @@ -21,6 +21,14 @@ import ( "github.com/aws/aws-sdk-go/service/autoscaling" ) +var ( + TerminatingInstanceStates = []string{ + autoscaling.LifecycleStateTerminating, + autoscaling.LifecycleStateTerminatingWait, + autoscaling.LifecycleStateTerminatingProceed, + } +) + func (a *AmazonClientSet) DescribeScalingGroups() ([]*autoscaling.Group, error) { scalingGroups := []*autoscaling.Group{} err := a.AsgClient.DescribeAutoScalingGroupsPages(&autoscaling.DescribeAutoScalingGroupsInput{}, func(page *autoscaling.DescribeAutoScalingGroupsOutput, lastPage bool) bool { diff --git a/controllers/upgrade.go b/controllers/upgrade.go index 22978f4a..d7a2eddc 100644 --- a/controllers/upgrade.go +++ b/controllers/upgrade.go @@ -89,7 +89,7 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol for _, target := range batch { _ = target // Add in-progress tag - r.SetInstanceTag(rollingUpgrade, target, instanceStateTagKey, inProgressTagValue) + r.Auth.TagEC2instance(aws.StringValue(target.InstanceId), instanceStateTagKey, inProgressTagValue) // Standby @@ -109,10 +109,9 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol // Terminate - set lastTerminateTime if err := r.Auth.TerminateInstance(target); err != nil { - r.Info("Instance termination failed. Will retry in next reconcile", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) + r.Info("failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) return true, nil } - r.SetInstanceTag(rollingUpgrade, target, instanceStateTagKey, completedTagValue) } case v1alpha1.UpdateStrategyModeLazy: for _, target := range batch { @@ -129,10 +128,6 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol return true, nil } -func (r *RollingUpgradeReconciler) SetInstanceTag(rollingUpgrade *v1alpha1.RollingUpgrade, instance *autoscaling.Instance, tagKey string, tagVal string) error { - return r.Auth.TagEC2instance(aws.StringValue(instance.InstanceId), tagKey, tagVal) -} - func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.RollingUpgrade, scalingGroup *autoscaling.Group) []*autoscaling.Instance { var ( batchSize = rollingUpgrade.MaxUnavailable() @@ -164,9 +159,6 @@ func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.Rollin if rollingUpgrade.UpdateStrategyType() == v1alpha1.RandomUpdateStrategy { for _, instance := range scalingGroup.Instances { if r.IsInstanceDrifted(rollingUpgrade, instance) { - if common.ContainsEqualFold(v1alpha1.ASGTerminatingInstanceStates, aws.StringValue(instance.LifecycleState)) { - continue - } targets = append(targets, instance) } } @@ -178,9 +170,6 @@ func (r *RollingUpgradeReconciler) SelectTargets(rollingUpgrade *v1alpha1.Rollin } else if rollingUpgrade.UpdateStrategyType() == v1alpha1.UniformAcrossAzUpdateStrategy { for _, instance := range scalingGroup.Instances { if r.IsInstanceDrifted(rollingUpgrade, instance) { - if common.ContainsEqualFold(v1alpha1.ASGTerminatingInstanceStates, aws.StringValue(instance.LifecycleState)) { - continue - } targets = append(targets, instance) } } @@ -213,6 +202,10 @@ func (r *RollingUpgradeReconciler) IsInstanceDrifted(rollingUpgrade *v1alpha1.Ro instanceID = aws.StringValue(instance.InstanceId) ) + // if an instance is in terminating state, ignore. + if common.ContainsEqualFold(awsprovider.TerminatingInstanceStates, aws.StringValue(instance.LifecycleState)) { + return false + } // check if there is atleast one node that meets the force-referesh criteria if rollingUpgrade.IsForceRefresh() { var ( From ec9c6a2973fd809d4de4586a3fc29b86637b3b65 Mon Sep 17 00:00:00 2001 From: sbadiger Date: Wed, 3 Feb 2021 15:14:49 -0800 Subject: [PATCH 5/6] Log error message Signed-off-by: sbadiger --- controllers/upgrade.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/upgrade.go b/controllers/upgrade.go index d7a2eddc..35849a01 100644 --- a/controllers/upgrade.go +++ b/controllers/upgrade.go @@ -109,7 +109,7 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol // Terminate - set lastTerminateTime if err := r.Auth.TerminateInstance(target); err != nil { - r.Info("failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) + r.Error(err, "failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) return true, nil } } From c34797b8c154f0678877ddcd6adaf9574310f1b2 Mon Sep 17 00:00:00 2001 From: sbadiger Date: Wed, 3 Feb 2021 15:23:09 -0800 Subject: [PATCH 6/6] error handling for instance tagging Signed-off-by: sbadiger --- controllers/upgrade.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/controllers/upgrade.go b/controllers/upgrade.go index 35849a01..f23b8d46 100644 --- a/controllers/upgrade.go +++ b/controllers/upgrade.go @@ -89,7 +89,9 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol for _, target := range batch { _ = target // Add in-progress tag - r.Auth.TagEC2instance(aws.StringValue(target.InstanceId), instanceStateTagKey, inProgressTagValue) + if err := r.Auth.TagEC2instance(aws.StringValue(target.InstanceId), instanceStateTagKey, inProgressTagValue); err != nil { + r.Error(err, "failed to set instance tag", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) + } // Standby @@ -109,7 +111,7 @@ func (r *RollingUpgradeReconciler) ReplaceNodeBatch(rollingUpgrade *v1alpha1.Rol // Terminate - set lastTerminateTime if err := r.Auth.TerminateInstance(target); err != nil { - r.Error(err, "failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId)) + r.Info("failed to terminate instance", "name", rollingUpgrade.NamespacedName(), "instance", aws.StringValue(target.InstanceId), "message", err) return true, nil } }