Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ignoreDrainFailure and DrainTimeout as controller arguments #300

Merged
merged 1 commit into from
Aug 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions controllers/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,10 @@ func ContainsEqualFold(slice []string, s string) bool {
}
return false
}

// IntMax returns the larger of the two integers a and b.
// When both are equal, that shared value is returned.
func IntMax(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
27 changes: 16 additions & 11 deletions controllers/rollingupgrade_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ import (
type RollingUpgradeReconciler struct {
client.Client
logr.Logger
Scheme *runtime.Scheme
AdmissionMap sync.Map
CacheConfig *cache.Config
EventWriter *kubeprovider.EventWriter
maxParallel int
ScriptRunner ScriptRunner
Auth *RollingUpgradeAuthenticator
DrainGroupMapper *sync.Map
DrainErrorMapper *sync.Map
ClusterNodesMap *sync.Map
ReconcileMap *sync.Map
Scheme *runtime.Scheme
AdmissionMap sync.Map
CacheConfig *cache.Config
EventWriter *kubeprovider.EventWriter
maxParallel int
ScriptRunner ScriptRunner
Auth *RollingUpgradeAuthenticator
DrainGroupMapper *sync.Map
DrainErrorMapper *sync.Map
ClusterNodesMap *sync.Map
ReconcileMap *sync.Map
DrainTimeout int
IgnoreDrainFailures bool
}

// RollingUpgradeAuthenticator has the clients for providers
Expand Down Expand Up @@ -166,6 +168,9 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque
c.ClusterNodes = r.getClusterNodes()
return c
}(),

DrainTimeout: r.DrainTimeout,
IgnoreDrainFailures: r.IgnoreDrainFailures,
}

// process node rotation
Expand Down
13 changes: 9 additions & 4 deletions controllers/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ type RollingUpgradeContext struct {
RollingUpgrade *v1alpha1.RollingUpgrade
DrainManager *DrainManager
metricsMutex *sync.Mutex

DrainTimeout int
IgnoreDrainFailures bool
}

func (r *RollingUpgradeContext) RotateNodes() error {
Expand Down Expand Up @@ -114,7 +117,8 @@ func (r *RollingUpgradeContext) RotateNodes() error {

func (r *RollingUpgradeContext) ReplaceNodeBatch(batch []*autoscaling.Instance) (bool, error) {
var (
mode = r.RollingUpgrade.StrategyMode()
mode = r.RollingUpgrade.StrategyMode()
drainTimeout = common.IntMax(r.DrainTimeout, r.RollingUpgrade.DrainTimeout())
)

r.Info("rotating batch", "instances", awsprovider.GetInstanceIDs(batch), "name", r.RollingUpgrade.NamespacedName())
Expand Down Expand Up @@ -267,10 +271,11 @@ func (r *RollingUpgradeContext) ReplaceNodeBatch(batch []*autoscaling.Instance)
// Turns onto NodeRotationDrain
r.NodeStep(inProcessingNodes, nodeSteps, r.RollingUpgrade.Spec.AsgName, nodeName, v1alpha1.NodeRotationDrain)

if err := r.Auth.DrainNode(node, time.Duration(r.RollingUpgrade.PostDrainDelaySeconds()), r.RollingUpgrade.DrainTimeout(), r.Auth.Kubernetes); err != nil {
if !r.RollingUpgrade.IsIgnoreDrainFailures() {
if err := r.Auth.DrainNode(node, time.Duration(r.RollingUpgrade.PostDrainDelaySeconds()), drainTimeout, r.Auth.Kubernetes); err != nil {
// Ignore drain failures if either the spec or the controller args set ignoreDrainFailures to true.
if !r.RollingUpgrade.IsIgnoreDrainFailures() && !r.IgnoreDrainFailures {
r.DrainManager.DrainErrors <- errors.Errorf("DrainNode failed: instanceID - %v, %v", instanceID, err.Error())
//TODO: BREAK AFTER ERRORS?
return
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,14 @@ func main() {
maxParallel int
maxAPIRetries int
debugMode bool
drainTimeout int
ignoreDrainFailures bool
logMode string
)

flag.BoolVar(&debugMode, "debug", false, "enable debug logging")
flag.IntVar(&drainTimeout, "drain-timeout", 900, "when the drain command should timeout")
flag.BoolVar(&ignoreDrainFailures, "ignore-drain-failures", false, "proceed with instance termination despite drain failures.")
flag.StringVar(&logMode, "log-format", "text", "Log mode: supported values: text, json.")
flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
Expand Down Expand Up @@ -201,6 +205,9 @@ func main() {
DrainErrorMapper: &sync.Map{},
ClusterNodesMap: &sync.Map{},
ReconcileMap: &sync.Map{},

DrainTimeout: drainTimeout,
IgnoreDrainFailures: ignoreDrainFailures,
}

reconciler.SetMaxParallel(maxParallel)
Expand Down