Skip to content

Commit

Permalink
Set default DefaultAbortScaleDownDelaySeconds to 30s
Browse files Browse the repository at this point in the history
- Fix bg e2e: scaledown aborted pod in 30sec; so check the delay
  annotation
- canary e2e test: canay pod is kept running when
  abortScaleDownDelaySeconds=0

Signed-off-by: Hui Kang <[email protected]>
  • Loading branch information
Hui Kang committed Jul 17, 2021
1 parent 42aa2d9 commit 549fc14
Show file tree
Hide file tree
Showing 11 changed files with 178 additions and 20 deletions.
6 changes: 4 additions & 2 deletions docs/features/scaledown-aborted-rs.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Scaledown New Replicaset on Aborted Rollout

Upon an abort, we should scale down the new replicaset for all strategies. Users can then choose to leave the new replicaset scaled up indefinitely by setting abortScaleDownDelaySeconds to 0, or adjust the value to something larger (or smaller).
Upon an aborted update, we may scale down the new replicaset for all strategies. Users can then choose to leave the new replicaset scaled up indefinitely by setting abortScaleDownDelaySeconds to 0, or adjust the value to something larger (or smaller).

`abortScaleDownDelaySeconds = nil` is the default.
The following table summarizes the behavior under combinations of rollout strategy and `abortScaleDownDelaySeconds`. Note that `abortScaleDownDelaySeconds` is not applicable to argo-rollouts v1.0.
`abortScaleDownDelaySeconds = nil` is the default, which means in v1.1 across all rollout strategies, the new replicaset
is scaled down in 30 seconds on abort by default.

| strategy | v1.0 behavior | abortScaleDownDelaySeconds | v1.1 behavior |
|--------------------------------------------:|:-----------------------------:|:--------------------------:|:-----------------------------:|
Expand Down
12 changes: 6 additions & 6 deletions docs/features/specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ spec:
# down. Defaults to nil
scaleDownDelayRevisionLimit: 2

# Adds a delay before scaling down the preview replicaset if update is
# aborted. Default is 0, meaning preview replicaset won't be scaled down.
# Add a delay in second before scaling down the preview replicaset
# if update is aborted. 0 means not to scale down. Default is 30 second
abortScaleDownDelaySeconds: 30

# Anti Affinity configuration between desired and previous ReplicaSet.
Expand Down Expand Up @@ -299,10 +299,10 @@ spec:
rootService: root-svc # optional
trafficSplitName: rollout-example-traffic-split # optional

# Add a delay before scaling down the canary pods when update
# is aborted for canary strategy using replicas of setCanaryScale.
# Default is 0, meaning canary pods are not scaled down.
AbortScaleDownDelaySeconds: 30
# Add a delay in second before scaling down the canary pods when update
# is aborted for canary strategy with traffic routing (not applicable for basic canary).
# 0 means canary pods are not scaled down. Default is 30 seconds.
abortScaleDownDelaySeconds: 30

status:
pauseConditions:
Expand Down
4 changes: 2 additions & 2 deletions pkg/apiclient/rollout/rollout.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@
"abortScaleDownDelaySeconds": {
"type": "integer",
"format": "int32",
"title": "AbortScaleDownDelaySeconds adds a delay before scaling down the preview replicaset\nif update is aborted. Default is 0, meaning preview replicaset is not scaled down.\n+optional"
"title": "AbortScaleDownDelaySeconds adds a delay in second before scaling down the preview replicaset\nif update is aborted. 0 means not to scale down.\nDefault is 30 second\n+optional"
}
},
"title": "BlueGreenStrategy defines parameters for Blue Green deployment"
Expand Down Expand Up @@ -775,7 +775,7 @@
"abortScaleDownDelaySeconds": {
"type": "integer",
"format": "int32",
"title": "AbortScaleDownDelaySeconds adds a delay before scaling down the canary pods when update\nis aborted for canary strategy with traffic routing (not applicable for basic canary).\nDefault is 0, meaning canary pods are not scaled down.\n+optional"
"title": "AbortScaleDownDelaySeconds adds a delay in second before scaling down the canary pods when update\nis aborted for canary strategy with traffic routing (not applicable for basic canary).\n0 means canary pods are not scaled down.\nDefault is 30 seconds.\n+optional"
}
},
"title": "CanaryStrategy defines parameters for a Replica Based Canary"
Expand Down
10 changes: 6 additions & 4 deletions pkg/apis/rollouts/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,9 @@ type BlueGreenStrategy struct {
// ActiveMetadata specify labels and annotations which will be attached to the active pods for
// the duration which they act as a active pod, and will be removed after
ActiveMetadata *PodTemplateMetadata `json:"activeMetadata,omitempty" protobuf:"bytes,13,opt,name=activeMetadata"`
// AbortScaleDownDelaySeconds adds a delay before scaling down the preview replicaset
// if update is aborted. Default is 0, meaning preview replicaset is not scaled down.
// AbortScaleDownDelaySeconds adds a delay in second before scaling down the preview replicaset
// if update is aborted. 0 means not to scale down.
// Default is 30 second
// +optional
AbortScaleDownDelaySeconds *int32 `json:"abortScaleDownDelaySeconds,omitempty" protobuf:"varint,14,opt,name=abortScaleDownDelaySeconds"`
}
Expand Down Expand Up @@ -286,9 +287,10 @@ type CanaryStrategy struct {
// ScaleDownDelayRevisionLimit limits the number of old RS that can run at one time before getting scaled down
// +optional
ScaleDownDelayRevisionLimit *int32 `json:"scaleDownDelayRevisionLimit,omitempty" protobuf:"varint,12,opt,name=scaleDownDelayRevisionLimit"`
// AbortScaleDownDelaySeconds adds a delay before scaling down the canary pods when update
// AbortScaleDownDelaySeconds adds a delay in second before scaling down the canary pods when update
// is aborted for canary strategy with traffic routing (not applicable for basic canary).
// Default is 0, meaning canary pods are not scaled down.
// 0 means canary pods are not scaled down.
// Default is 30 seconds.
// +optional
AbortScaleDownDelaySeconds *int32 `json:"abortScaleDownDelaySeconds,omitempty" protobuf:"varint,13,opt,name=abortScaleDownDelaySeconds"`
}
Expand Down
6 changes: 4 additions & 2 deletions rollout/bluegreen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ var (

func newBlueGreenRollout(name string, replicas int, revisionHistoryLimit *int32, activeSvc string, previewSvc string) *v1alpha1.Rollout {
rollout := newRollout(name, replicas, revisionHistoryLimit, map[string]string{"foo": "bar"})
abortScaleDownDelaySeconds := int32(0)
rollout.Spec.Strategy.BlueGreen = &v1alpha1.BlueGreenStrategy{
ActiveService: activeSvc,
PreviewService: previewSvc,
ActiveService: activeSvc,
PreviewService: previewSvc,
AbortScaleDownDelaySeconds: &abortScaleDownDelaySeconds,
}
rollout.Status.CurrentStepHash = conditions.ComputeStepHash(rollout)
rollout.Status.CurrentPodHash = controller.ComputeHash(&rollout.Spec.Template, rollout.Status.CollisionCount)
Expand Down
2 changes: 1 addition & 1 deletion rollout/replicaset.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (c *rolloutContext) removeScaleDownDelay(rs *appsv1.ReplicaSet) error {
}

// addScaleDownDelay injects the `scale-down-deadline` annotation to the ReplicaSet, or if
// scaleDownDelaySeconds is zero, removes it if it exists
// scaleDownDelaySeconds is zero, removes the annotation if it exists
func (c *rolloutContext) addScaleDownDelay(rs *appsv1.ReplicaSet, scaleDownDelaySeconds time.Duration) error {
if rs == nil {
return nil
Expand Down
6 changes: 4 additions & 2 deletions test/e2e/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ spec:
replicas: 2
strategy:
blueGreen:
abortScaleDownDelaySeconds: 0
activeService: pre-promotion-fail-active
previewService: pre-promotion-fail-preview
previewReplicaCount: 1
Expand Down Expand Up @@ -402,7 +403,7 @@ spec:
WaitForRolloutStatus("Paused").
Then().
ExpectRevisionPodCount("1", 1).
ExpectRevisionPodCount("2", 0).
ExpectRevisionScaleDown("2", true).
ExpectRevisionPodCount("3", 1).
ExpectActiveRevision("1").
ExpectPreviewRevision("3").
Expand All @@ -411,7 +412,8 @@ spec:
WaitForRolloutStatus("Healthy").
Then().
ExpectRevisionPodCount("1", 1).
ExpectRevisionPodCount("2", 0).
ExpectRevisionScaleDown("1", true).
ExpectRevisionScaleDown("2", true).
ExpectRevisionPodCount("3", 1).
ExpectActiveRevision("3").
ExpectPreviewRevision("3").
Expand Down
14 changes: 14 additions & 0 deletions test/e2e/canary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,3 +487,17 @@ func (s *CanarySuite) TestCanaryScaleDownOnAbort() {
Then().
ExpectRevisionPodCount("2", 0)
}

func (s *CanarySuite) TestCanaryUnScaleDownOnAbort() {
s.Given().
HealthyRollout(`@functional/canary-unscaledownonabort.yaml`).
When().
UpdateSpec(). // update to revision 2
WaitForRolloutStatus("Paused").
AbortRollout().
WaitForRolloutStatus("Degraded").
Sleep(3*time.Second).
Then().
ExpectRevisionPodCount("2", 1).
ExpectRevisionScaleDown("2", false)
}
96 changes: 96 additions & 0 deletions test/e2e/functional/canary-unscaledownonabort.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
apiVersion: v1
kind: Service
metadata:
name: canary-scaledowndelay-root
spec:
type: NodePort
ports:
- port: 80
targetPort: http
protocol: TCP
name: http
selector:
app: canary-scaledowndelay
---
apiVersion: v1
kind: Service
metadata:
name: canary-scaledowndelay-canary
spec:
type: NodePort
ports:
- port: 80
targetPort: http
protocol: TCP
name: http
selector:
app: canary-scaledowndelay
---
apiVersion: v1
kind: Service
metadata:
name: canary-scaledowndelay-stable
spec:
type: NodePort
ports:
- port: 80
targetPort: http
protocol: TCP
name: http
selector:
app: canary-scaledowndelay
---
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: canary-scaledowndelay-ingress
annotations:
kubernetes.io/ingress.class: alb
spec:
rules:
- http:
paths:
- path: /*
backend:
serviceName: canary-scaledowndelay-root
servicePort: use-annotation
---
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
name: canary-scaledownd-on-abort
spec:
selector:
matchLabels:
app: canary-scaledowndelay
template:
metadata:
labels:
app: canary-scaledowndelay
spec:
containers:
- name: canary-scaledowndelay
image: nginx:1.19-alpine
ports:
- name: http
containerPort: 80
protocol: TCP
resources:
requests:
memory: 16Mi
cpu: 5m
strategy:
canary:
abortScaleDownDelaySeconds: 0
scaleDownDelayRevisionLimit: 1
canaryService: canary-scaledowndelay-canary
stableService: canary-scaledowndelay-stable
steps:
- setCanaryScale:
replicas: 1
- pause: {}
trafficRouting:
alb:
ingress: canary-scaledowndelay-ingress
rootService: canary-scaledowndelay-root
servicePort: 80
40 changes: 40 additions & 0 deletions test/fixtures/then.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
rov1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
"github.com/argoproj/argo-rollouts/utils/defaults"
replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
Expand Down Expand Up @@ -73,6 +74,7 @@ func (t *Then) ExpectReplicaCounts(desired, current, updated, ready, available i
}

type PodExpectation func(*corev1.PodList) bool
type ReplicasetExpectation func(*appsv1.ReplicaSet) bool

func (t *Then) ExpectPods(expectation string, expectFunc PodExpectation) *Then {
t.t.Helper()
Expand Down Expand Up @@ -115,6 +117,44 @@ func (t *Then) ExpectRevisionPodCount(revision string, expectedCount int) *Then
return t.expectPodCountByHash(description, hash, expectedCount)
}

func (t *Then) ExpectRevisionScaleDown(revision string, expectScaleDown bool) *Then {
t.t.Helper()
rs := t.GetReplicaSetByRevision(revision)
description := fmt.Sprintf("revision:%s", revision)
return t.expectRSScaleDownByName(description, rs.Name, expectScaleDown)
}

func (t *Then) expectRSScaleDownByName(description, name string, expectScaleDown bool) *Then {
return t.ExpectRS(fmt.Sprintf("RS %s scale down", name), name, func(rs *appsv1.ReplicaSet) bool {
hasScaleDownDelay := false

if _, ok := rs.Annotations[v1alpha1.DefaultReplicaSetScaleDownDeadlineAnnotationKey]; ok {
hasScaleDownDelay = true
}

metExpectation := hasScaleDownDelay == expectScaleDown
if !metExpectation {
t.log.Warnf("unexpected %s (rs %s): expected to be scaled down is %v", description, name, expectScaleDown)
}
return metExpectation
})
}

func (t *Then) ExpectRS(expectation string, name string, expectFunc ReplicasetExpectation) *Then {
t.t.Helper()
_, err := metav1.LabelSelectorAsSelector(t.Rollout().Spec.Selector)
t.CheckError(err)
rs, err := t.kubeClient.AppsV1().ReplicaSets(t.namespace).Get(t.Context, name, metav1.GetOptions{})
// List(t.Context, metav1.ListOptions{LabelSelector: selector.String()})
t.CheckError(err)
if !expectFunc(rs) {
t.log.Errorf("rs expectation '%s' failed", expectation)
t.t.FailNow()
}
t.log.Infof("rs expectation '%s' met", expectation)
return t
}

func (t *Then) expectPodCountByHash(description, hash string, expectedCount int) *Then {
return t.ExpectPods(fmt.Sprintf("%s pod count == %d", description, expectedCount), func(pods *corev1.PodList) bool {
count := 0
Expand Down
2 changes: 1 addition & 1 deletion utils/defaults/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
// DefaultScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services
DefaultScaleDownDelaySeconds = int32(30)
// DefaultAbortScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services
DefaultAbortScaleDownDelaySeconds = int32(0)
DefaultAbortScaleDownDelaySeconds = int32(30)
// DefaultAutoPromotionEnabled default value for auto promoting a blueGreen strategy
DefaultAutoPromotionEnabled = true
// DefaultConsecutiveErrorLimit is the default number times a metric can error in sequence before
Expand Down

0 comments on commit 549fc14

Please sign in to comment.