
Revert "fix: canary scaledown event could violate maxUnavailable (arg…
Browse files Browse the repository at this point in the history
…oproj#1429)"

This reverts commit 7c6a0c5.
jessesuen committed Sep 2, 2021
1 parent c44fc3f commit 5774de2
Showing 3 changed files with 24 additions and 143 deletions.
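
Before the file diffs, a note on the invariant this revert touches: during a canary update the controller may only drain pods while total availability stays at or above spec.replicas - maxUnavailable. A minimal standalone sketch of that budget calculation (illustrative names, not code from this commit):

package main

import "fmt"

// scaleDownBudget mirrors the guard kept in utils/replicaset/canary.go below:
// pods may be drained only while total availability stays at or above
// specReplicas - maxUnavailable.
func scaleDownBudget(specReplicas, maxUnavailable, replicasToScaleDown int32) int32 {
	minAvailable := specReplicas - maxUnavailable
	if replicasToScaleDown <= minAvailable {
		return 0 // nothing can be drained without violating maxUnavailable
	}
	return replicasToScaleDown - minAvailable
}

func main() {
	// 10 desired replicas, maxUnavailable of 0, and 12 drainable pods:
	// at most 2 pods may be removed in this reconciliation pass.
	fmt.Println(scaleDownBudget(10, 0, 12)) // 2
}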
58 changes: 2 additions & 56 deletions test/e2e/canary_test.go
@@ -112,58 +112,6 @@ func (s *CanarySuite) TestRolloutScalingWhenPaused() {
 		ExpectCanaryStablePodCount(1, 3)
 }
 
-
-// TestRolloutWithMaxSurgeScalingDuringUpdate verifies behavior when scaling a rollout up/down in the middle of an update with maxSurge 100%
-func (s *CanarySuite) TestRolloutWithMaxSurgeScalingDuringUpdate() {
-	s.Given().
-		HealthyRollout(`
-apiVersion: argoproj.io/v1alpha1
-kind: Rollout
-metadata:
-  name: updatescaling
-spec:
-  replicas: 4
-  strategy:
-    canary:
-      maxSurge: 100%
-  selector:
-    matchLabels:
-      app: updatescaling
-  template:
-    metadata:
-      labels:
-        app: updatescaling
-    spec:
-      containers:
-      - name: updatescaling
-        image: nginx:1.19-alpine
-        resources:
-          requests:
-            memory: 16Mi
-            cpu: 1m`).
-		When().
-		PatchSpec(`
-spec:
-  template:
-    spec:
-      containers:
-      - name: updatescaling
-        command: [/bad-command]`).
-		WaitForRolloutReplicas(7).
-		Then().
-		ExpectCanaryStablePodCount(4, 3).
-		When().
-		ScaleRollout(8).
-		WaitForRolloutReplicas(11).
-		Then().
-		ExpectCanaryStablePodCount(8, 3).
-		When().
-		ScaleRollout(4).
-		WaitForRolloutReplicas(7).
-		Then().
-		ExpectCanaryStablePodCount(4, 3)
-}
-
 // TestRolloutScalingDuringUpdate verifies behavior when scaling a rollout up/down in the middle of an update
 func (s *CanarySuite) TestRolloutScalingDuringUpdate() {
 	s.Given().
@@ -212,10 +160,8 @@ spec:
 		// See: https://github.com/argoproj/argo-rollouts/issues/738
 		ExpectCanaryStablePodCount(6, 4).
 		When().
-		ScaleRollout(4).
-		WaitForRolloutReplicas(6).
-		Then().
-		ExpectCanaryStablePodCount(2, 4)
+		ScaleRollout(4)
+	// WaitForRolloutReplicas(4) // this doesn't work yet (bug)
 }
 
 // TestReduceWeightAndHonorMaxUnavailable verifies we honor maxUnavailable when decreasing weight or aborting
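
The expectations in the removed TestRolloutWithMaxSurgeScalingDuringUpdate follow from bounds arithmetic. A quick editorial check, assuming the default maxUnavailable of 25% rounded down (an assumption — the manifest above does not set it):

package main

import "fmt"

func main() {
	replicas := int32(4)
	maxSurge := replicas           // maxSurge: 100%
	maxUnavailable := replicas / 4 // assumed default of 25%, rounded down
	fmt.Println(replicas + maxSurge)       // 8: ceiling on total pods
	fmt.Println(replicas - maxUnavailable) // 3: pods that must remain available
	// The patched canary pods run /bad-command and never become available, so
	// the stable RS must keep 3 available pods while the canary scales to 4:
	// hence WaitForRolloutReplicas(7) and ExpectCanaryStablePodCount(4, 3).
}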
93 changes: 22 additions & 71 deletions utils/replicaset/canary.go
@@ -175,93 +175,44 @@ func CalculateReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS *appsv1.Re
 	}
 
 	minAvailableReplicaCount := rolloutSpecReplica - MaxUnavailable(rollout)
 
 	// isIncreasing indicates if we are supposed to be increasing our canary replica count.
 	// If so, we can ignore pod availability of the stableRS. Otherwise, if we are reducing our
 	// weight (e.g. we are aborting), then we can ignore pod availability of the canaryRS.
 	isIncreasing := newRS == nil || desiredNewRSReplicaCount >= *newRS.Spec.Replicas
 	replicasToScaleDown := GetReplicasForScaleDown(newRS, !isIncreasing) + GetReplicasForScaleDown(stableRS, isIncreasing)
 
 	if replicasToScaleDown <= minAvailableReplicaCount {
 		// Cannot scale down stableRS or newRS without going below min available replica count
 		return newRSReplicaCount, stableRSReplicaCount
 	}
 
 	scaleDownCount := replicasToScaleDown - minAvailableReplicaCount
 
-	if !isIncreasing {
-		// Skip scaling down the stable ReplicaSet when canary availability is not part of the scale-down calculation
-		newRSReplicaCount = calculateScaleDownReplicaCount(newRS, desiredNewRSReplicaCount, scaleDownCount, newRSReplicaCount)
-		newRSReplicaCount, stableRSReplicaCount = adjustReplicaWithinLimits(newRS, stableRS, newRSReplicaCount, stableRSReplicaCount, maxReplicaCountAllowed, minAvailableReplicaCount)
-	} else {
-		// Skip scaling down the canary ReplicaSet when stable availability is not part of the scale-down calculation
-		stableRSReplicaCount = calculateScaleDownReplicaCount(stableRS, desiredStableRSReplicaCount, scaleDownCount, stableRSReplicaCount)
-		stableRSReplicaCount, newRSReplicaCount = adjustReplicaWithinLimits(stableRS, newRS, stableRSReplicaCount, newRSReplicaCount, maxReplicaCountAllowed, minAvailableReplicaCount)
-	}
-	return newRSReplicaCount, stableRSReplicaCount
-}
-
-// calculateScaleDownReplicaCount calculates drainRSReplicaCount,
-// which can be either the stableRS count or the canaryRS count:
-// it corresponds to the RS whose availability is not considered when calculating replicasToScaleDown
-func calculateScaleDownReplicaCount(drainRS *appsv1.ReplicaSet, desireRSReplicaCount int32, scaleDownCount int32, drainRSReplicaCount int32) int32 {
-	if drainRS != nil && *drainRS.Spec.Replicas > desireRSReplicaCount {
-		// if the controller doesn't have to use every replica to achieve the desired count,
-		// it can scale down to the desired count or get closer to the desired state.
-		drainRSReplicaCount = maxValue(desireRSReplicaCount, *drainRS.Spec.Replicas-scaleDownCount)
-	}
-	return drainRSReplicaCount
-}
-
-// adjustReplicaWithinLimits adjusts the replica counters to stay within the maxSurge and maxUnavailable limits
-// drainRSReplicaCount corresponds to the RS whose availability is not considered when calculating replicasToScaleDown
-// adjustRSReplicaCount corresponds to the RS whose availability is taken into account when adjusting for the maxUnavailable limit
-func adjustReplicaWithinLimits(drainRS *appsv1.ReplicaSet, adjustRS *appsv1.ReplicaSet, drainRSReplicaCount int32, adjustRSReplicaCount int32, maxReplicaCountAllowed int32, minAvailableReplicaCount int32) (int32, int32) {
-	extraAvailableAdjustRS := int32(0)
-	totalAvailableReplicas := int32(0)
-	// calculate how far the current total is over the allowed value
-	overTheLimitVal := maxValue(0, adjustRSReplicaCount+drainRSReplicaCount-maxReplicaCountAllowed)
-	if drainRS != nil {
-		totalAvailableReplicas = totalAvailableReplicas + minValue(drainRS.Status.AvailableReplicas, drainRSReplicaCount)
-	}
-	if adjustRS != nil {
-		// 1. adjust adjustRSReplicaCount to be within maxSurge
-		adjustRSReplicaCount = adjustRSReplicaCount - overTheLimitVal
-		// 2. calculate the availability corresponding to the adjusted count
-		totalAvailableReplicas = totalAvailableReplicas + minValue(adjustRS.Status.AvailableReplicas, adjustRSReplicaCount)
-		// 3. calculate the decrease in availability of adjustRS caused by (1)
-		extraAvailableAdjustRS = maxValue(0, adjustRS.Status.AvailableReplicas-adjustRSReplicaCount)
-
-		// 4. now calculate how far the count is from the maxUnavailable limit
-		moreToNeedAvailableReplicas := maxValue(0, minAvailableReplicaCount-totalAvailableReplicas)
-		// 5. (3) gives the decrease in availability caused by (1); take the min of (3) and (4)
-		// and add it back to adjustRS. The remainder of moreToNeedAvailableReplicas can be
-		// ignored since it belongs to drainRS; drainRS cannot deviate from the maxUnavailable
-		// limit here, because in that case the scale-down calculation would not occur at all --
-		// the caller already checks replicasToScaleDown <= minAvailableReplicaCount
-		adjustRSReplicaCount = adjustRSReplicaCount + minValue(extraAvailableAdjustRS, moreToNeedAvailableReplicas)
-		// 6. calculate the final overage resulting from the adjustment
-		overTheLimitVal = maxValue(0, adjustRSReplicaCount+drainRSReplicaCount-maxReplicaCountAllowed)
-		// 7. we can safely subtract from drainRS; a deviation from the maxUnavailable limit
-		// cannot occur, as noted in (5)
-		drainRSReplicaCount = drainRSReplicaCount - overTheLimitVal
-	}
-
-	return drainRSReplicaCount, adjustRSReplicaCount
-}
-
-func minValue(countA int32, countB int32) int32 {
-	if countA > countB {
-		return countB
-	}
-	return countA
-}
-
-func maxValue(countA int32, countB int32) int32 {
-	if countA < countB {
-		return countB
-	}
-	return countA
-}
+	if newRS != nil && *newRS.Spec.Replicas > desiredNewRSReplicaCount {
+		// if the controller doesn't have to use every replica to achieve the desired count, it only scales down to the
+		// desired count.
+		if *newRS.Spec.Replicas-scaleDownCount < desiredNewRSReplicaCount {
+			newRSReplicaCount = desiredNewRSReplicaCount
+			// Calculate how many replicas were used to scale down to the desired count
+			scaleDownCount = scaleDownCount - (*newRS.Spec.Replicas - desiredNewRSReplicaCount)
+		} else {
+			// The controller is using every replica it can to get closer to the desired state.
+			newRSReplicaCount = *newRS.Spec.Replicas - scaleDownCount
+			scaleDownCount = 0
+		}
+	}
+
+	if scaleStableRS && *stableRS.Spec.Replicas > desiredStableRSReplicaCount {
+		// This follows the same logic as scaling down the newRS, except with the stableRS, and it does not need to
+		// set scaleDownCount again since it is not used after this point
+		if *stableRS.Spec.Replicas-scaleDownCount < desiredStableRSReplicaCount {
+			stableRSReplicaCount = desiredStableRSReplicaCount
+		} else {
+			stableRSReplicaCount = *stableRS.Spec.Replicas - scaleDownCount
+		}
+	}
+
+	return newRSReplicaCount, stableRSReplicaCount
+}
 
 // BeforeStartingStep checks if canary rollout is at the starting step
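
To make the restored arithmetic concrete, here is a self-contained sketch of the two if-blocks this revert adds back, using plain int32 values in place of *appsv1.ReplicaSet fields (the function name and signature are illustrative, not the controller's):

package main

import "fmt"

func scaleDown(newRSReplicas, desiredNewRS, stableRSReplicas, desiredStableRS, scaleDownCount int32) (int32, int32) {
	newRSReplicaCount, stableRSReplicaCount := newRSReplicas, stableRSReplicas
	if newRSReplicas > desiredNewRS {
		if newRSReplicas-scaleDownCount < desiredNewRS {
			// only part of the budget is needed to reach the desired count
			newRSReplicaCount = desiredNewRS
			scaleDownCount -= newRSReplicas - desiredNewRS
		} else {
			// spend the entire budget moving toward the desired state
			newRSReplicaCount = newRSReplicas - scaleDownCount
			scaleDownCount = 0
		}
	}
	if stableRSReplicas > desiredStableRS {
		// same logic for the stable ReplicaSet, with whatever budget remains
		if stableRSReplicas-scaleDownCount < desiredStableRS {
			stableRSReplicaCount = desiredStableRS
		} else {
			stableRSReplicaCount = stableRSReplicas - scaleDownCount
		}
	}
	return newRSReplicaCount, stableRSReplicaCount
}

func main() {
	// newRS at 6 (desired 2) and stableRS at 4 (desired 2) with a budget of 3:
	// the canary absorbs the whole budget (6 -> 3) and the stable RS is untouched.
	fmt.Println(scaleDown(6, 2, 4, 2, 3)) // 3 4
}

Note that the restored blocks compare spec replica counts only and never consult Status.AvailableReplicas, which appears to be the gap the reverted fix (argoproj#1429) had tried to close.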
16 changes: 0 additions & 16 deletions utils/replicaset/canary_test.go
@@ -339,22 +339,6 @@ func TestCalculateReplicaCountsForCanary(t *testing.T) {
 			expectedStableReplicaCount: 7,
 			expectedCanaryReplicaCount: 3,
 		},
-		{
-			name:                "Scale down stable and canary available",
-			rolloutSpecReplicas: 10,
-			setWeight:           100,
-			maxSurge:            intstr.FromInt(1),
-			maxUnavailable:      intstr.FromInt(0),
-
-			stableSpecReplica:      10,
-			stableAvailableReplica: 2,
-
-			canarySpecReplica:      10,
-			canaryAvailableReplica: 8,
-
-			expectedStableReplicaCount: 2,
-			expectedCanaryReplicaCount: 9,
-		},
 		{
 			name: "Do not scale down newRS or stable when older RS count >= scaleDownCount",
 			rolloutSpecReplicas: 10,
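
A hedged reading of the removed table case: with 10 spec replicas, setWeight 100, maxSurge 1, and maxUnavailable 0, the rollout may run at most 11 pods and must keep at least 10 available. Exactly 10 pods are available (2 stable + 8 canary), so none of them may be drained; the reverted fix therefore expected the stable RS to be scaled down only to its 2 available pods and the canary RS to be trimmed from 10 to 9 so that the total stays within the 11-pod ceiling.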
