Skip to content

Commit

Permalink
fix: retry Experiment ReplicaSet scaling conflict errors
Browse files Browse the repository at this point in the history
Signed-off-by: Jesse Suen <[email protected]>
  • Loading branch information
jessesuen committed Jan 15, 2022
1 parent 0f93f9b commit c974df4
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 6 deletions.
31 changes: 29 additions & 2 deletions experiments/experiment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@ import (
"testing"
"time"

"k8s.io/utils/pointer"

"github.com/stretchr/testify/assert"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
kubeinformers "k8s.io/client-go/informers"
k8sfake "k8s.io/client-go/kubernetes/fake"
kubetesting "k8s.io/client-go/testing"
"k8s.io/utils/pointer"

"github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
"github.com/argoproj/argo-rollouts/pkg/client/clientset/versioned/fake"
Expand Down Expand Up @@ -414,6 +415,32 @@ func TestFailAddScaleDownDelay(t *testing.T) {
assert.Equal(t, newStatus.Phase, v1alpha1.AnalysisPhaseError)
}

func TestFailAddScaleDownDelayIsConflict(t *testing.T) {
templates := generateTemplates("bar")
ex := newExperiment("foo", templates, "")
ex.Spec.ScaleDownDelaySeconds = pointer.Int32Ptr(0)
ex.Status.TemplateStatuses = []v1alpha1.TemplateStatus{
generateTemplatesStatus("bar", 1, 1, v1alpha1.TemplateStatusRunning, now()),
}
rs := templateToRS(ex, templates[0], 1)
rs.Spec.Replicas = pointer.Int32(0)

exCtx := newTestContext(ex, rs)
exCtx.templateRSs["bar"] = rs

fakeClient := exCtx.kubeclientset.(*k8sfake.Clientset)
updateCalled := false
fakeClient.PrependReactor("update", "replicasets", func(action kubetesting.Action) (handled bool, ret runtime.Object, err error) {
updateCalled = true
return true, nil, k8serrors.NewConflict(schema.GroupResource{}, "guestbook", errors.New("intentional-error"))
})
newStatus := exCtx.reconcile()
assert.True(t, updateCalled)
assert.Equal(t, v1alpha1.TemplateStatusRunning, newStatus.TemplateStatuses[0].Status)
assert.Equal(t, "", newStatus.TemplateStatuses[0].Message)
assert.Equal(t, newStatus.Phase, v1alpha1.AnalysisPhaseRunning)
}

// TestDeleteOutdatedService verifies that outdated service for Template in templateServices map is deleted and new service is created
func TestDeleteOutdatedService(t *testing.T) {
templates := generateTemplates("bar")
Expand Down
12 changes: 8 additions & 4 deletions experiments/replicaset.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ import (
"fmt"
"time"

"github.com/argoproj/argo-rollouts/utils/defaults"
timeutil "github.com/argoproj/argo-rollouts/utils/time"

log "github.com/sirupsen/logrus"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/errors"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
patchtypes "k8s.io/apimachinery/pkg/types"
Expand All @@ -20,10 +18,12 @@ import (

"github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
"github.com/argoproj/argo-rollouts/utils/conditions"
"github.com/argoproj/argo-rollouts/utils/defaults"
experimentutil "github.com/argoproj/argo-rollouts/utils/experiment"
logutil "github.com/argoproj/argo-rollouts/utils/log"
"github.com/argoproj/argo-rollouts/utils/record"
replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
timeutil "github.com/argoproj/argo-rollouts/utils/time"
)

const (
Expand Down Expand Up @@ -268,7 +268,11 @@ func (ec *experimentContext) scaleReplicaSetAndRecordEvent(rs *appsv1.ReplicaSet
}
scaled, newRS, err := ec.scaleReplicaSet(rs, newScale, scalingOperation)
if err != nil {
// TODO(jessesuen): gracefully handle conflict issues
if k8serrors.IsConflict(err) {
ec.log.Warnf("Retrying scaling of ReplicaSet '%s': %s", rs.Name, err)
ec.enqueueExperimentAfter(ec.ex, time.Second)
return false, nil, nil
}
msg := fmt.Sprintf("Failed to scale %s %s: %v", rs.Name, scalingOperation, err)
ec.recorder.Warnf(ec.ex, record.EventOptions{EventReason: "ReplicaSetUpdateError"}, msg)
} else {
Expand Down

0 comments on commit c974df4

Please sign in to comment.