From eb26809dd66e550015f894f1bbaa580d45c03392 Mon Sep 17 00:00:00 2001
From: mitchell amihod
Date: Wed, 5 Jul 2023 10:23:30 -0400
Subject: [PATCH 1/4] docs(example): Add example on how to execute subset of e2e tests (#2867)

Signed-off-by: mitchell amihod <4623+meeech@users.noreply.github.com>
---
 docs/CONTRIBUTING.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index a325587804..a37ad29fc0 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -104,6 +104,12 @@ Then run the e2e tests:
 make test-e2e
 ```
 
+To run a subset of e2e tests, you need to specify the suite with `-run`, and the specific test regex with `-testify.m`.
+
+```
+E2E_TEST_OPTIONS="-run 'TestCanarySuite' -testify.m 'TestCanaryScaleDownOnAbortNoTrafficRouting'" make test-e2e
+```
+
 ## Controller architecture
 
 Argo Rollouts is actually a collection of individual controllers

From 65fbefe5232c38be04a4daaeb557bade3111379a Mon Sep 17 00:00:00 2001
From: Ben Kochie
Date: Fri, 7 Jul 2023 16:17:25 +0200
Subject: [PATCH 2/4] fix(controller): Remove name label from some k8s client metrics on events and replicasets (#2851)

BREAKING CHANGE

The metric labels have changed on controller_clientset_k8s_request_total to not include the name of the resource for events and replicasets. These names have generated hashes in them and cause really high cardinality.

Remove name label from some k8s client metrics

The `name` label in the `controller_clientset_k8s_request_total` metric produces an excessive amount of cardinality for `events` and `replicasets`. This can lead to hundreds of thousands of unique metrics over a couple of weeks in a large deployment.

Set the name to "N/A" for these client request types.

Signed-off-by: SuperQ
---
 controller/metrics/client.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/controller/metrics/client.go b/controller/metrics/client.go
index 01367745ef..b800a74bd8 100644
--- a/controller/metrics/client.go
+++ b/controller/metrics/client.go
@@ -27,7 +27,7 @@ func (m *K8sRequestsCountProvider) IncKubernetesRequest(resourceInfo kubeclientm
 	namespace := resourceInfo.Namespace
 	kind := resourceInfo.Kind
 	statusCode := strconv.Itoa(resourceInfo.StatusCode)
-	if resourceInfo.Verb == kubeclientmetrics.List {
+	if resourceInfo.Verb == kubeclientmetrics.List || kind == "events" || kind == "replicasets" {
 		name = "N/A"
 	}
 	if resourceInfo.Verb == kubeclientmetrics.Unknown {

From f1de890c6501da0866db994e8323d4f5fc29342a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 11 Jul 2023 09:46:06 -0500
Subject: [PATCH 3/4] chore(deps): bump google.golang.org/grpc from 1.56.1 to 1.56.2 (#2872)

Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.56.1 to 1.56.2.
- [Release notes](https://github.com/grpc/grpc-go/releases)
- [Commits](https://github.com/grpc/grpc-go/compare/v1.56.1...v1.56.2)

---
updated-dependencies:
- dependency-name: google.golang.org/grpc
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 52e9c7b244..465afc74fa 100644 --- a/go.mod +++ b/go.mod @@ -37,7 +37,7 @@ require ( github.com/tj/assert v0.0.3 github.com/valyala/fasttemplate v1.2.2 google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 - google.golang.org/grpc v1.56.1 + google.golang.org/grpc v1.56.2 google.golang.org/protobuf v1.31.0 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.25.8 diff --git a/go.sum b/go.sum index fcb015bd94..d21daece7d 100644 --- a/go.sum +++ b/go.sum @@ -1056,8 +1056,8 @@ google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.56.1 h1:z0dNfjIl0VpaZ9iSVjA6daGatAYwPGstTjt5vkRMFkQ= -google.golang.org/grpc v1.56.1/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= +google.golang.org/grpc v1.56.2 h1:fVRFRnXvU+x6C4IlHZewvJOVHoOv1TUuQyoRsYnB4bI= +google.golang.org/grpc v1.56.2/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= From 3c5ac368649527940b33a7bc08f3c45f14ec0c77 Mon Sep 17 00:00:00 2001 From: mitchell amihod Date: Tue, 11 Jul 2023 10:53:30 -0400 Subject: [PATCH 4/4] fix(trafficrouting): apply stable selectors on canary service on rollout abort #2781 (#2818) * Re-apply stable to simple canary service on abort when no traffic routing is used fixes #2781 * remove redundant block of code Signed-off-by: mitchell amihod <4623+meeech@users.noreply.github.com> * Add an e2e covering the fix. 
Basically a copy/pasta of test that was checking for this when using traffic routing Signed-off-by: mitchell amihod <4623+meeech@users.noreply.github.com> --------- Signed-off-by: mitchell amihod <4623+meeech@users.noreply.github.com> --- rollout/canary_test.go | 134 ++++++++++++++++-- rollout/service.go | 9 ++ rollout/trafficrouting.go | 6 +- test/e2e/canary_test.go | 25 +++- ...nary-scaledownonabortnotrafficrouting.yaml | 91 ++++++++++++ 5 files changed, 247 insertions(+), 18 deletions(-) create mode 100644 test/e2e/functional/canary-scaledownonabortnotrafficrouting.yaml diff --git a/rollout/canary_test.go b/rollout/canary_test.go index 56445f3978..1811f6a4c4 100644 --- a/rollout/canary_test.go +++ b/rollout/canary_test.go @@ -427,7 +427,6 @@ func TestResetCurrentStepIndexOnStepChange(t *testing.T) { newConditions := generateConditionsPatch(true, conditions.ReplicaSetUpdatedReason, r2, false, "", false) expectedPatch := fmt.Sprintf(expectedPatchWithoutPodHash, expectedCurrentPodHash, expectedCurrentStepHash, newConditions) assert.Equal(t, calculatePatch(r2, expectedPatch), patch) - } func TestResetCurrentStepIndexOnPodSpecChange(t *testing.T) { @@ -469,7 +468,6 @@ func TestResetCurrentStepIndexOnPodSpecChange(t *testing.T) { expectedPatch := fmt.Sprintf(expectedPatchWithoutPodHash, expectedCurrentPodHash, newConditions) assert.Equal(t, calculatePatch(r2, expectedPatch), patch) - } func TestCanaryRolloutCreateFirstReplicasetNoSteps(t *testing.T) { @@ -708,7 +706,6 @@ func TestCanaryRolloutScaleDownOldRsDontScaleDownTooMuch(t *testing.T) { assert.Equal(t, int32(0), *updatedRS1.Spec.Replicas) updatedRS2 := f.getUpdatedReplicaSet(updatedRS2Index) assert.Equal(t, int32(4), *updatedRS2.Spec.Replicas) - } // TestCanaryDontScaleDownOldRsDuringInterruptedUpdate tests when we need to prevent scale down an @@ -1019,9 +1016,8 @@ func TestSyncRolloutWaitAddToQueue(t *testing.T) { c, i, k8sI := f.newController(func() time.Duration { return 30 * time.Minute }) f.runController(key, true, false, c, i, k8sI) - //When the controller starts, it will enqueue the rollout while syncing the informer and during the reconciliation step + // When the controller starts, it will enqueue the rollout while syncing the informer and during the reconciliation step assert.Equal(t, 2, f.enqueuedObjects[key]) - } func TestSyncRolloutIgnoreWaitOutsideOfReconciliationPeriod(t *testing.T) { @@ -1034,7 +1030,7 @@ func TestSyncRolloutIgnoreWaitOutsideOfReconciliationPeriod(t *testing.T) { }, { Pause: &v1alpha1.RolloutPause{ - Duration: v1alpha1.DurationFromInt(3600), //1 hour + Duration: v1alpha1.DurationFromInt(3600), // 1 hour }, }, } @@ -1068,9 +1064,8 @@ func TestSyncRolloutIgnoreWaitOutsideOfReconciliationPeriod(t *testing.T) { key := fmt.Sprintf("%s/%s", r2.Namespace, r2.Name) c, i, k8sI := f.newController(func() time.Duration { return 30 * time.Minute }) f.runController(key, true, false, c, i, k8sI) - //When the controller starts, it will enqueue the rollout so we expect the rollout to enqueue at least once. + // When the controller starts, it will enqueue the rollout so we expect the rollout to enqueue at least once. 
assert.Equal(t, 1, f.enqueuedObjects[key]) - } func TestSyncRolloutWaitIncrementStepIndex(t *testing.T) { @@ -1084,7 +1079,8 @@ func TestSyncRolloutWaitIncrementStepIndex(t *testing.T) { Pause: &v1alpha1.RolloutPause{ Duration: v1alpha1.DurationFromInt(5), }, - }, { + }, + { Pause: &v1alpha1.RolloutPause{}, }, } @@ -1236,6 +1232,7 @@ func TestCanarySVCSelectors(t *testing.T) { }, }, } + rc := rolloutContext{ log: logutil.WithRollout(rollout), reconcilerBase: reconcilerBase{ @@ -1266,6 +1263,7 @@ func TestCanarySVCSelectors(t *testing.T) { }, }, } + stopchan := make(chan struct{}) defer close(stopchan) informers.Start(stopchan) @@ -1286,6 +1284,124 @@ func TestCanarySVCSelectors(t *testing.T) { } } +func TestCanarySVCSelectorsBasicCanaryAbortServiceSwitchBack(t *testing.T) { + for _, tc := range []struct { + canaryReplicas int32 + canaryAvailReplicas int32 + shouldAbortRollout bool + shouldTargetNewRS bool + }{ + {2, 2, false, true}, // Rollout, canaryService should point at the canary RS + {2, 2, true, false}, // Rollout aborted, canaryService should point at the stable RS + } { + namespace := "namespace" + selectorLabel := "selector-labels-test" + selectorNewRSVal := "new-rs-xxx" + selectorStableRSVal := "stable-rs-xxx" + stableService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stable", + Namespace: namespace, + Annotations: map[string]string{v1alpha1.ManagedByRolloutsKey: selectorLabel}, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorStableRSVal, + }, + }, + } + canaryService := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "canary", + Namespace: namespace, + Annotations: map[string]string{v1alpha1.ManagedByRolloutsKey: selectorLabel}, + }, + } + kubeclient := k8sfake.NewSimpleClientset(stableService, canaryService) + informers := k8sinformers.NewSharedInformerFactory(kubeclient, 0) + servicesLister := informers.Core().V1().Services().Lister() + + rollout := &v1alpha1.Rollout{ + ObjectMeta: metav1.ObjectMeta{ + Name: selectorLabel, + Namespace: namespace, + }, + Spec: v1alpha1.RolloutSpec{ + Strategy: v1alpha1.RolloutStrategy{ + Canary: &v1alpha1.CanaryStrategy{ + StableService: stableService.Name, + CanaryService: canaryService.Name, + }, + }, + }, + } + + pc := pauseContext{ + rollout: rollout, + } + if tc.shouldAbortRollout { + pc.AddAbort("Add Abort") + } + + rc := rolloutContext{ + log: logutil.WithRollout(rollout), + pauseContext: &pc, + reconcilerBase: reconcilerBase{ + servicesLister: servicesLister, + kubeclientset: kubeclient, + recorder: record.NewFakeEventRecorder(), + }, + rollout: rollout, + newRS: &v1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "canary", + Namespace: namespace, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorNewRSVal, + }, + }, + Spec: v1.ReplicaSetSpec{ + Replicas: pointer.Int32Ptr(tc.canaryReplicas), + }, + Status: v1.ReplicaSetStatus{ + AvailableReplicas: tc.canaryAvailReplicas, + }, + }, + stableRS: &v1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stable", + Namespace: namespace, + Labels: map[string]string{ + v1alpha1.DefaultRolloutUniqueLabelKey: selectorStableRSVal, + }, + }, + Spec: v1.ReplicaSetSpec{ + Replicas: pointer.Int32Ptr(tc.canaryReplicas), + }, + Status: v1.ReplicaSetStatus{ + AvailableReplicas: tc.canaryAvailReplicas, + }, + }, + } + + stopchan := make(chan struct{}) + defer close(stopchan) + informers.Start(stopchan) + informers.WaitForCacheSync(stopchan) + err := rc.reconcileStableAndCanaryService() + assert.NoError(t, err, 
"unable to reconcileStableAndCanaryService") + updatedCanarySVC, err := servicesLister.Services(rc.rollout.Namespace).Get(canaryService.Name) + assert.NoError(t, err, "unable to get updated canary service") + if tc.shouldTargetNewRS { + assert.Equal(t, selectorNewRSVal, updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey], + "canary SVC should have newRS selector label when newRS has %d replicas and %d AvailableReplicas", + tc.canaryReplicas, tc.canaryAvailReplicas) + } else { + assert.Equal(t, selectorStableRSVal, updatedCanarySVC.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey], + "canary SVC should have stableRS selector label when newRS has %d replicas and %d AvailableReplicas", + tc.canaryReplicas, tc.canaryAvailReplicas) + } + } +} + func TestCanaryRolloutWithInvalidCanaryServiceName(t *testing.T) { f := newFixture(t) defer f.Close() diff --git a/rollout/service.go b/rollout/service.go index de63f527b3..f808cb55fc 100644 --- a/rollout/service.go +++ b/rollout/service.go @@ -257,6 +257,15 @@ func (c *rolloutContext) reconcileStableAndCanaryService() error { if err != nil { return err } + + if c.pauseContext != nil && c.pauseContext.IsAborted() && c.rollout.Spec.Strategy.Canary.TrafficRouting == nil { + err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.stableRS, true) + if err != nil { + return err + } + return nil + } + err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.newRS, true) if err != nil { return err diff --git a/rollout/trafficrouting.go b/rollout/trafficrouting.go index c00eb66e1b..1b694d517a 100644 --- a/rollout/trafficrouting.go +++ b/rollout/trafficrouting.go @@ -143,11 +143,7 @@ func (c *rolloutContext) reconcileTrafficRouting() error { c.newStatus.Canary.Weights = nil return nil } - if reconcilers == nil { - // Not using traffic routing - c.newStatus.Canary.Weights = nil - return nil - } + c.log.Infof("Found %d TrafficRouting Reconcilers", len(reconcilers)) // iterate over the list of trafficReconcilers for _, reconciler := range reconcilers { diff --git a/test/e2e/canary_test.go b/test/e2e/canary_test.go index 00b588c598..fe22175074 100644 --- a/test/e2e/canary_test.go +++ b/test/e2e/canary_test.go @@ -443,7 +443,7 @@ spec: port: 80 periodSeconds: 30 strategy: - canary: + canary: steps: - setWeight: 20 - pause: {} @@ -539,7 +539,24 @@ func (s *CanarySuite) TestCanaryScaleDownOnAbort() { AbortRollout(). WaitForRolloutStatus("Degraded"). Then(). - //Expect that the canary service selector has been moved back to stable + // Expect that the canary service selector has been moved back to stable + ExpectServiceSelector("canary-scaledowndelay-canary", map[string]string{"app": "canary-scaledowndelay", "rollouts-pod-template-hash": "66597877b7"}, false). + When(). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 0) +} + +func (s *CanarySuite) TestCanaryScaleDownOnAbortNoTrafficRouting() { + s.Given(). + HealthyRollout(`@functional/canary-scaledownonabortnotrafficrouting.yaml`). + When(). + UpdateSpec(). // update to revision 2 + WaitForRolloutStatus("Paused"). + AbortRollout(). + WaitForRolloutStatus("Degraded"). + Then(). + // Expect that the canary service selector has been moved back to stable ExpectServiceSelector("canary-scaledowndelay-canary", map[string]string{"app": "canary-scaledowndelay", "rollouts-pod-template-hash": "66597877b7"}, false). When(). Sleep(3*time.Second). @@ -590,7 +607,7 @@ func (s *CanarySuite) TestCanaryDynamicStableScale() { WaitForRevisionPodCount("2", 1). Then(). 
ExpectRevisionPodCount("1", 4). - //Assert that the canary service selector is still not set to stable rs because of dynamic stable scale still in progress + // Assert that the canary service selector is still not set to stable rs because of dynamic stable scale still in progress Assert(func(t *fixtures.Then) { canarySvc, stableSvc := t.GetServices() assert.NotEqual(s.T(), canarySvc.Spec.Selector["rollouts-pod-template-hash"], stableSvc.Spec.Selector["rollouts-pod-template-hash"]) @@ -599,7 +616,7 @@ func (s *CanarySuite) TestCanaryDynamicStableScale() { MarkPodsReady("1", 1). // mark last remaining stable pod as ready (4/4 stable are ready) WaitForRevisionPodCount("2", 0). Then(). - //Expect that the canary service selector is now set to stable because of dynamic stable scale is over and we have all pods up on stable rs + // Expect that the canary service selector is now set to stable because of dynamic stable scale is over and we have all pods up on stable rs ExpectServiceSelector("dynamic-stable-scale-canary", map[string]string{"app": "dynamic-stable-scale", "rollouts-pod-template-hash": "868d98995b"}, false). ExpectRevisionPodCount("1", 4) } diff --git a/test/e2e/functional/canary-scaledownonabortnotrafficrouting.yaml b/test/e2e/functional/canary-scaledownonabortnotrafficrouting.yaml new file mode 100644 index 0000000000..0c42735db3 --- /dev/null +++ b/test/e2e/functional/canary-scaledownonabortnotrafficrouting.yaml @@ -0,0 +1,91 @@ +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-root +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-canary +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: v1 +kind: Service +metadata: + name: canary-scaledowndelay-stable +spec: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: canary-scaledowndelay +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: canary-scaledowndelay-ingress + annotations: + kubernetes.io/ingress.class: alb +spec: + rules: + - http: + paths: + - path: /* + pathType: Prefix + backend: + service: + name: canary-scaledowndelay-root + port: + name: use-annotation +--- +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: canary-scaledownd-on-abort +spec: + selector: + matchLabels: + app: canary-scaledowndelay + template: + metadata: + labels: + app: canary-scaledowndelay + spec: + containers: + - name: canary-scaledowndelay + image: nginx:1.19-alpine + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: + requests: + memory: 16Mi + cpu: 5m + strategy: + canary: + canaryService: canary-scaledowndelay-canary + stableService: canary-scaledowndelay-stable + steps: + - setWeight: 50 + - pause: {}
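
The manifest above is what the new `TestCanaryScaleDownOnAbortNoTrafficRouting` e2e test loads via `@functional/canary-scaledownonabortnotrafficrouting.yaml`. Following the e2e instructions added to `docs/CONTRIBUTING.md` in the first patch of this series, the new test can be exercised on its own with a command along these lines (assuming a cluster and the usual e2e prerequisites are already set up):

```
E2E_TEST_OPTIONS="-run 'TestCanarySuite' -testify.m 'TestCanaryScaleDownOnAbortNoTrafficRouting'" make test-e2e
```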