diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 15b61ce402..e5af64f91b 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -35,4 +35,4 @@ jobs: - name: "run tests" env: KUBE_VERSION: ${{ matrix.kube-version }} - run: make prepare-e2e e2e KUBE_VERSION=$KUBE_VERSION + run: make prepare-e2e e2e e2e-upgrade KUBE_VERSION=$KUBE_VERSION diff --git a/Makefile b/Makefile index d3602da554..8a4c444a40 100644 --- a/Makefile +++ b/Makefile @@ -158,6 +158,11 @@ generate: controller-gen api-docs e2e: $(KUTTL) test +# end-to-end-test for testing upgrading +.PHONY: e2e-upgrade +e2e-upgrade: + $(KUTTL) test --config kuttl-test-upgrade.yaml + .PHONY: prepare-e2e prepare-e2e: kuttl set-test-image-vars set-image-controller container container-target-allocator start-kind install-metrics-server load-image-all mkdir -p tests/_build/crds tests/_build/manifests diff --git a/kuttl-test-upgrade.yaml b/kuttl-test-upgrade.yaml new file mode 100644 index 0000000000..e77a2e2fae --- /dev/null +++ b/kuttl-test-upgrade.yaml @@ -0,0 +1,26 @@ +# Make sure that the OT operator after upgrading itself, can upgrade the OT collectors without error. +# The test is based on the version v0.49.0, a breaking change was introduced from PR +# https://github.com/open-telemetry/opentelemetry-operator/pull/797, which added a version label "app.kubernetes.io/version", +# The version label would change between OT operator upgrade, and since at the time, the collector pod selector was the same +# as this labels, resulted in selector being modified during reconciliation which caused error due to the selector is immutable. +# Please be aware of that the collector labels are changeable in various ways, so this issue may happen in any operator < v0.52.0 +# which changed the selector to be a static set of labels. +# The fix for this issue including: +# https://github.com/open-telemetry/opentelemetry-operator/issues/840, make the selector be a static set of labels; +# https://github.com/open-telemetry/opentelemetry-operator/issues/1117, delete the old collector to let the operator +# create a new one when the selector changed. +apiVersion: kuttl.dev/v1beta1 +kind: TestSuite +crdDir: ./tests/_build/crds/ +artifactsDir: ./tests/_build/artifacts/ +kindContainers: + - local/opentelemetry-operator:e2e + - ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:v0.49.0 +commands: + - command: make cert-manager + - command: kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/download/v0.49.0/opentelemetry-operator.yaml + - command: kubectl rollout status -w deployment/opentelemetry-operator-controller-manager -n opentelemetry-operator-system + - command: sleep 60s +testDirs: + - ./tests/e2e-upgrade/ +timeout: 150 diff --git a/pkg/collector/reconcile/daemonset.go b/pkg/collector/reconcile/daemonset.go index be597cd7e3..a14649c59c 100644 --- a/pkg/collector/reconcile/daemonset.go +++ b/pkg/collector/reconcile/daemonset.go @@ -19,6 +19,7 @@ import ( "fmt" appsv1 "k8s.io/api/apps/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -70,6 +71,16 @@ func expectedDaemonSets(ctx context.Context, params Params, expected []appsv1.Da return fmt.Errorf("failed to get: %w", err) } + // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. + if !apiequality.Semantic.DeepEqual(desired.Spec.Selector, existing.Spec.Selector) { + params.Log.V(2).Info("Spec.Selector change detected, trying to delete, the new collector daemonset will be created in the next reconcile cycle", "daemonset.name", existing.Name, "daemonset.namespace", existing.Namespace) + + if err := params.Client.Delete(ctx, existing); err != nil { + return fmt.Errorf("failed to request deleting daemonset: %w", err) + } + continue + } + // it exists already, merge the two if the end result isn't identical to the existing one updated := existing.DeepCopy() if updated.Annotations == nil { @@ -89,9 +100,6 @@ func expectedDaemonSets(ctx context.Context, params Params, expected []appsv1.Da updated.ObjectMeta.Labels[k] = v } - // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. - updated.Spec.Selector = existing.Spec.Selector.DeepCopy() - patch := client.MergeFrom(existing) if err := params.Client.Patch(ctx, updated, patch); err != nil { return fmt.Errorf("failed to apply changes: %w", err) diff --git a/pkg/collector/reconcile/daemonset_test.go b/pkg/collector/reconcile/daemonset_test.go index 8a4751e8b6..02d8d0c8aa 100644 --- a/pkg/collector/reconcile/daemonset_test.go +++ b/pkg/collector/reconcile/daemonset_test.go @@ -130,4 +130,34 @@ func TestExpectedDaemonsets(t *testing.T) { assert.True(t, exists) }) + + t.Run("change Spec.Selector should recreate daemonset", func(t *testing.T) { + + oldDs := collector.DaemonSet(param.Config, logger, param.Instance) + oldDs.Spec.Selector.MatchLabels["app.kubernetes.io/version"] = "latest" + oldDs.Spec.Template.Labels["app.kubernetes.io/version"] = "latest" + oldDs.Name = "update-ds" + + err := expectedDaemonSets(context.Background(), param, []v1.DaemonSet{oldDs}) + assert.NoError(t, err) + exists, err := populateObjectIfExists(t, &v1.DaemonSet{}, types.NamespacedName{Namespace: "default", Name: oldDs.Name}) + assert.NoError(t, err) + assert.True(t, exists) + + newDs := collector.DaemonSet(param.Config, logger, param.Instance) + newDs.Name = oldDs.Name + err = expectedDaemonSets(context.Background(), param, []v1.DaemonSet{newDs}) + assert.NoError(t, err) + exists, err = populateObjectIfExists(t, &v1.DaemonSet{}, types.NamespacedName{Namespace: "default", Name: oldDs.Name}) + assert.NoError(t, err) + assert.False(t, exists) + + err = expectedDaemonSets(context.Background(), param, []v1.DaemonSet{newDs}) + assert.NoError(t, err) + actual := v1.DaemonSet{} + exists, err = populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: oldDs.Name}) + assert.NoError(t, err) + assert.True(t, exists) + assert.Equal(t, newDs.Spec.Selector.MatchLabels, actual.Spec.Selector.MatchLabels) + }) } diff --git a/pkg/collector/reconcile/deployment.go b/pkg/collector/reconcile/deployment.go index 3cf567ce27..5e4b4bd18d 100644 --- a/pkg/collector/reconcile/deployment.go +++ b/pkg/collector/reconcile/deployment.go @@ -19,6 +19,7 @@ import ( "fmt" appsv1 "k8s.io/api/apps/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -76,6 +77,16 @@ func expectedDeployments(ctx context.Context, params Params, expected []appsv1.D return fmt.Errorf("failed to get: %w", err) } + // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. + if !apiequality.Semantic.DeepEqual(desired.Spec.Selector, existing.Spec.Selector) { + params.Log.V(2).Info("Spec.Selector change detected, trying to delete, the new collector deployment will be created in the next reconcile cycle ", "deployment.name", existing.Name, "deployment.namespace", existing.Namespace) + + if err := params.Client.Delete(ctx, existing); err != nil { + return fmt.Errorf("failed to delete deployment: %w", err) + } + continue + } + // it exists already, merge the two if the end result isn't identical to the existing one updated := existing.DeepCopy() if updated.Annotations == nil { @@ -95,9 +106,6 @@ func expectedDeployments(ctx context.Context, params Params, expected []appsv1.D updated.ObjectMeta.Labels[k] = v } - // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. - updated.Spec.Selector = existing.Spec.Selector.DeepCopy() - patch := client.MergeFrom(existing) if err := params.Client.Patch(ctx, updated, patch); err != nil { diff --git a/pkg/collector/reconcile/deployment_test.go b/pkg/collector/reconcile/deployment_test.go index d5d794d889..ab146b469a 100644 --- a/pkg/collector/reconcile/deployment_test.go +++ b/pkg/collector/reconcile/deployment_test.go @@ -342,6 +342,36 @@ func TestExpectedDeployments(t *testing.T) { assert.True(t, exists) }) + + t.Run("change Spec.Selector should recreate deployment", func(t *testing.T) { + + oldDeploy := collector.Deployment(param.Config, logger, param.Instance) + oldDeploy.Spec.Selector.MatchLabels["app.kubernetes.io/version"] = "latest" + oldDeploy.Spec.Template.Labels["app.kubernetes.io/version"] = "latest" + oldDeploy.Name = "update-deploy" + + err := expectedDeployments(context.Background(), param, []v1.Deployment{oldDeploy}) + assert.NoError(t, err) + exists, err := populateObjectIfExists(t, &v1.Deployment{}, types.NamespacedName{Namespace: "default", Name: oldDeploy.Name}) + assert.NoError(t, err) + assert.True(t, exists) + + newDeploy := collector.Deployment(param.Config, logger, param.Instance) + newDeploy.Name = oldDeploy.Name + err = expectedDeployments(context.Background(), param, []v1.Deployment{newDeploy}) + assert.NoError(t, err) + exists, err = populateObjectIfExists(t, &v1.Deployment{}, types.NamespacedName{Namespace: "default", Name: oldDeploy.Name}) + assert.NoError(t, err) + assert.False(t, exists) + + err = expectedDeployments(context.Background(), param, []v1.Deployment{newDeploy}) + assert.NoError(t, err) + actual := v1.Deployment{} + exists, err = populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: oldDeploy.Name}) + assert.NoError(t, err) + assert.True(t, exists) + assert.Equal(t, newDeploy.Spec.Selector.MatchLabels, actual.Spec.Selector.MatchLabels) + }) } func TestCurrentReplicasWithHPA(t *testing.T) { diff --git a/pkg/collector/reconcile/statefulset.go b/pkg/collector/reconcile/statefulset.go index 372b28d3b0..e663734329 100644 --- a/pkg/collector/reconcile/statefulset.go +++ b/pkg/collector/reconcile/statefulset.go @@ -19,6 +19,7 @@ import ( "fmt" appsv1 "k8s.io/api/apps/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -71,6 +72,16 @@ func expectedStatefulSets(ctx context.Context, params Params, expected []appsv1. return fmt.Errorf("failed to get: %w", err) } + // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. + if !apiequality.Semantic.DeepEqual(desired.Spec.Selector, existing.Spec.Selector) { + params.Log.V(2).Info("Spec.Selector change detected, trying to delete, the new collector statfulset will be created in the next reconcile cycle", "statefulset.name", existing.Name, "statefulset.namespace", existing.Namespace) + + if err := params.Client.Delete(ctx, existing); err != nil { + return fmt.Errorf("failed to delete statefulset: %w", err) + } + continue + } + // it exists already, merge the two if the end result isn't identical to the existing one updated := existing.DeepCopy() if updated.Annotations == nil { @@ -90,9 +101,6 @@ func expectedStatefulSets(ctx context.Context, params Params, expected []appsv1. updated.ObjectMeta.Labels[k] = v } - // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. - updated.Spec.Selector = existing.Spec.Selector.DeepCopy() - patch := client.MergeFrom(existing) if err := params.Client.Patch(ctx, updated, patch); err != nil { return fmt.Errorf("failed to apply changes: %w", err) diff --git a/pkg/collector/reconcile/statefulset_test.go b/pkg/collector/reconcile/statefulset_test.go index 4a0de27f27..bf83534295 100644 --- a/pkg/collector/reconcile/statefulset_test.go +++ b/pkg/collector/reconcile/statefulset_test.go @@ -132,4 +132,34 @@ func TestExpectedStatefulsets(t *testing.T) { assert.True(t, exists) }) + + t.Run("change Spec.Selector should recreate statefulset", func(t *testing.T) { + + oldSs := collector.StatefulSet(param.Config, logger, param.Instance) + oldSs.Spec.Selector.MatchLabels["app.kubernetes.io/version"] = "latest" + oldSs.Spec.Template.Labels["app.kubernetes.io/version"] = "latest" + oldSs.Name = "update-ss" + + err := expectedStatefulSets(context.Background(), param, []v1.StatefulSet{oldSs}) + assert.NoError(t, err) + exists, err := populateObjectIfExists(t, &v1.StatefulSet{}, types.NamespacedName{Namespace: "default", Name: oldSs.Name}) + assert.NoError(t, err) + assert.True(t, exists) + + newSs := collector.StatefulSet(param.Config, logger, param.Instance) + newSs.Name = oldSs.Name + err = expectedStatefulSets(context.Background(), param, []v1.StatefulSet{newSs}) + assert.NoError(t, err) + exists, err = populateObjectIfExists(t, &v1.StatefulSet{}, types.NamespacedName{Namespace: "default", Name: oldSs.Name}) + assert.NoError(t, err) + assert.False(t, exists) + + err = expectedStatefulSets(context.Background(), param, []v1.StatefulSet{newSs}) + assert.NoError(t, err) + actual := v1.StatefulSet{} + exists, err = populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: oldSs.Name}) + assert.NoError(t, err) + assert.True(t, exists) + assert.Equal(t, newSs.Spec.Selector.MatchLabels, actual.Spec.Selector.MatchLabels) + }) } diff --git a/tests/e2e-upgrade/upgrade-test/00-assert.yaml b/tests/e2e-upgrade/upgrade-test/00-assert.yaml new file mode 100644 index 0000000000..f6462600fd --- /dev/null +++ b/tests/e2e-upgrade/upgrade-test/00-assert.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: simplest-collector + annotations: + operatorVersion: "v0.49.0" +spec: + selector: + matchLabels: + app.kubernetes.io/version: latest +status: + readyReplicas: 1 +--- + +apiVersion: v1 +kind: Service +metadata: + name: simplest-collector-headless +spec: + ports: + - appProtocol: grpc + name: jaeger-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - appProtocol: grpc + name: otlp-grpc + port: 4317 + protocol: TCP + targetPort: 4317 + - appProtocol: http + name: otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 + - appProtocol: http + name: otlp-http-legacy + port: 55681 + protocol: TCP + targetPort: 4318 + +--- + +apiVersion: v1 +kind: Service +metadata: + name: simplest-collector +spec: + ports: + - appProtocol: grpc + name: jaeger-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - appProtocol: grpc + name: otlp-grpc + port: 4317 + protocol: TCP + targetPort: 4317 + - appProtocol: http + name: otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 + - appProtocol: http + name: otlp-http-legacy + port: 55681 + protocol: TCP + targetPort: 4318 diff --git a/tests/e2e-upgrade/upgrade-test/00-install.yaml b/tests/e2e-upgrade/upgrade-test/00-install.yaml new file mode 100644 index 0000000000..b0e18de3c6 --- /dev/null +++ b/tests/e2e-upgrade/upgrade-test/00-install.yaml @@ -0,0 +1,28 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + annotations: + operatorVersion: "v0.49.0" +spec: + replicas: 1 + config: | + receivers: + jaeger: + protocols: + grpc: + otlp: + protocols: + grpc: + http: + processors: + + exporters: + logging: + + service: + pipelines: + traces: + receivers: [jaeger,otlp] + processors: [] + exporters: [logging] diff --git a/tests/e2e-upgrade/upgrade-test/01-upgrade-operator.yaml b/tests/e2e-upgrade/upgrade-test/01-upgrade-operator.yaml new file mode 100644 index 0000000000..346d4facde --- /dev/null +++ b/tests/e2e-upgrade/upgrade-test/01-upgrade-operator.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - command: kubectl apply -f ../../_build/manifests/01-opentelemetry-operator.yaml + - command: kubectl rollout status -w deployment/opentelemetry-operator-controller-manager -n opentelemetry-operator-system + - command: sleep 60s diff --git a/tests/e2e-upgrade/upgrade-test/02-assert.yaml b/tests/e2e-upgrade/upgrade-test/02-assert.yaml new file mode 100644 index 0000000000..b40c5a7bb0 --- /dev/null +++ b/tests/e2e-upgrade/upgrade-test/02-assert.yaml @@ -0,0 +1,8 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: simplest-collector + annotations: + operatorVersion: "latest" +status: + readyReplicas: 2 diff --git a/tests/e2e-upgrade/upgrade-test/02-upgrade-collector.yaml b/tests/e2e-upgrade/upgrade-test/02-upgrade-collector.yaml new file mode 100644 index 0000000000..dceb4999cd --- /dev/null +++ b/tests/e2e-upgrade/upgrade-test/02-upgrade-collector.yaml @@ -0,0 +1,28 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + annotations: + operatorVersion: "latest" +spec: + replicas: 2 + config: | + receivers: + jaeger: + protocols: + grpc: + otlp: + protocols: + grpc: + http: + processors: + + exporters: + logging: + + service: + pipelines: + traces: + receivers: [jaeger,otlp] + processors: [] + exporters: [logging]