From 6f3ff65f26778413bd0e482516172023e18d09e9 Mon Sep 17 00:00:00 2001
From: Maciej Zimnoch
Date: Fri, 28 Jun 2024 17:01:14 +0200
Subject: [PATCH 1/2] Bump default ScyllaDB version used in E2Es to 6.0.1

---
 test/e2e/fixture/scylla/scyllacluster.yaml.tmpl | 2 +-
 test/e2e/set/scyllacluster/config.go            | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/e2e/fixture/scylla/scyllacluster.yaml.tmpl b/test/e2e/fixture/scylla/scyllacluster.yaml.tmpl
index a07bc51c386..ab246210a79 100644
--- a/test/e2e/fixture/scylla/scyllacluster.yaml.tmpl
+++ b/test/e2e/fixture/scylla/scyllacluster.yaml.tmpl
@@ -8,7 +8,7 @@ metadata:
     bar: foo
 spec:
   agentVersion: 3.3.0
-  version: 5.4.3
+  version: 6.0.1
   developerMode: true
   exposeOptions:
     nodeService:
diff --git a/test/e2e/set/scyllacluster/config.go b/test/e2e/set/scyllacluster/config.go
index 05f30366409..eb5b699b910 100644
--- a/test/e2e/set/scyllacluster/config.go
+++ b/test/e2e/set/scyllacluster/config.go
@@ -5,10 +5,10 @@ import (
 )
 
 const (
-	updateFromScyllaVersion  = "5.4.0"
-	updateToScyllaVersion    = "5.4.3"
-	upgradeFromScyllaVersion = "5.2.15"
-	upgradeToScyllaVersion   = "5.4.3"
+	updateFromScyllaVersion  = "6.0.0"
+	updateToScyllaVersion    = "6.0.1"
+	upgradeFromScyllaVersion = "5.4.7"
+	upgradeToScyllaVersion   = "6.0.1"
 
 	testTimeout = 45 * time.Minute

From cdc3c1f9956247e3993adb17d020f0838e43354b Mon Sep 17 00:00:00 2001
From: Maciej Zimnoch
Date: Fri, 28 Jun 2024 17:01:19 +0200
Subject: [PATCH 2/2] Align E2E tests to ScyllaDB 6.0 changes.

The scaling E2E was adjusted so that scaling changes no longer break the
minimal required quorum. The existing test scaled below the keyspace RF,
which is no longer possible: ScyllaDB rejects a decommission when any
keyspace has an RF higher than the node count. The test step checking
decommission of a drained node was moved earlier to avoid the same
quorum breakage.

The Alternator E2E required changing the name of the table the password
is read from, as that table was renamed in 6.0.

The restore E2E was parametrized so that the procedure is tested both
with the default ScyllaDB version and with 2024.1, where the workaround
explained in the documentation is required.
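To illustrate the constraint above — a minimal sketch, assuming the standard gocql driver and an already established CQL session; the helper names are hypothetical and not part of this change — the highest replication factor declared by any keyspace can be read from system_schema.keyspaces and compared with the intended member count before scaling down:

// Illustration only; helper names are hypothetical and not part of this change.
// Assumes the standard gocql driver and a live CQL session.
package scalingcheck

import (
	"fmt"
	"strconv"

	"github.com/gocql/gocql"
)

// maxReplicationFactor returns the highest replication factor declared by any keyspace,
// read from the replication options stored in system_schema.keyspaces.
func maxReplicationFactor(session *gocql.Session) (int, error) {
	iter := session.Query(`SELECT keyspace_name, replication FROM system_schema.keyspaces`).Iter()

	var keyspaceName string
	var replication map[string]string
	maxRF := 0
	for iter.Scan(&keyspaceName, &replication) {
		for option, value := range replication {
			if option == "class" {
				continue
			}
			// NetworkTopologyStrategy stores one numeric RF per DC;
			// SimpleStrategy stores a single "replication_factor" entry.
			rf, err := strconv.Atoi(value)
			if err != nil {
				continue
			}
			if rf > maxRF {
				maxRF = rf
			}
		}
	}
	if err := iter.Close(); err != nil {
		return 0, fmt.Errorf("can't list keyspaces: %w", err)
	}

	return maxRF, nil
}

// canScaleDownTo reports whether decommissioning down to targetMembers would keep every
// keyspace's replication factor satisfiable, mirroring the constraint described in the
// commit message above.
func canScaleDownTo(session *gocql.Session, targetMembers int) (bool, error) {
	maxRF, err := maxReplicationFactor(session)
	if err != nil {
		return false, err
	}

	return targetMembers >= maxRF, nil
}

In the test itself the same goal is reached more simply: the rack is never scaled below three members, which keeps it at or above the replication factor of the diRF3 data set used throughout the scaling scenario.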
--- .../scyllacluster/scyllacluster_alternator.go | 2 +- .../scyllacluster/scyllacluster_scaling.go | 142 ++++++++---------- .../scyllamanager_object_storage.go | 78 +++++++--- 3 files changed, 120 insertions(+), 102 deletions(-) diff --git a/test/e2e/set/scyllacluster/scyllacluster_alternator.go b/test/e2e/set/scyllacluster/scyllacluster_alternator.go index c17461ef61e..96442aa5c51 100644 --- a/test/e2e/set/scyllacluster/scyllacluster_alternator.go +++ b/test/e2e/set/scyllacluster/scyllacluster_alternator.go @@ -128,7 +128,7 @@ authorizer: CassandraAuthorizer } q := cqlSession.Query( - `SELECT salted_hash FROM system_auth.roles WHERE role = ?`, + `SELECT salted_hash FROM system.roles WHERE role = ?`, awsCredentials.AccessKeyID, ).WithContext(ctx) err = q.Scan(&awsCredentials.SecretAccessKey) diff --git a/test/e2e/set/scyllacluster/scyllacluster_scaling.go b/test/e2e/set/scyllacluster/scyllacluster_scaling.go index da39d769cb0..1577702e424 100644 --- a/test/e2e/set/scyllacluster/scyllacluster_scaling.go +++ b/test/e2e/set/scyllacluster/scyllacluster_scaling.go @@ -28,9 +28,9 @@ var _ = g.Describe("ScyllaCluster", func() { defer cancel() sc := f.GetDefaultScyllaCluster() - sc.Spec.Datacenter.Racks[0].Members = 1 + sc.Spec.Datacenter.Racks[0].Members = 3 - framework.By("Creating a ScyllaCluster with 1 member") + framework.By("Creating a ScyllaCluster with 3 members") sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) @@ -45,22 +45,22 @@ var _ = g.Describe("ScyllaCluster", func() { hosts, hostIDs, err := utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc) o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(hosts).To(o.HaveLen(1)) - o.Expect(hostIDs).To(o.HaveLen(1)) - diRF1 := insertAndVerifyCQLData(ctx, hosts) - defer diRF1.Close() + o.Expect(hosts).To(o.HaveLen(3)) + o.Expect(hostIDs).To(o.HaveLen(3)) + diRF3 := insertAndVerifyCQLData(ctx, hosts) + defer diRF3.Close() - framework.By("Scaling the ScyllaCluster to 3 replicas") + framework.By("Scaling the ScyllaCluster to 5 replicas") sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch( ctx, sc.Name, types.JSONPatchType, - []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`), + []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 5}]`), metav1.PatchOptions{}, ) o.Expect(err).NotTo(o.HaveOccurred()) o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1)) - o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3)) + o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(5)) framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion) waitCtx2, waitCtx2Cancel := utils.ContextForRollout(ctx, sc) @@ -75,63 +75,17 @@ var _ = g.Describe("ScyllaCluster", func() { oldHostIDs := hostIDs hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc) o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(oldHosts).To(o.HaveLen(1)) - o.Expect(oldHostIDs).To(o.HaveLen(1)) - o.Expect(hosts).To(o.HaveLen(3)) - o.Expect(hostIDs).To(o.HaveLen(3)) - o.Expect(hostIDs).To(o.ContainElements(oldHostIDs)) - - verifyCQLData(ctx, diRF1) - - // Statistically, some data should land on the 3rd node that will give us a chance to ensure - // it was stream correctly when downscaling. 
- diRF2 := insertAndVerifyCQLData(ctx, hosts[0:2]) - defer diRF2.Close() - - diRF3 := insertAndVerifyCQLData(ctx, hosts) - defer diRF3.Close() - - framework.By("Scaling the ScyllaCluster down to 2 replicas") - sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace).Patch( - ctx, - sc.Name, - types.JSONPatchType, - []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 2}]`), - metav1.PatchOptions{}, - ) - o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(2)) - - framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion) - waitCtx3, waitCtx3Cancel := utils.ContextForRollout(ctx, sc) - defer waitCtx3Cancel() - sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx3, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut) - o.Expect(err).NotTo(o.HaveOccurred()) - - verifyScyllaCluster(ctx, f.KubeClient(), sc) - waitForFullQuorum(ctx, f.KubeClient().CoreV1(), sc) - - oldHosts = hosts - oldHostIDs = hostIDs - hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc) - o.Expect(err).NotTo(o.HaveOccurred()) o.Expect(oldHosts).To(o.HaveLen(3)) o.Expect(oldHostIDs).To(o.HaveLen(3)) - o.Expect(hosts).To(o.HaveLen(2)) - o.Expect(hostIDs).To(o.HaveLen(2)) - o.Expect(oldHostIDs).To(o.ContainElements(hostIDs)) - - verifyCQLData(ctx, diRF1) + o.Expect(hosts).To(o.HaveLen(5)) + o.Expect(hostIDs).To(o.HaveLen(5)) + o.Expect(hostIDs).To(o.ContainElements(oldHostIDs)) - // The 2 nodes out of 3 we used earlier may not be the ones that got left. Although discovery will still - // make sure the missing one is picked up, let's avoid having a down node in the pool and refresh it. 
- err = diRF2.SetClientEndpoints(hosts) - o.Expect(err).NotTo(o.HaveOccurred()) - verifyCQLData(ctx, diRF2) + verifyCQLData(ctx, diRF3) - podName := naming.StatefulSetNameForRack(sc.Spec.Datacenter.Racks[0], sc) + "-1" + podName := naming.StatefulSetNameForRack(sc.Spec.Datacenter.Racks[0], sc) + "-4" svcName := podName - framework.By("Marking ScyllaCluster node #2 (%s) for maintenance", podName) + framework.By("Marking ScyllaCluster node #4 (%s) for maintenance", podName) svc := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -150,7 +104,7 @@ var _ = g.Describe("ScyllaCluster", func() { ) o.Expect(err).NotTo(o.HaveOccurred()) - framework.By("Manually draining ScyllaCluster node #2 (%s)", podName) + framework.By("Manually draining ScyllaCluster node #4 (%s)", podName) ec := &corev1.EphemeralContainer{ TargetContainerName: naming.ScyllaContainerName, EphemeralContainerCommon: corev1.EphemeralContainerCommon{ @@ -168,16 +122,48 @@ var _ = g.Describe("ScyllaCluster", func() { o.Expect(ephemeralContainerState.State.Terminated).NotTo(o.BeNil()) o.Expect(ephemeralContainerState.State.Terminated.ExitCode).To(o.BeEquivalentTo(0)) - framework.By("Scaling the ScyllaCluster down to 1 replicas") + framework.By("Scaling the ScyllaCluster down to 4 replicas") + sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace).Patch( + ctx, + sc.Name, + types.JSONPatchType, + []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 4}]`), + metav1.PatchOptions{}, + ) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(4)) + + framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion) + waitCtx3, waitCtx3Cancel := utils.ContextForRollout(ctx, sc) + defer waitCtx3Cancel() + sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx3, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut) + o.Expect(err).NotTo(o.HaveOccurred()) + + verifyScyllaCluster(ctx, f.KubeClient(), sc) + waitForFullQuorum(ctx, f.KubeClient().CoreV1(), sc) + + oldHosts = hosts + oldHostIDs = hostIDs + hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(oldHosts).To(o.HaveLen(5)) + o.Expect(oldHostIDs).To(o.HaveLen(5)) + o.Expect(hosts).To(o.HaveLen(4)) + o.Expect(hostIDs).To(o.HaveLen(4)) + o.Expect(oldHostIDs).To(o.ContainElements(hostIDs)) + + verifyCQLData(ctx, diRF3) + + framework.By("Scaling the ScyllaCluster down to 3 replicas") sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch( ctx, sc.Name, types.JSONPatchType, - []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 1}]`), + []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`), metav1.PatchOptions{}, ) o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(1)) + o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3)) framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion) waitCtx5, waitCtx5Cancel := utils.ContextForRollout(ctx, sc) @@ -192,24 +178,24 @@ var _ = g.Describe("ScyllaCluster", func() { oldHostIDs = hostIDs hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc) o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(oldHosts).To(o.HaveLen(2)) - 
o.Expect(oldHostIDs).To(o.HaveLen(2)) - o.Expect(hosts).To(o.HaveLen(1)) - o.Expect(hostIDs).To(o.HaveLen(1)) + o.Expect(oldHosts).To(o.HaveLen(4)) + o.Expect(oldHostIDs).To(o.HaveLen(4)) + o.Expect(hosts).To(o.HaveLen(3)) + o.Expect(hostIDs).To(o.HaveLen(3)) o.Expect(oldHostIDs).To(o.ContainElements(hostIDs)) - verifyCQLData(ctx, diRF1) + verifyCQLData(ctx, diRF3) - framework.By("Scaling the ScyllaCluster back to 3 replicas to make sure there isn't an old (decommissioned) storage in place") + framework.By("Scaling the ScyllaCluster back to 5 replicas to make sure there isn't an old (decommissioned) storage in place") sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch( ctx, sc.Name, types.JSONPatchType, - []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`), + []byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 5}]`), metav1.PatchOptions{}, ) o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3)) + o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(5)) framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion) waitCtx6, waitCtx6Cancel := utils.ContextForRollout(ctx, sc) @@ -224,14 +210,12 @@ var _ = g.Describe("ScyllaCluster", func() { oldHostIDs = hostIDs hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc) o.Expect(err).NotTo(o.HaveOccurred()) - o.Expect(oldHosts).To(o.HaveLen(1)) - o.Expect(oldHostIDs).To(o.HaveLen(1)) - o.Expect(hosts).To(o.HaveLen(3)) - o.Expect(hostIDs).To(o.HaveLen(3)) + o.Expect(oldHosts).To(o.HaveLen(3)) + o.Expect(oldHostIDs).To(o.HaveLen(3)) + o.Expect(hosts).To(o.HaveLen(5)) + o.Expect(hostIDs).To(o.HaveLen(5)) o.Expect(hostIDs).To(o.ContainElements(oldHostIDs)) - verifyCQLData(ctx, diRF1) - verifyCQLData(ctx, diRF2) verifyCQLData(ctx, diRF3) }) }) diff --git a/test/e2e/set/scyllacluster/scyllamanager_object_storage.go b/test/e2e/set/scyllacluster/scyllamanager_object_storage.go index 1ca365e3fd8..30236a6c0c3 100644 --- a/test/e2e/set/scyllacluster/scyllamanager_object_storage.go +++ b/test/e2e/set/scyllacluster/scyllamanager_object_storage.go @@ -32,13 +32,27 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage f := framework.NewFramework("scyllacluster") - g.It("should register cluster, sync backup tasks and support manual restore procedure", func() { + type entry struct { + scyllaRepository string + scyllaVersion string + preTargetClusterCreateHook func(cluster *scyllav1.ScyllaCluster) + postSchemaRestoreHook func(context.Context, *framework.Framework, *scyllav1.ScyllaCluster) + } + + g.DescribeTable("should register cluster, sync backup tasks and support manual restore procedure", func(e entry) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) defer cancel() sourceSC := f.GetDefaultScyllaCluster() sourceSC.Spec.Datacenter.Racks[0].Members = 1 + if len(e.scyllaRepository) != 0 { + sourceSC.Spec.Repository = e.scyllaRepository + } + if len(e.scyllaVersion) != 0 { + sourceSC.Spec.Version = e.scyllaVersion + } + objectStorageType := f.GetObjectStorageType() switch objectStorageType { case framework.ObjectStorageTypeGCS: @@ -266,8 +280,12 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage targetSC := f.GetDefaultScyllaCluster() targetSC.Spec.Datacenter.Racks[0].Members = sourceSC.Spec.Datacenter.Racks[0].Members - // Restoring schema with ScyllaDB OS 5.4.X or 
ScyllaDB Enterprise 2024.1.X and consistent_cluster_management isn’t supported. - targetSC.Spec.ScyllaArgs = "--consistent-cluster-management=false" + targetSC.Spec.Repository = sourceSC.Spec.Repository + targetSC.Spec.Version = sourceSC.Spec.Version + + if e.preTargetClusterCreateHook != nil { + e.preTargetClusterCreateHook(targetSC) + } switch objectStorageType { case framework.ObjectStorageTypeGCS: @@ -382,24 +400,9 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage verifyScyllaCluster(ctx, f.KubeClient(), targetSC) waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC) - framework.By("Enabling raft in target cluster") - _, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch( - ctx, - targetSC.Name, - types.JSONPatchType, - []byte(`[{"op":"replace","path":"/spec/scyllaArgs","value":"--consistent-cluster-management=true"}]`), - metav1.PatchOptions{}, - ) - o.Expect(err).NotTo(o.HaveOccurred()) - - framework.By("Waiting for the target ScyllaCluster to roll out") - waitCtx10, waitCtx10Cancel := utils.ContextForRollout(ctx, targetSC) - defer waitCtx10Cancel() - targetSC, err = controllerhelpers.WaitForScyllaClusterState(waitCtx10, f.ScyllaClient().ScyllaV1().ScyllaClusters(targetSC.Namespace), targetSC.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut) - o.Expect(err).NotTo(o.HaveOccurred()) - - verifyScyllaCluster(ctx, f.KubeClient(), targetSC) - waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC) + if e.postSchemaRestoreHook != nil { + e.postSchemaRestoreHook(ctx, f, targetSC) + } framework.By("Creating a tables restore task") stdout, stderr, err = utils.ExecWithOptions(f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{ @@ -438,7 +441,38 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage o.Expect(err).NotTo(o.HaveOccurred()) verifyCQLData(ctx, di) - }) + }, + g.Entry("using default ScyllaDB version", entry{}), + // Restoring schema with ScyllaDB OS 5.4.X or ScyllaDB Enterprise 2024.1.X and consistent_cluster_management isn’t supported. 
+ // This test validates a workaround explained in the docs - https://operator.docs.scylladb.com/stable/nodeoperations/restore.html + g.Entry("using workaround for consistent_cluster_management for ScyllaDB Enterprise 2024.1.X", entry{ + scyllaRepository: "docker.io/scylladb/scylla-enterprise", + scyllaVersion: "2024.1.5", + preTargetClusterCreateHook: func(targetCluster *scyllav1.ScyllaCluster) { + targetCluster.Spec.ScyllaArgs = "--consistent-cluster-management=false" + }, + postSchemaRestoreHook: func(ctx context.Context, f *framework.Framework, targetSC *scyllav1.ScyllaCluster) { + framework.By("Enabling raft in target cluster") + _, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch( + ctx, + targetSC.Name, + types.JSONPatchType, + []byte(`[{"op":"replace","path":"/spec/scyllaArgs","value":"--consistent-cluster-management=true"}]`), + metav1.PatchOptions{}, + ) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Waiting for the target ScyllaCluster to roll out") + waitCtx, waitCtxCancel := utils.ContextForRollout(ctx, targetSC) + defer waitCtxCancel() + targetSC, err = controllerhelpers.WaitForScyllaClusterState(waitCtx, f.ScyllaClient().ScyllaV1().ScyllaClusters(targetSC.Namespace), targetSC.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut) + o.Expect(err).NotTo(o.HaveOccurred()) + + verifyScyllaCluster(ctx, f.KubeClient(), targetSC) + waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC) + }, + }), + ) g.It("should discover cluster and sync errors for invalid tasks and invalid updates to existing tasks", func() { ctx, cancel := context.WithTimeout(context.Background(), testTimeout)