diff --git a/api/v1beta1/verticadb_types.go b/api/v1beta1/verticadb_types.go index 87eb2a75a..80a89806a 100644 --- a/api/v1beta1/verticadb_types.go +++ b/api/v1beta1/verticadb_types.go @@ -499,9 +499,30 @@ type Subcluster struct { // at least one primary subcluster in the database. IsPrimary bool `json:"isPrimary"` + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:hidden" + // Internal state that indicates whether this is a standby subcluster for a + // primary. Standbys are transient subclusters that are created during an + // online image change. + IsStandby bool `json:"isStandby,omitempty"` + + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:hidden" + // If this is a standby subcluster, this is the name of the primary + // subcluster it was created for. This is state that is managed internally for an + // online image change. + StandbyParent string `json:"standbyParent,omitempty"` + + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:hidden" + // This allows a different image to be used for the subcluster than the one + // in VerticaDB. This is intended to be used internally by the online image + // change process. + ImageOverride string `json:"imageOverride,omitempty"` + // +operator-sdk:csv:customresourcedefinitions:type=spec // A map of label keys and values to restrict Vertica node scheduling to workers - // with matchiing labels. + // with matching labels. // More info: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector NodeSelector map[string]string `json:"nodeSelector,omitempty"` @@ -619,8 +640,12 @@ const ( AutoRestartVertica VerticaDBConditionType = "AutoRestartVertica" // DBInitialized indicates the database has been created or revived DBInitialized VerticaDBConditionType = "DBInitialized" - // ImageChangeInProgress indicates if the vertica server is in the process of having its image change - ImageChangeInProgress VerticaDBConditionType = "ImageChangeInProgress" + // ImageChangeInProgress indicates if the vertica server is in the process + // of having its image change. We have two additional conditions to + // distinguish between online and offline image change. + ImageChangeInProgress VerticaDBConditionType = "ImageChangeInProgress" + OfflineImageChangeInProgress VerticaDBConditionType = "OfflineImageChangeInProgress" + OnlineImageChangeInProgress VerticaDBConditionType = "OnlineImageChangeInProgress" ) // Fixed index entries for each condition. @@ -628,22 +653,28 @@ const ( AutoRestartVerticaIndex = iota DBInitializedIndex ImageChangeInProgressIndex + OfflineImageChangeInProgressIndex + OnlineImageChangeInProgressIndex ) // VerticaDBConditionIndexMap is a map of the VerticaDBConditionType to its // index in the condition array var VerticaDBConditionIndexMap = map[VerticaDBConditionType]int{ - AutoRestartVertica: AutoRestartVerticaIndex, - DBInitialized: DBInitializedIndex, - ImageChangeInProgress: ImageChangeInProgressIndex, + AutoRestartVertica: AutoRestartVerticaIndex, + DBInitialized: DBInitializedIndex, + ImageChangeInProgress: ImageChangeInProgressIndex, + OfflineImageChangeInProgress: OfflineImageChangeInProgressIndex, + OnlineImageChangeInProgress: OnlineImageChangeInProgressIndex, } // VerticaDBConditionNameMap is the reverse of VerticaDBConditionIndexMap.
It // maps an index to the condition name. var VerticaDBConditionNameMap = map[int]VerticaDBConditionType{ - AutoRestartVerticaIndex: AutoRestartVertica, - DBInitializedIndex: DBInitialized, - ImageChangeInProgressIndex: ImageChangeInProgress, + AutoRestartVerticaIndex: AutoRestartVertica, + DBInitializedIndex: DBInitialized, + ImageChangeInProgressIndex: ImageChangeInProgress, + OfflineImageChangeInProgressIndex: OfflineImageChangeInProgress, + OnlineImageChangeInProgressIndex: OnlineImageChangeInProgress, } // VerticaDBCondition defines condition for VerticaDB @@ -798,7 +829,7 @@ func MakeVDB() *VerticaDB { DBName: "db", ShardCount: 12, Subclusters: []Subcluster{ - {Name: "defaultsubcluster", Size: 3, ServiceType: corev1.ServiceTypeClusterIP}, + {Name: "defaultsubcluster", Size: 3, ServiceType: corev1.ServiceTypeClusterIP, IsPrimary: true}, }, }, } @@ -814,6 +845,19 @@ func (v *VerticaDB) GenSubclusterMap() map[string]*Subcluster { return scMap } +// GenSubclusterStandbyMap will create a map of primary subclusters to their +// standby subcluster. It returns an empty map if there are no standbys. +func (v *VerticaDB) GenSubclusterStandbyMap() map[string]string { + m := map[string]string{} + for i := range v.Spec.Subclusters { + sc := &v.Spec.Subclusters[i] + if sc.IsStandby { + m[sc.StandbyParent] = sc.Name + } + } + return m +} + // IsValidSubclusterName validates the subcluster name is valid. We have rules // about its name because it is included in the name of the statefulset, so we // must adhere to the Kubernetes rules for object names. @@ -860,3 +904,20 @@ func (v *VerticaDB) GetCommunalPath() string { func (v *VerticaDB) GetDepotPath() string { return fmt.Sprintf("%s/%s", v.Spec.Local.DepotPath, v.Spec.DBName) } + +const ( + PrimarySubclusterType = "primary" + StandbySubclusterType = "standby" + SecondarySubclusterType = "secondary" +) + +// GetType returns the type of the subcluster in string form +func (s *Subcluster) GetType() string { + if s.IsPrimary { + if s.IsStandby { + return StandbySubclusterType + } + return PrimarySubclusterType + } + return SecondarySubclusterType +} diff --git a/api/v1beta1/verticadb_types_test.go b/api/v1beta1/verticadb_types_test.go index 5e7a7f121..9f58f3416 100644 --- a/api/v1beta1/verticadb_types_test.go +++ b/api/v1beta1/verticadb_types_test.go @@ -36,4 +36,19 @@ var _ = Describe("verticadb_types", func() { vdb.Spec.Communal.IncludeUIDInPath = false Expect(vdb.GetCommunalPath()).ShouldNot(ContainSubstring(string(vdb.ObjectMeta.UID))) }) + + It("should generate map of standbys", func() { + vdb := MakeVDB() + vdb.Spec.Subclusters = []Subcluster{ + {Name: "sc1", IsPrimary: true}, + {Name: "sc1-standby", IsPrimary: false, IsStandby: true, StandbyParent: "sc1"}, + {Name: "sc2", IsPrimary: false}, + {Name: "sc3", IsPrimary: true}, + {Name: "sc3-standby", IsPrimary: false, IsStandby: true, StandbyParent: "sc3"}, + } + m := vdb.GenSubclusterStandbyMap() + Expect(m["sc1"]).Should(Equal("sc1-standby")) + Expect(m["sc3"]).Should(Equal("sc3-standby")) + Expect(m["sc2"]).Should(Equal("")) + }) }) diff --git a/pkg/controllers/builder.go b/pkg/controllers/builder.go index b4f85f6cc..a7011a42a 100644 --- a/pkg/controllers/builder.go +++ b/pkg/controllers/builder.go @@ -32,16 +32,15 @@ const SuperuserPasswordPath = "superuser-passwd" // buildExtSvc creates desired spec for the external service. 
func buildExtSvc(nm types.NamespacedName, vdb *vapi.VerticaDB, sc *vapi.Subcluster) *corev1.Service { - scHandle := makeSubclusterHandle(sc) return &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: nm.Name, Namespace: nm.Namespace, - Labels: makeLabelsForSvcObject(vdb, scHandle, "external"), + Labels: makeLabelsForSvcObject(vdb, sc, "external"), Annotations: makeAnnotationsForObject(vdb), }, Spec: corev1.ServiceSpec{ - Selector: makeSvcSelectorLabels(vdb, scHandle), + Selector: makeSvcSelectorLabels(vdb, sc), Type: sc.ServiceType, Ports: []corev1.ServicePort{ {Port: 5433, Name: "vertica", NodePort: sc.NodePort}, @@ -326,7 +325,7 @@ func buildPodSpec(vdb *vapi.VerticaDB, sc *vapi.Subcluster) corev1.PodSpec { // makeServerContainer builds the spec for the server container func makeServerContainer(vdb *vapi.VerticaDB, sc *vapi.Subcluster) corev1.Container { return corev1.Container{ - Image: vdb.Spec.Image, + Image: pickImage(vdb, sc), ImagePullPolicy: vdb.Spec.ImagePullPolicy, Name: names.ServerContainer, Resources: sc.Resources, @@ -375,6 +374,16 @@ func makeContainers(vdb *vapi.VerticaDB, sc *vapi.Subcluster) []corev1.Container return cnts } +// pickImage will pick the correct image for the subcluster to use +func pickImage(vdb *vapi.VerticaDB, sc *vapi.Subcluster) string { + // The ImageOverride exists to allow standby subclusters created for + // primaries to continue to use the old image during an online image change. + if sc.ImageOverride != "" { + return sc.ImageOverride + } + return vdb.Spec.Image +} + // getStorageClassName returns a pointer to the StorageClass func getStorageClassName(vdb *vapi.VerticaDB) *string { if vdb.Spec.Local.StorageClass == "" { @@ -384,26 +393,26 @@ func getStorageClassName(vdb *vapi.VerticaDB) *string { } // buildStsSpec builds manifest for a subclusters statefulset -func buildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, scHandle *SubclusterHandle) *appsv1.StatefulSet { +func buildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, sc *vapi.Subcluster) *appsv1.StatefulSet { return &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: nm.Name, Namespace: nm.Namespace, - Labels: makeLabelsForObject(vdb, scHandle), + Labels: makeLabelsForObject(vdb, sc), Annotations: makeAnnotationsForObject(vdb), }, Spec: appsv1.StatefulSetSpec{ Selector: &metav1.LabelSelector{ - MatchLabels: makeSvcSelectorLabels(vdb, scHandle), + MatchLabels: makeSvcSelectorLabels(vdb, sc), }, ServiceName: names.GenHlSvcName(vdb).Name, - Replicas: &scHandle.Size, + Replicas: &sc.Size, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Labels: makeLabelsForObject(vdb, scHandle), + Labels: makeLabelsForObject(vdb, sc), Annotations: makeAnnotationsForObject(vdb), }, - Spec: buildPodSpec(vdb, &scHandle.Subcluster), + Spec: buildPodSpec(vdb, sc), }, UpdateStrategy: makeUpdateStrategy(vdb), PodManagementPolicy: appsv1.ParallelPodManagement, @@ -431,13 +440,12 @@ func buildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, scHandle *Subclu // This is only here for testing purposes when we need to construct the pods ourselves. This // bit is typically handled by the statefulset controller. 
func buildPod(vdb *vapi.VerticaDB, sc *vapi.Subcluster, podIndex int32) *corev1.Pod { - scHandle := makeSubclusterHandle(sc) nm := names.GenPodName(vdb, sc, podIndex) pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: nm.Name, Namespace: nm.Namespace, - Labels: makeLabelsForObject(vdb, scHandle), + Labels: makeLabelsForObject(vdb, sc), Annotations: makeAnnotationsForObject(vdb), }, Spec: buildPodSpec(vdb, sc), @@ -572,3 +580,23 @@ func getK8sAffinity(a vapi.Affinity) *corev1.Affinity { PodAntiAffinity: a.PodAntiAffinity, } } + +// buildStandby creates a Standby subcluster based on a primary +func buildStandby(sc *vapi.Subcluster, imageOverride string) *vapi.Subcluster { + return &vapi.Subcluster{ + Name: fmt.Sprintf("%s-standby", sc.Name), + Size: 1, + IsStandby: true, + StandbyParent: sc.Name, + ImageOverride: imageOverride, + IsPrimary: false, + NodeSelector: sc.NodeSelector, + Affinity: sc.Affinity, + PriorityClassName: sc.PriorityClassName, + Tolerations: sc.Tolerations, + Resources: sc.Resources, + ServiceType: sc.ServiceType, + NodePort: sc.NodePort, + ExternalIPs: sc.ExternalIPs, + } +} diff --git a/pkg/controllers/imagechange.go b/pkg/controllers/imagechange.go index 73585e809..cb7d09e69 100644 --- a/pkg/controllers/imagechange.go +++ b/pkg/controllers/imagechange.go @@ -25,6 +25,7 @@ import ( "github.com/vertica/vertica-kubernetes/pkg/names" "github.com/vertica/vertica-kubernetes/pkg/status" "github.com/vertica/vertica-kubernetes/pkg/version" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" ctrl "sigs.k8s.io/controller-runtime" ) @@ -35,18 +36,21 @@ type ImageChangeManager struct { Log logr.Logger Finder SubclusterFinder ContinuingImageChange bool // true if UpdateInProgress was already set upon entry + StatusCondition vapi.VerticaDBConditionType // Function that will check if the image policy allows for a type of upgrade (offline or online) IsAllowedForImageChangePolicyFunc func(vdb *vapi.VerticaDB) bool } // MakeImageChangeManager will construct a ImageChangeManager object func MakeImageChangeManager(vdbrecon *VerticaDBReconciler, log logr.Logger, vdb *vapi.VerticaDB, + statusCondition vapi.VerticaDBConditionType, isAllowedForImageChangePolicyFunc func(vdb *vapi.VerticaDB) bool) *ImageChangeManager { return &ImageChangeManager{ VRec: vdbrecon, Vdb: vdb, Log: log, Finder: MakeSubclusterFinder(vdbrecon.Client, vdb), + StatusCondition: statusCondition, IsAllowedForImageChangePolicyFunc: isAllowedForImageChangePolicyFunc, } } @@ -75,9 +79,9 @@ func (i *ImageChangeManager) IsImageChangeNeeded(ctx context.Context) (bool, err // is already occurring. func (i *ImageChangeManager) isImageChangeInProgress() (bool, error) { // We first check if the status condition indicates the image change is in progress - inx, ok := vapi.VerticaDBConditionIndexMap[vapi.ImageChangeInProgress] + inx, ok := vapi.VerticaDBConditionIndexMap[i.StatusCondition] if !ok { - return false, fmt.Errorf("verticaDB condition '%s' missing from VerticaDBConditionType", vapi.ImageChangeInProgress) + return false, fmt.Errorf("verticaDB condition '%s' missing from VerticaDBConditionType", i.StatusCondition) } if inx < len(i.Vdb.Status.Conditions) && i.Vdb.Status.Conditions[inx].Status == corev1.ConditionTrue { // Set a flag to indicate that we are continuing an image change. This silences the ImageChangeStarted event. 
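With a per-policy StatusCondition added to the manager, both the generic ImageChangeInProgress condition and the offline/online-specific condition are consulted through the same fixed-index lookup used by isImageChangeInProgress. The standalone sketch below uses simplified stand-in types (not the operator's actual vapi or status packages) to illustrate that pattern: each condition type owns a fixed slot in the status condition array, and a slot that exists and is set to true means that flavor of image change is already underway.

// Hypothetical, simplified stand-ins for the real vapi types; shown only to
// illustrate the fixed-index condition lookup pattern.
package main

import "fmt"

type ConditionType string

const (
	ImageChangeInProgress        ConditionType = "ImageChangeInProgress"
	OfflineImageChangeInProgress ConditionType = "OfflineImageChangeInProgress"
	OnlineImageChangeInProgress  ConditionType = "OnlineImageChangeInProgress"
)

// Each condition type owns a fixed slot in the status condition array.
var conditionIndex = map[ConditionType]int{
	ImageChangeInProgress:        0,
	OfflineImageChangeInProgress: 1,
	OnlineImageChangeInProgress:  2,
}

type Condition struct {
	Type   ConditionType
	Status string // "True" or "False"
}

// isConditionTrue mirrors the lookup: resolve the fixed index, then check the
// slot only if the status array is long enough to contain it.
func isConditionTrue(conds []Condition, ct ConditionType) (bool, error) {
	inx, ok := conditionIndex[ct]
	if !ok {
		return false, fmt.Errorf("condition '%s' has no fixed index", ct)
	}
	return inx < len(conds) && conds[inx].Status == "True", nil
}

func main() {
	conds := []Condition{{ImageChangeInProgress, "True"}, {OfflineImageChangeInProgress, "False"}}
	// Index 2 is beyond the array, so this reports false rather than erroring.
	inProgress, _ := isConditionTrue(conds, OnlineImageChangeInProgress)
	fmt.Println(inProgress)
}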
@@ -135,11 +139,19 @@ func (i *ImageChangeManager) finishImageChange(ctx context.Context) (ctrl.Result return ctrl.Result{}, nil } -// toggleImageChangeInProgress is a helper for updating the ImageChangeInProgress condition +// toggleImageChangeInProgress is a helper for updating the +// ImageChangeInProgress conditions. We set the ImageChangeInProgress plus the +// one defined in i.StatusCondition. func (i *ImageChangeManager) toggleImageChangeInProgress(ctx context.Context, newVal corev1.ConditionStatus) error { - return status.UpdateCondition(ctx, i.VRec.Client, i.Vdb, + err := status.UpdateCondition(ctx, i.VRec.Client, i.Vdb, vapi.VerticaDBCondition{Type: vapi.ImageChangeInProgress, Status: newVal}, ) + if err != nil { + return err + } + return status.UpdateCondition(ctx, i.VRec.Client, i.Vdb, + vapi.VerticaDBCondition{Type: i.StatusCondition, Status: newVal}, + ) } // setImageChangeStatus is a helper to set the imageChangeStatus message. @@ -147,6 +159,94 @@ func (i *ImageChangeManager) setImageChangeStatus(ctx context.Context, msg strin return status.UpdateImageChangeStatus(ctx, i.VRec.Client, i.Vdb, msg) } +// updateImageInStatefulSets will change the image in each of the statefulsets. +// Caller can indicate whether primary or secondary types change. +func (i *ImageChangeManager) updateImageInStatefulSets(ctx context.Context, chgPrimary, chgSecondary bool) (int, ctrl.Result, error) { + numStsChanged := 0 // Count to keep track of the number of statefulsets updated + + // We use FindExisting for the finder because we only want to work with sts + // that already exist. This is necessary in case the image change was paired + // with a scaling operation. The pod change due to the scaling operation + // doesn't take effect until after the image change. + stss, err := i.Finder.FindStatefulSets(ctx, FindExisting) + if err != nil { + return numStsChanged, ctrl.Result{}, err + } + for inx := range stss.Items { + sts := &stss.Items[inx] + + if !chgPrimary && sts.Labels[SubclusterTypeLabel] == PrimarySubclusterType { + continue + } + if !chgSecondary && sts.Labels[SubclusterTypeLabel] == SecondarySubclusterType { + continue + } + if sts.Labels[SubclusterTypeLabel] == StandbySubclusterType { + continue + } + + // Skip the statefulset if it already has the proper image. + if sts.Spec.Template.Spec.Containers[names.ServerContainerIndex].Image != i.Vdb.Spec.Image { + i.Log.Info("Updating image in old statefulset", "name", sts.ObjectMeta.Name) + err = i.setImageChangeStatus(ctx, "Rescheduling pods with new image name") + if err != nil { + return numStsChanged, ctrl.Result{}, err + } + sts.Spec.Template.Spec.Containers[names.ServerContainerIndex].Image = i.Vdb.Spec.Image + // We change the update strategy to OnDelete. We don't want the k8s + // sts controller to interfere and do a rolling update after the + // update has completed. We don't explicitly change this back. The + // ObjReconciler will handle it for us. + sts.Spec.UpdateStrategy.Type = appsv1.OnDeleteStatefulSetStrategyType + err = i.VRec.Client.Update(ctx, sts) + if err != nil { + return numStsChanged, ctrl.Result{}, err + } + numStsChanged++ + } + } + return numStsChanged, ctrl.Result{}, nil +} + +// deletePodsRunningOldImage will delete pods that have the old image. It will return the +// number of pods that were deleted. Callers can control whether to delete pods +// just for the primary or primary/secondary.
+func (i *ImageChangeManager) deletePodsRunningOldImage(ctx context.Context, delSecondary bool) (int, ctrl.Result, error) { + numPodsDeleted := 0 // Tracks the number of pods that were deleted + + // We use FindExisting for the finder because we only want to work with pods + // that already exist. This is necessary in case the image change was paired + // with a scaling operation. The pod change due to the scaling operation + // doesn't take affect until after the image change. + pods, err := i.Finder.FindPods(ctx, FindExisting) + if err != nil { + return numPodsDeleted, ctrl.Result{}, err + } + for inx := range pods.Items { + pod := &pods.Items[inx] + + // We aren't deleting secondary pods, so we only continue if the pod is + // for a primary + if !delSecondary { + scType, ok := pod.Labels[SubclusterTypeLabel] + if ok && scType != vapi.PrimarySubclusterType { + continue + } + } + + // Skip the pod if it already has the proper image. + if pod.Spec.Containers[names.ServerContainerIndex].Image != i.Vdb.Spec.Image { + i.Log.Info("Deleting pod that had old image", "name", pod.ObjectMeta.Name) + err = i.VRec.Client.Delete(ctx, pod) + if err != nil { + return numPodsDeleted, ctrl.Result{}, err + } + numPodsDeleted++ + } + } + return numPodsDeleted, ctrl.Result{}, nil +} + // onlineImageChangeAllowed returns true if image change must be done online func onlineImageChangeAllowed(vdb *vapi.VerticaDB) bool { if vdb.Spec.ImageChangePolicy == vapi.OfflineImageChange { diff --git a/pkg/controllers/imagechange_test.go b/pkg/controllers/imagechange_test.go index 76f06dec0..bafa76db8 100644 --- a/pkg/controllers/imagechange_test.go +++ b/pkg/controllers/imagechange_test.go @@ -21,7 +21,11 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" vapi "github.com/vertica/vertica-kubernetes/api/v1beta1" + "github.com/vertica/vertica-kubernetes/pkg/names" "github.com/vertica/vertica-kubernetes/pkg/version" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + ctrl "sigs.k8s.io/controller-runtime" ) var _ = Describe("imagechange", func() { @@ -67,7 +71,81 @@ var _ = Describe("imagechange", func() { createPods(ctx, vdb, AllPodsRunning) defer deletePods(ctx, vdb) - mgr := MakeImageChangeManager(vrec, logger, vdb, func(vdb *vapi.VerticaDB) bool { return true }) + mgr := MakeImageChangeManager(vrec, logger, vdb, vapi.OnlineImageChangeInProgress, + func(vdb *vapi.VerticaDB) bool { return true }) Expect(mgr.IsImageChangeNeeded(ctx)).Should(Equal(false)) }) + + It("should change the image of just the primaries or just secondaries", func() { + const OldImage = "old-image" + const NewImage1 = "new-image-1" + const NewImage2 = "new-image-2" + vdb := vapi.MakeVDB() + vdb.Spec.Image = OldImage + vdb.Spec.Subclusters = []vapi.Subcluster{ + {Name: "sc1", Size: 2, IsPrimary: true}, + {Name: "sc2", Size: 3, IsPrimary: false}, + } + createPods(ctx, vdb, AllPodsRunning) + defer deletePods(ctx, vdb) + vdb.Spec.Image = NewImage1 + createVdb(ctx, vdb) + defer deleteVdb(ctx, vdb) + + mgr := MakeImageChangeManager(vrec, logger, vdb, vapi.OfflineImageChangeInProgress, + func(vdb *vapi.VerticaDB) bool { return true }) + Expect(mgr.IsImageChangeNeeded(ctx)).Should(Equal(true)) + stsChange, res, err := mgr.updateImageInStatefulSets(ctx, true, false) + Expect(err).Should(Succeed()) + Expect(res).Should(Equal(ctrl.Result{})) + Expect(stsChange).Should(Equal(1)) + + sts := &appsv1.StatefulSet{} + Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &vdb.Spec.Subclusters[0]), sts)).Should(Succeed()) + 
Expect(sts.Spec.Template.Spec.Containers[ServerContainerIndex].Image).Should(Equal(NewImage1)) + Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &vdb.Spec.Subclusters[1]), sts)).Should(Succeed()) + Expect(sts.Spec.Template.Spec.Containers[ServerContainerIndex].Image).Should(Equal(OldImage)) + + vdb.Spec.Image = NewImage2 + Expect(k8sClient.Update(ctx, vdb)).Should(Succeed()) + + stsChange, res, err = mgr.updateImageInStatefulSets(ctx, false, true) + Expect(err).Should(Succeed()) + Expect(res).Should(Equal(ctrl.Result{})) + Expect(stsChange).Should(Equal(1)) + + Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &vdb.Spec.Subclusters[0]), sts)).Should(Succeed()) + Expect(sts.Spec.Template.Spec.Containers[ServerContainerIndex].Image).Should(Equal(NewImage1)) + Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &vdb.Spec.Subclusters[1]), sts)).Should(Succeed()) + Expect(sts.Spec.Template.Spec.Containers[ServerContainerIndex].Image).Should(Equal(NewImage2)) + }) + + It("should delete pods of primaries only", func() { + vdb := vapi.MakeVDB() + vdb.Spec.Subclusters = []vapi.Subcluster{ + {Name: "sc1", Size: 1, IsPrimary: true}, + {Name: "sc2", Size: 1, IsPrimary: false}, + } + createPods(ctx, vdb, AllPodsRunning) + defer deletePods(ctx, vdb) + vdb.Spec.Image = "new-image" // Change image to force pod deletion + + mgr := MakeImageChangeManager(vrec, logger, vdb, vapi.OfflineImageChangeInProgress, + func(vdb *vapi.VerticaDB) bool { return true }) + numPodsDeleted, res, err := mgr.deletePodsRunningOldImage(ctx, false) // pods from primaries only + Expect(err).Should(Succeed()) + Expect(res).Should(Equal(ctrl.Result{})) + Expect(numPodsDeleted).Should(Equal(1)) + + pod := &corev1.Pod{} + Expect(k8sClient.Get(ctx, names.GenPodName(vdb, &vdb.Spec.Subclusters[0], 0), pod)).ShouldNot(Succeed()) + Expect(k8sClient.Get(ctx, names.GenPodName(vdb, &vdb.Spec.Subclusters[1], 0), pod)).Should(Succeed()) + + numPodsDeleted, res, err = mgr.deletePodsRunningOldImage(ctx, true) // pods from secondary and primaries + Expect(err).Should(Succeed()) + Expect(res).Should(Equal(ctrl.Result{})) + Expect(numPodsDeleted).Should(Equal(1)) + + Expect(k8sClient.Get(ctx, names.GenPodName(vdb, &vdb.Spec.Subclusters[1], 0), pod)).ShouldNot(Succeed()) + }) }) diff --git a/pkg/controllers/labels_annotations.go b/pkg/controllers/labels_annotations.go index abbe9f69f..d73ae1054 100644 --- a/pkg/controllers/labels_annotations.go +++ b/pkg/controllers/labels_annotations.go @@ -28,10 +28,10 @@ const ( ) // makeSubclusterLabels returns the labels added for the subcluster -func makeSubclusterLabels(sc *SubclusterHandle) map[string]string { +func makeSubclusterLabels(sc *vapi.Subcluster) map[string]string { return map[string]string{ SubclusterNameLabel: sc.Name, - SubclusterTypeLabel: sc.GetSubclusterType(), + SubclusterTypeLabel: sc.GetType(), } } @@ -48,7 +48,7 @@ func makeOperatorLabels(vdb *vapi.VerticaDB) map[string]string { } // makeCommonLabels returns the labels that are common to all objects. 
-func makeCommonLabels(vdb *vapi.VerticaDB, sc *SubclusterHandle) map[string]string { +func makeCommonLabels(vdb *vapi.VerticaDB, sc *vapi.Subcluster) map[string]string { labels := makeOperatorLabels(vdb) // Remaining labels are for objects that are subcluster specific @@ -64,7 +64,7 @@ func makeCommonLabels(vdb *vapi.VerticaDB, sc *SubclusterHandle) map[string]stri } // makeLabelsForObjects constructs the labels for a new k8s object -func makeLabelsForObject(vdb *vapi.VerticaDB, sc *SubclusterHandle) map[string]string { +func makeLabelsForObject(vdb *vapi.VerticaDB, sc *vapi.Subcluster) map[string]string { labels := makeCommonLabels(vdb, sc) // Add any custom labels that were in the spec. @@ -76,7 +76,7 @@ func makeLabelsForObject(vdb *vapi.VerticaDB, sc *SubclusterHandle) map[string]s } // makeLabelsForSvcObject will create the set of labels for use with service objects -func makeLabelsForSvcObject(vdb *vapi.VerticaDB, sc *SubclusterHandle, svcType string) map[string]string { +func makeLabelsForSvcObject(vdb *vapi.VerticaDB, sc *vapi.Subcluster, svcType string) map[string]string { labels := makeLabelsForObject(vdb, sc) labels[SvcTypeLabel] = svcType return labels @@ -93,7 +93,7 @@ func makeAnnotationsForObject(vdb *vapi.VerticaDB) map[string]string { } // makeSvcSelectorLabels returns the labels that are used for selectors in service objects. -func makeSvcSelectorLabels(vdb *vapi.VerticaDB, sc *SubclusterHandle) map[string]string { +func makeSvcSelectorLabels(vdb *vapi.VerticaDB, sc *vapi.Subcluster) map[string]string { // The selector will simply use the common labels for all objects. return makeCommonLabels(vdb, sc) } diff --git a/pkg/controllers/obj_reconcile.go b/pkg/controllers/obj_reconcile.go index 0f0c02e37..87a7a7e66 100644 --- a/pkg/controllers/obj_reconcile.go +++ b/pkg/controllers/obj_reconcile.go @@ -147,14 +147,15 @@ func (o *ObjReconciler) checkSecretHasKeys(ctx context.Context, secretType, secr // checkForCreatedSubcluster handles reconciliation of one subcluster that should exist func (o *ObjReconciler) checkForCreatedSubcluster(ctx context.Context, sc *vapi.Subcluster) (ctrl.Result, error) { - if err := o.reconcileExtSvc(ctx, sc); err != nil { - return ctrl.Result{}, err + // Standby's never have their own service objects. They always reuse the + // one we create for the primary. + if !sc.IsStandby { + if err := o.reconcileExtSvc(ctx, sc); err != nil { + return ctrl.Result{}, err + } } - sch := makeSubclusterHandle(sc) - nm := names.GenStsName(o.Vdb, &sch.Subcluster) - _, res, err := o.reconcileSts(ctx, nm, sch) - return res, err + return o.reconcileSts(ctx, sc) } // checkForDeletedSubcluster will remove any objects that were created for @@ -268,27 +269,28 @@ func (o *ObjReconciler) createService(ctx context.Context, svc *corev1.Service, // reconcileSts reconciles the statefulset for a particular subcluster. Returns // true if any create/update was done. 
-func (o *ObjReconciler) reconcileSts(ctx context.Context, nm types.NamespacedName, sch *SubclusterHandle) (bool, ctrl.Result, error) { +func (o *ObjReconciler) reconcileSts(ctx context.Context, sc *vapi.Subcluster) (ctrl.Result, error) { + nm := names.GenStsName(o.Vdb, sc) curSts := &appsv1.StatefulSet{} - expSts := buildStsSpec(nm, o.Vdb, sch) + expSts := buildStsSpec(nm, o.Vdb, sc) err := o.VRec.Client.Get(ctx, nm, curSts) if err != nil && errors.IsNotFound(err) { o.Log.Info("Creating statefulset", "Name", nm, "Size", expSts.Spec.Replicas, "Image", expSts.Spec.Template.Spec.Containers[0].Image) err = ctrl.SetControllerReference(o.Vdb, expSts, o.VRec.Scheme) if err != nil { - return false, ctrl.Result{}, err + return ctrl.Result{}, err } // Invalidate the pod facts cache since we are creating a new sts o.PFacts.Invalidate() - return true, ctrl.Result{}, o.VRec.Client.Create(ctx, expSts) + return ctrl.Result{}, o.VRec.Client.Create(ctx, expSts) } // We can only remove pods if we have called 'admintools -t db_remove_node' // and done the uninstall. If we haven't yet done that we will requeue the // reconciliation. This will cause us to go through the remove node and // uninstall reconcile actors to properly handle the scale down. - if r, e := o.checkForOrphanAdmintoolsConfEntries(sch.Size, curSts); r.Requeue || e != nil { - return false, r, e + if r, e := o.checkForOrphanAdmintoolsConfEntries(sc.Size, curSts); r.Requeue || e != nil { + return r, e } // To distinguish when this is called as part of the upgrade reconciler, we @@ -307,15 +309,15 @@ func (o *ObjReconciler) reconcileSts(ctx context.Context, nm types.NamespacedNam curSts.DeepCopyInto(origSts) expSts.Spec.DeepCopyInto(&curSts.Spec) if err := o.VRec.Client.Patch(ctx, curSts, patch); err != nil { - return false, ctrl.Result{}, err + return ctrl.Result{}, err } if !reflect.DeepEqual(curSts.Spec, origSts.Spec) { o.Log.Info("Patching statefulset", "Name", expSts.Name, "Image", expSts.Spec.Template.Spec.Containers[0].Image) // Invalidate the pod facts cache since we are about to change the sts o.PFacts.Invalidate() - return true, ctrl.Result{}, nil + return ctrl.Result{}, nil } - return false, ctrl.Result{}, nil + return ctrl.Result{}, nil } // checkForOrphanAdmintoolsConfEntries will check whether it is okay to proceed diff --git a/pkg/controllers/offlineimagechange_reconcile.go b/pkg/controllers/offlineimagechange_reconcile.go index c8b874a5d..40af3d853 100644 --- a/pkg/controllers/offlineimagechange_reconcile.go +++ b/pkg/controllers/offlineimagechange_reconcile.go @@ -25,7 +25,6 @@ import ( "github.com/vertica/vertica-kubernetes/pkg/cmds" "github.com/vertica/vertica-kubernetes/pkg/events" "github.com/vertica/vertica-kubernetes/pkg/names" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" @@ -47,7 +46,7 @@ func MakeOfflineImageChangeReconciler(vdbrecon *VerticaDBReconciler, log logr.Lo vdb *vapi.VerticaDB, prunner cmds.PodRunner, pfacts *PodFacts) ReconcileActor { return &OfflineImageChangeReconciler{VRec: vdbrecon, Log: log, Vdb: vdb, PRunner: prunner, PFacts: pfacts, Finder: MakeSubclusterFinder(vdbrecon.Client, vdb), - Manager: *MakeImageChangeManager(vdbrecon, log, vdb, offlineImageChangeAllowed), + Manager: *MakeImageChangeManager(vdbrecon, log, vdb, vapi.OfflineImageChangeInProgress, offlineImageChangeAllowed), } } @@ -140,37 +139,11 @@ func (o *OfflineImageChangeReconciler) stopCluster(ctx context.Context) (ctrl.Re // Since there will be 
processing after to delete the pods so that they come up // with the new image. func (o *OfflineImageChangeReconciler) updateImageInStatefulSets(ctx context.Context) (ctrl.Result, error) { - // We use FindExisting for the finder because we only want to work with sts - // that already exist. This is necessary incase the image change was paired - // with a scaling operation. The pod change due to the scaling operation - // doesn't take affect until after the image change. - stss, err := o.Finder.FindStatefulSets(ctx, FindExisting) - if err != nil { - return ctrl.Result{}, err - } - for i := range stss.Items { - sts := &stss.Items[i] - // Skip the statefulset if it already has the proper image. - if sts.Spec.Template.Spec.Containers[names.ServerContainerIndex].Image != o.Vdb.Spec.Image { - o.Log.Info("Updating image in old statefulset", "name", sts.ObjectMeta.Name) - err = o.Manager.setImageChangeStatus(ctx, "Rescheduling pods with new image name") - if err != nil { - return ctrl.Result{}, err - } - sts.Spec.Template.Spec.Containers[names.ServerContainerIndex].Image = o.Vdb.Spec.Image - // We change the update strategy to OnDelete. We don't want the k8s - // sts controller to interphere and do a rolling update after the - // update has completed. We don't explicitly change this back. The - // ObjReconciler will handle it for us. - sts.Spec.UpdateStrategy.Type = appsv1.OnDeleteStatefulSetStrategyType - err = o.VRec.Client.Update(ctx, sts) - if err != nil { - return ctrl.Result{}, err - } - o.PFacts.Invalidate() - } + numStsChanged, res, err := o.Manager.updateImageInStatefulSets(ctx, true, true) + if numStsChanged > 0 { + o.PFacts.Invalidate() } - return ctrl.Result{}, nil + return res, err } // deletePods will delete pods that are running the old image. The assumption @@ -178,27 +151,11 @@ func (o *OfflineImageChangeReconciler) updateImageInStatefulSets(ctx context.Con // the sts is OnDelete. Deleting the pods ensures they get rescheduled with the // new image. func (o *OfflineImageChangeReconciler) deletePods(ctx context.Context) (ctrl.Result, error) { - // We use FindExisting for the finder because we only want to work with pods - // that already exist. This is necessary in case the image change was paired - // with a scaling operation. The pod change due to the scaling operation - // doesn't take affect until after the image change. - pods, err := o.Finder.FindPods(ctx, FindExisting) - if err != nil { - return ctrl.Result{}, err + numPodsDeleted, res, err := o.Manager.deletePodsRunningOldImage(ctx, true) + if numPodsDeleted > 0 { + o.PFacts.Invalidate() } - for i := range pods.Items { - pod := &pods.Items[i] - // Skip the pod if it already has the proper image. - if pod.Spec.Containers[names.ServerContainerIndex].Image != o.Vdb.Spec.Image { - o.Log.Info("Deleting pod that had old image", "name", pod.ObjectMeta.Name) - err = o.VRec.Client.Delete(ctx, pod) - if err != nil { - return ctrl.Result{}, err - } - o.PFacts.Invalidate() - } - } - return ctrl.Result{}, nil + return res, err } // checkForNewPods will check to ensure at least one pod exists with the new image. @@ -235,7 +192,7 @@ func (o *OfflineImageChangeReconciler) restartCluster(ctx context.Context) (ctrl } // The restart reconciler is called after this reconciler. But we call the // restart reconciler here so that we restart while the status condition is set. 
- r := MakeRestartReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts) + r := MakeRestartReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts, true) return r.Reconcile(ctx, &ctrl.Request{}) } diff --git a/pkg/controllers/offlineimagechange_reconcile_test.go b/pkg/controllers/offlineimagechange_reconcile_test.go index d883522b0..23b35ee3a 100644 --- a/pkg/controllers/offlineimagechange_reconcile_test.go +++ b/pkg/controllers/offlineimagechange_reconcile_test.go @@ -46,7 +46,7 @@ var _ = Describe("offlineimagechange_reconcile", func() { updateVdbToCauseImageChange(ctx, vdb, NewImage) - r, _, _ := createImageChangeReconciler(vdb) + r, _, _ := createOfflineImageChangeReconciler(vdb) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &vdb.Spec.Subclusters[0]), sts)).Should(Succeed()) @@ -62,7 +62,7 @@ var _ = Describe("offlineimagechange_reconcile", func() { updateVdbToCauseImageChange(ctx, vdb, "container1:newimage") - r, fpr, _ := createImageChangeReconciler(vdb) + r, fpr, _ := createOfflineImageChangeReconciler(vdb) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) h := fpr.FindCommands("admintools -t stop_db") Expect(len(h)).Should(Equal(1)) @@ -77,7 +77,7 @@ var _ = Describe("offlineimagechange_reconcile", func() { updateVdbToCauseImageChange(ctx, vdb, "container2:newimage") - r, _, _ := createImageChangeReconciler(vdb) + r, _, _ := createOfflineImageChangeReconciler(vdb) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) // Delete the sts in preparation of recrating everything with the new // image. Pods will come up not running to force a requeue by the @@ -96,7 +96,7 @@ var _ = Describe("offlineimagechange_reconcile", func() { updateVdbToCauseImageChange(ctx, vdb, "container2:newimage") - r, _, _ := createImageChangeReconciler(vdb) + r, _, _ := createOfflineImageChangeReconciler(vdb) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) finder := MakeSubclusterFinder(k8sClient, vdb) @@ -114,7 +114,7 @@ var _ = Describe("offlineimagechange_reconcile", func() { defer deletePods(ctx, vdb) updateVdbToCauseImageChange(ctx, vdb, "container2:newimage") - r, fpr, pfacts := createImageChangeReconciler(vdb) + r, fpr, pfacts := createOfflineImageChangeReconciler(vdb) Expect(pfacts.Collect(ctx, vdb)).Should(Succeed()) pfacts.Detail[names.GenPodName(vdb, &vdb.Spec.Subclusters[0], 0)].upNode = false Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) @@ -132,7 +132,7 @@ var _ = Describe("offlineimagechange_reconcile", func() { defer deletePods(ctx, vdb) updateVdbToCauseImageChange(ctx, vdb, "container3:newimage") - r, fpr, pfacts := createImageChangeReconciler(vdb) + r, fpr, pfacts := createOfflineImageChangeReconciler(vdb) Expect(pfacts.Collect(ctx, vdb)).Should(Succeed()) // Fail stop_db so that the reconciler fails @@ -158,8 +158,8 @@ func updateVdbToCauseImageChange(ctx context.Context, vdb *vapi.VerticaDB, newIm ExpectWithOffset(1, k8sClient.Update(ctx, vdb)).Should(Succeed()) } -// createImageChangeReconciler is a helper to run the ImageChangeReconciler. -func createImageChangeReconciler(vdb *vapi.VerticaDB) (*OfflineImageChangeReconciler, *cmds.FakePodRunner, *PodFacts) { +// createOfflineImageChangeReconciler is a helper to run the ImageChangeReconciler. 
+func createOfflineImageChangeReconciler(vdb *vapi.VerticaDB) (*OfflineImageChangeReconciler, *cmds.FakePodRunner, *PodFacts) { fpr := &cmds.FakePodRunner{Results: cmds.CmdResults{}} pfacts := MakePodFacts(k8sClient, fpr) actor := MakeOfflineImageChangeReconciler(vrec, logger, vdb, fpr, &pfacts) diff --git a/pkg/controllers/onlineimagechange_reconcile_test.go b/pkg/controllers/onlineimagechange_reconcile_test.go new file mode 100644 index 000000000..77f03bbb5 --- /dev/null +++ b/pkg/controllers/onlineimagechange_reconcile_test.go @@ -0,0 +1,114 @@ +/* + (c) Copyright [2021] Micro Focus or one of its affiliates. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package controllers + +import ( + "context" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + vapi "github.com/vertica/vertica-kubernetes/api/v1beta1" + "github.com/vertica/vertica-kubernetes/pkg/cmds" + "github.com/vertica/vertica-kubernetes/pkg/names" + appsv1 "k8s.io/api/apps/v1" + ctrl "sigs.k8s.io/controller-runtime" + "yunion.io/x/pkg/tristate" +) + +var _ = Describe("onlineimagechange_reconcile", func() { + ctx := context.Background() + const OldImage = "old-image" + const NewImageName = "different-image" + + It("should properly report if primaries don't have matching image in vdb", func() { + vdb := vapi.MakeVDB() + createPods(ctx, vdb, AllPodsRunning) + defer deletePods(ctx, vdb) + + r := createOnlineImageChangeReconciler(vdb) + Expect(r.loadSubclusterState(ctx)).Should(Equal(ctrl.Result{})) + Expect(r.allPrimariesHaveNewImage()).Should(BeTrue()) + vdb.Spec.Image = NewImageName + Expect(r.allPrimariesHaveNewImage()).Should(BeFalse()) + }) + + It("should create and delete standby subclusters", func() { + vdb := vapi.MakeVDB() + scs := []vapi.Subcluster{ + {Name: "sc1-secondary", IsPrimary: false, Size: 5}, + {Name: "sc2-secondary", IsPrimary: false, Size: 1}, + {Name: "sc3-primary", IsPrimary: true, Size: 3}, + } + vdb.Spec.Subclusters = scs + createVdb(ctx, vdb) + defer deleteVdb(ctx, vdb) + createPods(ctx, vdb, AllPodsRunning) + defer deletePods(ctx, vdb) + defer deleteSvcs(ctx, vdb) + vdb.Spec.Image = NewImageName // Trigger an upgrade + + r := createOnlineImageChangeReconciler(vdb) + Expect(r.loadSubclusterState(ctx)).Should(Equal(ctrl.Result{})) + Expect(r.createStandbySts(ctx)).Should(Equal(ctrl.Result{})) + + fetchVdb := &vapi.VerticaDB{} + Expect(k8sClient.Get(ctx, vdb.ExtractNamespacedName(), fetchVdb)) + defer deletePods(ctx, fetchVdb) // Add to defer again for pods in standbys + createPods(ctx, fetchVdb, AllPodsRunning) + + fscs := fetchVdb.Spec.Subclusters + Expect(len(fscs)).Should(Equal(4)) // orig + 1 standbys + Expect(fscs[3].Name).Should(Equal("sc3-primary-standby")) + + Expect(r.loadSubclusterState(ctx)).Should(Equal(ctrl.Result{})) // Collect state again for new pods/sts + + // Override the pod facts so that newly created pod shows up as not + // install and db doesn't exist. This is needed to allow the sts + // deletion to occur. 
+ pn := names.GenPodName(fetchVdb, &fscs[3], 0) + r.PFacts.Detail[pn].isInstalled = tristate.False + r.PFacts.Detail[pn].dbExists = tristate.False + + sts := &appsv1.StatefulSet{} + Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &fscs[3]), sts)).Should(Succeed()) + Expect(r.deleteStandbySts(ctx)).Should(Equal(ctrl.Result{})) + Expect(k8sClient.Get(ctx, names.GenStsName(vdb, &fscs[3]), sts)).ShouldNot(Succeed()) + }) + + It("should be able to figure out what the old image was", func() { + vdb := vapi.MakeVDB() + vdb.Spec.Image = OldImage + createVdb(ctx, vdb) + defer deleteVdb(ctx, vdb) + createPods(ctx, vdb, AllPodsRunning) + defer deletePods(ctx, vdb) + vdb.Spec.Image = NewImageName // Trigger an upgrade + + r := createOnlineImageChangeReconciler(vdb) + Expect(r.loadSubclusterState(ctx)).Should(Equal(ctrl.Result{})) + oldImage, ok := r.fetchOldImage() + Expect(ok).Should(BeTrue()) + Expect(oldImage).Should(Equal(OldImage)) + }) +}) + +// createOnlineImageChangeReconciler is a helper to run the OnlineImageChangeReconciler. +func createOnlineImageChangeReconciler(vdb *vapi.VerticaDB) *OnlineImageChangeReconciler { + fpr := &cmds.FakePodRunner{Results: cmds.CmdResults{}} + pfacts := MakePodFacts(k8sClient, fpr) + actor := MakeOnlineImageChangeReconciler(vrec, logger, vdb, fpr, &pfacts) + return actor.(*OnlineImageChangeReconciler) +} diff --git a/pkg/controllers/onlineimagechange_reconciler.go b/pkg/controllers/onlineimagechange_reconciler.go index af72eecf5..67e477842 100644 --- a/pkg/controllers/onlineimagechange_reconciler.go +++ b/pkg/controllers/onlineimagechange_reconciler.go @@ -17,31 +17,35 @@ package controllers import ( "context" + "fmt" "github.com/go-logr/logr" vapi "github.com/vertica/vertica-kubernetes/api/v1beta1" "github.com/vertica/vertica-kubernetes/pkg/cmds" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" ) // OnlineImageChangeReconciler will handle the process when the vertica image // changes. It does this while keeping the database online. type OnlineImageChangeReconciler struct { - VRec *VerticaDBReconciler - Log logr.Logger - Vdb *vapi.VerticaDB // Vdb is the CRD we are acting on. - PRunner cmds.PodRunner - PFacts *PodFacts - Finder SubclusterFinder - Manager ImageChangeManager + VRec *VerticaDBReconciler + Log logr.Logger + Vdb *vapi.VerticaDB // Vdb is the CRD we are acting on. + PRunner cmds.PodRunner + PFacts *PodFacts + Finder SubclusterFinder + Manager ImageChangeManager + Subclusters []*SubclusterHandle } // MakeOnlineImageChangeReconciler will build an OnlineImageChangeReconciler object func MakeOnlineImageChangeReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger, vdb *vapi.VerticaDB, prunner cmds.PodRunner, pfacts *PodFacts) ReconcileActor { - return &OfflineImageChangeReconciler{VRec: vdbrecon, Log: log, Vdb: vdb, PRunner: prunner, PFacts: pfacts, + return &OnlineImageChangeReconciler{VRec: vdbrecon, Log: log, Vdb: vdb, PRunner: prunner, PFacts: pfacts, Finder: MakeSubclusterFinder(vdbrecon.Client, vdb), - Manager: *MakeImageChangeManager(vdbrecon, log, vdb, onlineImageChangeAllowed), + Manager: *MakeImageChangeManager(vdbrecon, log, vdb, vapi.OnlineImageChangeInProgress, onlineImageChangeAllowed), } } @@ -52,16 +56,17 @@ func (o *OnlineImageChangeReconciler) Reconcile(ctx context.Context, req *ctrl.R return ctrl.Result{}, err } - if err := o.PFacts.Collect(ctx, o.Vdb); err != nil { - return ctrl.Result{}, err - } - // Functions to perform when the image changes. Order matters. 
funcs := []func(context.Context) (ctrl.Result, error){ // Initiate an image change by setting condition and event recording o.Manager.startImageChange, - // Create a secondary standby subcluster for each primary - o.createStandbySubclusters, + // Load up state that is used for the subsequent steps + o.loadSubclusterState, + // Setup a secondary standby subcluster for each primary + o.createStandbySts, + o.installStandbyNodes, + o.addStandbySubclusters, + o.addStandbyNodes, // Reroute all traffic from primary subclusters to their standby's o.rerouteClientTrafficToStandby, // Drain all connections from the primary subcluster. This waits for @@ -77,8 +82,10 @@ func (o *OnlineImageChangeReconciler) Reconcile(ctx context.Context, req *ctrl.R // Drain all connections from the standby subclusters to prepare them // for being removed. o.drainStandbys, - // Will delete the standby subclusters now that the primaries are back up. - o.deleteStandbySubclusters, + // Will cleanup the standby subclusters now that the primaries are back up. + o.removeStandbySubclusters, + o.uninstallStandbyNodes, + o.deleteStandbySts, // With the primaries back up, we can do a "rolling upgrade" style of // update for the secondary subclusters. o.startRollingUpgradeOfSecondarySubclusters, @@ -94,12 +101,74 @@ func (o *OnlineImageChangeReconciler) Reconcile(ctx context.Context, req *ctrl.R return ctrl.Result{}, nil } -// createStandbySubclusters this will create a secondary subcluster to accept +// loadSubclusterState will load state into the OnlineImageChangeReconciler that +// is used in subsequent steps. +func (o *OnlineImageChangeReconciler) loadSubclusterState(ctx context.Context) (ctrl.Result, error) { + var err error + err = o.PFacts.Collect(ctx, o.Vdb) + if err != nil { + return ctrl.Result{}, err + } + + o.Subclusters, err = o.Finder.FindSubclusterHandles(ctx, FindExisting) + if err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +// createStandbySts this will create a secondary subcluster to accept // traffic from the primaries when they are down. These subclusters are scalled // standby and are transient since they only exist for the life of the image // change. -func (o *OnlineImageChangeReconciler) createStandbySubclusters(ctx context.Context) (ctrl.Result, error) { - return ctrl.Result{}, nil +func (o *OnlineImageChangeReconciler) createStandbySts(ctx context.Context) (ctrl.Result, error) { + if o.skipStandbySetup() { + return ctrl.Result{}, nil + } + + if err := o.addStandbysToVdb(ctx); err != nil { + return ctrl.Result{}, err + } + o.Log.Info("Adding standby's", "num subclusters", len(o.Vdb.Spec.Subclusters)) + + actor := MakeObjReconciler(o.VRec, o.Log, o.Vdb, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) +} + +// installStandbyNodes will ensure we have installed vertica on +// each of the standby nodes. 
+func (o *OnlineImageChangeReconciler) installStandbyNodes(ctx context.Context) (ctrl.Result, error) { + if o.skipStandbySetup() { + return ctrl.Result{}, nil + } + + actor := MakeInstallReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) +} + +// addStandbySubclusters will register new standby subclusters with Vertica +func (o *OnlineImageChangeReconciler) addStandbySubclusters(ctx context.Context) (ctrl.Result, error) { + if o.skipStandbySetup() { + return ctrl.Result{}, nil + } + + actor := MakeDBAddSubclusterReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) +} + +// addStandbyNodes will ensure nodes on the standby's have been +// added to the cluster. +func (o *OnlineImageChangeReconciler) addStandbyNodes(ctx context.Context) (ctrl.Result, error) { + if o.skipStandbySetup() { + return ctrl.Result{}, nil + } + + actor := MakeDBAddNodeReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) } // rerouteClientTrafficToStandby will update the service objects for each of the @@ -121,12 +190,29 @@ func (o *OnlineImageChangeReconciler) drainPrimaries(ctx context.Context) (ctrl. // mode as all of the pods in the primary will be rescheduled with the new // image. func (o *OnlineImageChangeReconciler) changeImageInPrimaries(ctx context.Context) (ctrl.Result, error) { - return ctrl.Result{}, nil + numStsChanged, res, err := o.Manager.updateImageInStatefulSets(ctx, true, false) + if numStsChanged > 0 { + o.Log.Info("changed image in statefulsets", "num", numStsChanged) + o.PFacts.Invalidate() + } + return res, err } // restartPrimaries will restart all of the pods in the primary subclusters. func (o *OnlineImageChangeReconciler) restartPrimaries(ctx context.Context) (ctrl.Result, error) { - return ctrl.Result{}, nil + numPodsDeleted, res, err := o.Manager.deletePodsRunningOldImage(ctx, false) + if res.Requeue || err != nil { + return res, err + } + if numPodsDeleted > 0 { + o.Log.Info("deleted pods running old image", "num", numPodsDeleted) + o.PFacts.Invalidate() + } + + const DoNotRestartReadOnly = false + actor := MakeRestartReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts, DoNotRestartReadOnly) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) } // rerouteClientTrafficToPrimary will update the service objects of the primary @@ -143,9 +229,30 @@ func (o *OnlineImageChangeReconciler) drainStandbys(ctx context.Context) (ctrl.R return ctrl.Result{}, nil } -// deleteStandbySubclusters will delete any standby subclusters that were created for the image change. -func (o *OnlineImageChangeReconciler) deleteStandbySubclusters(ctx context.Context) (ctrl.Result, error) { - return ctrl.Result{}, nil +// removeStandbySubclusters will drive subcluster removal of any standbys +func (o *OnlineImageChangeReconciler) removeStandbySubclusters(ctx context.Context) (ctrl.Result, error) { + actor := MakeDBRemoveSubclusterReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) +} + +// uninstallStandbyNodes will drive uninstall logic for any +// standby nodes. 
+func (o *OnlineImageChangeReconciler) uninstallStandbyNodes(ctx context.Context) (ctrl.Result, error) { + actor := MakeUninstallReconciler(o.VRec, o.Log, o.Vdb, o.PRunner, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) +} + +// deleteStandbySts will delete any standby subclusters that were created for the image change. +func (o *OnlineImageChangeReconciler) deleteStandbySts(ctx context.Context) (ctrl.Result, error) { + if err := o.removeStandbysFromVdb(ctx); err != nil { + return ctrl.Result{}, err + } + + actor := MakeObjReconciler(o.VRec, o.Log, o.Vdb, o.PFacts) + o.traceActorReconcile(actor) + return actor.Reconcile(ctx, &ctrl.Request{}) } // startRollingUpgradeOfSecondarySubclusters will update the image of each of @@ -155,3 +262,106 @@ func (o *OnlineImageChangeReconciler) deleteStandbySubclusters(ctx context.Conte func (o *OnlineImageChangeReconciler) startRollingUpgradeOfSecondarySubclusters(ctx context.Context) (ctrl.Result, error) { return ctrl.Result{}, nil } + +// allPrimariesHaveNewImage returns true if all of the primary subclusters have the new image +func (o *OnlineImageChangeReconciler) allPrimariesHaveNewImage() bool { + for i := range o.Subclusters { + sc := o.Subclusters[i] + if sc.IsPrimary && sc.Image != o.Vdb.Spec.Image { + return false + } + } + return true +} + +// fetchOldImage will return the old image that existed prior to the image +// change process. If we cannot determine the old image, then the bool return +// value returns false. +func (o *OnlineImageChangeReconciler) fetchOldImage() (string, bool) { + for i := range o.Subclusters { + sc := o.Subclusters[i] + if sc.Image != o.Vdb.Spec.Image { + return sc.Image, true + } + } + return "", false +} + +// skipStandbySetup will return true if we can skip creation, install and +// scale-out of the standby subcluster +func (o *OnlineImageChangeReconciler) skipStandbySetup() bool { + // We can skip this entirely if all of the primary subclusters already have + // the new image. This is an indication that we have already created the + // standbys and done the image change. + return o.allPrimariesHaveNewImage() +} + +// addStandbysToVdb will create standby subclusters for each primary. The +// standbys are added to the Vdb struct inplace. +func (o *OnlineImageChangeReconciler) addStandbysToVdb(ctx context.Context) error { + oldImage, ok := o.fetchOldImage() + if !ok { + return fmt.Errorf("could not determine the old image name. "+ + "Only available image is %s", o.Vdb.Spec.Image) + } + + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // Always fetch the latest to minimize the chance of getting a conflict error. + nm := types.NamespacedName{Namespace: o.Vdb.Namespace, Name: o.Vdb.Name} + if err := o.VRec.Client.Get(ctx, nm, o.Vdb); err != nil { + return err + } + + // Figure out if any standbys need to be added + standbyMap := o.Vdb.GenSubclusterStandbyMap() + standbys := []vapi.Subcluster{} + for i := range o.Vdb.Spec.Subclusters { + sc := &o.Vdb.Spec.Subclusters[i] + if sc.IsPrimary { + _, ok := standbyMap[sc.Name] + if !ok { + standbys = append(standbys, *buildStandby(sc, oldImage)) + } + } + } + + if len(standbys) > 0 { + if err := o.Manager.setImageChangeStatus(ctx, "Creating standby secondary subclusters"); err != nil { + return err + } + o.Vdb.Spec.Subclusters = append(o.Vdb.Spec.Subclusters, standbys...) + return o.VRec.Client.Update(ctx, o.Vdb) + } + return nil + }) +} + +// removeStandbysFromVdb will delete any standby subclusters that exist. 
The +// standbys will be removed from the Vdb struct inplace. +func (o *OnlineImageChangeReconciler) removeStandbysFromVdb(ctx context.Context) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // Always fetch the latest to minimize the chance of getting a conflict error. + nm := types.NamespacedName{Namespace: o.Vdb.Namespace, Name: o.Vdb.Name} + if err := o.VRec.Client.Get(ctx, nm, o.Vdb); err != nil { + return err + } + + scToKeep := []vapi.Subcluster{} + for i := range o.Vdb.Spec.Subclusters { + sc := &o.Vdb.Spec.Subclusters[i] + if !sc.IsStandby { + scToKeep = append(scToKeep, *sc) + } + } + + if len(scToKeep) != len(o.Vdb.Spec.Subclusters) { + o.Vdb.Spec.Subclusters = scToKeep + return o.VRec.Client.Update(ctx, o.Vdb) + } + return nil + }) +} + +func (o *OnlineImageChangeReconciler) traceActorReconcile(actor ReconcileActor) { + o.Log.Info("starting actor for online image change", "name", fmt.Sprintf("%T", actor)) +} diff --git a/pkg/controllers/podfacts.go b/pkg/controllers/podfacts.go index bf99d2e7d..bd2497afd 100644 --- a/pkg/controllers/podfacts.go +++ b/pkg/controllers/podfacts.go @@ -532,11 +532,13 @@ func (p *PodFacts) findRunningPod() (*PodFact, bool) { // findRestartablePods returns a list of pod facts that can be restarted. // An empty list implies there are no pods that need to be restarted. -// We treat read-only nodes as being restartable because they are in the -// read-only state due to losing of cluster quorum. -func (p *PodFacts) findRestartablePods() []*PodFact { +// We allow read-only nodes to be treated as being restartable because they are +// in the read-only state due to losing of cluster quorum. This is an option +// for online upgrade, which want to keep the read-only up to keep the cluster +// accessible. +func (p *PodFacts) findRestartablePods(restartReadOnly bool) []*PodFact { return p.filterPods(func(v *PodFact) bool { - return (!v.upNode || v.readOnly) && v.dbExists.IsTrue() && v.isPodRunning + return (!v.upNode || (restartReadOnly && v.readOnly)) && v.dbExists.IsTrue() && v.isPodRunning }) } diff --git a/pkg/controllers/restart_reconcile.go b/pkg/controllers/restart_reconcile.go index cda2bf3aa..6557bff96 100644 --- a/pkg/controllers/restart_reconcile.go +++ b/pkg/controllers/restart_reconcile.go @@ -48,18 +48,20 @@ type verticaIPLookup map[string]string // RestartReconciler will ensure each pod has a running vertica process type RestartReconciler struct { - VRec *VerticaDBReconciler - Log logr.Logger - Vdb *vapi.VerticaDB // Vdb is the CRD we are acting on. - PRunner cmds.PodRunner - PFacts *PodFacts - ATPod types.NamespacedName // The pod that we run admintools from + VRec *VerticaDBReconciler + Log logr.Logger + Vdb *vapi.VerticaDB // Vdb is the CRD we are acting on. 
+ PRunner cmds.PodRunner + PFacts *PodFacts + ATPod types.NamespacedName // The pod that we run admintools from + RestartReadOnly bool // Whether to restart nodes that are in read-only mode } // MakeRestartReconciler will build a RestartReconciler object func MakeRestartReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger, - vdb *vapi.VerticaDB, prunner cmds.PodRunner, pfacts *PodFacts) ReconcileActor { - return &RestartReconciler{VRec: vdbrecon, Log: log, Vdb: vdb, PRunner: prunner, PFacts: pfacts} + vdb *vapi.VerticaDB, prunner cmds.PodRunner, pfacts *PodFacts, restartReadOnly bool) ReconcileActor { + return &RestartReconciler{VRec: vdbrecon, Log: log, Vdb: vdb, PRunner: prunner, PFacts: pfacts, + RestartReadOnly: restartReadOnly} } // Reconcile will ensure each pod is UP in the vertica sense. @@ -119,7 +121,7 @@ func (r *RestartReconciler) reconcileCluster(ctx context.Context) (ctrl.Result, // read-only nodes. This is needed before re_ip, as re_ip can only work if // the database isn't running, which would be case if there are read-only // nodes. - downPods := r.PFacts.findRestartablePods() + downPods := r.PFacts.findRestartablePods(r.RestartReadOnly) if res, err := r.killOldProcesses(ctx, downPods); res.Requeue || err != nil { return res, err } @@ -146,7 +148,7 @@ func (r *RestartReconciler) reconcileNodes(ctx context.Context) (ctrl.Result, er // Find any pods that need to be restarted. These only include running pods. // If there is a pod that is not yet running, we leave them off for now. // When it does start running there will be another reconciliation cycle. - downPods := r.PFacts.findRestartablePods() + downPods := r.PFacts.findRestartablePods(r.RestartReadOnly) if len(downPods) > 0 { if ok := r.setATPod(); !ok { r.Log.Info("No pod found to run admintools from. 
Requeue reconciliation.") diff --git a/pkg/controllers/restart_reconciler_test.go b/pkg/controllers/restart_reconciler_test.go index f7af159f1..72941a303 100644 --- a/pkg/controllers/restart_reconciler_test.go +++ b/pkg/controllers/restart_reconciler_test.go @@ -37,6 +37,10 @@ var _ = Describe("restart_reconciler", func() { const Node1OldIP = "10.10.1.1" const Node2OldIP = "10.10.1.2" const Node3OldIP = "10.10.1.3" + const RestartProcessReadOnly = true + const RestartSkipReadOnly = false + const PodNotReadOnly = false + const PodReadOnly = true It("restartReconciler should not return an error if the sts doesn't exist", func() { vdb := vapi.MakeVDB() @@ -45,7 +49,7 @@ var _ = Describe("restart_reconciler", func() { fpr := &cmds.FakePodRunner{} pfacts := MakePodFacts(k8sClient, fpr) - recon := MakeRestartReconciler(vrec, logger, vdb, fpr, &pfacts) + recon := MakeRestartReconciler(vrec, logger, vdb, fpr, &pfacts, RestartProcessReadOnly) Expect(recon.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) }) @@ -59,13 +63,13 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{1}, PodNotReadOnly) downPod := &corev1.Pod{} downPodNm := names.GenPodName(vdb, sc, 1) Expect(k8sClient.Get(ctx, downPodNm, downPod)).Should(Succeed()) - r := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + r := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) restartCmd := fpr.FindCommands("restart_node") Expect(len(restartCmd)).Should(Equal(1)) @@ -92,9 +96,9 @@ var _ = Describe("restart_reconciler", func() { } fpr := &cmds.FakePodRunner{} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{1}, PodNotReadOnly) - r := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + r := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) Expect(k8sClient.Get(ctx, nm, vdb)).Should(Succeed()) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) restartCmd := fpr.FindCommands("restart_node") @@ -121,7 +125,7 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{1, 4}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{1, 4}, PodNotReadOnly) // Setup the pod runner to fail the admintools command. 
atPod := names.GenPodName(vdb, sc, 3) @@ -134,7 +138,7 @@ var _ = Describe("restart_reconciler", func() { }, } - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) r.ATPod = atPod Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{ @@ -173,9 +177,9 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1, 2}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1, 2}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, pfacts) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) oldIPs := make(verticaIPLookup) oldIPs["node0001"] = Node1OldIP @@ -200,9 +204,9 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, pfacts) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) oldIPs := make(verticaIPLookup) oldIPs["node0001"] = Node1OldIP @@ -219,12 +223,12 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, pfacts) // Mark one of the pods as uninstalled. 
This pod won't be included in the map file uninstallPod := names.GenPodName(vdb, sc, 1) pfacts.Detail[uninstallPod].isInstalled = tristate.False - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) atPod := names.GenPodName(vdb, sc, 0) fpr.Results = cmds.CmdResults{ @@ -252,9 +256,9 @@ var _ = Describe("restart_reconciler", func() { atPod := names.GenPodName(vdb, sc, 0) fpr := &cmds.FakePodRunner{} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1, 2}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1, 2}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, pfacts) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) fpr.Results = cmds.CmdResults{ atPod: []cmds.CmdResult{ @@ -282,9 +286,9 @@ var _ = Describe("restart_reconciler", func() { atPod := names.GenPodName(vdb, sc, 0) fpr := &cmds.FakePodRunner{} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, pfacts) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) fpr.Results = cmds.CmdResults{ atPod: []cmds.CmdResult{ @@ -309,7 +313,7 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, pfacts) // Setup the pod runner to grep out admintools.conf @@ -320,7 +324,7 @@ var _ = Describe("restart_reconciler", func() { }, } - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) r.ATPod = atPod Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) @@ -339,7 +343,7 @@ var _ = Describe("restart_reconciler", func() { fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} pfacts := MakePodFacts(k8sClient, fpr) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, &pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, &pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) stateMap := r.parseClusterNodeStatus( " Node | Host | State | Version | DB \n" + @@ -375,7 +379,7 @@ var _ = Describe("restart_reconciler", func() { pfacts.Detail[downPodNm].readOnly = true } - r := MakeRestartReconciler(vrec, logger, vdb, fpr, &pfacts) + r := MakeRestartReconciler(vrec, logger, vdb, fpr, &pfacts, RestartProcessReadOnly) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) listCmd := fpr.FindCommands("start_db") Expect(len(listCmd)).Should(Equal(1)) @@ -393,7 +397,7 @@ var _ = Describe("restart_reconciler", func() { fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} const DownPodIndex = 0 - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{DownPodIndex}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{DownPodIndex}, PodNotReadOnly) setVerticaNodeNameInPodFacts(vdb, sc, 
pfacts) atPod := names.GenPodName(vdb, sc, 3) @@ -406,7 +410,7 @@ var _ = Describe("restart_reconciler", func() { }, } - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) r.ATPod = atPod Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) @@ -424,10 +428,10 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0, 1}, PodNotReadOnly) atPod := names.GenPodName(vdb, sc, 0) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) r.ATPod = atPod Expect(r.restartCluster(ctx)).Should(Equal(ctrl.Result{})) @@ -446,9 +450,9 @@ var _ = Describe("restart_reconciler", func() { defer deletePods(ctx, vdb) fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{0}, PodNotReadOnly) - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) Expect(r.reconcileNodes(ctx)).Should(Equal(ctrl.Result{})) restart := fpr.FindCommands("/opt/vertica/bin/admintools", "-t", "restart_node") @@ -484,7 +488,7 @@ var _ = Describe("restart_reconciler", func() { }, } - act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) r := act.(*RestartReconciler) r.ATPod = atPod Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) @@ -508,9 +512,33 @@ var _ = Describe("restart_reconciler", func() { fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} const DownPodIndex = 1 - pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{DownPodIndex}) + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{DownPodIndex}, PodNotReadOnly) - r := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts) + r := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) Expect(r.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true})) }) + + It("should avoid restart_node of read-only nodes when that setting is used", func() { + vdb := vapi.MakeVDB() + vdb.Spec.Subclusters[0].Size = 2 + createVdb(ctx, vdb) + defer deleteVdb(ctx, vdb) + sc := &vdb.Spec.Subclusters[0] + createPods(ctx, vdb, AllPodsRunning) + defer deletePods(ctx, vdb) + + fpr := &cmds.FakePodRunner{Results: make(cmds.CmdResults)} + const DownPodIndex = 0 + pfacts := createPodFactsWithRestartNeeded(ctx, vdb, sc, fpr, []int32{DownPodIndex}, PodReadOnly) + + act := MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartSkipReadOnly) + Expect(act.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) + restart := fpr.FindCommands("/opt/vertica/bin/admintools", "-t", "restart_node") + Expect(len(restart)).Should(Equal(0)) + + act = MakeRestartReconciler(vrec, logger, vdb, fpr, pfacts, RestartProcessReadOnly) + Expect(act.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{})) + restart = 
fpr.FindCommands("/opt/vertica/bin/admintools", "-t", "restart_node") + Expect(len(restart)).Should(Equal(1)) + }) }) diff --git a/pkg/controllers/subcluster_handle.go b/pkg/controllers/subcluster_handle.go index b38dd7608..cfeaf3be3 100644 --- a/pkg/controllers/subcluster_handle.go +++ b/pkg/controllers/subcluster_handle.go @@ -28,10 +28,6 @@ import ( type SubclusterHandle struct { vapi.Subcluster - // Indicates whether this subcluster is a transient standby that is created - // for online upgrade. - IsStandby bool - // The name of the image that is currently being run in this subcluster. If // the corresponding sts doesn't exist, then this will be left blank. Image string @@ -66,15 +62,6 @@ func (s *SubclusterHandle) SetIsAcceptingTraffic(svcLabels map[string]string) er return nil } -// makeSubclusterHandle will form a SubclusterHandle from a Subcluster object -// found in the VerticaDB -func makeSubclusterHandle(sc *vapi.Subcluster) *SubclusterHandle { - return &SubclusterHandle{ - Subcluster: *sc, - IsStandby: false, // Assume not a standby since it is from VerticaDB - } -} - // makeSubclusterHandleFromSts will form a SubclusterHandle from a StatefulSet // object. func makeSubclusterHandleFromSts(sts *appsv1.StatefulSet, svcMap map[string]corev1.Service) *SubclusterHandle { diff --git a/pkg/controllers/suite_test.go b/pkg/controllers/suite_test.go index 405bbf976..bc69f7862 100644 --- a/pkg/controllers/suite_test.go +++ b/pkg/controllers/suite_test.go @@ -108,13 +108,18 @@ const ( func createPods(ctx context.Context, vdb *vapi.VerticaDB, podRunningState PodRunningState) { for i := range vdb.Spec.Subclusters { sc := &vdb.Spec.Subclusters[i] - sch := makeSubclusterHandle(sc) - sts := buildStsSpec(names.GenStsName(vdb, sc), vdb, sch) - ExpectWithOffset(1, k8sClient.Create(ctx, sts)).Should(Succeed()) + sts := &appsv1.StatefulSet{} + if err := k8sClient.Get(ctx, names.GenStsName(vdb, sc), sts); kerrors.IsNotFound(err) { + sts = buildStsSpec(names.GenStsName(vdb, sc), vdb, sc) + ExpectWithOffset(1, k8sClient.Create(ctx, sts)).Should(Succeed()) + } for j := int32(0); j < sc.Size; j++ { - pod := buildPod(vdb, sc, j) - ExpectWithOffset(1, k8sClient.Create(ctx, pod)).Should(Succeed()) - setPodStatusHelper(ctx, 2 /* funcOffset */, names.GenPodName(vdb, sc, j), int32(i), j, podRunningState, false) + pod := &corev1.Pod{} + if err := k8sClient.Get(ctx, names.GenPodName(vdb, sc, j), pod); kerrors.IsNotFound(err) { + pod = buildPod(vdb, sc, j) + ExpectWithOffset(1, k8sClient.Create(ctx, pod)).Should(Succeed()) + setPodStatusHelper(ctx, 2 /* funcOffset */, names.GenPodName(vdb, sc, j), int32(i), j, podRunningState, false) + } } // Update the status in the sts to reflect the number of pods we created sts.Status.Replicas = sc.Size @@ -175,8 +180,10 @@ func deletePods(ctx context.Context, vdb *vapi.VerticaDB) { } } sts := &appsv1.StatefulSet{} - ExpectWithOffset(1, k8sClient.Get(ctx, names.GenStsName(vdb, sc), sts)).Should(Succeed()) - ExpectWithOffset(1, k8sClient.Delete(ctx, sts)).Should(Succeed()) + err := k8sClient.Get(ctx, names.GenStsName(vdb, sc), sts) + if !kerrors.IsNotFound(err) { + ExpectWithOffset(1, k8sClient.Delete(ctx, sts)).Should(Succeed()) + } } } @@ -272,12 +279,14 @@ func createPodFactsWithInstallNeeded(ctx context.Context, vdb *vapi.VerticaDB, f } func createPodFactsWithRestartNeeded(ctx context.Context, vdb *vapi.VerticaDB, sc *vapi.Subcluster, - fpr *cmds.FakePodRunner, podsDownByIndex []int32) *PodFacts { + fpr *cmds.FakePodRunner, podsDownByIndex []int32, readOnly bool) *PodFacts 
{ pfacts := MakePodFacts(k8sClient, fpr) ExpectWithOffset(1, pfacts.Collect(ctx, vdb)).Should(Succeed()) for _, podIndex := range podsDownByIndex { downPodNm := names.GenPodName(vdb, sc, podIndex) - pfacts.Detail[downPodNm].upNode = false + // If readOnly is true, the pod is left up but in read-only mode; otherwise it is marked down. + pfacts.Detail[downPodNm].upNode = readOnly + pfacts.Detail[downPodNm].readOnly = readOnly } return &pfacts } diff --git a/pkg/controllers/verticadb_controller.go b/pkg/controllers/verticadb_controller.go index d40aa1e8a..1481e4d02 100644 --- a/pkg/controllers/verticadb_controller.go +++ b/pkg/controllers/verticadb_controller.go @@ -109,7 +109,7 @@ func (r *VerticaDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( MakeOfflineImageChangeReconciler(r, log, vdb, prunner, &pfacts), MakeOnlineImageChangeReconciler(r, log, vdb, prunner, &pfacts), // Handles restart + re_ip of vertica - MakeRestartReconciler(r, log, vdb, prunner, &pfacts), + MakeRestartReconciler(r, log, vdb, prunner, &pfacts, true), MakeStatusReconciler(r.Client, r.Scheme, log, vdb, &pfacts), // Handles calls to admintools -t db_remove_subcluster MakeDBRemoveSubclusterReconciler(r, log, vdb, prunner, &pfacts), diff --git a/tests/e2e/upgrade-vertica-ks-0/75-assert.yaml b/tests/e2e/upgrade-vertica-ks-0/75-assert.yaml index cb60e8a25..2ee43baf7 100644 --- a/tests/e2e/upgrade-vertica-ks-0/75-assert.yaml +++ b/tests/e2e/upgrade-vertica-ks-0/75-assert.yaml @@ -40,3 +40,5 @@ status: status: "False" - type: ImageChangeInProgress status: "False" + - type: OfflineImageChangeInProgress + status: "False"
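
Reading aid, not part of the patch: the predicate behind findRestartablePods(restartReadOnly) can be tried in isolation. The sketch below is a minimal, self-contained Go program; podFact is a simplified stand-in for the operator's PodFact type, which additionally carries a tristate dbExists, install state, and other facts.

package main

import "fmt"

// podFact is a simplified stand-in for the operator's PodFact; the real type
// also tracks install state, a tristate dbExists, and more.
type podFact struct {
	name         string
	upNode       bool
	readOnly     bool
	dbExists     bool
	isPodRunning bool
}

// restartable mirrors the predicate used by findRestartablePods: a pod is a
// restart candidate when it is down, or, only if restartReadOnly is set, when
// it is up but stuck in read-only mode.
func restartable(v podFact, restartReadOnly bool) bool {
	return (!v.upNode || (restartReadOnly && v.readOnly)) && v.dbExists && v.isPodRunning
}

func main() {
	pods := []podFact{
		{name: "down-pod", upNode: false, dbExists: true, isPodRunning: true},
		{name: "read-only-pod", upNode: true, readOnly: true, dbExists: true, isPodRunning: true},
		{name: "healthy-pod", upNode: true, dbExists: true, isPodRunning: true},
	}
	for _, p := range pods {
		fmt.Printf("%-14s restartReadOnly=true: %-5v restartReadOnly=false: %v\n",
			p.name, restartable(p, true), restartable(p, false))
	}
}

With restartReadOnly=false the read-only pod is filtered out, which matches the new test above that expects no restart_node invocation when RestartSkipReadOnly is passed, and exactly one when RestartProcessReadOnly is used.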
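
Similarly, a hedged sketch of the standby-cleanup step in removeStandbysFromVdb, isolated from the Kubernetes client and the retry.RetryOnConflict wrapper. The subcluster type here is a stand-in for vapi.Subcluster with only the field the cleanup cares about.

package main

import "fmt"

// subcluster is a simplified stand-in for vapi.Subcluster.
type subcluster struct {
	Name      string
	IsStandby bool
}

// dropStandbys returns the list with every standby removed plus a flag saying
// whether anything was actually dropped. removeStandbysFromVdb applies the
// same filtering to Vdb.Spec.Subclusters and re-fetches the latest Vdb inside
// retry.RetryOnConflict so a stale copy triggers a retry instead of failing
// the reconcile.
func dropStandbys(scs []subcluster) ([]subcluster, bool) {
	kept := []subcluster{}
	for _, sc := range scs {
		if !sc.IsStandby {
			kept = append(kept, sc)
		}
	}
	return kept, len(kept) != len(scs)
}

func main() {
	scs := []subcluster{
		{Name: "sc1"},
		{Name: "sc1-standby", IsStandby: true},
		{Name: "sc2"},
	}
	kept, changed := dropStandbys(scs)
	fmt.Println(kept, changed)
}

The changed flag is the reason the real method only issues a client Update when at least one standby was removed, avoiding a no-op write on every reconcile pass.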