Online image change: handling of the standby subcluster (#127)
This is another PR for online-upgrade. It will handle creation and removal of
the standby subcluster during the online image change process.

- new state was added to vapi.Subcluster for this. Originally, I was
  planning to keep most of this in the SubclusterHandle struct, but we already
  pass around vapi.Subcluster, so it was easier to have it there
- new status conditions for offline and online image change. These are intended
  to be used by the operator to know which image change to continue with once
  an image change has started (see the sketch after this list)
- filled out more of the logic in onlineimagechange_reconciler.go. It will
  scale out a new standby subcluster for each primary, then scale them down
  when we are finishing the image change.
- moved more logic into imagechange.go that is common between online and
  offline image change
- restart logic was changed to make restarting read-only nodes optional. When
  restarting for online, we skip the read-only nodes. Offline restarts
  everything.
spilchen authored Dec 16, 2021
1 parent 2233a9f commit c761ce2
Showing 18 changed files with 797 additions and 202 deletions.
81 changes: 71 additions & 10 deletions api/v1beta1/verticadb_types.go
@@ -499,9 +499,30 @@ type Subcluster struct {
// at least one primary subcluster in the database.
IsPrimary bool `json:"isPrimary"`

// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:hidden"
// Internal state that indicates whether this is a standby subcluster for a
// primary. Standbys are transient subclusters that are created during an
// online image change.
IsStandby bool `json:"isStandby,omitempty"`

// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:hidden"
// If this is a standby subcluster, this is the name of the primary
// subcluster it was created for. This is state internally managed for an
// online image change.
StandbyParent string `json:"standbyParent,omitempty"`

// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:hidden"
// This allows a different image to be used for the subcluster than the one
// in VerticaDB. This is intended to be used internally by the online image
// change process.
ImageOverride string `json:"imageOverride,omitempty"`

// +operator-sdk:csv:customresourcedefinitions:type=spec
// A map of label keys and values to restrict Vertica node scheduling to workers
- // with matchiing labels.
+ // with matching labels.
// More info: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
NodeSelector map[string]string `json:"nodeSelector,omitempty"`

@@ -619,31 +640,41 @@ const (
AutoRestartVertica VerticaDBConditionType = "AutoRestartVertica"
// DBInitialized indicates the database has been created or revived
DBInitialized VerticaDBConditionType = "DBInitialized"
- // ImageChangeInProgress indicates if the vertica server is in the process of having its image change
- ImageChangeInProgress VerticaDBConditionType = "ImageChangeInProgress"
+ // ImageChangeInProgress indicates if the vertica server is in the process
+ // of having its image change. We have two additional conditions to
+ // distinguish between online and offline image change.
+ ImageChangeInProgress VerticaDBConditionType = "ImageChangeInProgress"
+ OfflineImageChangeInProgress VerticaDBConditionType = "OfflineImageChangeInProgress"
+ OnlineImageChangeInProgress VerticaDBConditionType = "OnlineImageChangeInProgress"
)

// Fixed index entries for each condition.
const (
AutoRestartVerticaIndex = iota
DBInitializedIndex
ImageChangeInProgressIndex
OfflineImageChangeInProgressIndex
OnlineImageChangeInProgressIndex
)

// VerticaDBConditionIndexMap is a map of the VerticaDBConditionType to its
// index in the condition array
var VerticaDBConditionIndexMap = map[VerticaDBConditionType]int{
AutoRestartVertica: AutoRestartVerticaIndex,
DBInitialized: DBInitializedIndex,
ImageChangeInProgress: ImageChangeInProgressIndex,
OfflineImageChangeInProgress: OfflineImageChangeInProgressIndex,
OnlineImageChangeInProgress: OnlineImageChangeInProgressIndex,
}

// VerticaDBConditionNameMap is the reverse of VerticaDBConditionIndexMap. It
// maps an index to the condition name.
var VerticaDBConditionNameMap = map[int]VerticaDBConditionType{
AutoRestartVerticaIndex: AutoRestartVertica,
DBInitializedIndex: DBInitialized,
ImageChangeInProgressIndex: ImageChangeInProgress,
OfflineImageChangeInProgressIndex: OfflineImageChangeInProgress,
OnlineImageChangeInProgressIndex: OnlineImageChangeInProgress,
}

// VerticaDBCondition defines condition for VerticaDB
@@ -798,7 +829,7 @@ func MakeVDB() *VerticaDB {
DBName: "db",
ShardCount: 12,
Subclusters: []Subcluster{
{Name: "defaultsubcluster", Size: 3, ServiceType: corev1.ServiceTypeClusterIP},
{Name: "defaultsubcluster", Size: 3, ServiceType: corev1.ServiceTypeClusterIP, IsPrimary: true},
},
},
}
@@ -814,6 +845,19 @@ func (v *VerticaDB) GenSubclusterMap() map[string]*Subcluster {
return scMap
}

// GenSubclusterStandbyMap will create a map of primary subclusters to their
// standby subcluster. It returns an empty map if there are no standbys.
func (v *VerticaDB) GenSubclusterStandbyMap() map[string]string {
m := map[string]string{}
for i := range v.Spec.Subclusters {
sc := &v.Spec.Subclusters[i]
if sc.IsStandby {
m[sc.StandbyParent] = sc.Name
}
}
return m
}

// IsValidSubclusterName validates the subcluster name is valid. We have rules
// about its name because it is included in the name of the statefulset, so we
// must adhere to the Kubernetes rules for object names.
@@ -860,3 +904,20 @@ func (v *VerticaDB) GetCommunalPath() string {
func (v *VerticaDB) GetDepotPath() string {
return fmt.Sprintf("%s/%s", v.Spec.Local.DepotPath, v.Spec.DBName)
}

const (
PrimarySubclusterType = "primary"
StandbySubclusterType = "standby"
SecondarySubclusterType = "secondary"
)

// GetType returns the type of the subcluster in string form
func (s *Subcluster) GetType() string {
if s.IsPrimary {
if s.IsStandby {
return StandbySubclusterType
}
return PrimarySubclusterType
}
return SecondarySubclusterType
}
15 changes: 15 additions & 0 deletions api/v1beta1/verticadb_types_test.go
@@ -36,4 +36,19 @@ var _ = Describe("verticadb_types", func() {
vdb.Spec.Communal.IncludeUIDInPath = false
Expect(vdb.GetCommunalPath()).ShouldNot(ContainSubstring(string(vdb.ObjectMeta.UID)))
})

It("should generate map of standbys", func() {
vdb := MakeVDB()
vdb.Spec.Subclusters = []Subcluster{
{Name: "sc1", IsPrimary: true},
{Name: "sc1-standby", IsPrimary: false, IsStandby: true, StandbyParent: "sc1"},
{Name: "sc2", IsPrimary: false},
{Name: "sc3", IsPrimary: true},
{Name: "sc3-standby", IsPrimary: false, IsStandby: true, StandbyParent: "sc3"},
}
m := vdb.GenSubclusterStandbyMap()
Expect(m["sc1"]).Should(Equal("sc1-standby"))
Expect(m["sc3"]).Should(Equal("sc3-standby"))
Expect(m["sc2"]).Should(Equal(""))
})
})
52 changes: 40 additions & 12 deletions pkg/controllers/builder.go
@@ -32,16 +32,15 @@ const SuperuserPasswordPath = "superuser-passwd"

// buildExtSvc creates desired spec for the external service.
func buildExtSvc(nm types.NamespacedName, vdb *vapi.VerticaDB, sc *vapi.Subcluster) *corev1.Service {
- scHandle := makeSubclusterHandle(sc)
return &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: nm.Name,
Namespace: nm.Namespace,
- Labels: makeLabelsForSvcObject(vdb, scHandle, "external"),
+ Labels: makeLabelsForSvcObject(vdb, sc, "external"),
Annotations: makeAnnotationsForObject(vdb),
},
Spec: corev1.ServiceSpec{
- Selector: makeSvcSelectorLabels(vdb, scHandle),
+ Selector: makeSvcSelectorLabels(vdb, sc),
Type: sc.ServiceType,
Ports: []corev1.ServicePort{
{Port: 5433, Name: "vertica", NodePort: sc.NodePort},
@@ -326,7 +325,7 @@ func buildPodSpec(vdb *vapi.VerticaDB, sc *vapi.Subcluster) corev1.PodSpec {
// makeServerContainer builds the spec for the server container
func makeServerContainer(vdb *vapi.VerticaDB, sc *vapi.Subcluster) corev1.Container {
return corev1.Container{
- Image: vdb.Spec.Image,
+ Image: pickImage(vdb, sc),
ImagePullPolicy: vdb.Spec.ImagePullPolicy,
Name: names.ServerContainer,
Resources: sc.Resources,
@@ -375,6 +374,16 @@ func makeContainers(vdb *vapi.VerticaDB, sc *vapi.Subcluster) []corev1.Container
return cnts
}

// pickImage will pick the correct image for the subcluster to use
func pickImage(vdb *vapi.VerticaDB, sc *vapi.Subcluster) string {
// The ImageOverride exists to allow standby subclusters created for
// primaries to continue to use the old image during an online image change.
if sc.ImageOverride != "" {
return sc.ImageOverride
}
return vdb.Spec.Image
}

// getStorageClassName returns a pointer to the StorageClass
func getStorageClassName(vdb *vapi.VerticaDB) *string {
if vdb.Spec.Local.StorageClass == "" {
@@ -384,26 +393,26 @@ func getStorageClassName(vdb *vapi.VerticaDB) *string {
}

// buildStsSpec builds manifest for a subclusters statefulset
- func buildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, scHandle *SubclusterHandle) *appsv1.StatefulSet {
+ func buildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, sc *vapi.Subcluster) *appsv1.StatefulSet {
return &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: nm.Name,
Namespace: nm.Namespace,
- Labels: makeLabelsForObject(vdb, scHandle),
+ Labels: makeLabelsForObject(vdb, sc),
Annotations: makeAnnotationsForObject(vdb),
},
Spec: appsv1.StatefulSetSpec{
Selector: &metav1.LabelSelector{
- MatchLabels: makeSvcSelectorLabels(vdb, scHandle),
+ MatchLabels: makeSvcSelectorLabels(vdb, sc),
},
ServiceName: names.GenHlSvcName(vdb).Name,
- Replicas: &scHandle.Size,
+ Replicas: &sc.Size,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
- Labels: makeLabelsForObject(vdb, scHandle),
+ Labels: makeLabelsForObject(vdb, sc),
Annotations: makeAnnotationsForObject(vdb),
},
- Spec: buildPodSpec(vdb, &scHandle.Subcluster),
+ Spec: buildPodSpec(vdb, sc),
},
UpdateStrategy: makeUpdateStrategy(vdb),
PodManagementPolicy: appsv1.ParallelPodManagement,
@@ -431,13 +440,12 @@ func buildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, scHandle *Subclu
// This is only here for testing purposes when we need to construct the pods ourselves. This
// bit is typically handled by the statefulset controller.
func buildPod(vdb *vapi.VerticaDB, sc *vapi.Subcluster, podIndex int32) *corev1.Pod {
- scHandle := makeSubclusterHandle(sc)
nm := names.GenPodName(vdb, sc, podIndex)
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: nm.Name,
Namespace: nm.Namespace,
- Labels: makeLabelsForObject(vdb, scHandle),
+ Labels: makeLabelsForObject(vdb, sc),
Annotations: makeAnnotationsForObject(vdb),
},
Spec: buildPodSpec(vdb, sc),
@@ -572,3 +580,23 @@ func getK8sAffinity(a vapi.Affinity) *corev1.Affinity {
PodAntiAffinity: a.PodAntiAffinity,
}
}

// buildStandby creates a Standby subcluster based on a primary
func buildStandby(sc *vapi.Subcluster, imageOverride string) *vapi.Subcluster {
return &vapi.Subcluster{
Name: fmt.Sprintf("%s-standby", sc.Name),
Size: 1,
IsStandby: true,
StandbyParent: sc.Name,
ImageOverride: imageOverride,
IsPrimary: false,
NodeSelector: sc.NodeSelector,
Affinity: sc.Affinity,
PriorityClassName: sc.PriorityClassName,
Tolerations: sc.Tolerations,
Resources: sc.Resources,
ServiceType: sc.ServiceType,
NodePort: sc.NodePort,
ExternalIPs: sc.ExternalIPs,
}
}
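
As a companion to buildStandby and pickImage, here is a rough sketch of the scale-out step described in the commit message: give every primary a standby that keeps running the old image via ImageOverride. It assumes it sits in the same controllers package (buildStandby is unexported), and the helper name and the oldImage parameter are illustrative; the real onlineimagechange_reconciler.go logic is not shown in this diff.

// addStandbys is a sketch of the scale-out step: for each primary subcluster
// that does not yet have a standby, append one whose ImageOverride pins it to
// the image the cluster is currently running.
func addStandbys(vdb *vapi.VerticaDB, oldImage string) {
	standbys := vdb.GenSubclusterStandbyMap() // primary name -> standby name
	added := []vapi.Subcluster{}
	for i := range vdb.Spec.Subclusters {
		sc := &vdb.Spec.Subclusters[i]
		if !sc.IsPrimary {
			continue
		}
		if _, ok := standbys[sc.Name]; ok {
			continue // this primary already has a standby
		}
		added = append(added, *buildStandby(sc, oldImage))
	}
	vdb.Spec.Subclusters = append(vdb.Spec.Subclusters, added...)
	// pickImage will now keep each standby on oldImage, while the primaries
	// move to vdb.Spec.Image as part of the image change.
}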