Skip to content

Commit

Permalink
enable mcloneset deleting pvc when pod hanging
Browse files Browse the repository at this point in the history
  • Loading branch information
sunshuai09 committed Nov 2, 2022
1 parent a639a6f commit 0b57d3f
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 5 deletions.
3 changes: 3 additions & 0 deletions apis/apps/v1alpha1/cloneset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ type CloneSetScaleStrategy struct {
// The scale will fail if the number of unavailable pods were greater than this MaxUnavailable at scaling up.
// MaxUnavailable works only when scaling up.
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`

// Decide if cloneset reuses pvc when building pod.
DisablePVCReuse bool `json:"disablePVCReuse,omitempty"`
}

// CloneSetUpdateStrategy defines strategies for pods update.
Expand Down
3 changes: 3 additions & 0 deletions config/crd/bases/apps.kruise.io_clonesets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ spec:
description: ScaleStrategy indicates the ScaleStrategy that will be
employed to create and delete Pods in the CloneSet.
properties:
disablePVCReuse:
description: Decide if cloneset reuses pvc when building pod.
type: boolean
maxUnavailable:
anyOf:
- type: integer
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/apps.kruise.io_uniteddeployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,10 @@ spec:
that will be employed to create and delete Pods in the
CloneSet.
properties:
disablePVCReuse:
description: Decide if cloneset reuses pvc when building
pod.
type: boolean
maxUnavailable:
anyOf:
- type: integer
Expand Down
88 changes: 88 additions & 0 deletions pkg/controller/cloneset/sync/cloneset_scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,16 @@ import (
clonesetutils "github.com/openkruise/kruise/pkg/controller/cloneset/utils"
"github.com/openkruise/kruise/pkg/util"
"github.com/openkruise/kruise/pkg/util/expectations"
"github.com/openkruise/kruise/pkg/util/fieldindex"
"github.com/openkruise/kruise/pkg/util/lifecycle"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
kubecontroller "k8s.io/kubernetes/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
Expand All @@ -60,6 +65,21 @@ func (r *realControl) Scale(
return false, nil
}

// If the CloneSet does not want to reuse PVCs, first clean up the
// existing PVCs that no current pod is using. Afterwards the state looks
// as if the pod had been deleted by the controller, so a new pod can be created.
if updateCS.Spec.ScaleStrategy.DisablePVCReuse {
ins := getInstanceIDsFromPods(pods)
usingPVCs, uselessPVCs := classifyPVCs(ins, pvcs)
if len(uselessPVCs) > 0 {
klog.V(3).Infof("Begin to clean up cloneset %s useless PVCs", controllerKey)
if modified, err := r.cleanupPVCs(updateCS, uselessPVCs); err != nil || modified {
return modified, err
}
pvcs = usingPVCs
}
}

// 1. manage pods to delete and in preDelete
podsSpecifiedToDelete, podsInPreDelete, numToDelete := getPlannedDeletedPods(updateCS, pods)
if modified, err := r.managePreparingDelete(updateCS, pods, podsInPreDelete, numToDelete); err != nil || modified {
Expand Down Expand Up @@ -403,3 +423,71 @@ func (r *realControl) choosePodsToDelete(cs *appsv1alpha1.CloneSet, totalDiff in

return podsToDelete
}

// cleanupPVCs issues delete requests for the given PVCs of cs.
//
// A PVC is skipped (left untouched) when it carries more than one owner
// reference, or when isOwnerPodInactive returns false for it.
//
// The returned bool is true once at least one delete request has succeeded;
// the visible caller in Scale returns early in that case. Previously the flag
// was never set, so it was always false. On a delete failure the scale
// expectation registered for that PVC is observed again, a warning event is
// recorded on cs, and the error is returned together with the flag value
// reached so far.
func (r *realControl) cleanupPVCs(cs *appsv1alpha1.CloneSet, pvcs []*v1.PersistentVolumeClaim) (bool, error) {
	controllerKey := clonesetutils.GetControllerKey(cs)
	modified := false
	for _, pvc := range pvcs {
		// If the PVC has another owner reference besides the CloneSet, skip it.
		// To avoid deleting a PVC by mistake because of a momentary pod status
		// change, the pod status is queried again before deleting.
		if len(pvc.OwnerReferences) > 1 || !isOwnerPodInactive(r.Client, cs, pvc) {
			klog.Errorf("Skip deleting PVC %s", pvc.Name)
			continue
		}

		// Register the expected deletion before sending the request.
		clonesetutils.ScaleExpectations.ExpectScale(controllerKey, expectations.Delete, pvc.Name)
		if err := r.Delete(context.TODO(), pvc); err != nil {
			// The delete did not go through, so the expectation registered
			// above is observed immediately (presumably clearing it).
			clonesetutils.ScaleExpectations.ObserveScale(controllerKey, expectations.Delete, pvc.Name)
			r.recorder.Eventf(cs, v1.EventTypeWarning, "FailedCleanUp", "failed to clean up %s: %v", pvc.Name, err)
			return modified, err
		}
		modified = true
	}
	return modified, nil
}

// getInstanceIDsFromPods collects the value of the CloneSetInstanceID label
// of every pod into a string set. A pod without that label contributes the
// empty string.
func getInstanceIDsFromPods(pods []*v1.Pod) sets.String {
	ids := sets.NewString()
	for i := range pods {
		id := pods[i].Labels[appsv1alpha1.CloneSetInstanceID]
		ids.Insert(id)
	}
	return ids
}

// classifyPVCs splits pvcs into two groups based on their CloneSetInstanceID
// label: "using" holds the PVCs whose instance ID is contained in ids,
// "useless" holds all the others.
//
// Within each group PVCs are de-duplicated by UID; when the same UID occurs
// more than once the last occurrence is kept. The result order follows the
// first appearance of each UID in pvcs, so it is deterministic for a given
// input (the previous implementation ranged over maps and therefore returned
// the groups in random order).
func classifyPVCs(ids sets.String, pvcs []*v1.PersistentVolumeClaim) (using, useless []*v1.PersistentVolumeClaim) {
	// Position of each UID inside its result slice, used for de-duplication.
	usingIdx := make(map[types.UID]int, len(pvcs))
	uselessIdx := make(map[types.UID]int, len(pvcs))

	for _, pvc := range pvcs {
		dst, idx := &useless, uselessIdx
		if ids.Has(pvc.Labels[appsv1alpha1.CloneSetInstanceID]) {
			dst, idx = &using, usingIdx
		}

		if pos, dup := idx[pvc.UID]; dup {
			// Same UID seen before in this group: the later entry wins.
			(*dst)[pos] = pvc
			continue
		}
		idx[pvc.UID] = len(*dst)
		*dst = append(*dst, pvc)
	}
	return using, useless
}

// isOwnerPodInactive reports whether no active pod related to pvc exists.
//
// It lists the pods in cs.Namespace selected by the owner-reference UID field
// index with the UID of cs, and returns false as soon as one of them is both
// related to pvc (per clonesetutils.IsPVCAndPodRelated) and active (per
// kubecontroller.IsPodActive). If the pods cannot be listed it also returns
// false, so that a failed lookup never counts as "inactive"; the list error
// is included in the log message.
func isOwnerPodInactive(reader client.Reader, cs *appsv1alpha1.CloneSet, pvc *v1.PersistentVolumeClaim) bool {
	opts := &client.ListOptions{
		Namespace:     cs.Namespace,
		FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(cs.UID)}),
	}
	podList, err := clonesetutils.GetAllPods(reader, opts)
	if err != nil {
		klog.Errorf("Could not get cloneset %s owned pod: %v", clonesetutils.GetControllerKey(cs), err)
		return false
	}

	// If the pod is inactive, or not found (which means it was already
	// deleted), the PVC has no active owner pod and true is returned.
	for _, pod := range podList {
		if clonesetutils.IsPVCAndPodRelated(pvc, pod) && kubecontroller.IsPodActive(pod) {
			return false
		}
	}
	return true
}
34 changes: 29 additions & 5 deletions pkg/controller/cloneset/utils/cloneset_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,17 @@ func GetControllerKey(cs *appsv1alpha1.CloneSet) string {

// GetActivePods returns all active pods in this namespace.
func GetActivePods(reader client.Reader, opts *client.ListOptions) ([]*v1.Pod, error) {
podList := &v1.PodList{}
if err := reader.List(context.TODO(), podList, opts, utilclient.DisableDeepCopy); err != nil {
podList, err := GetAllPods(reader, opts)
if err != nil {
return nil, err
}

// Ignore inactive pods
var activePods []*v1.Pod
for i, pod := range podList.Items {
for i, pod := range podList {
// Consider all rebuild pod as active pod, should not recreate
if kubecontroller.IsPodActive(&pod) {
activePods = append(activePods, &podList.Items[i])
if kubecontroller.IsPodActive(pod) {
activePods = append(activePods, podList[i])
}
}
return activePods, nil
Expand Down Expand Up @@ -233,3 +233,27 @@ func DoItSlowly(count int, initialBatchSize int, fn func() error) (int, error) {
}
return successes, nil
}

// GetAllPods lists pods through reader using opts and returns pointers to
// every item of the resulting list, without filtering on pod status.
// The list call is made with utilclient.DisableDeepCopy.
// NOTE(review): the returned pods presumably alias cached objects and should
// be treated as read-only — confirm against utilclient.
func GetAllPods(reader client.Reader, opts *client.ListOptions) ([]*v1.Pod, error) {
	list := new(v1.PodList)
	err := reader.List(context.TODO(), list, opts, utilclient.DisableDeepCopy)
	if err != nil {
		return nil, err
	}

	var result []*v1.Pod
	for idx := 0; idx < len(list.Items); idx++ {
		// Take the address of the slice element itself, not of a loop copy.
		result = append(result, &list.Items[idx])
	}
	return result, nil
}

// IsPVCAndPodRelated reports whether pvc belongs to pod, i.e. whether both
// carry the same, non-empty CloneSetInstanceID label value.
func IsPVCAndPodRelated(pvc *v1.PersistentVolumeClaim, pod *v1.Pod) bool {
	instanceID := pvc.Labels[appsv1alpha1.CloneSetInstanceID]
	// A non-empty PVC ID that equals the pod's ID implies the pod's ID is
	// non-empty as well, so a single emptiness check is sufficient.
	return instanceID != "" && instanceID == pod.Labels[appsv1alpha1.CloneSetInstanceID]
}

0 comments on commit 0b57d3f

Please sign in to comment.