From ff87c34e1e06490cbcecb035a2fc1b35d58cddb0 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 13 Jan 2022 10:44:08 -0500 Subject: [PATCH 1/5] Fix code to handling recovery from expansion failure --- pkg/controller/controller.go | 41 +++-- pkg/controller/expand_and_recover.go | 220 +++++++++++++++++++++++++++ pkg/controller/resize_status.go | 111 ++++++++++++++ pkg/features/features.go | 9 +- pkg/resizer/csi_resizer.go | 4 + pkg/resizer/resizer.go | 2 + pkg/resizer/trivial_resizer.go | 4 + pkg/util/util.go | 10 +- pkg/util/util_test.go | 2 +- 9 files changed, 382 insertions(+), 21 deletions(-) create mode 100644 pkg/controller/expand_and_recover.go create mode 100644 pkg/controller/resize_status.go diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 989a53e14..df5014c2a 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -343,12 +343,18 @@ func (ctrl *resizeController) syncPVC(key string) error { klog.V(4).Infof("No need to resize PVC %q", util.PVCKey(pvc)) return nil } - if !ctrl.pvNeedResize(pvc, pv) { - klog.V(4).Infof("No need to resize PV %q", pv.Name) - return nil - } - return ctrl.resizePVC(pvc, pv) + if utilfeature.DefaultFeatureGate.Enabled(features.RecoverVolumeExpansionFailure) { + _, _, err, _ := ctrl.expandAndRecover(pvc, pv) + return err + } else { + if !ctrl.pvNeedResize(pvc, pv) { + klog.V(4).Infof("No need to resize PV %q", pv.Name) + return nil + } + + return ctrl.resizePVC(pvc, pv) + } } // pvcNeedResize returns true is a pvc requests a resize operation. @@ -476,7 +482,7 @@ func (ctrl *resizeController) resizeVolume( } klog.V(4).Infof("Resize volume succeeded for volume %q, start to update PV's capacity", pv.Name) - err = ctrl.updatePVCapacity(pv, pvc.Status.Capacity[v1.ResourceStorage], newSize, fsResizeRequired) + _, err = ctrl.updatePVCapacity(pv, pvc.Status.Capacity[v1.ResourceStorage], newSize, fsResizeRequired) if err != nil { return newSize, fsResizeRequired, err } @@ -496,7 +502,7 @@ func (ctrl *resizeController) markPVCAsFSResizeRequired(pvc *v1.PersistentVolume newPVC.Status.Conditions = util.MergeResizeConditionsOfPVC(newPVC.Status.Conditions, []v1.PersistentVolumeClaimCondition{pvcCondition}) - _, err := ctrl.patchClaim(pvc, newPVC) + _, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) if err != nil { return fmt.Errorf("Mark PVC %q as file system resize required failed: %v", util.PVCKey(pvc), err) @@ -509,6 +515,7 @@ func (ctrl *resizeController) markPVCAsFSResizeRequired(pvc *v1.PersistentVolume return nil } +// legacy markPVCResizeInProgress function, should be removed once RecoverFromVolumeExpansionFailure feature goes GA. func (ctrl *resizeController) markPVCResizeInProgress(pvc *v1.PersistentVolumeClaim) (*v1.PersistentVolumeClaim, error) { // Mark PVC as Resize Started progressCondition := v1.PersistentVolumeClaimCondition{ @@ -520,7 +527,7 @@ func (ctrl *resizeController) markPVCResizeInProgress(pvc *v1.PersistentVolumeCl newPVC.Status.Conditions = util.MergeResizeConditionsOfPVC(newPVC.Status.Conditions, []v1.PersistentVolumeClaimCondition{progressCondition}) - updatedPVC, err := ctrl.patchClaim(pvc, newPVC) + updatedPVC, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) if err != nil { return nil, err } @@ -534,7 +541,7 @@ func (ctrl *resizeController) markPVCResizeFinished( newPVC.Status.Capacity[v1.ResourceStorage] = newSize newPVC.Status.Conditions = util.MergeResizeConditionsOfPVC(pvc.Status.Conditions, []v1.PersistentVolumeClaimCondition{}) - _, err := ctrl.patchClaim(pvc, newPVC) + _, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) if err != nil { return fmt.Errorf("Mark PVC %q as resize finished failed: %v", util.PVCKey(pvc), err) } @@ -545,8 +552,8 @@ func (ctrl *resizeController) markPVCResizeFinished( return nil } -func (ctrl *resizeController) patchClaim(oldPVC, newPVC *v1.PersistentVolumeClaim) (*v1.PersistentVolumeClaim, error) { - patchBytes, err := util.GetPVCPatchData(oldPVC, newPVC) +func (ctrl *resizeController) patchClaim(oldPVC, newPVC *v1.PersistentVolumeClaim, addResourceVersionCheck bool) (*v1.PersistentVolumeClaim, error) { + patchBytes, err := util.GetPVCPatchData(oldPVC, newPVC, addResourceVersionCheck) if err != nil { return nil, fmt.Errorf("can't patch status of PVC %s as generate path data failed: %v", util.PVCKey(oldPVC), err) } @@ -575,7 +582,11 @@ func (ctrl *resizeController) deletePreResizeCapAnnotation(pv *v1.PersistentVolu return err } -func (ctrl *resizeController) updatePVCapacity(pv *v1.PersistentVolume, oldCapacity, newCapacity resource.Quantity, fsResizeRequired bool) error { +func (ctrl *resizeController) updatePVCapacity( + pv *v1.PersistentVolume, + oldCapacity, newCapacity resource.Quantity, + fsResizeRequired bool) (*v1.PersistentVolume, error) { + klog.V(4).Infof("Resize volume succeeded for volume %q, start to update PV's capacity", pv.Name) newPV := pv.DeepCopy() newPV.Spec.Capacity[v1.ResourceStorage] = newCapacity @@ -590,11 +601,11 @@ func (ctrl *resizeController) updatePVCapacity(pv *v1.PersistentVolume, oldCapac } } - _, err := ctrl.patchPersistentVolume(pv, newPV) + updatedPV, err := ctrl.patchPersistentVolume(pv, newPV) if err != nil { - return fmt.Errorf("updating capacity of PV %q to %s failed: %v", pv.Name, newCapacity.String(), err) + return pv, fmt.Errorf("updating capacity of PV %q to %s failed: %v", pv.Name, newCapacity.String(), err) } - return nil + return updatedPV, nil } func (ctrl *resizeController) patchPersistentVolume(oldPV, newPV *v1.PersistentVolume) (*v1.PersistentVolume, error) { diff --git a/pkg/controller/expand_and_recover.go b/pkg/controller/expand_and_recover.go new file mode 100644 index 000000000..bb5784c2f --- /dev/null +++ b/pkg/controller/expand_and_recover.go @@ -0,0 +1,220 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "fmt" + "github.com/kubernetes-csi/external-resizer/pkg/util" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" +) + +func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume) (*v1.PersistentVolumeClaim, *v1.PersistentVolume, error, bool) { + if !ctrl.pvCanBeExpanded(pv, pvc) { + klog.V(4).Infof("No need to resize PV %q", pv.Name) + return pvc, pv, nil, false + } + resizeCalled := false + + // if we are here that already means pvc.Spec.Size > pvc.Status.Size + pvcSpecSize := pvc.Spec.Resources.Requests[v1.ResourceStorage] + pvcStatusSize := pvc.Status.Capacity[v1.ResourceStorage] + pvSize := pv.Spec.Capacity[v1.ResourceStorage] + + newSize := pvcSpecSize + resizeStatus := v1.PersistentVolumeClaimNoExpansionInProgress + if pvc.Status.ResizeStatus != nil { + resizeStatus = *pvc.Status.ResizeStatus + } + + var allocatedSize *resource.Quantity + t, ok := pvc.Status.AllocatedResources[v1.ResourceStorage] + if ok { + allocatedSize = &t + } + + if pvSize.Cmp(pvcSpecSize) < 0 { + // PV is smaller than user requested size. In general some control-plane volume expansion + // is necessary at this point but if we were expanding the PVC before we should let + // previous operation finished before starting expansion to new user requested size. + switch resizeStatus { + case v1.PersistentVolumeClaimControllerExpansionInProgress: + case v1.PersistentVolumeClaimNodeExpansionPending: + case v1.PersistentVolumeClaimNodeExpansionInProgress: + case v1.PersistentVolumeClaimNodeExpansionFailed: + if allocatedSize != nil { + newSize = *allocatedSize + } + default: + newSize = pvcSpecSize + } + } else { + // PV has already been expanded and hence we can be here for following reasons: + // 1. If expansion is pending on the node and this was just a spurious update event + // we don't need to do anything and let kubelet handle it. + // 2. It could be that - although we successfully expanded the volume, we failed to + // record our work in API objects, in which case - we should resume resizing operation + // and let API objects be updated. + // 3. Controller successfully expanded the volume, but expansion is failing on the node + // and before kubelet can retry failed node expansion - controller must verify if it is + // safe to do so. + // 4. While expansion was still pending on the node, user reduced the pvc size. + switch resizeStatus { + case v1.PersistentVolumeClaimNodeExpansionInProgress: + case v1.PersistentVolumeClaimNodeExpansionPending: + // we don't need to do any work. We could be here because of a spurious update event. + // This is case #1 + return pvc, pv, nil, resizeCalled + case v1.PersistentVolumeClaimNodeExpansionFailed: + // This is case#3, we need to reset the pvc status in such a way that kubelet can safely retry volume + // expansion. + if ctrl.resizer.DriverSupportsControlPlaneExpansion() && allocatedSize != nil { + newSize = *allocatedSize + } else { + newSize = pvcSpecSize + } + case v1.PersistentVolumeClaimControllerExpansionInProgress: + case v1.PersistentVolumeClaimControllerExpansionFailed: + case v1.PersistentVolumeClaimNoExpansionInProgress: + // This is case#2 or it could also be case#4 when user manually shrunk the PVC + // after expanding it. + if allocatedSize != nil { + newSize = *allocatedSize + } + default: + // It is impossible for ResizeStatus to be nil and allocatedSize to be not nil but somehow + // if we do end up in this state, it is safest to resume expansion to last recorded size in + // allocatedSize variable. + if pvc.Status.ResizeStatus == nil && allocatedSize != nil { + newSize = *allocatedSize + } else { + newSize = pvcSpecSize + } + } + } + var err error + pvc, err = ctrl.markControllerResizeInProgress(pvc, newSize) + if err != nil { + return pvc, pv, fmt.Errorf("marking pvc %q as resizing failed: %v", util.PVCKey(pvc), err), resizeCalled + } + + // if pvc previously failed to expand because it can't be expanded when in-use + // we must not try expansion here + if ctrl.usedPVCs.hasInUseErrors(pvc) && ctrl.usedPVCs.checkForUse(pvc) { + // Record an event to indicate that resizer is not expanding the pvc + msg := fmt.Sprintf("Unable to expand %s because CSI driver %s only supports offline expansion and volume is currently in-use", util.PVCKey(pvc), ctrl.resizer.Name()) + ctrl.eventRecorder.Event(pvc, v1.EventTypeWarning, util.VolumeResizeFailed, msg) + return pvc, pv, fmt.Errorf(msg), false + } + + // Record an event to indicate that external resizer is resizing this volume. + ctrl.eventRecorder.Event(pvc, v1.EventTypeNormal, util.VolumeResizing, + fmt.Sprintf("External resizer is resizing volume %s", pv.Name)) + + // before trying expansion we will remove the PVC from map + // that tracks PVCs which can't be expanded when in-use. If + // pvc indeed can not be expanded when in-use then it will be added + // back when expansion fails with in-use error. + ctrl.usedPVCs.removePVCWithInUseError(pvc) + pvc, pv, err = ctrl.callResizeOnPlugin(pvc, pv, newSize, pvcStatusSize) + + if err != nil { + // Record an event to indicate that resize operation is failed. + ctrl.eventRecorder.Eventf(pvc, v1.EventTypeWarning, util.VolumeResizeFailed, err.Error()) + } + + klog.V(4).Infof("Update capacity of PV %q to %s succeeded", pv.Name, newSize.String()) + return pvc, pv, nil, true +} + +func (ctrl *resizeController) callResizeOnPlugin( + pvc *v1.PersistentVolumeClaim, + pv *v1.PersistentVolume, + newSize, oldSize resource.Quantity) (*v1.PersistentVolumeClaim, *v1.PersistentVolume, error) { + updatedSize, fsResizeRequired, err := ctrl.resizer.Resize(pv, newSize) + + if err != nil { + // if this error was a in-use error then it must be tracked so as we don't retry without + // first verifying if volume is in-use + if inUseError(err) { + ctrl.usedPVCs.addPVCWithInUseError(pvc) + } + if isFinalError(err) { + var markExpansionFailedError error + pvc, markExpansionFailedError = ctrl.markControllerExpansionFailed(pvc) + if markExpansionFailedError != nil { + return pvc, pv, fmt.Errorf("resizing failed in controller with %v but failed to update PVC %s with: %v", err, util.PVCKey(pvc), markExpansionFailedError) + } + } + return pvc, pv, fmt.Errorf("resize volume %q by resizer %q failed: %v", pv.Name, ctrl.name, err) + } + + klog.V(4).Infof("Resize volume succeeded for volume %q, start to update PV's capacity", pv.Name) + + pv, err = ctrl.updatePVCapacity(pv, oldSize, updatedSize, fsResizeRequired) + if err != nil { + return pvc, pv, fmt.Errorf("error updating pv %q by resizer: %v", pv.Name, err) + } + + if fsResizeRequired { + pvc, err = ctrl.markForPendingNodeExpansion(pvc) + return pvc, pv, err + } + pvc, err = ctrl.markOverallExpansionAsFinished(pvc, updatedSize) + return pvc, pv, err +} + +// checks if pv can be expanded +func (ctrl *resizeController) pvCanBeExpanded(pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) bool { + if !ctrl.resizer.CanSupport(pv, pvc) { + klog.V(4).Infof("Resizer %q doesn't support PV %q", ctrl.name, pv.Name) + return false + } + + if (pv.Spec.ClaimRef == nil) || (pvc.Namespace != pv.Spec.ClaimRef.Namespace) || (pvc.UID != pv.Spec.ClaimRef.UID) { + klog.V(4).Infof("persistent volume is not bound to PVC being updated: %s", util.PVCKey(pvc)) + return false + } + return true +} + +func isFinalError(err error) bool { + // Sources: + // https://github.com/grpc/grpc/blob/master/doc/statuscodes.md + // https://github.com/container-storage-interface/spec/blob/master/spec.md + st, ok := status.FromError(err) + if !ok { + // This is not gRPC error. The operation must have failed before gRPC + // method was called, otherwise we would get gRPC error. + // We don't know if any previous volume operation is in progress, be on the safe side. + return false + } + switch st.Code() { + case codes.Canceled, // gRPC: Client Application cancelled the request + codes.DeadlineExceeded, // gRPC: Timeout + codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous volume operation may be still in progress. + codes.ResourceExhausted, // gRPC: Server temporarily out of resources - previous volume operation may be still in progress. + codes.Aborted: // CSI: Operation pending for volume + return false + } + // All other errors mean that operation either did not + // even start or failed. It is for sure not in progress. + return true +} diff --git a/pkg/controller/resize_status.go b/pkg/controller/resize_status.go new file mode 100644 index 000000000..5ad679ebb --- /dev/null +++ b/pkg/controller/resize_status.go @@ -0,0 +1,111 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "fmt" + + "github.com/kubernetes-csi/external-resizer/pkg/util" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" +) + +// markControllerResizeInProgress will mark PVC for controller resize, this function is newer version that uses +// resizeStatus and sets allocatedResources. +func (ctrl *resizeController) markControllerResizeInProgress( + pvc *v1.PersistentVolumeClaim, newSize resource.Quantity) (*v1.PersistentVolumeClaim, error) { + + progressCondition := v1.PersistentVolumeClaimCondition{ + Type: v1.PersistentVolumeClaimResizing, + Status: v1.ConditionTrue, + LastTransitionTime: metav1.Now(), + } + controllerExpansionInProgress := v1.PersistentVolumeClaimControllerExpansionInProgress + conditions := []v1.PersistentVolumeClaimCondition{progressCondition} + + newPVC := pvc.DeepCopy() + newPVC.Status.Conditions = util.MergeResizeConditionsOfPVC(newPVC.Status.Conditions, conditions) + newPVC.Status.ResizeStatus = &controllerExpansionInProgress + newPVC.Status.AllocatedResources = v1.ResourceList{v1.ResourceStorage: newSize} + updatedPVC, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) + if err != nil { + return nil, err + } + return updatedPVC, nil +} + +// markForPendingNodeExpansion is new set of functions designed around feature RecoverVolumeExpansionFailure +// which correctly sets pvc.Status.ResizeStatus +func (ctrl *resizeController) markForPendingNodeExpansion(pvc *v1.PersistentVolumeClaim) (*v1.PersistentVolumeClaim, error) { + pvcCondition := v1.PersistentVolumeClaimCondition{ + Type: v1.PersistentVolumeClaimFileSystemResizePending, + Status: v1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Message: "Waiting for user to (re-)start a pod to finish file system resize of volume on node.", + } + + nodeExpansionPendingVar := v1.PersistentVolumeClaimNodeExpansionPending + newPVC := pvc.DeepCopy() + newPVC.Status.Conditions = util.MergeResizeConditionsOfPVC(newPVC.Status.Conditions, + []v1.PersistentVolumeClaimCondition{pvcCondition}) + newPVC.Status.ResizeStatus = &nodeExpansionPendingVar + updatedPVC, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) + + if err != nil { + return updatedPVC, fmt.Errorf("Mark PVC %q as node expansion required failed: %v", util.PVCKey(pvc), err) + } + + klog.V(4).Infof("Mark PVC %q as file system resize required", util.PVCKey(pvc)) + ctrl.eventRecorder.Eventf(pvc, v1.EventTypeNormal, + util.FileSystemResizeRequired, "Require file system resize of volume on node") + + return updatedPVC, nil +} + +func (ctrl *resizeController) markControllerExpansionFailed(pvc *v1.PersistentVolumeClaim) (*v1.PersistentVolumeClaim, error) { + expansionFailedOnController := v1.PersistentVolumeClaimControllerExpansionFailed + newPVC := pvc.DeepCopy() + newPVC.Status.ResizeStatus = &expansionFailedOnController + + updatedPVC, err := ctrl.patchClaim(pvc, newPVC, false /* addResourceVersionCheck */) + if err != nil { + return pvc, fmt.Errorf("Mark PVC %q as controller expansion failed, errored with: %v", util.PVCKey(pvc), err) + } + return updatedPVC, nil +} + +func (ctrl *resizeController) markOverallExpansionAsFinished( + pvc *v1.PersistentVolumeClaim, + newSize resource.Quantity) (*v1.PersistentVolumeClaim, error) { + newPVC := pvc.DeepCopy() + newPVC.Status.Capacity[v1.ResourceStorage] = newSize + newPVC.Status.Conditions = util.MergeResizeConditionsOfPVC(pvc.Status.Conditions, []v1.PersistentVolumeClaimCondition{}) + expansionFinished := v1.PersistentVolumeClaimNoExpansionInProgress + newPVC.Status.ResizeStatus = &expansionFinished + + updatedPVC, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) + if err != nil { + return pvc, fmt.Errorf("Mark PVC %q as resize finished failed: %v", util.PVCKey(pvc), err) + } + + klog.V(4).Infof("Resize PVC %q finished", util.PVCKey(pvc)) + ctrl.eventRecorder.Eventf(pvc, v1.EventTypeNormal, util.VolumeResizeSuccess, "Resize volume succeeded") + + return updatedPVC, nil +} diff --git a/pkg/features/features.go b/pkg/features/features.go index edc220709..8ef442b3e 100644 --- a/pkg/features/features.go +++ b/pkg/features/features.go @@ -25,6 +25,12 @@ const ( // owner: @sunpa93 // alpha: v1.22 AnnotateFsResize featuregate.Feature = "AnnotateFsResize" + + // owner: @gnufied + // alpha: v1.23 + // + // Allows users to recover from volume expansion failures + RecoverVolumeExpansionFailure featuregate.Feature = "RecoverVolumeExpansionFailure" ) func init() { @@ -32,5 +38,6 @@ func init() { } var defaultResizerFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ - AnnotateFsResize: {Default: false, PreRelease: featuregate.Alpha}, + AnnotateFsResize: {Default: false, PreRelease: featuregate.Alpha}, + RecoverVolumeExpansionFailure: {Default: false, PreRelease: featuregate.Alpha}, } diff --git a/pkg/resizer/csi_resizer.go b/pkg/resizer/csi_resizer.go index c2d495e24..e8b8d7572 100644 --- a/pkg/resizer/csi_resizer.go +++ b/pkg/resizer/csi_resizer.go @@ -125,6 +125,10 @@ func (r *csiResizer) CanSupport(pv *v1.PersistentVolume, pvc *v1.PersistentVolum return true } +func (r *csiResizer) DriverSupportsControlPlaneExpansion() bool { + return true +} + // Resize resizes the persistence volume given request size // It supports both CSI volume and migrated in-tree volume func (r *csiResizer) Resize(pv *v1.PersistentVolume, requestSize resource.Quantity) (resource.Quantity, bool, error) { diff --git a/pkg/resizer/resizer.go b/pkg/resizer/resizer.go index cf96788f8..229bbf9ab 100644 --- a/pkg/resizer/resizer.go +++ b/pkg/resizer/resizer.go @@ -25,6 +25,8 @@ import ( type Resizer interface { // Name returns the resizer's name. Name() string + // DriverSupportsControlPlaneExpansion returns true if driver really supports control-plane expansion + DriverSupportsControlPlaneExpansion() bool // CanSupport returns true if resizer supports resize operation of this PV // with its corresponding PVC. CanSupport(pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) bool diff --git a/pkg/resizer/trivial_resizer.go b/pkg/resizer/trivial_resizer.go index 42a22c036..507f920b0 100644 --- a/pkg/resizer/trivial_resizer.go +++ b/pkg/resizer/trivial_resizer.go @@ -56,6 +56,10 @@ func (r *trivialResizer) CanSupport(pv *v1.PersistentVolume, pvc *v1.PersistentV return true } +func (r *trivialResizer) DriverSupportsControlPlaneExpansion() bool { + return false +} + func (r *trivialResizer) Resize(pv *v1.PersistentVolume, requestSize resource.Quantity) (newSize resource.Quantity, fsResizeRequired bool, err error) { return requestSize, true, nil } diff --git a/pkg/util/util.go b/pkg/util/util.go index 3c1b4fb07..e1492b6ff 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -77,15 +77,17 @@ func MergeResizeConditionsOfPVC(oldConditions, newConditions []v1.PersistentVolu return resultConditions } -func GetPVCPatchData(oldPVC, newPVC *v1.PersistentVolumeClaim) ([]byte, error) { +func GetPVCPatchData(oldPVC, newPVC *v1.PersistentVolumeClaim, addResourceVersionCheck bool) ([]byte, error) { patchBytes, err := GetPatchData(oldPVC, newPVC) if err != nil { return patchBytes, err } - patchBytes, err = addResourceVersion(patchBytes, oldPVC.ResourceVersion) - if err != nil { - return nil, fmt.Errorf("apply ResourceVersion to patch data failed: %v", err) + if addResourceVersionCheck { + patchBytes, err = addResourceVersion(patchBytes, oldPVC.ResourceVersion) + if err != nil { + return nil, fmt.Errorf("apply ResourceVersion to patch data failed: %v", err) + } } return patchBytes, nil } diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go index b0322203d..01d72a485 100644 --- a/pkg/util/util_test.go +++ b/pkg/util/util_test.go @@ -18,7 +18,7 @@ func TestGetPVCPatchData(t *testing.T) { newPVC := c.OldPVC.DeepCopy() newPVC.Status.Conditions = append(newPVC.Status.Conditions, v1.PersistentVolumeClaimCondition{Type: VolumeResizing, Status: v1.ConditionTrue}) - patchBytes, err := GetPVCPatchData(c.OldPVC, newPVC) + patchBytes, err := GetPVCPatchData(c.OldPVC, newPVC, true /*addResourceVersionCheck*/) if err != nil { t.Errorf("Case %d: Get patch data failed: %v", i, err) } From dea68891d0394c33fa2b14d90f8c7599d05604ad Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 13 Jan 2022 11:20:11 -0500 Subject: [PATCH 2/5] start adding tests for expand and recover --- pkg/controller/expand_and_recover_test.go | 148 ++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 pkg/controller/expand_and_recover_test.go diff --git a/pkg/controller/expand_and_recover_test.go b/pkg/controller/expand_and_recover_test.go new file mode 100644 index 000000000..f266c95ca --- /dev/null +++ b/pkg/controller/expand_and_recover_test.go @@ -0,0 +1,148 @@ +package controller + +import ( + "context" + "github.com/kubernetes-csi/external-resizer/pkg/csi" + "github.com/kubernetes-csi/external-resizer/pkg/features" + "github.com/kubernetes-csi/external-resizer/pkg/resizer" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/util/workqueue" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "testing" + "time" +) + +func TestExpandAndRecover(t *testing.T) { + fsVolumeMode := v1.PersistentVolumeFilesystem + var tests = []struct { + name string + pvc *v1.PersistentVolumeClaim + pv *v1.PersistentVolume + recoverFeatureGate bool + disableNodeExpansion bool + // expectations of test + expectedResizeStatus v1.PersistentVolumeClaimResizeStatus + expectedAllocatedSize resource.Quantity + expectResizeCall bool + }{ + { + name: "pvc.spec.size > pv.spec.size, recover_expansion=on", + pvc: getTestPVC("test-vol0", "2G", "1G", "", v1.PersistentVolumeClaimNoExpansionInProgress), + pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("2G"), + expectResizeCall: true, + }, + { + name: "pvc.spec.size = pv.spec.size, recover_expansion=on", + pvc: getTestPVC("test-vol0", "1G", "1G", "", v1.PersistentVolumeClaimNoExpansionInProgress), + pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("1G"), + expectResizeCall: true, + }, + { + name: "pvc.spec.size = pv.spec.size, recover_expansion=on", + pvc: getTestPVC("test-vol0", "1G", "1G", "1G", v1.PersistentVolumeClaimNodeExpansionPending), + pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("1G"), + expectResizeCall: false, + }, + { + name: "pvc.spec.size > pv.spec.size, recover_expansion=on, disable_node_expansion=true", + pvc: getTestPVC("test-vol0", "2G", "1G", "", v1.PersistentVolumeClaimNoExpansionInProgress), + pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), + disableNodeExpansion: true, + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNoExpansionInProgress, + expectedAllocatedSize: resource.MustParse("2G"), + expectResizeCall: true, + }, + { + name: "pv.spec.size >= pvc.spec.size, recover_expansion=on, resize_status=node_expansion_failed", + pvc: getTestPVC("test-vol0", "2G", "1G", "2G", v1.PersistentVolumeClaimNodeExpansionFailed), + pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("2G"), + expectResizeCall: false, + }, + } + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RecoverVolumeExpansionFailure, test.recoverFeatureGate)() + client := csi.NewMockClient("mock", !test.disableNodeExpansion, true, true, true) + driverName, _ := client.GetDriverName(context.TODO()) + + var initialObjects []runtime.Object + initialObjects = append(initialObjects, test.pvc) + initialObjects = append(initialObjects, test.pv) + + kubeClient, informerFactory := fakeK8s(initialObjects) + + csiResizer, err := resizer.NewResizerFromClient(client, 15*time.Second, kubeClient, informerFactory, driverName) + if err != nil { + t.Fatalf("Test %s: Unable to create resizer: %v", test.name, err) + } + controller := NewResizeController(driverName, + csiResizer, kubeClient, + time.Second, informerFactory, + workqueue.DefaultControllerRateLimiter(), true /*handleVolumeInUseError*/) + + ctrlInstance, _ := controller.(*resizeController) + pvc, _, err, resizeCalled := ctrlInstance.expandAndRecover(test.pvc, test.pv) + if err != nil { + t.Fatalf("expansion failed with %v", err) + } + if *pvc.Status.ResizeStatus != test.expectedResizeStatus { + t.Fatalf("expected resize status to be %s, got %s", test.expectedResizeStatus, *pvc.Status.ResizeStatus) + } + + actualAllocatedSize := pvc.Status.AllocatedResources.Storage() + + if test.expectedAllocatedSize.Cmp(*actualAllocatedSize) != 0 { + t.Fatalf("expansion failed: expected allocated size %s, got %s", test.expectedAllocatedSize.String(), actualAllocatedSize.String()) + } + if test.expectResizeCall != resizeCalled { + t.Fatalf("expansion failed: expected resize called %t, got %t", test.expectResizeCall, resizeCalled) + } + }) + } +} + +func getTestPVC(volumeName string, specSize, statusSize, allocatedSize string, resizeStatus v1.PersistentVolumeClaimResizeStatus) *v1.PersistentVolumeClaim { + pvc := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "claim01", + Namespace: defaultNS, + UID: "test-uid", + }, + Spec: v1.PersistentVolumeClaimSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceStorage: resource.MustParse(specSize)}}, + VolumeName: volumeName, + }, + Status: v1.PersistentVolumeClaimStatus{ + Phase: v1.ClaimBound, + }, + } + if len(statusSize) > 0 { + pvc.Status.Capacity = v1.ResourceList{v1.ResourceStorage: resource.MustParse(statusSize)} + } + if len(allocatedSize) > 0 { + pvc.Status.AllocatedResources = v1.ResourceList{v1.ResourceStorage: resource.MustParse(allocatedSize)} + } + if len(resizeStatus) > 0 { + pvc.Status.ResizeStatus = &resizeStatus + } + return pvc +} From 4127d626a066a18d07b24ae08b3c00a58c0272de Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 13 Jan 2022 11:33:43 -0500 Subject: [PATCH 3/5] Fix tests for recovery from expansion --- pkg/controller/controller_test.go | 1 + pkg/controller/expand_and_recover_test.go | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index a5a00d4e5..ba960f58e 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -485,6 +485,7 @@ func createPV(capacityGB int, pvcName, pvcNamespace string, pvcUID types.UID, vo Name: pvcName, UID: pvcUID, } + pv.Status.Phase = v1.VolumeBound } return pv } diff --git a/pkg/controller/expand_and_recover_test.go b/pkg/controller/expand_and_recover_test.go index f266c95ca..dc267b3b7 100644 --- a/pkg/controller/expand_and_recover_test.go +++ b/pkg/controller/expand_and_recover_test.go @@ -69,18 +69,18 @@ func TestExpandAndRecover(t *testing.T) { { name: "pv.spec.size >= pvc.spec.size, recover_expansion=on, resize_status=node_expansion_failed", pvc: getTestPVC("test-vol0", "2G", "1G", "2G", v1.PersistentVolumeClaimNodeExpansionFailed), - pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), + pv: createPV(2, "claim01", defaultNS, "test-uid", &fsVolumeMode), recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, expectedAllocatedSize: resource.MustParse("2G"), - expectResizeCall: false, + expectResizeCall: true, }, } for i := range tests { test := tests[i] t.Run(test.name, func(t *testing.T) { defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RecoverVolumeExpansionFailure, test.recoverFeatureGate)() - client := csi.NewMockClient("mock", !test.disableNodeExpansion, true, true, true) + client := csi.NewMockClient("foo", !test.disableNodeExpansion, true, true, true) driverName, _ := client.GetDriverName(context.TODO()) var initialObjects []runtime.Object @@ -103,6 +103,9 @@ func TestExpandAndRecover(t *testing.T) { if err != nil { t.Fatalf("expansion failed with %v", err) } + if test.expectResizeCall != resizeCalled { + t.Fatalf("expansion failed: expected resize called %t, got %t", test.expectResizeCall, resizeCalled) + } if *pvc.Status.ResizeStatus != test.expectedResizeStatus { t.Fatalf("expected resize status to be %s, got %s", test.expectedResizeStatus, *pvc.Status.ResizeStatus) } @@ -112,9 +115,7 @@ func TestExpandAndRecover(t *testing.T) { if test.expectedAllocatedSize.Cmp(*actualAllocatedSize) != 0 { t.Fatalf("expansion failed: expected allocated size %s, got %s", test.expectedAllocatedSize.String(), actualAllocatedSize.String()) } - if test.expectResizeCall != resizeCalled { - t.Fatalf("expansion failed: expected resize called %t, got %t", test.expectResizeCall, resizeCalled) - } + }) } } From 9f08c69cd39e69357e73a6dbfa0c42f21817f0b9 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Fri, 14 Jan 2022 07:53:59 -0500 Subject: [PATCH 4/5] Add more tests for pending expansion fix bug with switch..case statement --- pkg/controller/controller.go | 8 ++--- pkg/controller/expand_and_recover.go | 38 +++++++++++++++-------- pkg/controller/expand_and_recover_test.go | 18 +++++++++++ 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index df5014c2a..6f5a54b59 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -529,7 +529,7 @@ func (ctrl *resizeController) markPVCResizeInProgress(pvc *v1.PersistentVolumeCl updatedPVC, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) if err != nil { - return nil, err + return updatedPVC, err } return updatedPVC, nil } @@ -555,16 +555,16 @@ func (ctrl *resizeController) markPVCResizeFinished( func (ctrl *resizeController) patchClaim(oldPVC, newPVC *v1.PersistentVolumeClaim, addResourceVersionCheck bool) (*v1.PersistentVolumeClaim, error) { patchBytes, err := util.GetPVCPatchData(oldPVC, newPVC, addResourceVersionCheck) if err != nil { - return nil, fmt.Errorf("can't patch status of PVC %s as generate path data failed: %v", util.PVCKey(oldPVC), err) + return oldPVC, fmt.Errorf("can't patch status of PVC %s as generate path data failed: %v", util.PVCKey(oldPVC), err) } updatedClaim, updateErr := ctrl.kubeClient.CoreV1().PersistentVolumeClaims(oldPVC.Namespace). Patch(context.TODO(), oldPVC.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status") if updateErr != nil { - return nil, fmt.Errorf("can't patch status of PVC %s with %v", util.PVCKey(oldPVC), updateErr) + return oldPVC, fmt.Errorf("can't patch status of PVC %s with %v", util.PVCKey(oldPVC), updateErr) } err = ctrl.claims.Update(updatedClaim) if err != nil { - return nil, fmt.Errorf("error updating PVC %s in local cache: %v", util.PVCKey(newPVC), err) + return oldPVC, fmt.Errorf("error updating PVC %s in local cache: %v", util.PVCKey(newPVC), err) } return updatedClaim, nil diff --git a/pkg/controller/expand_and_recover.go b/pkg/controller/expand_and_recover.go index bb5784c2f..8b151fb0b 100644 --- a/pkg/controller/expand_and_recover.go +++ b/pkg/controller/expand_and_recover.go @@ -31,7 +31,7 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv klog.V(4).Infof("No need to resize PV %q", pv.Name) return pvc, pv, nil, false } - resizeCalled := false + resizeNotCalled := false // if we are here that already means pvc.Spec.Size > pvc.Status.Size pvcSpecSize := pvc.Spec.Resources.Requests[v1.ResourceStorage] @@ -53,15 +53,26 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv if pvSize.Cmp(pvcSpecSize) < 0 { // PV is smaller than user requested size. In general some control-plane volume expansion // is necessary at this point but if we were expanding the PVC before we should let - // previous operation finished before starting expansion to new user requested size. + // previous operation finish before starting expansion to new user requested size. switch resizeStatus { - case v1.PersistentVolumeClaimControllerExpansionInProgress: - case v1.PersistentVolumeClaimNodeExpansionPending: - case v1.PersistentVolumeClaimNodeExpansionInProgress: - case v1.PersistentVolumeClaimNodeExpansionFailed: + case v1.PersistentVolumeClaimControllerExpansionInProgress, + v1.PersistentVolumeClaimNodeExpansionFailed: if allocatedSize != nil { newSize = *allocatedSize } + case v1.PersistentVolumeClaimNodeExpansionPending, + v1.PersistentVolumeClaimNodeExpansionInProgress: + if allocatedSize != nil { + newSize = *allocatedSize + } + // If PV is already greater or equal to whatever newSize is, then we should + // let previously issued node expansion finish before starting new one. + // This case is essentially an optimization on previous case, generally it is safe + // to let volume plugin receive the expansion call (expansion calls are idempotent) + // but having this check here *avoids* unnecessary RPC calls to plugin. + if pvSize.Cmp(newSize) >= 0 { + return pvc, pv, nil, resizeNotCalled + } default: newSize = pvcSpecSize } @@ -77,11 +88,11 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv // safe to do so. // 4. While expansion was still pending on the node, user reduced the pvc size. switch resizeStatus { - case v1.PersistentVolumeClaimNodeExpansionInProgress: - case v1.PersistentVolumeClaimNodeExpansionPending: + case v1.PersistentVolumeClaimNodeExpansionInProgress, + v1.PersistentVolumeClaimNodeExpansionPending: // we don't need to do any work. We could be here because of a spurious update event. // This is case #1 - return pvc, pv, nil, resizeCalled + return pvc, pv, nil, resizeNotCalled case v1.PersistentVolumeClaimNodeExpansionFailed: // This is case#3, we need to reset the pvc status in such a way that kubelet can safely retry volume // expansion. @@ -90,9 +101,9 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv } else { newSize = pvcSpecSize } - case v1.PersistentVolumeClaimControllerExpansionInProgress: - case v1.PersistentVolumeClaimControllerExpansionFailed: - case v1.PersistentVolumeClaimNoExpansionInProgress: + case v1.PersistentVolumeClaimControllerExpansionInProgress, + v1.PersistentVolumeClaimControllerExpansionFailed, + v1.PersistentVolumeClaimNoExpansionInProgress: // This is case#2 or it could also be case#4 when user manually shrunk the PVC // after expanding it. if allocatedSize != nil { @@ -112,7 +123,7 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv var err error pvc, err = ctrl.markControllerResizeInProgress(pvc, newSize) if err != nil { - return pvc, pv, fmt.Errorf("marking pvc %q as resizing failed: %v", util.PVCKey(pvc), err), resizeCalled + return pvc, pv, fmt.Errorf("marking pvc %q as resizing failed: %v", util.PVCKey(pvc), err), resizeNotCalled } // if pvc previously failed to expand because it can't be expanded when in-use @@ -138,6 +149,7 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv if err != nil { // Record an event to indicate that resize operation is failed. ctrl.eventRecorder.Eventf(pvc, v1.EventTypeWarning, util.VolumeResizeFailed, err.Error()) + return pvc, pv, err, true } klog.V(4).Infof("Update capacity of PV %q to %s succeeded", pv.Name, newSize.String()) diff --git a/pkg/controller/expand_and_recover_test.go b/pkg/controller/expand_and_recover_test.go index dc267b3b7..77b283805 100644 --- a/pkg/controller/expand_and_recover_test.go +++ b/pkg/controller/expand_and_recover_test.go @@ -75,6 +75,24 @@ func TestExpandAndRecover(t *testing.T) { expectedAllocatedSize: resource.MustParse("2G"), expectResizeCall: true, }, + { + name: "pvc.spec.size > pv.spec.size, recover_expansion=on, resize_status-node_expansion_pending", + pvc: getTestPVC("test-vol0", "10G", "1G", "3G", v1.PersistentVolumeClaimNodeExpansionPending), + pv: createPV(3, "claim01", defaultNS, "test-uid", &fsVolumeMode), + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("3G"), + expectResizeCall: false, + }, + { + name: "pvc.spec.size > pv.spec.size, recover_expansion=on, resize_status-node_expansion_inprogress", + pvc: getTestPVC("test-vol0", "10G", "1G", "3G", v1.PersistentVolumeClaimNodeExpansionInProgress), + pv: createPV(3, "claim01", defaultNS, "test-uid", &fsVolumeMode), + recoverFeatureGate: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionInProgress, + expectedAllocatedSize: resource.MustParse("3G"), + expectResizeCall: false, + }, } for i := range tests { test := tests[i] From 8c552c3224db809a53e7e34359e5ddf1182d7c4d Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 18 Jan 2022 14:34:12 -0500 Subject: [PATCH 5/5] Fix review comments and add tests --- pkg/controller/controller.go | 8 ++-- pkg/controller/expand_and_recover.go | 6 ++- pkg/controller/expand_and_recover_test.go | 52 ++++++++++++++--------- pkg/controller/resize_status.go | 8 +++- 4 files changed, 46 insertions(+), 28 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 6f5a54b59..b466fd0ab 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -528,10 +528,7 @@ func (ctrl *resizeController) markPVCResizeInProgress(pvc *v1.PersistentVolumeCl []v1.PersistentVolumeClaimCondition{progressCondition}) updatedPVC, err := ctrl.patchClaim(pvc, newPVC, true /* addResourceVersionCheck */) - if err != nil { - return updatedPVC, err - } - return updatedPVC, nil + return updatedPVC, err } func (ctrl *resizeController) markPVCResizeFinished( @@ -552,6 +549,9 @@ func (ctrl *resizeController) markPVCResizeFinished( return nil } +// Patches a given PVC with changes from newPVC. If addResourceVersionCheck is true +// then a version check is added to the patch to ensure that we are not patching +// old(and possibly outdated) PVC objects. func (ctrl *resizeController) patchClaim(oldPVC, newPVC *v1.PersistentVolumeClaim, addResourceVersionCheck bool) (*v1.PersistentVolumeClaim, error) { patchBytes, err := util.GetPVCPatchData(oldPVC, newPVC, addResourceVersionCheck) if err != nil { diff --git a/pkg/controller/expand_and_recover.go b/pkg/controller/expand_and_recover.go index 8b151fb0b..dca26e0f3 100644 --- a/pkg/controller/expand_and_recover.go +++ b/pkg/controller/expand_and_recover.go @@ -1,5 +1,5 @@ /* -Copyright 2018 The Kubernetes Authors. +Copyright 2022 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,6 +31,8 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv klog.V(4).Infof("No need to resize PV %q", pv.Name) return pvc, pv, nil, false } + // only used as a sentinel value when function returns without + // actually performing expansion on the volume. resizeNotCalled := false // if we are here that already means pvc.Spec.Size > pvc.Status.Size @@ -132,7 +134,7 @@ func (ctrl *resizeController) expandAndRecover(pvc *v1.PersistentVolumeClaim, pv // Record an event to indicate that resizer is not expanding the pvc msg := fmt.Sprintf("Unable to expand %s because CSI driver %s only supports offline expansion and volume is currently in-use", util.PVCKey(pvc), ctrl.resizer.Name()) ctrl.eventRecorder.Event(pvc, v1.EventTypeWarning, util.VolumeResizeFailed, msg) - return pvc, pv, fmt.Errorf(msg), false + return pvc, pv, fmt.Errorf(msg), resizeNotCalled } // Record an event to indicate that external resizer is resizing this volume. diff --git a/pkg/controller/expand_and_recover_test.go b/pkg/controller/expand_and_recover_test.go index 77b283805..eca0bc5c2 100644 --- a/pkg/controller/expand_and_recover_test.go +++ b/pkg/controller/expand_and_recover_test.go @@ -19,76 +19,86 @@ import ( func TestExpandAndRecover(t *testing.T) { fsVolumeMode := v1.PersistentVolumeFilesystem var tests = []struct { - name string - pvc *v1.PersistentVolumeClaim - pv *v1.PersistentVolume - recoverFeatureGate bool - disableNodeExpansion bool + name string + pvc *v1.PersistentVolumeClaim + pv *v1.PersistentVolume + disableNodeExpansion bool + disableControllerExpansion bool // expectations of test expectedResizeStatus v1.PersistentVolumeClaimResizeStatus expectedAllocatedSize resource.Quantity expectResizeCall bool }{ { - name: "pvc.spec.size > pv.spec.size, recover_expansion=on", + name: "pvc.spec.size > pv.spec.size, resize_status=node_expansion_inprogress", pvc: getTestPVC("test-vol0", "2G", "1G", "", v1.PersistentVolumeClaimNoExpansionInProgress), pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, expectedAllocatedSize: resource.MustParse("2G"), expectResizeCall: true, }, { - name: "pvc.spec.size = pv.spec.size, recover_expansion=on", + name: "pvc.spec.size = pv.spec.size, resize_status=no_expansion_inprogress", pvc: getTestPVC("test-vol0", "1G", "1G", "", v1.PersistentVolumeClaimNoExpansionInProgress), pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, expectedAllocatedSize: resource.MustParse("1G"), expectResizeCall: true, }, { - name: "pvc.spec.size = pv.spec.size, recover_expansion=on", + name: "pvc.spec.size > pv.spec.size, resize_status=controller_expansion_failed", + pvc: getTestPVC("test-vol0", "5G" /*specSize*/, "3G" /*statusSize*/, "10G" /*allocatedSize*/, v1.PersistentVolumeClaimControllerExpansionFailed), + pv: createPV(3, "claim01", defaultNS, "test-uid", &fsVolumeMode), + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("5G"), + expectResizeCall: true, + }, + { + name: "pvc.spec.size = pv.spec.size, resize_status=node_expansion_failed, disable_controller_expansion=true", + pvc: getTestPVC("test-vol0", "5G" /*specSize*/, "3G" /*statusSize*/, "10G" /*allocatedSize*/, v1.PersistentVolumeClaimNodeExpansionFailed), + pv: createPV(10, "claim01", defaultNS, "test-uid", &fsVolumeMode), + disableControllerExpansion: true, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, + expectedAllocatedSize: resource.MustParse("5G"), + expectResizeCall: true, + }, + { + name: "pvc.spec.size = pv.spec.size, resize_status=node_expansion_pending", pvc: getTestPVC("test-vol0", "1G", "1G", "1G", v1.PersistentVolumeClaimNodeExpansionPending), pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, expectedAllocatedSize: resource.MustParse("1G"), expectResizeCall: false, }, { - name: "pvc.spec.size > pv.spec.size, recover_expansion=on, disable_node_expansion=true", + name: "pvc.spec.size > pv.spec.size, disable_node_expansion=true, resize_status=no_expansion_inprogress", pvc: getTestPVC("test-vol0", "2G", "1G", "", v1.PersistentVolumeClaimNoExpansionInProgress), pv: createPV(1, "claim01", defaultNS, "test-uid", &fsVolumeMode), disableNodeExpansion: true, - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNoExpansionInProgress, expectedAllocatedSize: resource.MustParse("2G"), expectResizeCall: true, }, { - name: "pv.spec.size >= pvc.spec.size, recover_expansion=on, resize_status=node_expansion_failed", + name: "pv.spec.size >= pvc.spec.size, resize_status=node_expansion_failed", pvc: getTestPVC("test-vol0", "2G", "1G", "2G", v1.PersistentVolumeClaimNodeExpansionFailed), pv: createPV(2, "claim01", defaultNS, "test-uid", &fsVolumeMode), - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, expectedAllocatedSize: resource.MustParse("2G"), expectResizeCall: true, }, { - name: "pvc.spec.size > pv.spec.size, recover_expansion=on, resize_status-node_expansion_pending", + name: "pvc.spec.size > pv.spec.size, resize_status-node_expansion_pending", pvc: getTestPVC("test-vol0", "10G", "1G", "3G", v1.PersistentVolumeClaimNodeExpansionPending), pv: createPV(3, "claim01", defaultNS, "test-uid", &fsVolumeMode), - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionPending, expectedAllocatedSize: resource.MustParse("3G"), expectResizeCall: false, }, { - name: "pvc.spec.size > pv.spec.size, recover_expansion=on, resize_status-node_expansion_inprogress", + name: "pvc.spec.size > pv.spec.size, resize_status-node_expansion_inprogress", pvc: getTestPVC("test-vol0", "10G", "1G", "3G", v1.PersistentVolumeClaimNodeExpansionInProgress), pv: createPV(3, "claim01", defaultNS, "test-uid", &fsVolumeMode), - recoverFeatureGate: true, expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionInProgress, expectedAllocatedSize: resource.MustParse("3G"), expectResizeCall: false, @@ -97,8 +107,8 @@ func TestExpandAndRecover(t *testing.T) { for i := range tests { test := tests[i] t.Run(test.name, func(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RecoverVolumeExpansionFailure, test.recoverFeatureGate)() - client := csi.NewMockClient("foo", !test.disableNodeExpansion, true, true, true) + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RecoverVolumeExpansionFailure, true)() + client := csi.NewMockClient("foo", !test.disableNodeExpansion, !test.disableControllerExpansion, true, true) driverName, _ := client.GetDriverName(context.TODO()) var initialObjects []runtime.Object diff --git a/pkg/controller/resize_status.go b/pkg/controller/resize_status.go index 5ad679ebb..4ed8e1e1f 100644 --- a/pkg/controller/resize_status.go +++ b/pkg/controller/resize_status.go @@ -1,5 +1,5 @@ /* -Copyright 2018 The Kubernetes Authors. +Copyright 2022 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -83,6 +83,12 @@ func (ctrl *resizeController) markControllerExpansionFailed(pvc *v1.PersistentVo newPVC := pvc.DeepCopy() newPVC.Status.ResizeStatus = &expansionFailedOnController + // We are setting addResourceVersionCheck as false as an optimization + // because if expansion fails on controller and somehow we can't update PVC + // because our version of object is slightly older then the entire resize + // operation must be restarted before ResizeStatus can be set to Expansionfailedoncontroller. + // Setting addResourceVersionCheck to `false` ensures that we set `ResizeStatus` + // even if our version of PVC was slightly older. updatedPVC, err := ctrl.patchClaim(pvc, newPVC, false /* addResourceVersionCheck */) if err != nil { return pvc, fmt.Errorf("Mark PVC %q as controller expansion failed, errored with: %v", util.PVCKey(pvc), err)