From 444d46d54255c25d88539585218e202664888c98 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Mon, 8 Jul 2019 15:22:03 +0530
Subject: [PATCH] Add support for standalone snapshot creation

With the current implementation in ceph-csi, it's not possible to delete
a cloned volume while its snapshot is present, due to the parent-child
linking. To remove this dependency we had a discussion and came up with
the idea of separating the clone from the snapshot, so that the snapshot
and the cloned image can be deleted in any order.

The steps followed to create an independent snapshot are as follows:

* Create a temporary snapshot from the parent volume
* Clone a new image from the temporary snapshot with the options
  `--rbd-default-clone-format 2 --image-feature layering,deep-flatten`
* Delete the temporary snapshot created above
* Create a snapshot with the requested name
* Clone a new image from the snapshot with user-provided options
* Check the depth of the image against the maximum number of nested
  volume clones allowed (default 16, configurable); if the depth is
  reached, flatten the newly cloned image
* Delete the cloned image (earlier we removed the image with the
  `rbd rm` command; with the new design the image is moved to the
  trash, and the same applies to normal volume deletion)
* Delete the temporary cloned image which was created for the snapshot
* Delete the snapshot

example commands (placeholders in angle brackets):

```
1) rbd snap create <pool>/<image>@<temp-snap>
2) rbd clone --rbd-default-clone-format 2 --image-feature
   layering,deep-flatten <pool>/<image>@<temp-snap> <pool>/<temp-clone>
3) rbd snap rm <pool>/<image>@<temp-snap>
4) rbd snap create <pool>/<temp-clone>@<snap>
5) rbd clone --rbd-default-clone-format 2 --image-feature <features>
   <pool>/<temp-clone>@<snap> <pool>/<clone>
```

Signed-off-by: Madhu Rajanna
---
 cmd/cephcsi.go                                |   5 +-
 .../templates/provisioner-statefulset.yaml    |   1 +
 .../kubernetes/csi-rbdplugin-provisioner.yaml |   1 +
 e2e/cephfs.go                                 |  36 +--
 e2e/rbd.go                                    | 102 +------
 e2e/utils.go                                  | 213 ++++++++++++++-
 pkg/rbd/controllerserver.go                   | 243 +++++++++++++-----
 pkg/rbd/rbd.go                                |   7 +-
 pkg/rbd/rbd_journal.go                        |  14 +
 pkg/rbd/rbd_util.go                           | 106 ++++----
 pkg/util/cephcmds.go                          |  36 ++-
 pkg/util/voljournal.go                        |  51 ++++
 12 files changed, 555 insertions(+), 260 deletions(-)

diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go
index 665b106d73a..6c7526259ea 100644
--- a/cmd/cephcsi.go
+++ b/cmd/cephcsi.go
@@ -47,7 +47,8 @@ var (
 	metadataStorage = flag.String("metadatastorage", "", "metadata persistence method [node|k8s_configmap]")
 
 	// rbd related flags
-	containerized = flag.Bool("containerized", true, "whether run as containerized")
+	containerized    = flag.Bool("containerized", true, "whether run as containerized")
+	rbdMaxCloneDepth = flag.Uint("rbdmaximumclonedepth", 16, "Maximum number of nested volume clones that are taken before a flatten occurs")
 
 	// cephfs related flags
 	volumeMounter = flag.String("volumemounter", "", "default volume mounter (possible options are 'kernel', 'fuse')")
@@ -117,7 +118,7 @@ func main() {
 			}
 		}
 		driver := rbd.NewDriver()
-		driver.Run(dname, *nodeID, *endpoint, *instanceID, *containerized, cp)
+		driver.Run(dname, *nodeID, *endpoint, *instanceID, *containerized, *rbdMaxCloneDepth, cp)
 
 	case cephfsType:
 		cephfs.PluginFolder += dname

diff --git a/deploy/rbd/helm/templates/provisioner-statefulset.yaml b/deploy/rbd/helm/templates/provisioner-statefulset.yaml
index 633cddcad59..8f5266434dd 100644
--- a/deploy/rbd/helm/templates/provisioner-statefulset.yaml
+++ b/deploy/rbd/helm/templates/provisioner-statefulset.yaml
@@ -88,6 +88,7 @@ spec:
             - "--v=5"
             - "--drivername=$(DRIVER_NAME)"
             - "--containerized=true"
+            - "--rbdmaximumclonedepth=14"
          env:
            - name: HOST_ROOTFS
              value: "/rootfs"
diff --git a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
index 8511a259180..3501622cb1d 100644
--- a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
+++ b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
@@ -85,6 +85,7 @@ spec:
            - "--v=5"
            - "--drivername=rbd.csi.ceph.com"
            - "--containerized=true"
+           - "--rbdmaximumclonedepth=14"
          env:
            - name: HOST_ROOTFS
              value: "/rootfs"

diff --git a/e2e/cephfs.go b/e2e/cephfs.go
index 3d7fcb95c35..45cc7a82445 100644
--- a/e2e/cephfs.go
+++ b/e2e/cephfs.go
@@ -1,7 +1,6 @@
 package e2e
 
 import (
-	"fmt"
 	"time"
 
 	. "github.com/onsi/ginkgo" // nolint
@@ -74,7 +73,6 @@ var _ = Describe("cephfs", func() {
 		By("create and delete a PVC", func() {
 			By("create a PVC and Bind it to an app", func() {
 				validatePVCAndAppBinding(pvcPath, appPath, f)
-
 			})
 
 			By("create a PVC and Bind it to an app with normal user", func() {
@@ -82,38 +80,8 @@ var _ = Describe("cephfs", func() {
 			})
 
 			By("create/delete multiple PVCs and Apps", func() {
-				totalCount := 2
-				pvc, err := loadPVC(pvcPath)
-				if err != nil {
-					Fail(err.Error())
-				}
-				pvc.Namespace = f.UniqueName
-
-				app, err := loadApp(appPath)
-				if err != nil {
-					Fail(err.Error())
-				}
-				app.Namespace = f.UniqueName
-				// create pvc and app
-				for i := 0; i < totalCount; i++ {
-					name := fmt.Sprintf("%s%d", f.UniqueName, i)
-					err := createPVCAndApp(name, f, pvc, app)
-					if err != nil {
-						Fail(err.Error())
-					}
-
-				}
-				// TODO add cephfs backend validation
-
-				// delete pvc and app
-				for i := 0; i < totalCount; i++ {
-					name := fmt.Sprintf("%s%d", f.UniqueName, i)
-					err := deletePVCAndApp(name, f, pvc, app)
-					if err != nil {
-						Fail(err.Error())
-					}
-
-				}
+				totalCount := 10
+				validatePVCAndApp(false, pvcPath, appPath, totalCount, f)
 			})
 		})

diff --git a/e2e/rbd.go b/e2e/rbd.go
index 8882b2712ab..f63b83af8d1 100644
--- a/e2e/rbd.go
+++ b/e2e/rbd.go
@@ -1,7 +1,6 @@
 package e2e
 
 import (
-	"fmt"
 	"time"
 
 	. "github.com/onsi/ginkgo" // nolint
@@ -68,6 +67,8 @@ var _ = Describe("RBD", func() {
 		appClonePath := rbdExamplePath + "pod-restore.yaml"
 		snapshotPath := rbdExamplePath + "snapshot.yaml"
 
+		totalCount := 20
+
 		By("checking provisioner statefulset is running")
 		timeout := time.Duration(deployTimeout) * time.Minute
 		err := framework.WaitForStatefulSetReplicasReady(rbdDeploymentName, namespace, f.ClientSet, 1*time.Second, timeout)
@@ -89,105 +90,18 @@ var _ = Describe("RBD", func() {
 			validateNormalUserPVCAccess(pvcPath, f)
 		})
 
-		By("create a PVC clone and Bind it to an app", func() {
-			createRBDSnapshotClass(f)
-			pvc, err := loadPVC(pvcPath)
-			if err != nil {
-				Fail(err.Error())
-			}
-
-			pvc.Namespace = f.UniqueName
-			e2elog.Logf("The PVC template %+v", pvc)
-			err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
-			if err != nil {
-				Fail(err.Error())
-			}
-			// validate created backend rbd images
-			images := listRBDImages(f)
-			if len(images) != 1 {
-				e2elog.Logf("backend image count %d expected image count %d", len(images), 1)
-				Fail("validate backend image failed")
-			}
-			snap := getSnapshot(snapshotPath)
-			snap.Namespace = f.UniqueName
-			snap.Spec.Source.Name = pvc.Name
-			snap.Spec.Source.Kind = "PersistentVolumeClaim"
-			err = createSnapshot(&snap, deployTimeout)
-			if err != nil {
-				Fail(err.Error())
-			}
-			pool := "replicapool"
-			snapList, err := listSnapshots(f, pool, images[0])
-			if err != nil {
-				Fail(err.Error())
-			}
-			if len(snapList) != 1 {
-				e2elog.Logf("backend snapshot not matching kube snap count,snap count = % kube snap count %d", len(snapList), 1)
-				Fail("validate backend snapshot failed")
-			}
-
-			validatePVCAndAppBinding(pvcClonePath, appClonePath, f)
-
-			err = deleteSnapshot(&snap, deployTimeout)
-			if err != nil {
-				Fail(err.Error())
-			}
-			err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout)
-			if err != nil {
-				Fail(err.Error())
-			}
-		})
-
 		// skipped raw pvc test in travis
 		// By("create a block type PVC and Bind it to an app", func() {
 		// 	validatePVCAndAppBinding(rawPvcPath, rawAppPath, f)
 		// })
 
 		By("create/delete multiple PVCs and Apps", func() {
-			totalCount := 2
-			pvc, err := loadPVC(pvcPath)
-			if err != nil {
-				Fail(err.Error())
-			}
-			pvc.Namespace = f.UniqueName
-
-			app, err := loadApp(appPath)
-			if err != nil {
-				Fail(err.Error())
-			}
-			app.Namespace = f.UniqueName
-			// create pvc and app
-			for i := 0; i < totalCount; i++ {
-				name := fmt.Sprintf("%s%d", f.UniqueName, i)
-				err := createPVCAndApp(name, f, pvc, app)
-				if err != nil {
-					Fail(err.Error())
-				}
-
-			}
-			// validate created backend rbd images
-			images := listRBDImages(f)
-			if len(images) != totalCount {
-				e2elog.Logf("backend image creation not matching pvc count, image count = % pvc count %d", len(images), totalCount)
-				Fail("validate multiple pvc failed")
-			}
-
-			// delete pvc and app
-			for i := 0; i < totalCount; i++ {
-				name := fmt.Sprintf("%s%d", f.UniqueName, i)
-				err := deletePVCAndApp(name, f, pvc, app)
-				if err != nil {
-					Fail(err.Error())
-				}
-
-			}
-
-			// validate created backend rbd images
-			images = listRBDImages(f)
-			if len(images) > 0 {
-				e2elog.Logf("left out rbd backend images count %d", len(images))
-				Fail("validate multiple pvc failed")
-			}
+			validatePVCAndApp(true, pvcPath, appPath, totalCount, f)
+		})
+
+		By("create/delete multiple clone PVCs with datasource=snapshot and Apps", func() {
+			createRBDSnapshotClass(f)
+			validateCloneFromSnapshot(pvcPath, appPath, snapshotPath, pvcClonePath, appClonePath, totalCount, f)
 		})
 	})
 })

diff --git a/e2e/utils.go b/e2e/utils.go
index a84de2c3ed8..a7dbd84dc1c 100644
--- a/e2e/utils.go
+++ b/e2e/utils.go
@@ -516,12 +516,15 @@ func checkCephPods(ns string, c kubernetes.Interface, count, t int, opt *metav1.
 
 // createPVCAndApp creates pvc and pod
 // if name is not empty same will be set as pvc and app name
-func createPVCAndApp(name string, f *framework.Framework, pvc *v1.PersistentVolumeClaim, app *v1.Pod) error {
+func createPVCAndApp(pvcName, snapName string, f *framework.Framework, pvc *v1.PersistentVolumeClaim, app *v1.Pod) error {
 
-	if name != "" {
-		pvc.Name = name
-		app.Name = name
-		app.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = name
+	if pvcName != "" {
+		pvc.Name = pvcName
+		app.Name = pvcName
+		app.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = pvcName
+	}
+	if snapName != "" {
+		pvc.Spec.DataSource.Name = snapName
 	}
 	err := createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
 	if err != nil {
@@ -533,12 +536,16 @@ func createPVCAndApp(name string, f *framework.Framework, pvc *v1.PersistentVolu
 
 // deletePVCAndApp delete pvc and pod
 // if name is not empty same will be set as pvc and app name
-func deletePVCAndApp(name string, f *framework.Framework, pvc *v1.PersistentVolumeClaim, app *v1.Pod) error {
+func deletePVCAndApp(pvcName, snapName string, f *framework.Framework, pvc *v1.PersistentVolumeClaim, app *v1.Pod) error {
+
+	if pvcName != "" {
+		pvc.Name = pvcName
+		app.Name = pvcName
+		app.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = pvcName
+	}
 
-	if name != "" {
-		pvc.Name = name
-		app.Name = name
-		app.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = name
+	if snapName != "" {
+		pvc.Spec.DataSource.Name = snapName
 	}
 
 	err := deletePod(app.Name, app.Namespace, f.ClientSet, deployTimeout)
@@ -563,12 +570,12 @@ func validatePVCAndAppBinding(pvcPath, appPath string, f *framework.Framework) {
 	}
 	app.Namespace = f.UniqueName
 
-	err = createPVCAndApp("", f, pvc, app)
+	err = createPVCAndApp("", "", f, pvc, app)
 	if err != nil {
 		Fail(err.Error())
 	}
 
-	err = deletePVCAndApp("", f, pvc, app)
+	err = deletePVCAndApp("", "", f, pvc, app)
 	if err != nil {
 		Fail(err.Error())
 	}
@@ -745,3 +752,185 @@ func listSnapshots(f *framework.Framework, pool, imageName string) ([]snapInfo,
 	err := json.Unmarshal([]byte(stdout), &snapInfos)
 	return snapInfos, err
 }
+
+func validateCloneFromSnapshot(pvcPath, appPath, snapshotPath, pvcClonePath, appClonePath string, total int, f *framework.Framework) {
+	pvc, err := loadPVC(pvcPath)
+	if err != nil {
+		Fail(err.Error())
+	}
+
+	app, err := loadApp(appPath)
+	if err != nil {
+		Fail(err.Error())
+	}
+	pvcClone, err := loadPVC(pvcClonePath)
+	if err != nil {
+		Fail(err.Error())
+	}
+	appClone, err := loadApp(appClonePath)
+	if err != nil {
+		Fail(err.Error())
+	}
+
+	pvc.Namespace = f.UniqueName
+	app.Namespace = f.UniqueName
+	pvcClone.Namespace = f.UniqueName
+	appClone.Namespace = f.UniqueName
+	err = createPVCAndApp("", "", f, pvc, app)
+	if err != nil {
+		Fail(err.Error())
+	}
+	// validate created backend rbd images
+	images := listRBDImages(f)
+	if len(images) != 1 {
+		e2elog.Logf("backend image count %d expected image count %d", len(images), 1)
+		Fail("validate backend image failed")
+	}
+	snap := getSnapshot(snapshotPath)
+	snap.Namespace = f.UniqueName
+	snap.Spec.Source.Name = pvc.Name
+	snap.Spec.Source.Kind = "PersistentVolumeClaim"
+	for i := 0; i < total; i++ {
+		snap.Name = fmt.Sprintf("%s%d", f.UniqueName, i)
+		err = createSnapshot(&snap, deployTimeout)
+		if err != nil {
+			Fail(err.Error())
+		}
+	}
+	pool := "replicapool"
+	snapList, err := listSnapshots(f, pool, images[0])
+	if err != nil {
+		Fail(err.Error())
+	}
+	// check for any stale snapshots present on the parent volume
+	if len(snapList) != 0 {
+		e2elog.Logf("stale backend snapshot count = %d", len(snapList))
+		Fail("validate backend snapshot failed")
+	}
+
+	// total images to be present at the backend:
+	// parent PVC + total snapshots
+	// validate created backend rbd images
+	images = listRBDImages(f)
+	count := 1 + total
+	if len(images) != count {
+		e2elog.Logf("backend image creation not matching pvc count, image count = %d pvc count %d", len(images), count)
+		Fail("validate multiple snapshot failed")
+	}
+
+	for i := 0; i < total; i++ {
+		name := fmt.Sprintf("%s%d", f.UniqueName, i)
+		err = createPVCAndApp(name, name, f, pvcClone, appClone)
+		if err != nil {
+			Fail(err.Error())
+		}
+	}
+
+	// total images to be present at the backend:
+	// parent PVC + total*snapshot + total*pvc
+	// validate created backend rbd images
+	images = listRBDImages(f)
+	count = 1 + total + total
+	if len(images) != count {
+		e2elog.Logf("backend image creation not matching pvc count, image count = %v pvc count %d", len(images), count)
+		Fail("validate multiple snapshot failed")
+	}
+
+	deleteAndValidateSnapshot(images[0], pool, f, total, &snap)
+
+	for i := 0; i < total; i++ {
+		name := fmt.Sprintf("%s%d", f.UniqueName, i)
+		err = deletePVCAndApp(name, name, f, pvcClone, appClone)
+		if err != nil {
+			Fail(err.Error())
+		}
+	}
+
+	err = deletePVCAndApp("", "", f, pvc, app)
+	if err != nil {
+		Fail(err.Error())
+	}
+
+	// validate created backend rbd images
+	images = listRBDImages(f)
+	if len(images) != 0 {
+		e2elog.Logf("backend image count %d expected image count %d", len(images), 0)
+		Fail("validate backend image failed")
+	}
+}
+
+func deleteAndValidateSnapshot(image, pool string, f *framework.Framework, total int, snap *v1alpha1.VolumeSnapshot) {
+	// delete all snapshots
+	for i := 0; i < total; i++ {
+		snap.Name = fmt.Sprintf("%s%d", f.UniqueName, i)
+		err := deleteSnapshot(snap, deployTimeout)
+		if err != nil {
+			Fail(err.Error())
+		}
+	}
+
+	snapList, err := listSnapshots(f, pool, image)
+	if err != nil {
+		Fail(err.Error())
+	}
+	if len(snapList) != 0 {
+		e2elog.Logf("stale snapshot in backend, count = %v", len(snapList))
+		Fail("validate backend snapshot failed")
+	}
+
+	// validate created backend rbd images
+	images := listRBDImages(f)
+	if len(images) != 1 {
+		e2elog.Logf("backend image count %d expected image count %d", len(images), 1)
+		Fail("validate backend image failed")
+	}
+}
+
+func validatePVCAndApp(rbd bool, pvcPath, appPath string, totalCount int, f *framework.Framework) {
+	pvc, err := loadPVC(pvcPath)
+	if err != nil {
+		Fail(err.Error())
+	}
+	pvc.Namespace = f.UniqueName
+
+	app, err := loadApp(appPath)
+	if err != nil {
+		Fail(err.Error())
+	}
+	app.Namespace = f.UniqueName
+	// create pvc and app
+	for i := 0; i < totalCount; i++ {
+		name := fmt.Sprintf("%s%d", f.UniqueName, i)
+		err := createPVCAndApp(name, "", f, pvc, app)
+		if err != nil {
+			Fail(err.Error())
+		}
+	}
+	if rbd {
+		// validate created backend rbd images
+		images := listRBDImages(f)
+		if len(images) != totalCount {
+			e2elog.Logf("backend image creation not matching pvc count, image count = %d pvc count %d", len(images), totalCount)
+			Fail("validate multiple pvc failed")
+		}
+	}
+	// delete pvc and app
+	for i := 0; i < totalCount; i++ {
+		name := fmt.Sprintf("%s%d", f.UniqueName, i)
+		err := deletePVCAndApp(name, "", f, pvc, app)
+		if err != nil {
+			Fail(err.Error())
+		}
+	}
+	if rbd {
+		// validate created backend rbd images
+		images := listRBDImages(f)
+		if len(images) > 0 {
+			e2elog.Logf("left out rbd backend images count %d", len(images))
+			Fail("validate multiple pvc failed")
+		}
+	}
+}

diff --git a/pkg/rbd/controllerserver.go b/pkg/rbd/controllerserver.go
index da417614d60..46734a5f80c 100644
--- a/pkg/rbd/controllerserver.go
+++ b/pkg/rbd/controllerserver.go
@@ -17,10 +17,9 @@ limitations under the License.
 package rbd
 
 import (
-	"fmt"
-
 	csicommon "github.com/ceph/ceph-csi/pkg/csi-common"
 	"github.com/ceph/ceph-csi/pkg/util"
+	"github.com/pkg/errors"
 
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
@@ -224,6 +223,12 @@ func (cs *ControllerServer) checkSnapshot(req *csi.CreateVolumeRequest, rbdVol *
 		return status.Error(codes.Internal, err.Error())
 	}
 	klog.V(4).Infof("create volume %s from snapshot %s", req.GetName(), rbdSnap.RbdSnapName)
+
+	// TODO: do we need to call this in a goroutine to make it a background job?
+	err = flattenRbdImage(rbdVol, rbdMaxCloneDepth, cr)
+	if err != nil {
+		klog.Errorf("failed to flatten image %v", err)
+	}
 	return nil
 }
@@ -385,22 +390,11 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 	if err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
-	// Fetch source volume information
-	rbdVol := new(rbdVolume)
-	err = genVolFromVolID(rbdVol, req.GetSourceVolumeId(), cr)
+	rbdVol, err := validateVolHasSnapFeature(req.GetSourceVolumeId(), cr)
 	if err != nil {
-		if _, ok := err.(ErrImageNotFound); ok {
-			return nil, status.Errorf(codes.NotFound, "source Volume ID %s not found", req.GetSourceVolumeId())
-		}
-		return nil, status.Errorf(codes.Internal, err.Error())
-	}
-
-	// Check if source volume was created with required image features for snaps
-	if !hasSnapshotFeature(rbdVol.ImageFeatures) {
-		return nil, status.Errorf(codes.InvalidArgument, "volume(%s) has not snapshot feature(layering)", req.GetSourceVolumeId())
+		return nil, err
 	}
 
-	// Create snap volume
 	rbdSnap := genSnapFromOptions(rbdVol, req.GetParameters())
 	rbdSnap.RbdImageName = rbdVol.RbdImageName
@@ -411,8 +405,16 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 	idLk := snapshotNameLocker.Lock(req.GetName())
 	defer snapshotNameLocker.Unlock(idLk, req.GetName())
 
-	// Need to check for already existing snapshot name, and if found
-	// check for the requested source volume id and already allocated source volume id
+	// update the rbd image name to point to the temporary cloned image name
+	img, err := getCloneImageName(rbdSnap, cr)
+	if err != nil {
+		if _, ok := err.(ErrImageNotFound); !ok {
+			return nil, status.Error(codes.Internal, err.Error())
+		}
+	} else {
+		rbdSnap.RbdImageName = img
+	}
+
 	found, err := checkSnapExists(rbdSnap, cr)
 	if err != nil {
 		if _, ok := err.(util.ErrSnapNameConflict); ok {
@@ -433,6 +435,8 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 		}, nil
 	}
 
+	// update the rbd image name to clone from the parent volume
+	rbdSnap.RbdImageName = rbdVol.RbdImageName
 	err = reserveSnap(rbdSnap, cr)
 	if err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
@@ -446,10 +450,34 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 		}
 	}()
 
-	err = cs.doSnapshot(rbdSnap, cr)
+	// generate temp snap struct and vol struct
+	tmpSnap := &rbdSnapshot{}
+	copySnapStruct(tmpSnap, rbdSnap)
+	tmpSnap.RequestName = snapJournal.GetTmpNamePrefix(req.GetName(), true, true)
+	// generate tempVolume struct
+	tmpVol := generateVolFromSnap(rbdSnap)
+	tmpVol.ImageFeatures = "layering,deep-flatten"
+	tmpVol.RequestName = volJournal.GetTmpNamePrefix(req.GetName(), true, false)
+	err = cs.createSnapFromClone(tmpVol, tmpSnap, cr)
 	if err != nil {
 		return nil, err
 	}
+	// update the image on which to create the requested snapshot
+	rbdSnap.RbdImageName = tmpVol.RbdImageName
+	err = createSnapshot(rbdSnap, cr)
+	if err != nil {
+		return nil, err
+	}
+	err = updateReservedSnap(rbdSnap, cr)
+	if err != nil {
+		klog.Errorf("failed to update snapshot parent name: %v", err)
+		return nil, status.Error(codes.Internal, err.Error())
+	}
+	err = getSnapshotMetadata(rbdSnap, cr)
+	if err != nil {
+		klog.Errorf("failed to fetch snapshot metadata: %v", err)
+		return nil, status.Error(codes.Internal, err.Error())
+	}
 
 	return &csi.CreateSnapshotResponse{
 		Snapshot: &csi.Snapshot{
@@ -462,66 +490,126 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 	}, nil
 }
 
-func (cs *ControllerServer) validateSnapshotReq(req *csi.CreateSnapshotRequest) error {
-	if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT); err != nil {
-		klog.Warningf("invalid create snapshot req: %v", protosanitizer.StripSecrets(req))
-		return err
+func copySnapStruct(tmpSnap, rbdSnap *rbdSnapshot) {
+	tmpSnap.ClusterID = rbdSnap.ClusterID
+	tmpSnap.Monitors = rbdSnap.Monitors
+	tmpSnap.Pool = rbdSnap.Pool
+	tmpSnap.RbdImageName = rbdSnap.RbdImageName
+	tmpSnap.RequestName = rbdSnap.RequestName
+	tmpSnap.SizeBytes = rbdSnap.SizeBytes
+	tmpSnap.SourceVolumeID = rbdSnap.SourceVolumeID
+}
+
+func getCloneImageName(rbdSnap *rbdSnapshot, cr *util.Credentials) (string, error) {
+	tmpVol := &rbdVolume{}
+	tmpVol.Monitors = rbdSnap.Monitors
+	tmpVol.Pool = rbdSnap.Pool
+	tmpVol.ClusterID = rbdSnap.ClusterID
+	tmpVol.VolSize = rbdSnap.SizeBytes
+	tmpVol.RequestName = volJournal.GetTmpNamePrefix(rbdSnap.RequestName, true, false)
+	found, err := checkVolExists(tmpVol, cr)
+	if err != nil {
+		return "", err
 	}
-
-	// Check sanity of request Snapshot Name, Source Volume Id
-	if req.Name == "" {
-		return status.Error(codes.InvalidArgument, "snapshot Name cannot be empty")
+	if found {
+		return tmpVol.RbdImageName, nil
 	}
-	if req.SourceVolumeId == "" {
-		return status.Error(codes.InvalidArgument, "source Volume ID cannot be empty")
+	return "", ErrImageNotFound{tmpVol.RequestName, errors.New("failed to get clone image name")}
+}
+
+func validateVolHasSnapFeature(parentVolID string, cr *util.Credentials) (*rbdVolume, error) {
+	// validate parent volume
+	rbdVol := new(rbdVolume)
+	err := genVolFromVolID(rbdVol, parentVolID, cr)
+	if err != nil {
+		if _, ok := err.(ErrImageNotFound); ok {
+			return nil, status.Errorf(codes.NotFound, "source Volume ID %s not found", parentVolID)
+		}
+		return nil, status.Errorf(codes.Internal, err.Error())
 	}
-	return nil
+	// Check if source volume was created with required image features for snaps
+	if !hasSnapshotFeature(rbdVol.ImageFeatures) {
+		return nil, status.Errorf(codes.InvalidArgument, "volume(%s) has not snapshot feature(layering)", parentVolID)
+	}
+	return rbdVol, nil
 }
 
-func (cs *ControllerServer) doSnapshot(rbdSnap *rbdSnapshot, cr *util.Credentials) (err error) {
-	err = createSnapshot(rbdSnap, cr)
-	// If snap creation fails, even due to snapname already used, fail, next attempt will get a new
-	// uuid for use as the snap name
+func (cs *ControllerServer) createSnapFromClone(rbdVol *rbdVolume, rbdSnap *rbdSnapshot, cr *util.Credentials) error {
+	var (
+		err     error
+		snapErr error
+		volErr  error
+	)
+	found, err := checkSnapExists(rbdSnap, cr)
 	if err != nil {
-		klog.Errorf("failed to create snapshot: %v", err)
-		return status.Error(codes.Internal, err.Error())
+		if _, ok := err.(util.ErrSnapNameConflict); ok {
+			return status.Error(codes.AlreadyExists, err.Error())
+		}
+		return status.Errorf(codes.Internal, err.Error())
 	}
-	defer func() {
+
+	if found {
+		klog.Infof("found temp snapshot image %s", rbdSnap.RequestName)
+		// check whether the clone volume exists
+		_, err = checkVolExists(rbdVol, cr)
 		if err != nil {
-			errDefer := deleteSnapshot(rbdSnap, cr)
-			if errDefer != nil {
-				klog.Errorf("failed to delete snapshot: %v", errDefer)
-				err = fmt.Errorf("snapshot created but failed to delete snapshot due to"+
-					" other failures: %v", err)
-			}
-			err = status.Error(codes.Internal, err.Error())
+			return status.Error(codes.Internal, err.Error())
 		}
-	}()
-
-	err = protectSnapshot(rbdSnap, cr)
+		klog.Infof("found temp clone image %s for snapshot %s", rbdVol.RequestName, rbdSnap.RequestName)
+		goto deleteSnap
+	}
+	err = reserveSnap(rbdSnap, cr)
 	if err != nil {
-		klog.Errorf("failed to protect snapshot: %v", err)
 		return status.Error(codes.Internal, err.Error())
 	}
-	defer func() {
-		if err != nil {
-			errDefer := unprotectSnapshot(rbdSnap, cr)
-			if errDefer != nil {
-				klog.Errorf("failed to unprotect snapshot: %v", errDefer)
-				err = fmt.Errorf("snapshot created but failed to unprotect snapshot due to"+
-					" other failures: %v", err)
-			}
-			err = status.Error(codes.Internal, err.Error())
-		}
-	}()
-	err = getSnapshotMetadata(rbdSnap, cr)
+	// create snapshot
+	snapErr = createSnapshot(rbdSnap, cr)
+
+	if snapErr != nil {
+		errDefer := undoSnapReservation(rbdSnap, cr)
+		if errDefer != nil {
+			klog.Warningf("failed undoing reservation of snapshot: %s (%s)", rbdSnap.RequestName, errDefer)
+		}
+		return snapErr
+	}
+	err = reserveVol(rbdVol, cr)
 	if err != nil {
-		klog.Errorf("failed to fetch snapshot metadata: %v", err)
 		return status.Error(codes.Internal, err.Error())
 	}
+	volErr = restoreSnapshot(rbdVol, rbdSnap, cr)
+	if volErr != nil {
+		errDefer := undoVolReservation(rbdVol, cr)
+		if errDefer != nil {
+			klog.Warningf("failed undoing reservation of volume: %s (%s)", rbdVol.RequestName, errDefer)
+		}
+		return volErr
+	}
+
+deleteSnap:
+	// delete the temporary snapshot
+	if err := deleteSnapshot(rbdSnap, cr); err != nil {
+		return status.Errorf(codes.Internal,
+			"failed to delete snapshot: %s/%s with error: %v",
+			rbdSnap.Pool, rbdSnap.RbdSnapName, err)
+	}
+	return nil
+}
+
+func (cs *ControllerServer) validateSnapshotReq(req *csi.CreateSnapshotRequest) error {
+	if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT); err != nil {
+		klog.Warningf("invalid create snapshot req: %v", protosanitizer.StripSecrets(req))
+		return err
+	}
+
+	// Check sanity of request Snapshot Name, Source Volume Id
+	if req.Name == "" {
+		return status.Error(codes.InvalidArgument, "snapshot Name cannot be empty")
+	}
+	if req.SourceVolumeId == "" {
+		return status.Error(codes.InvalidArgument, "source Volume ID cannot be empty")
+	}
 	return nil
 }
@@ -572,21 +660,42 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS
 	idLk := snapshotNameLocker.Lock(rbdSnap.RequestName)
 	defer snapshotNameLocker.Unlock(idLk, rbdSnap.RequestName)
 
-	// Unprotect snapshot
-	err = unprotectSnapshot(rbdSnap, cr)
+	// generate tempVolume struct
+	rbdVol := generateVolFromSnap(rbdSnap)
+	rbdVol.RequestName = volJournal.GetTmpNamePrefix(rbdSnap.RequestName, true, false)
+	found, err := checkVolExists(rbdVol, cr)
 	if err != nil {
-		return nil, status.Errorf(codes.FailedPrecondition,
-			"failed to unprotect snapshot: %s/%s with error: %v",
-			rbdSnap.Pool, rbdSnap.RbdSnapName, err)
+		if _, ok := err.(ErrImageNotFound); !ok {
+			return nil, status.Error(codes.Internal, err.Error())
+		}
 	}
 
-	// Deleting snapshot
 	klog.V(4).Infof("deleting Snaphot %s", rbdSnap.RbdSnapName)
 	if err := deleteSnapshot(rbdSnap, cr); err != nil {
-		return nil, status.Errorf(codes.FailedPrecondition,
+		return nil, status.Errorf(codes.Internal,
 			"failed to delete snapshot: %s/%s with error: %v",
 			rbdSnap.Pool, rbdSnap.RbdSnapName, err)
 	}
+	if found {
+		// TODO need to delete stale volumes
+		// Deleting rbd image
+		klog.V(4).Infof("deleting image %s", rbdVol.RbdImageName)
+		if err := deleteImage(rbdVol, cr); err != nil {
+			klog.Errorf("failed to delete rbd image: %s/%s with error: %v",
+				rbdVol.Pool, rbdVol.RbdImageName, err)
+			return nil, status.Error(codes.Internal, err.Error())
+		}
+	}
 
 	return &csi.DeleteSnapshotResponse{}, nil
 }
+
+func generateVolFromSnap(snap *rbdSnapshot) *rbdVolume {
+	vol := &rbdVolume{
+		Pool:      snap.Pool,
+		Monitors:  snap.Monitors,
+		VolSize:   snap.SizeBytes,
+		ClusterID: snap.ClusterID,
+	}
+	return vol
+}

diff --git a/pkg/rbd/rbd.go b/pkg/rbd/rbd.go
index a33dc0c8d29..36aefe7f19b 100644
--- a/pkg/rbd/rbd.go
+++ b/pkg/rbd/rbd.go
@@ -59,6 +59,9 @@ var (
 	// VolumeName to backing RBD images
 	volJournal  *util.CSIJournal
 	snapJournal *util.CSIJournal
+
+	// rbdMaxCloneDepth is the maximum number of nested volume clones that are taken before a flatten occurs
+	rbdMaxCloneDepth uint
 )
 
 // NewDriver returns new rbd driver
@@ -99,7 +102,7 @@ func NewNodeServer(d *csicommon.CSIDriver, containerized bool) (*NodeServer, err
 
 // Run start a non-blocking grpc controller,node and identityserver for
 // rbd CSI driver which can serve multiple parallel requests
-func (r *Driver) Run(driverName, nodeID, endpoint, instanceID string, containerized bool, cachePersister util.CachePersister) {
+func (r *Driver) Run(driverName, nodeID, endpoint, instanceID string, containerized bool, cloneDepth uint, cachePersister util.CachePersister) {
 	var err error
 
 	klog.Infof("Driver: %v version: %v", driverName, version)
@@ -109,6 +112,8 @@ func (r *Driver) Run(driverName, nodeID, endpoint, instanceID string, containeri
 		klog.Fatalf("failed to write ceph configuration file (%v)", err)
 	}
 
+	rbdMaxCloneDepth = cloneDepth
+
 	// Use passed in instance ID, if provided for omap suffix naming
 	if instanceID != "" {
 		CSIInstanceID = instanceID

diff --git a/pkg/rbd/rbd_journal.go b/pkg/rbd/rbd_journal.go
index 4ed90e57e10..da4e1ad707a 100644
--- a/pkg/rbd/rbd_journal.go
+++ b/pkg/rbd/rbd_journal.go
@@ -18,6 +18,7 @@ package rbd
 
 import (
 	"fmt"
+	"strings"
 
 	"github.com/ceph/ceph-csi/pkg/util"
 
@@ -141,6 +142,8 @@ func checkSnapExists(rbdSnap *rbdSnapshot, cr *util.Credentials) (bool, error) {
 		return false, err
 	}
 
+	// TODO validate snap belongs to same parent
+
 	klog.V(4).Infof("found existing snap (%s) with snap name (%s) for request (%s)",
 		rbdSnap.SnapID, rbdSnap.RbdSnapName, rbdSnap.RequestName)
 
@@ -229,6 +232,17 @@ func reserveSnap(rbdSnap *rbdSnapshot, cr *util.Credentials) error {
 	return nil
 }
 
+// updateReservedSnap is a helper routine to update the snap parent name
+func updateReservedSnap(rbdSnap *rbdSnapshot, cr *util.Credentials) error {
+	snapUUID := strings.Replace(rbdSnap.RbdSnapName, snapJournal.NamingPrefix(), "", 1)
+	err := snapJournal.UpdateReservedKey(rbdSnap.Monitors, cr, rbdSnap.Pool, snapUUID, rbdSnap.RbdImageName)
+	if err == nil {
+		klog.V(4).Infof("updated snapshot %s with parent name %s", rbdSnap.RbdSnapName, rbdSnap.RbdImageName)
+	}
+	return err
+}
+
 // reserveVol is a helper routine to request a rbdVolume name reservation and generate the
 // volume ID for the generated name
 func reserveVol(rbdVol *rbdVolume, cr *util.Credentials) error {

diff --git a/pkg/rbd/rbd_util.go b/pkg/rbd/rbd_util.go
index 83b5c13f29a..3923670c42b 100644
--- a/pkg/rbd/rbd_util.go
+++ b/pkg/rbd/rbd_util.go
@@ -177,8 +177,8 @@ func deleteImage(pOpts *rbdVolume, cr *util.Credentials) error {
 		return fmt.Errorf("rbd %s is still being used", image)
 	}
 
-	klog.V(4).Infof("rbd: rm %s using mon %s, pool %s", image, pOpts.Monitors, pOpts.Pool)
-	args := []string{"rm", image, "--pool", pOpts.Pool, "--id", cr.ID, "-m", pOpts.Monitors,
+	klog.V(4).Infof("rbd: trash mv %s using mon %s, pool %s", image, pOpts.Monitors, pOpts.Pool)
+	args := []string{"trash", "mv", image, "--pool", pOpts.Pool, "--id", cr.ID, "-m", pOpts.Monitors,
 		"--key=" + cr.Key}
 	output, err = execCommand("rbd", args)
 	if err != nil {
@@ -478,32 +478,13 @@ func hasSnapshotFeature(imageFeatures string) bool {
 	return false
 }
 
-func protectSnapshot(pOpts *rbdSnapshot, cr *util.Credentials) error {
-	var output []byte
-
-	image := pOpts.RbdImageName
-	snapName := pOpts.RbdSnapName
-
-	klog.V(4).Infof("rbd: snap protect %s using mon %s, pool %s ", image, pOpts.Monitors, pOpts.Pool)
-	args := []string{"snap", "protect", "--pool", pOpts.Pool, "--snap", snapName, image, "--id",
-		cr.ID, "-m", pOpts.Monitors, "--key=" + cr.Key}
-
-	output, err := execCommand("rbd", args)
-
-	if err != nil {
-		return errors.Wrapf(err, "failed to protect snapshot, command output: %s", string(output))
-	}
-
-	return nil
-}
-
 func createSnapshot(pOpts *rbdSnapshot, cr *util.Credentials) error {
 	var output []byte
 
 	image := pOpts.RbdImageName
 	snapName := pOpts.RbdSnapName
 
-	klog.V(4).Infof("rbd: snap create %s using mon %s, pool %s", image, pOpts.Monitors, pOpts.Pool)
+	klog.V(4).Infof("rbd: snap create %s/%s using mon %s, pool %s", image, snapName, pOpts.Monitors, pOpts.Pool)
 	args := []string{"snap", "create", "--pool", pOpts.Pool, "--snap", snapName, image, "--id",
 		cr.ID, "-m", pOpts.Monitors, "--key=" + cr.Key}
 
@@ -516,32 +497,13 @@ func createSnapshot(pOpts *rbdSnapshot, cr *util.Credentials) error {
 	return nil
 }
 
-func unprotectSnapshot(pOpts *rbdSnapshot, cr *util.Credentials) error {
-	var output []byte
-
-	image := pOpts.RbdImageName
-	snapName := pOpts.RbdSnapName
-
-	klog.V(4).Infof("rbd: snap unprotect %s using mon %s, pool %s", image, pOpts.Monitors, pOpts.Pool)
-	args := []string{"snap", "unprotect", "--pool", pOpts.Pool, "--snap", snapName, image, "--id",
-		cr.ID, "-m", pOpts.Monitors, "--key=" + cr.Key}
-
-	output, err := execCommand("rbd", args)
-
-	if err != nil {
-		return errors.Wrapf(err, "failed to unprotect snapshot, command output: %s", string(output))
-	}
-
-	return nil
-}
-
 func deleteSnapshot(pOpts *rbdSnapshot, cr *util.Credentials) error {
 	var output []byte
 
 	image := pOpts.RbdImageName
 	snapName := pOpts.RbdSnapName
 
-	klog.V(4).Infof("rbd: snap rm %s using mon %s, pool %s", image, pOpts.Monitors, pOpts.Pool)
+	klog.V(4).Infof("rbd: snap rm %s/%s using mon %s, pool %s", image, snapName, pOpts.Monitors, pOpts.Pool)
 	args := []string{"snap", "rm", "--pool", pOpts.Pool, "--snap", snapName, image, "--id",
 		cr.ID, "-m", pOpts.Monitors, "--key=" + cr.Key}
 
@@ -567,8 +529,7 @@ func restoreSnapshot(pVolOpts *rbdVolume, pSnapOpts *rbdSnapshot, cr *util.Crede
 	klog.V(4).Infof("rbd: clone %s using mon %s, pool %s", image, pVolOpts.Monitors, pVolOpts.Pool)
 	args := []string{"clone", pSnapOpts.Pool + "/" + pSnapOpts.RbdImageName + "@" + snapName,
-		pVolOpts.Pool + "/" + image, "--id", cr.ID, "-m", pVolOpts.Monitors, "--key=" + cr.Key}
-
+		pVolOpts.Pool + "/" + image, "--rbd-default-clone-format=2", "--id", cr.ID, "-m", pVolOpts.Monitors, "--key=" + cr.Key, "--image-feature", pVolOpts.ImageFeatures}
 	output, err := execCommand("rbd", args)
 	if err != nil {
@@ -604,13 +565,21 @@ func getSnapshotMetadata(pSnapOpts *rbdSnapshot, cr *util.Credentials) error {
 	return nil
 }
 
+// parentInfo spec for parent volume info
+type parentInfo struct {
+	Image    string `json:"image"`
+	Pool     string `json:"pool"`
+	Snapshot string `json:"snapshot"`
+}
+
 // imageInfo strongly typed JSON spec for image info
 type imageInfo struct {
-	ObjectUUID string   `json:"name"`
-	Size       int64    `json:"size"`
-	Format     int64    `json:"format"`
-	Features   []string `json:"features"`
-	CreatedAt  string   `json:"create_timestamp"`
+	ObjectUUID string     `json:"name"`
+	Size       int64      `json:"size"`
+	Format     int64      `json:"format"`
+	Features   []string   `json:"features"`
+	CreatedAt  string     `json:"create_timestamp"`
+	Parent     parentInfo `json:"parent"`
 }
 
 // getImageInfo queries rbd about the given image and returns its metadata, and returns
@@ -704,3 +673,42 @@ func getSnapInfo(monitors string, cr *util.Credentials, poolName, imageName, sna
 	return snpInfo, ErrSnapNotFound{snapName, fmt.Errorf("snap (%s) for image (%s) not found",
 		snapName, poolName+"/"+imageName)}
 }
+
+func flattenRbdImage(rbdVol *rbdVolume, maxDepth uint, cr *util.Credentials) error {
+	if maxDepth > 0 {
+		d, err := getCloneDepth(rbdVol.Monitors, rbdVol.Pool, rbdVol.RbdImageName, 0, cr)
+		if err != nil {
+			return err
+		}
+		klog.Infof("image depth is %v and maximum configured clone depth is %v", d, maxDepth)
+		if d >= int(maxDepth) {
+			klog.Infof("maximum clone depth (%d) has been reached, flattening %v volume", maxDepth, rbdVol.RbdImageName)
+			args := []string{"-m", rbdVol.Monitors,
+				"--id", cr.ID,
+				"--pool", rbdVol.Pool,
+				"--no-progress",
+				"--image", rbdVol.RbdImageName,
+				"--key=" + cr.Key,
+				"-c", util.CephConfigPath,
+				"flatten"}
+
+			_, err := execCommand("rbd", args)
+			return err
+		}
+	}
+
+	return nil
+}
+
+func getCloneDepth(monitors, poolName, imageName string, depth int, cr *util.Credentials) (int, error) {
+	image, err := getImageInfo(monitors, cr, poolName, imageName)
+	if err != nil {
+		return 0, err
+	}
+	if image.Parent.Image != "" {
+		depth++
+		return getCloneDepth(monitors, image.Parent.Pool, image.Parent.Image, depth, cr)
+	}
+	return depth, err
+}

diff --git a/pkg/util/cephcmds.go b/pkg/util/cephcmds.go
index eb61f6f4496..319d2201b95 100644
--- a/pkg/util/cephcmds.go
+++ b/pkg/util/cephcmds.go
@@ -142,6 +142,41 @@ func SetOMapKeyValue(monitors string, cr *Credentials, poolName, namespace, oMap
 	return nil
 }
 
+// UpdateOMapValue updates the given key with a new value in the provided Ceph omap
+func UpdateOMapValue(monitors string, cr *Credentials, poolName, namespace, oMapName, oMapKey, keyValue string) error {
+	err := deleteOMapkey(monitors, cr, poolName, namespace, oMapName, oMapKey)
+	if err != nil {
+		return err
+	}
+	err = SetOMapKeyValue(monitors, cr, poolName, namespace, oMapName, oMapKey, keyValue)
+	return err
+}
+
+func deleteOMapkey(monitors string, cr *Credentials, poolName, namespace, oMapName, oMapKey string) error {
+	// Command: "rados rmomapkey oMapName oMapKey"
+	args := []string{
+		"-m", monitors,
+		"--id", cr.ID,
+		"--key=" + cr.Key,
+		"-c", CephConfigPath,
+		"-p", poolName,
+		"rmomapkey", oMapName, oMapKey,
+	}
+
+	if namespace != "" {
+		args = append(args, "--namespace="+namespace)
+	}
+
+	_, _, err := ExecCommand("rados", args[:]...)
+	if err != nil {
+		klog.Errorf("failed removing key (%s), from omap (%s) in "+
+			"pool (%s): (%v)", oMapKey, oMapName, poolName, err)
+		return err
+	}
+
+	return nil
+}
+
 // GetOMapValue gets the value for the given key from the named omap
 func GetOMapValue(monitors string, cr *Credentials, poolName, namespace, oMapName, oMapKey string) (string, error) {
 	// Command: "rados getomapval oMapName oMapKey <outfile>"
@@ -236,7 +271,6 @@ func CreateObject(monitors string, cr *Credentials, poolName, namespace, objectN
 	if namespace != "" {
 		args = append(args, "--namespace="+namespace)
 	}
-
 	stdout, _, err := ExecCommand("rados", args[:]...)
 	if err != nil {
 		klog.Errorf("failed creating omap (%s) in pool (%s): (%v)", objectName, poolName, err)

diff --git a/pkg/util/voljournal.go b/pkg/util/voljournal.go
index 197192fd502..5b5aa4fb3fe 100644
--- a/pkg/util/voljournal.go
+++ b/pkg/util/voljournal.go
@@ -117,8 +117,16 @@ type CSIJournal struct {
 
 	// namespace in which the RADOS objects are stored, default is no namespace
 	namespace string
+
+	// tmpNamePrefix is the prefix for temporary cloned images, which helps to filter and delete stale clone volumes
+	tmpNamePrefix string
 }
 
+var (
+	tmpSnapName = "csi-tmp-snap-"
+	tmpVolName  = "csi-tmp-clone-"
+)
+
 // CSIVolumeJournal returns an instance of volume keys
 func NewCSIVolumeJournal() *CSIJournal {
 	return &CSIJournal{
@@ -129,6 +137,7 @@ func NewCSIVolumeJournal() *CSIJournal {
 		namingPrefix:      "csi-vol-",
 		cephSnapSourceKey: "",
 		namespace:         "",
+		tmpNamePrefix:     tmpVolName,
 	}
 }
 
@@ -142,6 +151,7 @@ func NewCSISnapshotJournal() *CSIJournal {
 		namingPrefix:      "csi-snap-",
 		cephSnapSourceKey: "csi.source",
 		namespace:         "",
+		tmpNamePrefix:     tmpSnapName,
 	}
 }
 
@@ -160,6 +170,36 @@ func (cj *CSIJournal) SetNamespace(ns string) {
 	cj.namespace = ns
 }
 
+// SetTmpNamePrefix updates the prefix with which all temporary cloned volumes
+// start. This helps the driver to clean up the stale clone volumes it created.
+// The resulting prefix is tmpNamePrefix + driverName + "-".
+func (cj *CSIJournal) SetTmpNamePrefix(name string) {
+	cj.tmpNamePrefix += name + "-"
+}
+
+// GetTmpNamePrefix returns the temporary name prefix
+/* For snapshots there are two variants of the prefix: if the tmpNamePrefix is
+csi-tmp-snap-rbd.ceph.csi.com- then, when first is set, this returns
+csi-tmp-snap-1rbd.ceph.csi.com-<suffix>, and csi-tmp-snap-2rbd.ceph.csi.com-<suffix>
+otherwise. This helps us store the snap info on rados and also helps in
+cleaning up stale snaps.
+*/
+func (cj *CSIJournal) GetTmpNamePrefix(suffix string, first, isSnap bool) string {
+	if !isSnap {
+		return cj.tmpNamePrefix + suffix
+	}
+	t := strings.Split(cj.tmpNamePrefix, tmpSnapName)
+	if len(t) != 2 {
+		return ""
+	}
+	if first {
+		return tmpSnapName + "1" + t[1] + suffix
+	}
+	return tmpSnapName + "2" + t[1] + suffix
+}
+
 /*
 CheckReservation checks if given request name contains a valid reservation
 - If there is a valid reservation, then the corresponding UUID for the volume/snapshot is returned
@@ -367,6 +407,17 @@ func (cj *CSIJournal) ReserveName(monitors string, cr *Credentials, pool, reqNam
 	return volUUID, nil
 }
 
+/*
+UpdateReservedKey updates the omap key with a new value
+*/
+func (cj *CSIJournal) UpdateReservedKey(monitors string, cr *Credentials, pool, volUUID, parentName string) error {
+	// update the source key in the UUID directory with the parent image name
+	err := UpdateOMapValue(monitors, cr, pool, cj.namespace, cj.cephUUIDDirectoryPrefix+volUUID,
+		cj.cephSnapSourceKey, parentName)
+	return err
+}
+
 /*
 GetObjectUUIDData fetches all keys from a UUID directory
 Return values:
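
Editor's note: with this patch, deleteImage() only moves RBD images to the trash
(`rbd trash mv`); nothing in the diff empties the trash, and the DeleteSnapshot
path still carries a TODO about stale volumes. Purely as an illustration, here is
a minimal sketch of a purge helper in the style of the surrounding
pkg/rbd/rbd_util.go code. It assumes a Ceph release that provides
`rbd trash purge` (Mimic or later) and reuses that file's execCommand helper and
types; the function itself is hypothetical and not part of the patch.

```go
// purgeTrash is a hypothetical helper (not part of this patch) that would sit
// next to deleteImage in pkg/rbd/rbd_util.go. It removes all expired images
// from the pool's trash with the standard `rbd trash purge` subcommand.
func purgeTrash(pOpts *rbdVolume, cr *util.Credentials) error {
	klog.V(4).Infof("rbd: trash purge using mon %s, pool %s", pOpts.Monitors, pOpts.Pool)
	args := []string{"trash", "purge", "--pool", pOpts.Pool, "--id", cr.ID,
		"-m", pOpts.Monitors, "--key=" + cr.Key}
	output, err := execCommand("rbd", args)
	if err != nil {
		return errors.Wrapf(err, "failed to purge trash, command output: %s", string(output))
	}
	return nil
}
```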
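Editor's note: the temporary-name scheme in pkg/util/voljournal.go is easiest to
see with concrete values. The sketch below assumes the snapshot-journal
constructor is the NewCSISnapshotJournal shown partially above, and that the
driver calls SetTmpNamePrefix with its driver name at start-up (the call site is
outside this excerpt); the request name "req-uuid" is made up.

```go
package main

import (
	"fmt"

	"github.com/ceph/ceph-csi/pkg/util"
)

func main() {
	snapJ := util.NewCSISnapshotJournal()      // tmpNamePrefix starts as "csi-tmp-snap-"
	snapJ.SetTmpNamePrefix("rbd.csi.ceph.com") // now "csi-tmp-snap-rbd.csi.ceph.com-"

	// temporary snapshot taken on the parent image (step 1 of the commit message)
	fmt.Println(snapJ.GetTmpNamePrefix("req-uuid", true, true)) // csi-tmp-snap-1rbd.csi.ceph.com-req-uuid
	// final snapshot re-created on the temporary clone (step 4)
	fmt.Println(snapJ.GetTmpNamePrefix("req-uuid", false, true)) // csi-tmp-snap-2rbd.csi.ceph.com-req-uuid

	volJ := util.NewCSIVolumeJournal() // tmpNamePrefix starts as "csi-tmp-clone-"
	volJ.SetTmpNamePrefix("rbd.csi.ceph.com")
	// temporary clone created for the snapshot (steps 2-3)
	fmt.Println(volJ.GetTmpNamePrefix("req-uuid", true, false)) // csi-tmp-clone-rbd.csi.ceph.com-req-uuid
}
```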