Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support snapshot in rbdplugin #61

Merged
merged 7 commits into from
Aug 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/rbd/secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ metadata:
data:
# Key value corresponds to a user name defined in ceph cluster
admin: BASE64-ENCODED-PASSWORD
# Key value corresponds to a user name defined in ceph cluster
kubernetes: BASE64-ENCODED-PASSWORD
4 changes: 4 additions & 0 deletions examples/rbd/storageclass.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,8 @@ parameters:
csiProvisionerSecretNamespace: default
csiNodePublishSecretName: csi-rbd-secret
csiNodePublishSecretNamespace: default

# Ceph users for operating RBD
adminid: admin
userid: kubernetes
reclaimPolicy: Delete
262 changes: 255 additions & 7 deletions pkg/rbd/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ package rbd

import (
"fmt"
"os/exec"
"path"
"syscall"
"time"

"github.com/container-storage-interface/spec/lib/go/csi/v0"
"github.com/golang/glog"
Expand Down Expand Up @@ -77,7 +80,7 @@ func (cs *controllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
return nil, err
}

// Generating Volume Name and Volume ID, as accoeding to CSI spec they MUST be different
// Generating Volume Name and Volume ID, as according to CSI spec they MUST be different
volName := req.GetName()
uniqueID := uuid.NewUUID().String()
if len(volName) == 0 {
Expand All @@ -95,21 +98,46 @@ func (cs *controllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
volSizeGB := int(volSizeBytes / 1024 / 1024 / 1024)

// Check if there is already RBD image with requested name
found, _, _ := rbdStatus(rbdVol, req.GetControllerCreateSecrets())
found, _, _ := rbdStatus(rbdVol, rbdVol.UserId, req.GetControllerCreateSecrets())
if !found {
if err := createRBDImage(rbdVol, volSizeGB, req.GetControllerCreateSecrets()); err != nil {
// if VolumeContentSource is not nil, this request is for snapshot
if req.VolumeContentSource != nil {
snapshot := req.VolumeContentSource.GetSnapshot()
if snapshot == nil {
return nil, status.Error(codes.InvalidArgument, "Volume Snapshot cannot be empty")
}

snapshotID := snapshot.GetId()
if len(snapshotID) == 0 {
return nil, status.Error(codes.InvalidArgument, "Volume Snapshot ID cannot be empty")
}

rbdSnap := &rbdSnapshot{}
if err := loadSnapInfo(snapshotID, path.Join(PluginFolder, "controller-snap"), rbdSnap); err != nil {
return nil, err
}

err = restoreSnapshot(rbdVol, rbdSnap, rbdVol.AdminId, req.GetControllerCreateSecrets())
if err != nil {
glog.Warningf("failed to create volume: %v", err)
return nil, err
}
glog.V(4).Infof("create volume %s from snapshot %s", volName, rbdSnap.SnapName)
} else {
if err := createRBDImage(rbdVol, volSizeGB, rbdVol.AdminId, req.GetControllerCreateSecrets()); err != nil {
if err != nil {
glog.Warningf("failed to create volume: %v", err)
return nil, err
}
}
glog.V(4).Infof("create volume %s", volName)
}
glog.V(4).Infof("create volume %s", volName)
}

// Storing volInfo into a persistent file.
if err := persistVolInfo(volumeID, path.Join(PluginFolder, "controller"), rbdVol); err != nil {
glog.Warningf("rbd: failed to store volInfo with error: %v", err)
}
rbdVolumes[volumeID] = *rbdVol
rbdVolumes[volumeID] = rbdVol
return &csi.CreateVolumeResponse{
Volume: &csi.Volume{
Id: volumeID,
Expand All @@ -133,7 +161,7 @@ func (cs *controllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
volName := rbdVol.VolName
// Deleting rbd image
glog.V(4).Infof("deleting volume %s", volName)
if err := deleteRBDImage(rbdVol, req.GetControllerDeleteSecrets()); err != nil {
if err := deleteRBDImage(rbdVol, rbdVol.AdminId, req.GetControllerDeleteSecrets()); err != nil {
glog.V(3).Infof("failed to delete rbd image: %s/%s with error: %v", rbdVol.Pool, volName, err)
return nil, err
}
Expand Down Expand Up @@ -162,3 +190,223 @@ func (cs *controllerServer) ControllerUnpublishVolume(ctx context.Context, req *
func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *csi.ControllerPublishVolumeRequest) (*csi.ControllerPublishVolumeResponse, error) {
return &csi.ControllerPublishVolumeResponse{}, nil
}

func (cs *controllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error) {
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT); err != nil {
glog.Warningf("invalid create snapshot req: %v", req)
return nil, err
}

// Check sanity of request Snapshot Name, Source Volume Id
if len(req.Name) == 0 {
return nil, status.Error(codes.InvalidArgument, "Snapshot Name cannot be empty")
}
if len(req.SourceVolumeId) == 0 {
return nil, status.Error(codes.InvalidArgument, "Source Volume ID cannot be empty")
}

// Need to check for already existing snapshot name, and if found
// check for the requested source volume id and already allocated source volume id
if exSnap, err := getRBDSnapshotByName(req.GetName()); err == nil {
if req.SourceVolumeId == exSnap.SourceVolumeID {
return &csi.CreateSnapshotResponse{
Snapshot: &csi.Snapshot{
SizeBytes: exSnap.SizeBytes,
Id: exSnap.SnapID,
SourceVolumeId: exSnap.SourceVolumeID,
CreatedAt: exSnap.CreatedAt,
Status: &csi.SnapshotStatus{
Type: csi.SnapshotStatus_READY,
},
},
}, nil
}
return nil, status.Error(codes.AlreadyExists, fmt.Sprintf("Snapshot with the same name: %s but with different source volume id already exist", req.GetName()))
}

rbdSnap, err := getRBDSnapshotOptions(req.GetParameters())
if err != nil {
return nil, err
}

// Generating Snapshot Name and Snapshot ID, as according to CSI spec they MUST be different
snapName := req.GetName()
uniqueID := uuid.NewUUID().String()
rbdVolume, err := getRBDVolumeByID(req.GetSourceVolumeId())
if err != nil {
return nil, status.Error(codes.NotFound, fmt.Sprintf("Source Volume ID %s cannot found", req.GetSourceVolumeId()))
}
if !hasSnapshotFeature(rbdVolume.ImageFeatures) {
return nil, fmt.Errorf("Volume(%s) has not snapshot feature(layering)", req.GetSourceVolumeId())
}

rbdSnap.VolName = rbdVolume.VolName
rbdSnap.SnapName = snapName
snapshotID := "csi-rbd-" + rbdVolume.VolName + "-snap-" + uniqueID
rbdSnap.SnapID = snapshotID
rbdSnap.SourceVolumeID = req.GetSourceVolumeId()
rbdSnap.SizeBytes = rbdVolume.VolSize

err = createSnapshot(rbdSnap, rbdSnap.AdminId, req.GetCreateSnapshotSecrets())
// if we already have the snapshot, return the snapshot
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
if status.ExitStatus() == int(syscall.EEXIST) {
glog.Warningf("Snapshot with the same name: %s, we return this.", req.GetName())
} else {
glog.Warningf("failed to create snapshot: %v", err)
return nil, err
}
} else {
glog.Warningf("failed to create snapshot: %v", err)
return nil, err
}
} else {
glog.Warningf("failed to create snapshot: %v", err)
return nil, err
}
} else {
glog.V(4).Infof("create snapshot %s", snapName)
err = protectSnapshot(rbdSnap, rbdSnap.AdminId, req.GetCreateSnapshotSecrets())

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the late comment, but why are you protecting the snapshot? Snapshot protection was only required to support RBD cloning, but with Mimic and its new clone v2 support, snapshot protection is on its way to being deprecated. Since this is all new functionality, it would be great if we could start w/ a clean implementation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks @dillaman

@sngchlko do you have a chance to consider this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dillaman Thanks for the review. Good point!!
But I think only supporting snapshot with Mimic is not good.
In my case, I have to use the luminous version as I installed the ceph with a rook project which supports only luminous now.
And people who want to use snapshot feature in CSI must upgrade to Mimic.
How about using protect operation according to version?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sngchlko Like I said, the "protect" operation is only relevant for cloning images (using older versions of Ceph). Creating a snapshot only requires the "rbd snap create" command.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dillaman You are right. But I think we need protecting snapshot for creating a new image from the snapshot. CSI spec seems to require creating new volume at the time of restoring the snapshot. AFAIK, cloning the snapshot requires protecting the snapshot.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sngchlko I'm not familiar w/ the CSI spec, can you provide a link? I see [1] but it states revert isn't part of it. I would really like to avoid going down the tangled code of the OpenStack world due to how snapshots and clones needed to be handled before, which is why we added clone v2. Since this is all new functionality, it would be great if we could start off on a clean footing and avoid having to handle things like renaming protected snapshots "XYZ_deleteme" and images as "XYZ_deleteme" if they have a linked clone.

[1] https://docs.google.com/document/d/1oXVuDTNhXerr1UJ48tQrxF9J6vbm2LHVVC3z28655Qo/edit#heading=h.aqok97b5hs7t

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dillaman Sure, you can find VolumeContentSource for restoring the snapshot in CSI spec. If VolumeContentSource has something, it creates the new volume from the snapshot.
I think I can't determine about supporting snapshot only Mimic.
@rootfs, @gman0 What do you think about this?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sngchlko Thanks. If the only use-case for snapshots right now is to restore and if restore requires creating new volumes (instead of rolling back), perhaps the short-term (pre-Mimic) thing to do would be to protect the snapshot upon create from snapshot, flatten the new volume, and unprotect the snapshot. Mimic and later clients can just directly clone the snapshot into a new image. --- or just only support creating images from a snapshot if you are using Mimic or later and enjoy the simpler CSI code that doesn't need to manage snapshot lifecycles.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dillaman I fully understand your concerns. I created #70. Thanks!


if err != nil {
err = deleteSnapshot(rbdSnap, rbdSnap.AdminId, req.GetCreateSnapshotSecrets())
if err != nil {
return nil, fmt.Errorf("snapshot is created but failed to protect and delete snapshot: %v", err)
}
return nil, fmt.Errorf("Snapshot is created but failed to protect snapshot")
}
}

rbdSnap.CreatedAt = time.Now().UnixNano()

// Storing snapInfo into a persistent file.
if err := persistSnapInfo(snapshotID, path.Join(PluginFolder, "controller-snap"), rbdSnap); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also see
#62 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree using local storage is the problem.
I think we need external storage to share volume or snapshot information between rbd-plugins.
How about using etcd to store information?
cc: @gman0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sngchlko @rootfs let's discuss this issue after this is done here #66

glog.Warningf("rbd: failed to store snapInfo with error: %v", err)

// Unprotect snapshot
err := unprotectSnapshot(rbdSnap, rbdSnap.AdminId, req.GetCreateSnapshotSecrets())
if err != nil {
return nil, status.Error(codes.Unknown, fmt.Sprintf("This Snapshot should be removed but failed to unprotect snapshot: %s/%s with error: %v", rbdSnap.Pool, rbdSnap.SnapName, err))
}

// Deleting snapshot
glog.V(4).Infof("deleting Snaphot %s", rbdSnap.SnapName)
if err := deleteSnapshot(rbdSnap, rbdSnap.AdminId, req.GetCreateSnapshotSecrets()); err != nil {
return nil, status.Error(codes.Unknown, fmt.Sprintf("This Snapshot should be removed but failed to delete snapshot: %s/%s with error: %v", rbdSnap.Pool, rbdSnap.SnapName, err))
}

return nil, err
}
rbdSnapshots[snapshotID] = rbdSnap
return &csi.CreateSnapshotResponse{
Snapshot: &csi.Snapshot{
SizeBytes: rbdSnap.SizeBytes,
Id: snapshotID,
SourceVolumeId: req.GetSourceVolumeId(),
CreatedAt: rbdSnap.CreatedAt,
Status: &csi.SnapshotStatus{
Type: csi.SnapshotStatus_READY,
},
},
}, nil
}

func (cs *controllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) {
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT); err != nil {
glog.Warningf("invalid delete snapshot req: %v", req)
return nil, err
}

snapshotID := req.GetSnapshotId()
if len(snapshotID) == 0 {
return nil, status.Error(codes.InvalidArgument, "Snapshot ID cannot be empty")
}
rbdSnap := &rbdSnapshot{}
if err := loadSnapInfo(snapshotID, path.Join(PluginFolder, "controller-snap"), rbdSnap); err != nil {
return nil, err
}

// Unprotect snapshot
err := unprotectSnapshot(rbdSnap, rbdSnap.AdminId, req.GetDeleteSnapshotSecrets())
if err != nil {
return nil, status.Error(codes.FailedPrecondition, fmt.Sprintf("failed to unprotect snapshot: %s/%s with error: %v", rbdSnap.Pool, rbdSnap.SnapName, err))
}

// Deleting snapshot
glog.V(4).Infof("deleting Snaphot %s", rbdSnap.SnapName)
if err := deleteSnapshot(rbdSnap, rbdSnap.AdminId, req.GetDeleteSnapshotSecrets()); err != nil {
return nil, status.Error(codes.FailedPrecondition, fmt.Sprintf("failed to delete snapshot: %s/%s with error: %v", rbdSnap.Pool, rbdSnap.SnapName, err))
}

// Removing persistent storage file for the unmapped snapshot
if err := deleteSnapInfo(snapshotID, path.Join(PluginFolder, "controller-snap")); err != nil {
return nil, err
}

delete(rbdSnapshots, snapshotID)

return &csi.DeleteSnapshotResponse{}, nil
}

func (cs *controllerServer) ListSnapshots(ctx context.Context, req *csi.ListSnapshotsRequest) (*csi.ListSnapshotsResponse, error) {
if err := cs.Driver.ValidateControllerServiceRequest(csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS); err != nil {
glog.Warningf("invalid list snapshot req: %v", req)
return nil, err
}

sourceVolumeId := req.GetSourceVolumeId()

// TODO (sngchlko) list with token

// list only a specific snapshot which has snapshot ID
if snapshotID := req.GetSnapshotId(); len(snapshotID) != 0 {
if rbdSnap, ok := rbdSnapshots[snapshotID]; ok {
// if source volume ID also set, check source volume id on the cache.
if len(sourceVolumeId) != 0 && rbdSnap.SourceVolumeID != sourceVolumeId {
return nil, status.Error(codes.Unknown, fmt.Sprintf("Requested Source Volume ID %s is different from %s", sourceVolumeId, rbdSnap.SourceVolumeID))
}
return &csi.ListSnapshotsResponse{
Entries: []*csi.ListSnapshotsResponse_Entry{
{
Snapshot: &csi.Snapshot{
SizeBytes: rbdSnap.SizeBytes,
Id: rbdSnap.SnapID,
SourceVolumeId: rbdSnap.SourceVolumeID,
CreatedAt: rbdSnap.CreatedAt,
Status: &csi.SnapshotStatus{
Type: csi.SnapshotStatus_READY,
},
},
},
},
}, nil
} else {
return nil, status.Error(codes.NotFound, fmt.Sprintf("Snapshot ID %s cannot found", snapshotID))
}
}

entries := []*csi.ListSnapshotsResponse_Entry{}
for _, rbdSnap := range rbdSnapshots {
// if source volume ID also set, check source volume id on the cache.
if len(sourceVolumeId) != 0 && rbdSnap.SourceVolumeID != sourceVolumeId {
continue
}
entries = append(entries, &csi.ListSnapshotsResponse_Entry{
Snapshot: &csi.Snapshot{
SizeBytes: rbdSnap.SizeBytes,
Id: rbdSnap.SnapID,
SourceVolumeId: rbdSnap.SourceVolumeID,
CreatedAt: rbdSnap.CreatedAt,
Status: &csi.SnapshotStatus{
Type: csi.SnapshotStatus_READY,
},
},
})
}

return &csi.ListSnapshotsResponse{
Entries: entries,
}, nil
}
2 changes: 1 addition & 1 deletion pkg/rbd/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
}
volOptions.VolName = volName
// Mapping RBD image
devicePath, err := attachRBDImage(volOptions, req.GetNodePublishSecrets())
devicePath, err := attachRBDImage(volOptions, volOptions.UserId, req.GetNodePublishSecrets())
if err != nil {
return nil, err
}
Expand Down
Loading