Skip to content

Commit

Permalink
Persisting driver state and reload state on restart.
Browse files Browse the repository at this point in the history
Upon restart, the driver has to recognize the volumes provisioned during its
last run. For that, the driver has to persist its state (provisioned volume
information). We have two options:

1. Per node state: Persist the driver state on each node specific to that node.
Upon driver start, it has to load the persisted information. Whenever a node
registers, Controller/Master has to collect the volume information stored on
that node.

2. Cluster level state: Persist the consolidated cluster level driver state, say
to a configmap on Kubernetes. This is supposed to be done at the Controller side,
but it is not clear how to pass node specific information to a node.

This change implements the first option, i.e., per node state.

Defined a new interface, StateManager, that handles state persistency
irrespective of whether the state is kept per node or per cluster. Also provided
an implementation of StateManager that persists the state to a file.

Made changes to RegistryServer, so that it informs its listeners upon
registration of a node controller. The ControllerServer on the master listens
for this and collects the volume information on the node by calling
csi.ControllerServer.ListVolumes.

FIXES intel#25 - Clean handling of driver restart
  • Loading branch information
avalluri committed May 17, 2019
1 parent 06c0d7a commit 7ab9cb7
Show file tree
Hide file tree
Showing 18 changed files with 800 additions and 51 deletions.
2 changes: 1 addition & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ argument name | meaning | type
-mode string | driver run mode | string | controller, node |
-nodeid string | node id | string | | nodeid
-registryEndpoint string | endpoint to connect/listen registry server | string | |

-statePath string | Directory path where to persist the state of the driver running on a node | string | absolute directory path on node | /var/lib/<drivername>

Environment variables
---------------------
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.13/pmem-csi-direct-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
- -v=5
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
env:
Expand Down Expand Up @@ -331,6 +332,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- mountPath: /sys
name: sys-dir
- mountPath: /var/lib/pmem-csi-coverage
Expand Down Expand Up @@ -371,6 +374,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.13/pmem-csi-direct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
env:
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
Expand Down Expand Up @@ -252,6 +253,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- mountPath: /sys
name: sys-dir
- args:
Expand Down Expand Up @@ -289,6 +292,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.13/pmem-csi-lvm-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
- -v=5
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
env:
Expand Down Expand Up @@ -331,6 +332,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- mountPath: /var/lib/pmem-csi-coverage
name: coverage-dir
- args:
Expand Down Expand Up @@ -409,6 +412,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.13/pmem-csi-lvm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
env:
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
Expand Down Expand Up @@ -252,6 +253,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- args:
- -v=3
- --kubelet-registration-path=/var/lib/kubelet/plugins/pmem-csi.intel.com/csi.sock
Expand Down Expand Up @@ -318,6 +321,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.14/pmem-csi-direct-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
- -v=5
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
env:
Expand Down Expand Up @@ -351,6 +352,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- mountPath: /sys
name: sys-dir
- mountPath: /var/lib/pmem-csi-coverage
Expand Down Expand Up @@ -391,6 +394,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.14/pmem-csi-direct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
env:
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
Expand Down Expand Up @@ -272,6 +273,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- mountPath: /sys
name: sys-dir
- args:
Expand Down Expand Up @@ -309,6 +312,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.14/pmem-csi-lvm-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
- -v=5
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
env:
Expand Down Expand Up @@ -351,6 +352,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- mountPath: /var/lib/pmem-csi-coverage
name: coverage-dir
- args:
Expand Down Expand Up @@ -429,6 +432,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
7 changes: 7 additions & 0 deletions deploy/kubernetes-1.14/pmem-csi-lvm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ spec:
- -caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- -certFile=/certs/$(KUBE_NODE_NAME).crt
- -keyFile=/certs/$(KUBE_NODE_NAME).key
- -statePath=/var/lib/pmem-csi.intel.com
env:
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
Expand Down Expand Up @@ -272,6 +273,8 @@ spec:
name: registry-cert
- mountPath: /dev
name: dev-dir
- mountPath: /var/lib/pmem-csi.intel.com
name: pmem-state-dir
- args:
- -v=3
- --kubelet-registration-path=/var/lib/kubelet/plugins/pmem-csi.intel.com/csi.sock
Expand Down Expand Up @@ -338,6 +341,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
name: pmem-state-dir
- hostPath:
path: /dev
type: DirectoryOrCreate
Expand Down
9 changes: 8 additions & 1 deletion deploy/kustomize/driver/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ spec:
"-registryEndpoint=$(PMEM_CSI_CONTROLLER_PORT_10000_TCP)",
"-caFile=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
"-certFile=/certs/$(KUBE_NODE_NAME).crt",
"-keyFile=/certs/$(KUBE_NODE_NAME).key"
"-keyFile=/certs/$(KUBE_NODE_NAME).key",
"-statePath=/var/lib/pmem-csi.intel.com"
]
# Passing /dev to container may cause container creation error because
# termination-log is located on /dev/ by default, re-locate to /tmp
Expand Down Expand Up @@ -148,6 +149,8 @@ spec:
mountPath: /certs/
- name : dev-dir
mountPath: /dev
- name: pmem-state-dir
mountPath: /var/lib/pmem-csi.intel.com
- name: driver-registrar
imagePullPolicy: Always
image: quay.io/k8scsi/csi-node-driver-registrar:v1.X.Y
Expand Down Expand Up @@ -179,6 +182,10 @@ spec:
- name: registry-cert
secret:
secretName: pmem-csi-node-secrets
- name: pmem-state-dir
hostPath:
path: /var/lib/pmem-csi.intel.com
type: DirectoryOrCreate
- name: dev-dir
hostPath:
path: /dev
Expand Down
62 changes: 52 additions & 10 deletions pkg/pmem-csi-driver/controllerserver-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ type pmemVolume struct {
// ID of nodes where the volume provisioned/attached
// It would be one if simple volume, else would be more than one for "cached" volume
nodeIDs map[string]VolumeStatus
// VolumeType
volumeType PmemPersistencyModel
}

type masterController struct {
Expand All @@ -65,6 +63,7 @@ type masterController struct {

// Compile-time assertions that *masterController satisfies the interfaces
// it is used as; the build fails here if a required method is missing.
var _ csi.ControllerServer = &masterController{}
var _ PmemService = &masterController{}
var _ RegistryListener = &masterController{}
// volumeMutex is a keyed mutex shared by the whole package.
// NOTE(review): presumably used to serialize operations per volume; the
// meaning of the -1 argument depends on keymutex.NewHashed — confirm.
var volumeMutex = keymutex.NewHashed(-1)

func NewMasterControllerServer(rs *registryServer) *masterController {
Expand All @@ -73,17 +72,62 @@ func NewMasterControllerServer(rs *registryServer) *masterController {
csi.ControllerServiceCapability_RPC_LIST_VOLUMES,
csi.ControllerServiceCapability_RPC_GET_CAPACITY,
}
return &masterController{
cs := &masterController{
DefaultControllerServer: NewDefaultControllerServer(serverCaps),
rs: rs,
pmemVolumes: map[string]*pmemVolume{},
}

rs.AddListener(cs)

return cs
}

// RegisterService registers this master controller as the CSI
// ControllerServer implementation on the given gRPC server.
func (cs *masterController) RegisterService(rpcServer *grpc.Server) {
csi.RegisterControllerServer(rpcServer, cs)
}

// OnNodeAdded retrieves the volumes that already exist on a newly added node
// and records them in cs.pmemVolumes.
//
// It connects to the node's controller through the registry server and issues
// the CSI ControllerServer.ListVolumes call. For every returned volume:
//   - if the volume ID is already known, the node is added to that volume's
//     node set (this is possibly a cache volume present on several nodes);
//   - otherwise a new pmemVolume entry is created from the reported ID,
//     capacity and the "Name" key of the volume context.
//
// Failures to connect or to list volumes are logged as warnings and the
// method returns without modifying cs.pmemVolumes.
func (cs *masterController) OnNodeAdded(ctx context.Context, node NodeInfo) {
	conn, err := cs.rs.ConnectToNodeController(node.NodeID)
	if err != nil {
		glog.Warningf("Failed to connect to node controller at : %s on node %s: %s", node.Endpoint, node.NodeID, err.Error())
		return
	}
	// NOTE(review): conn is never closed in this method; if
	// ConnectToNodeController hands ownership of the connection to the
	// caller it should be closed here — confirm against its contract.

	csiClient := csi.NewControllerClient(conn)
	resp, err := csiClient.ListVolumes(ctx, &csi.ListVolumesRequest{})
	if err != nil {
		glog.Warningf("Failed to get volumes on node %s: %s", node.NodeID, err.Error())
		// resp must not be touched after a failed call; bail out instead of
		// dereferencing it below.
		return
	}

	glog.V(5).Infof("Found Volumes at %s: %v", node.NodeID, resp.Entries)

	for _, entry := range resp.Entries {
		v := entry.GetVolume()
		if v == nil { /* this shouldn't happen */
			continue
		}

		if vol, ok := cs.pmemVolumes[v.VolumeId]; ok && vol != nil {
			// This is possibly a cache volume, so just add this node id.
			vol.nodeIDs[node.NodeID] = Created
			continue
		}

		// First time this volume is seen: create a fresh record for it.
		cs.pmemVolumes[v.VolumeId] = &pmemVolume{
			id:   v.VolumeId,
			size: v.CapacityBytes,
			name: v.VolumeContext["Name"],
			nodeIDs: map[string]VolumeStatus{
				node.NodeID: Created,
			},
		}
	}
}

// OnNodeDeleted is currently a no-op: nothing is removed from
// cs.pmemVolumes when a node goes away.
// NOTE(review): presumably the RegistryListener counterpart of OnNodeAdded,
// called when a node controller is unregistered — confirm against the
// interface definition.
func (cs *masterController) OnNodeDeleted(ctx context.Context, node NodeInfo) {
}

func (cs *masterController) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) {
var vol *pmemVolume
chosenNodes := map[string]VolumeStatus{}
Expand Down Expand Up @@ -117,14 +161,13 @@ func (cs *masterController) CreateVolume(ctx context.Context, req *csi.CreateVol
id, _ := uuid.NewUUID() //nolint: gosec
volumeID := id.String()
inTopology := []*csi.Topology{}
volumeType := pmemPersistencyModelNone
cacheCount := uint64(1)

if req.Parameters == nil {
req.Parameters = map[string]string{}
} else {
if val, ok := req.Parameters[pmemParameterKeyPersistencyModel]; ok {
volumeType = PmemPersistencyModel(val)
volumeType := PmemPersistencyModel(val)
if volumeType == pmemPersistencyModelCache {
if val, ok := req.Parameters[pmemParameterKeyCacheSize]; ok {
c, err := strconv.ParseUint(val, 10, 64)
Expand Down Expand Up @@ -191,11 +234,10 @@ func (cs *masterController) CreateVolume(ctx context.Context, req *csi.CreateVol
glog.V(3).Infof("Chosen nodes: %v", chosenNodes)

vol = &pmemVolume{
id: volumeID,
name: req.Name,
size: asked,
nodeIDs: chosenNodes,
volumeType: volumeType,
id: volumeID,
name: req.Name,
size: asked,
nodeIDs: chosenNodes,
}
cs.pmemVolumes[volumeID] = vol
glog.V(3).Infof("CreateVolume: Record new volume as %v", *vol)
Expand Down
Loading

0 comments on commit 7ab9cb7

Please sign in to comment.