Commit d7f1a5e

csi: checkpoint volume claim garbage collection
Adds a `CSIVolumeClaim` type to be tracked as current and past claims on a volume. Allows for a client RPC failure during node or controller detachment without having to keep the allocation around after the first garbage collection eval. This changeset lays groundwork for moving the actual detachment RPCs into a volume watching loop outside the GC eval.
1 parent 77c7ed3 commit d7f1a5e
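For reference, a minimal sketch of the claim shape this changeset relies on, inferred from how the hunks below use it; the real definitions live in `nomad/structs`, and anything beyond the field and constant names that appear in the diff is an assumption:

```go
package structs // sketch only; reconstructed from the diff, not the actual Nomad source

// CSIVolumeClaimMode distinguishes read from write claims.
type CSIVolumeClaimMode int

const (
	CSIVolumeClaimRead CSIVolumeClaimMode = iota
	CSIVolumeClaimWrite
)

// CSIVolumeClaimState is checkpointed as detach steps complete, so a GC
// eval can resume after a client RPC failure without redoing earlier steps.
type CSIVolumeClaimState int

const (
	CSIVolumeClaimStateTaken CSIVolumeClaimState = iota
	CSIVolumeClaimStateNodeDetached
	CSIVolumeClaimStateControllerDetached
	CSIVolumeClaimStateReadyToFree
)

// CSIVolumeClaim is tracked per allocation on the volume.
type CSIVolumeClaim struct {
	AllocationID string
	NodeID       string
	Mode         CSIVolumeClaimMode
	State        CSIVolumeClaimState
}
```

Alongside this, the volume tracks current claims in `ReadClaims`/`WriteClaims` and finished-but-not-yet-released ones in `PastClaims`, which is what the garbage collector below walks instead of building its own `gcClaimRequest` list.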

12 files changed, +269 -130 lines changed

client/allocrunner/csi_hook.go (+1)

```diff
@@ -104,6 +104,7 @@ func (c *csiHook) claimVolumesFromAlloc() (map[string]*volumeAndRequest, error)
 	req := &structs.CSIVolumeClaimRequest{
 		VolumeID:     pair.request.Source,
 		AllocationID: c.alloc.ID,
+		NodeID:       c.alloc.NodeID,
 		Claim:        claimType,
 	}
 	req.Region = c.alloc.Job.Region
```

client/pluginmanager/csimanager/instance_test.go (-2)

```diff
@@ -2,7 +2,6 @@ package csimanager
 
 import (
 	"context"
-	"fmt"
 	"sync"
 	"testing"
 	"time"
@@ -47,7 +46,6 @@ func TestInstanceManager_Shutdown(t *testing.T) {
 	im.shutdownCtxCancelFn = cancelFn
 	im.shutdownCh = make(chan struct{})
 	im.updater = func(_ string, info *structs.CSIInfo) {
-		fmt.Println(info)
 		lock.Lock()
 		defer lock.Unlock()
 		pluginHealth = info.Healthy
```

nomad/core_sched.go (+78 -54)

```diff
@@ -749,17 +749,15 @@ func volumeClaimReap(srv RPCServer, volID, namespace, region, leaderACL string,
 		return err
 	}
 
-	gcClaims, nodeClaims := collectClaimsToGCImpl(vol, runningAllocs)
+	nodeClaims := collectClaimsToGCImpl(vol, runningAllocs)
 
 	var result *multierror.Error
-	for _, claim := range gcClaims {
+	for _, claim := range vol.PastClaims {
 		nodeClaims, err = volumeClaimReapImpl(srv,
 			&volumeClaimReapArgs{
 				vol:       vol,
 				plug:      plug,
-				allocID:   claim.allocID,
-				nodeID:    claim.nodeID,
-				mode:      claim.mode,
+				claim:     claim,
 				namespace: namespace,
 				region:    region,
 				leaderACL: leaderACL,
@@ -775,48 +773,47 @@
 
 }
 
-type gcClaimRequest struct {
-	allocID string
-	nodeID  string
-	mode    structs.CSIVolumeClaimMode
-}
-
-func collectClaimsToGCImpl(vol *structs.CSIVolume, runningAllocs bool) ([]gcClaimRequest, map[string]int) {
-	gcAllocs := []gcClaimRequest{}
+func collectClaimsToGCImpl(vol *structs.CSIVolume, runningAllocs bool) map[string]int {
 	nodeClaims := map[string]int{} // node IDs -> count
 
 	collectFunc := func(allocs map[string]*structs.Allocation,
-		mode structs.CSIVolumeClaimMode) {
-		for _, alloc := range allocs {
-			// we call denormalize on the volume above to populate
-			// Allocation pointers. But the alloc might have been
-			// garbage collected concurrently, so if the alloc is
-			// still nil we can safely skip it.
-			if alloc == nil {
-				continue
+		claims map[string]*structs.CSIVolumeClaim) {
+
+		for allocID, alloc := range allocs {
+			claim, ok := claims[allocID]
+			if !ok {
+				// COMPAT(1.0): the CSIVolumeClaim fields were added
+				// after 0.11.1, so claims made before that may be
+				// missing this value. note that we'll have non-nil
+				// allocs here because we called denormalize on the
+				// value.
+				claim = &structs.CSIVolumeClaim{
+					AllocationID: allocID,
+					NodeID:       alloc.NodeID,
+					State:        structs.CSIVolumeClaimStateTaken,
+				}
 			}
-			nodeClaims[alloc.NodeID]++
+			nodeClaims[claim.NodeID]++
 			if runningAllocs || alloc.Terminated() {
-				gcAllocs = append(gcAllocs, gcClaimRequest{
-					allocID: alloc.ID,
-					nodeID:  alloc.NodeID,
-					mode:    mode,
-				})
+				// only overwrite the PastClaim if this is new,
+				// so that we can track state between subsequent calls
+				if _, exists := vol.PastClaims[claim.AllocationID]; !exists {
+					claim.State = structs.CSIVolumeClaimStateTaken
+					vol.PastClaims[claim.AllocationID] = claim
+				}
 			}
 		}
 	}
 
-	collectFunc(vol.WriteAllocs, structs.CSIVolumeClaimWrite)
-	collectFunc(vol.ReadAllocs, structs.CSIVolumeClaimRead)
-	return gcAllocs, nodeClaims
+	collectFunc(vol.WriteAllocs, vol.WriteClaims)
+	collectFunc(vol.ReadAllocs, vol.ReadClaims)
+	return nodeClaims
 }
 
 type volumeClaimReapArgs struct {
 	vol       *structs.CSIVolume
 	plug      *structs.CSIPlugin
-	allocID   string
-	nodeID    string
-	mode      structs.CSIVolumeClaimMode
+	claim     *structs.CSIVolumeClaim
 	region    string
 	namespace string
 	leaderACL string
@@ -825,42 +822,78 @@ type volumeClaimReapArgs struct {
 
 func volumeClaimReapImpl(srv RPCServer, args *volumeClaimReapArgs) (map[string]int, error) {
 	vol := args.vol
-	nodeID := args.nodeID
+	claim := args.claim
+
+	var err error
+	var nReq *cstructs.ClientCSINodeDetachVolumeRequest
+
+	checkpoint := func(claimState structs.CSIVolumeClaimState) error {
+		req := &structs.CSIVolumeClaimRequest{
+			VolumeID:     vol.ID,
+			AllocationID: claim.AllocationID,
+			Claim:        structs.CSIVolumeClaimRelease,
+			WriteRequest: structs.WriteRequest{
+				Region:    args.region,
+				Namespace: args.namespace,
+				AuthToken: args.leaderACL,
+			},
+		}
+		return srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
+	}
+
+	// previous checkpoints may have set the past claim state already.
+	// in practice we should never see CSIVolumeClaimStateControllerDetached
+	// but having an option for the state makes it easy to add a checkpoint
+	// in a backwards compatible way if we need one later
+	switch claim.State {
+	case structs.CSIVolumeClaimStateNodeDetached:
+		goto NODE_DETACHED
+	case structs.CSIVolumeClaimStateControllerDetached:
+		goto RELEASE_CLAIM
+	case structs.CSIVolumeClaimStateReadyToFree:
+		goto RELEASE_CLAIM
+	}
 
 	// (1) NodePublish / NodeUnstage must be completed before controller
 	// operations or releasing the claim.
-	nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
+	nReq = &cstructs.ClientCSINodeDetachVolumeRequest{
 		PluginID:       args.plug.ID,
 		VolumeID:       vol.ID,
 		ExternalID:     vol.RemoteID(),
-		AllocID:        args.allocID,
-		NodeID:         nodeID,
+		AllocID:        claim.AllocationID,
+		NodeID:         claim.NodeID,
 		AttachmentMode: vol.AttachmentMode,
 		AccessMode:     vol.AccessMode,
-		ReadOnly:       args.mode == structs.CSIVolumeClaimRead,
+		ReadOnly:       claim.Mode == structs.CSIVolumeClaimRead,
 	}
-	err := srv.RPC("ClientCSI.NodeDetachVolume", nReq,
+	err = srv.RPC("ClientCSI.NodeDetachVolume", nReq,
 		&cstructs.ClientCSINodeDetachVolumeResponse{})
 	if err != nil {
 		return args.nodeClaims, err
 	}
-	args.nodeClaims[nodeID]--
+	err = checkpoint(structs.CSIVolumeClaimStateNodeDetached)
+	if err != nil {
+		return args.nodeClaims, err
+	}
+
+NODE_DETACHED:
+	args.nodeClaims[claim.NodeID]--
 
 	// (2) we only emit the controller unpublish if no other allocs
 	// on the node need it, but we also only want to make this
 	// call at most once per node
-	if vol.ControllerRequired && args.nodeClaims[nodeID] < 1 {
+	if vol.ControllerRequired && args.nodeClaims[claim.NodeID] < 1 {
 
 		// we need to get the CSI Node ID, which is not the same as
 		// the Nomad Node ID
 		ws := memdb.NewWatchSet()
-		targetNode, err := srv.State().NodeByID(ws, nodeID)
+		targetNode, err := srv.State().NodeByID(ws, claim.NodeID)
 		if err != nil {
 			return args.nodeClaims, err
 		}
 		if targetNode == nil {
 			return args.nodeClaims, fmt.Errorf("%s: %s",
-				structs.ErrUnknownNodePrefix, nodeID)
+				structs.ErrUnknownNodePrefix, claim.NodeID)
 		}
 		targetCSIInfo, ok := targetNode.CSINodePlugins[args.plug.ID]
 		if !ok {
@@ -879,18 +912,9 @@ func volumeClaimReapImpl(srv RPCServer, args *volumeClaimReapArgs) (map[string]int, error) {
 		}
 	}
 
+RELEASE_CLAIM:
 	// (3) release the claim from the state store, allowing it to be rescheduled
-	req := &structs.CSIVolumeClaimRequest{
-		VolumeID:     vol.ID,
-		AllocationID: args.allocID,
-		Claim:        structs.CSIVolumeClaimRelease,
-		WriteRequest: structs.WriteRequest{
-			Region:    args.region,
-			Namespace: args.namespace,
-			AuthToken: args.leaderACL,
-		},
-	}
-	err = srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
+	err = checkpoint(structs.CSIVolumeClaimStateReadyToFree)
 	if err != nil {
 		return args.nodeClaims, err
 	}
```
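Taken together, these hunks make `volumeClaimReapImpl` resumable: each detach step checkpoints the claim's state through the `CSIVolume.Claim` RPC, and the `switch claim.State` at the top jumps past steps that already succeeded on an earlier GC eval. A compressed, hypothetical sketch of that resume behavior follows (stand-in names and a simplified two-step flow, not the Nomad code):

```go
package main

import (
	"errors"
	"fmt"
)

// claimState stands in for structs.CSIVolumeClaimState above.
type claimState int

const (
	stateTaken claimState = iota
	stateNodeDetached
	stateReadyToFree
)

// reapOnce mimics the checkpointed flow: detach on the node, checkpoint,
// then release the claim. A failure leaves the claim in the last
// checkpointed state so the next pass skips the steps that already ran.
func reapOnce(state claimState, nodeDetach, release func() error) (claimState, error) {
	if state == stateTaken {
		if err := nodeDetach(); err != nil {
			return stateTaken, err // node RPC failed; retry from the start next pass
		}
		state = stateNodeDetached // checkpoint after step (1)
	}
	if state == stateNodeDetached {
		if err := release(); err != nil {
			return stateNodeDetached, err // don't repeat the node detach later
		}
		state = stateReadyToFree // checkpoint after step (3)
	}
	return state, nil
}

func main() {
	// First pass: the node detach succeeds but releasing the claim fails.
	s, err := reapOnce(stateTaken,
		func() error { return nil },
		func() error { return errors.New("client RPC failure") })
	fmt.Println(s, err) // 1 client RPC failure

	// Second pass resumes from the checkpoint; the node detach is not repeated.
	s, err = reapOnce(s,
		func() error { panic("node detach must not run twice") },
		func() error { return nil })
	fmt.Println(s, err) // 2 <nil>
}
```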

nomad/core_sched_test.go (+30 -32)

```diff
@@ -2286,12 +2286,22 @@ func TestCSI_GCVolumeClaims_Collection(t *testing.T) {
 	require.NoError(t, err)
 
 	// Claim the volumes and verify the claims were set
-	err = state.CSIVolumeClaim(index, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
+	err = state.CSIVolumeClaim(index, ns, volId0, &structs.CSIVolumeClaim{
+		AllocationID: alloc1.ID,
+		NodeID:       alloc1.NodeID,
+		Mode:         structs.CSIVolumeClaimWrite,
+	})
 	index++
 	require.NoError(t, err)
-	err = state.CSIVolumeClaim(index, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
+
+	err = state.CSIVolumeClaim(index, ns, volId0, &structs.CSIVolumeClaim{
+		AllocationID: alloc2.ID,
+		NodeID:       alloc2.NodeID,
+		Mode:         structs.CSIVolumeClaimRead,
+	})
 	index++
 	require.NoError(t, err)
+
 	vol, err = state.CSIVolumeByID(ws, ns, volId0)
 	require.NoError(t, err)
 	require.Len(t, vol.ReadAllocs, 1)
@@ -2306,9 +2316,9 @@
 	vol, err = state.CSIVolumeDenormalize(ws, vol)
 	require.NoError(t, err)
 
-	gcClaims, nodeClaims := collectClaimsToGCImpl(vol, false)
+	nodeClaims := collectClaimsToGCImpl(vol, false)
 	require.Equal(t, nodeClaims[node.ID], 2)
-	require.Len(t, gcClaims, 2)
+	require.Len(t, vol.PastClaims, 2)
 }
 
 func TestCSI_GCVolumeClaims_Reap(t *testing.T) {
@@ -2326,7 +2336,6 @@ func TestCSI_GCVolumeClaims_Reap(t *testing.T) {
 
 	cases := []struct {
 		Name                                string
-		Claim                               gcClaimRequest
 		ClaimsCount                         map[string]int
 		ControllerRequired                  bool
 		ExpectedErr                         string
@@ -2338,12 +2347,7 @@
 		srv *MockRPCServer
 	}{
 		{
-			Name: "NodeDetachVolume fails",
-			Claim: gcClaimRequest{
-				allocID: alloc.ID,
-				nodeID:  node.ID,
-				mode:    structs.CSIVolumeClaimRead,
-			},
+			Name:               "NodeDetachVolume fails",
 			ClaimsCount:        map[string]int{node.ID: 1},
 			ControllerRequired: true,
 			ExpectedErr:        "node plugin missing",
@@ -2355,36 +2359,26 @@
 			},
 		},
 		{
-			Name: "ControllerDetachVolume no controllers",
-			Claim: gcClaimRequest{
-				allocID: alloc.ID,
-				nodeID:  node.ID,
-				mode:    structs.CSIVolumeClaimRead,
-			},
-			ClaimsCount:        map[string]int{node.ID: 1},
-			ControllerRequired: true,
-			ExpectedErr: fmt.Sprintf(
-				"Unknown node: %s", node.ID),
+			Name:               "ControllerDetachVolume no controllers",
+			ClaimsCount:        map[string]int{node.ID: 1},
+			ControllerRequired: true,
+			ExpectedErr:        fmt.Sprintf("Unknown node: %s", node.ID),
 			ExpectedClaimsCount:                 0,
 			ExpectedNodeDetachVolumeCount:       1,
 			ExpectedControllerDetachVolumeCount: 0,
+			ExpectedVolumeClaimCount:            1,
 			srv: &MockRPCServer{
 				state: s.State(),
 			},
 		},
 		{
-			Name: "ControllerDetachVolume node-only",
-			Claim: gcClaimRequest{
-				allocID: alloc.ID,
-				nodeID:  node.ID,
-				mode:    structs.CSIVolumeClaimRead,
-			},
+			Name:               "ControllerDetachVolume node-only",
 			ClaimsCount:        map[string]int{node.ID: 1},
 			ControllerRequired: false,
 			ExpectedClaimsCount:                 0,
 			ExpectedNodeDetachVolumeCount:       1,
 			ExpectedControllerDetachVolumeCount: 0,
-			ExpectedVolumeClaimCount:            1,
+			ExpectedVolumeClaimCount:            2,
 			srv: &MockRPCServer{
 				state: s.State(),
 			},
@@ -2394,12 +2388,16 @@
 	for _, tc := range cases {
 		t.Run(tc.Name, func(t *testing.T) {
 			vol.ControllerRequired = tc.ControllerRequired
+			claim := &structs.CSIVolumeClaim{
+				AllocationID: alloc.ID,
+				NodeID:       node.ID,
+				State:        structs.CSIVolumeClaimStateTaken,
+				Mode:         structs.CSIVolumeClaimRead,
+			}
 			nodeClaims, err := volumeClaimReapImpl(tc.srv, &volumeClaimReapArgs{
 				vol:       vol,
 				plug:      plugin,
-				allocID:   tc.Claim.allocID,
-				nodeID:    tc.Claim.nodeID,
-				mode:      tc.Claim.mode,
+				claim:     claim,
 				region:    "global",
 				namespace: "default",
 				leaderACL: "not-in-use",
@@ -2411,7 +2409,7 @@
 				require.NoError(err)
 			}
 			require.Equal(tc.ExpectedClaimsCount,
-				nodeClaims[tc.Claim.nodeID], "expected claims")
+				nodeClaims[claim.NodeID], "expected claims remaining")
 			require.Equal(tc.ExpectedNodeDetachVolumeCount,
 				tc.srv.countCSINodeDetachVolume, "node detach RPC count")
 			require.Equal(tc.ExpectedControllerDetachVolumeCount,
```

nomad/csi_endpoint.go (+6 -1)

```diff
@@ -400,22 +400,27 @@ func (v *CSIVolume) controllerPublishVolume(req *structs.CSIVolumeClaimRequest,
 		return nil
 	}
 
+	// get Nomad's ID for the client node (not the storage provider's ID)
 	targetNode, err := state.NodeByID(ws, alloc.NodeID)
 	if err != nil {
 		return err
 	}
 	if targetNode == nil {
 		return fmt.Errorf("%s: %s", structs.ErrUnknownNodePrefix, alloc.NodeID)
 	}
+
+	// get the the storage provider's ID for the client node (not
+	// Nomad's ID for the node)
 	targetCSIInfo, ok := targetNode.CSINodePlugins[plug.ID]
 	if !ok {
 		return fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
 	}
+	externalNodeID := targetCSIInfo.NodeInfo.ID
 
 	method := "ClientCSI.ControllerAttachVolume"
 	cReq := &cstructs.ClientCSIControllerAttachVolumeRequest{
 		VolumeID:        vol.RemoteID(),
-		ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
+		ClientCSINodeID: externalNodeID,
 		AttachmentMode:  vol.AttachmentMode,
 		AccessMode:      vol.AccessMode,
 		ReadOnly:        req.Claim == structs.CSIVolumeClaimRead,
```