@@ -3,13 +3,15 @@ package client
3
3
import (
4
4
"context"
5
5
"errors"
6
+ "fmt"
6
7
"time"
7
8
8
9
metrics "github.com/armon/go-metrics"
9
10
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
10
11
"github.com/hashicorp/nomad/client/dynamicplugins"
11
12
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
12
13
"github.com/hashicorp/nomad/client/structs"
14
+ nstructs "github.com/hashicorp/nomad/nomad/structs"
13
15
"github.com/hashicorp/nomad/plugins/csi"
14
16
)
15
17
@@ -46,7 +48,9 @@ func (c *CSI) ControllerValidateVolume(req *structs.ClientCSIControllerValidateV
46
48
47
49
plugin , err := c .findControllerPlugin (req .PluginID )
48
50
if err != nil {
49
- return err
51
+ // the server's view of the plugin health is stale, so let it know it
52
+ // should retry with another controller instance
53
+ return fmt .Errorf ("%w: %v" , nstructs .ErrCSIClientRPCRetryable , err )
50
54
}
51
55
defer plugin .Close ()
52
56
@@ -78,7 +82,9 @@ func (c *CSI) ControllerAttachVolume(req *structs.ClientCSIControllerAttachVolum
78
82
defer metrics .MeasureSince ([]string {"client" , "csi_controller" , "publish_volume" }, time .Now ())
79
83
plugin , err := c .findControllerPlugin (req .PluginID )
80
84
if err != nil {
81
- return err
85
+ // the server's view of the plugin health is stale, so let it know it
86
+ // should retry with another controller instance
87
+ return fmt .Errorf ("%w: %v" , nstructs .ErrCSIClientRPCRetryable , err )
82
88
}
83
89
defer plugin .Close ()
84
90
@@ -123,7 +129,9 @@ func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolum
123
129
defer metrics .MeasureSince ([]string {"client" , "csi_controller" , "unpublish_volume" }, time .Now ())
124
130
plugin , err := c .findControllerPlugin (req .PluginID )
125
131
if err != nil {
126
- return err
132
+ // the server's view of the plugin health is stale, so let it know it
133
+ // should retry with another controller instance
134
+ return fmt .Errorf ("%w: %v" , nstructs .ErrCSIClientRPCRetryable , err )
127
135
}
128
136
defer plugin .Close ()
129
137
@@ -152,9 +160,14 @@ func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolum
152
160
grpc_retry .WithMax (3 ),
153
161
grpc_retry .WithBackoff (grpc_retry .BackoffExponential (100 * time .Millisecond )))
154
162
if err != nil {
163
+ if errors .Is (err , nstructs .ErrCSIClientRPCIgnorable ) {
164
+ // if the controller detach previously happened but the server failed to
165
+ // checkpoint, we'll get an error from the plugin but can safely ignore it.
166
+ c .c .logger .Debug ("could not unpublish volume: %v" , err )
167
+ return nil
168
+ }
155
169
return err
156
170
}
157
-
158
171
return nil
159
172
}
160
173
@@ -191,7 +204,10 @@ func (c *CSI) NodeDetachVolume(req *structs.ClientCSINodeDetachVolumeRequest, re
191
204
}
192
205
193
206
err = mounter .UnmountVolume (ctx , req .VolumeID , req .ExternalID , req .AllocID , usageOpts )
194
- if err != nil {
207
+ if err != nil && ! errors .Is (err , nstructs .ErrCSIClientRPCIgnorable ) {
208
+ // an ignorable error means the unmount already happened but the server
209
+ // failed to checkpoint it, so this branch only returns the errors that
210
+ // cannot be safely ignored.
195
211
return err
196
212
}
197
213
return nil
0 commit comments