7
7
"time"
8
8
9
9
corev1 "k8s.io/api/core/v1"
10
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10
11
"k8s.io/apimachinery/pkg/util/wait"
11
12
"k8s.io/client-go/kubernetes"
12
13
"k8s.io/kubectl/pkg/drain"
@@ -68,29 +69,35 @@ func (d *Drainer) DrainNode(ctx context.Context, node *corev1.Node, fullNodeDrai
68
69
return false , nil
69
70
}
70
71
71
- drainHelper := createDrainHelper (d .kubeClient , ctx , fullNodeDrain )
72
72
backoff := wait.Backoff {
73
73
Steps : 5 ,
74
- Duration : 10 * time .Second ,
74
+ Duration : 5 * time .Second ,
75
75
Factor : 2 ,
76
76
}
77
77
var lastErr error
78
-
79
78
reqLogger .Info ("drainNode(): Start draining" )
80
- if err = wait .ExponentialBackoff (backoff , func () (bool , error ) {
79
+ if err = wait .ExponentialBackoffWithContext (ctx , backoff , func (ctx context.Context ) (bool , error ) {
80
+ drainHelper := createDrainHelper (d .kubeClient , ctx , fullNodeDrain )
81
81
err := drain .RunCordonOrUncordon (drainHelper , node , true )
82
82
if err != nil {
83
83
lastErr = err
84
84
reqLogger .Info ("drainNode(): Cordon failed, retrying" , "error" , err )
85
85
return false , nil
86
86
}
87
87
err = drain .RunNodeDrain (drainHelper , node .Name )
88
- if err == nil {
89
- return true , nil
88
+ if err != nil {
89
+ lastErr = err
90
+ reqLogger .Info ("drainNode(): Draining failed, retrying" , "error" , err )
91
+ return false , nil
90
92
}
91
- lastErr = err
92
- reqLogger .Info ("drainNode(): Draining failed, retrying" , "error" , err )
93
- return false , nil
93
+
94
+ err = d .removeDaemonSetsFromNode (ctx , node .Name )
95
+ if err != nil {
96
+ lastErr = err
97
+ return false , nil
98
+ }
99
+
100
+ return true , nil
94
101
}); err != nil {
95
102
if wait .Interrupted (err ) {
96
103
reqLogger .Info ("drainNode(): failed to drain node" , "steps" , backoff .Steps , "error" , lastErr )
@@ -131,6 +138,28 @@ func (d *Drainer) CompleteDrainNode(ctx context.Context, node *corev1.Node) (boo
131
138
return completed , nil
132
139
}
133
140
141
+ // removeDaemonSetsFromNode go over all the remain pods and search for DaemonSets that have SR-IOV devices to remove them
142
+ // we can't use the drain from core kubernetes as it doesn't support removing pods that are part of a DaemonSets
143
+ func (d * Drainer ) removeDaemonSetsFromNode (ctx context.Context , nodeName string ) error {
144
+ reqLogger := log .FromContext (ctx )
145
+ reqLogger .Info ("drainNode(): remove DaemonSets using sriov devices from node" , "nodeName" , nodeName )
146
+
147
+ podList , err := d .kubeClient .CoreV1 ().Pods ("" ).List (ctx , metav1.ListOptions {FieldSelector : fmt .Sprintf ("spec.nodeName=%s" , nodeName )})
148
+ if err != nil {
149
+ reqLogger .Info ("drainNode(): Failed to list pods, retrying" , "error" , err )
150
+ return err
151
+ }
152
+
153
+ // remove pods that are owned by a DaemonSet and use SR-IOV devices
154
+ dsPodsList := getDsPodsToRemove (podList )
155
+ drainHelper := createDrainHelper (d .kubeClient , ctx , true )
156
+ err = drainHelper .DeleteOrEvictPods (dsPodsList )
157
+ if err != nil {
158
+ reqLogger .Error (err , "failed to delete or evict pods from node" , "nodeName" , nodeName )
159
+ }
160
+ return err
161
+ }
162
+
134
163
// createDrainHelper function to create a drain helper
135
164
// if fullDrain is false we only remove pods that have the resourcePrefix
136
165
// if not we remove all the pods in the node
@@ -150,25 +179,21 @@ func createDrainHelper(kubeClient kubernetes.Interface, ctx context.Context, ful
150
179
}
151
180
log .Log .Info (fmt .Sprintf ("%s pod from Node %s/%s" , verbStr , pod .Namespace , pod .Name ))
152
181
},
153
- Ctx : ctx ,
154
- Out : writer {logger .Info },
155
- ErrOut : writer {func (msg string , kv ... interface {}) { logger .Error (nil , msg , kv ... ) }},
182
+ Ctx : ctx ,
183
+ Out : writer {logger .Info },
184
+ ErrOut : writer {func (msg string , kv ... interface {}) {
185
+ logger .Error (nil , msg , kv ... )
186
+ }},
156
187
}
157
188
158
189
// when we just want to drain and not reboot we can only remove the pods using sriov devices
159
190
if ! fullDrain {
160
191
deleteFunction := func (p corev1.Pod ) drain.PodDeleteStatus {
161
- for _ , c := range p .Spec .Containers {
162
- if c .Resources .Requests != nil {
163
- for r := range c .Resources .Requests {
164
- if strings .HasPrefix (r .String (), vars .ResourcePrefix ) {
165
- return drain.PodDeleteStatus {
166
- Delete : true ,
167
- Reason : "pod contain SR-IOV device" ,
168
- Message : "SR-IOV network operator draining the node" ,
169
- }
170
- }
171
- }
192
+ if podHasSRIOVDevice (& p ) {
193
+ return drain.PodDeleteStatus {
194
+ Delete : true ,
195
+ Reason : "pod contains SR-IOV device" ,
196
+ Message : "SR-IOV network operator draining the node" ,
172
197
}
173
198
}
174
199
return drain.PodDeleteStatus {Delete : false }
@@ -179,3 +204,38 @@ func createDrainHelper(kubeClient kubernetes.Interface, ctx context.Context, ful
179
204
180
205
return drainer
181
206
}
207
+
208
+ func podHasSRIOVDevice (p * corev1.Pod ) bool {
209
+ for _ , c := range p .Spec .Containers {
210
+ if c .Resources .Requests != nil {
211
+ for r := range c .Resources .Requests {
212
+ if strings .HasPrefix (r .String (), vars .ResourcePrefix ) {
213
+ return true
214
+ }
215
+ }
216
+ }
217
+ }
218
+
219
+ return false
220
+ }
221
+
222
+ func podsHasDSOwner (p * corev1.Pod ) bool {
223
+ for _ , o := range p .OwnerReferences {
224
+ if o .Kind == "DaemonSet" {
225
+ return true
226
+ }
227
+ }
228
+
229
+ return false
230
+ }
231
+
232
+ func getDsPodsToRemove (pl * corev1.PodList ) []corev1.Pod {
233
+ podsToRemove := []corev1.Pod {}
234
+ for _ , pod := range pl .Items {
235
+ if podsHasDSOwner (& pod ) && podHasSRIOVDevice (& pod ) {
236
+ podsToRemove = append (podsToRemove , pod )
237
+ }
238
+ }
239
+
240
+ return podsToRemove
241
+ }
0 commit comments