ovh_cloud_node_group.go
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ovhcloud

import (
"context"
"fmt"
"math"
"math/rand"
"strings"
"sync"
"time"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
)
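
// providerIDPrefix is the prefix OVHcloud (OpenStack-backed) nodes carry in their
// Node.Spec.ProviderID; instance IDs built from the API below reuse it so they can
// be matched against the provider IDs registered by the kubelet.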
const providerIDPrefix = "openstack:///"

// NodeGroup implements cloudprovider.NodeGroup interface.
type NodeGroup struct {
sdk.NodePool
Manager *OvhCloudManager
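// CurrentSize caches the node pool size as seen by the autoscaler; -1 means it has
// not been set yet, in which case TargetSize falls back to the DesiredNodes value
// reported by the API.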
CurrentSize int
mutex sync.Mutex
}

// MaxSize returns maximum size of the node pool.
func (ng *NodeGroup) MaxSize() int {
return int(ng.MaxNodes)
}

// MinSize returns minimum size of the node pool.
func (ng *NodeGroup) MinSize() int {
return int(ng.MinNodes)
}

// TargetSize returns the current TARGET size of the node pool. It is possible that the
// number is different from the number of nodes registered in Kubernetes.
func (ng *NodeGroup) TargetSize() (int, error) {
// By default, fetch the API desired nodes before using target size from autoscaler
if ng.CurrentSize == -1 {
return int(ng.DesiredNodes), nil
}
return ng.CurrentSize, nil
}

// IncreaseSize increases node pool size.
func (ng *NodeGroup) IncreaseSize(delta int) error {
// Do not use node group which does not support autoscaling
if !ng.Autoscale {
return nil
}
klog.V(4).Infof("Increasing NodeGroup size by %d node(s)", delta)
// First, verify the NodeGroup can be increased
if delta <= 0 {
return fmt.Errorf("increase size node group delta must be positive")
}
size, err := ng.TargetSize()
if err != nil {
return fmt.Errorf("failed to get NodeGroup target size")
}
if size+delta > ng.MaxSize() {
return fmt.Errorf("node group size would be above minimum size - desired: %d, max: %d", size+delta, ng.MaxSize())
}
// Then, forge current size and parameters
ng.CurrentSize = size + delta
desired := uint32(ng.CurrentSize)
opts := sdk.UpdateNodePoolOpts{
DesiredNodes: &desired,
}
klog.V(4).Infof("Upscaling node pool %s to %d desired nodes", ng.ID, desired)
// Call API to increase desired nodes number, automatically creating new nodes
resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts)
if err != nil {
return fmt.Errorf("failed to increase node pool desired size: %w", err)
}
ng.Status = resp.Status
return nil
}

// DeleteNodes deletes the nodes from the group.
func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
// DeleteNodes is called in a goroutine so it can run in parallel with other deletions
// (goroutines are created in ScaleDown.scheduleDeleteEmptyNodes()).
// A mutex keeps the CurrentSize attribute consistent across concurrent calls.
ng.mutex.Lock()
defer ng.mutex.Unlock()
// Do not use node group which does not support autoscaling
if !ng.Autoscale {
return nil
}
klog.V(4).Infof("Deleting %d node(s)", len(nodes))
// First, verify the NodeGroup can be decreased
size, err := ng.TargetSize()
if err != nil {
return fmt.Errorf("failed to get NodeGroup target size")
}
if size-len(nodes) < ng.MinSize() {
return fmt.Errorf("node group size would be below minimum size - desired: %d, max: %d", size-len(nodes), ng.MinSize())
}
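// Collect the provider IDs of the nodes to remove; the API expects them alongside
// the new desired node count.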
nodeProviderIds := make([]string, 0)
for _, node := range nodes {
nodeProviderIds = append(nodeProviderIds, node.Spec.ProviderID)
}
desired := uint32(size - len(nodes))
opts := sdk.UpdateNodePoolOpts{
DesiredNodes: &desired,
NodesToRemove: nodeProviderIds,
}
klog.V(4).Infof("Downscaling node pool %s to %d desired nodes by deleting the following nodes: %s", ng.ID, desired, nodeProviderIds)
// Call API to remove nodes from a NodeGroup
resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts)
if err != nil {
return fmt.Errorf("failed to delete node pool nodes: %w", err)
}
// Update the node group
ng.Status = resp.Status
ng.CurrentSize = size - len(nodes)
return nil
}

// DecreaseTargetSize decreases the target size of the node group. This function
// doesn't allow deleting any existing nodes and can only be used to reduce the
// request for new nodes that have not yet been fulfilled. Delta should be negative.
// It is assumed that the cloud provider will not delete existing nodes when there
// is an option to just decrease the target.
func (ng *NodeGroup) DecreaseTargetSize(delta int) error {
// Cancellation of node provisioning is not supported yet
return cloudprovider.ErrNotImplemented
}

// Id returns node pool id.
func (ng *NodeGroup) Id() string {
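// The pool name is the autoscaler-facing identifier; API calls use the pool UUID (ng.ID) instead.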
return ng.Name
}

// Debug returns a debug string for the NodeGroup.
func (ng *NodeGroup) Debug() string {
// Printing name (target size - min size - max size)
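// e.g. "my-pool (3:1:10)" for a hypothetical pool of 3 nodes bounded between 1 and 10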
return fmt.Sprintf("%s (%d:%d:%d)", ng.Id(), ng.CurrentSize, ng.MinSize(), ng.MaxSize())
}

// Nodes returns a list of all nodes that belong to this node group.
func (ng *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
// Fetch all nodes contained in the node group
nodes, err := ng.Manager.Client.ListNodePoolNodes(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID)
if err != nil {
return nil, fmt.Errorf("failed to list node pool nodes: %w", err)
}
klog.V(4).Infof("%d nodes are listed in node pool %s", len(nodes), ng.ID)
// Cast all API nodes into instance interface
instances := make([]cloudprovider.Instance, 0)
for _, node := range nodes {
instance := cloudprovider.Instance{
Id: fmt.Sprintf("%s%s", providerIDPrefix, node.InstanceID),
Status: toInstanceStatus(node.Status),
}
instances = append(instances, instance)
// Store the associated node group in cache for future reference
ng.Manager.setNodeGroupPerProviderID(instance.Id, ng)
}
return instances, nil
}

// TemplateNodeInfo returns a node template for this node group.
func (ng *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
// Forge node template in a node group
node := &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-node-%d", ng.Id(), rand.Int63()),
Labels: ng.Template.Metadata.Labels,
Annotations: ng.Template.Metadata.Annotations,
Finalizers: ng.Template.Metadata.Finalizers,
},
Spec: apiv1.NodeSpec{
Taints: ng.Template.Spec.Taints,
},
Status: apiv1.NodeStatus{
Capacity: apiv1.ResourceList{},
Conditions: cloudprovider.BuildReadyConditions(),
},
}
// Add the nodepool label
if node.ObjectMeta.Labels == nil {
node.ObjectMeta.Labels = make(map[string]string)
}
node.ObjectMeta.Labels[NodePoolLabel] = ng.Id()
flavor, err := ng.Manager.getFlavorByName(ng.Flavor)
if err != nil {
return nil, fmt.Errorf("failed to get specs for flavor %q: %w", ng.Flavor, err)
}
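// Build the template capacity from the flavor: 110 pods (the default kubelet maximum),
// vCPUs and GPUs as reported by the API, and RAM converted to bytes (the flavor value
// is assumed to be expressed in gigabytes). Allocatable is set equal to capacity, so
// no system reservation is subtracted.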
node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(flavor.VCPUs), resource.DecimalSI)
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(int64(flavor.GPUs), resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(flavor.RAM)*int64(math.Pow(1024, 3)), resource.DecimalSI)
node.Status.Allocatable = node.Status.Capacity
// Setup node info template
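// Seed the NodeInfo with a kube-proxy pod so that its resource usage is accounted
// for on the simulated node.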
nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.Id()))
nodeInfo.SetNode(node)
return nodeInfo, nil
}

// Exist checks if the node group really exists on the cloud provider side. This makes it
// possible to tell the theoretical node group from the real one.
func (ng *NodeGroup) Exist() bool {
return ng.Id() != ""
}

// Create creates the node group on the cloud provider side.
func (ng *NodeGroup) Create() (cloudprovider.NodeGroup, error) {
klog.V(4).Info("Creating a new NodeGroup")
// Forge create node pool parameters (defaulting b2-7 for now)
name := ng.Id()
size := uint32(ng.CurrentSize)
min := uint32(ng.MinSize())
max := uint32(ng.MaxSize())
opts := sdk.CreateNodePoolOpts{
FlavorName: "b2-7",
Name: &name,
DesiredNodes: &size,
MinNodes: &min,
MaxNodes: &max,
Autoscale: true,
}
// Call API to add a node pool in the project/cluster
np, err := ng.Manager.Client.CreateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, &opts)
if err != nil {
return nil, fmt.Errorf("failed to create node pool: %w", err)
}
// Forge a node group interface given the API response
return &NodeGroup{
NodePool: *np,
Manager: ng.Manager,
CurrentSize: int(ng.DesiredNodes),
}, nil
}

// Delete deletes the node group on the cloud provider side.
// This will be executed only for autoprovisioned node groups, once their size drops to 0.
func (ng *NodeGroup) Delete() error {
klog.V(4).Infof("Deleting NodeGroup %s", ng.Id())
// Call API to delete the node pool given its project and cluster
_, err := ng.Manager.Client.DeleteNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID)
if err != nil {
return fmt.Errorf("failed to delete node pool: %w", err)
}
return nil
}

// Autoprovisioned returns true if the node group is autoprovisioned.
func (ng *NodeGroup) Autoprovisioned() bool {
// This is not handled yet.
return false
}

// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular
// NodeGroup. Returning a nil will result in using default options.
func (ng *NodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) {
// If the node pool has no autoscaling options, return nil so that the defaults are used
if ng.Autoscaling == nil {
return nil, nil
}
// Forge autoscaling configuration from node pool
cfg := &config.NodeGroupAutoscalingOptions{
ScaleDownUnneededTime: time.Duration(ng.Autoscaling.ScaleDownUnneededTimeSeconds) * time.Second,
ScaleDownUnreadyTime: time.Duration(ng.Autoscaling.ScaleDownUnreadyTimeSeconds) * time.Second,
}
// The node pool exposes a single scale-down utilization threshold: apply it to the
// GPU threshold for GPU flavors and to the regular threshold otherwise, keeping the
// default value for the other one.
if ng.isGpu() {
cfg.ScaleDownUtilizationThreshold = defaults.ScaleDownUtilizationThreshold
cfg.ScaleDownGpuUtilizationThreshold = float64(ng.Autoscaling.ScaleDownUtilizationThreshold)
} else {
cfg.ScaleDownUtilizationThreshold = float64(ng.Autoscaling.ScaleDownUtilizationThreshold)
cfg.ScaleDownGpuUtilizationThreshold = defaults.ScaleDownGpuUtilizationThreshold
}
return cfg, nil
}

// isGpu checks if a node group is using GPU machines
func (ng *NodeGroup) isGpu() bool {
flavor, err := ng.Manager.getFlavorByName(ng.Flavor)
if err != nil {
// Fallback when we are unable to get the flavor: refer to the only category
// known to be a GPU flavor category
return strings.HasPrefix(ng.Flavor, GPUMachineCategory)
}
return flavor.GPUs > 0
}

// toInstanceStatus casts a node status into an instance status
func toInstanceStatus(status string) *cloudprovider.InstanceStatus {
state := &cloudprovider.InstanceStatus{}
switch status {
case "INSTALLING", "REDEPLOYING":
state.State = cloudprovider.InstanceCreating
case "DELETING":
state.State = cloudprovider.InstanceDeleting
case "READY":
state.State = cloudprovider.InstanceRunning
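// Statuses not handled above are reported as errors so the autoscaler treats the
// instance as unhealthy; the raw status string is surfaced as the error code.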
default:
state.ErrorInfo = &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: status,
ErrorMessage: "error",
}
}
return state
}