Skip to content

Commit 6ccd00f

Browse files
authored
feat(eks): spot support for managed nodegroups (#11962)
This PR adds the `CapacityType` support and allows users to create Spot managed node groups for Amazon EKS. 1. The `CapacityType` attribute is supported by cloudformation but not yet documented. We tentatively use addPropertyOverride() to enable it. 2. `instanceType` will be deprecated and we introduced the new `instanceTypes` 3. `instanceTypes` with different CPU architectures will throw an error. 4. `amiType` is still optional, however, when specified, incorrect `amiType` will throw the error. 5. According to the [document](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-instancetypes), we are allowed to specify instance type(s) in either `instanceTypes` property or launch template but not both. As we can't check the content of the launch template passed in, we allow `instanceTypes` and launch template both specified and encourage to use `instanceTypes` when possible. ## Sample ```ts cluster.addNodegroupCapacity('extra-ng-spot', { instanceTypes: [ new ec2.InstanceType('c5.large'), new ec2.InstanceType('c5a.large'), new ec2.InstanceType('c5d.large'), ], minSize: 3, capacityType: eks.CapacityType.SPOT, }); ``` Closes #11827 ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 579b923 commit 6ccd00f

File tree

6 files changed

+394
-47
lines changed

6 files changed

+394
-47
lines changed

packages/@aws-cdk/aws-eks/README.md

+26-3
Original file line numberDiff line numberDiff line change
@@ -201,14 +201,35 @@ const cluster = new eks.Cluster(this, 'HelloEKS', {
201201
});
202202

203203
cluster.addNodegroupCapacity('custom-node-group', {
204-
instanceType: new ec2.InstanceType('m5.large'),
204+
instanceTypes: [new ec2.InstanceType('m5.large')],
205205
minSize: 4,
206206
diskSize: 100,
207207
amiType: eks.NodegroupAmiType.AL2_X86_64_GPU,
208208
...
209209
});
210210
```
211211

212+
#### Spot Instances Support
213+
214+
Use `capacityType` to create managed node groups comprised of spot instances. To maximize the availability of your applications while using
215+
Spot Instances, we recommend that you configure a Spot managed node group to use multiple instance types with the `instanceTypes` property.
216+
217+
> For more details visit [Managed node group capacity types](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html#managed-node-group-capacity-types).
218+
219+
220+
```ts
221+
cluster.addNodegroupCapacity('extra-ng-spot', {
222+
instanceTypes: [
223+
new ec2.InstanceType('c5.large'),
224+
new ec2.InstanceType('c5a.large'),
225+
new ec2.InstanceType('c5d.large'),
226+
],
227+
minSize: 3,
228+
capacityType: eks.CapacityType.SPOT,
229+
});
230+
231+
```
232+
212233
#### Launch Template Support
213234

214235
You can specify a launch template that the node group will use. Note that when using a custom AMI, Amazon EKS doesn't merge any user data.
@@ -236,7 +257,9 @@ cluster.addNodegroupCapacity('extra-ng', {
236257
});
237258
```
238259

239-
> For more details visit [Launch Template Support](https://docs.aws.amazon.com/en_ca/eks/latest/userguide/launch-templates.html).
260+
You may specify one or more instance types in either the `instanceTypes` property of `NodeGroup` or in the launch template, **but not both**.
261+
262+
> For more details visit [Launch Template Support](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html).
240263
241264
Graviton 2 instance types are supported including `c6g`, `m6g`, `r6g` and `t4g`.
242265

@@ -552,7 +575,7 @@ Amazon Linux 2 AMI for ARM64 will be automatically selected.
552575
```ts
553576
// add a managed ARM64 nodegroup
554577
cluster.addNodegroupCapacity('extra-ng-arm', {
555-
instanceType: new ec2.InstanceType('m6g.medium'),
578+
instanceTypes: [new ec2.InstanceType('m6g.medium')],
556579
minSize: 2,
557580
});
558581

packages/@aws-cdk/aws-eks/lib/cluster.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1123,7 +1123,7 @@ export class Cluster extends ClusterBase {
11231123
this.addAutoScalingGroupCapacity('DefaultCapacity', { instanceType, minCapacity }) : undefined;
11241124

11251125
this.defaultNodegroup = props.defaultCapacityType !== DefaultCapacityType.EC2 ?
1126-
this.addNodegroupCapacity('DefaultCapacity', { instanceType, minSize: minCapacity }) : undefined;
1126+
this.addNodegroupCapacity('DefaultCapacity', { instanceTypes: [instanceType], minSize: minCapacity }) : undefined;
11271127
}
11281128

11291129
const outputConfigCommand = props.outputConfigCommand === undefined ? true : props.outputConfigCommand;

packages/@aws-cdk/aws-eks/lib/managed-nodegroup.ts

+70-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { InstanceType, ISecurityGroup, SubnetSelection } from '@aws-cdk/aws-ec2';
22
import { IRole, ManagedPolicy, Role, ServicePrincipal } from '@aws-cdk/aws-iam';
3-
import { IResource, Resource } from '@aws-cdk/core';
3+
import { IResource, Resource, Annotations } from '@aws-cdk/core';
44
import { Construct } from 'constructs';
55
import { Cluster, ICluster } from './cluster';
66
import { CfnNodegroup } from './eks.generated';
@@ -37,6 +37,20 @@ export enum NodegroupAmiType {
3737
AL2_ARM_64 = 'AL2_ARM_64'
3838
}
3939

40+
/**
41+
* Capacity type of the managed node group
42+
*/
43+
export enum CapacityType {
44+
/**
45+
* spot instances
46+
*/
47+
SPOT = 'SPOT',
48+
/**
49+
* on-demand instances
50+
*/
51+
ON_DEMAND = 'ON_DEMAND'
52+
}
53+
4054
/**
4155
* The remote access (SSH) configuration to use with your node group.
4256
*
@@ -95,7 +109,7 @@ export interface NodegroupOptions {
95109
/**
96110
* The AMI type for your node group.
97111
*
98-
* @default - auto-determined from the instanceType property.
112+
* @default - auto-determined from the instanceTypes property.
99113
*/
100114
readonly amiType?: NodegroupAmiType;
101115
/**
@@ -138,8 +152,15 @@ export interface NodegroupOptions {
138152
* `AL2_x86_64_GPU` with the amiType parameter.
139153
*
140154
* @default t3.medium
155+
* @deprecated Use `instanceTypes` instead.
141156
*/
142157
readonly instanceType?: InstanceType;
158+
/**
159+
* The instance types to use for your node group.
160+
* @default - t3.medium will be used, according to the CloudFormation documentation.
161+
* @see - https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-instancetypes
162+
*/
163+
readonly instanceTypes?: InstanceType[];
143164
/**
144165
* The Kubernetes labels to be applied to the nodes in the node group when they are created.
145166
*
@@ -183,6 +204,12 @@ export interface NodegroupOptions {
183204
* @default - no launch template
184205
*/
185206
readonly launchTemplateSpec?: LaunchTemplateSpec;
207+
/**
208+
* The capacity type of the nodegroup.
209+
*
210+
* @default - ON_DEMAND
211+
*/
212+
readonly capacityType?: CapacityType;
186213
}
187214

188215
/**
@@ -199,6 +226,10 @@ export interface NodegroupProps extends NodegroupOptions {
199226
* The Nodegroup resource class
200227
*/
201228
export class Nodegroup extends Resource implements INodegroup {
229+
/**
230+
* Default instanceTypes
231+
*/
232+
public static readonly DEFAULT_INSTANCE_TYPES = [new InstanceType('t3.medium')];
202233
/**
203234
* Import the Nodegroup from attributes
204235
*/
@@ -253,6 +284,25 @@ export class Nodegroup extends Resource implements INodegroup {
253284
throw new Error(`Minimum capacity ${this.minSize} can't be greater than desired size ${this.desiredSize}`);
254285
}
255286

287+
if (props.instanceType && props.instanceTypes) {
288+
throw new Error('"instanceType is deprecated, please use "instanceTypes" only.');
289+
}
290+
291+
if (props.instanceType) {
292+
Annotations.of(this).addWarning('"instanceType" is deprecated and will be removed in the next major version. please use "instanceTypes" instead');
293+
}
294+
const instanceTypes = props.instanceTypes ?? (props.instanceType ? [props.instanceType] : Nodegroup.DEFAULT_INSTANCE_TYPES);
295+
// get unique AMI types from instanceTypes
296+
const uniqAmiTypes = getAmiTypes(instanceTypes);
297+
// uniqAmiTypes.length should be at least 1
298+
if (uniqAmiTypes.length > 1) {
299+
throw new Error('instanceTypes of different CPU architectures is not allowed');
300+
}
301+
const determinedAmiType = uniqAmiTypes[0];
302+
if (props.amiType && props.amiType !== determinedAmiType) {
303+
throw new Error(`The specified AMI does not match the instance types architecture, either specify ${determinedAmiType} or dont specify any`);
304+
}
305+
256306
if (!props.nodeRole) {
257307
const ngRole = new Role(this, 'NodeGroupRole', {
258308
assumedBy: new ServicePrincipal('ec2.amazonaws.com'),
@@ -271,11 +321,12 @@ export class Nodegroup extends Resource implements INodegroup {
271321
nodegroupName: props.nodegroupName,
272322
nodeRole: this.role.roleArn,
273323
subnets: this.cluster.vpc.selectSubnets(props.subnets).subnetIds,
274-
amiType: props.amiType ?? (props.instanceType ? getAmiTypeForInstanceType(props.instanceType).toString() :
275-
undefined),
324+
// AmiType is not allowed by CFN when specifying an image id in your launch template.
325+
amiType: props.launchTemplateSpec === undefined ? determinedAmiType : undefined,
276326
diskSize: props.diskSize,
277327
forceUpdateEnabled: props.forceUpdate ?? true,
278-
instanceTypes: props.instanceType ? [props.instanceType.toString()] : undefined,
328+
instanceTypes: props.instanceTypes ? props.instanceTypes.map(t => t.toString()) :
329+
props.instanceType ? [props.instanceType.toString()] : undefined,
279330
labels: props.labels,
280331
releaseVersion: props.releaseVersion,
281332
remoteAccess: props.remoteAccess ? {
@@ -291,17 +342,21 @@ export class Nodegroup extends Resource implements INodegroup {
291342
tags: props.tags,
292343
});
293344

345+
if (props.capacityType) {
346+
resource.addPropertyOverride('CapacityType', props.capacityType.valueOf());
347+
}
348+
294349
if (props.launchTemplateSpec) {
295350
if (props.diskSize) {
296351
// see - https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html
297352
// and https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-disksize
298353
throw new Error('diskSize must be specified within the launch template');
299354
}
300-
if (props.instanceType) {
301-
// see - https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html
302-
// and https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-disksize
303-
throw new Error('Instance types must be specified within the launch template');
304-
}
355+
/**
356+
* Instance types can be specified either in `instanceTypes` or in the launch template, but not both. As we cannot check the content of
357+
* the provided launch template, and the `instanceTypes` property is preferable, we allow users to define the `instanceTypes` property here.
358+
* see - https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-instancetypes
359+
*/
305360
// TODO: update this when the L1 resource spec is updated.
306361
resource.addPropertyOverride('LaunchTemplate', {
307362
Id: props.launchTemplateSpec.id,
@@ -340,3 +395,8 @@ function getAmiTypeForInstanceType(instanceType: InstanceType) {
340395
NodegroupAmiType.AL2_X86_64;
341396
}
342397

398+
function getAmiTypes(instanceType: InstanceType[]) {
399+
const amiTypes = instanceType.map(i =>getAmiTypeForInstanceType(i));
400+
// return unique AMI types
401+
return [...new Set(amiTypes)];
402+
}

packages/@aws-cdk/aws-eks/test/integ.eks-cluster.expected.json

+110
Original file line numberDiff line numberDiff line change
@@ -1251,6 +1251,13 @@
12511251
]
12521252
},
12531253
"\\\",\\\"username\\\":\\\"system:node:{{EC2PrivateDNSName}}\\\",\\\"groups\\\":[\\\"system:bootstrappers\\\",\\\"system:nodes\\\"]},{\\\"rolearn\\\":\\\"",
1254+
{
1255+
"Fn::GetAtt": [
1256+
"ClusterNodegroupextrangspotNodeGroupRoleB53B4857",
1257+
"Arn"
1258+
]
1259+
},
1260+
"\\\",\\\"username\\\":\\\"system:node:{{EC2PrivateDNSName}}\\\",\\\"groups\\\":[\\\"system:bootstrappers\\\",\\\"system:nodes\\\"]},{\\\"rolearn\\\":\\\"",
12541261
{
12551262
"Fn::GetAtt": [
12561263
"ClusterNodegroupextrangarmNodeGroupRoleADF5749F",
@@ -3251,6 +3258,109 @@
32513258
}
32523259
}
32533260
},
3261+
"ClusterNodegroupextrangspotNodeGroupRoleB53B4857": {
3262+
"Type": "AWS::IAM::Role",
3263+
"Properties": {
3264+
"AssumeRolePolicyDocument": {
3265+
"Statement": [
3266+
{
3267+
"Action": "sts:AssumeRole",
3268+
"Effect": "Allow",
3269+
"Principal": {
3270+
"Service": {
3271+
"Fn::Join": [
3272+
"",
3273+
[
3274+
"ec2.",
3275+
{
3276+
"Ref": "AWS::URLSuffix"
3277+
}
3278+
]
3279+
]
3280+
}
3281+
}
3282+
}
3283+
],
3284+
"Version": "2012-10-17"
3285+
},
3286+
"ManagedPolicyArns": [
3287+
{
3288+
"Fn::Join": [
3289+
"",
3290+
[
3291+
"arn:",
3292+
{
3293+
"Ref": "AWS::Partition"
3294+
},
3295+
":iam::aws:policy/AmazonEKSWorkerNodePolicy"
3296+
]
3297+
]
3298+
},
3299+
{
3300+
"Fn::Join": [
3301+
"",
3302+
[
3303+
"arn:",
3304+
{
3305+
"Ref": "AWS::Partition"
3306+
},
3307+
":iam::aws:policy/AmazonEKS_CNI_Policy"
3308+
]
3309+
]
3310+
},
3311+
{
3312+
"Fn::Join": [
3313+
"",
3314+
[
3315+
"arn:",
3316+
{
3317+
"Ref": "AWS::Partition"
3318+
},
3319+
":iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
3320+
]
3321+
]
3322+
}
3323+
]
3324+
}
3325+
},
3326+
"ClusterNodegroupextrangspotB327AE6B": {
3327+
"Type": "AWS::EKS::Nodegroup",
3328+
"Properties": {
3329+
"ClusterName": {
3330+
"Ref": "Cluster9EE0221C"
3331+
},
3332+
"NodeRole": {
3333+
"Fn::GetAtt": [
3334+
"ClusterNodegroupextrangspotNodeGroupRoleB53B4857",
3335+
"Arn"
3336+
]
3337+
},
3338+
"Subnets": [
3339+
{
3340+
"Ref": "VpcPrivateSubnet1Subnet536B997A"
3341+
},
3342+
{
3343+
"Ref": "VpcPrivateSubnet2Subnet3788AAA1"
3344+
},
3345+
{
3346+
"Ref": "VpcPrivateSubnet3SubnetF258B56E"
3347+
}
3348+
],
3349+
"AmiType": "AL2_x86_64",
3350+
"ForceUpdateEnabled": true,
3351+
"InstanceTypes": [
3352+
"c5.large",
3353+
"c5a.large",
3354+
"c5d.large"
3355+
],
3356+
"ScalingConfig": {
3357+
"DesiredSize": 3,
3358+
"MaxSize": 3,
3359+
"MinSize": 3
3360+
},
3361+
"CapacityType": "SPOT"
3362+
}
3363+
},
32543364
"ClusterNodegroupextrangarmNodeGroupRoleADF5749F": {
32553365
"Type": "AWS::IAM::Role",
32563366
"Properties": {

packages/@aws-cdk/aws-eks/test/integ.eks-cluster.ts

+16
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ class EksClusterStack extends TestStack {
5353

5454
this.assertNodeGroupX86();
5555

56+
this.assertNodeGroupSpot();
57+
5658
this.assertNodeGroupArm();
5759

5860
this.assertNodeGroupCustomAmi();
@@ -162,6 +164,20 @@ class EksClusterStack extends TestStack {
162164
nodeRole: this.cluster.defaultCapacity ? this.cluster.defaultCapacity.role : undefined,
163165
});
164166
}
167+
private assertNodeGroupSpot() {
168+
// add an extra nodegroup
169+
this.cluster.addNodegroupCapacity('extra-ng-spot', {
170+
instanceTypes: [
171+
new ec2.InstanceType('c5.large'),
172+
new ec2.InstanceType('c5a.large'),
173+
new ec2.InstanceType('c5d.large'),
174+
],
175+
minSize: 3,
176+
// reusing the default capacity nodegroup instance role when available
177+
nodeRole: this.cluster.defaultCapacity ? this.cluster.defaultCapacity.role : undefined,
178+
capacityType: eks.CapacityType.SPOT,
179+
});
180+
}
165181
private assertNodeGroupCustomAmi() {
166182
// add an extra nodegroup
167183
const userData = ec2.UserData.forLinux();

0 commit comments

Comments
 (0)