Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GH workflow for automatically updating nvidia device plugin static manifest #7898

Merged
merged 4 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 45 additions & 39 deletions .github/workflows/update-generated.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Update generated files
on:
workflow_dispatch: {}
schedule:
- cron: "0 5 * * Thu"
- cron: "0 5 * * Thu"

permissions:
id-token: write
Expand All @@ -15,47 +15,53 @@ jobs:
strategy:
fail-fast: false
matrix:
resource: ["coredns", "aws-node"]
resource: ["coredns", "aws-node", "nvidia-device-plugin"]
name: Update ${{ matrix.resource }} and open PR
runs-on: ubuntu-latest
container: public.ecr.aws/eksctl/eksctl-build:833f4464e865a6398788bf6cbc5447967b8974b7
env:
GOPRIVATE: ""
steps:
- name: Checkout
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 #v4.1.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
fetch-depth: 0
- name: Configure AWS credentials for coredns update
if: ${{ matrix.resource == 'coredns' }}
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: us-west-2
role-duration-seconds: 900
role-session-name: eksctl-update-coredns-assets
role-to-assume: ${{ secrets.UPDATE_COREDNS_ROLE_ARN }}
- name: Setup identity as eksctl-bot
uses: ./.github/actions/setup-identity
with:
token: "${{ secrets.EKSCTLBOT_TOKEN }}"
- name: Cache go-build and mod
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 #v4.0.2
with:
path: |
~/.cache/go-build/
~/go/pkg/mod/
key: go-${{ hashFiles('go.sum') }}
restore-keys: |
go-
- name: Update ${{ matrix.resource }}
run: make update-${{ matrix.resource }}
- name: Upsert pull request
uses: peter-evans/create-pull-request@70a41aba780001da0a30141984ae2a0c95d8704e #v6.0.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
commit-message: update ${{ matrix.resource }}
committer: eksctl-bot <[email protected]>
title: 'Update ${{ matrix.resource }}'
branch: update-${{ matrix.resource }}
labels: area/tech-debt
- name: Checkout
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 #v4.1.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
fetch-depth: 0
- name: Configure AWS credentials for coredns update
if: ${{ matrix.resource == 'coredns' }}
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: us-west-2
role-duration-seconds: 900
role-session-name: eksctl-update-coredns-assets
role-to-assume: ${{ secrets.UPDATE_COREDNS_ROLE_ARN }}
- name: Setup identity as eksctl-bot
uses: ./.github/actions/setup-identity
with:
token: "${{ secrets.EKSCTLBOT_TOKEN }}"
- name: Cache go-build and mod
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 #v4.0.2
with:
path: |
~/.cache/go-build/
~/go/pkg/mod/
key: go-${{ hashFiles('go.sum') }}
restore-keys: |
go-
- name: Update ${{ matrix.resource }}
run: make update-${{ matrix.resource }}
- name: Upsert pull request
uses: peter-evans/create-pull-request@70a41aba780001da0a30141984ae2a0c95d8704e #v6.0.2
with:
token: ${{ secrets.EKSCTLBOT_TOKEN }}
commit-message: update ${{ matrix.resource }}${{ env.LATEST_RELEASE_TAG }}
committer: eksctl-bot <[email protected]>
title: 'Update ${{ matrix.resource }}${{ env.LATEST_RELEASE_TAG }}'
branch: update-${{ matrix.resource }}
labels: area/tech-debt
body: |
Auto-generated by [eksctl Update Generated Files GitHub workflow][1]

[1]: https://github.com/eksctl-io/eksctl/blob/main/.github/workflows/update-generated.yaml

Please manually test before approving and merging.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ generate-all: generate-always $(conditionally_generated_files) ## Re-generate al
check-all-generated-files-up-to-date: generate-all ## Run the generate all command and verify there is no new diff
git diff --quiet -- $(conditionally_generated_files) || (git --no-pager diff $(conditionally_generated_files); echo "HINT: to fix this, run 'git commit $(conditionally_generated_files) --message \"Update generated files\"'"; exit 1)

# Runs the helper script that looks up the latest NVIDIA/k8s-device-plugin
# release and downloads its static manifest into pkg/addons/assets.
.PHONY: update-nvidia-device-plugin
update-nvidia-device-plugin: ## fetch the latest static manifest
pkg/addons/assets/scripts/update_nvidia_device_plugin.sh

.PHONY: update-aws-node
update-aws-node: ## Re-download the aws-node manifests from AWS
Expand Down
33 changes: 33 additions & 0 deletions pkg/addons/assets/scripts/update_nvidia_device_plugin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Download the static manifest of the latest NVIDIA k8s-device-plugin release
# and store it as pkg/addons/assets/nvidia-device-plugin.yaml.
#
# The output path is relative to the current working directory, so invoke the
# script from the repository root (as `make update-nvidia-device-plugin` does).
# Requires curl and jq.
#
# Exit status: 0 on success, 1 if the release tag cannot be determined or the
# manifest cannot be downloaded.

# Make a failing curl in the `curl | jq` pipeline visible to the caller.
set -o pipefail

# Print the tag name of the latest NVIDIA/k8s-device-plugin release.
# Prints nothing when the request fails or the response has no tag_name.
get_latest_release_tag() {
  # --fail: treat HTTP errors (e.g. 403 rate limit, 404) as failures instead of
  # piping the error body into jq.
  # `// empty`: emit nothing rather than the literal string "null" when the
  # field is absent, so the emptiness check below actually catches it.
  curl -fsSL https://api.github.com/repos/NVIDIA/k8s-device-plugin/releases/latest | jq -r '.tag_name // empty'
}

latest_release_tag=$(get_latest_release_tag)

# Check if the latest release tag was found
if [ -z "$latest_release_tag" ]; then
  echo "Could not find the latest release tag." >&2
  exit 1
fi

# If running in GitHub Actions, export the release tag for use in the workflow.
# The value deliberately starts with " to ": the workflow appends
# LATEST_RELEASE_TAG directly after the resource name to build the commit
# message and PR title, e.g. "Update nvidia-device-plugin to <tag>".
if [ "$GITHUB_ACTIONS" = "true" ]; then
  echo "LATEST_RELEASE_TAG= to $latest_release_tag" >> "$GITHUB_ENV"
else
  echo "Found the latest release tag: $latest_release_tag"
fi

assets_addons_dir="pkg/addons/assets"
manifest_path="$assets_addons_dir/nvidia-device-plugin.yaml"
manifest_url="https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/$latest_release_tag/deployments/static/nvidia-device-plugin.yml"

# Download next to the target and move into place only on success, so a failed
# or partial transfer never overwrites the manifest that is already checked in.
tmp_manifest="$manifest_path.tmp"

# --fail makes curl exit non-zero on HTTP errors; without it a "404: Not Found"
# body would be saved as the manifest and reported as a successful download.
if curl -fsSL "$manifest_url" -o "$tmp_manifest" && mv "$tmp_manifest" "$manifest_path"; then
  echo "Downloaded the latest NVIDIA device plugin manifest to $assets_addons_dir/nvidia-device-plugin.yaml"
else
  rm -f "$tmp_manifest"
  echo "Failed to download the NVIDIA device plugin manifest." >&2
  exit 1
fi
21 changes: 20 additions & 1 deletion pkg/addons/default/scripts/update_aws_node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,31 @@ get_latest_release_tag() {

latest_release_tag=$(get_latest_release_tag)

# Check if the latest release tag was found
if [ -z "$latest_release_tag" ]; then
echo "Could not find the latest release tag."
exit 1
fi

# If running in GitHub Actions, export the release tag for use in the workflow
if [ "$GITHUB_ACTIONS" = "true" ]; then
echo "LATEST_RELEASE_TAG= to $latest_release_tag" >> $GITHUB_ENV
else
echo "Found the latest release tag: $latest_release_tag"
fi

default_addons_dir="pkg/addons/default"

# Download the latest aws-k8s-cni.yaml file
curl -sL "$base_url$latest_release_tag/config/master/aws-k8s-cni.yaml?raw=1" --output "$default_addons_dir/assets/aws-node.yaml"

echo "found latest release tag:" $latest_release_tag
# Check if the download was successful
if [ $? -eq 0 ]; then
echo "Downloaded the latest AWS Node manifest to $default_addons_dir/assets/aws-node.yaml"
else
echo "Failed to download the latest AWS Node manifest."
exit 1
fi

# Update the unit test file
sed -i "s/expectedVersion = \"\(.*\)\"/expectedVersion = \"$latest_release_tag\"/g" "$default_addons_dir/aws_node_test.go"
6 changes: 4 additions & 2 deletions pkg/printers/testdata/jsontest_2clusters.golden
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
"RoleArn": null,
"Status": "ACTIVE",
"Tags": null,
"Version": null
"Version": null,
"UpgradePolicy": null
},
{
"Id": null,
Expand Down Expand Up @@ -73,6 +74,7 @@
"RoleArn": null,
"Status": "ACTIVE",
"Tags": null,
"Version": null
"Version": null,
"UpgradePolicy": null
}
]
3 changes: 2 additions & 1 deletion pkg/printers/testdata/jsontest_single.golden
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"RoleArn": null,
"Status": "ACTIVE",
"Tags": null,
"Version": null
"Version": null,
"UpgradePolicy": null
}
]
2 changes: 2 additions & 0 deletions pkg/printers/testdata/yamltest_2clusters.golden
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
Status: ACTIVE
Tags: null
Version: null
UpgradePolicy: null
- Id: null
Arn: arn-87654321
CertificateAuthority: null
Expand Down Expand Up @@ -62,3 +63,4 @@
Status: ACTIVE
Tags: null
Version: null
UpgradePolicy: null
1 change: 1 addition & 0 deletions pkg/printers/testdata/yamltest_single.golden
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
Status: ACTIVE
Tags: null
Version: null
UpgradePolicy: null
10 changes: 10 additions & 0 deletions userdocs/src/usage/gpu-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,17 @@ use `--install-nvidia-plugin=false` with the create command. For example:

```
eksctl create cluster --node-type=p2.xlarge --install-nvidia-plugin=false
```

and, for NVIDIA device plugin versions 0.15.0 and above,

```
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/<VERSION>/deployments/static/nvidia-device-plugin.yml
```

or, for older versions,

```
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/<VERSION>/nvidia-device-plugin.yml
```

Expand Down