Skip to content

Commit

Permalink
fix: spot shutdown
Browse files Browse the repository at this point in the history
  • Loading branch information
ludamad committed Apr 12, 2024
1 parent 8ad7917 commit 62d2138
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
17 changes: 14 additions & 3 deletions dist/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,7 @@ class UserData {
const runnerNameBase = `${this.config.githubJobId}-ec2`;
// space-separated registration tokens
const tokensSpaceSep = tokens.map((t) => t.token).join(" ");
const bumpShutdown = `shutdown -c ; shutdown -P +${this.config.ec2InstanceTtl}`;
// Note, we don't make the runner ephemeral as we start fresh runners as needed
// and delay shutdowns whenever jobs start
// TODO could deregister runners right before shutdown starts
Expand All @@ -722,19 +723,29 @@ class UserData {
`exec 1>/run/log.out 2>&1`,
`shutdown -P +${this.config.ec2InstanceTtl}`,
"cd /run",
`echo "shutdown -c ; shutdown -P +${this.config.ec2InstanceTtl}" > /run/delay_shutdown.sh`,
`mkdir -p shutdown-refcount`,
// Shutdown rules:
// - github actions job starts and ends always bump +ec2InstanceTtl minutes
// - when the number of started jobs (start_run_* files) equals the number of finished jobs (end_run_* files), we shut down in 5 minutes
`echo "${bumpShutdown}; touch /run/shutdown-refcount/start_run_$(date +%s)_$RANDOM" > /run/delay_shutdown.sh`,
`echo "[ $(find /run/shutdown-refcount/ -name 'start_run_*' | wc -l) -eq $(find /run/shutdown-refcount/ -name 'end_run_*' | wc -l) ] && shutdown -P 5" > /run/if_refcount0_shutdown.sh`,
`echo "${bumpShutdown}; touch /run/shutdown-refcount/end_run_$(date +%s)_$RANDOM ; /run/if_refcount0_shutdown.sh " > /run/refcount_and_delay_shutdown.sh`,
"chmod +x /run/delay_shutdown.sh",
"chmod +x /run/refcount_and_delay_shutdown.sh",
"chmod +x /run/if_refcount0_shutdown.sh",
"export ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh",
"export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/refcount_and_delay_shutdown.sh",
"mkdir -p actions-runner && cd actions-runner",
'echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh" > .env',
'echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/refcount_and_delay_shutdown.sh" > .env',
`GH_RUNNER_VERSION=${githubActionRunnerVersion}`,
'case $(uname -m) in aarch64) ARCH="arm64" ;; amd64|x86_64) ARCH="x64" ;; esac && export RUNNER_ARCH=${ARCH}',
"curl -O -L https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
"tar xzf ./actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
"export RUNNER_ALLOW_RUNASROOT=1",
'mv externals ..',
"mv externals ..",
// Note sharing bin doesn't work due to using it as a folder, and we don't bother splitting up sharing bin
'rm ./actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz',
"rm ./actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
'[ -n "$(command -v yum)" ] && yum install libicu -y',
`TOKENS=(${tokensSpaceSep}) ; echo ${tokensSpaceSep} > /run/github-runner-tokens`,
`for i in {0..${this.config.githubActionRunnerConcurrency - 1}}; do`,
Expand Down
19 changes: 15 additions & 4 deletions src/ec2/userdata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export class UserData {
const runnerNameBase = `${this.config.githubJobId}-ec2`;
// space-separated registration tokens
const tokensSpaceSep = tokens.map((t) => t.token).join(" ");
const bumpShutdown = `shutdown -c ; shutdown -P +${this.config.ec2InstanceTtl}`;
// Note, we don't make the runner ephemeral as we start fresh runners as needed
// and delay shutdowns whenever jobs start
// TODO could deregister runners right before shutdown starts
Expand All @@ -29,22 +30,32 @@ export class UserData {
`exec 1>/run/log.out 2>&1`, // Log to /run/log.out
`shutdown -P +${this.config.ec2InstanceTtl}`,
"cd /run",
`echo "shutdown -c ; shutdown -P +${this.config.ec2InstanceTtl}" > /run/delay_shutdown.sh`,
`mkdir -p shutdown-refcount`,
// Shutdown rules:
// - github actions job starts and ends always bump +ec2InstanceTtl minutes
// - when the number of started jobs (start_run_* files) equals the number of finished jobs (end_run_* files), we shut down in 5 minutes
`echo "${bumpShutdown}; touch /run/shutdown-refcount/start_run_$(date +%s)_$RANDOM" > /run/delay_shutdown.sh`,
`echo "[ $(find /run/shutdown-refcount/ -name 'start_run_*' | wc -l) -eq $(find /run/shutdown-refcount/ -name 'end_run_*' | wc -l) ] && shutdown -P 5" > /run/if_refcount0_shutdown.sh`,
`echo "${bumpShutdown}; touch /run/shutdown-refcount/end_run_$(date +%s)_$RANDOM ; /run/if_refcount0_shutdown.sh " > /run/refcount_and_delay_shutdown.sh`,
"chmod +x /run/delay_shutdown.sh",
"chmod +x /run/refcount_and_delay_shutdown.sh",
"chmod +x /run/if_refcount0_shutdown.sh",
"export ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh",
"export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/refcount_and_delay_shutdown.sh",
"mkdir -p actions-runner && cd actions-runner",
'echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh" > .env',
'echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/refcount_and_delay_shutdown.sh" > .env',
`GH_RUNNER_VERSION=${githubActionRunnerVersion}`,
'case $(uname -m) in aarch64) ARCH="arm64" ;; amd64|x86_64) ARCH="x64" ;; esac && export RUNNER_ARCH=${ARCH}',
"curl -O -L https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
"tar xzf ./actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
"export RUNNER_ALLOW_RUNASROOT=1",
'mv externals ..', // we share the big binaries between all the runner folders, symlink instead of copy them
"mv externals ..", // we share the big binaries between all the runner folders, symlink instead of copy them
// Note sharing bin doesn't work due to using it as a folder, and we don't bother splitting up sharing bin
'rm ./actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz', // cleanup as we will copy our runner folder
"rm ./actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz", // cleanup as we will copy our runner folder
'[ -n "$(command -v yum)" ] && yum install libicu -y',
`TOKENS=(${tokensSpaceSep}) ; echo ${tokensSpaceSep} > /run/github-runner-tokens`, // for debugging failed attempts
`for i in {0..${this.config.githubActionRunnerConcurrency - 1}}; do`,
`for i in {0..${this.config.githubActionRunnerConcurrency - 1}}; do`,
` ( cp -r . ../${runnerNameBase}-$i && ln -s $(pwd)/../externals ../${runnerNameBase}-$i && cd ../${runnerNameBase}-$i; ./config.sh --unattended --url https://github.com/${github.context.repo.owner}/${github.context.repo.repo} --token \${TOKENS[i]} --labels ${this.config.githubActionRunnerLabel} --replace --name ${runnerNameBase}-$i ; ./run.sh ) &`,
"done",
"wait", // Wait for all background processes to finish
Expand Down

0 comments on commit 62d2138

Please sign in to comment.