From d335f89a4c1972eb2b51f79a74dc0674e4c71f0f Mon Sep 17 00:00:00 2001 From: samuel40791765 Date: Mon, 16 Dec 2024 23:38:27 +0000 Subject: [PATCH] Prune hanging instances longer than 3 hours --- .../ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml | 4 +--- tests/ci/lambda/src/bin/purge-stale-builds.rs | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml b/tests/ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml index 69503f58470..53f97b4ed02 100644 --- a/tests/ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml +++ b/tests/ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml @@ -10,9 +10,6 @@ mainSteps: inputs: timeoutSeconds: '7200' runCommand: - # TODO (P131897680): Parallelize the FIPS and sanitizer tests. The instance timeout can be lowered - # once we do so. - # # Fallback plan to shut down the ec2 instance in 90 minutes in case it's not terminated. # Codebuild just "stops" the instance calling the script, so "trap cleanup" is not executed. - shutdown -P +90 @@ -28,6 +25,7 @@ mainSteps: # install aws-cli - killall apt apt-get - apt-get update + - apt-get -y remove needrestart - apt-get -y install unzip - curl "https://awscli.amazonaws.com/awscli-exe-linux-${AWS_CLI_PREFIX}64.zip" -o "awscliv2.zip" - unzip awscliv2.zip diff --git a/tests/ci/lambda/src/bin/purge-stale-builds.rs b/tests/ci/lambda/src/bin/purge-stale-builds.rs index ab6ec4c327c..59e65123953 100644 --- a/tests/ci/lambda/src/bin/purge-stale-builds.rs +++ b/tests/ci/lambda/src/bin/purge-stale-builds.rs @@ -4,6 +4,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use aws_sdk_codebuild::types::BuildBatchFilter; use aws_sdk_ec2::operation::describe_instances::DescribeInstancesOutput; +use aws_sdk_ec2::primitives::DateTime; use aws_sdk_ec2::types::Filter; use aws_sdk_ssm::types::DocumentKeyValuesFilter; use lambda_runtime::{service_fn, Error, LambdaEvent}; @@ -115,6 +116,8 @@ async fn handle(_event: LambdaEvent) -> Result<(), Error> { let mut ec2_terminated_instances: Vec = vec![]; let mut stopped_builds: u64 = 0; + let now_as_secs = DateTime::from(SystemTime::now()).secs(); + for (k, v) in &pull_requests { if v.len() <= 1 { continue; @@ -158,6 +161,11 @@ async fn handle(_event: LambdaEvent) -> Result<(), Error> { .push(instance.instance_id().unwrap().to_string()); } } + let launch_elapsed_time = now_as_secs - instance.launch_time().unwrap().secs(); + log::info!("Launch time: {:?}", launch_elapsed_time); + if launch_elapsed_time > 30 { + log::info!("Longer than 30 seconds"); + } } } } @@ -168,6 +176,8 @@ async fn handle(_event: LambdaEvent) -> Result<(), Error> { } } + log::info!("time {:?}", now_as_secs); + log::info!("Terminating instances {:?}", ec2_terminated_instances); if let Some(ref ec2_client) = ec2_client_optional { if !ec2_terminated_instances.is_empty() {