AztecProtocol · ludamad · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/.github/spot-runner-action/dist/index.js b/.github/spot-runner-action/dist/index.js
@@ -200,15 +200,6 @@ class Ec2Instance {
             }
         });
     }
-    // async runInstances(params: RunInstancesRequest) {
-    //   const client = await this.getEc2Client();
-    //   try {
-    //     return (await client.runInstances(params).promise()).Instances;
-    //   } catch (error) {
-    //     core.error(`Failed to create instance(s)`);
-    //     throw error;
-    //   }
-    // }
     getSubnetAzId() {
         var _a;
         return __awaiter(this, void 0, void 0, function* () {
@@ -329,82 +320,12 @@ class Ec2Instance {
                     DefaultTargetCapacityType: useOnDemand ? "on-demand" : "spot",
                 },
             };
-            // const config: SpotFleetRequestConfigData = {
-            //   IamFleetRole:
-            //     "arn:aws:iam::278380418400:role/aws-ec2-spot-fleet-tagging-role",
-            //   TargetCapacity: 1,
-            //   // We always ask for 1 instance, but might ask for 100% on demand or spot
-            //   OnDemandTargetCapacity: useOnDemand ? 1 : 0,
-            //   TerminateInstancesWithExpiration: true,
-            //   Type: "request",
-            //   LaunchSpecifications:
-            // };
-            // const params: RequestSpotFleetRequest = {
-            //   SpotFleetRequestConfig: config,
-            // };
             const client = yield this.getEc2Client();
             const fleet = yield client.createFleet(createFleetRequest).promise();
             const instances = ((fleet === null || fleet === void 0 ? void 0 : fleet.Instances) || [])[0] || {};
             return (instances.InstanceIds || [])[0];
         });
     }
-    // async getOnDemandInstanceConfiguration(
-    //   ec2SpotInstanceStrategy: string
-    // ): Promise<RunInstancesRequest> {
-    //   const userData = new UserData(this.config);
-    //   const params: RunInstancesRequest = {
-    //     ImageId: this.config.ec2AmiId,
-    //     InstanceInitiatedShutdownBehavior: "terminate",
-    //     InstanceMarketOptions: {},
-    //     InstanceType: "",
-    //     MaxCount: 1,
-    //     MinCount: 1,
-    //     SecurityGroupIds: [this.config.ec2SecurityGroupId],
-    //     SubnetId: this.config.ec2SubnetId,
-    //     KeyName: this.config.ec2KeyName,
-    //     Placement: {
-    //       AvailabilityZone: await this.getSubnetAz(),
-    //     },
-    //     TagSpecifications: [
-    //       {
-    //         ResourceType: "instance",
-    //         Tags: this.tags,
-    //       },
-    //     ],
-    //     // <aztec>parity with build-system
-    //     BlockDeviceMappings: [
-    //       {
-    //         DeviceName: "/dev/sda1",
-    //         Ebs: {
-    //           VolumeSize: 32,
-    //         },
-    //       },
-    //     ],
-    //     // parity with build-system</aztec>
-    //     UserData: await userData.getUserData(),
-    //   };
-    //   switch (ec2SpotInstanceStrategy.toLowerCase()) {
-    //     case "besteffort":
-    //     case "spotonly": {
-    //       params.InstanceMarketOptions = {
-    //         MarketType: "spot",
-    //         SpotOptions: {
-    //           InstanceInterruptionBehavior: "terminate",
-    //           SpotInstanceType: "one-time",
-    //         },
-    //       };
-    //       break;
-    //     }
-    //     case "none": {
-    //       params.InstanceMarketOptions = {};
-    //       break;
-    //     }
-    //     default: {
-    //       throw new TypeError("Invalid value for ec2_spot_instance_strategy");
-    //     }
-    //   }
-    //   return params;
-    // }
     getInstanceStatus(instanceId) {
         return __awaiter(this, void 0, void 0, function* () {
             const client = yield this.getEc2Client();
@@ -420,7 +341,7 @@ class Ec2Instance {
             }
         });
     }
-    getInstancesForTags() {
+    getInstancesForTags(instanceStatus) {
         return __awaiter(this, void 0, void 0, function* () {
             const client = yield this.getEc2Client();
             const filters = [
@@ -438,6 +359,10 @@ class Ec2Instance {
                 for (const reservation of (yield client.describeInstances(params).promise()).Reservations || []) {
                     instances = instances.concat(reservation.Instances || []);
                 }
+                if (instanceStatus) {
+                    // Keep only instances whose state name matches the requested status
+                    instances = instances.filter((instance) => { var _a; return ((_a = instance === null || instance === void 0 ? void 0 : instance.State) === null || _a === void 0 ? void 0 : _a.Name) === instanceStatus; });
+                }
                 return instances;
             }
             catch (error) {
@@ -716,9 +641,8 @@ function pollSpotStatus(config, ec2Client, ghClient) {
     return __awaiter(this, void 0, void 0, function* () {
         // 12 iters x 10000 ms = 2 minutes
         for (let iter = 0; iter < 12; iter++) {
-            const instances = yield ec2Client.getInstancesForTags();
-            const hasInstance = instances.filter((i) => { var _a; return ((_a = i.State) === null || _a === void 0 ? void 0 : _a.Name) === "running"; }).length > 0;
-            if (!hasInstance) {
+            const instances = yield ec2Client.getInstancesForTags("running");
+            if (instances.length <= 0) {
                 // we need to start an instance
                 return "none";
             }
@@ -742,14 +666,18 @@ function start() {
     return __awaiter(this, void 0, void 0, function* () {
         const config = new config_1.ActionConfig();
         if (config.subaction === "stop") {
-            yield stop();
+            yield terminate();
             return;
         }
         else if (config.subaction === "restart") {
-            yield stop();
+            yield terminate();
             // then we make a fresh instance
         }
-        else if (config.subaction !== "start") {
+        else if (config.subaction === "start") {
+            // Clean up any instances left in the "stopped" state before continuing
+            yield terminate("stopped");
+        }
+        else {
             throw new Error("Unexpected subaction: " + config.subaction);
         }
         // subaction is 'start' or 'restart'estart'
@@ -765,7 +693,7 @@ function start() {
             if (config.subaction === "restart") {
                 throw new Error("Taking down spot we just started. This seems wrong, erroring out.");
             }
-            yield stop();
+            yield terminate();
         }
         var ec2SpotStrategies;
         switch (config.ec2SpotInstanceStrategy) {
@@ -831,14 +759,14 @@ function start() {
         }
     });
 }
-function stop() {
+function terminate(instanceStatus) {
     return __awaiter(this, void 0, void 0, function* () {
         try {
             core.info("Starting instance cleanup");
             const config = new config_1.ActionConfig();
             const ec2Client = new ec2_1.Ec2Instance(config);
             const ghClient = new github_1.GithubClient(config);
-            const instances = yield ec2Client.getInstancesForTags();
+            const instances = yield ec2Client.getInstancesForTags(instanceStatus);
             yield ec2Client.terminateInstances(instances.map((i) => i.InstanceId));
             core.info("Clearing previously installed runners");
             const result = yield ghClient.removeRunnersWithLabels([config.githubJobId]);
@@ -860,7 +788,7 @@ function stop() {
             start();
         }
         catch (error) {
-            stop();
+            terminate();
             (0, utils_1.assertIsError)(error);
             core.error(error);
             core.setFailed(error.message);

diff --git a/.github/spot-runner-action/src/ec2.ts b/.github/spot-runner-action/src/ec2.ts
@@ -117,17 +117,6 @@ export class Ec2Instance {
     }
   }
 
-  // async runInstances(params: RunInstancesRequest) {
-  //   const client = await this.getEc2Client();
-
-  //   try {
-  //     return (await client.runInstances(params).promise()).Instances;
-  //   } catch (error) {
-  //     core.error(`Failed to create instance(s)`);
-  //     throw error;
-  //   }
-  // }
-
   async getSubnetAzId() {
     const client = await this.getEc2Client();
     try {
@@ -252,86 +241,12 @@ export class Ec2Instance {
         DefaultTargetCapacityType: useOnDemand ? "on-demand" : "spot",
       },
     };
-    // const config: SpotFleetRequestConfigData = {
-    //   IamFleetRole:
-    //     "arn:aws:iam::278380418400:role/aws-ec2-spot-fleet-tagging-role",
-    //   TargetCapacity: 1,
-    //   // We always ask for 1 instance, but might ask for 100% on demand or spot
-    //   OnDemandTargetCapacity: useOnDemand ? 1 : 0,
-    //   TerminateInstancesWithExpiration: true,
-    //   Type: "request",
-    //   LaunchSpecifications:
-    // };
-    // const params: RequestSpotFleetRequest = {
-    //   SpotFleetRequestConfig: config,
-    // };
     const client = await this.getEc2Client();
     const fleet = await client.createFleet(createFleetRequest).promise();
     const instances: CreateFleetInstance = (fleet?.Instances || [])[0] || {};
     return (instances.InstanceIds || [])[0];
   }
 
-  // async getOnDemandInstanceConfiguration(
-  //   ec2SpotInstanceStrategy: string
-  // ): Promise<RunInstancesRequest> {
-  //   const userData = new UserData(this.config);
-
-  //   const params: RunInstancesRequest = {
-  //     ImageId: this.config.ec2AmiId,
-  //     InstanceInitiatedShutdownBehavior: "terminate",
-  //     InstanceMarketOptions: {},
-  //     InstanceType: "",
-  //     MaxCount: 1,
-  //     MinCount: 1,
-  //     SecurityGroupIds: [this.config.ec2SecurityGroupId],
-  //     SubnetId: this.config.ec2SubnetId,
-  //     KeyName: this.config.ec2KeyName,
-  //     Placement: {
-  //       AvailabilityZone: await this.getSubnetAz(),
-  //     },
-  //     TagSpecifications: [
-  //       {
-  //         ResourceType: "instance",
-  //         Tags: this.tags,
-  //       },
-  //     ],
-  //     // <aztec>parity with build-system
-  //     BlockDeviceMappings: [
-  //       {
-  //         DeviceName: "/dev/sda1",
-  //         Ebs: {
-  //           VolumeSize: 32,
-  //         },
-  //       },
-  //     ],
-  //     // parity with build-system</aztec>
-  //     UserData: await userData.getUserData(),
-  //   };
-
-  //   switch (ec2SpotInstanceStrategy.toLowerCase()) {
-  //     case "besteffort":
-  //     case "spotonly": {
-  //       params.InstanceMarketOptions = {
-  //         MarketType: "spot",
-  //         SpotOptions: {
-  //           InstanceInterruptionBehavior: "terminate",
-  //           SpotInstanceType: "one-time",
-  //         },
-  //       };
-  //       break;
-  //     }
-  //     case "none": {
-  //       params.InstanceMarketOptions = {};
-  //       break;
-  //     }
-  //     default: {
-  //       throw new TypeError("Invalid value for ec2_spot_instance_strategy");
-  //     }
-  //   }
-
-  //   return params;
-  // }
-
   async getInstanceStatus(instanceId: string) {
     const client = await this.getEc2Client();
     try {
@@ -347,7 +262,7 @@ export class Ec2Instance {
     }
   }
 
-  async getInstancesForTags(): Promise<AWS.EC2.Instance[]> {
+  async getInstancesForTags(instanceStatus?: string): Promise<AWS.EC2.Instance[]> {
     const client = await this.getEc2Client();
     const filters: FilterInterface[] = [
       {
@@ -367,6 +282,12 @@ export class Ec2Instance {
       ).Reservations || []) {
         instances = instances.concat(reservation.Instances || []);
       }
+      if (instanceStatus) {
+        // Keep only instances whose state name matches the requested status
+        instances = instances.filter(
+          (instance) => instance?.State?.Name === instanceStatus
+        );
+      }
       return instances;
     } catch (error) {
       core.error(

diff --git a/.github/spot-runner-action/src/main.ts b/.github/spot-runner-action/src/main.ts
@@ -11,10 +11,8 @@ async function pollSpotStatus(
 ): Promise<"usable" | "unusable" | "none"> {
   // 12 iters x 10000 ms = 2 minutes
   for (let iter = 0; iter < 12; iter++) {
-    const instances = await ec2Client.getInstancesForTags();
-    const hasInstance =
-      instances.filter((i) => i.State?.Name === "running").length > 0;
-    if (!hasInstance) {
+    const instances = await ec2Client.getInstancesForTags("running");
+    if (instances.length <= 0) {
       // we need to start an instance
       return "none";
     }
@@ -38,12 +36,15 @@ async function pollSpotStatus(
 async function start() {
   const config = new ActionConfig();
   if (config.subaction === "stop") {
-    await stop();
+    await terminate();
     return;
   } else if (config.subaction === "restart") {
-    await stop();
+    await terminate();
     // then we make a fresh instance
-  } else if (config.subaction !== "start") {
+  } else if (config.subaction === "start") {
+    // Clean up any instances left in the "stopped" state before continuing
+    await terminate("stopped");
+  } else {
     throw new Error("Unexpected subaction: " + config.subaction);
   }
   // subaction is 'start' or 'restart'estart'
@@ -65,7 +66,7 @@ async function start() {
         "Taking down spot we just started. This seems wrong, erroring out."
       );
     }
-    await stop();
+    await terminate();
   }
 
   var ec2SpotStrategies: string[];
@@ -138,13 +139,13 @@ async function start() {
   }
 }
 
-async function stop() {
+async function terminate(instanceStatus?: string) {
   try {
     core.info("Starting instance cleanup");
     const config = new ActionConfig();
     const ec2Client = new Ec2Instance(config);
     const ghClient = new GithubClient(config);
-    const instances = await ec2Client.getInstancesForTags();
+    const instances = await ec2Client.getInstancesForTags(instanceStatus);
     await ec2Client.terminateInstances(instances.map((i) => i.InstanceId!));
     core.info("Clearing previously installed runners");
     const result = await ghClient.removeRunnersWithLabels([config.githubJobId]);
@@ -164,7 +165,7 @@ async function stop() {
   try {
     start();
   } catch (error) {
-    stop();
+    terminate();
     assertIsError(error);
     core.error(error);
     core.setFailed(error.message);

diff --git a/scripts/attach_ebs_cache.sh b/scripts/attach_ebs_cache.sh
@@ -87,8 +87,12 @@ while [ "$(aws ec2 describe-volumes \
   elapsed_time=$((elapsed_time + WAIT_INTERVAL))
 done
 
-# Attach volume to the instance
+# First, make sure this is detached from any instances stuck in stopping state
+aws ec2 detach-volume \
+  --region $REGION \
+  --volume-id $VOLUME_ID || true
 
+# Attach volume to the instance
 aws ec2 attach-volume \
   --region $REGION \
   --volume-id $VOLUME_ID \