diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl
index a3be7ddaef3..28aa6fd2d6d 100644
--- a/enos/enos-scenario-upgrade.hcl
+++ b/enos/enos-scenario-upgrade.hcl
@@ -108,11 +108,14 @@ scenario "upgrade" {
     module = module.test_cluster_health

     variables {
-      nomad_addr  = step.provision_cluster.nomad_addr
-      ca_file     = step.provision_cluster.ca_file
-      cert_file   = step.provision_cluster.cert_file
-      key_file    = step.provision_cluster.key_file
-      nomad_token = step.provision_cluster.nomad_token
+      # connecting to the Nomad API
+      nomad_addr  = step.provision_cluster.nomad_addr
+      ca_file     = step.provision_cluster.ca_file
+      cert_file   = step.provision_cluster.cert_file
+      key_file    = step.provision_cluster.key_file
+      nomad_token = step.provision_cluster.nomad_token
+
+      # configuring assertions
       server_count = var.server_count
       client_count = local.clients_count
       jobs_count   = step.run_initial_workloads.jobs_count
@@ -178,11 +181,14 @@ scenario "upgrade" {
     ]

     variables {
-      nomad_addr  = step.provision_cluster.nomad_addr
-      ca_file     = step.provision_cluster.ca_file
-      cert_file   = step.provision_cluster.cert_file
-      key_file    = step.provision_cluster.key_file
-      nomad_token = step.provision_cluster.nomad_token
+      # connecting to the Nomad API
+      nomad_addr  = step.provision_cluster.nomad_addr
+      ca_file     = step.provision_cluster.ca_file
+      cert_file   = step.provision_cluster.cert_file
+      key_file    = step.provision_cluster.key_file
+      nomad_token = step.provision_cluster.nomad_token
+
+      # driving the upgrade
       servers              = step.provision_cluster.servers
       ssh_key_path         = step.provision_cluster.ssh_key_file
       artifactory_username = var.artifactory_username
@@ -202,11 +208,14 @@ scenario "upgrade" {
     module = module.test_cluster_health

     variables {
-      nomad_addr  = step.provision_cluster.nomad_addr
-      ca_file     = step.provision_cluster.ca_file
-      cert_file   = step.provision_cluster.cert_file
-      key_file    = step.provision_cluster.key_file
-      nomad_token = step.provision_cluster.nomad_token
+      # connecting to the Nomad API
+      nomad_addr  = step.provision_cluster.nomad_addr
+      ca_file     = step.provision_cluster.ca_file
+      cert_file   = step.provision_cluster.cert_file
+      key_file    = step.provision_cluster.key_file
+      nomad_token = step.provision_cluster.nomad_token
+
+      # configuring assertions
       server_count = var.server_count
       client_count = local.clients_count
       jobs_count   = step.run_initial_workloads.jobs_count
@@ -251,14 +260,14 @@ scenario "upgrade" {
     depends_on = [step.server_upgrade_test_cluster_health]

     description = <<-EOF
-    Takes the clients one by one, writes some dynamic metadata to them, 
+    Takes the clients one by one, writes some dynamic metadata to them,
     updates the binary with the new one previously fetched and restarts them.

-    Important: The path where the binary will be placed is hardcoded to match 
+    Important: The path where the binary will be placed is hardcoded to match
     what the provision-cluster module does.
    It can be configurable in the future but for now it is:
-     * "C:/opt/nomad.exe" for windows 
+     * "C:/opt/nomad.exe" for windows
      * "/usr/local/bin/nomad" for linux

    To ensure the clients are upgraded one by one, they use the depends_on meta,
@@ -274,11 +283,14 @@ scenario "upgrade" {
     ]

     variables {
-      nomad_addr  = step.provision_cluster.nomad_addr
-      ca_file     = step.provision_cluster.ca_file
-      cert_file   = step.provision_cluster.cert_file
-      key_file    = step.provision_cluster.key_file
-      nomad_token = step.provision_cluster.nomad_token
+      # connecting to the Nomad API
+      nomad_addr  = step.provision_cluster.nomad_addr
+      ca_file     = step.provision_cluster.ca_file
+      cert_file   = step.provision_cluster.cert_file
+      key_file    = step.provision_cluster.key_file
+      nomad_token = step.provision_cluster.nomad_token
+
+      # driving the upgrade
       clients              = step.provision_cluster.clients
       ssh_key_path         = step.provision_cluster.ssh_key_file
       artifactory_username = var.artifactory_username
@@ -292,17 +304,20 @@ scenario "upgrade" {
     depends_on = [step.upgrade_clients]

     description = <<-EOF
-    Verify the health of the cluster by checking the status of all servers, nodes, 
+    Verify the health of the cluster by checking the status of all servers, nodes,
     jobs and allocs and stopping random allocs to check for correct reschedules"
     EOF

     module = module.test_cluster_health

     variables {
-      nomad_addr  = step.provision_cluster.nomad_addr
-      ca_file     = step.provision_cluster.ca_file
-      cert_file   = step.provision_cluster.cert_file
-      key_file    = step.provision_cluster.key_file
-      nomad_token = step.provision_cluster.nomad_token
+      # connecting to the Nomad API
+      nomad_addr  = step.provision_cluster.nomad_addr
+      ca_file     = step.provision_cluster.ca_file
+      cert_file   = step.provision_cluster.cert_file
+      key_file    = step.provision_cluster.key_file
+      nomad_token = step.provision_cluster.nomad_token
+
+      # configuring assertions
       server_count = var.server_count
       client_count = local.clients_count
       jobs_count   = step.run_initial_workloads.jobs_count
diff --git a/enos/modules/fetch_artifactory/scripts/install.sh b/enos/modules/fetch_artifactory/scripts/install.sh
index bf9249fad29..de49644e3c1 100755
--- a/enos/modules/fetch_artifactory/scripts/install.sh
+++ b/enos/modules/fetch_artifactory/scripts/install.sh
@@ -2,25 +2,15 @@
 # Copyright (c) HashiCorp, Inc.
 # SPDX-License-Identifier: BUSL-1.1

-set -xeuo pipefail
+set -euo pipefail

 wget --header="Authorization: Bearer $TOKEN" -O "$LOCAL_ZIP" "$URL"

-if [ $? -eq 0 ]; then
-    echo "File downloaded successfully: $LOCAL_ZIP"
-else
-    echo "Error downloading file." >&2
-    exit 1
-fi
+echo "File downloaded to $LOCAL_ZIP"

 mkdir -p "$BINARY_PATH"

 unzip -o "$LOCAL_ZIP" -d "$BINARY_PATH"

-if [ $? -eq 0 ]; then
-    echo "File unzipped successfully to $BINARY_PATH"
-else
-    echo "Error unzipping file." >&2
-    exit 1
-fi
+echo "File unzipped to $BINARY_PATH"

 rm "$LOCAL_ZIP"
diff --git a/enos/modules/run_workloads/scripts/wait_for_nomad_api.sh b/enos/modules/run_workloads/scripts/wait_for_nomad_api.sh
old mode 100644
new mode 100755
index 4e325446e09..cf38b0c6ab1
--- a/enos/modules/run_workloads/scripts/wait_for_nomad_api.sh
+++ b/enos/modules/run_workloads/scripts/wait_for_nomad_api.sh
@@ -2,7 +2,7 @@
 # Copyright (c) HashiCorp, Inc.
 # SPDX-License-Identifier: BUSL-1.1

-set -xeuo pipefail
+set -euo pipefail

 TIMEOUT=10
 INTERVAL=2
diff --git a/enos/modules/test_cluster_health/scripts/allocs.sh b/enos/modules/test_cluster_health/scripts/allocs.sh
index 41ad7b274fc..f8cc5abe5d9 100755
--- a/enos/modules/test_cluster_health/scripts/allocs.sh
+++ b/enos/modules/test_cluster_health/scripts/allocs.sh
@@ -5,38 +5,43 @@
 set -euo pipefail

 error_exit() {
-    printf 'Error: %s' "${1}" 
+    printf 'Error: %s' "${1}"
     exit 1
 }

-MAX_WAIT_TIME=40
+MAX_WAIT_TIME=120
 POLL_INTERVAL=2
 elapsed_time=0

 # Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running

-while true; do
-    allocs=$(nomad alloc status -json)
-    if [ $? -ne 0 ]; then
-        error_exit "Error running 'nomad alloc status': $allocs"
-    fi
+running_allocs=
+allocs_length=

-    running_allocs=$(echo $allocs | jq '[.[] | select(.ClientStatus == "running")]')
-    allocs_length=$(echo $running_allocs | jq 'length')
-    if [ -z "$allocs_length" ]; then
-        error_exit "No allocs found"
-    fi
+checkAllocsCount() {
+    local allocs
+    allocs=$(nomad alloc status -json) || error_exit "Failed to check alloc status"
+
+    running_allocs=$(echo "$allocs" | jq '[.[] | select(.ClientStatus == "running")]')
+    allocs_length=$(echo "$running_allocs" | jq 'length') \
+        || error_exit "Invalid alloc status -json output"

     if [ "$allocs_length" -eq "$ALLOC_COUNT" ]; then
-        break
+        return 0
     fi
+    return 1
+}
+
+while true; do
+    checkAllocsCount && break
+
     if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
-        error_exit "Some allocs are not running:\n$(nomad alloc status -json | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
-        error_exit "Unexpected number of ready clients: $clients_length"
+        error_exit "Some allocs are not running:\n$(nomad alloc status -json | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
     fi

-    echo "Running allocs: $$running_allocs, expected "$ALLOC_COUNT". Waiting for $elapsed_time Retrying in $POLL_INTERVAL seconds..."
+    echo "Running allocs: $allocs_length, expected $ALLOC_COUNT. Waited $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..."
     sleep $POLL_INTERVAL
     elapsed_time=$((elapsed_time + POLL_INTERVAL))
 done
@@ -48,19 +53,16 @@ echo "All ALLOCS are running."
 random_index=$((RANDOM % allocs_length))
 random_alloc_id=$(echo "$running_allocs" | jq -r ".[${random_index}].ID")

-error_ms=$(nomad alloc stop "$random_alloc_id" 2>&1)
-if [ $? -ne 0 ]; then
-    error_exit "Failed to stop allocation $random_alloc_id. Error: $error_msg"
-fi
+nomad alloc stop "$random_alloc_id" \
+    || error_exit "Failed to stop allocation $random_alloc_id"

 echo "Waiting for allocation $random_alloc_id to reach 'complete' status..."
 elapsed_time=0

 while true; do
-    alloc_status=$(nomad alloc status -json "$random_alloc_id" | jq -r '.ClientStatus')
-
-    if [ "$alloc_status" == "complete" ]; then
-        break
+    alloc_status=$(nomad alloc status -json "$random_alloc_id" | jq -r '.ClientStatus')
+    if [ "$alloc_status" == "complete" ]; then
+        break
     fi

     if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
@@ -76,18 +78,17 @@ echo "Waiting for all the allocations to be running again"
 elapsed_time=0

 while true; do
-    new_allocs=$(nomad alloc status -json | jq '[.[] | select(.ClientStatus == "running")]')
-    running_new_allocs=$(echo "$new_allocs" | jq 'length')
-
-    if [ "$running_new_allocs" == "$ALLOC_COUNT" ]; then
-        break
-    fi
-
+    # reset
+    running_allocs=
+    allocs_length=
+
+    checkAllocsCount && break
+
     if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
-        error_exit "Expected $ALLOC_COUNT running allocations, found $running_new_allocs after $elapsed_time seconds"
+        error_exit "Expected $ALLOC_COUNT running allocations, found $allocs_length after $elapsed_time seconds"
     fi

-    echo "Expected $ALLOC_COUNT running allocations, found $running_new_allocs Retrying in $POLL_INTERVAL seconds..."
+    echo "Expected $ALLOC_COUNT running allocations, found $allocs_length. Retrying in $POLL_INTERVAL seconds..."
     sleep $POLL_INTERVAL
     elapsed_time=$((elapsed_time + POLL_INTERVAL))
 done
diff --git a/enos/modules/test_cluster_health/scripts/clients.sh b/enos/modules/test_cluster_health/scripts/clients.sh
index 7895214dbfe..3a5e480ff70 100755
--- a/enos/modules/test_cluster_health/scripts/clients.sh
+++ b/enos/modules/test_cluster_health/scripts/clients.sh
@@ -5,7 +5,7 @@
 set -euo pipefail

 error_exit() {
-    printf 'Error: %s' "${1}" 
+    printf 'Error: %s' "${1}"
     exit 1
 }

@@ -15,32 +15,43 @@
 MAX_WAIT_TIME=20  # Maximum wait time in seconds
 POLL_INTERVAL=2   # Interval between status checks
 elapsed_time=0
+ready_clients=
+last_error=

-while true; do
-    clients_length=$(nomad node status -json | jq '[.[] | select(.Status == "ready")] | length')
+checkReadyClients() {
+    local clients_length
+
+    ready_clients=$(nomad node status -json | jq '[.[] | select(.Status == "ready")]') ||
+        error_exit "Could not query node status"
+    clients_length=$(echo "$ready_clients" | jq 'length')

     if [ "$clients_length" -eq "$CLIENT_COUNT" ]; then
-        break
+        last_error=
+        return 0
     fi

-    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
-        error_exit "Unexpected number of ready clients: $clients_length"
-    fi
+    last_error="Unexpected number of ready clients: $clients_length"
+    return 1
+}

-    sleep "$POLL_INTERVAL"
-    elapsed_time=$((elapsed_time + POLL_INTERVAL))
-done
+checkEligibleClients() {
+    echo "$ready_clients" | jq -e '
+        map(select(.SchedulingEligibility != "eligible")) | length == 0' && return 0

-clients=$(nomad node status -json)
-running_clients=$(echo "$clients" | jq '[.[] | select(.Status == "ready")]')
+    last_error=$(echo "$ready_clients" | jq -r '
+        map(select(.SchedulingEligibility != "eligible")) | "\(.[].ID) is ineligible"')
+    return 1
+}

-echo "$running_clients" | jq -c '.[]' | while read -r node; do
-    status=$(echo "$node" | jq -r '.Status')
-    eligibility=$(echo "$node" | jq -r '.SchedulingEligibility')
+while true; do
+    checkReadyClients && checkEligibleClients && break

-    if [ "$eligibility" != "eligible" ]; then
-        error_exit "Client $(echo "$node" | jq -r '.Name') is not eligible!"
+ if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then + error_exit "$last_error" fi + + sleep "$POLL_INTERVAL" + elapsed_time=$((elapsed_time + POLL_INTERVAL)) done -echo "All CLIENTS are eligible and running." +echo "All clients are eligible and running." diff --git a/enos/modules/test_cluster_health/scripts/jobs.sh b/enos/modules/test_cluster_health/scripts/jobs.sh index c338b985d61..167a6650f1b 100755 --- a/enos/modules/test_cluster_health/scripts/jobs.sh +++ b/enos/modules/test_cluster_health/scripts/jobs.sh @@ -5,7 +5,7 @@ set -euo pipefail error_exit() { - printf 'Error: %s' "${1}" + printf 'Error: %s' "${1}" exit 1 } diff --git a/enos/modules/test_cluster_health/scripts/servers.sh b/enos/modules/test_cluster_health/scripts/servers.sh index 40756c0a0e7..39d6953897e 100755 --- a/enos/modules/test_cluster_health/scripts/servers.sh +++ b/enos/modules/test_cluster_health/scripts/servers.sh @@ -5,7 +5,7 @@ set -euo pipefail error_exit() { - printf 'Error: %s' "${1}" + printf 'Error: %s' "${1}" exit 1 } @@ -13,58 +13,80 @@ MAX_WAIT_TIME=40 POLL_INTERVAL=2 elapsed_time=0 +last_error= +leader_last_index= +leader_last_term= # Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all alive -while true; do - servers=$(nomad operator autopilot health -json) - servers_healthy=$(echo "$servers" | jq -r '[.Servers[] | select(.Healthy == true) | .ID] | length') +checkAutopilotHealth() { + local autopilotHealth servers_healthy leader + autopilotHealth=$(nomad operator autopilot health -json) || return 1 + servers_healthy=$(echo "$autopilotHealth" | + jq -r '[.Servers[] | select(.Healthy == true) | .ID] | length') if [ "$servers_healthy" -eq 0 ]; then error_exit "No servers found." fi if [ "$servers_healthy" -eq "$SERVER_COUNT" ]; then - break + leader=$(echo "$autopilotHealth" | jq -r '.Servers[] | select(.Leader == true)') + leader_last_index=$(echo "$leader" | jq -r '.LastIndex') + leader_last_term=$(echo "$leader" | jq -r '.LastTerm') + return 0 fi + last_error="Expected $SERVER_COUNT healthy servers but have $servers_healthy" + return 1 +} + +while true; do + checkAutopilotHealth && break + if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then - error_exit "Unexpected number of healthy servers: $servers_healthy after $elapsed_time seconds." + error_exit "$last_error after $elapsed_time seconds." fi - echo "Servers found: $servers_healthy, expected: $SERVER_COUNT. Waiting for $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." + echo "$last_error after $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." sleep "$POLL_INTERVAL" elapsed_time=$((elapsed_time + POLL_INTERVAL)) done + # Quality: nomad_agent_info_self: A GET call to /v1/agent/self against every server returns the same last_log_index as the leader" -# We use the leader's last log index to use as teh measure for the other servers. +# We use the leader's last log index to use as teh measure for the other servers. -leader=$(echo $servers | jq -r '.Servers[] | select(.Leader == true)') -leader_last_index=$(echo $leader | jq -r '.LastIndex') -leader_last_term=$(echo $leader | jq -r '.LastTerm') +checkServerHealth() { + local ip node_info + ip=$1 + echo "Checking server health for $ip" -for ip in $SERVERS; do -while true; do - node_info=$(nomad agent-info -address "https://$ip:4646" -json) - if [ $? 
-ne 0 ]; then - error_exit "Unable to get info for node at $ip" - fi + node_info=$(nomad agent-info -address "https://$ip:4646" -json) \ + || error_exit "Unable to get info for node at $ip" - last_log_index=$(echo "$node_info" | jq -r '.stats.raft.last_log_index') - last_leader_term=$(echo "$node_info" | jq -r '.stats.raft.last_log_term') + last_log_index=$(echo "$node_info" | jq -r '.stats.raft.last_log_index') + last_log_term=$(echo "$node_info" | jq -r '.stats.raft.last_log_term') - if [ "$last_log_index" -ge "$leader_last_index" ] && [ "$last_leader_term" -ge "$leader_last_term" ]; then - break - fi + if [ "$last_log_index" -ge "$leader_last_index" ] && + [ "$last_log_term" -ge "$leader_last_term" ]; then + return 0 + fi + + last_error="Expected node at $ip to have last log index $leader_last_index and last term $leader_last_term, but found $last_log_index and $last_log_term" + return 1 +} + +for ip in $SERVERS; do + while true; do + checkServerHealth "$ip" && break if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then - error_exit "Expected node at $ip to have last log index $leader_last_index and last term $leader_last_term, but found $last_log_index and $last_leader_term after $elapsed_time seconds." + error_exit "$last_error after $elapsed_time seconds." fi - echo "Expected log at $leader_last_index, found $last_log_index. Retrying in $POLL_INTERVAL seconds..." + echo "$last_error after $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." sleep "$POLL_INTERVAL" elapsed_time=$((elapsed_time + POLL_INTERVAL)) - done + done done echo "All servers are alive and up to date." diff --git a/enos/modules/test_cluster_health/scripts/versions.sh b/enos/modules/test_cluster_health/scripts/versions.sh old mode 100644 new mode 100755 diff --git a/enos/modules/test_cluster_health/scripts/wait_for_nomad_api.sh b/enos/modules/test_cluster_health/scripts/wait_for_nomad_api.sh old mode 100644 new mode 100755 index 4e325446e09..cf38b0c6ab1 --- a/enos/modules/test_cluster_health/scripts/wait_for_nomad_api.sh +++ b/enos/modules/test_cluster_health/scripts/wait_for_nomad_api.sh @@ -2,7 +2,7 @@ # Copyright (c) HashiCorp, Inc. # SPDX-License-Identifier: BUSL-1.1 -set -xeuo pipefail +set -euo pipefail TIMEOUT=10 INTERVAL=2 diff --git a/enos/modules/upgrade_clients/scripts/set_metadata.sh b/enos/modules/upgrade_clients/scripts/set_metadata.sh old mode 100644 new mode 100755 index 77ed5a5770e..45fb65981fd --- a/enos/modules/upgrade_clients/scripts/set_metadata.sh +++ b/enos/modules/upgrade_clients/scripts/set_metadata.sh @@ -4,16 +4,15 @@ set -euo pipefail -client_id=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json | jq '.ID' | tr -d '"') -if [ -z "$client_id" ]; then - echo "No client found at $CLIENT_IP" - exit 1 +if ! client_id=$(nomad node status -address "http://$CLIENT_IP:4646" -self -json | jq '.ID' | tr -d '"'); then + echo "No client found at $CLIENT_IP" + exit 1 fi -nomad node meta apply -node-id $client_id node_ip="$CLIENT_IP" nomad_addr=$NOMAD_ADDR -if [ $? -nq 0 ]; then - echo "Failed to set metadata for node: $client_id at $CLIENT_IP" - exit 1 +if ! 
nomad node meta apply \ + -node-id "$client_id" node_ip="$CLIENT_IP" nomad_addr="$NOMAD_ADDR"; then + echo "Failed to set metadata for node: $client_id at $CLIENT_IP" + exit 1 fi echo "Metadata updated in $client_id at $CLIENT_IP" diff --git a/enos/modules/upgrade_clients/scripts/verify_metadata.sh b/enos/modules/upgrade_clients/scripts/verify_metadata.sh old mode 100644 new mode 100755 index 7bf8b86cc5d..898718b6960 --- a/enos/modules/upgrade_clients/scripts/verify_metadata.sh +++ b/enos/modules/upgrade_clients/scripts/verify_metadata.sh @@ -5,7 +5,7 @@ set -euo pipefail error_exit() { - printf 'Error: %s' "${1}" + printf 'Error: %s' "${1}" exit 1 } @@ -13,63 +13,55 @@ MAX_WAIT_TIME=10 # Maximum wait time in seconds POLL_INTERVAL=2 # Interval between status checks elapsed_time=0 +last_error= +client_id= -while true; do - if nomad node status -address "https://$CLIENT_IP:4646" -self &>/dev/null; then - exit 0 +checkClientReady() { + local client client_status + echo "Checking client health for $CLIENT_IP" + + client=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json) || + error_exit "Unable to get info for node at $CLIENT_IP" + + client_status=$(echo "$client" | jq -r '.Status') + if [ "$client_status" == "ready" ]; then + client_id=$(echo "$client" | jq '.ID' | tr -d '"') + last_error= + return 0 fi + last_error="Node at $CLIENT_IP is ${client_status}, not ready" + return 1 +} + +while true; do + checkClientReady && break if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then - error_exit "Node at $NOMAD_ADDR did not become available within $elapsed_time seconds." + error_exit "$last_error within $elapsed_time seconds." exit 1 fi - echo "Node at $NOMAD_ADDR not available yet. Retrying in $POLL_INTERVAL seconds..." + echo "$last_error within $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." sleep "$POLL_INTERVAL" elapsed_time=$((elapsed_time + POLL_INTERVAL)) done -elapsed_time=0 - -while true; do - client=$(nomad node status -address "https://$CLIENT_IP:4646" -self -json) - if [ -z "$client" ]; then - error_exit "No client found at $CLIENT_IP" - fi - - client_status=$(echo $client | jq -r '.Status') - if [ "$client_status" == "ready" ]; then - break - fi - - if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then - error_exit "Client at $CLIENT_IP did not reach 'ready' status within $MAX_WAIT_TIME seconds." - - fi - - echo "Current status: $client_status, not 'ready'. Waiting for $elapsed_time Retrying in $POLL_INTERVAL seconds..." - sleep $POLL_INTERVAL - elapsed_time=$((elapsed_time + POLL_INTERVAL)) -done - # Quality: "nomad_node_metadata: A GET call to /v1/node/:node-id returns the same node.Meta for each node before and after a node upgrade" -client_id=$(echo $client | jq '.ID' | tr -d '"') -client_meta=$(nomad node meta read -json -node-id $client_id) -if [ $? -nq 0 ]; then - echo "Failed to read metadata for node: $client_id" - exit 1 +if ! 
client_meta=$(nomad node meta read -json -node-id "$client_id"); then + echo "Failed to read metadata for node: $client_id" + exit 1 fi -node_ip=$(echo $client_meta | jq -r '.Dynamic.node_ip' ) -if ["$node_ip" != "$CLIENT_IP" ]; then - echo "Wrong value returned for node_ip: $node_ip" +meta_node_ip=$(echo "$client_meta" | jq -r '.Dynamic.node_ip' ) +if [ "$meta_node_ip" != "$CLIENT_IP" ]; then + echo "Wrong value returned for node_ip: $meta_node_ip" exit 1 fi -nomad_addr=$(echo $client_meta | jq -r '.Dynamic.nomad_addr' ) -if ["$nomad_addr" != $NOMAD_ADDR ]; then - echo "Wrong value returned for nomad_addr: $nomad_addr" +meta_nomad_addr=$(echo "$client_meta" | jq -r '.Dynamic.nomad_addr' ) +if [ "$meta_nomad_addr" != "$NOMAD_ADDR" ]; then + echo "Wrong value returned for nomad_addr: $meta_nomad_addr" exit 1 fi diff --git a/enos/modules/upgrade_clients/scripts/wait_for_nomad_api.sh b/enos/modules/upgrade_clients/scripts/wait_for_nomad_api.sh old mode 100644 new mode 100755 diff --git a/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh b/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh old mode 100644 new mode 100755 index f57021f5fdd..fbe93181aee --- a/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh +++ b/enos/modules/upgrade_servers/scripts/wait_for_stable_cluster.sh @@ -5,57 +5,79 @@ set -euo pipefail error_exit() { - printf 'Error: %s' "${1}" + printf 'Error: %s' "${1}" exit 1 } -MAX_WAIT_TIME=40 +MAX_WAIT_TIME=10 #40 POLL_INTERVAL=2 elapsed_time=0 +last_config_index= +last_error= -while true; do - servers=$(nomad operator api /v1/operator/raft/configuration) - leader=$(echo $servers | jq -r '[.Servers[] | select(.Leader == true)']) - echo $servers | jq '.' - echo $leader - if [ $(echo "$leader" | jq 'length') -eq 1 ]; then - break +checkRaftConfiguration() { + local raftConfig leader + raftConfig=$(nomad operator api /v1/operator/raft/configuration) || return 1 + leader=$(echo "$raftConfig" | jq -r '[.Servers[] | select(.Leader == true)']) + + echo "$raftConfig" | jq '.' + echo "$leader" + if [ "$(echo "$leader" | jq 'length')" -eq 1 ]; then + last_config_index=$(echo "$raftConfig" | jq -r '.Index') + echo "last_config_index: $last_config_index" + return 0 fi + last_error="No leader found" + return 1 +} + +while true; do + checkRaftConfiguration && break if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then - error_exit "No leader found after $elapsed_time seconds." + error_exit "${last_error} after $elapsed_time seconds." fi - echo "No leader found yet after $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." + echo "${last_error} after $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." sleep "$POLL_INTERVAL" elapsed_time=$((elapsed_time + POLL_INTERVAL)) done -last_config_index=$(echo $servers | jq -r '.Index') -echo "last_config_index: $last_config_index" -for ip in $SERVERS; do -while true; do - echo $ip - node_info=$(nomad agent-info -address "https://$ip:4646" -json) - if [ $? 
-ne 0 ]; then - error_exit "Unable to get info for node at $ip" - fi +# reset timer +elapsed_time=0 +last_log_index= - last_log_index=$(echo "$node_info" | jq -r '.stats.raft.last_log_index') - if [ "$last_log_index" -ge "$last_config_index" ]; then - break - fi +checkServerHealth() { + local ip node_info + ip=$1 + echo "Checking server health for $ip" + + node_info=$(nomad agent-info -address "https://$ip:4646" -json) \ + || error_exit "Unable to get info for node at $ip" + + last_log_index=$(echo "$node_info" | jq -r '.stats.raft.last_log_index') + if [ "$last_log_index" -ge "$last_config_index" ]; then + return 0 + fi + + last_error="Expected node at $ip to have last log index at least $last_config_index but found $last_log_index" + return 1 +} + +for ip in $SERVERS; do + while true; do + checkServerHealth "$ip" && break if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then - error_exit "Expected node at $ip to have last log index at least $last_config_index but found $last_log_index after $elapsed_time seconds." + error_exit "$last_error after $elapsed_time seconds." fi - echo "Expected log at $leader_last_index, found $last_log_index. Retrying in $POLL_INTERVAL seconds..." + echo "${last_error} after $elapsed_time seconds. Retrying in $POLL_INTERVAL seconds..." sleep "$POLL_INTERVAL" elapsed_time=$((elapsed_time + POLL_INTERVAL)) - done + done done echo "All servers are alive and up to date."
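
Note: the scripts above all converge on the same retry shape: a check function that returns 0 on success, stashes a human-readable reason in last_error on failure, and an outer loop that gives up after MAX_WAIT_TIME. A shared helper could remove the remaining duplication across modules. The sketch below is only an illustration of that pattern under the same conventions; the poll_until name and the example check are hypothetical and not part of this patch.

#!/usr/bin/env bash
# Sketch of a shared polling helper, assuming the conventions used above:
# a check function returns 0 on success, non-zero on failure, and writes a
# human-readable reason into $last_error.
set -euo pipefail

POLL_INTERVAL=2
last_error=

poll_until() {
    # poll_until <max_wait_seconds> <check_function> [args...]
    local max_wait=$1 check=$2
    shift 2
    local elapsed=0

    while ! "$check" "$@"; do
        if [ "$elapsed" -ge "$max_wait" ]; then
            printf 'Error: %s after %s seconds.\n' "$last_error" "$elapsed"
            return 1
        fi
        echo "$last_error; retrying in $POLL_INTERVAL seconds..."
        sleep "$POLL_INTERVAL"
        elapsed=$((elapsed + POLL_INTERVAL))
    done
}

# Example check in the same style as checkAllocsCount/checkReadyClients:
# succeeds once the expected number of allocations is running.
checkAllocsRunning() {
    local want=$1 have
    have=$(nomad alloc status -json |
        jq '[.[] | select(.ClientStatus == "running")] | length') || {
        last_error="could not query allocation status"
        return 1
    }
    [ "$have" -eq "$want" ] && return 0
    last_error="expected $want running allocations, found $have"
    return 1
}

# Usage: wait up to 120 seconds for 5 running allocations.
poll_until 120 checkAllocsRunning 5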